Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

googlescholar.py 2.7KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. from abc import ABC
  17. import pandas as pd
  18. from agent.settings import DEBUG
  19. from agent.component.base import ComponentBase, ComponentParamBase
  20. from scholarly import scholarly
  21. class GoogleScholarParam(ComponentParamBase):
  22. """
  23. Define the GoogleScholar component parameters.
  24. """
  25. def __init__(self):
  26. super().__init__()
  27. self.top_n = 6
  28. self.sort_by = 'relevance'
  29. self.year_low = None
  30. self.year_high = None
  31. self.patents = True
  32. def check(self):
  33. self.check_positive_integer(self.top_n, "Top N")
  34. self.check_valid_value(self.sort_by, "GoogleScholar Sort_by", ['date', 'relevance'])
  35. self.check_boolean(self.patents, "Whether or not to include patents, defaults to True")
  36. class GoogleScholar(ComponentBase, ABC):
  37. component_name = "GoogleScholar"
  38. def _run(self, history, **kwargs):
  39. ans = self.get_input()
  40. ans = " - ".join(ans["content"]) if "content" in ans else ""
  41. if not ans:
  42. return GoogleScholar.be_output("")
  43. scholar_client = scholarly.search_pubs(ans, patents=self._param.patents, year_low=self._param.year_low,
  44. year_high=self._param.year_high, sort_by=self._param.sort_by)
  45. scholar_res = []
  46. for i in range(self._param.top_n):
  47. try:
  48. pub = next(scholar_client)
  49. scholar_res.append({"content": 'Title: ' + pub['bib']['title'] + '\n_Url: <a href="' + pub[
  50. 'pub_url'] + '"></a> ' + "\n author: " + ",".join(pub['bib']['author']) + '\n Abstract: ' + pub[
  51. 'bib'].get('abstract', 'no abstract')})
  52. except StopIteration or Exception as e:
  53. print("**ERROR** " + str(e))
  54. break
  55. if not scholar_res:
  56. return GoogleScholar.be_output("")
  57. df = pd.DataFrame(scholar_res)
  58. if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
  59. return df