浏览代码

Add component arxiv (#1587)

### What problem does this PR solve?


### Type of change

- [x] New Feature (non-breaking change which adds functionality)
tags/v0.9.0
H 1年前
父节点
当前提交
4da3ee400b
没有帐户链接到提交者的电子邮件
共有 5 个文件被更改，包括 72 次插入和 0 次删除
  1. 1
    0
      graph/component/__init__.py
  2. 68
    0
      graph/component/arxiv.py
  3. 1
    0
      requirements.txt
  4. 1
    0
      requirements_arm.txt
  5. 1
    0
      requirements_dev.txt

+ 1
- 0
graph/component/__init__.py 查看文件

from .duckduckgo import DuckDuckGo, DuckDuckGoParam from .duckduckgo import DuckDuckGo, DuckDuckGoParam
from .wikipedia import Wikipedia, WikipediaParam from .wikipedia import Wikipedia, WikipediaParam
from .pubmed import PubMed, PubMedParam from .pubmed import PubMed, PubMedParam
from .arxiv import ArXiv, ArXivParam




def component_class(class_name): def component_class(class_name):

+ 68
- 0
graph/component/arxiv.py 查看文件

#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import random
from abc import ABC
from functools import partial
import arxiv
import pandas as pd
from graph.settings import DEBUG
from graph.component.base import ComponentBase, ComponentParamBase


class ArXivParam(ComponentParamBase):
    """Parameter set for the ArXiv search component.

    Attributes are plain instance fields so the base parameter class can
    handle them like any other component parameter.
    """

    def __init__(self):
        super().__init__()
        # Number of search results requested (passed as ``max_results``
        # to the arXiv search by the ArXiv component).
        self.top_n = 6
        # Name of the ordering applied to the results; must be one of the
        # values accepted by ``check``.
        self.sort_by = 'submittedDate'

    def check(self):
        """Validate the parameters via the base-class check helpers.

        ``top_n`` is checked as a positive integer and ``sort_by`` is
        checked against the supported sort keys.
        """
        allowed_sort_keys = ['submittedDate', 'lastUpdatedDate', 'relevance']
        self.check_positive_integer(self.top_n, "Top N")
        self.check_valid_value(
            self.sort_by,
            "ArXiv Search Sort_by",
            allowed_sort_keys,
        )


class ArXiv(ComponentBase, ABC):
    """Component that queries arXiv with the upstream text.

    The joined upstream "content" values form the search query; each hit is
    rendered as a single "content" string holding its title, PDF link and
    summary.
    """
    component_name = "ArXiv"

    def _run(self, history, **kwargs):
        """Search arXiv and return the hits as a one-column DataFrame.

        Args:
            history: Not used by this component.
            **kwargs: Not used by this component.

        Returns:
            A ``pandas.DataFrame`` with a ``content`` column (one row per
            hit), or ``ArXiv.be_output("")`` when there is no query text or
            the search yields nothing.
        """
        ans = self.get_input()
        # Upstream output is expected to expose a "content" collection of
        # strings; all entries are merged into one query string.
        ans = " - ".join(ans["content"]) if "content" in ans else ""
        if not ans:
            return ArXiv.be_output("")

        # Map the validated parameter value onto the library's enum.
        sort_choices = {"relevance": arxiv.SortCriterion.Relevance,
                        "lastUpdatedDate": arxiv.SortCriterion.LastUpdatedDate,
                        'submittedDate': arxiv.SortCriterion.SubmittedDate}
        arxiv_client = arxiv.Client()
        search = arxiv.Search(
            query=ans,
            max_results=self._param.top_n,
            sort_by=sort_choices[self._param.sort_by]
        )
        # ``pdf_url`` is None when a result carries no PDF link; fall back
        # to an empty string so one such entry does not raise TypeError and
        # discard the whole result set.
        arxiv_res = [
            {"content": 'Title: ' + i.title
                        + '\nPdf_Url: <a href="' + (i.pdf_url or "") + '"></a> \nSummary: '
                        + i.summary}
            for i in arxiv_client.results(search)
        ]

        if not arxiv_res:
            return ArXiv.be_output("")

        df = pd.DataFrame(arxiv_res)
        if DEBUG:
            print(df, ":::::::::::::::::::::::::::::::::")
        return df

+ 1
- 0
requirements.txt 查看文件

arxiv==2.1.3
Aspose.Slides==24.2.0 Aspose.Slides==24.2.0
BCEmbedding==0.1.3 BCEmbedding==0.1.3
Bio==1.7.1 Bio==1.7.1

+ 1
- 0
requirements_arm.txt 查看文件

groq==0.9.0 groq==0.9.0
wikipedia==1.4.0 wikipedia==1.4.0
Bio==1.7.1 Bio==1.7.1
arxiv==2.1.3

+ 1
- 0
requirements_dev.txt 查看文件

groq==0.9.0 groq==0.9.0
wikipedia==1.4.0 wikipedia==1.4.0
Bio==1.7.1 Bio==1.7.1
arxiv==2.1.3

正在加载...
取消
保存