瀏覽代碼

style: fix typo and format code (#2618)

### What problem does this PR solve?

- Fix typo
- Remove unused import
- Format code

### Type of change

- [x] Other (please describe): typo fix and code formatting
tags/v0.12.0
yqkcn 1 年之前
父節點
當前提交
34abcf7704
沒有連結到貢獻者的電子郵件帳戶。
共有 5 個檔案被更改，包括 12 行新增和 16 行刪除
  1. 2
    3
      api/db/services/llm_service.py
  2. 2
    3
      graphrag/index.py
  3. 4
    4
      rag/app/knowledge_graph.py
  4. 1
    1
      rag/llm/chat_model.py
  5. 3
    5
      rag/utils/__init__.py

+ 2
- 3
api/db/services/llm_service.py 查看文件



num = 0 num = 0
try: try:
for u in cls.query(tenant_id = tenant_id, llm_name=mdlnm):
num += cls.model.update(used_tokens = u.used_tokens + used_tokens)\
for u in cls.query(tenant_id=tenant_id, llm_name=mdlnm):
num += cls.model.update(used_tokens=u.used_tokens + used_tokens)\
.where(cls.model.tenant_id == tenant_id, cls.model.llm_name == mdlnm)\ .where(cls.model.tenant_id == tenant_id, cls.model.llm_name == mdlnm)\
.execute() .execute()
except Exception as e: except Exception as e:
return return
yield chunk yield chunk


def chat(self, system, history, gen_conf): def chat(self, system, history, gen_conf):
txt, used_tokens = self.mdl.chat(system, history, gen_conf) txt, used_tokens = self.mdl.chat(system, history, gen_conf)
if not TenantLLMService.increase_usage( if not TenantLLMService.increase_usage(

+ 2
- 3
graphrag/index.py 查看文件

# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import re
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
import json import json
from functools import reduce from functools import reduce
from api.db.services.user_service import TenantService from api.db.services.user_service import TenantService
from graphrag.community_reports_extractor import CommunityReportsExtractor from graphrag.community_reports_extractor import CommunityReportsExtractor
from graphrag.entity_resolution import EntityResolution from graphrag.entity_resolution import EntityResolution
from graphrag.graph_extractor import GraphExtractor
from graphrag.graph_extractor import GraphExtractor, DEFAULT_ENTITY_TYPES
from graphrag.mind_map_extractor import MindMapExtractor from graphrag.mind_map_extractor import MindMapExtractor
from rag.nlp import rag_tokenizer from rag.nlp import rag_tokenizer
from rag.utils import num_tokens_from_string from rag.utils import num_tokens_from_string
return g return g




def build_knowlege_graph_chunks(tenant_id: str, chunks: List[str], callback, entity_types=["organization", "person", "location", "event", "time"]):
def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, entity_types=DEFAULT_ENTITY_TYPES):
_, tenant = TenantService.get_by_id(tenant_id) _, tenant = TenantService.get_by_id(tenant_id)
llm_bdl = LLMBundle(tenant_id, LLMType.CHAT, tenant.llm_id) llm_bdl = LLMBundle(tenant_id, LLMType.CHAT, tenant.llm_id)
ext = GraphExtractor(llm_bdl) ext = GraphExtractor(llm_bdl)

+ 4
- 4
rag/app/knowledge_graph.py 查看文件

import re import re


from graphrag.index import build_knowlege_graph_chunks
from graphrag.index import build_knowledge_graph_chunks
from rag.app import naive from rag.app import naive
from rag.nlp import rag_tokenizer, tokenize_chunks from rag.nlp import rag_tokenizer, tokenize_chunks


parser_config["layout_recognize"] = False parser_config["layout_recognize"] = False
sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True, sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
parser_config=parser_config, callback=callback) parser_config=parser_config, callback=callback)
chunks = build_knowlege_graph_chunks(tenant_id, sections, callback,
parser_config.get("entity_types", ["organization", "person", "location", "event", "time"])
)
chunks = build_knowledge_graph_chunks(tenant_id, sections, callback,
parser_config.get("entity_types", ["organization", "person", "location", "event", "time"])
)
for c in chunks: c["docnm_kwd"] = filename for c in chunks: c["docnm_kwd"] = filename


doc = { doc = {

+ 1
- 1
rag/llm/chat_model.py 查看文件

from openai import OpenAI from openai import OpenAI
import openai import openai
from ollama import Client from ollama import Client
from volcengine.maas.v2 import MaasService
from rag.nlp import is_english from rag.nlp import is_english
from rag.utils import num_tokens_from_string from rag.utils import num_tokens_from_string
from groq import Groq from groq import Groq
import requests import requests
import asyncio import asyncio



class Base(ABC): class Base(ABC):
def __init__(self, key, model_name, base_url): def __init__(self, key, model_name, base_url):
self.client = OpenAI(api_key=key, base_url=base_url) self.client = OpenAI(api_key=key, base_url=base_url)

+ 3
- 5
rag/utils/__init__.py 查看文件

def num_tokens_from_string(string: str) -> int: def num_tokens_from_string(string: str) -> int:
"""Returns the number of tokens in a text string.""" """Returns the number of tokens in a text string."""
try: try:
num_tokens = len(encoder.encode(string))
return num_tokens
except Exception as e:
pass
return 0
return len(encoder.encode(string))
except Exception:
return 0




def truncate(string: str, max_len: int) -> str: def truncate(string: str, max_len: int) -> str:

Loading…
取消
儲存