### What problem does this PR solve?

- Fix typo
- Remove unused import
- Format code

### Type of change

- [x] Other (please describe): typo and format

tags/v0.12.0
| num = 0 | num = 0 | ||||
| try: | try: | ||||
| for u in cls.query(tenant_id = tenant_id, llm_name=mdlnm): | |||||
| num += cls.model.update(used_tokens = u.used_tokens + used_tokens)\ | |||||
| for u in cls.query(tenant_id=tenant_id, llm_name=mdlnm): | |||||
| num += cls.model.update(used_tokens=u.used_tokens + used_tokens)\ | |||||
| .where(cls.model.tenant_id == tenant_id, cls.model.llm_name == mdlnm)\ | .where(cls.model.tenant_id == tenant_id, cls.model.llm_name == mdlnm)\ | ||||
| .execute() | .execute() | ||||
| except Exception as e: | except Exception as e: | ||||
| return | return | ||||
| yield chunk | yield chunk | ||||
| def chat(self, system, history, gen_conf): | def chat(self, system, history, gen_conf): | ||||
| txt, used_tokens = self.mdl.chat(system, history, gen_conf) | txt, used_tokens = self.mdl.chat(system, history, gen_conf) | ||||
| if not TenantLLMService.increase_usage( | if not TenantLLMService.increase_usage( |
| # See the License for the specific language governing permissions and | # See the License for the specific language governing permissions and | ||||
| # limitations under the License. | # limitations under the License. | ||||
| # | # | ||||
| import re | |||||
| from concurrent.futures import ThreadPoolExecutor | from concurrent.futures import ThreadPoolExecutor | ||||
| import json | import json | ||||
| from functools import reduce | from functools import reduce | ||||
| from api.db.services.user_service import TenantService | from api.db.services.user_service import TenantService | ||||
| from graphrag.community_reports_extractor import CommunityReportsExtractor | from graphrag.community_reports_extractor import CommunityReportsExtractor | ||||
| from graphrag.entity_resolution import EntityResolution | from graphrag.entity_resolution import EntityResolution | ||||
| from graphrag.graph_extractor import GraphExtractor | |||||
| from graphrag.graph_extractor import GraphExtractor, DEFAULT_ENTITY_TYPES | |||||
| from graphrag.mind_map_extractor import MindMapExtractor | from graphrag.mind_map_extractor import MindMapExtractor | ||||
| from rag.nlp import rag_tokenizer | from rag.nlp import rag_tokenizer | ||||
| from rag.utils import num_tokens_from_string | from rag.utils import num_tokens_from_string | ||||
| return g | return g | ||||
| def build_knowlege_graph_chunks(tenant_id: str, chunks: List[str], callback, entity_types=["organization", "person", "location", "event", "time"]): | |||||
| def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, entity_types=DEFAULT_ENTITY_TYPES): | |||||
| _, tenant = TenantService.get_by_id(tenant_id) | _, tenant = TenantService.get_by_id(tenant_id) | ||||
| llm_bdl = LLMBundle(tenant_id, LLMType.CHAT, tenant.llm_id) | llm_bdl = LLMBundle(tenant_id, LLMType.CHAT, tenant.llm_id) | ||||
| ext = GraphExtractor(llm_bdl) | ext = GraphExtractor(llm_bdl) |
| import re | import re | ||||
| from graphrag.index import build_knowlege_graph_chunks | |||||
| from graphrag.index import build_knowledge_graph_chunks | |||||
| from rag.app import naive | from rag.app import naive | ||||
| from rag.nlp import rag_tokenizer, tokenize_chunks | from rag.nlp import rag_tokenizer, tokenize_chunks | ||||
| parser_config["layout_recognize"] = False | parser_config["layout_recognize"] = False | ||||
| sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True, | sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True, | ||||
| parser_config=parser_config, callback=callback) | parser_config=parser_config, callback=callback) | ||||
| chunks = build_knowlege_graph_chunks(tenant_id, sections, callback, | |||||
| parser_config.get("entity_types", ["organization", "person", "location", "event", "time"]) | |||||
| ) | |||||
| chunks = build_knowledge_graph_chunks(tenant_id, sections, callback, | |||||
| parser_config.get("entity_types", ["organization", "person", "location", "event", "time"]) | |||||
| ) | |||||
| for c in chunks: c["docnm_kwd"] = filename | for c in chunks: c["docnm_kwd"] = filename | ||||
| doc = { | doc = { |
| from openai import OpenAI | from openai import OpenAI | ||||
| import openai | import openai | ||||
| from ollama import Client | from ollama import Client | ||||
| from volcengine.maas.v2 import MaasService | |||||
| from rag.nlp import is_english | from rag.nlp import is_english | ||||
| from rag.utils import num_tokens_from_string | from rag.utils import num_tokens_from_string | ||||
| from groq import Groq | from groq import Groq | ||||
| import requests | import requests | ||||
| import asyncio | import asyncio | ||||
| class Base(ABC): | class Base(ABC): | ||||
| def __init__(self, key, model_name, base_url): | def __init__(self, key, model_name, base_url): | ||||
| self.client = OpenAI(api_key=key, base_url=base_url) | self.client = OpenAI(api_key=key, base_url=base_url) |
| def num_tokens_from_string(string: str) -> int: | def num_tokens_from_string(string: str) -> int: | ||||
| """Returns the number of tokens in a text string.""" | """Returns the number of tokens in a text string.""" | ||||
| try: | try: | ||||
| num_tokens = len(encoder.encode(string)) | |||||
| return num_tokens | |||||
| except Exception as e: | |||||
| pass | |||||
| return 0 | |||||
| return len(encoder.encode(string)) | |||||
| except Exception: | |||||
| return 0 | |||||
| def truncate(string: str, max_len: int) -> str: | def truncate(string: str, max_len: int) -> str: |