
Code refactor. (#4291)

### What problem does this PR solve?

### Type of change

- [x] Refactoring
Kevin Hu · 10 months ago · commit 8fb18f37f6

agent/component/answer.py (+10 -0)

 import random
 from abc import ABC
 from functools import partial
+from typing import Tuple, Union

 import pandas as pd


     def set_exception(self, e):
         self.exception = e

+    def output(self, allow_partial=True) -> Tuple[str, Union[pd.DataFrame, partial]]:
+        if allow_partial:
+            return super().output()
+
+        for r, c in self._canvas.history[::-1]:
+            if r == "user":
+                return self._param.output_var_name, pd.DataFrame([{"content": c}])
+
+        return self._param.output_var_name, pd.DataFrame([])
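The new `output` override relies on the `Tuple[str, Union[pd.DataFrame, partial]]` contract: a component returns its output variable name plus either a materialized DataFrame or a `functools.partial` to be invoked later. A minimal sketch of how a caller might handle both shapes; the `consume` helper and `_stream` producer below are hypothetical, not part of this commit:

```python
from functools import partial
from typing import Tuple, Union

import pandas as pd


def _stream() -> pd.DataFrame:
    # Hypothetical deferred producer standing in for a streaming component.
    return pd.DataFrame([{"content": "streamed answer"}])


def consume(out: Tuple[str, Union[pd.DataFrame, partial]]) -> pd.DataFrame:
    # The payload is either ready data or a partial to call for it.
    name, payload = out
    return payload() if isinstance(payload, partial) else payload


print(consume(("output", pd.DataFrame([{"content": "hi"}]))))
print(consume(("output", partial(_stream))))
```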



api/apps/canvas_app.py (+4 -0)



             canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
             canvas.history.append(("assistant", final_ans["content"]))
+            if not canvas.path[-1]:
+                canvas.path.pop(-1)
             if final_ans.get("reference"):
                 canvas.reference.append(final_ans["reference"])
             cvs.dsl = json.loads(str(canvas))
             UserCanvasService.update_by_id(req["id"], cvs.to_dict())
         except Exception as e:
             cvs.dsl = json.loads(str(canvas))
+            if not canvas.path[-1]:
+                canvas.path.pop(-1)
             UserCanvasService.update_by_id(req["id"], cvs.to_dict())
             traceback.print_exc()
             yield "data:" + json.dumps({"code": 500, "message": str(e),

api/apps/dialog_app.py (+2 -4)

         }
         if not DialogService.save(**dia):
             return get_data_error_result(message="Fail to new a dialog!")
-        e, dia = DialogService.get_by_id(dia["id"])
-        if not e:
-            return get_data_error_result(message="Fail to new a dialog!")
-        return get_json_result(data=dia.to_json())
+        return get_json_result(data=dia)
     else:
         del req["dialog_id"]
         if "kb_names" in req:

             if not e:
                 return get_data_error_result(message="Fail to update a dialog!")
             dia = dia.to_dict()
+            dia.update(req)
             dia["kb_ids"], dia["kb_names"] = get_kb_names(dia["kb_ids"])
             return get_json_result(data=dia)
     except Exception as e:

api/apps/kb_app.py (+2 -1)

                 return get_data_error_result(
                     message="Database error (Document removal)!")
             f2d = File2DocumentService.get_by_document_id(doc.id)
-            FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
+            if f2d:
+                FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
             File2DocumentService.delete_by_document_id(doc.id)
         FileService.filter_delete(
             [File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kbs[0].name])
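The `if f2d:` guard matters because the lookup can return an empty list, and the old unconditional `f2d[0].file_id` would raise `IndexError` for a document with no linked file. The shape of the fix in isolation; the record type below is a stand-in, not the real model:

```python
from dataclasses import dataclass


@dataclass
class FileLink:
    # Hypothetical stand-in for a File2Document row.
    file_id: str


def delete_linked_file(f2d: list) -> None:
    # Guard first: an empty lookup result means there is nothing to delete.
    if f2d:
        print(f"deleting file {f2d[0].file_id}")


delete_linked_file([FileLink("abc123")])  # deletes
delete_linked_file([])                    # no-op instead of IndexError
```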

api/utils/api_utils.py (+4 -0)

     if len(e.args) > 1:
         return get_json_result(
             code=settings.RetCode.EXCEPTION_ERROR, message=repr(e.args[0]), data=e.args[1])
+    if repr(e).find("index_not_found_exception") >= 0:
+        return get_json_result(code=settings.RetCode.EXCEPTION_ERROR,
+                               message="No chunk found, please upload file and parse it.")
+
     return get_json_result(code=settings.RetCode.EXCEPTION_ERROR, message=repr(e))
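The added branch translates the search backend's `index_not_found_exception` into an actionable message instead of leaking the raw `repr`. A self-contained sketch of that translation; the exception class here is hypothetical:

```python
class SearchBackendError(Exception):
    # Hypothetical stand-in for the exception raised by the search backend.
    pass


def friendly_message(e: Exception) -> str:
    # Match on the backend's error token, as the new branch does.
    if "index_not_found_exception" in repr(e):
        return "No chunk found, please upload file and parse it."
    return repr(e)


print(friendly_message(SearchBackendError("index_not_found_exception: kb_42")))
print(friendly_message(ValueError("something else")))
```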





graphrag/graph_prompt.py (+4 -4)



 -Steps-
 1. Identify all entities. For each identified entity, extract the following information:
-- entity_name: Name of the entity, capitalized
+- entity_name: Name of the entity, capitalized, in language of 'Text'
 - entity_type: One of the following types: [{entity_types}]
-- entity_description: Comprehensive description of the entity's attributes and activities
+- entity_description: Comprehensive description of the entity's attributes and activities in language of 'Text'
 Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>

 2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other.
 For each pair of related entities, extract the following information:
 - source_entity: name of the source entity, as identified in step 1
 - target_entity: name of the target entity, as identified in step 1
-- relationship_description: explanation as to why you think the source entity and the target entity are related to each other
+- relationship_description: explanation as to why you think the source entity and the target entity are related to each other in language of 'Text'
 - relationship_strength: a numeric score indicating strength of the relationship between the source entity and target entity
 Format each relationship as ("relationship"{tuple_delimiter}<source_entity>{tuple_delimiter}<target_entity>{tuple_delimiter}<relationship_description>{tuple_delimiter}<relationship_strength>)

-3. Return output in English as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
+3. Return output as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.

 4. When finished, output {completion_delimiter}
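To make the record format concrete, here is what output following this prompt could look like once the placeholders are filled in. The delimiter values below are assumptions for illustration only; the template leaves them as `{tuple_delimiter}`, `{record_delimiter}`, and `{completion_delimiter}`:

```python
# Assumed delimiter values; the template itself only has placeholders.
tuple_delimiter, record_delimiter, completion_delimiter = "<|>", "##", "<|COMPLETE|>"

sample = (
    '("entity"<|>MARIE CURIE<|>person<|>Physicist and chemist who pioneered radioactivity research)##'
    '("entity"<|>SORBONNE<|>organization<|>University in Paris where she taught)##'
    '("relationship"<|>MARIE CURIE<|>SORBONNE<|>She became the university\'s first female professor<|>8)'
    '<|COMPLETE|>'
)

# Split the model output back into records, mirroring the prompt's contract.
body = sample.removesuffix(completion_delimiter)
for record in body.split(record_delimiter):
    print(record.strip("()").split(tuple_delimiter))
```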



graphrag/utils.py (+1 -1)

     return bin


-def set_llm_cache(llmnm, txt, v: str, history, genconf):
+def set_llm_cache(llmnm, txt, v, history, genconf):
     hasher = xxhash.xxh64()
     hasher.update(str(llmnm).encode("utf-8"))
     hasher.update(str(txt).encode("utf-8"))
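Dropping the `v: str` annotation lets callers cache non-string values, since the function stringifies what it hashes anyway. A sketch of the xxhash keying scheme visible in the context lines; that `history` and `genconf` are hashed as well is an assumption beyond what this hunk shows:

```python
import xxhash


def cache_key(llm_name, prompt, history, genconf) -> str:
    # Hash the string form of every argument so any change in model,
    # prompt, history, or generation config produces a different key.
    hasher = xxhash.xxh64()
    for part in (llm_name, prompt, history, genconf):
        hasher.update(str(part).encode("utf-8"))
    return hasher.hexdigest()


print(cache_key("gpt-4", "hello", [], {"temperature": 0.1}))
```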

rag/app/laws.py (+3 -5)



if re.search(r"\.docx$", filename, re.IGNORECASE): if re.search(r"\.docx$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.") callback(0.1, "Start to parse.")
for txt in Docx()(filename, binary):
sections.append(txt)
callback(0.8, "Finish parsing.")
chunks = sections
return tokenize_chunks(chunks, doc, eng, pdf_parser)
chunks = Docx()(filename, binary)
callback(0.7, "Finish parsing.")
return tokenize_chunks(chunks, doc, eng, None)


elif re.search(r"\.pdf$", filename, re.IGNORECASE): elif re.search(r"\.pdf$", filename, re.IGNORECASE):
pdf_parser = Pdf() if kwargs.get( pdf_parser = Pdf() if kwargs.get(

rag/app/manual.py (+2 -2)

         sections = [(t, lvl, [[0] * 5]) for t, lvl in sections]
         # set pivot using the most frequent type of title,
         # then merge between 2 pivot
-        if len(sections) > 0 and len(pdf_parser.outlines) / len(sections) > 0.1:
+        if len(sections) > 0 and len(pdf_parser.outlines) / len(sections) > 0.03:
             max_lvl = max([lvl for _, lvl in pdf_parser.outlines])
             most_level = max(0, max_lvl - 1)
             levels = []

         res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser))
         return res

-    if re.search(r"\.docx$", filename, re.IGNORECASE):
+    elif re.search(r"\.docx$", filename, re.IGNORECASE):
         docx_parser = Docx()
         ti_list, tbls = docx_parser(filename, binary,
                                     from_page=0, to_page=10000, callback=callback)
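The threshold change is easier to read with numbers: for a document parsed into 200 sections, the outline-based title pivot previously required more than 20 outline entries, while 0.03 accepts anything above 6, so much sparser PDF bookmarks still drive the merging. The counts below are illustrative:

```python
sections, outlines = 200, 10  # illustrative counts

print(outlines / sections > 0.1)   # False: the old threshold ignored this outline
print(outlines / sections > 0.03)  # True: the new threshold uses it
```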

rag/app/table.py (+1 -1)

"datetime": "_dt", "datetime": "_dt",
"bool": "_kwd"} "bool": "_kwd"}
for df in dfs: for df in dfs:
for n in ["id", "index", "idx"]:
for n in ["id", "_id", "index", "idx"]:
if n in df.columns: if n in df.columns:
del df[n] del df[n]
clmns = df.columns.values clmns = df.columns.values
