### What problem does this PR solve?

Refactors and fixes several small issues across the agent, API and parsing layers: the Answer component gains a non-partial `output()` override, the canvas `run()` endpoint trims a trailing empty path entry before persisting, `set_dialog()` returns the updated payload and drops a redundant lookup, document removal guards against a missing file mapping, `index_not_found_exception` is mapped to a user-friendly message, the entity-extraction prompt keeps output in the language of the source text, `set_llm_cache()` accepts non-string values, DOCX chunking is simplified, the PDF outline pivot threshold is lowered to 0.03, and the reserved `_id` column is dropped from ingested tables.

### Type of change

- [x] Refactoring
@@ -16,6 +16,7 @@
 import random
 from abc import ABC
 from functools import partial
+from typing import Tuple, Union

 import pandas as pd
@@ -76,4 +77,13 @@ class Answer(ComponentBase, ABC):
     def set_exception(self, e):
         self.exception = e

+    def output(self, allow_partial=True) -> Tuple[str, Union[pd.DataFrame, partial]]:
+        if allow_partial:
+            return super().output()
+
+        for r, c in self._canvas.history[::-1]:
+            if r == "user":
+                return self._param.output_var_name, pd.DataFrame([{"content": c}])
+
+        return self._param.output_var_name, pd.DataFrame([])
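The override lets a caller ask for the final, non-partial answer: with `allow_partial=False` it walks the canvas history from newest to oldest and returns the most recent user turn as a one-row DataFrame. A minimal sketch of that lookup on a plain `(role, content)` list (the canvas/`ComponentBase` plumbing is assumed and not shown; names here are illustrative):

```python
import pandas as pd

def last_user_message(history, output_var_name="content"):
    # Walk the history from newest to oldest and return the most recent
    # user turn as a one-row DataFrame, mirroring the non-partial branch.
    for role, content in reversed(history):
        if role == "user":
            return output_var_name, pd.DataFrame([{"content": content}])
    # No user turn found: fall back to an empty frame.
    return output_var_name, pd.DataFrame([])

history = [("user", "What is RAGFlow?"), ("assistant", "It is a RAG engine.")]
name, df = last_user_message(history)
print(name, df.to_dict("records"))  # content [{'content': 'What is RAGFlow?'}]
```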
@@ -146,12 +146,16 @@ def run():
             canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
             canvas.history.append(("assistant", final_ans["content"]))
+            if not canvas.path[-1]:
+                canvas.path.pop(-1)
             if final_ans.get("reference"):
                 canvas.reference.append(final_ans["reference"])
             cvs.dsl = json.loads(str(canvas))
             UserCanvasService.update_by_id(req["id"], cvs.to_dict())
         except Exception as e:
             cvs.dsl = json.loads(str(canvas))
+            if not canvas.path[-1]:
+                canvas.path.pop(-1)
             UserCanvasService.update_by_id(req["id"], cvs.to_dict())
             traceback.print_exc()
             yield "data:" + json.dumps({"code": 500, "message": str(e),
@@ -103,10 +103,7 @@ def set_dialog():
             }
             if not DialogService.save(**dia):
                 return get_data_error_result(message="Fail to new a dialog!")
-            e, dia = DialogService.get_by_id(dia["id"])
-            if not e:
-                return get_data_error_result(message="Fail to new a dialog!")
-            return get_json_result(data=dia.to_json())
+            return get_json_result(data=dia)
         else:
             del req["dialog_id"]
             if "kb_names" in req:
@@ -117,6 +114,7 @@ def set_dialog():
             if not e:
                 return get_data_error_result(message="Fail to update a dialog!")
             dia = dia.to_dict()
+            dia.update(req)
             dia["kb_ids"], dia["kb_names"] = get_kb_names(dia["kb_ids"])
             return get_json_result(data=dia)
     except Exception as e:
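In the update branch, `dia.update(req)` folds the caller's requested fields back into the returned payload, so the response reflects the values that were just written rather than the stale stored row. In plain-dict terms (illustrative keys and values only):

```python
# Stored row as fetched from the database (illustrative values).
dia = {"id": "d1", "name": "Old name", "kb_ids": ["kb1"]}
# Fields the caller sent in the update request.
req = {"name": "New name"}

dia.update(req)      # response now mirrors the requested change
print(dia["name"])   # New name
```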
@@ -185,7 +185,8 @@ def rm():
                 return get_data_error_result(
                     message="Database error (Document removal)!")
             f2d = File2DocumentService.get_by_document_id(doc.id)
-            FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
+            if f2d:
+                FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
             File2DocumentService.delete_by_document_id(doc.id)
             FileService.filter_delete(
                 [File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kbs[0].name])
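`get_by_document_id` can return an empty list when a document has no file mapping, and indexing `f2d[0]` unconditionally would then raise `IndexError`. The new `if f2d:` guard simply skips the file deletion in that case. A tiny sketch of the failure mode being avoided (stand-in data, no database involved):

```python
def delete_linked_file(f2d):
    # Mirror of the guarded branch: only touch the linked file when a
    # File2Document mapping actually exists (the lookup can return []).
    if f2d:
        return "delete file " + f2d[0]["file_id"]
    return "no linked file, nothing to delete"

print(delete_linked_file([{"file_id": "f42"}]))  # delete file f42
print(delete_linked_file([]))                    # no linked file, nothing to delete
```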
@@ -120,6 +120,10 @@ def server_error_response(e):
     if len(e.args) > 1:
         return get_json_result(
             code=settings.RetCode.EXCEPTION_ERROR, message=repr(e.args[0]), data=e.args[1])
+    if repr(e).find("index_not_found_exception") >= 0:
+        return get_json_result(code=settings.RetCode.EXCEPTION_ERROR,
+                               message="No chunk found, please upload file and parse it.")
+
     return get_json_result(code=settings.RetCode.EXCEPTION_ERROR, message=repr(e))
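Instead of surfacing a raw `index_not_found_exception` repr to the client, the handler now maps it to an actionable message. The detection is a plain substring check on `repr(e)`; a minimal standalone sketch (the `get_json_result`/`settings` helpers are assumed and replaced here with a dict):

```python
def friendly_error(e: Exception) -> dict:
    # Substring check mirroring the handler: a missing search index means
    # nothing has been parsed yet, so tell the user what to do next.
    if "index_not_found_exception" in repr(e):
        return {"code": 500, "message": "No chunk found, please upload file and parse it."}
    return {"code": 500, "message": repr(e)}

print(friendly_error(RuntimeError("index_not_found_exception: no such index")))
```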
@@ -11,20 +11,20 @@ Given a text document that is potentially relevant to this activity and a list o
 -Steps-
 1. Identify all entities. For each identified entity, extract the following information:
-- entity_name: Name of the entity, capitalized
+- entity_name: Name of the entity, capitalized, in language of 'Text'
 - entity_type: One of the following types: [{entity_types}]
-- entity_description: Comprehensive description of the entity's attributes and activities
+- entity_description: Comprehensive description of the entity's attributes and activities in language of 'Text'
 Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>

 2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other.
 For each pair of related entities, extract the following information:
 - source_entity: name of the source entity, as identified in step 1
 - target_entity: name of the target entity, as identified in step 1
-- relationship_description: explanation as to why you think the source entity and the target entity are related to each other
+- relationship_description: explanation as to why you think the source entity and the target entity are related to each other in language of 'Text'
 - relationship_strength: a numeric score indicating strength of the relationship between the source entity and target entity
 Format each relationship as ("relationship"{tuple_delimiter}<source_entity>{tuple_delimiter}<target_entity>{tuple_delimiter}<relationship_description>{tuple_delimiter}<relationship_strength>)

-3. Return output in English as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
+3. Return output as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.

 4. When finished, output {completion_delimiter}
@@ -81,7 +81,7 @@ def get_llm_cache(llmnm, txt, history, genconf):
     return bin

-def set_llm_cache(llmnm, txt, v: str, history, genconf):
+def set_llm_cache(llmnm, txt, v, history, genconf):
     hasher = xxhash.xxh64()
     hasher.update(str(llmnm).encode("utf-8"))
     hasher.update(str(txt).encode("utf-8"))
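Dropping the `v: str` annotation lets callers cache non-string values (for example parsed JSON) without a misleading type hint; the xxhash-based key shown in the hunk is built from the model name and prompt text, and presumably also the history and generation config outside the visible context. A sketch of that key derivation under those assumptions (the value itself is never part of the key):

```python
import xxhash

def llm_cache_key(llmnm, txt, history, genconf) -> str:
    # The key covers the inputs that determine the LLM response; the cached
    # value (str, list, dict, ...) is stored under it but never hashed.
    hasher = xxhash.xxh64()
    for part in (llmnm, txt, history, genconf):
        hasher.update(str(part).encode("utf-8"))
    return hasher.hexdigest()

print(llm_cache_key("gpt-4o", "summarize this", [], {"temperature": 0.1}))
```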
@@ -153,11 +153,9 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
     if re.search(r"\.docx$", filename, re.IGNORECASE):
         callback(0.1, "Start to parse.")
-        for txt in Docx()(filename, binary):
-            sections.append(txt)
-        callback(0.8, "Finish parsing.")
-        chunks = sections
-        return tokenize_chunks(chunks, doc, eng, pdf_parser)
+        chunks = Docx()(filename, binary)
+        callback(0.7, "Finish parsing.")
+        return tokenize_chunks(chunks, doc, eng, None)

     elif re.search(r"\.pdf$", filename, re.IGNORECASE):
         pdf_parser = Pdf() if kwargs.get(
@@ -193,7 +193,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
         sections = [(t, lvl, [[0] * 5]) for t, lvl in sections]
         # set pivot using the most frequent type of title,
         # then merge between 2 pivot
-        if len(sections) > 0 and len(pdf_parser.outlines) / len(sections) > 0.1:
+        if len(sections) > 0 and len(pdf_parser.outlines) / len(sections) > 0.03:
             max_lvl = max([lvl for _, lvl in pdf_parser.outlines])
             most_level = max(0, max_lvl - 1)
             levels = []
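Lowering the pivot threshold from 0.1 to 0.03 means PDF bookmarks are trusted for title detection even when the outline is sparse relative to the number of parsed sections. With hypothetical numbers, 12 outline entries over 200 sections gives a ratio of 0.06, which was rejected before and is accepted now:

```python
outlines, sections = 12, 200          # hypothetical counts from a parsed PDF
ratio = outlines / sections           # 0.06
print(ratio > 0.1, ratio > 0.03)      # False True -> outline-based titling now kicks in
```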
@@ -256,7 +256,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
         res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser))
         return res

-    if re.search(r"\.docx$", filename, re.IGNORECASE):
+    elif re.search(r"\.docx$", filename, re.IGNORECASE):
         docx_parser = Docx()
         ti_list, tbls = docx_parser(filename, binary,
                                     from_page=0, to_page=10000, callback=callback)
@@ -185,7 +185,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000,
                   "datetime": "_dt",
                   "bool": "_kwd"}
         for df in dfs:
-            for n in ["id", "index", "idx"]:
+            for n in ["id", "_id", "index", "idx"]:
                 if n in df.columns:
                     del df[n]
             clmns = df.columns.values
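`_id` is reserved by the document store, so it is now stripped from ingested spreadsheets along with the other index-like columns before the remaining columns are typed and indexed. A small pandas sketch of that cleanup (made-up frame):

```python
import pandas as pd

df = pd.DataFrame({"_id": [1, 2], "id": [10, 11], "name": ["a", "b"], "price": [3.5, 4.0]})

# Same pruning the table chunker applies: drop reserved / index-like columns.
for n in ["id", "_id", "index", "idx"]:
    if n in df.columns:
        del df[n]

print(list(df.columns))  # ['name', 'price']
```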