### What problem does this PR solve? Fix typos in code. ### Type of change - [x] Refactoring --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com> tags/v0.19.1
| @@ -84,14 +84,14 @@ def init_superuser(): | |||
| {"role": "user", "content": "Hello!"}], gen_conf={}) | |||
| if msg.find("ERROR: ") == 0: | |||
| logging.error( | |||
| "'{}' dosen't work. {}".format( | |||
| "'{}' doesn't work. {}".format( | |||
| tenant["llm_id"], | |||
| msg)) | |||
| embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"]) | |||
| v, c = embd_mdl.encode(["Hello!"]) | |||
| if c == 0: | |||
| logging.error( | |||
| "'{}' dosen't work!".format( | |||
| "'{}' doesn't work!".format( | |||
| tenant["embd_id"])) | |||
| @@ -73,11 +73,11 @@ class UserCanvasService(CommonService): | |||
| User.nickname, | |||
| User.avatar.alias('tenant_avatar'), | |||
| ] | |||
| angents = cls.model.select(*fields) \ | |||
| agents = cls.model.select(*fields) \ | |||
| .join(User, on=(cls.model.user_id == User.id)) \ | |||
| .where(cls.model.id == pid) | |||
| # obj = cls.model.query(id=pid)[0] | |||
| return True, angents.dicts()[0] | |||
| return True, agents.dicts()[0] | |||
| except Exception as e: | |||
| print(e) | |||
| return False, None | |||
| @@ -100,25 +100,25 @@ class UserCanvasService(CommonService): | |||
| cls.model.update_time | |||
| ] | |||
| if keywords: | |||
| angents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where( | |||
| agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where( | |||
| ((cls.model.user_id.in_(joined_tenant_ids) & (cls.model.permission == | |||
| TenantPermission.TEAM.value)) | ( | |||
| cls.model.user_id == user_id)), | |||
| (fn.LOWER(cls.model.title).contains(keywords.lower())) | |||
| ) | |||
| else: | |||
| angents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where( | |||
| agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where( | |||
| ((cls.model.user_id.in_(joined_tenant_ids) & (cls.model.permission == | |||
| TenantPermission.TEAM.value)) | ( | |||
| cls.model.user_id == user_id)) | |||
| ) | |||
| if desc: | |||
| angents = angents.order_by(cls.model.getter_by(orderby).desc()) | |||
| agents = agents.order_by(cls.model.getter_by(orderby).desc()) | |||
| else: | |||
| angents = angents.order_by(cls.model.getter_by(orderby).asc()) | |||
| count = angents.count() | |||
| angents = angents.paginate(page_number, items_per_page) | |||
| return list(angents.dicts()), count | |||
| agents = agents.order_by(cls.model.getter_by(orderby).asc()) | |||
| count = agents.count() | |||
| agents = agents.paginate(page_number, items_per_page) | |||
| return list(agents.dicts()), count | |||
| def completion(tenant_id, agent_id, question, session_id=None, stream=True, **kwargs): | |||
| @@ -18,9 +18,9 @@ | |||
| # from beartype.claw import beartype_all # <-- you didn't sign up for this | |||
| # beartype_all(conf=BeartypeConf(violation_type=UserWarning)) # <-- emit warnings from all code | |||
| from api.utils.log_utils import initRootLogger | |||
| from api.utils.log_utils import init_root_logger | |||
| from plugin import GlobalPluginManager | |||
| initRootLogger("ragflow_server") | |||
| init_root_logger("ragflow_server") | |||
| import logging | |||
| import os | |||
| @@ -158,7 +158,7 @@ def filename_type(filename): | |||
| if re.match(r".*\.(eml|doc|docx|ppt|pptx|yml|xml|htm|json|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename): | |||
| return FileType.DOC.value | |||
| if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus|mp3)$", filename): | |||
| if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename): | |||
| return FileType.AURAL.value | |||
| if re.match(r".*\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng|icon|ico|mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa|mp4)$", filename): | |||
| @@ -30,7 +30,7 @@ def get_project_base_directory(): | |||
| ) | |||
| return PROJECT_BASE | |||
| def initRootLogger(logfile_basename: str, log_format: str = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"): | |||
| def init_root_logger(logfile_basename: str, log_format: str = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"): | |||
| global initialized_root_logger | |||
| if initialized_root_logger: | |||
| return | |||
| @@ -35,6 +35,6 @@ def crypt(line): | |||
| if __name__ == "__main__": | |||
| pswd = crypt(sys.argv[1]) | |||
| print(pswd) | |||
| print(decrypt(pswd)) | |||
| passwd = crypt(sys.argv[1]) | |||
| print(passwd) | |||
| print(decrypt(passwd)) | |||
| @@ -312,7 +312,7 @@ class PermissionEnum(StrEnum): | |||
| team = auto() | |||
| class ChunkMethodnEnum(StrEnum): | |||
| class ChunkMethodEnum(StrEnum): | |||
| naive = auto() | |||
| book = auto() | |||
| email = auto() | |||
| @@ -382,7 +382,7 @@ class CreateDatasetReq(Base): | |||
| description: str | None = Field(default=None, max_length=65535) | |||
| embedding_model: Annotated[str, StringConstraints(strip_whitespace=True, max_length=255), Field(default="", serialization_alias="embd_id")] | |||
| permission: PermissionEnum = Field(default=PermissionEnum.me, min_length=1, max_length=16) | |||
| chunk_method: ChunkMethodnEnum = Field(default=ChunkMethodnEnum.naive, min_length=1, max_length=32, serialization_alias="parser_id") | |||
| chunk_method: ChunkMethodEnum = Field(default=ChunkMethodEnum.naive, min_length=1, max_length=32, serialization_alias="parser_id") | |||
| parser_config: ParserConfig | None = Field(default=None) | |||
| @field_validator("avatar") | |||
| @@ -69,7 +69,7 @@ class RAGFlowDocxParser: | |||
| max_type = max(max_type.items(), key=lambda x: x[1])[0] | |||
| colnm = len(df.iloc[0, :]) | |||
| hdrows = [0] # header is not nessesarily appear in the first line | |||
| hdrows = [0] # header does not necessarily appear in the first line | |||
| if max_type == "Nu": | |||
| for r in range(1, len(df)): | |||
| tys = Counter([blockType(str(df.iloc[r, j])) | |||
| @@ -21,7 +21,7 @@ from rag.app.picture import vision_llm_chunk as picture_vision_llm_chunk | |||
| from rag.prompts import vision_llm_figure_describe_prompt | |||
| def vision_figure_parser_figure_data_wraper(figures_data_without_positions): | |||
| def vision_figure_parser_figure_data_wrapper(figures_data_without_positions): | |||
| return [ | |||
| ( | |||
| (figure_data[1], [figure_data[0]]), | |||
| @@ -180,13 +180,13 @@ class RAGFlowPdfParser: | |||
| return fea | |||
| @staticmethod | |||
| def sort_X_by_page(arr, threashold): | |||
| def sort_X_by_page(arr, threshold): | |||
| # sort using y1 first and then x1 | |||
| arr = sorted(arr, key=lambda r: (r["page_number"], r["x0"], r["top"])) | |||
| for i in range(len(arr) - 1): | |||
| for j in range(i, -1, -1): | |||
| # restore the order using th | |||
| if abs(arr[j + 1]["x0"] - arr[j]["x0"]) < threashold \ | |||
| if abs(arr[j + 1]["x0"] - arr[j]["x0"]) < threshold \ | |||
| and arr[j + 1]["top"] < arr[j]["top"] \ | |||
| and arr[j + 1]["page_number"] == arr[j]["page_number"]: | |||
| tmp = arr[j] | |||
| @@ -264,13 +264,13 @@ class RAGFlowPdfParser: | |||
| for b in self.boxes: | |||
| if b.get("layout_type", "") != "table": | |||
| continue | |||
| ii = Recognizer.find_overlapped_with_threashold(b, rows, thr=0.3) | |||
| ii = Recognizer.find_overlapped_with_threshold(b, rows, thr=0.3) | |||
| if ii is not None: | |||
| b["R"] = ii | |||
| b["R_top"] = rows[ii]["top"] | |||
| b["R_bott"] = rows[ii]["bottom"] | |||
| ii = Recognizer.find_overlapped_with_threashold( | |||
| ii = Recognizer.find_overlapped_with_threshold( | |||
| b, headers, thr=0.3) | |||
| if ii is not None: | |||
| b["H_top"] = headers[ii]["top"] | |||
| @@ -285,7 +285,7 @@ class RAGFlowPdfParser: | |||
| b["C_left"] = clmns[ii]["x0"] | |||
| b["C_right"] = clmns[ii]["x1"] | |||
| ii = Recognizer.find_overlapped_with_threashold(b, spans, thr=0.3) | |||
| ii = Recognizer.find_overlapped_with_threshold(b, spans, thr=0.3) | |||
| if ii is not None: | |||
| b["H_top"] = spans[ii]["top"] | |||
| b["H_bott"] = spans[ii]["bottom"] | |||
| @@ -106,7 +106,7 @@ class LayoutRecognizer(Recognizer): | |||
| bxs.pop(i) | |||
| continue | |||
| ii = self.find_overlapped_with_threashold(bxs[i], lts_, | |||
| ii = self.find_overlapped_with_threshold(bxs[i], lts_, | |||
| thr=0.4) | |||
| if ii is None: # belong to nothing | |||
| bxs[i]["layout_type"] = "" | |||
| @@ -52,20 +52,20 @@ class Recognizer: | |||
| self.label_list = label_list | |||
| @staticmethod | |||
| def sort_Y_firstly(arr, threashold): | |||
| def sort_Y_firstly(arr, threshold): | |||
| def cmp(c1, c2): | |||
| diff = c1["top"] - c2["top"] | |||
| if abs(diff) < threashold: | |||
| if abs(diff) < threshold: | |||
| diff = c1["x0"] - c2["x0"] | |||
| return diff | |||
| arr = sorted(arr, key=cmp_to_key(cmp)) | |||
| return arr | |||
| @staticmethod | |||
| def sort_X_firstly(arr, threashold): | |||
| def sort_X_firstly(arr, threshold): | |||
| def cmp(c1, c2): | |||
| diff = c1["x0"] - c2["x0"] | |||
| if abs(diff) < threashold: | |||
| if abs(diff) < threshold: | |||
| diff = c1["top"] - c2["top"] | |||
| return diff | |||
| arr = sorted(arr, key=cmp_to_key(cmp)) | |||
| @@ -239,15 +239,15 @@ class Recognizer: | |||
| e -= 1 | |||
| break | |||
| max_overlaped_i, max_overlaped = None, 0 | |||
| max_overlapped_i, max_overlapped = None, 0 | |||
| for i in range(s, e): | |||
| ov = Recognizer.overlapped_area(bxs[i], box) | |||
| if ov <= max_overlaped: | |||
| if ov <= max_overlapped: | |||
| continue | |||
| max_overlaped_i = i | |||
| max_overlaped = ov | |||
| max_overlapped_i = i | |||
| max_overlapped = ov | |||
| return max_overlaped_i | |||
| return max_overlapped_i | |||
| @staticmethod | |||
| def find_horizontally_tightest_fit(box, boxes): | |||
| @@ -264,7 +264,7 @@ class Recognizer: | |||
| return min_i | |||
| @staticmethod | |||
| def find_overlapped_with_threashold(box, boxes, thr=0.3): | |||
| def find_overlapped_with_threshold(box, boxes, thr=0.3): | |||
| if not boxes: | |||
| return | |||
| max_overlapped_i, max_overlapped, _max_overlapped = None, thr, 0 | |||
| @@ -84,13 +84,13 @@ def get_table_html(img, tb_cpns, ocr): | |||
| clmns = LayoutRecognizer.layouts_cleanup(boxes, clmns, 5, 0.5) | |||
| for b in boxes: | |||
| ii = LayoutRecognizer.find_overlapped_with_threashold(b, rows, thr=0.3) | |||
| ii = LayoutRecognizer.find_overlapped_with_threshold(b, rows, thr=0.3) | |||
| if ii is not None: | |||
| b["R"] = ii | |||
| b["R_top"] = rows[ii]["top"] | |||
| b["R_bott"] = rows[ii]["bottom"] | |||
| ii = LayoutRecognizer.find_overlapped_with_threashold(b, headers, thr=0.3) | |||
| ii = LayoutRecognizer.find_overlapped_with_threshold(b, headers, thr=0.3) | |||
| if ii is not None: | |||
| b["H_top"] = headers[ii]["top"] | |||
| b["H_bott"] = headers[ii]["bottom"] | |||
| @@ -104,7 +104,7 @@ def get_table_html(img, tb_cpns, ocr): | |||
| b["C_left"] = clmns[ii]["x0"] | |||
| b["C_right"] = clmns[ii]["x1"] | |||
| ii = LayoutRecognizer.find_overlapped_with_threashold(b, spans, thr=0.3) | |||
| ii = LayoutRecognizer.find_overlapped_with_threshold(b, spans, thr=0.3) | |||
| if ii is not None: | |||
| b["H_top"] = spans[ii]["top"] | |||
| b["H_bott"] = spans[ii]["bottom"] | |||
| @@ -29,7 +29,7 @@ from tika import parser | |||
| from api.db import LLMType | |||
| from api.db.services.llm_service import LLMBundle | |||
| from deepdoc.parser import DocxParser, ExcelParser, HtmlParser, JsonParser, MarkdownParser, PdfParser, TxtParser | |||
| from deepdoc.parser.figure_parser import VisionFigureParser, vision_figure_parser_figure_data_wraper | |||
| from deepdoc.parser.figure_parser import VisionFigureParser, vision_figure_parser_figure_data_wrapper | |||
| from deepdoc.parser.pdf_parser import PlainParser, VisionParser | |||
| from rag.nlp import concat_img, find_codec, naive_merge, naive_merge_with_images, naive_merge_docx, rag_tokenizer, tokenize_chunks, tokenize_chunks_with_images, tokenize_table | |||
| @@ -379,7 +379,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, | |||
| sections, tables = Docx()(filename, binary) | |||
| if vision_model: | |||
| figures_data = vision_figure_parser_figure_data_wraper(sections) | |||
| figures_data = vision_figure_parser_figure_data_wrapper(sections) | |||
| try: | |||
| docx_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures_data, **kwargs) | |||
| boosted_figures = docx_vision_parser(callback=callback) | |||
| @@ -21,7 +21,7 @@ import sys | |||
| import threading | |||
| import time | |||
| from api.utils.log_utils import initRootLogger, get_project_base_directory | |||
| from api.utils.log_utils import init_root_logger, get_project_base_directory | |||
| from graphrag.general.index import run_graphrag | |||
| from graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache | |||
| from rag.prompts import keyword_extraction, question_proposal, content_tagging | |||
| @@ -773,5 +773,5 @@ async def main(): | |||
| if __name__ == "__main__": | |||
| faulthandler.enable() | |||
| initRootLogger(CONSUMER_NAME) | |||
| init_root_logger(CONSUMER_NAME) | |||
| trio.run(main) | |||