瀏覽代碼

Fix typo in code (#8327)

### What problem does this PR solve?

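Fix typos in code: misspelled identifiers, log messages, and comments (e.g. `threashold` → `threshold`, `angents` → `agents`, `dosen't` → `doesn't`), and rename `initRootLogger` to `init_root_logger`.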

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
tags/v0.19.1
Jin Hai 4 月之前
父節點
當前提交
4a2ff633e0
沒有連結到貢獻者的電子郵件帳戶。

+ 2
- 2
api/db/init_data.py 查看文件

@@ -84,14 +84,14 @@ def init_superuser():
{"role": "user", "content": "Hello!"}], gen_conf={})
if msg.find("ERROR: ") == 0:
logging.error(
"'{}' dosen't work. {}".format(
"'{}' doesn't work. {}".format(
tenant["llm_id"],
msg))
embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
v, c = embd_mdl.encode(["Hello!"])
if c == 0:
logging.error(
"'{}' dosen't work!".format(
"'{}' doesn't work!".format(
tenant["embd_id"]))



+ 9
- 9
api/db/services/canvas_service.py 查看文件

@@ -73,11 +73,11 @@ class UserCanvasService(CommonService):
User.nickname,
User.avatar.alias('tenant_avatar'),
]
angents = cls.model.select(*fields) \
agents = cls.model.select(*fields) \
.join(User, on=(cls.model.user_id == User.id)) \
.where(cls.model.id == pid)
# obj = cls.model.query(id=pid)[0]
return True, angents.dicts()[0]
return True, agents.dicts()[0]
except Exception as e:
print(e)
return False, None
@@ -100,25 +100,25 @@ class UserCanvasService(CommonService):
cls.model.update_time
]
if keywords:
angents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
((cls.model.user_id.in_(joined_tenant_ids) & (cls.model.permission ==
TenantPermission.TEAM.value)) | (
cls.model.user_id == user_id)),
(fn.LOWER(cls.model.title).contains(keywords.lower()))
)
else:
angents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
((cls.model.user_id.in_(joined_tenant_ids) & (cls.model.permission ==
TenantPermission.TEAM.value)) | (
cls.model.user_id == user_id))
)
if desc:
angents = angents.order_by(cls.model.getter_by(orderby).desc())
agents = agents.order_by(cls.model.getter_by(orderby).desc())
else:
angents = angents.order_by(cls.model.getter_by(orderby).asc())
count = angents.count()
angents = angents.paginate(page_number, items_per_page)
return list(angents.dicts()), count
agents = agents.order_by(cls.model.getter_by(orderby).asc())
count = agents.count()
agents = agents.paginate(page_number, items_per_page)
return list(agents.dicts()), count

def completion(tenant_id, agent_id, question, session_id=None, stream=True, **kwargs):

+ 2
- 2
api/ragflow_server.py 查看文件

@@ -18,9 +18,9 @@
# from beartype.claw import beartype_all # <-- you didn't sign up for this
# beartype_all(conf=BeartypeConf(violation_type=UserWarning)) # <-- emit warnings from all code

from api.utils.log_utils import initRootLogger
from api.utils.log_utils import init_root_logger
from plugin import GlobalPluginManager
initRootLogger("ragflow_server")
init_root_logger("ragflow_server")

import logging
import os

+ 1
- 1
api/utils/file_utils.py 查看文件

@@ -158,7 +158,7 @@ def filename_type(filename):
if re.match(r".*\.(eml|doc|docx|ppt|pptx|yml|xml|htm|json|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
return FileType.DOC.value

if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus|mp3)$", filename):
if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename):
return FileType.AURAL.value

if re.match(r".*\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng|icon|ico|mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa|mp4)$", filename):

+ 1
- 1
api/utils/log_utils.py 查看文件

@@ -30,7 +30,7 @@ def get_project_base_directory():
)
return PROJECT_BASE

def initRootLogger(logfile_basename: str, log_format: str = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"):
def init_root_logger(logfile_basename: str, log_format: str = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"):
global initialized_root_logger
if initialized_root_logger:
return

+ 3
- 3
api/utils/t_crypt.py 查看文件

@@ -35,6 +35,6 @@ def crypt(line):


if __name__ == "__main__":
pswd = crypt(sys.argv[1])
print(pswd)
print(decrypt(pswd))
passwd = crypt(sys.argv[1])
print(passwd)
print(decrypt(passwd))

+ 2
- 2
api/utils/validation_utils.py 查看文件

@@ -312,7 +312,7 @@ class PermissionEnum(StrEnum):
team = auto()


class ChunkMethodnEnum(StrEnum):
class ChunkMethodEnum(StrEnum):
naive = auto()
book = auto()
email = auto()
@@ -382,7 +382,7 @@ class CreateDatasetReq(Base):
description: str | None = Field(default=None, max_length=65535)
embedding_model: Annotated[str, StringConstraints(strip_whitespace=True, max_length=255), Field(default="", serialization_alias="embd_id")]
permission: PermissionEnum = Field(default=PermissionEnum.me, min_length=1, max_length=16)
chunk_method: ChunkMethodnEnum = Field(default=ChunkMethodnEnum.naive, min_length=1, max_length=32, serialization_alias="parser_id")
chunk_method: ChunkMethodEnum = Field(default=ChunkMethodEnum.naive, min_length=1, max_length=32, serialization_alias="parser_id")
parser_config: ParserConfig | None = Field(default=None)

@field_validator("avatar")

+ 1
- 1
deepdoc/parser/docx_parser.py 查看文件

@@ -69,7 +69,7 @@ class RAGFlowDocxParser:
max_type = max(max_type.items(), key=lambda x: x[1])[0]

colnm = len(df.iloc[0, :])
hdrows = [0] # header is not nessesarily appear in the first line
hdrows = [0] # the header does not necessarily appear in the first line
if max_type == "Nu":
for r in range(1, len(df)):
tys = Counter([blockType(str(df.iloc[r, j]))

+ 1
- 1
deepdoc/parser/figure_parser.py 查看文件

@@ -21,7 +21,7 @@ from rag.app.picture import vision_llm_chunk as picture_vision_llm_chunk
from rag.prompts import vision_llm_figure_describe_prompt


def vision_figure_parser_figure_data_wraper(figures_data_without_positions):
def vision_figure_parser_figure_data_wrapper(figures_data_without_positions):
return [
(
(figure_data[1], [figure_data[0]]),

+ 5
- 5
deepdoc/parser/pdf_parser.py 查看文件

@@ -180,13 +180,13 @@ class RAGFlowPdfParser:
return fea

@staticmethod
def sort_X_by_page(arr, threashold):
def sort_X_by_page(arr, threshold):
# sort by page number first, then x0, then top
arr = sorted(arr, key=lambda r: (r["page_number"], r["x0"], r["top"]))
for i in range(len(arr) - 1):
for j in range(i, -1, -1):
# restore the top-to-bottom order for boxes whose x0 differs by less than the threshold
if abs(arr[j + 1]["x0"] - arr[j]["x0"]) < threashold \
if abs(arr[j + 1]["x0"] - arr[j]["x0"]) < threshold \
and arr[j + 1]["top"] < arr[j]["top"] \
and arr[j + 1]["page_number"] == arr[j]["page_number"]:
tmp = arr[j]
@@ -264,13 +264,13 @@ class RAGFlowPdfParser:
for b in self.boxes:
if b.get("layout_type", "") != "table":
continue
ii = Recognizer.find_overlapped_with_threashold(b, rows, thr=0.3)
ii = Recognizer.find_overlapped_with_threshold(b, rows, thr=0.3)
if ii is not None:
b["R"] = ii
b["R_top"] = rows[ii]["top"]
b["R_bott"] = rows[ii]["bottom"]

ii = Recognizer.find_overlapped_with_threashold(
ii = Recognizer.find_overlapped_with_threshold(
b, headers, thr=0.3)
if ii is not None:
b["H_top"] = headers[ii]["top"]
@@ -285,7 +285,7 @@ class RAGFlowPdfParser:
b["C_left"] = clmns[ii]["x0"]
b["C_right"] = clmns[ii]["x1"]

ii = Recognizer.find_overlapped_with_threashold(b, spans, thr=0.3)
ii = Recognizer.find_overlapped_with_threshold(b, spans, thr=0.3)
if ii is not None:
b["H_top"] = spans[ii]["top"]
b["H_bott"] = spans[ii]["bottom"]

+ 1
- 1
deepdoc/vision/layout_recognizer.py 查看文件

@@ -106,7 +106,7 @@ class LayoutRecognizer(Recognizer):
bxs.pop(i)
continue

ii = self.find_overlapped_with_threashold(bxs[i], lts_,
ii = self.find_overlapped_with_threshold(bxs[i], lts_,
thr=0.4)
if ii is None: # belong to nothing
bxs[i]["layout_type"] = ""

+ 10
- 10
deepdoc/vision/recognizer.py 查看文件

@@ -52,20 +52,20 @@ class Recognizer:
self.label_list = label_list

@staticmethod
def sort_Y_firstly(arr, threashold):
def sort_Y_firstly(arr, threshold):
def cmp(c1, c2):
diff = c1["top"] - c2["top"]
if abs(diff) < threashold:
if abs(diff) < threshold:
diff = c1["x0"] - c2["x0"]
return diff
arr = sorted(arr, key=cmp_to_key(cmp))
return arr

@staticmethod
def sort_X_firstly(arr, threashold):
def sort_X_firstly(arr, threshold):
def cmp(c1, c2):
diff = c1["x0"] - c2["x0"]
if abs(diff) < threashold:
if abs(diff) < threshold:
diff = c1["top"] - c2["top"]
return diff
arr = sorted(arr, key=cmp_to_key(cmp))
@@ -239,15 +239,15 @@ class Recognizer:
e -= 1
break

max_overlaped_i, max_overlaped = None, 0
max_overlapped_i, max_overlapped = None, 0
for i in range(s, e):
ov = Recognizer.overlapped_area(bxs[i], box)
if ov <= max_overlaped:
if ov <= max_overlapped:
continue
max_overlaped_i = i
max_overlaped = ov
max_overlapped_i = i
max_overlapped = ov

return max_overlaped_i
return max_overlapped_i

@staticmethod
def find_horizontally_tightest_fit(box, boxes):
@@ -264,7 +264,7 @@ class Recognizer:
return min_i

@staticmethod
def find_overlapped_with_threashold(box, boxes, thr=0.3):
def find_overlapped_with_threshold(box, boxes, thr=0.3):
if not boxes:
return
max_overlapped_i, max_overlapped, _max_overlapped = None, thr, 0

+ 3
- 3
deepdoc/vision/t_recognizer.py 查看文件

@@ -84,13 +84,13 @@ def get_table_html(img, tb_cpns, ocr):
clmns = LayoutRecognizer.layouts_cleanup(boxes, clmns, 5, 0.5)

for b in boxes:
ii = LayoutRecognizer.find_overlapped_with_threashold(b, rows, thr=0.3)
ii = LayoutRecognizer.find_overlapped_with_threshold(b, rows, thr=0.3)
if ii is not None:
b["R"] = ii
b["R_top"] = rows[ii]["top"]
b["R_bott"] = rows[ii]["bottom"]

ii = LayoutRecognizer.find_overlapped_with_threashold(b, headers, thr=0.3)
ii = LayoutRecognizer.find_overlapped_with_threshold(b, headers, thr=0.3)
if ii is not None:
b["H_top"] = headers[ii]["top"]
b["H_bott"] = headers[ii]["bottom"]
@@ -104,7 +104,7 @@ def get_table_html(img, tb_cpns, ocr):
b["C_left"] = clmns[ii]["x0"]
b["C_right"] = clmns[ii]["x1"]

ii = LayoutRecognizer.find_overlapped_with_threashold(b, spans, thr=0.3)
ii = LayoutRecognizer.find_overlapped_with_threshold(b, spans, thr=0.3)
if ii is not None:
b["H_top"] = spans[ii]["top"]
b["H_bott"] = spans[ii]["bottom"]

+ 2
- 2
rag/app/naive.py 查看文件

@@ -29,7 +29,7 @@ from tika import parser
from api.db import LLMType
from api.db.services.llm_service import LLMBundle
from deepdoc.parser import DocxParser, ExcelParser, HtmlParser, JsonParser, MarkdownParser, PdfParser, TxtParser
from deepdoc.parser.figure_parser import VisionFigureParser, vision_figure_parser_figure_data_wraper
from deepdoc.parser.figure_parser import VisionFigureParser, vision_figure_parser_figure_data_wrapper
from deepdoc.parser.pdf_parser import PlainParser, VisionParser
from rag.nlp import concat_img, find_codec, naive_merge, naive_merge_with_images, naive_merge_docx, rag_tokenizer, tokenize_chunks, tokenize_chunks_with_images, tokenize_table

@@ -379,7 +379,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
sections, tables = Docx()(filename, binary)

if vision_model:
figures_data = vision_figure_parser_figure_data_wraper(sections)
figures_data = vision_figure_parser_figure_data_wrapper(sections)
try:
docx_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures_data, **kwargs)
boosted_figures = docx_vision_parser(callback=callback)

+ 2
- 2
rag/svr/task_executor.py 查看文件

@@ -21,7 +21,7 @@ import sys
import threading
import time

from api.utils.log_utils import initRootLogger, get_project_base_directory
from api.utils.log_utils import init_root_logger, get_project_base_directory
from graphrag.general.index import run_graphrag
from graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache
from rag.prompts import keyword_extraction, question_proposal, content_tagging
@@ -773,5 +773,5 @@ async def main():

if __name__ == "__main__":
faulthandler.enable()
initRootLogger(CONSUMER_NAME)
init_root_logger(CONSUMER_NAME)
trio.run(main)

Loading…
取消
儲存