Преглед изворног кода

Fix: empty query issue. (#7551)

### What problem does this PR solve?

#5214

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
tags/v0.19.0
Kevin Hu пре 5 месеци
родитељ
комит
a14865e6bb
No account linked to committer's email address
2 измењена фајла са 6 додавања и 3 уклањања
  1. 3
    3
      deepdoc/parser/pdf_parser.py
  2. 3
    0
      rag/nlp/query.py

+ 3
- 3
deepdoc/parser/pdf_parser.py Прегледај датотеку

@@ -309,7 +309,7 @@ class RAGFlowPdfParser:
"bottom": b[-1][1] / ZM,
"chars": [],
"page_number": pagenum} for b, t in bxs if b[0][0] <= b[1][0] and b[0][1] <= b[-1][1]],
- self.mean_height[-1] / 3
+ self.mean_height[pagenum-1] / 3
)

# merge chars in the same rect
@@ -355,8 +355,8 @@ class RAGFlowPdfParser:
del boxes_to_reg[i]["box_image"]
logging.info(f"__ocr recognize {len(bxs)} boxes cost {timer() - start}s")
bxs = [b for b in bxs if b["text"]]
- if self.mean_height[-1] == 0:
- self.mean_height[-1] = np.median([b["bottom"] - b["top"]
+ if self.mean_height[pagenum-1] == 0:
+ self.mean_height[pagenum-1] = np.median([b["bottom"] - b["top"]
for b in bxs])
self.boxes.append(bxs)


+ 3
- 0
rag/nlp/query.py Прегледај датотеку

@@ -77,6 +77,7 @@ class FulltextQueryer:
" ",
rag_tokenizer.tradi2simp(rag_tokenizer.strQ2B(txt.lower())),
).strip()
+ otxt = txt
txt = FulltextQueryer.rmWWW(txt)

if not self.isChinese(txt):
@@ -196,6 +197,8 @@ class FulltextQueryer:

if qs:
query = " OR ".join([f"({t})" for t in qs if t])
+ if not query:
+ query = otxt
return MatchTextExpr(
self.query_fields, query, 100, {"minimum_should_match": min_match}
), keywords

Loading…
Откажи
Сачувај