Browse Source

Fix: Add title_tks for Pictures (#7365)

### What problem does this PR solve?
https://github.com/infiniflow/ragflow/issues/7362

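Append a `title_tks` field to the document built for picture chunks: the file name, with its trailing alphabetic extension removed, is tokenized with `rag_tokenizer.tokenize`.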
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
tags/v0.19.0
Stephen Hu 6 months ago
parent
commit
1a5608d0f8
No account linked to committer's email address
1 changed file with 4 additions and 0 deletions
  1. 4
    0
      rag/app/picture.py

+ 4
- 0
rag/app/picture.py View File

@@ -15,6 +15,7 @@
#

import io
import re

import numpy as np
from PIL import Image
@@ -24,6 +25,8 @@ from api.db.services.llm_service import LLMBundle
from deepdoc.vision import OCR
from rag.nlp import tokenize
from rag.utils import clean_markdown_block
from rag.nlp import rag_tokenizer


ocr = OCR()

@@ -32,6 +35,7 @@ def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs):
img = Image.open(io.BytesIO(binary)).convert('RGB')
doc = {
"docnm_kwd": filename,
"title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)),
"image": img
}
bxs = ocr(np.array(img))

Loading…
Cancel
Save