Przeglądaj źródła

Load models from the local project directory first, falling back to a HuggingFace download (#163)

tags/v0.1.0
KevinHuSh 1 rok temu
rodzic
commit
a5384446e3
No account linked to committer's email address

+ 8
- 8
deepdoc/parser/pdf_parser.py Wyświetl plik

@@ -18,7 +18,7 @@ from api.utils.file_utils import get_project_base_directory
from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
from rag.nlp import huqie
from copy import deepcopy
from huggingface_hub import hf_hub_download, snapshot_download
from huggingface_hub import snapshot_download

logging.getLogger("pdfminer").setLevel(logging.WARNING)

@@ -36,18 +36,18 @@ class HuParser:
if torch.cuda.is_available():
self.updown_cnt_mdl.set_param({"device": "cuda"})
try:
model_dir = snapshot_download(
repo_id="InfiniFlow/text_concat_xgb_v1.0",
local_dir=os.path.join(
model_dir = os.path.join(
get_project_base_directory(),
"rag/res/deepdoc"),
local_files_only=True)
"rag/res/deepdoc")
self.updown_cnt_mdl.load_model(os.path.join(
model_dir, "updown_concat_xgb.model"))
except Exception as e:
model_dir = snapshot_download(
repo_id="InfiniFlow/text_concat_xgb_v1.0")
self.updown_cnt_mdl.load_model(os.path.join(
model_dir, "updown_concat_xgb.model"))


self.updown_cnt_mdl.load_model(os.path.join(
model_dir, "updown_concat_xgb.model"))
self.page_from = 0
"""
If you have trouble downloading HuggingFace models, -_^ this might help!!

+ 4
- 8
deepdoc/vision/layout_recognizer.py Wyświetl plik

@@ -17,7 +17,6 @@ from copy import deepcopy
import numpy as np
from huggingface_hub import snapshot_download
from api.db import ParserType
from api.utils.file_utils import get_project_base_directory
from deepdoc.vision import Recognizer
@@ -39,17 +38,14 @@ class LayoutRecognizer(Recognizer):
def __init__(self, domain):
try:
model_dir = snapshot_download(
repo_id="InfiniFlow/deepdoc",
local_dir=os.path.join(
model_dir = os.path.join(
get_project_base_directory(),
"rag/res/deepdoc"),
local_files_only=True)
"rag/res/deepdoc")
super().__init__(self.labels, domain, model_dir)
except Exception as e:
model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc")
super().__init__(self.labels, domain, model_dir)
# os.path.join(get_project_base_directory(), "rag/res/deepdoc/"))
super().__init__(self.labels, domain, model_dir)
self.garbage_layouts = ["footer", "header", "reference"]
def __call__(self, image_list, ocr_res, scale_factor=3,

+ 6
- 7
deepdoc/vision/ocr.py Wyświetl plik

@@ -480,17 +480,16 @@ class OCR(object):
"""
if not model_dir:
try:
model_dir = snapshot_download(
repo_id="InfiniFlow/deepdoc",
local_dir=os.path.join(
model_dir = os.path.join(
get_project_base_directory(),
"rag/res/deepdoc"),
local_files_only=True)
"rag/res/deepdoc")
self.text_detector = TextDetector(model_dir)
self.text_recognizer = TextRecognizer(model_dir)
except Exception as e:
model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc")
self.text_detector = TextDetector(model_dir)
self.text_recognizer = TextRecognizer(model_dir)

self.text_detector = TextDetector(model_dir)
self.text_recognizer = TextRecognizer(model_dir)
self.drop_score = 0.5
self.crop_image_res_index = 0


+ 5
- 8
deepdoc/vision/recognizer.py Wyświetl plik

@@ -36,17 +36,14 @@ class Recognizer(object):
"""
if not model_dir:
try:
model_dir = snapshot_download(
repo_id="InfiniFlow/deepdoc",
local_dir=os.path.join(
model_dir = os.path.join(
get_project_base_directory(),
"rag/res/deepdoc"),
local_files_only=True)
except Exception as e:
"rag/res/deepdoc")
model_file_path = os.path.join(model_dir, task_name + ".onnx")
if not os.path.exists(model_file_path):
model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc")
model_file_path = os.path.join(model_dir, task_name + ".onnx")
model_file_path = os.path.join(model_dir, task_name + ".onnx")
if not os.path.exists(model_file_path):
raise ValueError("not find model file path {}".format(
model_file_path))

+ 3
- 9
deepdoc/vision/table_structure_recognizer.py Wyświetl plik

@@ -35,17 +35,11 @@ class TableStructureRecognizer(Recognizer):
def __init__(self):
try:
model_dir = snapshot_download(
repo_id="InfiniFlow/deepdoc",
local_dir=os.path.join(
super().__init__(self.labels, "tsr", os.path.join(
get_project_base_directory(),
"rag/res/deepdoc"),
local_files_only=True)
"rag/res/deepdoc"))
except Exception as e:
model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc")
# os.path.join(get_project_base_directory(), "rag/res/deepdoc/"))
super().__init__(self.labels, "tsr", model_dir)
super().__init__(self.labels, "tsr", snapshot_download(repo_id="InfiniFlow/deepdoc"))
def __call__(self, images, thr=0.2):
tbls = super().__call__(images, thr)

+ 4
- 7
rag/llm/embedding_model.py Wyświetl plik

@@ -28,16 +28,13 @@ from api.utils.file_utils import get_project_base_directory
from rag.utils import num_tokens_from_string

try:
model_dir = snapshot_download(
repo_id="BAAI/bge-large-zh-v1.5",
local_dir=os.path.join(
flag_model = FlagModel(os.path.join(
get_project_base_directory(),
"rag/res/bge-large-zh-v1.5"),
local_files_only=True)
query_instruction_for_retrieval="为这个句子生成表示以用于检索相关文章:",
use_fp16=torch.cuda.is_available())
except Exception as e:
model_dir = snapshot_download(repo_id="BAAI/bge-large-zh-v1.5")

flag_model = FlagModel(model_dir,
flag_model = FlagModel("BAAI/bge-large-zh-v1.5",
query_instruction_for_retrieval="为这个句子生成表示以用于检索相关文章:",
use_fp16=torch.cuda.is_available())


+ 1
- 1
rag/nlp/search.py Wyświetl plik

@@ -247,7 +247,7 @@ class Dealer:
for ck in chunks]
cites = {}
thr = 0.63
while len(cites.keys()) == 0 and pieces_ and chunks_tks:
while thr>0.3 and len(cites.keys()) == 0 and pieces_ and chunks_tks:
for i, a in enumerate(pieces_):
sim, tksim, vtsim = self.qryr.hybrid_similarity(ans_v[i],
chunk_v,

Ładowanie…
Anuluj
Zapisz