
Reuse loaded modules if possible (#5231)

### What problem does this PR solve?

Reuse already-loaded ONNX models where possible. `load_model()` in `deepdoc/vision/ocr.py` now caches each loaded `(session, run_options)` pair in a module-level dict keyed by the model file path, and `Recognizer` loads its models through that same function instead of building its own `InferenceSession`.

### Type of change

- [x] Refactoring
tags/v0.17.0
Zhichang Yu, 8 months ago
Commit 0151d42156
2 changed files with 18 additions and 53 deletions
1. deepdoc/vision/ocr.py: +16 −5
2. deepdoc/vision/recognizer.py: +2 −48
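
The refactor is a module-level memoization of model loading: the first call constructs the ONNX Runtime session, and later calls for the same file path return the cached object. A minimal, self-contained sketch of the pattern (the `build_session` stand-in below is hypothetical; the real code in the diff caches `(InferenceSession, RunOptions)` pairs):

```python
# Minimal sketch of the caching pattern this commit introduces, with a
# hypothetical build_session() standing in for ort.InferenceSession(...).
loaded_models = {}

def build_session(model_file_path):
    # Placeholder for the expensive session construction done in ocr.py.
    return object()

def load_model(model_file_path):
    cached = loaded_models.get(model_file_path)
    if cached:
        return cached  # cache hit: reuse the already-loaded model
    session = build_session(model_file_path)
    loaded_models[model_file_path] = session  # keyed by .onnx file path
    return session

# Two loads of the same path yield the very same object.
assert load_model("det.onnx") is load_model("det.onnx")
```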

deepdoc/vision/ocr.py (+16 −5)

```diff
@@ -31,6 +31,7 @@ import onnxruntime as ort
 
 from .postprocess import build_post_process
 
+loaded_models = {}
 
 def transform(data, ops=None):
     """ transform """
@@ -67,6 +68,12 @@ def create_operators(op_param_list, global_config=None):
 
 def load_model(model_dir, nm):
     model_file_path = os.path.join(model_dir, nm + ".onnx")
+    global loaded_models
+    loaded_model = loaded_models.get(model_file_path)
+    if loaded_model:
+        logging.info(f"load_model {model_file_path} reuses cached model")
+        return loaded_model
+
     if not os.path.exists(model_file_path):
         raise ValueError("not find model file path {}".format(
             model_file_path))
@@ -102,15 +109,17 @@ def load_model(model_dir, nm):
             provider_options=[cuda_provider_options]
         )
         run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
-        logging.info(f"TextRecognizer {nm} uses GPU")
+        logging.info(f"load_model {model_file_path} uses GPU")
     else:
         sess = ort.InferenceSession(
             model_file_path,
             options=options,
             providers=['CPUExecutionProvider'])
         run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
-        logging.info(f"TextRecognizer {nm} uses CPU")
-    return sess, sess.get_inputs()[0], run_options
+        logging.info(f"load_model {model_file_path} uses CPU")
+    loaded_model = (sess, run_options)
+    loaded_models[model_file_path] = loaded_model
+    return loaded_model
 
 
 class TextRecognizer(object):
@@ -123,7 +132,8 @@ class TextRecognizer(object):
             "use_space_char": True
         }
         self.postprocess_op = build_post_process(postprocess_params)
-        self.predictor, self.input_tensor, self.run_options = load_model(model_dir, 'rec')
+        self.predictor, self.run_options = load_model(model_dir, 'rec')
+        self.input_tensor = self.predictor.get_inputs()[0]
 
     def resize_norm_img(self, img, max_wh_ratio):
         imgC, imgH, imgW = self.rec_image_shape
@@ -408,7 +418,8 @@ class TextDetector(object):
             "unclip_ratio": 1.5, "use_dilation": False, "score_mode": "fast", "box_type": "quad"}
 
         self.postprocess_op = build_post_process(postprocess_params)
-        self.predictor, self.input_tensor, self.run_options = load_model(model_dir, 'det')
+        self.predictor, self.run_options = load_model(model_dir, 'det')
+        self.input_tensor = self.predictor.get_inputs()[0]
 
         img_h, img_w = self.input_tensor.shape[2:]
         if isinstance(img_h, str) or isinstance(img_w, str):
```
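
Note the signature change: `load_model` now returns `(session, run_options)` rather than `(session, input_tensor, run_options)`, so each caller derives the input tensor from the shared session itself, as the updated constructors above do. A sketch of the new call-site contract (`model_dir` is hypothetical here):

```python
# Updated call site, mirroring TextRecognizer/TextDetector above; the call
# may be a cache hit returning a session created by another component.
predictor, run_options = load_model(model_dir, 'det')
input_tensor = predictor.get_inputs()[0]  # derived per caller, not cached
```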

deepdoc/vision/recognizer.py (+2 −48)

```diff
@@ -21,14 +21,12 @@ import numpy as np
 import cv2
 from functools import cmp_to_key
 
-import onnxruntime as ort
 from huggingface_hub import snapshot_download
 
 from api.utils.file_utils import get_project_base_directory
 from .operators import *  # noqa: F403
-from .operators import preprocess
-from . import operators
+from .ocr import load_model
 
 
 class Recognizer(object):
     def __init__(self, label_list, task_name, model_dir=None):
@@ -47,51 +45,7 @@ class Recognizer(object):
             model_dir = os.path.join(
                 get_project_base_directory(),
                 "rag/res/deepdoc")
-            model_file_path = os.path.join(model_dir, task_name + ".onnx")
-            if not os.path.exists(model_file_path):
-                model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc",
-                                              local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"),
-                                              local_dir_use_symlinks=False)
-                model_file_path = os.path.join(model_dir, task_name + ".onnx")
-        else:
-            model_file_path = os.path.join(model_dir, task_name + ".onnx")
-
-        if not os.path.exists(model_file_path):
-            raise ValueError("not find model file path {}".format(
-                model_file_path))
-
-        def cuda_is_available():
-            try:
-                import torch
-                if torch.cuda.is_available():
-                    return True
-            except Exception:
-                return False
-            return False
-
-        # https://github.com/microsoft/onnxruntime/issues/9509#issuecomment-951546580
-        # Shrink GPU memory after execution
-        self.run_options = ort.RunOptions()
-
-        if cuda_is_available():
-            options = ort.SessionOptions()
-            options.enable_cpu_mem_arena = False
-            cuda_provider_options = {
-                "device_id": 0,  # Use specific GPU
-                "gpu_mem_limit": 512 * 1024 * 1024,  # Limit gpu memory
-                "arena_extend_strategy": "kNextPowerOfTwo",  # gpu memory allocation strategy
-            }
-            self.ort_sess = ort.InferenceSession(
-                model_file_path, options=options,
-                providers=['CUDAExecutionProvider'],
-                provider_options=[cuda_provider_options]
-            )
-            self.run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
-            logging.info(f"Recognizer {task_name} uses GPU")
-        else:
-            self.ort_sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider'])
-            self.run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
-            logging.info(f"Recognizer {task_name} uses CPU")
+        self.ort_sess, self.run_options = load_model(model_dir, task_name)
         self.input_names = [node.name for node in self.ort_sess.get_inputs()]
         self.output_names = [node.name for node in self.ort_sess.get_outputs()]
         self.input_shape = self.ort_sess.get_inputs()[0].shape[2:4]
```
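
Since `Recognizer` now delegates to `ocr.load_model`, all deepdoc components funnel through one cache, and any two that resolve to the same `.onnx` path share a single session. A hedged illustration (directory and task name are hypothetical, and assume the model file exists):

```python
from deepdoc.vision.ocr import load_model

# Hypothetical model_dir and task name; the second call is a cache hit,
# so both callers share the same InferenceSession and RunOptions objects.
sess_a, opts_a = load_model("rag/res/deepdoc", "layout")
sess_b, opts_b = load_model("rag/res/deepdoc", "layout")
assert sess_a is sess_b and opts_a is opts_b
```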
