### What problem does this PR solve?

Reuse loaded models if possible.

### Type of change

- [x] Refactoring
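For context, the refactor boils down to a module-level cache in `ocr.py`, keyed by the resolved `.onnx` file path, so that constructing several `TextRecognizer`/`TextDetector`/`Recognizer` instances against the same model directory reuses one ONNX Runtime session instead of parsing the model file again. A minimal sketch of the pattern (simplified names, not the exact project code; `build_session` stands in for the provider-specific setup shown in the hunks below):

```python
# Minimal sketch of the caching pattern (simplified; not the exact project code).
loaded_models = {}

def load_model_cached(model_file_path, build_session):
    """Return a cached session for model_file_path, building it on first use."""
    cached = loaded_models.get(model_file_path)
    if cached is not None:
        return cached  # reuse the already-initialized session
    session = build_session(model_file_path)  # expensive: parse + initialize the .onnx model
    loaded_models[model_file_path] = session
    return session
```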
```
@@ -31,6 +31,7 @@ import onnxruntime as ort
from .postprocess import build_post_process

loaded_models = {}

def transform(data, ops=None):
    """ transform """

@@ -67,6 +68,12 @@ def create_operators(op_param_list, global_config=None):
def load_model(model_dir, nm):
    model_file_path = os.path.join(model_dir, nm + ".onnx")
    global loaded_models
    loaded_model = loaded_models.get(model_file_path)
    if loaded_model:
        logging.info(f"load_model {model_file_path} reuses cached model")
        return loaded_model
    if not os.path.exists(model_file_path):
        raise ValueError("not find model file path {}".format(
            model_file_path))
```
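The cache check sits ahead of the existing file-existence check, so a hit returns without touching the filesystem. (The `global loaded_models` statement is only needed if the name were rebound; reading the dict and mutating it in place work without it, though it does no harm.) A hedged usage sketch of the intended behavior, with placeholder paths:

```python
# Hypothetical usage: both calls resolve to the same .onnx path, so the second
# call logs "... reuses cached model" and returns the same cached object,
# which (after this PR) is a (session, run_options) tuple.
first = load_model("/path/to/models", "rec")
second = load_model("/path/to/models", "rec")
assert first is second
```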
```
@@ -102,15 +109,17 @@ def load_model(model_dir, nm):
            provider_options=[cuda_provider_options]
        )
        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
        logging.info(f"TextRecognizer {nm} uses GPU")
        logging.info(f"load_model {model_file_path} uses GPU")
    else:
        sess = ort.InferenceSession(
            model_file_path,
            options=options,
            providers=['CPUExecutionProvider'])
        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
        logging.info(f"TextRecognizer {nm} uses CPU")
    return sess, sess.get_inputs()[0], run_options
        logging.info(f"load_model {model_file_path} uses CPU")
    loaded_model = (sess, run_options)
    loaded_models[model_file_path] = loaded_model
    return loaded_model

class TextRecognizer(object):
```
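The return contract changes here: `load_model` now returns and caches `(sess, run_options)` instead of `(sess, input_tensor, run_options)`, and the log prefix switches from `TextRecognizer {nm}` to `load_model {model_file_path}`. One detail worth flagging: the session is built with `options=options`, while onnxruntime's documented keyword for a `SessionOptions` object is `sess_options=`. The sketch below is a hedged reconstruction of the provider selection, not the project's exact code; it uses the documented keyword, and its constants mirror the values visible in the diff and in the removed `Recognizer` code further down.

```python
import logging
import onnxruntime as ort

def build_session(model_file_path, use_cuda):
    """Hedged sketch of the provider-specific session setup in the diff."""
    run_options = ort.RunOptions()
    if use_cuda:
        options = ort.SessionOptions()
        options.enable_cpu_mem_arena = False
        cuda_provider_options = {
            "device_id": 0,                      # use a specific GPU
            "gpu_mem_limit": 512 * 1024 * 1024,  # cap GPU memory at 512 MiB
            "arena_extend_strategy": "kNextPowerOfTwo",
        }
        sess = ort.InferenceSession(
            model_file_path,
            sess_options=options,
            providers=["CUDAExecutionProvider"],
            provider_options=[cuda_provider_options],
        )
        # Shrink the GPU memory arena after each run (see microsoft/onnxruntime#9509).
        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
        logging.info(f"load_model {model_file_path} uses GPU")
    else:
        sess = ort.InferenceSession(
            model_file_path,
            providers=["CPUExecutionProvider"],
        )
        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
        logging.info(f"load_model {model_file_path} uses CPU")
    return sess, run_options
```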
```
@@ -123,7 +132,8 @@ class TextRecognizer(object):
            "use_space_char": True
        }
        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.run_options = load_model(model_dir, 'rec')
        self.predictor, self.run_options = load_model(model_dir, 'rec')
        self.input_tensor = self.predictor.get_inputs()[0]

    def resize_norm_img(self, img, max_wh_ratio):
        imgC, imgH, imgW = self.rec_image_shape
```
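Both `TextRecognizer` here and `TextDetector` in the next hunk follow the same pattern: unpack the cached pair, then recover the input tensor from the session. For illustration, a minimal sketch of the consumer side (assuming `model_dir` contains the expected `rec.onnx`):

```python
# Mirrors the constructor change above: the input NodeArg comes from the session.
predictor, run_options = load_model(model_dir, 'rec')
input_tensor = predictor.get_inputs()[0]
# NodeArg exposes .name, .type and .shape; dynamic axes show up as strings.
logging.debug("rec input %s with shape %s", input_tensor.name, input_tensor.shape)
```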
```
@@ -408,7 +418,8 @@ class TextDetector(object):
            "unclip_ratio": 1.5, "use_dilation": False, "score_mode": "fast", "box_type": "quad"}
        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.run_options = load_model(model_dir, 'det')
        self.predictor, self.run_options = load_model(model_dir, 'det')
        self.input_tensor = self.predictor.get_inputs()[0]
        img_h, img_w = self.input_tensor.shape[2:]
        if isinstance(img_h, str) or isinstance(img_w, str):
```
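The `isinstance(..., str)` check exists because ONNX models can declare dynamic axes; onnxruntime then reports those dimensions as symbolic names (strings) or `None` rather than integers, so the detector cannot assume a fixed input height and width. A small sketch of that distinction (the model path is a placeholder):

```python
import onnxruntime as ort

sess = ort.InferenceSession("det.onnx", providers=["CPUExecutionProvider"])  # placeholder path
h, w = sess.get_inputs()[0].shape[2:]
if isinstance(h, str) or isinstance(w, str):
    # Dynamic spatial dims: the model accepts variable-sized inputs,
    # so the caller must pick a concrete size at preprocessing time.
    print("dynamic input size:", h, w)
else:
    print("fixed input size:", h, w)
```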
The diff then moves to the module that defines `Recognizer`, whose imports change so it can reuse `load_model` from `.ocr`:

```
@@ -21,14 +21,12 @@ import numpy as np
import cv2
from functools import cmp_to_key
import onnxruntime as ort
from huggingface_hub import snapshot_download
from api.utils.file_utils import get_project_base_directory
from .operators import *  # noqa: F403
from .operators import preprocess
from . import operators
from .ocr import load_model

class Recognizer(object):
    def __init__(self, label_list, task_name, model_dir=None):
```
```
@@ -47,51 +45,7 @@ class Recognizer(object):
            model_dir = os.path.join(
                get_project_base_directory(),
                "rag/res/deepdoc")
            model_file_path = os.path.join(model_dir, task_name + ".onnx")
            if not os.path.exists(model_file_path):
                model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc",
                                              local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"),
                                              local_dir_use_symlinks=False)
                model_file_path = os.path.join(model_dir, task_name + ".onnx")
        else:
            model_file_path = os.path.join(model_dir, task_name + ".onnx")

        if not os.path.exists(model_file_path):
            raise ValueError("not find model file path {}".format(
                model_file_path))

        def cuda_is_available():
            try:
                import torch
                if torch.cuda.is_available():
                    return True
            except Exception:
                return False
            return False

        # https://github.com/microsoft/onnxruntime/issues/9509#issuecomment-951546580
        # Shrink GPU memory after execution
        self.run_options = ort.RunOptions()
        if cuda_is_available():
            options = ort.SessionOptions()
            options.enable_cpu_mem_arena = False
            cuda_provider_options = {
                "device_id": 0,  # Use specific GPU
                "gpu_mem_limit": 512 * 1024 * 1024,  # Limit gpu memory
                "arena_extend_strategy": "kNextPowerOfTwo",  # gpu memory allocation strategy
            }
            self.ort_sess = ort.InferenceSession(
                model_file_path, options=options,
                providers=['CUDAExecutionProvider'],
                provider_options=[cuda_provider_options]
            )
            self.run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
            logging.info(f"Recognizer {task_name} uses GPU")
        else:
            self.ort_sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider'])
            self.run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
            logging.info(f"Recognizer {task_name} uses CPU")
        self.ort_sess, self.run_options = load_model(model_dir, task_name)
        self.input_names = [node.name for node in self.ort_sess.get_inputs()]
        self.output_names = [node.name for node in self.ort_sess.get_outputs()]
        self.input_shape = self.ort_sess.get_inputs()[0].shape[2:4]
```
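Net effect for `Recognizer`: the per-instance download-or-raise logic, the `torch`-based CUDA probe, and the session construction disappear from `__init__`, leaving only the cached `load_model` call plus the input/output metadata reads. A hedged sketch of what a caller can expect afterwards (attribute names come from the diff; the label list, task name and model directory are placeholders):

```python
# Hypothetical check after constructing two recognizers for the same task:
# they should share one ONNX session because load_model caches by file path.
a = Recognizer(label_list=["x"], task_name="layout", model_dir="/path/to/deepdoc")
b = Recognizer(label_list=["x"], task_name="layout", model_dir="/path/to/deepdoc")
assert a.ort_sess is b.ort_sess          # same cached InferenceSession
print(a.input_names, a.output_names)     # node names discovered from the session
print(a.input_shape)                     # H, W slice of the first input's shape
```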