|  |  | @@ -14,11 +14,14 @@ import os | 
		
	
		
			
			|  |  |  | import re | 
		
	
		
			
			|  |  |  | from collections import Counter | 
		
	
		
			
			|  |  |  | from copy import deepcopy | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | import cv2 | 
		
	
		
			
			|  |  |  | import numpy as np | 
		
	
		
			
			|  |  |  | from huggingface_hub import snapshot_download | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | from api.utils.file_utils import get_project_base_directory | 
		
	
		
			
			|  |  |  | from deepdoc.vision import Recognizer | 
		
	
		
			
			|  |  |  | from deepdoc.vision.operators import nms | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | class LayoutRecognizer(Recognizer): | 
		
	
	
		
			
			|  |  | @@ -149,3 +152,88 @@ class LayoutRecognizer(Recognizer): | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | ocr_res = [b for b in ocr_res if b["text"].strip() not in garbag_set] | 
		
	
		
			
			|  |  |  | return ocr_res, page_layout | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			class LayoutRecognizer4YOLOv10(LayoutRecognizer):
			    """Layout recognizer that letterboxes pages and decodes per-box detections.

			    ``preprocess`` resizes each image with its aspect ratio preserved and pads
			    it to ``self.input_shape``; ``postprocess`` maps the detections back to
			    original-image coordinates and applies per-class NMS.
			    """

			    # Label names for this recognizer. Some entries repeat ("Table caption",
			    # "Figure caption").
			    # NOTE(review): presumably the list is index-aligned with the model's class
			    # ids, several ids sharing one label -- confirm before reordering or
			    # de-duplicating. postprocess() reads ``self.label_list`` rather than this
			    # attribute; presumably a parent constructor derives it -- TODO confirm.
			    labels = [
			        "title",
			        "Text",
			        "Reference",
			        "Figure",
			        "Figure caption",
			        "Table",
			        "Table caption",
			        "Table caption",
			        "Equation",
			        "Figure caption",
			    ]

			    def __init__(self, domain):
			        """Initialise the recognizer with the fixed ``"layout"`` domain.

			        Args:
			            domain: Accepted for signature compatibility only; the value is
			                replaced by ``"layout"`` before the parent constructor runs.
			        """
			        domain = "layout"
			        super().__init__(domain)
			        # Letterbox-style options. Only ``center`` is read by preprocess() in
			        # this class; the others are stored but unused here.
			        self.auto = False
			        self.scaleFill = False
			        self.scaleup = True
			        self.stride = 32
			        self.center = True

			    def preprocess(self, image_list):
			        """Letterbox every image to ``self.input_shape`` and build model inputs.

			        Args:
			            image_list: Iterable of image arrays. Each one is converted with
			                ``cv2.COLOR_BGR2RGB``, so BGR channel order is assumed.

			        Returns:
			            A list with one dict per image holding the float32 tensor (batch
			            axis added, channels first, values divided by 255) under
			            ``self.input_names[0]`` and ``"scale_factor"`` as
			            ``[x_scale, y_scale, pad_left, pad_top]``. The scales map resized
			            pixels back to original pixels; the pads are the border widths
			            actually added on the left and top.
			        """
			        inputs = []
			        new_shape = self.input_shape  # (height, width)
			        for img in image_list:
			            height, width = img.shape[:2]  # current shape
			            # Scale ratio (new / old) that fits the image inside new_shape.
			            r = min(new_shape[0] / height, new_shape[1] / width)
			            ww, hh = int(round(width * r)), int(round(height * r))
			            # Total padding needed to reach new_shape.
			            dw, dh = new_shape[1] - ww, new_shape[0] - hh
			            if self.center:
			                # Split the padding between both sides; otherwise all of it
			                # goes to the right/bottom so the output still fills new_shape.
			                dw /= 2
			                dh /= 2
			            # The -0.1 / +0.1 nudges send an odd pixel to the bottom/right.
			            top = int(round(dh - 0.1)) if self.center else 0
			            bottom = int(round(dh + 0.1))
			            left = int(round(dw - 0.1)) if self.center else 0
			            right = int(round(dw + 0.1))

			            img = np.array(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).astype(np.float32)
			            img = cv2.resize(img, (ww, hh), interpolation=cv2.INTER_LINEAR)
			            img = cv2.copyMakeBorder(
			                img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)
			            )  # add border
			            img /= 255.0
			            img = img.transpose(2, 0, 1)[np.newaxis, :, :, :]
			            # Record the offsets that were really applied (left/top), not the
			            # un-rounded halves, so postprocess() undoes the exact shift.
			            inputs.append({
			                self.input_names[0]: img,
			                "scale_factor": [width / ww, height / hh, left, top],
			            })

			        return inputs

			    def postprocess(self, boxes, inputs, thr):
			        """Convert raw detections of one image into layout regions.

			        Args:
			            boxes: Detection array whose last axis holds, per row, four box
			                coordinates (columns 0/2 on the x axis, 1/3 on the y axis),
			                the score in column 4 and the class id in the last column.
			                Leading singleton axes are accepted.
			            inputs: The dict produced by ``preprocess`` for the same image;
			                only ``"scale_factor"`` is read.
			            thr: Ignored -- a fixed score threshold of 0.08 is always used.

			        Returns:
			            A list of dicts with ``"type"`` (lower-cased label), ``"bbox"``
			            (four floats in original-image pixels) and ``"score"``; empty when
			            no detection exceeds the threshold.
			        """
			        thr = 0.08
			        boxes = np.asarray(boxes)
			        # Flatten to (num_boxes, columns). Unlike np.squeeze this keeps a single
			        # detection two-dimensional, so the column indexing below stays valid.
			        boxes = boxes.reshape(-1, boxes.shape[-1])
			        boxes = boxes[boxes[:, 4] > thr, :]
			        if len(boxes) == 0:
			            return []
			        scores = boxes[:, 4]
			        class_ids = boxes[:, -1].astype(int)

			        x_scale, y_scale, pad_x, pad_y = inputs["scale_factor"][:4]
			        boxes = boxes[:, :4]
			        # Remove the letterbox border, then scale back to the original size.
			        boxes[:, [0, 2]] -= pad_x
			        boxes[:, [1, 3]] -= pad_y
			        scale = np.array([x_scale, y_scale, x_scale, y_scale])
			        boxes = np.multiply(boxes, scale, dtype=np.float32)

			        # Run NMS per class so boxes of different classes never suppress each
			        # other. NOTE(review): 0.45 is presumably the IoU threshold of nms().
			        indices = []
			        for class_id in np.unique(class_ids):
			            class_indices = np.where(class_ids == class_id)[0]
			            kept = nms(boxes[class_indices, :], scores[class_indices], 0.45)
			            indices.extend(class_indices[kept])

			        return [{
			            "type": self.label_list[class_ids[i]].lower(),
			            "bbox": [float(t) for t in boxes[i].tolist()],
			            "score": float(scores[i])
			        } for i in indices]
		
	
		
			
			|  |  |  | 
 |