浏览代码

Optimized Recognizer.sort_X_firstly and Recognizer.sort_Y_firstly (#5182)

### What problem does this PR solve?

Optimized Recognizer.sort_X_firstly and Recognizer.sort_Y_firstly

### Type of change

- [x] Performance Improvement
tags/v0.17.0
Zhichang Yu 8 个月前
父节点
当前提交
c326f14fed
没有帐户链接到提交者的电子邮件
共有 3 个文件被更改,包括 16 次插入和 26 次删除
  1. 1
    1
      deepdoc/parser/pdf_parser.py
  2. 14
    24
      deepdoc/vision/recognizer.py
  3. 1
    1
      deepdoc/vision/table_structure_recognizer.py

+ 1
- 1
deepdoc/parser/pdf_parser.py 查看文件

self.page_images = [p.to_image(resolution=72 * zoomin).annotated for i, p in self.page_images = [p.to_image(resolution=72 * zoomin).annotated for i, p in
enumerate(self.pdf.pages[page_from:page_to])] enumerate(self.pdf.pages[page_from:page_to])]
try: try:
self.page_chars = [[{**c, 'top': c['top'], 'bottom': c['bottom']} for c in page.dedupe_chars().chars if self._has_color(c)] for page in self.pdf.pages[page_from:page_to]]
self.page_chars = [[c for c in page.dedupe_chars().chars if self._has_color(c)] for page in self.pdf.pages[page_from:page_to]]
except Exception as e: except Exception as e:
logging.warning(f"Failed to extract characters for pages {page_from}-{page_to}: {str(e)}") logging.warning(f"Failed to extract characters for pages {page_from}-{page_to}: {str(e)}")
self.page_chars = [[] for _ in range(page_to - page_from)] # If failed to extract, using empty list instead. self.page_chars = [[] for _ in range(page_to - page_from)] # If failed to extract, using empty list instead.

+ 14
- 24
deepdoc/vision/recognizer.py 查看文件

import math import math
import numpy as np import numpy as np
import cv2 import cv2
from copy import deepcopy
from functools import cmp_to_key


import onnxruntime as ort import onnxruntime as ort
from huggingface_hub import snapshot_download from huggingface_hub import snapshot_download


@staticmethod @staticmethod
def sort_Y_firstly(arr, threashold): def sort_Y_firstly(arr, threashold):
# sort using y1 first and then x1
arr = sorted(arr, key=lambda r: (r["top"], r["x0"]))
for i in range(len(arr) - 1):
for j in range(i, -1, -1):
# restore the order using th
if abs(arr[j + 1]["top"] - arr[j]["top"]) < threashold \
and arr[j + 1]["x0"] < arr[j]["x0"]:
tmp = deepcopy(arr[j])
arr[j] = deepcopy(arr[j + 1])
arr[j + 1] = deepcopy(tmp)
def cmp(c1, c2):
diff = c1["top"] - c2["top"]
if abs(diff) < threashold:
diff = c1["x0"] - c2["x0"]
return diff
arr = sorted(arr, key=cmp_to_key(cmp))
return arr return arr


@staticmethod @staticmethod
def sort_X_firstly(arr, threashold, copy=True):
# sort using y1 first and then x1
arr = sorted(arr, key=lambda r: (r["x0"], r["top"]))
for i in range(len(arr) - 1):
for j in range(i, -1, -1):
# restore the order using th
if abs(arr[j + 1]["x0"] - arr[j]["x0"]) < threashold \
and arr[j + 1]["top"] < arr[j]["top"]:
tmp = deepcopy(arr[j]) if copy else arr[j]
arr[j] = deepcopy(arr[j + 1]) if copy else arr[j + 1]
arr[j + 1] = deepcopy(tmp) if copy else tmp
def sort_X_firstly(arr, threashold):
def cmp(c1, c2):
diff = c1["x0"] - c2["x0"]
if abs(diff) < threashold:
diff = c1["top"] - c2["top"]
return diff
arr = sorted(arr, key=cmp_to_key(cmp))
return arr return arr


@staticmethod @staticmethod
arr[j + 1] = tmp arr[j + 1] = tmp
return arr return arr


return sorted(arr, key=lambda r: (r.get("C", r["x0"]), r["top"]))

@staticmethod @staticmethod
def sort_R_firstly(arr, thr=0): def sort_R_firstly(arr, thr=0):
# sort using y1 first and then x1 # sort using y1 first and then x1

+ 1
- 1
deepdoc/vision/table_structure_recognizer.py 查看文件

colwm = np.min(colwm) if colwm else 0 colwm = np.min(colwm) if colwm else 0
crosspage = len(set([b["page_number"] for b in boxes])) > 1 crosspage = len(set([b["page_number"] for b in boxes])) > 1
if crosspage: if crosspage:
boxes = Recognizer.sort_X_firstly(boxes, colwm / 2, False)
boxes = Recognizer.sort_X_firstly(boxes, colwm / 2)
else: else:
boxes = Recognizer.sort_C_firstly(boxes, colwm / 2) boxes = Recognizer.sort_C_firstly(boxes, colwm / 2)
boxes[0]["cn"] = 0 boxes[0]["cn"] = 0

正在加载...
取消
保存