Browse Source

Fix: possible memory leaks, closes #5277 (#5500)

### What problem does this PR solve?

Closes #5277 by making sure opened PDF files are closed after use.

### Type of change

- [x] Performance Improvement

---------

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
tags/v0.17.1
yihong 8 months ago
parent
commit
8a2542157f
No account linked to committer's email address
4 changed files with 9 additions and 1 deletions
  1. 1
    0
      api/utils/file_utils.py
  2. 6
    1
      deepdoc/parser/pdf_parser.py
  3. 1
    0
      deepdoc/vision/__init__.py
  4. 1
    0
      rag/svr/task_executor.py

+ 1
- 0
api/utils/file_utils.py View File

@@ -188,6 +188,7 @@ def thumbnail_img(filename, blob):
buffered = BytesIO()
else:
break
pdf.close()
return img

elif re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename):

+ 6
- 1
deepdoc/parser/pdf_parser.py View File

@@ -950,7 +950,9 @@ class RAGFlowPdfParser:
try:
pdf = pdfplumber.open(
fnm) if not binary else pdfplumber.open(BytesIO(binary))
return len(pdf.pages)
total_page = len(pdf.pages)
pdf.close()
return total_page
except Exception:
logging.exception("total_page_number")

@@ -996,8 +998,11 @@ class RAGFlowPdfParser:
dfs(outlines, 0)
except Exception as e:
logging.warning(f"Outlines exception: {e}")
finally:
self.pdf.close()
if not self.outlines:
logging.warning("Miss outlines")

logging.debug("Images converted.")
self.is_english = [re.search(r"[a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join(

+ 1
- 0
deepdoc/vision/__init__.py View File

@@ -42,6 +42,7 @@ def init_in_out(args):

for i, page in enumerate(images):
outputs.append(os.path.split(fnm)[-1] + f"_{i}.jpg")
pdf.close()

def images_and_outputs(fnm):
nonlocal outputs, images

+ 1
- 0
rag/svr/task_executor.py View File

@@ -18,6 +18,7 @@
# beartype_all(conf=BeartypeConf(violation_type=UserWarning)) # <-- emit warnings from all code
import random
import sys

from api.utils.log_utils import initRootLogger, get_project_base_directory
from graphrag.general.index import WithCommunity, WithResolution, Dealer
from graphrag.light.graph_extractor import GraphExtractor as LightKGExt

Loading…
Cancel
Save