### What problem does this PR solve? Closes #5277 by making sure the file is closed. ### Type of change - [x] Performance Improvement --------- Signed-off-by: yihong0618 <zouzou0208@gmail.com> tags/v0.17.1
| @@ -188,6 +188,7 @@ def thumbnail_img(filename, blob): | |||
| buffered = BytesIO() | |||
| else: | |||
| break | |||
| pdf.close() | |||
| return img | |||
| elif re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename): | |||
| @@ -950,7 +950,9 @@ class RAGFlowPdfParser: | |||
| try: | |||
| pdf = pdfplumber.open( | |||
| fnm) if not binary else pdfplumber.open(BytesIO(binary)) | |||
| return len(pdf.pages) | |||
| total_page = len(pdf.pages) | |||
| pdf.close() | |||
| return total_page | |||
| except Exception: | |||
| logging.exception("total_page_number") | |||
| @@ -996,8 +998,11 @@ class RAGFlowPdfParser: | |||
| dfs(outlines, 0) | |||
| except Exception as e: | |||
| logging.warning(f"Outlines exception: {e}") | |||
| finally: | |||
| self.pdf.close() | |||
| if not self.outlines: | |||
| logging.warning("Miss outlines") | |||
| logging.debug("Images converted.") | |||
| self.is_english = [re.search(r"[a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join( | |||
| @@ -42,6 +42,7 @@ def init_in_out(args): | |||
| for i, page in enumerate(images): | |||
| outputs.append(os.path.split(fnm)[-1] + f"_{i}.jpg") | |||
| pdf.close() | |||
| def images_and_outputs(fnm): | |||
| nonlocal outputs, images | |||
| @@ -18,6 +18,7 @@ | |||
| # beartype_all(conf=BeartypeConf(violation_type=UserWarning)) # <-- emit warnings from all code | |||
| import random | |||
| import sys | |||
| from api.utils.log_utils import initRootLogger, get_project_base_directory | |||
| from graphrag.general.index import WithCommunity, WithResolution, Dealer | |||
| from graphrag.light.graph_extractor import GraphExtractor as LightKGExt | |||