### What problem does this PR solve?

_Briefly describe what this PR aims to solve. Include background context that will help reviewers understand the purpose of the PR._

Issue link: [#196](https://github.com/infiniflow/ragflow/issues/196)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Breaking Change (fix or feature that could cause existing functionality not to work as expected)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Test cases
- [ ] Python SDK impacted, Need to update PyPI
- [ ] Other (please describe):

tags/v0.1.0
| for sheetname in wb.sheetnames: | for sheetname in wb.sheetnames: | ||||
| ws = wb[sheetname] | ws = wb[sheetname] | ||||
| rows = list(ws.rows) | rows = list(ws.rows) | ||||
| if not rows:continue | |||||
| tb += f"<table><caption>{sheetname}</caption><tr>" | tb += f"<table><caption>{sheetname}</caption><tr>" | ||||
| for t in list(rows[0]): | for t in list(rows[0]): | ||||
| tb += f"<th>{t.value}</th>" | tb += f"<th>{t.value}</th>" | ||||
| for sheetname in wb.sheetnames: | for sheetname in wb.sheetnames: | ||||
| ws = wb[sheetname] | ws = wb[sheetname] | ||||
| rows = list(ws.rows) | rows = list(ws.rows) | ||||
| if not rows:continue | |||||
| ti = list(rows[0]) | ti = list(rows[0]) | ||||
| for r in list(rows[1:]): | for r in list(rows[1:]): | ||||
| l = [] | l = [] |
| sections = [(txt, sec_ids[i], poss) | sections = [(txt, sec_ids[i], poss) | ||||
| for i, (txt, _, poss) in enumerate(sections)] | for i, (txt, _, poss) in enumerate(sections)] | ||||
| for (img, rows), poss in tbls: | for (img, rows), poss in tbls: | ||||
| if not rows:continue | |||||
| sections.append((rows if isinstance(rows, str) else rows[0], -1, | sections.append((rows if isinstance(rows, str) else rows[0], -1, | ||||
| [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss])) | [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss])) | ||||
| sections = [(b["text"], self.get_position(b, zoomin)) | sections = [(b["text"], self.get_position(b, zoomin)) | ||||
| for i, b in enumerate(self.boxes)] | for i, b in enumerate(self.boxes)] | ||||
| for (img, rows), poss in tbls: | for (img, rows), poss in tbls: | ||||
| if not rows:continue | |||||
| sections.append((rows if isinstance(rows, str) else rows[0], | sections.append((rows if isinstance(rows, str) else rows[0], | ||||
| [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss])) | [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss])) | ||||
| return [(txt, "") for txt, _ in sorted(sections, key=lambda x: ( | return [(txt, "") for txt, _ in sorted(sections, key=lambda x: ( |
| for sheetname in wb.sheetnames: | for sheetname in wb.sheetnames: | ||||
| ws = wb[sheetname] | ws = wb[sheetname] | ||||
| rows = list(ws.rows) | rows = list(ws.rows) | ||||
| if not rows:continue | |||||
| headers = [cell.value for cell in rows[0]] | headers = [cell.value for cell in rows[0]] | ||||
| missed = set([i for i, h in enumerate(headers) if h is None]) | missed = set([i for i, h in enumerate(headers) if h is None]) | ||||
| headers = [ | headers = [ |