| raise TextExtractionError(f"Failed to extract text from DOC: {str(e)}") from e | raise TextExtractionError(f"Failed to extract text from DOC: {str(e)}") from e | ||||
| def paser_docx_part(block, doc: Document, content_items, i): | |||||
| def parser_docx_part(block, doc: Document, content_items, i): | |||||
| if isinstance(block, CT_P): | if isinstance(block, CT_P): | ||||
| content_items.append((i, "paragraph", Paragraph(block, doc))) | content_items.append((i, "paragraph", Paragraph(block, doc))) | ||||
| elif isinstance(block, CT_Tbl): | elif isinstance(block, CT_Tbl): | ||||
| part = next(it, None) | part = next(it, None) | ||||
| i = 0 | i = 0 | ||||
| while part is not None: | while part is not None: | ||||
| paser_docx_part(part, doc, content_items, i) | |||||
| parser_docx_part(part, doc, content_items, i) | |||||
| i = i + 1 | i = i + 1 | ||||
| part = next(it, None) | part = next(it, None) | ||||