|  |  | @@ -31,29 +31,48 @@ class RAGFlowPptParser: | 
		
	
		
			
			|  |  |  | return paragraph.text | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def __extract(self, shape): | 
		
	
		
			
			|  |  |  | if shape.shape_type == 19: | 
		
	
		
			
			|  |  |  | tb = shape.table | 
		
	
		
			
			|  |  |  | rows = [] | 
		
	
		
			
			|  |  |  | for i in range(1, len(tb.rows)): | 
		
	
		
			
			|  |  |  | rows.append("; ".join([tb.cell( | 
		
	
		
			
			|  |  |  | 0, j).text + ": " + tb.cell(i, j).text for j in range(len(tb.columns)) if tb.cell(i, j)])) | 
		
	
		
			
			|  |  |  | return "\n".join(rows) | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | # First try to get text content | 
		
	
		
			
			|  |  |  | if hasattr(shape, 'has_text_frame') and shape.has_text_frame: | 
		
	
		
			
			|  |  |  | text_frame = shape.text_frame | 
		
	
		
			
			|  |  |  | texts = [] | 
		
	
		
			
			|  |  |  | for paragraph in text_frame.paragraphs: | 
		
	
		
			
			|  |  |  | if paragraph.text.strip(): | 
		
	
		
			
			|  |  |  | texts.append(self.__get_bulleted_text(paragraph)) | 
		
	
		
			
			|  |  |  | return "\n".join(texts) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | if shape.has_text_frame: | 
		
	
		
			
			|  |  |  | text_frame = shape.text_frame | 
		
	
		
			
			|  |  |  | texts = [] | 
		
	
		
			
			|  |  |  | for paragraph in text_frame.paragraphs: | 
		
	
		
			
			|  |  |  | if paragraph.text.strip(): | 
		
	
		
			
			|  |  |  | texts.append(self.__get_bulleted_text(paragraph)) | 
		
	
		
			
			|  |  |  | return "\n".join(texts) | 
		
	
		
			
			|  |  |  | # Safely get shape_type | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | shape_type = shape.shape_type | 
		
	
		
			
			|  |  |  | except NotImplementedError: | 
		
	
		
			
			|  |  |  | # If shape_type is not available, try to get text content | 
		
	
		
			
			|  |  |  | if hasattr(shape, 'text'): | 
		
	
		
			
			|  |  |  | return shape.text.strip() | 
		
	
		
			
			|  |  |  | return "" | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | if shape.shape_type == 6: | 
		
	
		
			
			|  |  |  | texts = [] | 
		
	
		
			
			|  |  |  | for p in sorted(shape.shapes, key=lambda x: (x.top // 10, x.left)): | 
		
	
		
			
			|  |  |  | t = self.__extract(p) | 
		
	
		
			
			|  |  |  | if t: | 
		
	
		
			
			|  |  |  | texts.append(t) | 
		
	
		
			
			|  |  |  | return "\n".join(texts) | 
		
	
		
			
			|  |  |  | # Handle table | 
		
	
		
			
			|  |  |  | if shape_type == 19: | 
		
	
		
			
			|  |  |  | tb = shape.table | 
		
	
		
			
			|  |  |  | rows = [] | 
		
	
		
			
			|  |  |  | for i in range(1, len(tb.rows)): | 
		
	
		
			
			|  |  |  | rows.append("; ".join([tb.cell( | 
		
	
		
			
			|  |  |  | 0, j).text + ": " + tb.cell(i, j).text for j in range(len(tb.columns)) if tb.cell(i, j)])) | 
		
	
		
			
			|  |  |  | return "\n".join(rows) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | # Handle group shape | 
		
	
		
			
			|  |  |  | if shape_type == 6: | 
		
	
		
			
			|  |  |  | texts = [] | 
		
	
		
			
			|  |  |  | for p in sorted(shape.shapes, key=lambda x: (x.top // 10, x.left)): | 
		
	
		
			
			|  |  |  | t = self.__extract_texts(p) | 
		
	
		
			
			|  |  |  | if t: | 
		
	
		
			
			|  |  |  | texts.append(t) | 
		
	
		
			
			|  |  |  | return "\n".join(texts) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | return "" | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | except Exception as e: | 
		
	
		
			
			|  |  |  | logging.error(f"Error processing shape: {str(e)}") | 
		
	
		
			
			|  |  |  | return "" | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def __call__(self, fnm, from_page, to_page, callback=None): | 
		
	
		
			
			|  |  |  | ppt = Presentation(fnm) if isinstance( |