| 
														 | 
														 | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														import uuid | 
														 | 
														 | 
														import uuid | 
													
													
												
													
														 | 
														 | 
														from typing import Optional, List, cast | 
														 | 
														 | 
														from typing import Optional, List, cast | 
													
													
												
													
														 | 
														 | 
														
  | 
														 | 
														 | 
														
  | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														from flask import current_app, Flask | 
													
													
												
													
														 | 
														 | 
														from flask_login import current_user | 
														 | 
														 | 
														from flask_login import current_user | 
													
													
												
													
														 | 
														 | 
														from langchain.schema import Document | 
														 | 
														 | 
														from langchain.schema import Document | 
													
													
												
													
														 | 
														 | 
														from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter | 
														 | 
														 | 
														from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter | 
													
													
												
											
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														                sub_documents = all_documents[i:i + 10] | 
														 | 
														 | 
														                sub_documents = all_documents[i:i + 10] | 
													
													
												
													
														 | 
														 | 
														                for doc in sub_documents: | 
														 | 
														 | 
														                for doc in sub_documents: | 
													
													
												
													
														 | 
														 | 
														                    document_format_thread = threading.Thread(target=self.format_qa_document, kwargs={ | 
														 | 
														 | 
														                    document_format_thread = threading.Thread(target=self.format_qa_document, kwargs={ | 
													
													
												
													
														 | 
														 | 
														                        'tenant_id': tenant_id, 'document_node': doc, 'all_qa_documents': all_qa_documents}) | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														                        'flask_app': current_app._get_current_object(), 'tenant_id': tenant_id, 'document_node': doc, | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														                        'all_qa_documents': all_qa_documents}) | 
													
													
												
													
														 | 
														 | 
														                    threads.append(document_format_thread) | 
														 | 
														 | 
														                    threads.append(document_format_thread) | 
													
													
												
													
														 | 
														 | 
														                    document_format_thread.start() | 
														 | 
														 | 
														                    document_format_thread.start() | 
													
													
												
													
														 | 
														 | 
														                for thread in threads: | 
														 | 
														 | 
														                for thread in threads: | 
													
													
												
											
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														            return all_qa_documents | 
														 | 
														 | 
														            return all_qa_documents | 
													
													
												
													
														 | 
														 | 
														        return all_documents | 
														 | 
														 | 
														        return all_documents | 
													
													
												
													
														 | 
														 | 
														
  | 
														 | 
														 | 
														
  | 
													
													
												
													
														 | 
														 | 
														    def format_qa_document(self, tenant_id: str, document_node, all_qa_documents): | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														    def format_qa_document(self, flask_app: Flask, tenant_id: str, document_node, all_qa_documents): | 
													
													
												
													
														 | 
														 | 
														        format_documents = [] | 
														 | 
														 | 
														        format_documents = [] | 
													
													
												
													
														 | 
														 | 
														        if document_node.page_content is None or not document_node.page_content.strip(): | 
														 | 
														 | 
														        if document_node.page_content is None or not document_node.page_content.strip(): | 
													
													
												
													
														 | 
														 | 
														            return | 
														 | 
														 | 
														            return | 
													
													
												
													
														 | 
														 | 
														        try: | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														            # qa model document | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														            response = LLMGenerator.generate_qa_document(tenant_id, document_node.page_content) | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														            document_qa_list = self.format_split_text(response) | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														            qa_documents = [] | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														            for result in document_qa_list: | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														                qa_document = Document(page_content=result['question'], metadata=document_node.metadata.copy()) | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														                doc_id = str(uuid.uuid4()) | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														                hash = helper.generate_text_hash(result['question']) | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														                qa_document.metadata['answer'] = result['answer'] | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														                qa_document.metadata['doc_id'] = doc_id | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														                qa_document.metadata['doc_hash'] = hash | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														                qa_documents.append(qa_document) | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														            format_documents.extend(qa_documents) | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														        except Exception as e: | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														            logging.exception(e) | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														        with flask_app.app_context(): | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														            try: | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														                # qa model document | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														                response = LLMGenerator.generate_qa_document(tenant_id, document_node.page_content) | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														                document_qa_list = self.format_split_text(response) | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														                qa_documents = [] | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														                for result in document_qa_list: | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														                    qa_document = Document(page_content=result['question'], metadata=document_node.metadata.copy()) | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														                    doc_id = str(uuid.uuid4()) | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														                    hash = helper.generate_text_hash(result['question']) | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														                    qa_document.metadata['answer'] = result['answer'] | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														                    qa_document.metadata['doc_id'] = doc_id | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														                    qa_document.metadata['doc_hash'] = hash | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														                    qa_documents.append(qa_document) | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														                format_documents.extend(qa_documents) | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														            except Exception as e: | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														                logging.exception(e) | 
													
													
												
													
														 | 
														 | 
														
  | 
														 | 
														 | 
														
  | 
													
													
												
													
														 | 
														 | 
														        all_qa_documents.extend(format_documents) | 
														 | 
														 | 
														 | 
													
													
												
													
														 | 
														 | 
														 | 
														 | 
														 | 
														            all_qa_documents.extend(format_documents) | 
													
													
												
													
														 | 
														 | 
														
  | 
														 | 
														 | 
														
  | 
													
													
												
													
														 | 
														 | 
														
  | 
														 | 
														 | 
														
  | 
													
													
												
													
														 | 
														 | 
														    def _split_to_documents_for_estimate(self, text_docs: List[Document], splitter: TextSplitter, | 
														 | 
														 | 
														    def _split_to_documents_for_estimate(self, text_docs: List[Document], splitter: TextSplitter, |