| case VectorType.MILVUS | VectorType.RELYT | VectorType.PGVECTOR | VectorType.TIDB_VECTOR | VectorType.CHROMA | VectorType.TENCENT | VectorType.ORACLE: | case VectorType.MILVUS | VectorType.RELYT | VectorType.PGVECTOR | VectorType.TIDB_VECTOR | VectorType.CHROMA | VectorType.TENCENT | VectorType.ORACLE: | ||||
| return { | return { | ||||
| 'retrieval_method': [ | 'retrieval_method': [ | ||||
| RetrievalMethod.SEMANTIC_SEARCH | |||||
| RetrievalMethod.SEMANTIC_SEARCH.value | |||||
| ] | ] | ||||
| } | } | ||||
| case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH | VectorType.ANALYTICDB | VectorType.MYSCALE: | case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH | VectorType.ANALYTICDB | VectorType.MYSCALE: | ||||
| return { | return { | ||||
| 'retrieval_method': [ | 'retrieval_method': [ | ||||
| RetrievalMethod.SEMANTIC_SEARCH, | |||||
| RetrievalMethod.FULL_TEXT_SEARCH, | |||||
| RetrievalMethod.HYBRID_SEARCH, | |||||
| RetrievalMethod.SEMANTIC_SEARCH.value, | |||||
| RetrievalMethod.FULL_TEXT_SEARCH.value, | |||||
| RetrievalMethod.HYBRID_SEARCH.value, | |||||
| ] | ] | ||||
| } | } | ||||
| case _: | case _: | ||||
| case VectorType.MILVUS | VectorType.RELYT | VectorType.PGVECTOR | VectorType.TIDB_VECTOR | VectorType.CHROMA | VectorType.TENCENT | VectorType.ORACLE: | case VectorType.MILVUS | VectorType.RELYT | VectorType.PGVECTOR | VectorType.TIDB_VECTOR | VectorType.CHROMA | VectorType.TENCENT | VectorType.ORACLE: | ||||
| return { | return { | ||||
| 'retrieval_method': [ | 'retrieval_method': [ | ||||
| RetrievalMethod.SEMANTIC_SEARCH | |||||
| RetrievalMethod.SEMANTIC_SEARCH.value | |||||
| ] | ] | ||||
| } | } | ||||
| case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH| VectorType.ANALYTICDB | VectorType.MYSCALE: | case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH| VectorType.ANALYTICDB | VectorType.MYSCALE: | ||||
| return { | return { | ||||
| 'retrieval_method': [ | 'retrieval_method': [ | ||||
| RetrievalMethod.SEMANTIC_SEARCH, | |||||
| RetrievalMethod.FULL_TEXT_SEARCH, | |||||
| RetrievalMethod.HYBRID_SEARCH, | |||||
| RetrievalMethod.SEMANTIC_SEARCH.value, | |||||
| RetrievalMethod.FULL_TEXT_SEARCH.value, | |||||
| RetrievalMethod.HYBRID_SEARCH.value, | |||||
| ] | ] | ||||
| } | } | ||||
| case _: | case _: |
| from models.dataset import Dataset | from models.dataset import Dataset | ||||
| default_retrieval_model = { | default_retrieval_model = { | ||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH.value, | |||||
| 'reranking_enable': False, | 'reranking_enable': False, | ||||
| 'reranking_model': { | 'reranking_model': { | ||||
| 'reranking_provider_name': '', | 'reranking_provider_name': '', | ||||
| exception_message = ';\n'.join(exceptions) | exception_message = ';\n'.join(exceptions) | ||||
| raise Exception(exception_message) | raise Exception(exception_message) | ||||
| if retrival_method == RetrievalMethod.HYBRID_SEARCH: | |||||
| if retrival_method == RetrievalMethod.HYBRID_SEARCH.value: | |||||
| data_post_processor = DataPostProcessor(str(dataset.tenant_id), reranking_model, False) | data_post_processor = DataPostProcessor(str(dataset.tenant_id), reranking_model, False) | ||||
| all_documents = data_post_processor.invoke( | all_documents = data_post_processor.invoke( | ||||
| query=query, | query=query, | ||||
| ) | ) | ||||
| if documents: | if documents: | ||||
| if reranking_model and retrival_method == RetrievalMethod.SEMANTIC_SEARCH: | |||||
| if reranking_model and retrival_method == RetrievalMethod.SEMANTIC_SEARCH.value: | |||||
| data_post_processor = DataPostProcessor(str(dataset.tenant_id), reranking_model, False) | data_post_processor = DataPostProcessor(str(dataset.tenant_id), reranking_model, False) | ||||
| all_documents.extend(data_post_processor.invoke( | all_documents.extend(data_post_processor.invoke( | ||||
| query=query, | query=query, | ||||
| top_k=top_k | top_k=top_k | ||||
| ) | ) | ||||
| if documents: | if documents: | ||||
| if reranking_model and retrival_method == RetrievalMethod.FULL_TEXT_SEARCH: | |||||
| if reranking_model and retrival_method == RetrievalMethod.FULL_TEXT_SEARCH.value: | |||||
| data_post_processor = DataPostProcessor(str(dataset.tenant_id), reranking_model, False) | data_post_processor = DataPostProcessor(str(dataset.tenant_id), reranking_model, False) | ||||
| all_documents.extend(data_post_processor.invoke( | all_documents.extend(data_post_processor.invoke( | ||||
| query=query, | query=query, |
| from models.dataset import Document as DatasetDocument | from models.dataset import Document as DatasetDocument | ||||
| default_retrieval_model = { | default_retrieval_model = { | ||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH.value, | |||||
| 'reranking_enable': False, | 'reranking_enable': False, | ||||
| 'reranking_model': { | 'reranking_model': { | ||||
| 'reranking_provider_name': '', | 'reranking_provider_name': '', | ||||
| if retrieve_config.retrieve_strategy == DatasetRetrieveConfigEntity.RetrieveStrategy.SINGLE: | if retrieve_config.retrieve_strategy == DatasetRetrieveConfigEntity.RetrieveStrategy.SINGLE: | ||||
| # get retrieval model config | # get retrieval model config | ||||
| default_retrieval_model = { | default_retrieval_model = { | ||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH.value, | |||||
| 'reranking_enable': False, | 'reranking_enable': False, | ||||
| 'reranking_model': { | 'reranking_model': { | ||||
| 'reranking_provider_name': '', | 'reranking_provider_name': '', |
| from enum import Enum | from enum import Enum | ||||
| class RetrievalMethod(str, Enum): | |||||
| class RetrievalMethod(Enum): | |||||
| SEMANTIC_SEARCH = 'semantic_search' | SEMANTIC_SEARCH = 'semantic_search' | ||||
| FULL_TEXT_SEARCH = 'full_text_search' | FULL_TEXT_SEARCH = 'full_text_search' | ||||
| HYBRID_SEARCH = 'hybrid_search' | HYBRID_SEARCH = 'hybrid_search' | ||||
| @staticmethod | @staticmethod | ||||
| def is_support_semantic_search(retrieval_method: str) -> bool: | def is_support_semantic_search(retrieval_method: str) -> bool: | ||||
| return retrieval_method in {RetrievalMethod.SEMANTIC_SEARCH, RetrievalMethod.HYBRID_SEARCH} | |||||
| return retrieval_method in {RetrievalMethod.SEMANTIC_SEARCH.value, RetrievalMethod.HYBRID_SEARCH.value} | |||||
| @staticmethod | @staticmethod | ||||
| def is_support_fulltext_search(retrieval_method: str) -> bool: | def is_support_fulltext_search(retrieval_method: str) -> bool: | ||||
| return retrieval_method in {RetrievalMethod.FULL_TEXT_SEARCH, RetrievalMethod.HYBRID_SEARCH} | |||||
| return retrieval_method in {RetrievalMethod.FULL_TEXT_SEARCH.value, RetrievalMethod.HYBRID_SEARCH.value} |
| from models.dataset import Dataset, Document, DocumentSegment | from models.dataset import Dataset, Document, DocumentSegment | ||||
| default_retrieval_model = { | default_retrieval_model = { | ||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH.value, | |||||
| 'reranking_enable': False, | 'reranking_enable': False, | ||||
| 'reranking_model': { | 'reranking_model': { | ||||
| 'reranking_provider_name': '', | 'reranking_provider_name': '', |
| from models.dataset import Dataset, Document, DocumentSegment | from models.dataset import Dataset, Document, DocumentSegment | ||||
| default_retrieval_model = { | default_retrieval_model = { | ||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH.value, | |||||
| 'reranking_enable': False, | 'reranking_enable': False, | ||||
| 'reranking_model': { | 'reranking_model': { | ||||
| 'reranking_provider_name': '', | 'reranking_provider_name': '', |
| from models.workflow import WorkflowNodeExecutionStatus | from models.workflow import WorkflowNodeExecutionStatus | ||||
| default_retrieval_model = { | default_retrieval_model = { | ||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH.value, | |||||
| 'reranking_enable': False, | 'reranking_enable': False, | ||||
| 'reranking_model': { | 'reranking_model': { | ||||
| 'reranking_provider_name': '', | 'reranking_provider_name': '', |
| @property | @property | ||||
| def retrieval_model_dict(self): | def retrieval_model_dict(self): | ||||
| default_retrieval_model = { | default_retrieval_model = { | ||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH.value, | |||||
| 'reranking_enable': False, | 'reranking_enable': False, | ||||
| 'reranking_model': { | 'reranking_model': { | ||||
| 'reranking_provider_name': '', | 'reranking_provider_name': '', |
| dataset.collection_binding_id = dataset_collection_binding.id | dataset.collection_binding_id = dataset_collection_binding.id | ||||
| if not dataset.retrieval_model: | if not dataset.retrieval_model: | ||||
| default_retrieval_model = { | default_retrieval_model = { | ||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH.value, | |||||
| 'reranking_enable': False, | 'reranking_enable': False, | ||||
| 'reranking_model': { | 'reranking_model': { | ||||
| 'reranking_provider_name': '', | 'reranking_provider_name': '', | ||||
| retrieval_model = document_data['retrieval_model'] | retrieval_model = document_data['retrieval_model'] | ||||
| else: | else: | ||||
| default_retrieval_model = { | default_retrieval_model = { | ||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH.value, | |||||
| 'reranking_enable': False, | 'reranking_enable': False, | ||||
| 'reranking_model': { | 'reranking_model': { | ||||
| 'reranking_provider_name': '', | 'reranking_provider_name': '', |
| from models.dataset import Dataset, DatasetQuery, DocumentSegment | from models.dataset import Dataset, DatasetQuery, DocumentSegment | ||||
| default_retrieval_model = { | default_retrieval_model = { | ||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH.value, | |||||
| 'reranking_enable': False, | 'reranking_enable': False, | ||||
| 'reranking_model': { | 'reranking_model': { | ||||
| 'reranking_provider_name': '', | 'reranking_provider_name': '', |