| @@ -17,6 +17,7 @@ from core.model_runtime.entities.model_entities import ModelType | |||
| from core.provider_manager import ProviderManager | |||
| from core.rag.datasource.vdb.vector_type import VectorType | |||
| from core.rag.extractor.entity.extract_setting import ExtractSetting | |||
| from core.rag.retrieval.retrival_methods import RetrievalMethod | |||
| from extensions.ext_database import db | |||
| from fields.app_fields import related_app_list | |||
| from fields.dataset_fields import dataset_detail_fields, dataset_query_detail_fields | |||
| @@ -500,13 +501,15 @@ class DatasetRetrievalSettingApi(Resource): | |||
| case VectorType.MILVUS | VectorType.RELYT | VectorType.PGVECTOR | VectorType.TIDB_VECTOR | VectorType.CHROMA | VectorType.TENCENT: | |||
| return { | |||
| 'retrieval_method': [ | |||
| 'semantic_search' | |||
| RetrievalMethod.SEMANTIC_SEARCH | |||
| ] | |||
| } | |||
| case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH: | |||
| return { | |||
| 'retrieval_method': [ | |||
| 'semantic_search', 'full_text_search', 'hybrid_search' | |||
| RetrievalMethod.SEMANTIC_SEARCH, | |||
| RetrievalMethod.FULL_TEXT_SEARCH, | |||
| RetrievalMethod.HYBRID_SEARCH, | |||
| ] | |||
| } | |||
| case _: | |||
| @@ -522,13 +525,15 @@ class DatasetRetrievalSettingMockApi(Resource): | |||
| case VectorType.MILVUS | VectorType.RELYT | VectorType.PGVECTOR | VectorType.TIDB_VECTOR | VectorType.CHROMA | VectorType.TENCEN: | |||
| return { | |||
| 'retrieval_method': [ | |||
| 'semantic_search' | |||
| RetrievalMethod.SEMANTIC_SEARCH | |||
| ] | |||
| } | |||
| case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH: | |||
| return { | |||
| 'retrieval_method': [ | |||
| 'semantic_search', 'full_text_search', 'hybrid_search' | |||
| RetrievalMethod.SEMANTIC_SEARCH, | |||
| RetrievalMethod.FULL_TEXT_SEARCH, | |||
| RetrievalMethod.HYBRID_SEARCH, | |||
| ] | |||
| } | |||
| case _: | |||
| @@ -6,11 +6,12 @@ from flask import Flask, current_app | |||
| from core.rag.data_post_processor.data_post_processor import DataPostProcessor | |||
| from core.rag.datasource.keyword.keyword_factory import Keyword | |||
| from core.rag.datasource.vdb.vector_factory import Vector | |||
| from core.rag.retrieval.retrival_methods import RetrievalMethod | |||
| from extensions.ext_database import db | |||
| from models.dataset import Dataset | |||
| default_retrieval_model = { | |||
| 'search_method': 'semantic_search', | |||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||
| 'reranking_enable': False, | |||
| 'reranking_model': { | |||
| 'reranking_provider_name': '', | |||
| @@ -47,7 +48,7 @@ class RetrievalService: | |||
| threads.append(keyword_thread) | |||
| keyword_thread.start() | |||
| # retrieval_model source with semantic | |||
| if retrival_method == 'semantic_search' or retrival_method == 'hybrid_search': | |||
| if RetrievalMethod.is_support_semantic_search(retrival_method): | |||
| embedding_thread = threading.Thread(target=RetrievalService.embedding_search, kwargs={ | |||
| 'flask_app': current_app._get_current_object(), | |||
| 'dataset_id': dataset_id, | |||
| @@ -63,7 +64,7 @@ class RetrievalService: | |||
| embedding_thread.start() | |||
| # retrieval source with full text | |||
| if retrival_method == 'full_text_search' or retrival_method == 'hybrid_search': | |||
| if RetrievalMethod.is_support_fulltext_search(retrival_method): | |||
| full_text_index_thread = threading.Thread(target=RetrievalService.full_text_index_search, kwargs={ | |||
| 'flask_app': current_app._get_current_object(), | |||
| 'dataset_id': dataset_id, | |||
| @@ -85,7 +86,7 @@ class RetrievalService: | |||
| exception_message = ';\n'.join(exceptions) | |||
| raise Exception(exception_message) | |||
| if retrival_method == 'hybrid_search': | |||
| if retrival_method == RetrievalMethod.HYBRID_SEARCH: | |||
| data_post_processor = DataPostProcessor(str(dataset.tenant_id), reranking_model, False) | |||
| all_documents = data_post_processor.invoke( | |||
| query=query, | |||
| @@ -141,7 +142,7 @@ class RetrievalService: | |||
| ) | |||
| if documents: | |||
| if reranking_model and retrival_method == 'semantic_search': | |||
| if reranking_model and retrival_method == RetrievalMethod.SEMANTIC_SEARCH: | |||
| data_post_processor = DataPostProcessor(str(dataset.tenant_id), reranking_model, False) | |||
| all_documents.extend(data_post_processor.invoke( | |||
| query=query, | |||
| @@ -173,7 +174,7 @@ class RetrievalService: | |||
| top_k=top_k | |||
| ) | |||
| if documents: | |||
| if reranking_model and retrival_method == 'full_text_search': | |||
| if reranking_model and retrival_method == RetrievalMethod.FULL_TEXT_SEARCH: | |||
| data_post_processor = DataPostProcessor(str(dataset.tenant_id), reranking_model, False) | |||
| all_documents.extend(data_post_processor.invoke( | |||
| query=query, | |||
| @@ -15,6 +15,7 @@ from core.model_runtime.model_providers.__base.large_language_model import Large | |||
| from core.rag.datasource.retrieval_service import RetrievalService | |||
| from core.rag.models.document import Document | |||
| from core.rag.rerank.rerank import RerankRunner | |||
| from core.rag.retrieval.retrival_methods import RetrievalMethod | |||
| from core.rag.retrieval.router.multi_dataset_function_call_router import FunctionCallMultiDatasetRouter | |||
| from core.rag.retrieval.router.multi_dataset_react_route import ReactMultiDatasetRouter | |||
| from core.tools.tool.dataset_retriever.dataset_multi_retriever_tool import DatasetMultiRetrieverTool | |||
| @@ -25,7 +26,7 @@ from models.dataset import Dataset, DatasetQuery, DocumentSegment | |||
| from models.dataset import Document as DatasetDocument | |||
| default_retrieval_model = { | |||
| 'search_method': 'semantic_search', | |||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||
| 'reranking_enable': False, | |||
| 'reranking_model': { | |||
| 'reranking_provider_name': '', | |||
| @@ -419,7 +420,7 @@ class DatasetRetrieval: | |||
| if retrieve_config.retrieve_strategy == DatasetRetrieveConfigEntity.RetrieveStrategy.SINGLE: | |||
| # get retrieval model config | |||
| default_retrieval_model = { | |||
| 'search_method': 'semantic_search', | |||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||
| 'reranking_enable': False, | |||
| 'reranking_model': { | |||
| 'reranking_provider_name': '', | |||
| @@ -0,0 +1,15 @@ | |||
| from enum import Enum | |||
class RetrievalMethod(str, Enum):
    """Retrieval strategies that can be selected for a dataset query.

    Members also subclass ``str``, so each one compares equal to (and hashes
    like) its plain string value, e.g. ``'hybrid_search'``.
    """

    SEMANTIC_SEARCH = 'semantic_search'
    FULL_TEXT_SEARCH = 'full_text_search'
    HYBRID_SEARCH = 'hybrid_search'

    @staticmethod
    def is_support_semantic_search(retrieval_method: str) -> bool:
        """Return True when *retrieval_method* is semantic or hybrid search."""
        semantic_capable = {
            RetrievalMethod.HYBRID_SEARCH,
            RetrievalMethod.SEMANTIC_SEARCH,
        }
        return retrieval_method in semantic_capable

    @staticmethod
    def is_support_fulltext_search(retrieval_method: str) -> bool:
        """Return True when *retrieval_method* is full-text or hybrid search."""
        fulltext_capable = {
            RetrievalMethod.HYBRID_SEARCH,
            RetrievalMethod.FULL_TEXT_SEARCH,
        }
        return retrieval_method in fulltext_capable
| @@ -8,12 +8,13 @@ from core.model_manager import ModelManager | |||
| from core.model_runtime.entities.model_entities import ModelType | |||
| from core.rag.datasource.retrieval_service import RetrievalService | |||
| from core.rag.rerank.rerank import RerankRunner | |||
| from core.rag.retrieval.retrival_methods import RetrievalMethod | |||
| from core.tools.tool.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool | |||
| from extensions.ext_database import db | |||
| from models.dataset import Dataset, Document, DocumentSegment | |||
| default_retrieval_model = { | |||
| 'search_method': 'semantic_search', | |||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||
| 'reranking_enable': False, | |||
| 'reranking_model': { | |||
| 'reranking_provider_name': '', | |||
| @@ -2,12 +2,13 @@ | |||
| from pydantic import BaseModel, Field | |||
| from core.rag.datasource.retrieval_service import RetrievalService | |||
| from core.rag.retrieval.retrival_methods import RetrievalMethod | |||
| from core.tools.tool.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool | |||
| from extensions.ext_database import db | |||
| from models.dataset import Dataset, Document, DocumentSegment | |||
| default_retrieval_model = { | |||
| 'search_method': 'semantic_search', | |||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||
| 'reranking_enable': False, | |||
| 'reranking_model': { | |||
| 'reranking_provider_name': '', | |||
| @@ -11,6 +11,7 @@ from core.model_manager import ModelInstance, ModelManager | |||
| from core.model_runtime.entities.model_entities import ModelFeature, ModelType | |||
| from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel | |||
| from core.rag.retrieval.dataset_retrieval import DatasetRetrieval | |||
| from core.rag.retrieval.retrival_methods import RetrievalMethod | |||
| from core.workflow.entities.base_node_data_entities import BaseNodeData | |||
| from core.workflow.entities.node_entities import NodeRunResult, NodeType | |||
| from core.workflow.entities.variable_pool import VariablePool | |||
| @@ -21,7 +22,7 @@ from models.dataset import Dataset, Document, DocumentSegment | |||
| from models.workflow import WorkflowNodeExecutionStatus | |||
| default_retrieval_model = { | |||
| 'search_method': 'semantic_search', | |||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||
| 'reranking_enable': False, | |||
| 'reranking_model': { | |||
| 'reranking_provider_name': '', | |||
| @@ -13,6 +13,7 @@ from flask import current_app | |||
| from sqlalchemy import func | |||
| from sqlalchemy.dialects.postgresql import JSONB | |||
| from core.rag.retrieval.retrival_methods import RetrievalMethod | |||
| from extensions.ext_database import db | |||
| from extensions.ext_storage import storage | |||
| from models import StringUUID | |||
| @@ -116,7 +117,7 @@ class Dataset(db.Model): | |||
| @property | |||
| def retrieval_model_dict(self): | |||
| default_retrieval_model = { | |||
| 'search_method': 'semantic_search', | |||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||
| 'reranking_enable': False, | |||
| 'reranking_model': { | |||
| 'reranking_provider_name': '', | |||
| @@ -15,6 +15,7 @@ from core.model_manager import ModelManager | |||
| from core.model_runtime.entities.model_entities import ModelType | |||
| from core.rag.datasource.keyword.keyword_factory import Keyword | |||
| from core.rag.models.document import Document as RAGDocument | |||
| from core.rag.retrieval.retrival_methods import RetrievalMethod | |||
| from events.dataset_event import dataset_was_deleted | |||
| from events.document_event import document_was_deleted | |||
| from extensions.ext_database import db | |||
| @@ -602,7 +603,7 @@ class DocumentService: | |||
| dataset.collection_binding_id = dataset_collection_binding.id | |||
| if not dataset.retrieval_model: | |||
| default_retrieval_model = { | |||
| 'search_method': 'semantic_search', | |||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||
| 'reranking_enable': False, | |||
| 'reranking_model': { | |||
| 'reranking_provider_name': '', | |||
| @@ -959,7 +960,7 @@ class DocumentService: | |||
| retrieval_model = document_data['retrieval_model'] | |||
| else: | |||
| default_retrieval_model = { | |||
| 'search_method': 'semantic_search', | |||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||
| 'reranking_enable': False, | |||
| 'reranking_model': { | |||
| 'reranking_provider_name': '', | |||
| @@ -10,12 +10,13 @@ from core.model_runtime.entities.model_entities import ModelType | |||
| from core.rag.datasource.entity.embedding import Embeddings | |||
| from core.rag.datasource.retrieval_service import RetrievalService | |||
| from core.rag.models.document import Document | |||
| from core.rag.retrieval.retrival_methods import RetrievalMethod | |||
| from extensions.ext_database import db | |||
| from models.account import Account | |||
| from models.dataset import Dataset, DatasetQuery, DocumentSegment | |||
| default_retrieval_model = { | |||
| 'search_method': 'semantic_search', | |||
| 'search_method': RetrievalMethod.SEMANTIC_SEARCH, | |||
| 'reranking_enable': False, | |||
| 'reranking_model': { | |||
| 'reranking_provider_name': '', | |||