소스 검색

fix #12453 #12482 (#12495)

tags/1.0.0-beta.1
Jyong 9 달 전
부모
커밋
bee32d960a
No account linked to committer's email address
3개의 변경된 파일에서 23개의 추가 그리고 9개의 삭제
  1. 2
    1
      api/controllers/console/datasets/datasets_document.py
  2. 17
    7
      api/services/dataset_service.py
  3. 4
    1
      api/tasks/deal_dataset_vector_index_task.py

+ 2
- 1
api/controllers/console/datasets/datasets_document.py 파일 보기

parser.add_argument("original_document_id", type=str, required=False, location="json")
parser.add_argument("doc_form", type=str, default="text_model", required=False, nullable=False, location="json")
parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json")

parser.add_argument("embedding_model", type=str, required=False, nullable=True, location="json")
parser.add_argument("embedding_model_provider", type=str, required=False, nullable=True, location="json")
parser.add_argument(
"doc_language", type=str, default="English", required=False, nullable=False, location="json"
)

+ 17
- 7
api/services/dataset_service.py 파일 보기

dataset.indexing_technique = knowledge_config.indexing_technique
if knowledge_config.indexing_technique == "high_quality":
model_manager = ModelManager()
embedding_model = model_manager.get_default_model_instance(
tenant_id=current_user.current_tenant_id, model_type=ModelType.TEXT_EMBEDDING
)
dataset.embedding_model = embedding_model.model
dataset.embedding_model_provider = embedding_model.provider
if knowledge_config.embedding_model and knowledge_config.embedding_model_provider:
dataset_embedding_model = knowledge_config.embedding_model
dataset_embedding_model_provider = knowledge_config.embedding_model_provider
else:
embedding_model = model_manager.get_default_model_instance(
tenant_id=current_user.current_tenant_id, model_type=ModelType.TEXT_EMBEDDING
)
dataset_embedding_model = embedding_model.model
dataset_embedding_model_provider = embedding_model.provider
dataset.embedding_model = dataset_embedding_model
dataset.embedding_model_provider = dataset_embedding_model_provider
dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding(
embedding_model.provider, embedding_model.model
dataset_embedding_model_provider, dataset_embedding_model
)
dataset.collection_binding_id = dataset_collection_binding.id
if not dataset.retrieval_model:
"score_threshold_enabled": False,
}


dataset.retrieval_model = knowledge_config.retrieval_model.model_dump() or default_retrieval_model # type: ignore
dataset.retrieval_model = (
knowledge_config.retrieval_model.model_dump()
if knowledge_config.retrieval_model
else default_retrieval_model
) # type: ignore


documents = []
if knowledge_config.original_document_id:

+ 4
- 1
api/tasks/deal_dataset_vector_index_task.py 파일 보기



if not dataset:
raise Exception("Dataset not found")
index_type = dataset.doc_form
index_type = dataset.doc_form or IndexType.PARAGRAPH_INDEX
index_processor = IndexProcessorFactory(index_type).init_index_processor()
if action == "remove":
index_processor.clean(dataset, None, with_keywords=False)
{"indexing_status": "error", "error": str(e)}, synchronize_session=False
)
db.session.commit()
else:
# clean collection
index_processor.clean(dataset, None, with_keywords=False, delete_child_chunks=False)


end_at = time.perf_counter()
logging.info(

Loading…
취소
저장