|
|
|
@@ -859,7 +859,7 @@ class DocumentService: |
|
|
|
position = DocumentService.get_documents_position(dataset.id) |
|
|
|
document_ids = [] |
|
|
|
duplicate_document_ids = [] |
|
|
|
if knowledge_config.data_source.info_list.data_source_type == "upload_file": |
|
|
|
if knowledge_config.data_source.info_list.data_source_type == "upload_file": # type: ignore |
|
|
|
upload_file_list = knowledge_config.data_source.info_list.file_info_list.file_ids # type: ignore |
|
|
|
for file_id in upload_file_list: |
|
|
|
file = ( |
|
|
|
@@ -901,7 +901,7 @@ class DocumentService: |
|
|
|
document = DocumentService.build_document( |
|
|
|
dataset, |
|
|
|
dataset_process_rule.id, # type: ignore |
|
|
|
knowledge_config.data_source.info_list.data_source_type, |
|
|
|
knowledge_config.data_source.info_list.data_source_type, # type: ignore |
|
|
|
knowledge_config.doc_form, |
|
|
|
knowledge_config.doc_language, |
|
|
|
data_source_info, |
|
|
|
@@ -916,8 +916,8 @@ class DocumentService: |
|
|
|
document_ids.append(document.id) |
|
|
|
documents.append(document) |
|
|
|
position += 1 |
|
|
|
elif knowledge_config.data_source.info_list.data_source_type == "notion_import": |
|
|
|
notion_info_list = knowledge_config.data_source.info_list.notion_info_list |
|
|
|
elif knowledge_config.data_source.info_list.data_source_type == "notion_import": # type: ignore |
|
|
|
notion_info_list = knowledge_config.data_source.info_list.notion_info_list # type: ignore |
|
|
|
if not notion_info_list: |
|
|
|
raise ValueError("No notion info list found.") |
|
|
|
exist_page_ids = [] |
|
|
|
@@ -956,7 +956,7 @@ class DocumentService: |
|
|
|
document = DocumentService.build_document( |
|
|
|
dataset, |
|
|
|
dataset_process_rule.id, # type: ignore |
|
|
|
knowledge_config.data_source.info_list.data_source_type, |
|
|
|
knowledge_config.data_source.info_list.data_source_type, # type: ignore |
|
|
|
knowledge_config.doc_form, |
|
|
|
knowledge_config.doc_language, |
|
|
|
data_source_info, |
|
|
|
@@ -976,8 +976,8 @@ class DocumentService: |
|
|
|
# delete not selected documents |
|
|
|
if len(exist_document) > 0: |
|
|
|
clean_notion_document_task.delay(list(exist_document.values()), dataset.id) |
|
|
|
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl": |
|
|
|
website_info = knowledge_config.data_source.info_list.website_info_list |
|
|
|
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl": # type: ignore |
|
|
|
website_info = knowledge_config.data_source.info_list.website_info_list # type: ignore |
|
|
|
if not website_info: |
|
|
|
raise ValueError("No website info list found.") |
|
|
|
urls = website_info.urls |
|
|
|
@@ -996,7 +996,7 @@ class DocumentService: |
|
|
|
document = DocumentService.build_document( |
|
|
|
dataset, |
|
|
|
dataset_process_rule.id, # type: ignore |
|
|
|
knowledge_config.data_source.info_list.data_source_type, |
|
|
|
knowledge_config.data_source.info_list.data_source_type, # type: ignore |
|
|
|
knowledge_config.doc_form, |
|
|
|
knowledge_config.doc_language, |
|
|
|
data_source_info, |
|
|
|
@@ -1195,20 +1195,20 @@ class DocumentService: |
|
|
|
|
|
|
|
if features.billing.enabled: |
|
|
|
count = 0 |
|
|
|
if knowledge_config.data_source.info_list.data_source_type == "upload_file": |
|
|
|
if knowledge_config.data_source.info_list.data_source_type == "upload_file": # type: ignore |
|
|
|
upload_file_list = ( |
|
|
|
knowledge_config.data_source.info_list.file_info_list.file_ids |
|
|
|
if knowledge_config.data_source.info_list.file_info_list |
|
|
|
knowledge_config.data_source.info_list.file_info_list.file_ids # type: ignore |
|
|
|
if knowledge_config.data_source.info_list.file_info_list # type: ignore |
|
|
|
else [] |
|
|
|
) |
|
|
|
count = len(upload_file_list) |
|
|
|
elif knowledge_config.data_source.info_list.data_source_type == "notion_import": |
|
|
|
notion_info_list = knowledge_config.data_source.info_list.notion_info_list |
|
|
|
elif knowledge_config.data_source.info_list.data_source_type == "notion_import": # type: ignore |
|
|
|
notion_info_list = knowledge_config.data_source.info_list.notion_info_list # type: ignore |
|
|
|
if notion_info_list: |
|
|
|
for notion_info in notion_info_list: |
|
|
|
count = count + len(notion_info.pages) |
|
|
|
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl": |
|
|
|
website_info = knowledge_config.data_source.info_list.website_info_list |
|
|
|
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl": # type: ignore |
|
|
|
website_info = knowledge_config.data_source.info_list.website_info_list # type: ignore |
|
|
|
if website_info: |
|
|
|
count = len(website_info.urls) |
|
|
|
batch_upload_limit = int(dify_config.BATCH_UPLOAD_LIMIT) |
|
|
|
@@ -1239,7 +1239,7 @@ class DocumentService: |
|
|
|
dataset = Dataset( |
|
|
|
tenant_id=tenant_id, |
|
|
|
name="", |
|
|
|
data_source_type=knowledge_config.data_source.info_list.data_source_type, |
|
|
|
data_source_type=knowledge_config.data_source.info_list.data_source_type, # type: ignore |
|
|
|
indexing_technique=knowledge_config.indexing_technique, |
|
|
|
created_by=account.id, |
|
|
|
embedding_model=knowledge_config.embedding_model, |