浏览代码

Chore: clean some # type: ignore (#25157)

tags/2.0.0-beta.2^2
Yongtao Huang 1 个月前
父节点
当前提交
432f89cf33
没有帐户链接到提交者的电子邮件
共有 2 个文件被更改,包括 12 次插入、9 次删除
  1. 11
    8
      api/core/indexing_runner.py
  2. 1
    1
      api/core/rag/index_processor/processor/parent_child_index_processor.py

+ 11
- 8
api/core/indexing_runner.py 查看文件

tenant_id=tenant_id, tenant_id=tenant_id,
model_type=ModelType.TEXT_EMBEDDING, model_type=ModelType.TEXT_EMBEDDING,
) )
preview_texts = [] # type: ignore
# keep separate, avoid union-list ambiguity
preview_texts: list[PreviewDetail] = []
qa_preview_texts: list[QAPreviewDetail] = []


total_segments = 0 total_segments = 0
index_type = doc_form index_type = doc_form
for document in documents: for document in documents:
if len(preview_texts) < 10: if len(preview_texts) < 10:
if doc_form and doc_form == "qa_model": if doc_form and doc_form == "qa_model":
preview_detail = QAPreviewDetail(
qa_detail = QAPreviewDetail(
question=document.page_content, answer=document.metadata.get("answer") or "" question=document.page_content, answer=document.metadata.get("answer") or ""
) )
preview_texts.append(preview_detail)
qa_preview_texts.append(qa_detail)
else: else:
preview_detail = PreviewDetail(content=document.page_content) # type: ignore
preview_detail = PreviewDetail(content=document.page_content)
if document.children: if document.children:
preview_detail.child_chunks = [child.page_content for child in document.children] # type: ignore
preview_detail.child_chunks = [child.page_content for child in document.children]
preview_texts.append(preview_detail) preview_texts.append(preview_detail)


# delete image files and related db records # delete image files and related db records
db.session.delete(image_file) db.session.delete(image_file)


if doc_form and doc_form == "qa_model": if doc_form and doc_form == "qa_model":
return IndexingEstimate(total_segments=total_segments * 20, qa_preview=preview_texts, preview=[])
return IndexingEstimate(total_segments=total_segments, preview=preview_texts) # type: ignore
return IndexingEstimate(total_segments=total_segments * 20, qa_preview=qa_preview_texts, preview=[])
return IndexingEstimate(total_segments=total_segments, preview=preview_texts)


def _extract( def _extract(
self, index_processor: BaseIndexProcessor, dataset_document: DatasetDocument, process_rule: dict self, index_processor: BaseIndexProcessor, dataset_document: DatasetDocument, process_rule: dict
""" """
Get the NodeParser object according to the processing rule. Get the NodeParser object according to the processing rule.
""" """
character_splitter: TextSplitter
if processing_rule_mode in ["custom", "hierarchical"]: if processing_rule_mode in ["custom", "hierarchical"]:
# The user-defined segmentation rule # The user-defined segmentation rule
max_segmentation_tokens_length = dify_config.INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH max_segmentation_tokens_length = dify_config.INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH
embedding_model_instance=embedding_model_instance, embedding_model_instance=embedding_model_instance,
) )


return character_splitter # type: ignore
return character_splitter


def _split_to_documents_for_estimate( def _split_to_documents_for_estimate(
self, text_docs: list[Document], splitter: TextSplitter, processing_rule: DatasetProcessRule self, text_docs: list[Document], splitter: TextSplitter, processing_rule: DatasetProcessRule

+ 1
- 1
api/core/rag/index_processor/processor/parent_child_index_processor.py 查看文件

if not process_rule.get("rules"): if not process_rule.get("rules"):
raise ValueError("No rules found in process rule.") raise ValueError("No rules found in process rule.")
rules = Rule(**process_rule.get("rules")) rules = Rule(**process_rule.get("rules"))
all_documents = [] # type: ignore
all_documents: list[Document] = []
if rules.parent_mode == ParentMode.PARAGRAPH: if rules.parent_mode == ParentMode.PARAGRAPH:
# Split the text documents into nodes. # Split the text documents into nodes.
if not rules.segmentation: if not rules.segmentation:

正在加载...
取消
保存