You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

knowledge_entities.py 3.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. from enum import StrEnum
  2. from typing import Literal, Optional
  3. from pydantic import BaseModel
  4. class ParentMode(StrEnum):
  5. FULL_DOC = "full-doc"
  6. PARAGRAPH = "paragraph"
  # Icon descriptor; used as the type of NotionPage.page_icon.
  class NotionIcon(BaseModel):
      # Required. Presumably distinguishes url-based from emoji icons -- TODO confirm.
      type: str
      # Both optional; each falls back to None when omitted.
      url: str | None = None
      emoji: str | None = None
  # One Notion page entry; NotionInfo.pages is a list of these.
  class NotionPage(BaseModel):
      # Required identifiers.
      page_id: str
      page_name: str
      # Optional icon; None when not supplied.
      page_icon: NotionIcon | None = None
      # Required free-form string (no Literal restriction is declared here).
      type: str
  # A workspace id together with the pages taken from it.
  # InfoList.notion_info_list holds a list of these.
  class NotionInfo(BaseModel):
      workspace_id: str  # required
      pages: list[NotionPage]  # required
  # Website data-source description; used as the type of
  # InfoList.website_info_list.
  class WebsiteInfo(BaseModel):
      # Required fields.
      provider: str
      job_id: str
      urls: list[str]
      # Optional flag; True unless the caller overrides it.
      only_main_content: bool = True
  # Container for a list of file ids; used as the type of
  # InfoList.file_info_list.
  class FileInfo(BaseModel):
      file_ids: list[str]  # required
  # Data-source selector plus one optional payload per source kind.
  # NOTE(review): nothing here enforces that the populated payload matches
  # data_source_type -- presumably callers check that; confirm.
  class InfoList(BaseModel):
      # Required; restricted to exactly these three strings.
      data_source_type: Literal["upload_file", "notion_import", "website_crawl"]
      # Optional payloads; each is None when omitted. Note that only the
      # Notion payload is a list -- the file and website payloads are single
      # objects despite the "_list" suffix in their names.
      notion_info_list: list[NotionInfo] | None = None
      file_info_list: FileInfo | None = None
      website_info_list: WebsiteInfo | None = None
  # Thin wrapper around an InfoList; used as the type of
  # KnowledgeConfig.data_source.
  class DataSource(BaseModel):
      info_list: InfoList  # required
  # A rule id paired with an on/off flag; Rule.pre_processing_rules is a
  # list of these.
  class PreProcessingRule(BaseModel):
      id: str  # required
      enabled: bool  # required
  # Segmentation settings; used for both Rule.segmentation and
  # Rule.subchunk_segmentation.
  class Segmentation(BaseModel):
      # Defaults to a single newline character.
      separator: str = "\n"
      # Required; no bounds are declared on this model.
      max_tokens: int
      # Defaults to 0.
      chunk_overlap: int = 0
  # Processing rule details; every field is optional and defaults to None.
  # Used as the type of ProcessRule.rules.
  class Rule(BaseModel):
      pre_processing_rules: list[PreProcessingRule] | None = None
      segmentation: Segmentation | None = None
      # When given, must be one of the two strings below (the same values
      # that ParentMode defines).
      parent_mode: Literal["full-doc", "paragraph"] | None = None
      subchunk_segmentation: Segmentation | None = None
  # Processing mode plus optional rule details; used as the type of
  # KnowledgeConfig.process_rule.
  class ProcessRule(BaseModel):
      # Required; restricted to exactly these three strings.
      mode: Literal["automatic", "custom", "hierarchical"]
      # Optional; None when omitted.
      rules: Rule | None = None
  # Provider/model name pair for reranking; both parts are optional and
  # default to None. Used as the type of RetrievalModel.reranking_model.
  class RerankingModel(BaseModel):
      reranking_provider_name: str | None = None
      reranking_model_name: str | None = None
  # Vector-side weighting settings; all three fields are required.
  # Used as the type of WeightModel.vector_setting.
  class WeightVectorSetting(BaseModel):
      vector_weight: float
      embedding_provider_name: str
      embedding_model_name: str
  # Keyword-side weighting setting; used as the type of
  # WeightModel.keyword_setting.
  class WeightKeywordSetting(BaseModel):
      keyword_weight: float  # required
  # Weighting configuration; every field is optional and defaults to None.
  # Used as the type of RetrievalModel.weights.
  class WeightModel(BaseModel):
      # When given, must be one of these three strings.
      weight_type: Literal["semantic_first", "keyword_first", "customized"] | None = None
      vector_setting: WeightVectorSetting | None = None
      keyword_setting: WeightKeywordSetting | None = None
  # Retrieval settings; used as the type of KnowledgeConfig.retrieval_model.
  # NOTE(review): the model does not tie reranking_model to reranking_enable,
  # nor score_threshold to score_threshold_enabled -- presumably validated by
  # the consumer; confirm.
  class RetrievalModel(BaseModel):
      # Required; restricted to exactly these four strings.
      search_method: Literal["hybrid_search", "semantic_search", "full_text_search", "keyword_search"]
      # Required flag.
      reranking_enable: bool
      # Optional; None when omitted.
      reranking_model: RerankingModel | None = None
      # Optional free-form string (no Literal restriction is declared here).
      reranking_mode: str | None = None
      # Required.
      top_k: int
      # Required flag.
      score_threshold_enabled: bool
      # Optional; None when omitted.
      score_threshold: float | None = None
      weights: WeightModel | None = None
  # Document type paired with an untyped metadata mapping; both required.
  class MetaDataConfig(BaseModel):
      doc_type: str
      # Plain dict: key and value types are not constrained by this model.
      doc_metadata: dict
  # Top-level knowledge configuration. Only indexing_technique is required;
  # every other field has a default.
  class KnowledgeConfig(BaseModel):
      # Optional; None when omitted.
      original_document_id: str | None = None
      # Defaults to True.
      duplicate: bool = True
      # Required; restricted to exactly these two strings.
      indexing_technique: Literal["high_quality", "economy"]
      # Optional nested settings; each is None when omitted.
      data_source: DataSource | None = None
      process_rule: ProcessRule | None = None
      retrieval_model: RetrievalModel | None = None
      # Free-form strings with the defaults shown.
      doc_form: str = "text_model"
      doc_language: str = "English"
      # Optional; None when omitted.
      embedding_model: str | None = None
      embedding_model_provider: str | None = None
      name: str | None = None
  # Arguments for a segment update; no field is required.
  class SegmentUpdateArgs(BaseModel):
      # Optional; each is None when omitted.
      content: str | None = None
      answer: str | None = None
      keywords: list[str] | None = None
      # Defaults to False.
      regenerate_child_chunks: bool = False
      # Tri-state: True, False, or None when omitted.
      enabled: bool | None = None
  # Arguments for a child-chunk update.
  class ChildChunkUpdateArgs(BaseModel):
      # Optional; None when omitted.
      id: str | None = None
      # Required.
      content: str
  # A metadata field described by its value type and its name; both required.
  class MetadataArgs(BaseModel):
      # Restricted to exactly these three strings.
      type: Literal["string", "number", "time"]
      name: str
  # Arguments for a metadata update: a name and an optional scalar value.
  class MetadataUpdateArgs(BaseModel):
      name: str  # required
      # Optional; a str, int or float, or None when omitted.
      value: str | int | float | None = None
  # One metadata entry: id, name and an optional scalar value.
  # DocumentMetadataOperation.metadata_list is a list of these.
  class MetadataDetail(BaseModel):
      id: str  # required
      name: str  # required
      # Optional; a str, int or float, or None when omitted.
      value: str | int | float | None = None
  # Metadata entries grouped under a single document id; both fields required.
  # MetadataOperationData.operation_data is a list of these.
  class DocumentMetadataOperation(BaseModel):
      document_id: str
      metadata_list: list[MetadataDetail]
  # Wraps a list of per-document metadata operations.
  class MetadataOperationData(BaseModel):
      """
      Metadata operation data
      """

      operation_data: list[DocumentMetadataOperation]  # required