// Type definitions, enums and lookup constants for datasets, documents, segments and hit testing.
  import type { DataSourceNotionPage, DataSourceProvider } from './common'
  import type { DatasourceType } from './pipeline'
  import type { AppIconType, AppMode, RetrievalConfig } from '@/types/app'
  import type { Tag } from '@/app/components/base/tag-management/constant'
  import type { IndexingType } from '@/app/components/datasets/create/step-two'
  import type { MetadataFilteringVariableType } from '@/app/components/workflow/nodes/knowledge-retrieval/types'
  import type { MetadataItemWithValue } from '@/app/components/datasets/metadata/types'
  import { ExternalKnowledgeBase, General, ParentChild, Qa } from '@/app/components/base/icons/src/public/knowledge/dataset-card'
  import { GeneralChunk, ParentChildChunk, QuestionAndAnswer } from '@/app/components/base/icons/src/vender/knowledge'
  /**
   * Kinds of source a document can be imported from.
   * Each member maps to a snake_case string identifier.
   */
  export enum DataSourceType {
    FILE = 'upload_file',
    NOTION = 'notion_import',
    WEB = 'website_crawl',
  }
  /**
   * Visibility levels that can be assigned to a dataset.
   * Each member maps to a snake_case string identifier.
   */
  export enum DatasetPermission {
    onlyMe = 'only_me',
    allTeamMembers = 'all_team_members',
    partialMembers = 'partial_members',
  }
  /**
   * Chunking strategies for a document (used as the type of `doc_form` fields in this file).
   */
  export enum ChunkingMode {
    text = 'text_model', // General text
    qa = 'qa_model', // General QA
    parentChild = 'hierarchical_model', // Parent-Child
    // graph = 'graph', // todo: Graph RAG
  }
  /**
   * A single metadata entry attached to a dataset (see `DataSet.doc_metadata`).
   */
  export type MetadataInDoc = {
    value: string
    id: string
    /** Variable type of the metadata value, shared with knowledge-retrieval metadata filtering. */
    type: MetadataFilteringVariableType
    name: string
  }
  /**
   * Icon descriptor for a dataset.
   * `icon` and `icon_type` are always present; background and URL are optional.
   */
  export type IconInfo = {
    icon: string
    icon_background?: string
    icon_type: AppIconType
    icon_url?: string
  }
  /**
   * Full description of a dataset (knowledge base): identity, permissions,
   * indexing/embedding configuration, retrieval settings, counters and
   * optional external-knowledge / pipeline information.
   */
  export type DataSet = {
    id: string
    name: string
    indexing_status: DocumentIndexingStatus
    icon_info: IconInfo
    description: string
    permission: DatasetPermission
    data_source_type: DataSourceType
    indexing_technique: IndexingType
    created_by: string
    updated_by: string
    updated_at: number
    app_count: number
    doc_form: ChunkingMode
    document_count: number
    total_document_count: number
    total_available_documents?: number
    word_count: number
    provider: string
    embedding_model: string
    embedding_model_provider: string
    embedding_available: boolean
    // NOTE(review): `retrieval_model_dict` and `retrieval_model` share the same type;
    // which one consumers should read is not visible from this file — confirm.
    retrieval_model_dict: RetrievalConfig
    retrieval_model: RetrievalConfig
    tags: Tag[]
    partial_member_list?: string[]
    /** Binding to an external knowledge API. */
    external_knowledge_info: {
      external_knowledge_id: string
      external_knowledge_api_id: string
      external_knowledge_api_name: string
      external_knowledge_api_endpoint: string
    }
    /** Retrieval parameters for external knowledge. */
    external_retrieval_model: {
      top_k: number
      score_threshold: number
      score_threshold_enabled: boolean
    }
    built_in_field_enabled: boolean
    doc_metadata?: MetadataInDoc[]
    keyword_number?: number
    pipeline_id?: string
    is_published?: boolean // Indicates if the pipeline is published
    runtime_mode: 'rag_pipeline' | 'general'
  }
  /**
   * An external knowledge API registration, including its connection settings
   * and the datasets currently bound to it.
   */
  export type ExternalAPIItem = {
    id: string
    tenant_id: string
    name: string
    description: string
    settings: {
      endpoint: string
      api_key: string
    }
    /** Datasets that reference this API, as `{ id, name }` pairs. */
    dataset_bindings: { id: string; name: string }[]
    created_by: string
    created_at: string
  }
  /**
   * A dataset backed by an external knowledge provider.
   * `provider` is fixed to `'external'`, and the local-indexing fields
   * (`data_source_type`, `indexing_technique`) are always `null`.
   */
  export type ExternalKnowledgeItem = {
    id: string
    name: string
    description: string | null
    provider: 'external'
    permission: DatasetPermission
    data_source_type: null
    indexing_technique: null
    app_count: number
    document_count: number
    word_count: number
    created_by: string
    created_at: string
    updated_by: string
    updated_at: string
    tags: Tag[]
  }
  /** Result of deleting an external knowledge API. */
  export type ExternalAPIDeleteResponse = {
    result: 'success' | 'error'
  }

  /** Usage information for an external knowledge API: an in-use flag and a count. */
  export type ExternalAPIUsage = {
    is_using: boolean
    count: number
  }
  /**
   * A `File` extended with optional extra fields
   * (id, extension, MIME type, creator and creation time).
   */
  export type CustomFile = File & {
    id?: string
    extension?: string
    mime_type?: string
    created_by?: string
    created_at?: number
  }
  /** Minimal document reference: id, display name and file extension. */
  export type DocumentItem = {
    id: string
    name: string
    extension: string
  }
  /**
   * Options for a website crawl.
   * `limit` and `max_depth` accept either a number or a string.
   */
  export type CrawlOptions = {
    crawl_sub_pages: boolean
    only_main_content: boolean
    includes: string
    excludes: string
    limit: number | string
    max_depth: number | string
    use_sitemap: boolean
  }

  /** One crawled page: title, markdown content, description and source URL. */
  export type CrawlResultItem = {
    title: string
    markdown: string
    description: string
    source_url: string
  }
  /**
   * A file entry pairing a `CustomFile` with an id and a numeric progress value.
   */
  export type FileItem = {
    fileID: string
    file: CustomFile
    // NOTE(review): scale and sentinel values of `progress` are not defined in this file — confirm with the uploader.
    progress: number
  }
  /**
   * Arguments for fetching a page of datasets: the endpoint URL plus query parameters.
   * Only `page` is required among the query parameters.
   */
  export type FetchDatasetsParams = {
    url: string
    params: {
      page: number
      ids?: string[]
      tag_ids?: string[]
      limit?: number
      include_all?: boolean
      keyword?: string
    }
  }

  /**
   * Dataset list request state with every filter required.
   * Unlike `FetchDatasetsParams`, it carries `initialPage` rather than `page`.
   */
  export type DatasetListRequest = {
    initialPage: number
    tag_ids: string[]
    limit: number
    include_all: boolean
    keyword: string
  }
  /** One page of datasets with pagination fields. */
  export type DataSetListResponse = {
    data: DataSet[]
    has_more: boolean
    limit: number
    page: number
    total: number
  }

  /** One page of external knowledge APIs with pagination fields. */
  export type ExternalAPIListResponse = {
    data: ExternalAPIItem[]
    has_more: boolean
    limit: number
    page: number
    total: number
  }
  /** A question/answer pair. */
  export type QA = {
    question: string
    answer: string
  }

  /**
   * Indexing estimate: token count, price, segment count and a preview of chunks.
   * `qa_preview` is optional.
   */
  export type IndexingEstimateResponse = {
    tokens: number
    total_price: number
    currency: string
    total_segments: number
    /** Preview chunks, each with its content and child chunk strings. */
    preview: Array<{ content: string; child_chunks: string[] }>
    qa_preview?: QA[]
  }

  /** `IndexingEstimateResponse` extended with a `total_nodes` count. */
  export type FileIndexingEstimateResponse = {
    total_nodes: number
  } & IndexingEstimateResponse
  /**
   * Indexing progress of one document: current status, per-stage timestamps
   * and segment counters.
   */
  export type IndexingStatusResponse = {
    id: string
    indexing_status: DocumentIndexingStatus
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number
    // NOTE(review): the four fields below are `any`, so consumers get no type checking.
    // Tightening them needs the backend contract, which this file does not show — confirm before changing.
    completed_at: any
    paused_at: any
    error: any
    stopped_at: any
    completed_segments: number
    total_segments: number
  }

  /** Indexing status for several documents. */
  export type IndexingStatusBatchResponse = {
    data: IndexingStatusResponse[]
  }
  /** Processing mode of a process rule. */
  export enum ProcessMode {
    general = 'custom',
    parentChild = 'hierarchical',
  }

  /** Parent mode of a rule set (see `Rules.parent_mode`). */
  export type ParentMode = 'full-doc' | 'paragraph'

  /** A process rule together with the limits that apply to it. */
  export type ProcessRuleResponse = {
    mode: ProcessMode
    rules: Rules
    limits: Limits
  }

  /**
   * Rule set of a process rule: pre-processing toggles plus segmentation
   * settings for chunks and sub-chunks.
   */
  export type Rules = {
    pre_processing_rules: PreProcessingRule[]
    segmentation: Segmentation
    parent_mode: ParentMode
    subchunk_segmentation: Segmentation
  }

  /** Limits attached to a process rule. */
  export type Limits = {
    indexing_max_segmentation_tokens_length: number
  }

  /** A pre-processing rule identified by `id` that can be switched on or off. */
  export type PreProcessingRule = {
    id: string
    enabled: boolean
  }

  /** Segmentation settings: separator, maximum tokens and optional overlap. */
  export type Segmentation = {
    separator: string
    max_tokens: number
    chunk_overlap?: number
  }
  /** All indexing status values, as a readonly tuple. */
  export const DocumentIndexingStatusList = [
    'waiting',
    'parsing',
    'cleaning',
    'splitting',
    'indexing',
    'paused',
    'error',
    'completed',
  ] as const

  /** Union of the values in `DocumentIndexingStatusList`. */
  export type DocumentIndexingStatus = typeof DocumentIndexingStatusList[number]

  /** All display status values, as a readonly tuple. */
  export const DisplayStatusList = [
    'queuing',
    'indexing',
    'paused',
    'error',
    'available',
    'enabled',
    'disabled',
    'archived',
  ] as const

  /** Union of the values in `DisplayStatusList`. */
  export type DocumentDisplayStatus = typeof DisplayStatusList[number]
  /**
   * Source details for a document. It combines fields for uploaded files,
   * Notion pages and crawled websites in a single shape.
   */
  // NOTE(review): `upload_file`, `job_id` and `url` are required even though they look
  // source-specific; whether every source really populates them is not visible here — confirm.
  export type DataSourceInfo = {
    upload_file: {
      id: string
      name: string
      size: number
      mime_type: string
      created_at: number
      created_by: string
      extension: string
    }
    notion_page_icon?: string
    notion_workspace_id?: string
    notion_page_id?: string
    provider?: DataSourceProvider
    job_id: string
    url: string
  }
  /**
   * Base document shape, extended by `SimpleDocumentDetail`.
   * `data_source_type` accepts either this file's `DataSourceType`
   * or the pipeline module's `DatasourceType`.
   */
  export type InitialDocumentDetail = {
    id: string
    batch: string
    position: number
    dataset_id: string
    data_source_type: DataSourceType | DatasourceType
    data_source_info: DataSourceInfo
    dataset_process_rule_id: string
    name: string
    created_from: 'api' | 'web'
    created_by: string
    created_at: number
    indexing_status: DocumentIndexingStatus
    display_status: DocumentDisplayStatus
    completed_segments?: number
    total_segments?: number
    doc_form: ChunkingMode
    doc_language: string
  }
  /**
   * `InitialDocumentDetail` extended with enablement, archive state,
   * counters and optional metadata.
   */
  export type SimpleDocumentDetail = InitialDocumentDetail & {
    enabled: boolean
    word_count: number
    is_qa: boolean // TODO waiting for backend to add this field
    error?: string | null
    archived: boolean
    updated_at: number
    hit_count: number
    // NOTE(review): `InitialDocumentDetail` declares this field as required, so the
    // intersection keeps it required and the `?` here has no effect.
    dataset_process_rule_id?: string
    data_source_detail_dict?: {
      upload_file: {
        name: string
        extension: string
      }
    }
    doc_metadata?: MetadataItemWithValue[]
  }
  /** One page of documents with pagination fields. */
  export type DocumentListResponse = {
    data: SimpleDocumentDetail[]
    has_more: boolean
    total: number
    page: number
    limit: number
  }
  /**
   * Fields shared by document creation and indexing-estimate requests:
   * chunking mode, language and process rule, plus optional original
   * document id and indexing technique.
   */
  export type DocumentReq = {
    original_document_id?: string
    indexing_technique?: IndexingType
    doc_form: ChunkingMode
    doc_language: string
    process_rule: ProcessRule
  }

  /** `DocumentReq` plus the data source, retrieval config and embedding model. */
  export type CreateDocumentReq = DocumentReq & {
    data_source: DataSource
    retrieval_model: RetrievalConfig
    embedding_model: string
    embedding_model_provider: string
  }

  /**
   * Parameters for an indexing estimate: `DocumentReq`, every `DataSource`
   * field made optional, and the target dataset id.
   */
  export type IndexingEstimateParams = DocumentReq & Partial<DataSource> & {
    dataset_id: string
  }
  /**
   * Data source of a document creation request. `info_list` has an optional
   * slot for each source kind (Notion, file, website).
   */
  export type DataSource = {
    type: DataSourceType
    info_list: {
      data_source_type: DataSourceType
      notion_info_list?: NotionInfo[]
      file_info_list?: {
        file_ids: string[]
      }
      website_info_list?: {
        provider: string
        job_id: string
        urls: string[]
      }
    }
  }

  /** Notion pages grouped under one workspace. */
  export type NotionInfo = {
    workspace_id: string
    pages: DataSourceNotionPage[]
  }

  /** Minimal Notion page reference: page id and type. */
  export type NotionPage = {
    page_id: string
    type: string
  }
  /** A process rule: processing mode and its rule set. */
  export type ProcessRule = {
    mode: ProcessMode
    rules: Rules
  }
  /**
   * Result of creating documents: the batch id, the created documents and,
   * optionally, the dataset.
   */
  // NOTE(review): the name is camelCase, unlike the other exported types in this file;
  // it is kept as-is because renaming an export would break importers.
  export type createDocumentResponse = {
    dataset?: DataSet
    batch: string
    documents: InitialDocumentDetail[]
  }
  /**
   * Misspelled name for `ProcessRule`, kept as an alias so existing references still compile.
   * The previous standalone declaration had exactly the same members (`mode`, `rules`).
   *
   * @deprecated Use `ProcessRule` instead.
   */
  export type PrecessRule = ProcessRule
  /**
   * `SimpleDocumentDetail` extended with processing timestamps, pause/disable/archive
   * bookkeeping, document type/metadata and the applied process rules.
   * The trailing index signature allows arbitrary extra keys typed as `any`.
   */
  export type FullDocumentDetail = SimpleDocumentDetail & {
    // NOTE(review): `batch` is already declared with the same type on `InitialDocumentDetail`.
    batch: string
    created_api_request_id: string
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number
    tokens: number
    indexing_latency: number
    completed_at: number
    paused_by: string
    paused_at: number
    stopped_at: number
    // NOTE(review): intersected with `InitialDocumentDetail.indexing_status`, so the
    // effective type stays `DocumentIndexingStatus`; the `string` here does not widen it.
    indexing_status: string
    disabled_at: number
    disabled_by: string
    archived_reason: 'rule_modified' | 're_upload'
    archived_by: string
    archived_at: number
    doc_type?: DocType | null | 'others'
    // NOTE(review): `SimpleDocumentDetail.doc_metadata` is `MetadataItemWithValue[]`, so the
    // intersection requires a value that satisfies both shapes. This looks unintended — confirm.
    doc_metadata?: DocMetadata | null
    segment_count: number
    dataset_process_rule: PrecessRule
    document_process_rule: ProcessRule
    [key: string]: any
  }
  /**
   * String-valued document metadata. A fixed set of fields is required,
   * and the index signature admits any further string-valued key.
   */
  export type DocMetadata = {
    title: string
    language: string
    author: string
    publisher: string
    publicationDate: string
    ISBN: string
    category: string
    [key: string]: string
  }
  /** Customizable document types, as a readonly tuple. */
  export const CUSTOMIZABLE_DOC_TYPES = [
    'book',
    'web_page',
    'paper',
    'social_media_post',
    'personal_document',
    'business_document',
    'im_chat_log',
  ] as const

  /** Fixed document types, as a readonly tuple. */
  export const FIXED_DOC_TYPES = ['synced_from_github', 'synced_from_notion', 'wikipedia_entry'] as const

  /** Union of the values in `CUSTOMIZABLE_DOC_TYPES`. */
  export type CustomizableDocType = typeof CUSTOMIZABLE_DOC_TYPES[number]

  /** Union of the values in `FIXED_DOC_TYPES`. */
  export type FixedDocType = typeof FIXED_DOC_TYPES[number]

  /** Any document type, customizable or fixed. */
  export type DocType = CustomizableDocType | FixedDocType
  /** Alias of `FullDocumentDetail`. */
  export type DocumentDetailResponse = FullDocumentDetail
  /**
   * All segment status values, as a readonly tuple.
   * Declared `as const` like the other status lists in this file, so that
   * `SegmentStatus` is the literal union instead of widening to `string`.
   */
  export const SEGMENT_STATUS_LIST = ['waiting', 'completed', 'error', 'indexing'] as const

  /** Union of the values in `SEGMENT_STATUS_LIST`. */
  export type SegmentStatus = typeof SEGMENT_STATUS_LIST[number]
  /**
   * Query parameters for listing segments. Only `limit` is required.
   * `enabled` accepts a boolean or the literal `'all'`.
   */
  export type SegmentsQuery = {
    // NOTE(review): `page` is a string here but a number in the response types of this file — confirm intent.
    page?: string
    limit: number
    // status?: SegmentStatus
    hit_count_gte?: number
    keyword?: string
    enabled?: boolean | 'all'
  }
  /**
   * Full detail of one segment: content, index node information, usage counters,
   * enablement state, lifecycle timestamps and optional QA answer / child chunks.
   */
  export type SegmentDetailModel = {
    id: string
    position: number
    document_id: string
    content: string
    sign_content: string
    word_count: number
    tokens: number
    keywords: string[]
    index_node_id: string
    index_node_hash: string
    hit_count: number
    enabled: boolean
    disabled_at: number
    disabled_by: string
    status: SegmentStatus
    created_by: string
    created_at: number
    indexing_at: number
    completed_at: number
    error: string | null
    stopped_at: number
    answer?: string
    child_chunks?: ChildChunkDetail[]
    updated_at: number
  }

  /** One page of segments with pagination fields. */
  export type SegmentsResponse = {
    data: SegmentDetailModel[]
    has_more: boolean
    limit: number
    total: number
    total_pages: number
    page: number
  }
  /**
   * A hit-testing record: the content, where it originated (`source`) and who created it.
   */
  export type HitTestingRecord = {
    id: string
    content: string
    source: 'app' | 'hit_testing' | 'plugin'
    source_app_id: string
    created_by_role: 'account' | 'end_user'
    created_by: string
    created_at: number
  }
  /** A child chunk in a hit-testing result, with its position and score. */
  export type HitTestingChildChunk = {
    id: string
    content: string
    position: number
    score: number
  }

  /** One hit-testing result: the segment, its score, t-SNE position and optional child chunks. */
  export type HitTesting = {
    segment: Segment
    // NOTE(review): `content` has the same `Segment` type as `segment`; their relationship is not visible here — confirm.
    content: Segment
    score: number
    tsne_position: TsnePosition
    child_chunks?: HitTestingChildChunk[] | null
  }
  /**
   * One hit-testing result from an external knowledge base.
   * `metadata` carries the two `x-amz-bedrock-kb-*` keys.
   */
  export type ExternalKnowledgeBaseHitTesting = {
    content: string
    title: string
    score: number
    metadata: {
      'x-amz-bedrock-kb-source-uri': string
      'x-amz-bedrock-kb-data-source-id': string
    }
  }
  /** A segment as embedded in hit-testing results, including its parent document. */
  export type Segment = {
    id: string
    document: Document
    content: string
    sign_content: string
    position: number
    word_count: number
    tokens: number
    keywords: string[]
    hit_count: number
    index_node_hash: string
  }

  /**
   * Minimal document summary referenced by `Segment`.
   * Inside this module the name shadows the global DOM `Document` type.
   */
  export type Document = {
    id: string
    data_source_type: string
    name: string
    doc_type: DocType
  }
  /** One page of hit-testing records with pagination fields. */
  export type HitTestingRecordsResponse = {
    data: HitTestingRecord[]
    has_more: boolean
    limit: number
    total: number
    page: number
  }
  /** A 2D point (`x`, `y`). */
  export type TsnePosition = {
    x: number
    y: number
  }

  /** Hit-testing result: the query (with its position) and the matching records. */
  export type HitTestingResponse = {
    query: {
      content: string
      tsne_position: TsnePosition
    }
    records: Array<HitTesting>
  }

  /** Hit-testing result for an external knowledge base; the query carries content only. */
  export type ExternalKnowledgeBaseHitTestingResponse = {
    query: {
      content: string
    }
    records: Array<ExternalKnowledgeBaseHitTesting>
  }
  /** An app related to a dataset: id, name, mode and icon fields. */
  export type RelatedApp = {
    id: string
    name: string
    mode: AppMode
    icon_type: AppIconType | null
    icon: string
    icon_background: string
    icon_url: string
  }

  /** List of related apps with a total count. */
  export type RelatedAppResponse = {
    data: Array<RelatedApp>
    total: number
  }
  /**
   * Payload for updating a segment. Only `content` is required; answer, keywords
   * and the child-chunk regeneration flag are optional.
   */
  export type SegmentUpdater = {
    content: string
    answer?: string
    keywords?: string[]
    regenerate_child_chunks?: boolean
  }
  /** Indexing status entries for documents in error, with a total count. */
  export type ErrorDocsResponse = {
    data: IndexingStatusResponse[]
    total: number
  }
  /**
   * Boolean flags describing a set of selected datasets: indexing quality mix,
   * internal/external mix and embedding model consistency.
   */
  export type SelectedDatasetsMode = {
    allHighQuality: boolean
    allHighQualityVectorSearch: boolean
    allHighQualityFullTextSearch: boolean
    allEconomic: boolean
    mixtureHighQualityAndEconomic: boolean
    allInternal: boolean
    allExternal: boolean
    mixtureInternalAndExternal: boolean
    inconsistentEmbeddingModel: boolean
  }
  /** Weighted-score presets. */
  export enum WeightedScoreEnum {
    SemanticFirst = 'semantic_first',
    KeywordFirst = 'keyword_first',
    Customized = 'customized',
  }

  /** Reranking strategies: a reranking model or a weighted score. */
  export enum RerankingModeEnum {
    RerankingModel = 'reranking_model',
    WeightedScore = 'weighted_score',
  }
  /**
   * Default semantic/keyword weights for three cases. The first two keys match
   * flag names on `SelectedDatasetsMode`; `other` is the remaining case.
   * In every entry the two weights sum to 1.
   */
  export const DEFAULT_WEIGHTED_SCORE = {
    allHighQualityVectorSearch: {
      semantic: 1.0,
      keyword: 0,
    },
    allHighQualityFullTextSearch: {
      semantic: 0,
      keyword: 1.0,
    },
    other: {
      semantic: 0.7,
      keyword: 0.3,
    },
  }
  /** Type of a child chunk. */
  export type ChildChunkType = 'automatic' | 'customized'

  /** Detail of one child chunk belonging to a segment (`segment_id`). */
  export type ChildChunkDetail = {
    id: string
    position: number
    segment_id: string
    content: string
    word_count: number
    created_at: number
    updated_at: number
    type: ChildChunkType
  }

  /** One page of child chunks with pagination fields. */
  export type ChildSegmentsResponse = {
    data: ChildChunkDetail[]
    total: number
    total_pages: number
    page: number
    limit: number
  }
  /** Identifies a single document within a dataset. */
  export type UpdateDocumentParams = {
    datasetId: string
    documentId: string
  }

  /** Actions that can be applied to a document. */
  // Used in api url
  export enum DocumentActionType {
    enable = 'enable',
    disable = 'disable',
    archive = 'archive',
    unArchive = 'un_archive',
    delete = 'delete',
  }

  /**
   * Identifies one or several documents within a dataset.
   * `documentIds` accepts either an array of ids or a single string.
   */
  export type UpdateDocumentBatchParams = {
    datasetId: string
    documentId?: string
    documentIds?: string[] | string
  }
  /** Result of a batch import request: job id and job status. */
  export type BatchImportResponse = {
    job_id: string
    job_status: string
  }

  /**
   * Icon component (dataset-card icon set) for each chunking mode,
   * plus an `external` entry for external knowledge bases.
   */
  export const DOC_FORM_ICON_WITH_BG: Record<ChunkingMode | 'external', React.ComponentType<{ className: string }>> = {
    [ChunkingMode.text]: General,
    [ChunkingMode.qa]: Qa,
    [ChunkingMode.parentChild]: ParentChild,
    // [ChunkingMode.graph]: Graph, // todo: Graph RAG
    external: ExternalKnowledgeBase,
  }

  /**
   * Icon component (knowledge icon set) for each chunking mode.
   * The key type lists the three modes explicitly rather than using `ChunkingMode`.
   */
  export const DOC_FORM_ICON: Record<ChunkingMode.text | ChunkingMode.qa | ChunkingMode.parentChild, React.ComponentType<{ className: string }>> = {
    [ChunkingMode.text]: GeneralChunk,
    [ChunkingMode.qa]: QuestionAndAnswer,
    [ChunkingMode.parentChild]: ParentChildChunk,
  }

  /** Short text key for each chunking mode. */
  export const DOC_FORM_TEXT: Record<ChunkingMode, string> = {
    [ChunkingMode.text]: 'general',
    [ChunkingMode.qa]: 'qa',
    [ChunkingMode.parentChild]: 'parentChild',
    // [ChunkingMode.graph]: 'graph', // todo: Graph RAG
  }
  /**
   * Payload for creating a dataset. Name, description, icon and permission are
   * required; chunking mode, partial member list and YAML content are optional.
   */
  export type CreateDatasetReq = {
    name: string
    description: string
    icon_info: IconInfo
    doc_form?: ChunkingMode
    permission: DatasetPermission
    partial_member_list?: {
      user_id: string
      role?: 'owner' | 'admin' | 'editor' | 'normal' | 'dataset_operator'
    }[]
    yaml_content?: string
  }

  /** Result of creating a dataset, including the id of its pipeline. */
  export type CreateDatasetResponse = {
    id: string
    name: string
    description: string
    permission: DatasetPermission
    indexing_technique: IndexingType
    created_by: string
    created_at: number
    updated_by: string
    updated_at: number
    pipeline_id: string
  }
  /** Parameters for previewing a Notion page: workspace id, page id and page type. */
  export type NotionPagePreviewRequest = {
    workspaceID: string
    pageID: string
    pageType: string
  }

  /** Preview content of a Notion page. */
  export type NotionPagePreviewResponse = {
    content: string
  }

  /** Parameters for requesting the indexing status of a batch within a dataset. */
  export type IndexingStatusBatchRequest = {
    datasetId: string
    batchId: string
  }