您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. import json
  2. from .base import Base
  3. from .chunk import Chunk
  4. from typing import List
  class Document(Base):
      """A document belonging to a dataset, with helpers to manage its chunks.

      HTTP access goes through the ``get``/``put``/``post``/``rm`` helpers and
      the ``rag`` attribute, none of which are defined in this class
      (presumably provided by ``Base`` -- confirm against ``base.py``).
      """

      class ParserConfig(Base):
          """Parser configuration holder; adds nothing on top of ``Base``."""

          def __init__(self, rag, res_dict):
              super().__init__(rag, res_dict)

      def __init__(self, rag, res_dict):
          """Set default attribute values, then apply the known fields.

          Args:
              rag: Client object, forwarded unchanged to ``Base.__init__``.
              res_dict: Mapping of field names to values. Keys that do not
                  match one of the default attributes set below are ignored.
          """
          self.id = ""
          self.name = ""
          self.thumbnail = None
          self.dataset_id = None
          self.chunk_method = "naive"
          self.parser_config = {"pages": [[1, 1000000]]}
          self.source_type = "local"
          self.type = ""
          self.created_by = ""
          self.size = 0
          self.token_count = 0
          self.chunk_count = 0
          self.progress = 0.0
          self.progress_msg = ""
          self.process_begin_at = None
          self.process_duration = 0.0
          self.run = "0"
          self.status = "1"
          # Forward only the recognised fields. A filtered copy is built so
          # the caller's dictionary is not stripped of its other keys.
          known_fields = {
              key: value
              for key, value in res_dict.items()
              if key in self.__dict__
          }
          super().__init__(rag, known_fields)

      def update(self, update_message: dict):
          """Send *update_message* to this document's endpoint via ``self.put``.

          Args:
              update_message: Payload passed to ``self.put`` as-is.

          Raises:
              Exception: If the response's ``code`` field is not ``0``. The
                  exception carries the response's ``message`` field.
          """
          res = self.put(
              f'/datasets/{self.dataset_id}/documents/{self.id}',
              update_message,
          )
          res = res.json()
          if res.get("code") != 0:
              # .get() so that a response without "message" does not raise a
              # KeyError that would hide the actual failure.
              raise Exception(res.get("message"))

      def download(self):
          """Fetch this document and return the raw response body.

          A body that parses as JSON is treated as an error report rather
          than as file content.

          Returns:
              The response's ``content`` when the body is not valid JSON.

          Raises:
              Exception: With the payload's ``message`` field when the body
                  parses as JSON.
          """
          response = self.get(f"/datasets/{self.dataset_id}/documents/{self.id}")
          # Only the decode step is guarded, so the handler cannot swallow
          # anything other than a failed JSON parse.
          try:
              payload = response.json()
          except json.JSONDecodeError:
              return response.content
          raise Exception(payload.get("message"))

      def list_chunks(self, offset=0, limit=30, keywords="", id: str = None):
          """Request chunks of this document and wrap each one in ``Chunk``.

          Args:
              offset: Sent to the server as the ``offset`` parameter.
              limit: Sent to the server as the ``limit`` parameter.
              keywords: Sent to the server as the ``keywords`` parameter.
              id: Sent to the server as the ``id`` parameter.

          Returns:
              A list of ``Chunk`` objects, one per entry under
              ``data.chunks`` in the response.

          Raises:
              Exception: If the response's ``code`` field is not ``0``.
          """
          params = {
              "document_id": self.id,
              "keywords": keywords,
              "offset": offset,
              "limit": limit,
              "id": id,
          }
          res = self.get(
              f'/datasets/{self.dataset_id}/documents/{self.id}/chunks', params
          )
          res = res.json()
          if res.get("code") != 0:
              raise Exception(res.get("message"))
          return [Chunk(self.rag, item) for item in res["data"].get("chunks")]

      def add_chunk(self, content: str, important_keywords: List[str] = None):
          """Create a chunk in this document via ``self.post``.

          Args:
              content: Sent to the server as the ``content`` field.
              important_keywords: Sent as the ``important_keywords`` field;
                  an empty list is sent when omitted.

          Returns:
              A ``Chunk`` built from ``data.chunk`` in the response.

          Raises:
              Exception: If the response's ``code`` field is not ``0``.
          """
          # A fresh list per call; a literal [] default would be one object
          # shared by every call.
          if important_keywords is None:
              important_keywords = []
          res = self.post(
              f'/datasets/{self.dataset_id}/documents/{self.id}/chunks',
              {"content": content, "important_keywords": important_keywords},
          )
          res = res.json()
          if res.get("code") != 0:
              raise Exception(res.get("message"))
          return Chunk(self.rag, res["data"].get("chunk"))

      def delete_chunks(self, ids: List[str] = None):
          """Ask the server to delete chunks of this document via ``self.rm``.

          Args:
              ids: Sent to the server as the ``ids`` field.

          Raises:
              Exception: If the response's ``code`` field is not ``0``.
          """
          # Leading slash added so the path has the same form as every other
          # endpoint built in this class.
          res = self.rm(
              f"/datasets/{self.dataset_id}/documents/{self.id}/chunks",
              {"ids": ids},
          )
          res = res.json()
          if res.get("code") != 0:
              raise Exception(res.get("message"))