You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

dataset.py 2.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. from .document import Document
  2. from .base import Base
  3. class DataSet(Base):
  4. class ParserConfig(Base):
  5. def __init__(self, rag, res_dict):
  6. super().__init__(rag, res_dict)
  7. def __init__(self, rag, res_dict):
  8. self.id = ""
  9. self.name = ""
  10. self.avatar = ""
  11. self.tenant_id = None
  12. self.description = ""
  13. self.language = "English"
  14. self.embedding_model = ""
  15. self.permission = "me"
  16. self.document_count = 0
  17. self.chunk_count = 0
  18. self.chunk_method = "naive"
  19. self.parser_config = None
  20. for k in list(res_dict.keys()):
  21. if k not in self.__dict__:
  22. res_dict.pop(k)
  23. super().__init__(rag, res_dict)
  24. def update(self, update_message: dict):
  25. res = self.put(f'/datasets/{self.id}',
  26. update_message)
  27. res = res.json()
  28. if res.get("code") != 0:
  29. raise Exception(res["message"])
  30. def upload_documents(self,document_list: list[dict]):
  31. url = f"/datasets/{self.id}/documents"
  32. files = [("file",(ele["displayed_name"],ele["blob"])) for ele in document_list]
  33. res = self.post(path=url,json=None,files=files)
  34. res = res.json()
  35. if res.get("code") == 0:
  36. doc_list=[]
  37. for doc in res["data"]:
  38. document = Document(self.rag,doc)
  39. doc_list.append(document)
  40. return doc_list
  41. raise Exception(res.get("message"))
  42. def list_documents(self, id: str | None = None, keywords: str | None = None, page: int = 1, page_size: int = 30, orderby: str = "create_time", desc: bool = True):
  43. res = self.get(f"/datasets/{self.id}/documents",params={"id": id,"keywords": keywords,"page": page,"page_size": page_size,"orderby": orderby,"desc": desc})
  44. res = res.json()
  45. documents = []
  46. if res.get("code") == 0:
  47. for document in res["data"].get("docs"):
  48. documents.append(Document(self.rag,document))
  49. return documents
  50. raise Exception(res["message"])
  51. def delete_documents(self,ids: list[str] | None = None):
  52. res = self.rm(f"/datasets/{self.id}/documents",{"ids":ids})
  53. res = res.json()
  54. if res.get("code") != 0:
  55. raise Exception(res["message"])
  56. def async_parse_documents(self,document_ids):
  57. res = self.post(f"/datasets/{self.id}/chunks",{"document_ids":document_ids})
  58. res = res.json()
  59. if res.get("code") != 0:
  60. raise Exception(res.get("message"))
  61. def async_cancel_parse_documents(self,document_ids):
  62. res = self.rm(f"/datasets/{self.id}/chunks",{"document_ids":document_ids})
  63. res = res.json()
  64. if res.get("code") != 0:
  65. raise Exception(res.get("message"))