You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

common.py 2.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. #
  2. # Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import os
  17. from pathlib import Path
  18. import requests
  19. from requests_toolbelt import MultipartEncoder
  20. HEADERS = {"Content-Type": "application/json"}
  21. HOST_ADDRESS = os.getenv("HOST_ADDRESS", "http://127.0.0.1:9380")
  22. DATASETS_API_URL = "/api/v1/datasets"
  23. FILE_API_URL = "/api/v1/datasets/{dataset_id}/documents"
  24. INVALID_API_TOKEN = "invalid_key_123"
  25. DATASET_NAME_LIMIT = 128
  26. DOCUMENT_NAME_LIMIT = 128
  27. # DATASET MANAGEMENT
  28. def create_dataset(auth, payload):
  29. res = requests.post(
  30. url=f"{HOST_ADDRESS}{DATASETS_API_URL}",
  31. headers=HEADERS,
  32. auth=auth,
  33. json=payload,
  34. )
  35. return res.json()
  36. def list_dataset(auth, params=None):
  37. res = requests.get(
  38. url=f"{HOST_ADDRESS}{DATASETS_API_URL}",
  39. headers=HEADERS,
  40. auth=auth,
  41. params=params,
  42. )
  43. return res.json()
  44. def update_dataset(auth, dataset_id, payload):
  45. res = requests.put(
  46. url=f"{HOST_ADDRESS}{DATASETS_API_URL}/{dataset_id}",
  47. headers=HEADERS,
  48. auth=auth,
  49. json=payload,
  50. )
  51. return res.json()
  52. def delete_dataset(auth, payload=None):
  53. res = requests.delete(
  54. url=f"{HOST_ADDRESS}{DATASETS_API_URL}",
  55. headers=HEADERS,
  56. auth=auth,
  57. json=payload,
  58. )
  59. return res.json()
  60. def create_datasets(auth, num):
  61. ids = []
  62. for i in range(num):
  63. res = create_dataset(auth, {"name": f"dataset_{i}"})
  64. ids.append(res["data"]["id"])
  65. return ids
  66. # FILE MANAGEMENT WITHIN DATASET
  67. def upload_documnets(auth, dataset_id, files_path=None):
  68. url = f"{HOST_ADDRESS}{FILE_API_URL}".format(dataset_id=dataset_id)
  69. if files_path is None:
  70. files_path = []
  71. fields = []
  72. for i, fp in enumerate(files_path):
  73. p = Path(fp)
  74. fields.append(("file", (p.name, p.open("rb"))))
  75. m = MultipartEncoder(fields=fields)
  76. res = requests.post(
  77. url=url,
  78. headers={"Content-Type": m.content_type},
  79. auth=auth,
  80. data=m,
  81. )
  82. return res.json()