### What problem does this PR solve? list_document now supports filtering by creation-time range (`create_time_from` / `create_time_to`). ### Type of change - [x] New Feature (non-breaking change which adds functionality) tags/v0.20.1
| @@ -206,6 +206,8 @@ def list_docs(): | |||
| desc = False | |||
| else: | |||
| desc = True | |||
| create_time_from = int(request.args.get("create_time_from", 0)) | |||
| create_time_to = int(request.args.get("create_time_to", 0)) | |||
| req = request.get_json() | |||
| @@ -226,6 +228,14 @@ def list_docs(): | |||
| try: | |||
| docs, tol = DocumentService.get_by_kb_id(kb_id, page_number, items_per_page, orderby, desc, keywords, run_status, types, suffix) | |||
| if create_time_from or create_time_to: | |||
| filtered_docs = [] | |||
| for doc in docs: | |||
| doc_create_time = doc.get("create_time", 0) | |||
| if (create_time_from == 0 or doc_create_time >= create_time_from) and (create_time_to == 0 or doc_create_time <= create_time_to): | |||
| filtered_docs.append(doc) | |||
| docs = filtered_docs | |||
| for doc_item in docs: | |||
| if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX): | |||
| doc_item["thumbnail"] = f"/v1/document/image/{kb_id}-{doc_item['thumbnail']}" | |||
| @@ -38,7 +38,7 @@ from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_ | |||
| from rag.app.qa import beAdoc, rmPrefix | |||
| from rag.app.tag import label_question | |||
| from rag.nlp import rag_tokenizer, search | |||
| from rag.prompts import keyword_extraction, cross_languages | |||
| from rag.prompts import cross_languages, keyword_extraction | |||
| from rag.utils import rmSpace | |||
| from rag.utils.storage_factory import STORAGE_IMPL | |||
| @@ -456,6 +456,18 @@ def list_docs(dataset_id, tenant_id): | |||
| required: false | |||
| default: true | |||
| description: Order in descending. | |||
| - in: query | |||
| name: create_time_from | |||
| type: integer | |||
| required: false | |||
| default: 0 | |||
| description: Unix timestamp lower bound (inclusive). Only documents created at or after this time are returned. 0 means no filter. | |||
| - in: query | |||
| name: create_time_to | |||
| type: integer | |||
| required: false | |||
| default: 0 | |||
| description: Unix timestamp upper bound (inclusive). Only documents created at or before this time are returned. 0 means no filter. | |||
| - in: header | |||
| name: Authorization | |||
| type: string | |||
| @@ -517,6 +529,17 @@ def list_docs(dataset_id, tenant_id): | |||
| desc = True | |||
| docs, tol = DocumentService.get_list(dataset_id, page, page_size, orderby, desc, keywords, id, name) | |||
| create_time_from = int(request.args.get("create_time_from", 0)) | |||
| create_time_to = int(request.args.get("create_time_to", 0)) | |||
| if create_time_from or create_time_to: | |||
| filtered_docs = [] | |||
| for doc in docs: | |||
| doc_create_time = doc.get("create_time", 0) | |||
| if (create_time_from == 0 or doc_create_time >= create_time_from) and (create_time_to == 0 or doc_create_time <= create_time_to): | |||
| filtered_docs.append(doc) | |||
| docs = filtered_docs | |||
| # rename key's name | |||
| renamed_doc_list = [] | |||
| key_mapping = { | |||
| @@ -1118,14 +1118,14 @@ Failure: | |||
| ### List documents | |||
| **GET** `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}` | |||
| **GET** `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}` | |||
| Lists documents in a specified dataset. | |||
| #### Request | |||
| - Method: GET | |||
| - URL: `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}` | |||
| - URL: `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}` | |||
| - Headers: | |||
| - `'content-Type: application/json'` | |||
| - `'Authorization: Bearer <YOUR_API_KEY>'` | |||
| @@ -1134,7 +1134,7 @@ Lists documents in a specified dataset. | |||
| ```bash | |||
| curl --request GET \ | |||
| --url http://{address}/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name} \ | |||
| --url http://{address}/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp} \ | |||
| --header 'Authorization: Bearer <YOUR_API_KEY>' | |||
| ``` | |||
| @@ -1156,6 +1156,10 @@ curl --request GET \ | |||
| Indicates whether the retrieved documents should be sorted in descending order. Defaults to `true`. | |||
| - `id`: (*Filter parameter*), `string` | |||
| The ID of the document to retrieve. | |||
| - `create_time_from`: (*Filter parameter*), `integer` | |||
| Unix timestamp lower bound (inclusive): only documents created at or after this time are returned. `0` means no filter. Defaults to `0`. | |||
| - `create_time_to`: (*Filter parameter*), `integer` | |||
| Unix timestamp upper bound (inclusive): only documents created at or before this time are returned. `0` means no filter. Defaults to `0`. | |||
| #### Response | |||
| @@ -507,7 +507,16 @@ print(doc) | |||
| ### List documents | |||
| ```python | |||
| Dataset.list_documents(id:str =None, keywords: str=None, page: int=1, page_size:int = 30, order_by:str = "create_time", desc: bool = True) -> list[Document] | |||
| Dataset.list_documents( | |||
| id: str = None, | |||
| keywords: str = None, | |||
| page: int = 1, | |||
| page_size: int = 30, | |||
| orderby: str = "create_time", | |||
| desc: bool = True, | |||
| create_time_from: int = 0, | |||
| create_time_to: int = 0 | |||
| ) -> list[Document] | |||
| ``` | |||
| Lists documents in the current dataset. | |||
| @@ -541,6 +550,12 @@ The field by which documents should be sorted. Available options: | |||
| Indicates whether the retrieved documents should be sorted in descending order. Defaults to `True`. | |||
| ##### create_time_from: `int` | |||
| Unix timestamp lower bound (inclusive): only documents created at or after this time are returned. `0` means no filter. Defaults to `0`. | |||
| ##### create_time_to: `int` | |||
| Unix timestamp upper bound (inclusive): only documents created at or before this time are returned. `0` means no filter. Defaults to `0`. | |||
| #### Returns | |||
| - Success: A list of `Document` objects. | |||
| @@ -63,8 +63,30 @@ class DataSet(Base): | |||
| return doc_list | |||
| raise Exception(res.get("message")) | |||
| def list_documents(self, id: str | None = None, name: str | None = None, keywords: str | None = None, page: int = 1, page_size: int = 30, orderby: str = "create_time", desc: bool = True): | |||
| res = self.get(f"/datasets/{self.id}/documents", params={"id": id, "name": name, "keywords": keywords, "page": page, "page_size": page_size, "orderby": orderby, "desc": desc}) | |||
| def list_documents( | |||
| self, | |||
| id: str | None = None, | |||
| name: str | None = None, | |||
| keywords: str | None = None, | |||
| page: int = 1, | |||
| page_size: int = 30, | |||
| orderby: str = "create_time", | |||
| desc: bool = True, | |||
| create_time_from: int = 0, | |||
| create_time_to: int = 0, | |||
| ): | |||
| params = { | |||
| "id": id, | |||
| "name": name, | |||
| "keywords": keywords, | |||
| "page": page, | |||
| "page_size": page_size, | |||
| "orderby": orderby, | |||
| "desc": desc, | |||
| "create_time_from": create_time_from, | |||
| "create_time_to": create_time_to, | |||
| } | |||
| res = self.get(f"/datasets/{self.id}/documents", params=params) | |||
| res = res.json() | |||
| documents = [] | |||
| if res.get("code") == 0: | |||