### What problem does this PR solve? `list_documents` now supports filtering documents by creation-time range (`create_time_from` / `create_time_to`). ### Type of change - [x] New Feature (non-breaking change which adds functionality) tags/v0.20.1
| desc = False | desc = False | ||||
| else: | else: | ||||
| desc = True | desc = True | ||||
| create_time_from = int(request.args.get("create_time_from", 0)) | |||||
| create_time_to = int(request.args.get("create_time_to", 0)) | |||||
| req = request.get_json() | req = request.get_json() | ||||
| try: | try: | ||||
| docs, tol = DocumentService.get_by_kb_id(kb_id, page_number, items_per_page, orderby, desc, keywords, run_status, types, suffix) | docs, tol = DocumentService.get_by_kb_id(kb_id, page_number, items_per_page, orderby, desc, keywords, run_status, types, suffix) | ||||
| if create_time_from or create_time_to: | |||||
| filtered_docs = [] | |||||
| for doc in docs: | |||||
| doc_create_time = doc.get("create_time", 0) | |||||
| if (create_time_from == 0 or doc_create_time >= create_time_from) and (create_time_to == 0 or doc_create_time <= create_time_to): | |||||
| filtered_docs.append(doc) | |||||
| docs = filtered_docs | |||||
| for doc_item in docs: | for doc_item in docs: | ||||
| if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX): | if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX): | ||||
| doc_item["thumbnail"] = f"/v1/document/image/{kb_id}-{doc_item['thumbnail']}" | doc_item["thumbnail"] = f"/v1/document/image/{kb_id}-{doc_item['thumbnail']}" | 
| from rag.app.qa import beAdoc, rmPrefix | from rag.app.qa import beAdoc, rmPrefix | ||||
| from rag.app.tag import label_question | from rag.app.tag import label_question | ||||
| from rag.nlp import rag_tokenizer, search | from rag.nlp import rag_tokenizer, search | ||||
| from rag.prompts import keyword_extraction, cross_languages | |||||
| from rag.prompts import cross_languages, keyword_extraction | |||||
| from rag.utils import rmSpace | from rag.utils import rmSpace | ||||
| from rag.utils.storage_factory import STORAGE_IMPL | from rag.utils.storage_factory import STORAGE_IMPL | ||||
| required: false | required: false | ||||
| default: true | default: true | ||||
| description: Order in descending. | description: Order in descending. | ||||
| - in: query | |||||
| name: create_time_from | |||||
| type: integer | |||||
| required: false | |||||
| default: 0 | |||||
| description: Unix timestamp used as the inclusive lower bound; only documents created at or after this time are returned. 0 means no filter. | |||||
| - in: query | |||||
| name: create_time_to | |||||
| type: integer | |||||
| required: false | |||||
| default: 0 | |||||
| description: Unix timestamp used as the inclusive upper bound; only documents created at or before this time are returned. 0 means no filter. | |||||
| - in: header | - in: header | ||||
| name: Authorization | name: Authorization | ||||
| type: string | type: string | ||||
| desc = True | desc = True | ||||
| docs, tol = DocumentService.get_list(dataset_id, page, page_size, orderby, desc, keywords, id, name) | docs, tol = DocumentService.get_list(dataset_id, page, page_size, orderby, desc, keywords, id, name) | ||||
| create_time_from = int(request.args.get("create_time_from", 0)) | |||||
| create_time_to = int(request.args.get("create_time_to", 0)) | |||||
| if create_time_from or create_time_to: | |||||
| filtered_docs = [] | |||||
| for doc in docs: | |||||
| doc_create_time = doc.get("create_time", 0) | |||||
| if (create_time_from == 0 or doc_create_time >= create_time_from) and (create_time_to == 0 or doc_create_time <= create_time_to): | |||||
| filtered_docs.append(doc) | |||||
| docs = filtered_docs | |||||
| # rename key's name | # rename key's name | ||||
| renamed_doc_list = [] | renamed_doc_list = [] | ||||
| key_mapping = { | key_mapping = { | 
| ### List documents | ### List documents | ||||
| **GET** `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}` | |||||
| **GET** `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}` | |||||
| Lists documents in a specified dataset. | Lists documents in a specified dataset. | ||||
| #### Request | #### Request | ||||
| - Method: GET | - Method: GET | ||||
| - URL: `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}` | |||||
| - URL: `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}` | |||||
| - Headers: | - Headers: | ||||
| - `'content-Type: application/json'` | - `'content-Type: application/json'` | ||||
| - `'Authorization: Bearer <YOUR_API_KEY>'` | - `'Authorization: Bearer <YOUR_API_KEY>'` | ||||
| ```bash | ```bash | ||||
| curl --request GET \ | curl --request GET \ | ||||
| --url http://{address}/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name} \ | |||||
| --url http://{address}/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp} \ | |||||
| --header 'Authorization: Bearer <YOUR_API_KEY>' | --header 'Authorization: Bearer <YOUR_API_KEY>' | ||||
| ``` | ``` | ||||
| Indicates whether the retrieved documents should be sorted in descending order. Defaults to `true`. | Indicates whether the retrieved documents should be sorted in descending order. Defaults to `true`. | ||||
| - `id`: (*Filter parameter*), `string` | - `id`: (*Filter parameter*), `string` | ||||
| The ID of the document to retrieve. | The ID of the document to retrieve. | ||||
| - `create_time_from`: (*Filter parameter*), `integer` | |||||
| Unix timestamp used as the inclusive lower bound: only documents created at or after this time are returned. `0` means no filter. Defaults to `0`. | |||||
| - `create_time_to`: (*Filter parameter*), `integer` | |||||
| Unix timestamp used as the inclusive upper bound: only documents created at or before this time are returned. `0` means no filter. Defaults to `0`. | |||||
| #### Response | #### Response | ||||
| ### List documents | ### List documents | ||||
| ```python | ```python | ||||
| Dataset.list_documents(id:str =None, keywords: str=None, page: int=1, page_size:int = 30, order_by:str = "create_time", desc: bool = True) -> list[Document] | |||||
| Dataset.list_documents( | |||||
| id: str = None, | |||||
| keywords: str = None, | |||||
| page: int = 1, | |||||
| page_size: int = 30, | |||||
| order_by: str = "create_time", | |||||
| desc: bool = True, | |||||
| create_time_from: int = 0, | |||||
| create_time_to: int = 0 | |||||
| ) -> list[Document] | |||||
| ``` | ``` | ||||
| Lists documents in the current dataset. | Lists documents in the current dataset. | ||||
| Indicates whether the retrieved documents should be sorted in descending order. Defaults to `True`. | Indicates whether the retrieved documents should be sorted in descending order. Defaults to `True`. | ||||
| ##### create_time_from: `int` | |||||
| Unix timestamp used as the inclusive lower bound: only documents created at or after this time are returned. `0` means no filter. Defaults to `0`. | |||||
| ##### create_time_to: `int` | |||||
| Unix timestamp used as the inclusive upper bound: only documents created at or before this time are returned. `0` means no filter. Defaults to `0`. | |||||
| #### Returns | #### Returns | ||||
| - Success: A list of `Document` objects. | - Success: A list of `Document` objects. | 
| return doc_list | return doc_list | ||||
| raise Exception(res.get("message")) | raise Exception(res.get("message")) | ||||
| def list_documents(self, id: str | None = None, name: str | None = None, keywords: str | None = None, page: int = 1, page_size: int = 30, orderby: str = "create_time", desc: bool = True): | |||||
| res = self.get(f"/datasets/{self.id}/documents", params={"id": id, "name": name, "keywords": keywords, "page": page, "page_size": page_size, "orderby": orderby, "desc": desc}) | |||||
| def list_documents( | |||||
| self, | |||||
| id: str | None = None, | |||||
| name: str | None = None, | |||||
| keywords: str | None = None, | |||||
| page: int = 1, | |||||
| page_size: int = 30, | |||||
| orderby: str = "create_time", | |||||
| desc: bool = True, | |||||
| create_time_from: int = 0, | |||||
| create_time_to: int = 0, | |||||
| ): | |||||
| params = { | |||||
| "id": id, | |||||
| "name": name, | |||||
| "keywords": keywords, | |||||
| "page": page, | |||||
| "page_size": page_size, | |||||
| "orderby": orderby, | |||||
| "desc": desc, | |||||
| "create_time_from": create_time_from, | |||||
| "create_time_to": create_time_to, | |||||
| } | |||||
| res = self.get(f"/datasets/{self.id}/documents", params=params) | |||||
| res = res.json() | res = res.json() | ||||
| documents = [] | documents = [] | ||||
| if res.get("code") == 0: | if res.get("code") == 0: |