### What problem does this PR solve? #717 ### Type of change - [x] New Feature (non-breaking change which adds functionality) tags/v0.6.0
| from api.utils.file_utils import filename_type, thumbnail | from api.utils.file_utils import filename_type, thumbnail | ||||
| from rag.utils.minio_conn import MINIO | from rag.utils.minio_conn import MINIO | ||||
| from rag.utils.es_conn import ELASTICSEARCH | |||||
| from rag.nlp import search | |||||
| from elasticsearch_dsl import Q | |||||
| def generate_confirmation_token(tenent_id): | def generate_confirmation_token(tenent_id): | ||||
| serializer = URLSafeTimedSerializer(tenent_id) | serializer = URLSafeTimedSerializer(tenent_id) | ||||
| return server_error_response(e) | return server_error_response(e) | ||||
| return get_json_result(data=doc_result.to_json()) | return get_json_result(data=doc_result.to_json()) | ||||
@manager.route('/list_chunks', methods=['POST'])
# @login_required
def list_chunks():
    """List the indexed chunks of one document.

    Authentication: the second whitespace-separated part of the
    ``Authorization`` header is looked up with ``APIToken.query``. A missing
    or malformed header, or an unknown token, yields an authentication error
    response.

    Form fields (``doc_name`` takes precedence when both are present):
        doc_name: document name, matched against ``docnm_kwd``.
        doc_id: document id, matched against ``doc_id``.

    Returns:
        A JSON result whose ``data`` is a list of dicts with ``doc_name``,
        ``content`` and, when the chunk has one, ``img_id``. ``data`` is
        ``False`` with an explanatory ``retmsg`` when authentication fails,
        when neither form field is supplied, or when no tenant can be
        resolved for the document. Any exception raised while resolving the
        document or querying the index is converted by
        ``server_error_response``.
    """
    # A missing header or one without a second part must be reported as an
    # authentication failure, not raised as AttributeError/IndexError.
    auth_parts = (request.headers.get('Authorization') or '').split()
    if len(auth_parts) < 2 or not APIToken.query(token=auth_parts[1]):
        return get_json_result(
            data=False, retmsg='Token is not valid!', retcode=RetCode.AUTHENTICATION_ERROR)

    # NOTE(review): the token is only checked for existence; it is not
    # compared with the tenant that owns the requested document. Confirm
    # whether cross-tenant access should be rejected here.
    form_data = request.form
    try:
        if "doc_name" in form_data:
            tenant_id = DocumentService.get_tenant_id_by_name(form_data['doc_name'])
            q = Q("match", docnm_kwd=form_data['doc_name'])
        elif "doc_id" in form_data:
            tenant_id = DocumentService.get_tenant_id(form_data['doc_id'])
            q = Q("match", doc_id=form_data['doc_id'])
        else:
            return get_json_result(
                data=False, retmsg="Can't find doc_name or doc_id"
            )

        # The tenant lookups return None when no document matches; do not
        # derive an index name from a missing tenant.
        if not tenant_id:
            return get_json_result(
                data=False, retmsg="Can't find the document!"
            )

        # NOTE(review): no explicit result size is passed, so the number of
        # returned chunks is presumably capped by the search default --
        # verify against ELASTICSEARCH.search.
        res_es_search = ELASTICSEARCH.search(
            q, idxnm=search.index_name(tenant_id), timeout="600s")

        res = []
        for hit in res_es_search['hits']['hits']:
            source = hit['_source']
            chunk = {
                'doc_name': source['docnm_kwd'],
                'content': source['content_with_weight'],
            }
            # img_id is optional on a chunk; only expose it when present.
            if 'img_id' in source:
                chunk['img_id'] = source['img_id']
            res.append(chunk)
    except Exception as e:
        return server_error_response(e)

    return get_json_result(data=res)
| return | return | ||||
| return docs[0]["tenant_id"] | return docs[0]["tenant_id"] | ||||
@classmethod
@DB.connection_context()
def get_tenant_id_by_name(cls, name):
    """Return the tenant id of the knowledge base holding the document *name*.

    The document model is joined to ``Knowledgebase`` on ``kb_id`` and
    filtered to knowledge bases whose status is ``StatusEnum.VALID``. The
    first matching row wins; ``None`` is returned when nothing matches.
    """
    join_condition = Knowledgebase.id == cls.model.kb_id
    query = (
        cls.model.select(Knowledgebase.tenant_id)
        .join(Knowledgebase, on=join_condition)
        .where(
            cls.model.name == name,
            Knowledgebase.status == StatusEnum.VALID.value,
        )
    )
    rows = query.dicts()
    if rows:
        return rows[0]["tenant_id"]
    return None
| @classmethod | @classmethod | ||||
| @DB.connection_context() | @DB.connection_context() | ||||
| def get_thumbnails(cls, docids): | def get_thumbnails(cls, docids): |
| } | } | ||||
| ``` | ``` | ||||
| ## Get document chunks | |||||
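| Get the chunks of a document, identified by either `doc_name` or `doc_id`. The request must carry a valid API token in the `Authorization` header, and the parameters are sent as form data. | |||||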
| ### Path: /api/list_chunks | |||||
| ### Method: POST | |||||
| ### Parameter: | |||||
| | Name | Type | Optional | Description | | |||||
| |----------|--------|----------|---------------------------------| | |||||
| | `doc_name` | string | Yes | The name of the document in the knowledge base. It must not be empty if `doc_id` is not set.| | |||||
| | `doc_id` | string | Yes | The ID of the document in the knowledge base. It must not be empty if `doc_name` is not set.| | |||||
| ### Response | |||||
| ```json | |||||
| { | |||||
| "data": [ | |||||
| { | |||||
| "content": "Figure 14: Per-request neural-net processingof RL-Cache.\n103\n(sn)\nCPU\n 102\nGPU\n8101\n100\n8\n16 64 256 1K\n4K", | |||||
| "doc_name": "RL-Cache.pdf", | |||||
| "img_id": "0335167613f011ef91240242ac120006-b46c3524952f82dbe061ce9b123f2211" | |||||
| }, | |||||
| { | |||||
| "content": "4.3 ProcessingOverheadof RL-CacheACKNOWLEDGMENTSThis section evaluates how eectively our RL-Cache implemen-tation leverages modern multi-core CPUs and GPUs to keep the per-request neural-net processing overhead low. Figure 14 depictsThis researchwas supported inpart by the Regional Government of Madrid (grant P2018/TCS-4499, EdgeData-CM)andU.S. National Science Foundation (grants CNS-1763617 andCNS-1717179).REFERENCES", | |||||
| "doc_name": "RL-Cache.pdf", | |||||
| "img_id": "0335167613f011ef91240242ac120006-d4c12c43938eb55d2d8278eea0d7e6d7" | |||||
| } | |||||
| ], | |||||
| "retcode": 0, | |||||
| "retmsg": "success" | |||||
| } | |||||
| ``` |