### What problem does this PR solve?

Fix some issues in the API and tests.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
 @token_required
 def create(tenant_id):
     req=request.json
-    ids= req.get("datasets")
+    ids= req.get("dataset_ids")
     if not ids:
-        return get_error_data_result(retmsg="`datasets` is required")
+        return get_error_data_result(retmsg="`dataset_ids` is required")
     for kb_id in ids:
         kbs = KnowledgebaseService.query(id=kb_id,tenant_id=tenant_id)
         if not kbs:
| res["llm"] = res.pop("llm_setting") | res["llm"] = res.pop("llm_setting") | ||||
| res["llm"]["model_name"] = res.pop("llm_id") | res["llm"]["model_name"] = res.pop("llm_id") | ||||
| del res["kb_ids"] | del res["kb_ids"] | ||||
| res["datasets"] = req["datasets"] | |||||
| res["dataset_ids"] = req["dataset_ids"] | |||||
| res["avatar"] = res.pop("icon") | res["avatar"] = res.pop("icon") | ||||
| return get_result(data=res) | return get_result(data=res) | ||||
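
For reference, a minimal sketch of the field mapping this hunk applies before the chat record is returned. The renames are taken from the diff above; the concrete values (`deepseek-chat`, `kb-123`, the temperature settings) are made-up placeholders, not part of the PR.

```python
# Hypothetical raw dialog record, roughly what the service layer returns as a dict.
res = {
    "llm_setting": {"temperature": 0.1, "top_p": 0.3},
    "llm_id": "deepseek-chat",
    "kb_ids": ["kb-123"],
    "icon": "path/to/avatar",
}
req = {"dataset_ids": ["kb-123"]}

# The same renames the endpoint performs before building the response payload.
res["llm"] = res.pop("llm_setting")
res["llm"]["model_name"] = res.pop("llm_id")
del res["kb_ids"]
res["dataset_ids"] = req["dataset_ids"]
res["avatar"] = res.pop("icon")
# res now exposes "llm", "dataset_ids", and "avatar"; "kb_ids" is gone.
```
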
     if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
         return get_error_data_result(retmsg='You do not own the chat')
     req =request.json
-    ids = req.get("datasets")
-    if "datasets" in req:
+    ids = req.get("dataset_ids")
+    if "dataset_ids" in req:
         if not ids:
             return get_error_data_result("`datasets` can't be empty")
     if ids:
     # avatar
     if "avatar" in req:
         req["icon"] = req.pop("avatar")
-    if "datasets" in req:
-        req.pop("datasets")
+    if "dataset_ids" in req:
+        req.pop("dataset_ids")
     if not DialogService.update_by_id(chat_id, req):
         return get_error_data_result(retmsg="Chat not found!")
     return get_result()
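
To show what the rename means for callers, here is a sketch of the request bodies the chat create/update endpoints now expect. Only the payload shape is illustrated; the routes and HTTP client wiring are not part of this diff, and the dataset id is a placeholder.

```python
# Payload shape only; field names follow the renamed API above.
create_chat_body = {
    "name": "my_assistant",
    "avatar": "",                      # mapped to `icon` server-side
    "dataset_ids": ["<dataset-id>"],   # previously "datasets"
}

update_chat_body = {
    "name": "renamed_assistant",
    "dataset_ids": ["<dataset-id>"],   # popped before DialogService.update_by_id
}
```
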
 @token_required
 def retrieval_test(tenant_id):
     req = request.json
-    if not req.get("datasets"):
+    if not req.get("dataset_ids"):
         return get_error_data_result("`datasets` is required.")
-    kb_ids = req["datasets"]
+    kb_ids = req["dataset_ids"]
     if not isinstance(kb_ids,list):
         return get_error_data_result("`datasets` should be a list")
     kbs = KnowledgebaseService.get_by_ids(kb_ids)
+    for id in kb_ids:
+        if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
+            return get_error_data_result(f"You don't own the dataset {id}.")
     embd_nms = list(set([kb.embd_id for kb in kbs]))
     if len(embd_nms) != 1:
         return get_result(
-            retmsg='Knowledge bases use different embedding models or does not exist."',
+            retmsg='Datasets use different embedding models."',
             retcode=RetCode.AUTHENTICATION_ERROR)
-    if isinstance(kb_ids, str): kb_ids = [kb_ids]
-    for id in kb_ids:
-        if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
-            return get_error_data_result(f"You don't own the dataset {id}.")
     if "question" not in req:
         return get_error_data_result("`question` is required.")
     page = int(req.get("offset", 1))
     size = int(req.get("limit", 1024))
     question = req["question"]
-    doc_ids = req.get("documents", [])
-    if not isinstance(req.get("documents"),list):
+    doc_ids = req.get("document_ids", [])
+    if not isinstance(doc_ids,list):
         return get_error_data_result("`documents` should be a list")
     doc_ids_list=KnowledgebaseService.list_documents_by_ids(kb_ids)
     for doc_id in doc_ids:
         if doc_id not in doc_ids_list:
-            return get_error_data_result(f"You don't own the document {doc_id}")
+            return get_error_data_result(f"The datasets don't own the document {doc_id}")
     similarity_threshold = float(req.get("similarity_threshold", 0.2))
     vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
     top = int(req.get("top_k", 1024))
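
A minimal sketch of a retrieval request body accepted by this endpoint after the rename. The keys and default values are read straight from the hunk above; the question text and the dataset/document ids are placeholders.

```python
retrieval_body = {
    "question": "What is AI?",          # required
    "dataset_ids": ["<dataset-id>"],    # required, must be a list (previously "datasets")
    "document_ids": ["<document-id>"],  # optional, must belong to the datasets above
    "offset": 1,                        # page number, defaults to 1
    "limit": 1024,                      # page size, defaults to 1024
    "similarity_threshold": 0.2,
    "vector_similarity_weight": 0.3,
    "top_k": 1024,
}
```
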
| self.id = "" | self.id = "" | ||||
| self.name = "assistant" | self.name = "assistant" | ||||
| self.avatar = "path/to/avatar" | self.avatar = "path/to/avatar" | ||||
| self.datasets = ["kb1"] | |||||
| self.dataset_ids = ["kb1"] | |||||
| self.llm = Chat.LLM(rag, {}) | self.llm = Chat.LLM(rag, {}) | ||||
| self.prompt = Chat.Prompt(rag, {}) | self.prompt = Chat.Prompt(rag, {}) | ||||
| super().__init__(rag, res_dict) | super().__init__(rag, res_dict) |
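
In client code the renamed attribute is read the same way as before. A sketch, assuming `rag` is an authenticated `RAGFlow` client, `kb` an existing dataset, and that the base class populates attributes from the response dict as elsewhere in the SDK:

```python
chat = rag.create_chat(name="demo", dataset_ids=[kb.id])
print(chat.name, chat.dataset_ids)  # attribute renamed from `datasets` to `dataset_ids`
```
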
             return DataSet(self, res["data"])
         raise Exception(res["message"])

-    def delete_datasets(self, ids: List[str] = None, names: List[str] = None):
-        res = self.delete("/dataset",{"ids": ids, "names": names})
+    def delete_datasets(self, ids: List[str]):
+        res = self.delete("/dataset",{"ids": ids})
         res=res.json()
         if res.get("code") != 0:
             raise Exception(res["message"])
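
Usage sketch for the tightened signature: datasets are now deleted by id only, and the `names` parameter is gone. The dataset name below is a placeholder; `rag` is an authenticated `RAGFlow` client.

```python
ds = rag.create_dataset(name="to_be_removed")
rag.delete_datasets(ids=[ds.id])   # deleting by `names` is no longer supported
```
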
             return result_list
         raise Exception(res["message"])

-    def create_chat(self, name: str, avatar: str = "", datasets: List[DataSet] = [],
+    def create_chat(self, name: str, avatar: str = "", dataset_ids: List[str] = [],
                     llm: Chat.LLM = None, prompt: Chat.Prompt = None) -> Chat:
         dataset_list = []
-        for dataset in datasets:
-            dataset_list.append(dataset.id)
+        for id in dataset_ids:
+            dataset_list.append(id)
         if llm is None:
             llm = Chat.LLM(self, {"model_name": None,
         temp_dict = {"name": name,
                      "avatar": avatar,
-                     "datasets": dataset_list,
+                     "dataset_ids": dataset_list,
                      "llm": llm.to_json(),
                      "prompt": prompt.to_json()}
         res = self.post("/chat", temp_dict)
         raise Exception(res["message"])
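
Sketch of the updated SDK call: `create_chat` now takes plain dataset id strings instead of `DataSet` objects. Names are placeholders; `rag` is an authenticated `RAGFlow` client.

```python
kb = rag.create_dataset(name="demo_kb")
chat = rag.create_chat(name="demo_chat", dataset_ids=[kb.id])  # ids, not DataSet objects
```
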
-    def retrieve(self, datasets,documents,question="", offset=1, limit=1024, similarity_threshold=0.2,vector_similarity_weight=0.3,top_k=1024,rerank_id:str=None,keyword:bool=False,):
+    def retrieve(self, dataset_ids, document_ids=None, question="", offset=1, limit=1024, similarity_threshold=0.2, vector_similarity_weight=0.3, top_k=1024, rerank_id:str=None, keyword:bool=False, ):
+        if document_ids is None:
+            document_ids = []
         data_json ={
             "offset": offset,
             "limit": limit,
             "rerank_id": rerank_id,
             "keyword": keyword,
             "question": question,
-            "datasets": datasets,
-            "documents": documents
+            "datasets": dataset_ids,
+            "documents": document_ids
         }
         # Send a POST request to the backend service (using requests library as an example, actual implementation may vary)
         res = self.post(f'/retrieval',json=data_json)
         res = res.json()
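
A usage sketch of the renamed `retrieve` parameters. The keyword arguments and defaults come from the signature above; the assumption that the remainder of the method (not shown in this hunk) turns the response into a list of chunk objects is ours.

```python
chunks = rag.retrieve(
    dataset_ids=[kb.id],        # ids of the datasets to search
    document_ids=[doc.id],      # optional; may be omitted and defaults to []
    question="What is AI?",
    similarity_threshold=0.2,
    vector_similarity_weight=0.3,
    top_k=1024,
)
```
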
 from ragflow import RAGFlow, Chat
-import time
 HOST_ADDRESS = 'http://127.0.0.1:9380'

 def test_create_chat_with_name(get_api_key_fixture):
     document = {"displayed_name":displayed_name,"blob":blob}
     documents = []
     documents.append(document)
-    doc_ids = []
     docs= kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    rag.create_chat("test_create", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    rag.create_chat("test_create", dataset_ids=[kb.id])
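
The same upload → `add_chunk` → `create_chat` setup repeats in the tests below. A small helper like the following would capture the pattern; the helper name and structure are ours, not part of the PR.

```python
def _make_chat(rag, kb_name, chat_name):
    """Hypothetical helper mirroring the repeated test setup in this file."""
    kb = rag.create_dataset(name=kb_name)
    with open("./ragflow.txt", "rb") as file:
        blob = file.read()
    docs = kb.upload_documents([{"displayed_name": "ragflow.txt", "blob": blob}])
    for doc in docs:
        doc.add_chunk("This is a test to add chunk")
    return rag.create_chat(chat_name, dataset_ids=[kb.id])
```
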
 def test_update_chat_with_name(get_api_key_fixture):
     document = {"displayed_name": displayed_name, "blob": blob}
     documents = []
     documents.append(document)
-    doc_ids = []
     docs = kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    chat = rag.create_chat("test_update", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    chat = rag.create_chat("test_update", dataset_ids=[kb.id])
     chat.update({"name": "new_chat"})
| document = {"displayed_name": displayed_name, "blob": blob} | document = {"displayed_name": displayed_name, "blob": blob} | ||||
| documents = [] | documents = [] | ||||
| documents.append(document) | documents.append(document) | ||||
| doc_ids = [] | |||||
| docs = kb.upload_documents(documents) | docs = kb.upload_documents(documents) | ||||
| for doc in docs: | for doc in docs: | ||||
| doc_ids.append(doc.id) | |||||
| kb.async_parse_documents(doc_ids) | |||||
| time.sleep(60) | |||||
| chat = rag.create_chat("test_delete", datasets=[kb]) | |||||
| doc.add_chunk("This is a test to add chunk") | |||||
| chat = rag.create_chat("test_delete", dataset_ids=[kb.id]) | |||||
| rag.delete_chats(ids=[chat.id]) | rag.delete_chats(ids=[chat.id]) | ||||
 def test_list_chats_with_success(get_api_key_fixture):
     API_KEY = get_api_key_fixture
     rag = RAGFlow(API_KEY, HOST_ADDRESS)
+    kb = rag.create_dataset(name="test_delete_chat")
+    displayed_name = "ragflow.txt"
+    with open("./ragflow.txt", "rb") as file:
+        blob = file.read()
+    document = {"displayed_name": displayed_name, "blob": blob}
+    documents = []
+    documents.append(document)
+    docs = kb.upload_documents(documents)
+    for doc in docs:
+        doc.add_chunk("This is a test to add chunk")
+    rag.create_chat("test_list_1", dataset_ids=[kb.id])
+    rag.create_chat("test_list_2", dataset_ids=[kb.id])
     rag.list_chats()
     displayed_name = "ragflow.txt"
     with open("./ragflow.txt", "rb") as file:
         blob = file.read()
-    document = {"displayed_name": displayed_name, "blob": blob}
+    document = {"displayed_name":displayed_name,"blob":blob}
     documents = []
     documents.append(document)
-    doc_ids = []
-    docs = kb.upload_documents(documents)
+    docs= kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    assistant = rag.create_chat(name="test_create_session", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    assistant=rag.create_chat("test_create", dataset_ids=[kb.id])
     assistant.create_session()
     displayed_name = "ragflow.txt"
     with open("./ragflow.txt","rb") as file:
         blob = file.read()
-    document = {"displayed_name":displayed_name,"blob":blob}
+    document = {"displayed_name": displayed_name, "blob": blob}
     documents = []
     documents.append(document)
-    doc_ids = []
-    docs= kb.upload_documents(documents)
+    docs = kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    assistant = rag.create_chat(name="test_create_conversation", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    assistant = rag.create_chat("test_create", dataset_ids=[kb.id])
     session = assistant.create_session()
     question = "What is AI"
     for ans in session.ask(question, stream=True):
| document = {"displayed_name":displayed_name,"blob":blob} | document = {"displayed_name":displayed_name,"blob":blob} | ||||
| documents = [] | documents = [] | ||||
| documents.append(document) | documents.append(document) | ||||
| doc_ids = [] | |||||
| docs= kb.upload_documents(documents) | docs= kb.upload_documents(documents) | ||||
| for doc in docs: | for doc in docs: | ||||
| doc_ids.append(doc.id) | |||||
| kb.async_parse_documents(doc_ids) | |||||
| time.sleep(60) | |||||
| assistant = rag.create_chat(name="test_delete_session", datasets=[kb]) | |||||
| doc.add_chunk("This is a test to add chunk") | |||||
| assistant=rag.create_chat("test_create", dataset_ids=[kb.id]) | |||||
| session = assistant.create_session() | session = assistant.create_session() | ||||
| assistant.delete_sessions(ids=[session.id]) | assistant.delete_sessions(ids=[session.id]) | ||||
     displayed_name = "ragflow.txt"
     with open("./ragflow.txt","rb") as file:
         blob = file.read()
-    document = {"displayed_name":displayed_name,"blob":blob}
+    document = {"displayed_name": displayed_name, "blob": blob}
     documents = []
     documents.append(document)
-    doc_ids = []
-    docs= kb.upload_documents(documents)
+    docs = kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    assistant = rag.create_chat(name="test_update_session", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    assistant = rag.create_chat("test_create", dataset_ids=[kb.id])
     session = assistant.create_session(name="old session")
     session.update({"name": "new session"})
| document = {"displayed_name":displayed_name,"blob":blob} | document = {"displayed_name":displayed_name,"blob":blob} | ||||
| documents = [] | documents = [] | ||||
| documents.append(document) | documents.append(document) | ||||
| doc_ids = [] | |||||
| docs= kb.upload_documents(documents) | docs= kb.upload_documents(documents) | ||||
| for doc in docs: | for doc in docs: | ||||
| doc_ids.append(doc.id) | |||||
| kb.async_parse_documents(doc_ids) | |||||
| time.sleep(60) | |||||
| assistant = rag.create_chat(name="test_list_session", datasets=[kb]) | |||||
| doc.add_chunk("This is a test to add chunk") | |||||
| assistant=rag.create_chat("test_create", dataset_ids=[kb.id]) | |||||
| assistant.create_session("test_1") | assistant.create_session("test_1") | ||||
| assistant.create_session("test_2") | assistant.create_session("test_2") | ||||
| assistant.list_sessions() | assistant.list_sessions() |
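
End to end, the updated flow exercised by these tests looks roughly like the sketch below, assembled from the hunks above. `API_KEY` and `HOST_ADDRESS` are the same fixtures/constants the tests use; the dataset and chat names are placeholders, and assertions and error handling are omitted.

```python
from ragflow import RAGFlow

rag = RAGFlow(API_KEY, HOST_ADDRESS)
kb = rag.create_dataset(name="demo_kb")
with open("./ragflow.txt", "rb") as file:
    blob = file.read()
docs = kb.upload_documents([{"displayed_name": "ragflow.txt", "blob": blob}])
for doc in docs:
    doc.add_chunk("This is a test to add chunk")

assistant = rag.create_chat("demo_chat", dataset_ids=[kb.id])
session = assistant.create_session()
for ans in session.ask("What is AI", stream=True):
    pass  # each `ans` is a streamed answer object
```
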