
## Fix some issues in API and test (#3001)

### What problem does this PR solve?

Fix some issues in API and test

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
tags/v0.13.0
liuhua committed 1 year ago · commit 161c7a231b

#### api/apps/sdk/chat.py (+7 -7)

```diff
 @token_required
 def create(tenant_id):
     req=request.json
-    ids= req.get("datasets")
+    ids= req.get("dataset_ids")
     if not ids:
-        return get_error_data_result(retmsg="`datasets` is required")
+        return get_error_data_result(retmsg="`dataset_ids` is required")
     for kb_id in ids:
         kbs = KnowledgebaseService.query(id=kb_id,tenant_id=tenant_id)
         if not kbs:

     res["llm"] = res.pop("llm_setting")
     res["llm"]["model_name"] = res.pop("llm_id")
     del res["kb_ids"]
-    res["datasets"] = req["datasets"]
+    res["dataset_ids"] = req["dataset_ids"]
     res["avatar"] = res.pop("icon")
     return get_result(data=res)

     if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
         return get_error_data_result(retmsg='You do not own the chat')
     req =request.json
-    ids = req.get("datasets")
-    if "datasets" in req:
+    ids = req.get("dataset_ids")
+    if "dataset_ids" in req:
         if not ids:
             return get_error_data_result("`datasets` can't be empty")
     if ids:

     # avatar
     if "avatar" in req:
         req["icon"] = req.pop("avatar")
-    if "datasets" in req:
-        req.pop("datasets")
+    if "dataset_ids" in req:
+        req.pop("dataset_ids")
     if not DialogService.update_by_id(chat_id, req):
         return get_error_data_result(retmsg="Chat not found!")
     return get_result()
```
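For context, after this rename a `POST /chat` body carries `dataset_ids` instead of `datasets`. A minimal client sketch, assuming the usual bearer-token auth implied by `@token_required`; the host, API key, dataset ID, and route prefix are placeholders (the handler above only shows `/chat`):

```python
import requests

HOST = "http://127.0.0.1:9380"  # placeholder host
API_KEY = "<your-api-key>"      # placeholder key

payload = {
    "name": "my_chat",
    "dataset_ids": ["<dataset-id>"],  # formerly "datasets"
}
# The "/api/v1" prefix is an assumption; the diff itself only shows the "/chat" handler.
res = requests.post(
    f"{HOST}/api/v1/chat",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json=payload,
)
print(res.json())
```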

#### api/apps/sdk/doc.py (+9 -10)

```diff
 @token_required
 def retrieval_test(tenant_id):
     req = request.json
-    if not req.get("datasets"):
+    if not req.get("dataset_ids"):
         return get_error_data_result("`datasets` is required.")
-    kb_ids = req["datasets"]
+    kb_ids = req["dataset_ids"]
     if not isinstance(kb_ids,list):
         return get_error_data_result("`datasets` should be a list")
     kbs = KnowledgebaseService.get_by_ids(kb_ids)
+    for id in kb_ids:
+        if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
+            return get_error_data_result(f"You don't own the dataset {id}.")
     embd_nms = list(set([kb.embd_id for kb in kbs]))
     if len(embd_nms) != 1:
         return get_result(
-            retmsg='Knowledge bases use different embedding models or does not exist."',
+            retmsg='Datasets use different embedding models."',
             retcode=RetCode.AUTHENTICATION_ERROR)
-    if isinstance(kb_ids, str): kb_ids = [kb_ids]
-    for id in kb_ids:
-        if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
-            return get_error_data_result(f"You don't own the dataset {id}.")
     if "question" not in req:
         return get_error_data_result("`question` is required.")
     page = int(req.get("offset", 1))
     size = int(req.get("limit", 1024))
     question = req["question"]
-    doc_ids = req.get("documents", [])
-    if not isinstance(req.get("documents"),list):
+    doc_ids = req.get("document_ids", [])
+    if not isinstance(doc_ids,list):
         return get_error_data_result("`documents` should be a list")
     doc_ids_list=KnowledgebaseService.list_documents_by_ids(kb_ids)
     for doc_id in doc_ids:
         if doc_id not in doc_ids_list:
-            return get_error_data_result(f"You don't own the document {doc_id}")
+            return get_error_data_result(f"The datasets don't own the document {doc_id}")
     similarity_threshold = float(req.get("similarity_threshold", 0.2))
     vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
     top = int(req.get("top_k", 1024))
```
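The retrieval endpoint follows the same convention: `dataset_ids` is required and `document_ids` is optional. A sketch of a request body with the defaults visible in the handler above (all IDs are placeholders):

```python
# Hypothetical body for the retrieval endpoint after the rename.
payload = {
    "dataset_ids": ["<dataset-id>"],    # formerly "datasets"; must be a list
    "document_ids": ["<document-id>"],  # formerly "documents"; optional list
    "question": "What is AI?",          # required
    "offset": 1,                        # page number, default 1
    "limit": 1024,                      # page size, default 1024
    "similarity_threshold": 0.2,        # default 0.2
    "vector_similarity_weight": 0.3,    # default 0.3
    "top_k": 1024,                      # default 1024
}
```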

#### sdk/python/ragflow/modules/chat.py (+1 -1)

```diff
         self.id = ""
         self.name = "assistant"
         self.avatar = "path/to/avatar"
-        self.datasets = ["kb1"]
+        self.dataset_ids = ["kb1"]
         self.llm = Chat.LLM(rag, {})
         self.prompt = Chat.Prompt(rag, {})
         super().__init__(rag, res_dict)
```

#### sdk/python/ragflow/ragflow.py (+11 -10)

```diff
         return DataSet(self, res["data"])
         raise Exception(res["message"])

-    def delete_datasets(self, ids: List[str] = None, names: List[str] = None):
-        res = self.delete("/dataset",{"ids": ids, "names": names})
+    def delete_datasets(self, ids: List[str]):
+        res = self.delete("/dataset",{"ids": ids})
         res=res.json()
         if res.get("code") != 0:
             raise Exception(res["message"])

         return result_list
         raise Exception(res["message"])

-    def create_chat(self, name: str, avatar: str = "", datasets: List[DataSet] = [],
+    def create_chat(self, name: str, avatar: str = "", dataset_ids: List[str] = [],
                     llm: Chat.LLM = None, prompt: Chat.Prompt = None) -> Chat:
         dataset_list = []
-        for dataset in datasets:
-            dataset_list.append(dataset.id)
+        for id in dataset_ids:
+            dataset_list.append(id)

         if llm is None:
             llm = Chat.LLM(self, {"model_name": None,

         temp_dict = {"name": name,
                      "avatar": avatar,
-                     "datasets": dataset_list,
+                     "dataset_ids": dataset_list,
                      "llm": llm.to_json(),
                      "prompt": prompt.to_json()}
         res = self.post("/chat", temp_dict)
         raise Exception(res["message"])
```
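On the SDK side, `create_chat` now takes dataset IDs rather than `DataSet` objects, and `delete_datasets` no longer deletes by name. A usage sketch (names and IDs are placeholders):

```python
from ragflow import RAGFlow

rag = RAGFlow("<api-key>", "http://127.0.0.1:9380")  # placeholder credentials
ds = rag.create_dataset(name="demo")

# Before: rag.create_chat("demo_chat", datasets=[ds])
chat = rag.create_chat("demo_chat", dataset_ids=[ds.id])

# Before: delete_datasets(ids=None, names=["demo"]); now IDs are mandatory.
rag.delete_datasets(ids=[ds.id])
```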




```diff
-    def retrieve(self, datasets,documents,question="", offset=1, limit=1024, similarity_threshold=0.2,vector_similarity_weight=0.3,top_k=1024,rerank_id:str=None,keyword:bool=False,):
+    def retrieve(self, dataset_ids, document_ids=None, question="", offset=1, limit=1024, similarity_threshold=0.2, vector_similarity_weight=0.3, top_k=1024, rerank_id:str=None, keyword:bool=False, ):
+        if document_ids is None:
+            document_ids = []
         data_json ={
             "offset": offset,
             "limit": limit,
             "rerank_id": rerank_id,
             "keyword": keyword,
             "question": question,
-            "datasets": datasets,
-            "documents": documents
+            "datasets": dataset_ids,
+            "documents": document_ids
         }

         # Send a POST request to the backend service (using requests library as an example, actual implementation may vary)
         res = self.post(f'/retrieval',json=data_json)
         res = res.json()
```
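Besides the rename, `retrieve` replaces the positional `documents` argument with `document_ids=None`, normalized to a fresh list inside the method; this also sidesteps Python's shared mutable-default pitfall. A call sketch (IDs are placeholders, and `rag` is a `RAGFlow` client as above):

```python
# Hypothetical call against the updated signature.
chunks = rag.retrieve(
    dataset_ids=["<dataset-id>"],
    document_ids=["<document-id>"],  # optional; defaults to an empty list
    question="What is AI?",
)
```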

#### sdk/python/test/t_chat.py (+19 -16)

```diff
 from ragflow import RAGFlow, Chat
-import time
 HOST_ADDRESS = 'http://127.0.0.1:9380'


 def test_create_chat_with_name(get_api_key_fixture):

     document = {"displayed_name":displayed_name,"blob":blob}
     documents = []
     documents.append(document)
-    doc_ids = []
     docs= kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    rag.create_chat("test_create", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    rag.create_chat("test_create", dataset_ids=[kb.id])


 def test_update_chat_with_name(get_api_key_fixture):

     document = {"displayed_name": displayed_name, "blob": blob}
     documents = []
     documents.append(document)
-    doc_ids = []
     docs = kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    chat = rag.create_chat("test_update", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    chat = rag.create_chat("test_update", dataset_ids=[kb.id])
     chat.update({"name": "new_chat"})


     document = {"displayed_name": displayed_name, "blob": blob}
     documents = []
     documents.append(document)
-    doc_ids = []
     docs = kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    chat = rag.create_chat("test_delete", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    chat = rag.create_chat("test_delete", dataset_ids=[kb.id])
     rag.delete_chats(ids=[chat.id])


+def test_list_chats_with_success(get_api_key_fixture):
     API_KEY = get_api_key_fixture
     rag = RAGFlow(API_KEY, HOST_ADDRESS)
+    kb = rag.create_dataset(name="test_delete_chat")
+    displayed_name = "ragflow.txt"
+    with open("./ragflow.txt", "rb") as file:
+        blob = file.read()
+    document = {"displayed_name": displayed_name, "blob": blob}
+    documents = []
+    documents.append(document)
+    docs = kb.upload_documents(documents)
+    for doc in docs:
+        doc.add_chunk("This is a test to add chunk")
+    rag.create_chat("test_list_1", dataset_ids=[kb.id])
+    rag.create_chat("test_list_2", dataset_ids=[kb.id])
     rag.list_chats()
```





#### sdk/python/test/t_session.py (+16 -31)

```diff
     displayed_name = "ragflow.txt"
     with open("./ragflow.txt", "rb") as file:
         blob = file.read()
-    document = {"displayed_name": displayed_name, "blob": blob}
+    document = {"displayed_name":displayed_name,"blob":blob}
     documents = []
     documents.append(document)
-    doc_ids = []
-    docs = kb.upload_documents(documents)
+    docs= kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    assistant = rag.create_chat(name="test_create_session", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    assistant=rag.create_chat("test_create", dataset_ids=[kb.id])
     assistant.create_session()

     displayed_name = "ragflow.txt"
     with open("./ragflow.txt","rb") as file:
         blob = file.read()
-    document = {"displayed_name":displayed_name,"blob":blob}
+    document = {"displayed_name": displayed_name, "blob": blob}
     documents = []
     documents.append(document)
-    doc_ids = []
-    docs= kb.upload_documents(documents)
+    docs = kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    assistant = rag.create_chat(name="test_create_conversation", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    assistant = rag.create_chat("test_create", dataset_ids=[kb.id])
     session = assistant.create_session()
     question = "What is AI"
     for ans in session.ask(question, stream=True):

     document = {"displayed_name":displayed_name,"blob":blob}
     documents = []
     documents.append(document)
-    doc_ids = []
     docs= kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    assistant = rag.create_chat(name="test_delete_session", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    assistant=rag.create_chat("test_create", dataset_ids=[kb.id])
     session = assistant.create_session()
     assistant.delete_sessions(ids=[session.id])

     displayed_name = "ragflow.txt"
     with open("./ragflow.txt","rb") as file:
         blob = file.read()
-    document = {"displayed_name":displayed_name,"blob":blob}
+    document = {"displayed_name": displayed_name, "blob": blob}
     documents = []
     documents.append(document)
-    doc_ids = []
-    docs= kb.upload_documents(documents)
+    docs = kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    assistant = rag.create_chat(name="test_update_session", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    assistant = rag.create_chat("test_create", dataset_ids=[kb.id])
     session = assistant.create_session(name="old session")
     session.update({"name": "new session"})

     document = {"displayed_name":displayed_name,"blob":blob}
     documents = []
     documents.append(document)
-    doc_ids = []
     docs= kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    assistant = rag.create_chat(name="test_list_session", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    assistant=rag.create_chat("test_create", dataset_ids=[kb.id])
     assistant.create_session("test_1")
     assistant.create_session("test_2")
     assistant.list_sessions()
```
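Across these tests, the old parse-and-sleep setup (`async_parse_documents` plus `time.sleep(60)`) is replaced by adding a chunk directly, which removes the 60-second wait per test. A condensed sketch of the session flow under the same assumptions as the tests above (`rag` and `kb` as created there):

```python
# Hypothetical condensed flow mirroring the updated session tests.
assistant = rag.create_chat("demo_session", dataset_ids=[kb.id])
session = assistant.create_session()
for ans in session.ask("What is AI?", stream=True):
    print(ans)  # streamed partial answers
```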
