
Fix some issues in API (#2902)

### What problem does this PR solve?

Fix some issues in API

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
tags/v0.13.0
liuhua committed 1 year ago
Commit 1935c3be1a

api/apps/sdk/chat.py  (+25 -26)

 @token_required
 def create(tenant_id):
     req=request.json
-    if not req.get("knowledgebases"):
-        return get_error_data_result(retmsg="knowledgebases are required")
-    kb_list = []
-    for kb in req.get("knowledgebases"):
-        if not kb["id"]:
-            return get_error_data_result(retmsg="knowledgebase needs id")
-        if not KnowledgebaseService.query(id=kb["id"], tenant_id=tenant_id):
-            return get_error_data_result(retmsg="you do not own the knowledgebase")
-        # if not DocumentService.query(kb_id=kb["id"]):
-        #     return get_error_data_result(retmsg="There is a invalid knowledgebase")
-        kb_list.append(kb["id"])
-    req["kb_ids"] = kb_list
+    ids= req.get("knowledgebases")
+    if not ids:
+        return get_error_data_result(retmsg="`knowledgebases` is required")
+    for kb_id in ids:
+        kbs = KnowledgebaseService.query(id=kb_id,tenant_id=tenant_id)
+        if not kbs:
+            return get_error_data_result(f"You don't own the dataset {kb_id}")
+        kb=kbs[0]
+        if kb.chunk_num == 0:
+            return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
+    req["kb_ids"] = ids
     # llm
     llm = req.get("llm")
     if llm:
     else:
         req["llm_id"] = tenant.llm_id
     if not req.get("name"):
-        return get_error_data_result(retmsg="name is required.")
+        return get_error_data_result(retmsg="`name` is required.")
     if DialogService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
-        return get_error_data_result(retmsg="Duplicated chat name in creating dataset.")
+        return get_error_data_result(retmsg="Duplicated chat name in creating chat.")
     # tenant_id
     if req.get("tenant_id"):
-        return get_error_data_result(retmsg="tenant_id must not be provided.")
+        return get_error_data_result(retmsg="`tenant_id` must not be provided.")
     req["tenant_id"] = tenant_id
     # prompt more parameter
     default_prompt = {
-        "system": """你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。
-以下是知识库:
-{knowledge}
-以上是知识库。""",
-        "prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
+        "system": """You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the knowledge base!" Answers need to consider chat history.
+Here is the knowledge base:
+{knowledge}
+The above is the knowledge base.""",
+        "prologue": "Hi! I'm your assistant, what can I do for you?",
         "parameters": [
             {"key": "knowledge", "optional": False}
         ],
-        "empty_response": "Sorry! 知识库中未找到相关内容!"
+        "empty_response": "Sorry! No relevant content was found in the knowledge base!"
     }
     key_list_2 = ["system", "prologue", "parameters", "empty_response"]
     if "prompt_config" not in req:

     req =request.json
     if "knowledgebases" in req:
         if not req.get("knowledgebases"):
-            return get_error_data_result(retmsg="knowledgebases can't be empty value")
+            return get_error_data_result(retmsg="`knowledgebases` can't be empty value")
         kb_list = []
         for kb in req.get("knowledgebases"):
             if not kb["id"]:

     res = res.to_json()
     if "llm_id" in req:
         if not TenantLLMService.query(llm_name=req["llm_id"]):
-            return get_error_data_result(retmsg="the model_name does not exist.")
+            return get_error_data_result(retmsg="The `model_name` does not exist.")
     if "name" in req:
         if not req.get("name"):
-            return get_error_data_result(retmsg="name is not empty.")
+            return get_error_data_result(retmsg="`name` is not empty.")
         if req["name"].lower() != res["name"].lower() \
                 and len(
             DialogService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value)) > 0:

     req = request.json
     ids = req.get("ids")
     if not ids:
-        return get_error_data_result(retmsg="ids are required")
+        return get_error_data_result(retmsg="`ids` are required")
     for id in ids:
         if not DialogService.query(tenant_id=tenant_id, id=id, status=StatusEnum.VALID.value):
             return get_error_data_result(retmsg=f"You don't own the chat {id}")


 @manager.route('/chat', methods=['GET'])
 @token_required
-def list(tenant_id):
+def list_chat(tenant_id):
     id = request.args.get("id")
     name = request.args.get("name")
     chat = DialogService.query(id=id,name=name,status=StatusEnum.VALID.value)
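
The new `create()` check requires every dataset passed in `knowledgebases` to be owned by the caller and to contain parsed chunks (`chunk_num > 0`). A minimal sketch of how that surfaces through the Python SDK touched later in this diff, assuming a RAGFlow server at a placeholder address, a placeholder API key, and that `create_chat` raises on a non-zero response code like the other SDK calls shown here:

```python
from ragflow import RAGFlow

rag = RAGFlow(api_key="ragflow-...", base_url="http://127.0.0.1:9380")  # placeholders

# A freshly created dataset has chunk_num == 0, so the new validation in
# create() rejects it with "The dataset <id> doesn't own parsed file".
ds = rag.create_dataset(name="empty_kb")
try:
    rag.create_chat(name="assistant", knowledgebases=[ds])
except Exception as e:
    print(e)  # upload and parse documents into the dataset first, then retry
```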

api/apps/sdk/dataset.py  (+36 -17)

 from api.db.services.user_service import TenantService
 from api.settings import RetCode
 from api.utils import get_uuid
-from api.utils.api_utils import get_result, token_required,get_error_data_result
+from api.utils.api_utils import get_result, token_required, get_error_data_result, valid

 @manager.route('/dataset', methods=['POST'])
 @token_required
 def create(tenant_id):
     req = request.json
     e, t = TenantService.get_by_id(tenant_id)
+    permission = req.get("permission")
+    language = req.get("language")
+    chunk_method = req.get("chunk_method")
+    valid_permission = ("me", "team")
+    valid_language =("Chinese", "English")
+    valid_chunk_method = ("naive","manual","qa","table","paper","book","laws","presentation","picture","one","knowledge_graph","email")
+    check_validation=valid(permission,valid_permission,language,valid_language,chunk_method,valid_chunk_method)
+    if check_validation:
+        return check_validation
     if "tenant_id" in req or "embedding_model" in req:
         return get_error_data_result(
-            retmsg="Tenant_id or embedding_model must not be provided")
+            retmsg="`tenant_id` or `embedding_model` must not be provided")
     chunk_count=req.get("chunk_count")
     document_count=req.get("document_count")
     if chunk_count or document_count:
-        return get_error_data_result(retmsg="chunk_count or document_count must be 0 or not be provided")
+        return get_error_data_result(retmsg="`chunk_count` or `document_count` must be 0 or not be provided")
     if "name" not in req:
         return get_error_data_result(
-            retmsg="Name is not empty!")
+            retmsg="`name` is not empty!")
     req['id'] = get_uuid()
     req["name"] = req["name"].strip()
     if req["name"] == "":
         return get_error_data_result(
-            retmsg="Name is not empty string!")
+            retmsg="`name` is not empty string!")
     if KnowledgebaseService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
         return get_error_data_result(
             retmsg="Duplicated knowledgebase name in creating dataset.")
     key_mapping = {
         "chunk_num": "chunk_count",
         "doc_num": "document_count",
-        "parser_id": "parse_method",
+        "parser_id": "chunk_method",
         "embd_id": "embedding_model"
     }
     mapped_keys = {new_key: req[old_key] for new_key, old_key in key_mapping.items() if old_key in req}

         File2DocumentService.delete_by_document_id(doc.id)
     if not KnowledgebaseService.delete_by_id(id):
         return get_error_data_result(
-            retmsg="Delete dataset error.(Database serror)")
+            retmsg="Delete dataset error.(Database error)")
     return get_result(retcode=RetCode.SUCCESS)

 @manager.route('/dataset/<dataset_id>', methods=['PUT'])

     invalid_keys = {"id", "embd_id", "chunk_num", "doc_num", "parser_id"}
     if any(key in req for key in invalid_keys):
         return get_error_data_result(retmsg="The input parameters are invalid.")
+    permission = req.get("permission")
+    language = req.get("language")
+    chunk_method = req.get("chunk_method")
+    valid_permission = ("me", "team")
+    valid_language =("Chinese", "English")
+    valid_chunk_method = ("naive","manual","qa","table","paper","book","laws","presentation","picture","one","knowledge_graph","email")
+    check_validation=valid(permission,valid_permission,language,valid_language,chunk_method,valid_chunk_method)
+    if check_validation:
+        return check_validation
     if "tenant_id" in req:
         if req["tenant_id"] != tenant_id:
             return get_error_data_result(
-                retmsg="Can't change tenant_id.")
+                retmsg="Can't change `tenant_id`.")
     e, kb = KnowledgebaseService.get_by_id(dataset_id)
     if "chunk_count" in req:
         if req["chunk_count"] != kb.chunk_num:
             return get_error_data_result(
-                retmsg="Can't change chunk_count.")
+                retmsg="Can't change `chunk_count`.")
         req.pop("chunk_count")
     if "document_count" in req:
         if req['document_count'] != kb.doc_num:
             return get_error_data_result(
-                retmsg="Can't change document_count.")
+                retmsg="Can't change `document_count`.")
         req.pop("document_count")
-    if "parse_method" in req:
-        if kb.chunk_num != 0 and req['parse_method'] != kb.parser_id:
+    if "chunk_method" in req:
+        if kb.chunk_num != 0 and req['chunk_method'] != kb.parser_id:
             return get_error_data_result(
-                retmsg="If chunk count is not 0, parse method is not changable.")
-        req['parser_id'] = req.pop('parse_method')
+                retmsg="If `chunk_count` is not 0, `chunk_method` is not changeable.")
+        req['parser_id'] = req.pop('chunk_method')
     if "embedding_model" in req:
-        if kb.chunk_num != 0 and req['parse_method'] != kb.parser_id:
+        if kb.chunk_num != 0 and req['embedding_model'] != kb.embd_id:
             return get_error_data_result(
-                retmsg="If chunk count is not 0, parse method is not changable.")
+                retmsg="If `chunk_count` is not 0, `embedding_method` is not changeable.")
         req['embd_id'] = req.pop('embedding_model')
     if "name" in req:
         req["name"] = req["name"].strip()
     key_mapping = {
         "chunk_num": "chunk_count",
         "doc_num": "document_count",
-        "parser_id": "parse_method",
+        "parser_id": "chunk_method",
         "embd_id": "embedding_model"
     }
     renamed_data = {}
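
With the new `valid()` call, `permission`, `language` and `chunk_method` are restricted to the tuples listed above on both create and update. A sketch of the effect through the SDK, under the same placeholder-server assumptions as before (and assuming `create_dataset` raises when the API returns a non-zero code):

```python
from ragflow import RAGFlow

rag = RAGFlow(api_key="ragflow-...", base_url="http://127.0.0.1:9380")  # placeholders

# Accepted: every value is inside its allowed set.
rag.create_dataset(name="papers", language="English", permission="me", chunk_method="paper")

# Rejected by the new check: "markdown" is not a valid chunk_method, so the API
# answers with "markdown not in ('naive', 'manual', 'qa', ...)".
try:
    rag.create_dataset(name="notes", chunk_method="markdown")
except Exception as e:
    print(e)
```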

api/apps/sdk/doc.py  (+23 -17)

 def update_doc(tenant_id, dataset_id, document_id):
     req = request.json
     if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
-        return get_error_data_result(retmsg='You do not own the dataset.')
+        return get_error_data_result(retmsg="You don't own the dataset.")
     doc = DocumentService.query(kb_id=dataset_id, id=document_id)
     if not doc:
-        return get_error_data_result(retmsg='The dataset not own the document.')
+        return get_error_data_result(retmsg="The dataset doesn't own the document.")
     doc = doc[0]
     if "chunk_count" in req:
         if req["chunk_count"] != doc.chunk_num:
-            return get_error_data_result(retmsg="Can't change chunk_count.")
+            return get_error_data_result(retmsg="Can't change `chunk_count`.")
     if "token_count" in req:
         if req["token_count"] != doc.token_num:
-            return get_error_data_result(retmsg="Can't change token_count.")
+            return get_error_data_result(retmsg="Can't change `token_count`.")
     if "progress" in req:
         if req['progress'] != doc.progress:
-            return get_error_data_result(retmsg="Can't change progress.")
+            return get_error_data_result(retmsg="Can't change `progress`.")

     if "name" in req and req["name"] != doc.name:
         if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
         FileService.update_by_id(file.id, {"name": req["name"]})
     if "parser_config" in req:
         DocumentService.update_parser_config(doc.id, req["parser_config"])
-    if "parser_method" in req:
-        if doc.parser_id.lower() == req["parser_method"].lower():
+    if "chunk_method" in req:
+        if doc.parser_id.lower() == req["chunk_method"].lower():
             return get_result()

         if doc.type == FileType.VISUAL or re.search(
             return get_error_data_result(retmsg="Not supported yet!")

         e = DocumentService.update_by_id(doc.id,
-                                         {"parser_id": req["parser_method"], "progress": 0, "progress_msg": "",
+                                         {"parser_id": req["chunk_method"], "progress": 0, "progress_msg": "",
                                           "run": TaskStatus.UNSTART.value})
         if not e:
             return get_error_data_result(retmsg="Document not found!")

         "chunk_num": "chunk_count",
         "kb_id": "knowledgebase_id",
         "token_num": "token_count",
-        "parser_id": "parser_method"
+        "parser_id": "chunk_method"
     }
     renamed_doc = {}
     for key, value in doc.items():

         return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")
     req = request.json
     if not req.get("ids"):
-        return get_error_data_result(retmsg="ids is required")
+        return get_error_data_result(retmsg="`ids` is required")
     doc_ids = req["ids"]
     root_folder = FileService.get_root_folder(tenant_id)
     pf_id = root_folder["id"]


 @manager.route('/dataset/<dataset_id>/document/<document_id>/chunk/<chunk_id>', methods=['PUT'])
 @token_required
-def set(tenant_id,dataset_id,document_id,chunk_id):
+def update_chunk(tenant_id,dataset_id,document_id,chunk_id):
     try:
         res = ELASTICSEARCH.get(
             chunk_id, search.index_name(

     req = request.json
     if not req.get("datasets"):
         return get_error_data_result("`datasets` is required.")
-    kb_id = req["datasets"]
-    if isinstance(kb_id, str): kb_id = [kb_id]
-    for id in kb_id:
+    kb_ids = req["datasets"]
+    kbs = KnowledgebaseService.get_by_ids(kb_ids)
+    embd_nms = list(set([kb.embd_id for kb in kbs]))
+    if len(embd_nms) != 1:
+        return get_result(
+            retmsg='Knowledge bases use different embedding models or does not exist."',
+            retcode=RetCode.AUTHENTICATION_ERROR)
+    if isinstance(kb_ids, str): kb_ids = [kb_ids]
+    for id in kb_ids:
         if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
             return get_error_data_result(f"You don't own the dataset {id}.")
     if "question" not in req:

     else:
         highlight = True
     try:
-        e, kb = KnowledgebaseService.get_by_id(kb_id[0])
+        e, kb = KnowledgebaseService.get_by_id(kb_ids[0])
         if not e:
             return get_error_data_result(retmsg="Knowledgebase not found!")
         embd_mdl = TenantLLMService.model_instance(

             question += keyword_extraction(chat_mdl, question)

         retr = retrievaler if kb.parser_id != ParserType.KG else kg_retrievaler
-        ranks = retr.retrieval(question, embd_mdl, kb.tenant_id, kb_id, page, size,
+        ranks = retr.retrieval(question, embd_mdl, kb.tenant_id, kb_ids, page, size,
                                similarity_threshold, vector_similarity_weight, top,
                                doc_ids, rerank_mdl=rerank_mdl, highlight=highlight)
         for c in ranks["chunks"]:
         return get_result(data=ranks)
     except Exception as e:
         if str(e).find("not_found") > 0:
-            return get_result(retmsg=f'No chunk found! Check the chunk statu s please!',
+            return get_result(retmsg=f'No chunk found! Check the chunk status please!',
                               retcode=RetCode.DATA_ERROR)
         return server_error_response(e)
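
For callers hitting the HTTP API directly, the document-update handler above keeps `name`, `chunk_method` (previously `parser_method`) and `parser_config` writable while rejecting read-only fields. A hedged sketch with `requests`; the path shape is taken from the `Document.update()` call in the SDK further down, and the host, key and IDs are placeholders:

```python
import requests

base_url = "http://127.0.0.1:9380/api/v1"                   # placeholder server
headers = {"Authorization": "Bearer ragflow-..."}           # placeholder API key
dataset_id, document_id = "<dataset_id>", "<document_id>"   # placeholders

# Rename the document and switch its chunking method. Sending `chunk_count`,
# `token_count` or `progress` with a different value now fails with "Can't change `...`.".
resp = requests.put(
    f"{base_url}/dataset/{dataset_id}/info/{document_id}",
    headers=headers,
    json={"name": "manual.txt", "chunk_method": "manual"},
)
print(resp.json())
```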

api/apps/sdk/session.py  (+5 -5)

"message": [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}] "message": [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
} }
if not conv.get("name"): if not conv.get("name"):
return get_error_data_result(retmsg="Name can not be empty.")
return get_error_data_result(retmsg="`name` can not be empty.")
ConversationService.save(**conv) ConversationService.save(**conv)
e, conv = ConversationService.get_by_id(conv["id"]) e, conv = ConversationService.get_by_id(conv["id"])
if not e: if not e:
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value): if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
return get_error_data_result(retmsg="You do not own the session") return get_error_data_result(retmsg="You do not own the session")
if "message" in req or "messages" in req: if "message" in req or "messages" in req:
return get_error_data_result(retmsg="Message can not be change")
return get_error_data_result(retmsg="`message` can not be change")
if "reference" in req: if "reference" in req:
return get_error_data_result(retmsg="Reference can not be change")
return get_error_data_result(retmsg="`reference` can not be change")
if "name" in req and not req.get("name"): if "name" in req and not req.get("name"):
return get_error_data_result(retmsg="Name can not be empty.")
return get_error_data_result(retmsg="`name` can not be empty.")
if not ConversationService.update_by_id(conv_id, req): if not ConversationService.update_by_id(conv_id, req):
return get_error_data_result(retmsg="Session updates error") return get_error_data_result(retmsg="Session updates error")
return get_result() return get_result()
"message": [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}] "message": [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
} }
if not conv.get("name"): if not conv.get("name"):
return get_error_data_result(retmsg="Name can not be empty.")
return get_error_data_result(retmsg="`name` can not be empty.")
ConversationService.save(**conv) ConversationService.save(**conv)
e, conv = ConversationService.get_by_id(conv["id"]) e, conv = ConversationService.get_by_id(conv["id"])
session_id=conv.id session_id=conv.id

api/db/db_models.py  (+2 -2)

default="simple", default="simple",
help_text="simple|advanced", help_text="simple|advanced",
index=True) index=True)
prompt_config = JSONField(null=False, default={"system": "", "prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
"parameters": [], "empty_response": "Sorry! 知识库中未找到相关内容!"})
prompt_config = JSONField(null=False, default={"system": "", "prologue": "Hi! I'm your assistant, what can I do for you?",
"parameters": [], "empty_response": "Sorry! No relevant content was found in the knowledge base!"})


similarity_threshold = FloatField(default=0.2) similarity_threshold = FloatField(default=0.2)
vector_similarity_weight = FloatField(default=0.3) vector_similarity_weight = FloatField(default=0.3)

api/utils/api_utils.py  (+14 -1)



 def generate_confirmation_token(tenent_id):
     serializer = URLSafeTimedSerializer(tenent_id)
     return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34]
+
+
+def valid(permission,valid_permission,language,valid_language,chunk_method,valid_chunk_method):
+    if valid_parameter(permission,valid_permission):
+        return valid_parameter(permission,valid_permission)
+    if valid_parameter(language,valid_language):
+        return valid_parameter(language,valid_language)
+    if valid_parameter(chunk_method,valid_chunk_method):
+        return valid_parameter(chunk_method,valid_chunk_method)
+
+def valid_parameter(parameter,valid_values):
+    if parameter and parameter not in valid_values:
+        return get_error_data_result(f"{parameter} not in {valid_values}")

sdk/python/ragflow/modules/base.py  (+1 -1)

         res = self.rag.post(path, json, stream=stream,files=files)
         return res

-    def get(self, path, params):
+    def get(self, path, params=None):
         res = self.rag.get(path, params)
         return res

sdk/python/ragflow/modules/chat.py  (+0 -3)

         res = res.json()
         if res.get("code") != 0:
             raise Exception(res.get("message"))
-
-    def get_prologue(self):
-        return self.prompt.opener

sdk/python/ragflow/modules/dataset.py  (+78 -78)

The diff re-lists the whole file (+78 -78); the only functional change is renaming the parse_method attribute to chunk_method:

 from typing import Optional, List

 from transformers.models.bloom.modeling_bloom import bloom_gelu_back

 from .document import Document
 from .base import Base


 class DataSet(Base):
     class ParserConfig(Base):
         def __init__(self, rag, res_dict):
             self.chunk_token_count = 128
             self.layout_recognize = True
             self.delimiter = '\n!?。;!?'
             self.task_page_size = 12
             super().__init__(rag, res_dict)

     def __init__(self, rag, res_dict):
         self.id = ""
         self.name = ""
         self.avatar = ""
         self.tenant_id = None
         self.description = ""
         self.language = "English"
         self.embedding_model = ""
         self.permission = "me"
         self.document_count = 0
         self.chunk_count = 0
-        self.parse_method = "naive"
+        self.chunk_method = "naive"
         self.parser_config = None
         for k in list(res_dict.keys()):
             if k not in self.__dict__:
                 res_dict.pop(k)
         super().__init__(rag, res_dict)

     def update(self, update_message: dict):
         res = self.put(f'/dataset/{self.id}',
                        update_message)
         res = res.json()
         if res.get("code") != 0:
             raise Exception(res["message"])

     def upload_documents(self,document_list: List[dict]):
         url = f"/dataset/{self.id}/document"
         files = [("file",(ele["name"],ele["blob"])) for ele in document_list]
         res = self.post(path=url,json=None,files=files)
         res = res.json()
         if res.get("code") != 0:
             raise Exception(res.get("message"))

     def list_documents(self, id: str = None, keywords: str = None, offset: int =1, limit: int = 1024, orderby: str = "create_time", desc: bool = True):
         res = self.get(f"/dataset/{self.id}/info",params={"id": id,"keywords": keywords,"offset": offset,"limit": limit,"orderby": orderby,"desc": desc})
         res = res.json()
         documents = []
         if res.get("code") == 0:
             for document in res["data"].get("docs"):
                 documents.append(Document(self.rag,document))
             return documents
         raise Exception(res["message"])

     def delete_documents(self,ids: List[str] = None):
         res = self.rm(f"/dataset/{self.id}/document",{"ids":ids})
         res = res.json()
         if res.get("code") != 0:
             raise Exception(res["message"])

     def async_parse_documents(self,document_ids):
         res = self.post(f"/dataset/{self.id}/chunk",{"document_ids":document_ids})
         res = res.json()
         if res.get("code") != 0:
             raise Exception(res.get("message"))

     def async_cancel_parse_documents(self,document_ids):
         res = self.rm(f"/dataset/{self.id}/chunk",{"document_ids":document_ids})
         res = res.json()
         if res.get("code") != 0:
             raise Exception(res.get("message"))
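
A short usage sketch of the DataSet methods listed above (upload, list, asynchronous parse), assuming a reachable RAGFlow server, a placeholder API key, and a local file demo.txt:

```python
from ragflow import RAGFlow

rag = RAGFlow(api_key="ragflow-...", base_url="http://127.0.0.1:9380")  # placeholders
ds = rag.create_dataset(name="demo", chunk_method="naive")

# upload_documents() expects a list of {"name": ..., "blob": <bytes>} entries.
with open("demo.txt", "rb") as f:
    ds.upload_documents([{"name": "demo.txt", "blob": f.read()}])

# Kick off asynchronous parsing for everything that was just uploaded.
docs = ds.list_documents(keywords="demo")
ds.async_parse_documents([doc.id for doc in docs])
```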

sdk/python/ragflow/modules/document.py  (+19 -5)

-import time
-
-from PIL.ImageFile import raise_oserror
-
+import json
 from .base import Base
 from .chunk import Chunk
 from typing import List

         self.name = ""
         self.thumbnail = None
         self.knowledgebase_id = None
-        self.parser_method = ""
+        self.chunk_method = ""
         self.parser_config = {"pages": [[1, 1000000]]}
         self.source_type = "local"
         self.type = ""

                 res_dict.pop(k)
         super().__init__(rag, res_dict)

+    def update(self, update_message: dict):
+        res = self.put(f'/dataset/{self.knowledgebase_id}/info/{self.id}',
+                       update_message)
+        res = res.json()
+        if res.get("code") != 0:
+            raise Exception(res["message"])
+
+    def download(self):
+        res = self.get(f"/dataset/{self.knowledgebase_id}/document/{self.id}")
+        try:
+            res = res.json()
+            raise Exception(res.get("message"))
+        except json.JSONDecodeError:
+            return res.content
+
     def list_chunks(self,offset=0, limit=30, keywords="", id:str=None):
         data={"document_id": self.id,"keywords": keywords,"offset":offset,"limit":limit,"id":id}
         res = self.get(f'/dataset/{self.knowledgebase_id}/document/{self.id}/chunk', data)
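
The two methods added above cover in-place edits and raw downloads. A sketch of both, assuming a dataset named "demo" that already holds at least one document and that `list_datasets()` returns a list of DataSet objects (placeholder key and host as before):

```python
from ragflow import RAGFlow

rag = RAGFlow(api_key="ragflow-...", base_url="http://127.0.0.1:9380")  # placeholders
ds = rag.list_datasets(name="demo")[0]
doc = ds.list_documents()[0]

# `chunk_method` replaces the old `parser_method` field in update payloads.
doc.update({"name": "renamed.txt", "chunk_method": "manual"})

# download() returns raw bytes; a JSON body only comes back on error, which is
# why the method raises when res.json() succeeds.
with open("renamed.txt", "wb") as f:
    f.write(doc.download())
```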

sdk/python/ragflow/ragflow.py  (+5 -5)





 class RAGFlow:
-    def __init__(self, user_key, base_url, version='v1'):
+    def __init__(self, api_key, base_url, version='v1'):
         """
         api_url: http://<host_address>/api/v1
         """
-        self.user_key = user_key
+        self.user_key = api_key
         self.api_url = f"{base_url}/api/{version}"
         self.authorization_header = {"Authorization": "{} {}".format("Bearer", self.user_key)}

     def create_dataset(self, name: str, avatar: str = "", description: str = "", language: str = "English",
                        permission: str = "me",
-                       document_count: int = 0, chunk_count: int = 0, parse_method: str = "naive",
+                       document_count: int = 0, chunk_count: int = 0, chunk_method: str = "naive",
                        parser_config: DataSet.ParserConfig = None) -> DataSet:
         if parser_config is None:
             parser_config = DataSet.ParserConfig(self, {"chunk_token_count": 128, "layout_recognize": True,
         res = self.post("/dataset",
                         {"name": name, "avatar": avatar, "description": description, "language": language,
                          "permission": permission,
-                         "document_count": document_count, "chunk_count": chunk_count, "parse_method": parse_method,
+                         "document_count": document_count, "chunk_count": chunk_count, "chunk_method": chunk_method,
                          "parser_config": parser_config
                          }
                         )

         return result_list
         raise Exception(res["message"])

-    def create_chat(self, name: str = "assistant", avatar: str = "path", knowledgebases: List[DataSet] = [],
+    def create_chat(self, name: str, avatar: str = "", knowledgebases: List[DataSet] = [],
                     llm: Chat.LLM = None, prompt: Chat.Prompt = None) -> Chat:
         datasets = []
         for dataset in knowledgebases:
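
Call sites need two small adjustments after these signature changes, sketched below with placeholder values:

```python
from ragflow import RAGFlow

# Before this PR the keyword was `user_key`; positional calls such as
# RAGFlow(API_KEY, HOST_ADDRESS) in the tests are unaffected.
rag = RAGFlow(api_key="ragflow-...", base_url="http://127.0.0.1:9380")  # placeholders

# create_dataset() now takes `chunk_method` instead of `parse_method`;
# create_chat() no longer defaults `name` to "assistant" or `avatar` to "path".
ds = rag.create_dataset(name="demo", chunk_method="naive")
```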

sdk/python/test/t_document.py  (+2 -2)

     def test_update_document_with_success(self):
         """
         Test updating a document with success.
-        Update name or parser_method are supported
+        Update name or chunk_method are supported
         """
         rag = RAGFlow(API_KEY, HOST_ADDRESS)
         ds = rag.list_datasets(name="God")
         doc = ds.list_documents()
         doc = doc[0]
         if isinstance(doc, Document):
-            res = doc.update({"parser_method":"manual","name":"manual.txt"})
+            res = doc.update({"chunk_method":"manual","name":"manual.txt"})
             assert res is None, f"Failed to update document, error: {res}"
         else:
             assert False, f"Failed to get document, error: {doc}"
