### What problem does this PR solve?

Add OpenAI-compatible HTTP and Python API reference.

### Type of change

- [x] Documentation Update

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com>
```diff
 model=model,
 messages=[
     {"role": "system", "content": "You are a helpful assistant."},
-    {"role": "user", "content": "Who you are?"},
+    {"role": "user", "content": "Who are you?"},
     {"role": "assistant", "content": "I am an AI assistant named..."},
     {"role": "user", "content": "Can you tell me how to install neovim"},
 ],
```
```diff
 messages = req.get("messages", [])
 # To prevent empty [] input
 if len(messages) < 1:
-    return get_error_data_result("You have to provide messages")
+    return get_error_data_result("You have to provide messages.")
+if messages[-1]["role"] != "user":
+    return get_error_data_result("The last content of this conversation is not from user.")
+prompt = messages[-1]["content"]
+# Treat context tokens as reasoning tokens
+context_token_used = sum(len(message["content"]) for message in messages)
 dia = DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value)
 if not dia:
     return get_error_data_result(f"You don't own the chat {chat_id}")
 dia = dia[0]
-# Filter system and assistant messages
+# Filter system and non-sense assistant messages
 msg = None
 msg = [m for m in messages if m["role"] != "system" and (m["role"] != "assistant" or msg)]
```
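The new comprehension is subtle: `msg` is still `None` while the comprehension is being evaluated, so the `(m["role"] != "assistant" or msg)` clause is falsy for every assistant message, and both system and assistant messages end up dropped. A minimal standalone check of that behavior (the message list here is made up for illustration):

```python
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Who are you?"},
    {"role": "assistant", "content": "I am an AI assistant named..."},
    {"role": "user", "content": "Can you tell me how to install neovim"},
]

# `msg` refers to the enclosing binding (None) while the comprehension
# runs, so the `or msg` clause never rescues an assistant message:
# only user messages survive.
msg = None
msg = [m for m in messages if m["role"] != "system" and (m["role"] != "assistant" or msg)]

print([m["role"] for m in msg])  # ['user', 'user']
```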
```diff
 # The value for the usage field on all chunks except for the last one will be null.
 # The usage field on the last chunk contains token usage statistics for the entire request.
 # The choices field on the last chunk will always be an empty array [].
-def streamed_respose_generator(chat_id, dia, msg):
+def streamed_response_generator(chat_id, dia, msg):
     token_used = 0
     response = {
         "id": f"chatcmpl-{chat_id}",
     ...
         response["choices"][0]["delta"]["content"] = "**ERROR**: " + str(e)
         yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
-    # The last chunck
+    # The last chunk
     response["choices"][0]["delta"]["content"] = None
     response["choices"][0]["finish_reason"] = "stop"
     response["usage"] = {
-        "prompt_tokens": len(msg),
+        "prompt_tokens": len(prompt),
         "completion_tokens": token_used,
-        "total_tokens": len(msg) + token_used
+        "total_tokens": len(prompt) + token_used
     }
     yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
 ...
-resp = Response(streamed_respose_generator(chat_id, dia, msg), mimetype="text/event-stream")
+resp = Response(streamed_response_generator(chat_id, dia, msg), mimetype="text/event-stream")
 resp.headers.add_header("Cache-control", "no-cache")
 resp.headers.add_header("Connection", "keep-alive")
 resp.headers.add_header("X-Accel-Buffering", "no")
```
```diff
 # focus answer content only
 answer = ans
 break
+content = answer["answer"]
 response = {
     "id": f"chatcmpl-{chat_id}",
     "created": int(time.time()),
     "model": req.get("model", ""),
     "usage": {
-        "prompt_tokens": len(messages),
-        "completion_tokens": len(answer),
-        "total_tokens": len(messages) + len(answer),
+        "prompt_tokens": len(prompt),
+        "completion_tokens": len(content),
+        "total_tokens": len(prompt) + len(content),
         "completion_tokens_details": {
-            "reasoning_tokens": len(answer),
-            "accepted_prediction_tokens": len(answer),
-            "rejected_prediction_tokens": len(answer)
+            "reasoning_tokens": context_token_used,
+            "accepted_prediction_tokens": len(content),
+            "rejected_prediction_tokens": 0  # 0 for simplicity
         }
     },
     "choices": [
         {
             "message": {
                 "role": "assistant",
-                "content": answer["answer"]
+                "content": content
             },
             "logprobs": None,
             "finish_reason": "stop",
```
---

## OpenAI-Compatible API

---

### Create chat completion

**POST** `/api/v1/chats_openai/{chat_id}/chat/completions`

Creates a model response for a given chat conversation.

This API follows the same request and response format as OpenAI's API. It allows you to interact with the model in a manner similar to how you would with [OpenAI's API](https://platform.openai.com/docs/api-reference/chat/create).

#### Request

- Method: POST
- URL: `/api/v1/chats_openai/{chat_id}/chat/completions`
- Headers:
  - `'Content-Type: application/json'`
  - `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
  - `"model"`: `string`
  - `"messages"`: `object list`
  - `"stream"`: `boolean`
##### Request example

```bash
curl --request POST \
     --url http://{address}/api/v1/chats_openai/{chat_id}/chat/completions \
     --header 'Content-Type: application/json' \
     --header 'Authorization: Bearer <YOUR_API_KEY>' \
     --data '{
        "model": "model",
        "messages": [{"role": "user", "content": "Say this is a test!"}],
        "stream": true
     }'
```
##### Request Parameters

- `model` (*Body parameter*) `string`, *Required*
  The model used to generate the response. The server will parse this automatically, so you can set it to any value for now.
- `messages` (*Body parameter*) `list[object]`, *Required*
  A list of historical chat messages used to generate the response. This must contain at least one message with the `user` role.
- `stream` (*Body parameter*) `boolean`
  Whether to receive the response as a stream. Set this to `false` explicitly if you prefer to receive the entire response in one go instead of as a stream.
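Besides curl, the same request can be issued from plain Python. A minimal sketch using the `requests` library, mirroring the example above; the `address`, `chat_id`, and `api_key` values are placeholders for your own deployment. With `"stream": false` the whole completion arrives as one JSON body:

```python
import requests

address = "{address}"      # placeholder: your RAGFlow host
chat_id = "{chat_id}"      # placeholder: your chat assistant ID
api_key = "<YOUR_API_KEY>" # placeholder: your RAGFlow API key

resp = requests.post(
    f"http://{address}/api/v1/chats_openai/{chat_id}/chat/completions",
    headers={
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    },
    json={
        "model": "model",
        "messages": [{"role": "user", "content": "Say this is a test!"}],
        "stream": False,  # ask for the entire response in one go
    },
)
print(resp.json()["choices"][0]["message"]["content"])
```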
#### Response

Stream:

```json
{
    "id": "chatcmpl-3a9c3572f29311efa69751e139332ced",
    "choices": [
        {
            "delta": {
                "content": "This is a test. If you have any specific questions or need information, feel",
                "role": "assistant",
                "function_call": null,
                "tool_calls": null
            },
            "finish_reason": null,
            "index": 0,
            "logprobs": null
        }
    ],
    "created": 1740543996,
    "model": "model",
    "object": "chat.completion.chunk",
    "system_fingerprint": "",
    "usage": null
}
// omit duplicated information
{"choices":[{"delta":{"content":" free to ask, and I will do my best to provide an answer based on","role":"assistant"}}]}
{"choices":[{"delta":{"content":" the knowledge I have. If your question is unrelated to the provided knowledge base,","role":"assistant"}}]}
{"choices":[{"delta":{"content":" I will let you know.","role":"assistant"}}]}
// the last chunk
{
    "id": "chatcmpl-3a9c3572f29311efa69751e139332ced",
    "choices": [
        {
            "delta": {
                "content": null,
                "role": "assistant",
                "function_call": null,
                "tool_calls": null
            },
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null
        }
    ],
    "created": 1740543996,
    "model": "model",
    "object": "chat.completion.chunk",
    "system_fingerprint": "",
    "usage": {
        "prompt_tokens": 18,
        "completion_tokens": 225,
        "total_tokens": 243
    }
}
```
Non-stream:

```json
{
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
                "content": "This is a test. If you have any specific questions or need information, feel free to ask, and I will do my best to provide an answer based on the knowledge I have. If your question is unrelated to the provided knowledge base, I will let you know.",
                "role": "assistant"
            }
        }
    ],
    "created": 1740543499,
    "id": "chatcmpl-3a9c3572f29311efa69751e139332ced",
    "model": "model",
    "object": "chat.completion",
    "usage": {
        "completion_tokens": 246,
        "completion_tokens_details": {
            "accepted_prediction_tokens": 246,
            "reasoning_tokens": 18,
            "rejected_prediction_tokens": 0
        },
        "prompt_tokens": 18,
        "total_tokens": 264
    }
}
```

Failure:

```json
{
    "code": 102,
    "message": "The last content of this conversation is not from user."
}
```
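When `stream` is `true`, each chunk arrives as a `data:` line in the `text/event-stream` body. A rough sketch of reading that stream with `requests` (reusing the placeholder `address`, `chat_id`, and `api_key` variables from the earlier sketch; the last chunk is skipped because its delta content is null):

```python
import json
import requests

with requests.post(
    f"http://{address}/api/v1/chats_openai/{chat_id}/chat/completions",
    headers={"Content-Type": "application/json",
             "Authorization": f"Bearer {api_key}"},
    json={"model": "model",
          "messages": [{"role": "user", "content": "Say this is a test!"}],
          "stream": True},
    stream=True,
) as resp:
    for line in resp.iter_lines():
        # SSE frames look like: data:{...json chunk...}
        if not line or not line.startswith(b"data:"):
            continue
        chunk = json.loads(line[len(b"data:"):])
        delta = chunk["choices"][0]["delta"]["content"] if chunk["choices"] else None
        if delta:
            print(delta, end="", flush=True)
```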
## DATASET MANAGEMENT

---
```bash
pip install ragflow-sdk
```

:::

---
## OpenAI-Compatible API

---

### Create chat completion

Creates a model response for the given historical chat conversation via OpenAI's API.

#### Parameters

##### model: `str`, *Required*

The model used to generate the response. The server will parse this automatically, so you can set it to any value for now.

##### messages: `list[object]`, *Required*

A list of historical chat messages used to generate the response. This must contain at least one message with the `user` role.

##### stream: `boolean`

Whether to receive the response as a stream. Set this to `false` explicitly if you prefer to receive the entire response in one go instead of as a stream.

#### Returns

- Success: Response [message](https://platform.openai.com/docs/api-reference/chat/create) in the same format as OpenAI's API
- Failure: `Exception`
#### Examples

```python
from openai import OpenAI

model = "model"
client = OpenAI(api_key="ragflow-api-key", base_url="http://ragflow_address/api/v1/chats_openai/<chat_id>")

# Toggle this flag to switch between streamed and whole-response modes.
stream = True
completion = client.chat.completions.create(
    model=model,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who are you?"},
    ],
    stream=stream
)

if stream:
    for chunk in completion:
        print(chunk)
else:
    print(completion.choices[0].message.content)
```
## DATASET MANAGEMENT

---