### What problem does this PR solve?

Add OpenAI-compatible HTTP and Python API reference.

### Type of change

- [x] Documentation Update

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com>
@@ -217,7 +217,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
         model=model,
         messages=[
             {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": "Who you are?"},
+            {"role": "user", "content": "Who are you?"},
             {"role": "assistant", "content": "I am an AI assistant named..."},
             {"role": "user", "content": "Can you tell me how to install neovim"},
         ],
@@ -236,14 +236,20 @@ def chat_completion_openai_like(tenant_id, chat_id):
     messages = req.get("messages", [])
     # To prevent empty [] input
     if len(messages) < 1:
-        return get_error_data_result("You have to provide messages")
+        return get_error_data_result("You have to provide messages.")
+    if messages[-1]["role"] != "user":
+        return get_error_data_result("The last content of this conversation is not from user.")
+    prompt = messages[-1]["content"]
+    # Treat context tokens as reasoning tokens
+    context_token_used = sum(len(message["content"]) for message in messages)
     dia = DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value)
     if not dia:
         return get_error_data_result(f"You don't own the chat {chat_id}")
     dia = dia[0]
-    # Filter system and assistant messages
+    # Filter system and non-sense assistant messages
+    msg = None
     msg = [m for m in messages if m["role"] != "system" and (m["role"] != "assistant" or msg)]
@@ -251,7 +257,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
     # The value for the usage field on all chunks except for the last one will be null.
     # The usage field on the last chunk contains token usage statistics for the entire request.
     # The choices field on the last chunk will always be an empty array [].
-    def streamed_respose_generator(chat_id, dia, msg):
+    def streamed_response_generator(chat_id, dia, msg):
         token_used = 0
         response = {
             "id": f"chatcmpl-{chat_id}",
@@ -286,17 +292,17 @@ def chat_completion_openai_like(tenant_id, chat_id):
                 response["choices"][0]["delta"]["content"] = "**ERROR**: " + str(e)
                 yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
-        # The last chunck
+        # The last chunk
         response["choices"][0]["delta"]["content"] = None
         response["choices"][0]["finish_reason"] = "stop"
         response["usage"] = {
-            "prompt_tokens": len(msg),
+            "prompt_tokens": len(prompt),
             "completion_tokens": token_used,
-            "total_tokens": len(msg) + token_used
+            "total_tokens": len(prompt) + token_used
         }
         yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
-    resp = Response(streamed_respose_generator(chat_id, dia, msg), mimetype="text/event-stream")
+    resp = Response(streamed_response_generator(chat_id, dia, msg), mimetype="text/event-stream")
     resp.headers.add_header("Cache-control", "no-cache")
     resp.headers.add_header("Connection", "keep-alive")
     resp.headers.add_header("X-Accel-Buffering", "no")
@@ -308,6 +314,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
             # focus answer content only
             answer = ans
             break
+        content = answer["answer"]
         response = {
             "id": f"chatcmpl-{chat_id}",
@@ -315,20 +322,20 @@ def chat_completion_openai_like(tenant_id, chat_id):
             "created": int(time.time()),
             "model": req.get("model", ""),
             "usage": {
-                "prompt_tokens": len(messages),
-                "completion_tokens": len(answer),
-                "total_tokens": len(messages) + len(answer),
+                "prompt_tokens": len(prompt),
+                "completion_tokens": len(content),
+                "total_tokens": len(prompt) + len(content),
                 "completion_tokens_details": {
-                    "reasoning_tokens": len(answer),
-                    "accepted_prediction_tokens": len(answer),
-                    "rejected_prediction_tokens": len(answer)
+                    "reasoning_tokens": context_token_used,
+                    "accepted_prediction_tokens": len(content),
+                    "rejected_prediction_tokens": 0  # 0 for simplicity
                 }
             },
             "choices": [
                 {
                     "message": {
                         "role": "assistant",
                         "content": content
                     },
                     "logprobs": None,
                     "finish_reason": "stop",
@@ -9,6 +9,154 @@ A complete reference for RAGFlow's RESTful API. Before proceeding, please ensure

---

## OpenAI-Compatible API

---
### Create chat completion

**POST** `/api/v1/chats_openai/{chat_id}/chat/completions`

Creates a model response for a given chat conversation. This endpoint follows the same request and response format as [OpenAI's API](https://platform.openai.com/docs/api-reference/chat/create), so you can interact with it the same way you would with OpenAI.

#### Request

- Method: POST
- URL: `/api/v1/chats_openai/{chat_id}/chat/completions`
- Headers:
  - `'Content-Type: application/json'`
  - `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
  - `"model"`: `string`
  - `"messages"`: `object list`
  - `"stream"`: `boolean`
##### Request example

```bash
curl --request POST \
     --url http://{address}/api/v1/chats_openai/{chat_id}/chat/completions \
     --header 'Content-Type: application/json' \
     --header 'Authorization: Bearer <YOUR_API_KEY>' \
     --data '{
        "model": "model",
        "messages": [{"role": "user", "content": "Say this is a test!"}],
        "stream": true
     }'
```
##### Request parameters

- `model` (*Body parameter*), `string`, *Required*
  The model used to generate the response. The server parses this automatically, so you can set it to any value for now.
- `messages` (*Body parameter*), `list[object]`, *Required*
  A list of historical chat messages used to generate the response. It must contain at least one message with the `user` role.
- `stream` (*Body parameter*), `boolean`
  Whether to receive the response as a stream. Set this to `false` explicitly if you prefer to receive the entire response in one go instead of as a stream.
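For quick experimentation, here is a minimal Python sketch of the same request using the `requests` library; the address, chat ID, and API key below are placeholders to replace with your own values:

```python
import requests

# Placeholders: replace {address}, {chat_id}, and <YOUR_API_KEY> with real values.
url = "http://{address}/api/v1/chats_openai/{chat_id}/chat/completions"
headers = {"Authorization": "Bearer <YOUR_API_KEY>"}
payload = {
    "model": "model",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "stream": False,  # set to True to receive server-sent events instead
}

response = requests.post(url, headers=headers, json=payload)
print(response.json()["choices"][0]["message"]["content"])
```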
#### Response

Stream:

```json
{
    "id": "chatcmpl-3a9c3572f29311efa69751e139332ced",
    "choices": [
        {
            "delta": {
                "content": "This is a test. If you have any specific questions or need information, feel",
                "role": "assistant",
                "function_call": null,
                "tool_calls": null
            },
            "finish_reason": null,
            "index": 0,
            "logprobs": null
        }
    ],
    "created": 1740543996,
    "model": "model",
    "object": "chat.completion.chunk",
    "system_fingerprint": "",
    "usage": null
}
// omit duplicated information
{"choices":[{"delta":{"content":" free to ask, and I will do my best to provide an answer based on","role":"assistant"}}]}
{"choices":[{"delta":{"content":" the knowledge I have. If your question is unrelated to the provided knowledge base,","role":"assistant"}}]}
{"choices":[{"delta":{"content":" I will let you know.","role":"assistant"}}]}
// the last chunk
{
    "id": "chatcmpl-3a9c3572f29311efa69751e139332ced",
    "choices": [
        {
            "delta": {
                "content": null,
                "role": "assistant",
                "function_call": null,
                "tool_calls": null
            },
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null
        }
    ],
    "created": 1740543996,
    "model": "model",
    "object": "chat.completion.chunk",
    "system_fingerprint": "",
    "usage": {
        "prompt_tokens": 18,
        "completion_tokens": 225,
        "total_tokens": 243
    }
}
```
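A client can consume this stream by reading the `data:` lines of the event stream and parsing each chunk as JSON. A minimal sketch using `requests`, with the same placeholder `url`, API key, and payload as in the request example above:

```python
import json
import requests

url = "http://{address}/api/v1/chats_openai/{chat_id}/chat/completions"  # placeholders
headers = {"Authorization": "Bearer <YOUR_API_KEY>"}
payload = {"model": "model", "messages": [{"role": "user", "content": "Say this is a test!"}], "stream": True}

with requests.post(url, headers=headers, json=payload, stream=True) as resp:
    for line in resp.iter_lines():
        if not line or not line.startswith(b"data:"):
            continue
        chunk = json.loads(line[len(b"data:"):])
        # Content deltas arrive on intermediate chunks; the final chunk
        # has null content and carries the usage statistics instead.
        delta = chunk["choices"][0]["delta"]["content"] if chunk["choices"] else None
        if delta:
            print(delta, end="", flush=True)
```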
Non-stream:

```json
{
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
                "content": "This is a test. If you have any specific questions or need information, feel free to ask, and I will do my best to provide an answer based on the knowledge I have. If your question is unrelated to the provided knowledge base, I will let you know.",
                "role": "assistant"
            }
        }
    ],
    "created": 1740543499,
    "id": "chatcmpl-3a9c3572f29311efa69751e139332ced",
    "model": "model",
    "object": "chat.completion",
    "usage": {
        "completion_tokens": 246,
        "completion_tokens_details": {
            "accepted_prediction_tokens": 246,
            "reasoning_tokens": 18,
            "rejected_prediction_tokens": 0
        },
        "prompt_tokens": 18,
        "total_tokens": 264
    }
}
```

Failure:

```json
{
    "code": 102,
    "message": "The last content of this conversation is not from user."
}
```
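Because failures are reported as a `code`/`message` pair in the response body, a client may want to check for the `code` field before reading `choices`. A minimal sketch, reusing the placeholder `url`, `headers`, and `payload` from the request example above:

```python
body = requests.post(url, headers=headers, json=payload).json()
if "code" in body:  # error responses carry code/message instead of choices
    raise RuntimeError(f"RAGFlow error {body['code']}: {body.get('message')}")
print(body["choices"][0]["message"]["content"])
```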
## DATASET MANAGEMENT

---
@@ -13,10 +13,63 @@ Run the following command to download the Python SDK:

```bash
pip install ragflow-sdk
```

:::

---

## OpenAI-Compatible API

---
### Create chat completion

Creates a model response for a given historical chat conversation, following OpenAI's API format.

#### Parameters

##### model: `str`, *Required*

The model used to generate the response. The server parses this automatically, so you can set it to any value for now.

##### messages: `list[object]`, *Required*

A list of historical chat messages used to generate the response. It must contain at least one message with the `user` role.

##### stream: `boolean`

Whether to receive the response as a stream. Set this to `false` explicitly if you prefer to receive the entire response in one go instead of as a stream.

#### Returns

- Success: A response [message](https://platform.openai.com/docs/api-reference/chat/create) in the same format as OpenAI's API.
- Failure: `Exception`
#### Examples

```python
from openai import OpenAI

model = "model"
client = OpenAI(api_key="ragflow-api-key", base_url="http://ragflow_address/api/v1/chats_openai/<chat_id>")

stream = True
completion = client.chat.completions.create(
    model=model,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who are you?"},
    ],
    stream=stream
)

if stream:
    for chunk in completion:
        print(chunk)
else:
    print(completion.choices[0].message.content)
```
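When streaming, the usage statistics arrive only on the final chunk, as documented for the HTTP API above. A minimal sketch of accumulating the answer and reading them, assuming the `openai` SDK version in use exposes the `usage` field on streamed chunks (it is part of the chunk schema in recent SDK versions; the address, chat ID, and API key are placeholders):

```python
from openai import OpenAI

# Placeholders: replace with your RAGFlow address, chat ID, and API key.
client = OpenAI(api_key="ragflow-api-key", base_url="http://ragflow_address/api/v1/chats_openai/<chat_id>")

completion = client.chat.completions.create(
    model="model",
    messages=[{"role": "user", "content": "Who are you?"}],
    stream=True,
)

answer = ""
usage = None
for chunk in completion:
    # Content deltas arrive on intermediate chunks; the final chunk
    # has null content and carries the usage statistics.
    if chunk.choices and chunk.choices[0].delta.content:
        answer += chunk.choices[0].delta.content
    if chunk.usage:
        usage = chunk.usage
print(answer)
print(usage)
```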
## DATASET MANAGEMENT

---