### What problem does this PR solve?

Add OpenAI-compatible HTTP and Python API reference.

### Type of change

- [x] Documentation Update

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com>
@@ -217,7 +217,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
         model=model,
         messages=[
             {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": "Who you are?"},
+            {"role": "user", "content": "Who are you?"},
             {"role": "assistant", "content": "I am an AI assistant named..."},
             {"role": "user", "content": "Can you tell me how to install neovim"},
         ],
@@ -236,14 +236,20 @@ def chat_completion_openai_like(tenant_id, chat_id):
     messages = req.get("messages", [])
     # To prevent empty [] input
     if len(messages) < 1:
-        return get_error_data_result("You have to provide messages")
+        return get_error_data_result("You have to provide messages.")
+    if messages[-1]["role"] != "user":
+        return get_error_data_result("The last content of this conversation is not from user.")
+    prompt = messages[-1]["content"]
+    # Treat context tokens as reasoning tokens
+    context_token_used = sum(len(message["content"]) for message in messages)
     dia = DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value)
     if not dia:
         return get_error_data_result(f"You don't own the chat {chat_id}")
     dia = dia[0]
-    # Filter system and assistant messages
+    # Filter system and non-sense assistant messages
+    msg = None
     msg = [m for m in messages if m["role"] != "system" and (m["role"] != "assistant" or msg)]
@@ -251,7 +257,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
     # The value for the usage field on all chunks except for the last one will be null.
     # The usage field on the last chunk contains token usage statistics for the entire request.
     # The choices field on the last chunk will always be an empty array [].
-    def streamed_respose_generator(chat_id, dia, msg):
+    def streamed_response_generator(chat_id, dia, msg):
         token_used = 0
         response = {
             "id": f"chatcmpl-{chat_id}",
@@ -286,17 +292,17 @@ def chat_completion_openai_like(tenant_id, chat_id):
                 response["choices"][0]["delta"]["content"] = "**ERROR**: " + str(e)
                 yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
-        # The last chunck
+        # The last chunk
         response["choices"][0]["delta"]["content"] = None
         response["choices"][0]["finish_reason"] = "stop"
         response["usage"] = {
-            "prompt_tokens": len(msg),
+            "prompt_tokens": len(prompt),
             "completion_tokens": token_used,
-            "total_tokens": len(msg) + token_used
+            "total_tokens": len(prompt) + token_used
         }
         yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
-    resp = Response(streamed_respose_generator(chat_id, dia, msg), mimetype="text/event-stream")
+    resp = Response(streamed_response_generator(chat_id, dia, msg), mimetype="text/event-stream")
     resp.headers.add_header("Cache-control", "no-cache")
     resp.headers.add_header("Connection", "keep-alive")
     resp.headers.add_header("X-Accel-Buffering", "no")
@@ -308,6 +314,7 @@ def chat_completion_openai_like(tenant_id, chat_id):
             # focus answer content only
             answer = ans
             break
+        content = answer["answer"]
         response = {
             "id": f"chatcmpl-{chat_id}",
@@ -315,20 +322,20 @@ def chat_completion_openai_like(tenant_id, chat_id):
             "created": int(time.time()),
             "model": req.get("model", ""),
             "usage": {
-                "prompt_tokens": len(messages),
-                "completion_tokens": len(answer),
-                "total_tokens": len(messages) + len(answer),
+                "prompt_tokens": len(prompt),
+                "completion_tokens": len(content),
+                "total_tokens": len(prompt) + len(content),
                 "completion_tokens_details": {
-                    "reasoning_tokens": len(answer),
-                    "accepted_prediction_tokens": len(answer),
-                    "rejected_prediction_tokens": len(answer)
+                    "reasoning_tokens": context_token_used,
+                    "accepted_prediction_tokens": len(content),
+                    "rejected_prediction_tokens": 0  # 0 for simplicity
                 }
             },
             "choices": [
                 {
                     "message": {
                         "role": "assistant",
                         "content": content
                     },
                     "logprobs": None,
                     "finish_reason": "stop",
@@ -9,6 +9,154 @@ A complete reference for RAGFlow's RESTful API. Before proceeding, please ensure

---

## OpenAI-Compatible API

---
### Create chat completion

**POST** `/api/v1/chats_openai/{chat_id}/chat/completions`

Creates a model response for a given chat conversation. This endpoint follows the same request and response format as [OpenAI's API](https://platform.openai.com/docs/api-reference/chat/create), so you can interact with it the same way you would with OpenAI.

#### Request

- Method: POST
- URL: `/api/v1/chats_openai/{chat_id}/chat/completions`
- Headers:
  - `'Content-Type: application/json'`
  - `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
  - `"model"`: `string`
  - `"messages"`: `object list`
  - `"stream"`: `boolean`
##### Request example

```bash
curl --request POST \
     --url http://{address}/api/v1/chats_openai/{chat_id}/chat/completions \
     --header 'Content-Type: application/json' \
     --header 'Authorization: Bearer <YOUR_API_KEY>' \
     --data '{
        "model": "model",
        "messages": [{"role": "user", "content": "Say this is a test!"}],
        "stream": true
     }'
```
##### Request parameters

- `model` (*Body parameter*), `string`, *Required*
  The model used to generate the response. The server parses this automatically, so you can set it to any value for now.
- `messages` (*Body parameter*), `list[object]`, *Required*
  A list of historical chat messages used to generate the response. It must contain at least one message with the `user` role.
- `stream` (*Body parameter*), `boolean`
  Whether to receive the response as a stream. Set this to `false` explicitly if you prefer to receive the entire response in one go instead of as a stream.
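For quick experimentation, here is a minimal Python sketch of the same request using the `requests` library; the address, chat ID, and API key below are placeholders to replace with your own values:

```python
import requests

# Placeholders: replace {address}, {chat_id}, and <YOUR_API_KEY> with real values.
url = "http://{address}/api/v1/chats_openai/{chat_id}/chat/completions"
headers = {"Authorization": "Bearer <YOUR_API_KEY>"}
payload = {
    "model": "model",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "stream": False,  # set to True to receive server-sent events instead
}

response = requests.post(url, headers=headers, json=payload)
print(response.json()["choices"][0]["message"]["content"])
```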
#### Response

Stream:

```json
{
    "id": "chatcmpl-3a9c3572f29311efa69751e139332ced",
    "choices": [
        {
            "delta": {
                "content": "This is a test. If you have any specific questions or need information, feel",
                "role": "assistant",
                "function_call": null,
                "tool_calls": null
            },
            "finish_reason": null,
            "index": 0,
            "logprobs": null
        }
    ],
    "created": 1740543996,
    "model": "model",
    "object": "chat.completion.chunk",
    "system_fingerprint": "",
    "usage": null
}
// omit duplicated information
{"choices":[{"delta":{"content":" free to ask, and I will do my best to provide an answer based on","role":"assistant"}}]}
{"choices":[{"delta":{"content":" the knowledge I have. If your question is unrelated to the provided knowledge base,","role":"assistant"}}]}
{"choices":[{"delta":{"content":" I will let you know.","role":"assistant"}}]}
// the last chunk
{
    "id": "chatcmpl-3a9c3572f29311efa69751e139332ced",
    "choices": [
        {
            "delta": {
                "content": null,
                "role": "assistant",
                "function_call": null,
                "tool_calls": null
            },
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null
        }
    ],
    "created": 1740543996,
    "model": "model",
    "object": "chat.completion.chunk",
    "system_fingerprint": "",
    "usage": {
        "prompt_tokens": 18,
        "completion_tokens": 225,
        "total_tokens": 243
    }
}
```
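A client can consume this stream by reading the `data:` lines of the event stream and parsing each chunk as JSON. A minimal sketch using `requests`, with the same placeholder `url`, API key, and payload as in the request example above:

```python
import json
import requests

url = "http://{address}/api/v1/chats_openai/{chat_id}/chat/completions"  # placeholders
headers = {"Authorization": "Bearer <YOUR_API_KEY>"}
payload = {"model": "model", "messages": [{"role": "user", "content": "Say this is a test!"}], "stream": True}

with requests.post(url, headers=headers, json=payload, stream=True) as resp:
    for line in resp.iter_lines():
        if not line or not line.startswith(b"data:"):
            continue
        chunk = json.loads(line[len(b"data:"):])
        # Content deltas arrive on intermediate chunks; the final chunk
        # has null content and carries the usage statistics instead.
        delta = chunk["choices"][0]["delta"]["content"] if chunk["choices"] else None
        if delta:
            print(delta, end="", flush=True)
```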
Non-stream:

```json
{
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
                "content": "This is a test. If you have any specific questions or need information, feel free to ask, and I will do my best to provide an answer based on the knowledge I have. If your question is unrelated to the provided knowledge base, I will let you know.",
                "role": "assistant"
            }
        }
    ],
    "created": 1740543499,
    "id": "chatcmpl-3a9c3572f29311efa69751e139332ced",
    "model": "model",
    "object": "chat.completion",
    "usage": {
        "completion_tokens": 246,
        "completion_tokens_details": {
            "accepted_prediction_tokens": 246,
            "reasoning_tokens": 18,
            "rejected_prediction_tokens": 0
        },
        "prompt_tokens": 18,
        "total_tokens": 264
    }
}
```

Failure:

```json
{
    "code": 102,
    "message": "The last content of this conversation is not from user."
}
```
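Because failures are reported as a `code`/`message` pair in the response body, a client may want to check for the `code` field before reading `choices`. A minimal sketch, reusing the placeholder `url`, `headers`, and `payload` from the request example above:

```python
body = requests.post(url, headers=headers, json=payload).json()
if "code" in body:  # error responses carry code/message instead of choices
    raise RuntimeError(f"RAGFlow error {body['code']}: {body.get('message')}")
print(body["choices"][0]["message"]["content"])
```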
## DATASET MANAGEMENT

---
@@ -13,10 +13,63 @@ Run the following command to download the Python SDK:

```bash
pip install ragflow-sdk
```

:::

---

## OpenAI-Compatible API

---
### Create chat completion

Creates a model response for a given historical chat conversation, following OpenAI's API format.

#### Parameters

##### model: `str`, *Required*

The model used to generate the response. The server parses this automatically, so you can set it to any value for now.

##### messages: `list[object]`, *Required*

A list of historical chat messages used to generate the response. It must contain at least one message with the `user` role.

##### stream: `boolean`

Whether to receive the response as a stream. Set this to `false` explicitly if you prefer to receive the entire response in one go instead of as a stream.

#### Returns

- Success: A response [message](https://platform.openai.com/docs/api-reference/chat/create) in the same format as OpenAI's API.
- Failure: `Exception`
#### Examples

```python
from openai import OpenAI

model = "model"
client = OpenAI(api_key="ragflow-api-key", base_url="http://ragflow_address/api/v1/chats_openai/<chat_id>")

stream = True
completion = client.chat.completions.create(
    model=model,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who are you?"},
    ],
    stream=stream
)

if stream:
    for chunk in completion:
        print(chunk)
else:
    print(completion.choices[0].message.content)
```
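When streaming, the usage statistics arrive only on the final chunk, as documented for the HTTP API above. A minimal sketch of accumulating the answer and reading them, assuming the `openai` SDK version in use exposes the `usage` field on streamed chunks (it is part of the chunk schema in recent SDK versions; the address, chat ID, and API key are placeholders):

```python
from openai import OpenAI

# Placeholders: replace with your RAGFlow address, chat ID, and API key.
client = OpenAI(api_key="ragflow-api-key", base_url="http://ragflow_address/api/v1/chats_openai/<chat_id>")

completion = client.chat.completions.create(
    model="model",
    messages=[{"role": "user", "content": "Who are you?"}],
    stream=True,
)

answer = ""
usage = None
for chunk in completion:
    # Content deltas arrive on intermediate chunks; the final chunk
    # has null content and carries the usage statistics.
    if chunk.choices and chunk.choices[0].delta.content:
        answer += chunk.choices[0].delta.content
    if chunk.usage:
        usage = chunk.usage
print(answer)
print(usage)
```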
## DATASET MANAGEMENT

---