#!/usr/bin/env python3
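"""A mock OpenAI-compatible API server built on Flask.

Serves canned responses for the models, chat completions, completions,
and embeddings endpoints so clients can be tested without real API
calls. Token counts are rough word-count approximations, not real
tokenizer output.
"""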

import json
import time
import uuid
from collections.abc import Iterator
from typing import Any

from flask import Flask, Response, jsonify, request

app = Flask(__name__)

# Mock models list
MODELS = [
    {
        "id": "gpt-3.5-turbo",
        "object": "model",
        "created": 1677649963,
        "owned_by": "openai",
    },
    {"id": "gpt-4", "object": "model", "created": 1687882411, "owned_by": "openai"},
    {
        "id": "text-embedding-ada-002",
        "object": "model",
        "created": 1671217299,
        "owned_by": "openai-internal",
    },
]


@app.route("/v1/models", methods=["GET"])
def list_models() -> Any:
    """List available models."""
    return jsonify({"object": "list", "data": MODELS})


@app.route("/v1/chat/completions", methods=["POST"])
def chat_completions() -> Any:
    """Handle chat completions."""
    # get_json(silent=True) tolerates a missing or wrong Content-Type
    # header instead of raising, unlike bare request.json
    data = request.get_json(silent=True) or {}
    model = data.get("model", "gpt-3.5-turbo")
    messages = data.get("messages", [])
    stream = data.get("stream", False)

    # Generate a mock response that echoes the last message
    response_content = "This is a mock response from the OpenAI server."
    if messages:
        # str() guards against structured (non-string) content payloads
        last_message = str(messages[-1].get("content", ""))
        response_content = f"Mock response to: {last_message[:100]}..."

    if stream:
        # Streaming response
        def generate() -> Iterator[str]:
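            # The OpenAI streaming format is server-sent events: one
            # "data: <json>\n\n" frame per chunk, terminated by a literal
            # "data: [DONE]" frame. All chunks of a single completion
            # share one id and creation timestamp.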
            # Generate the shared id and timestamp once, as in the real API
            chunk_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"
            created = int(time.time())

            # Send the initial chunk carrying the assistant role
            chunk = {
                "id": chunk_id,
                "object": "chat.completion.chunk",
                "created": created,
                "model": model,
                "choices": [
                    {
                        "index": 0,
                        "delta": {"role": "assistant", "content": ""},
                        "finish_reason": None,
                    }
                ],
            }
            yield f"data: {json.dumps(chunk)}\n\n"

            # Send the content word by word
            for word in response_content.split():
                chunk = {
                    "id": chunk_id,
                    "object": "chat.completion.chunk",
                    "created": created,
                    "model": model,
                    "choices": [
                        {
                            "index": 0,
                            "delta": {"content": word + " "},
                            "finish_reason": None,
                        }
                    ],
                }
                yield f"data: {json.dumps(chunk)}\n\n"
                time.sleep(0.05)  # Simulate streaming delay

            # Send the final chunk with a finish reason, then the sentinel
            chunk = {
                "id": chunk_id,
                "object": "chat.completion.chunk",
                "created": created,
                "model": model,
                "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
            }
            yield f"data: {json.dumps(chunk)}\n\n"
            yield "data: [DONE]\n\n"

        return Response(generate(), mimetype="text/event-stream")
    else:
        # Non-streaming response
        prompt_words = sum(
            len(str(m.get("content", "")).split()) for m in messages
        )
        completion_words = len(response_content.split())
        return jsonify(
            {
                "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
                "object": "chat.completion",
                "created": int(time.time()),
                "model": model,
                "choices": [
                    {
                        "index": 0,
                        "message": {"role": "assistant", "content": response_content},
                        "finish_reason": "stop",
                    }
                ],
                # Rough word counts stand in for real token counts
                "usage": {
                    "prompt_tokens": prompt_words,
                    "completion_tokens": completion_words,
                    "total_tokens": prompt_words + completion_words,
                },
            }
        )


@app.route("/v1/completions", methods=["POST"])
def completions() -> Any:
    """Handle text completions."""
    data = request.get_json(silent=True) or {}
    model = data.get("model", "gpt-3.5-turbo-instruct")
    prompt = data.get("prompt", "")
    if isinstance(prompt, list):
        # The API also accepts a list of prompts; join them for the mock
        prompt = " ".join(map(str, prompt))

    response_text = f"Mock completion for prompt: {prompt[:100]}..."

    return jsonify(
        {
            "id": f"cmpl-{uuid.uuid4().hex[:8]}",
            "object": "text_completion",
            "created": int(time.time()),
            "model": model,
            "choices": [
                {
                    "text": response_text,
                    "index": 0,
                    "logprobs": None,
                    "finish_reason": "stop",
                }
            ],
            # Rough word counts stand in for real token counts
            "usage": {
                "prompt_tokens": len(prompt.split()),
                "completion_tokens": len(response_text.split()),
                "total_tokens": len(prompt.split()) + len(response_text.split()),
            },
        }
    )


@app.route("/v1/embeddings", methods=["POST"])
def embeddings() -> Any:
    """Handle embeddings requests."""
    data = request.get_json(silent=True) or {}
    model = data.get("model", "text-embedding-ada-002")
    # The API accepts a single string or a list of strings; return one
    # embedding per input, as the real endpoint does
    inputs = data.get("input", "")
    if isinstance(inputs, str):
        inputs = [inputs]

    # Generate a mock embedding (1536 dimensions for ada-002)
    mock_embedding = [0.1] * 1536
    token_count = sum(len(str(text).split()) for text in inputs)

    return jsonify(
        {
            "object": "list",
            "data": [
                {"object": "embedding", "embedding": mock_embedding, "index": i}
                for i in range(len(inputs))
            ],
            "model": model,
            "usage": {
                "prompt_tokens": token_count,
                "total_tokens": token_count,
            },
        }
    )


@app.route("/v1/models/<model_id>", methods=["GET"])
def get_model(model_id: str) -> tuple[Any, int] | Any:
    """Get specific model details."""
    for model in MODELS:
        if model["id"] == model_id:
            return jsonify(model)

    # Mirror the rough shape of the real API's error envelope
    error = {
        "message": f"The model '{model_id}' does not exist",
        "type": "invalid_request_error",
    }
    return jsonify({"error": error}), 404


@app.route("/health", methods=["GET"])
def health() -> Any:
    """Health check endpoint."""
    return jsonify({"status": "healthy"})


if __name__ == "__main__":
    print("🚀 Starting Mock OpenAI Server on http://localhost:5004")
    print("Available endpoints:")
    print("  - GET  /v1/models")
    print("  - POST /v1/chat/completions")
    print("  - POST /v1/completions")
    print("  - POST /v1/embeddings")
    print("  - GET  /v1/models/<model_id>")
    print("  - GET  /health")
    app.run(host="0.0.0.0", port=5004, debug=True)
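# Example client usage (a sketch; assumes the openai>=1.0 Python SDK is
# installed and this server is running):
#
#   from openai import OpenAI
#
#   client = OpenAI(base_url="http://localhost:5004/v1", api_key="unused")
#   reply = client.chat.completions.create(
#       model="gpt-3.5-turbo",
#       messages=[{"role": "user", "content": "Hello"}],
#   )
#   print(reply.choices[0].message.content)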
 
 