Repository: 1rgs/claude-code-openai Branch: main Commit: dd4a29aff3b4 Files: 9 Total size: 108.3 KB Directory structure: gitextract_zxkxjc5b/ ├── .dockerignore ├── .github/ │ └── workflows/ │ └── publish.yml ├── .gitignore ├── .python-version ├── Dockerfile ├── README.md ├── pyproject.toml ├── server.py └── tests.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ # Git .git .gitignore .gitattributes # CI .codeclimate.yml .travis.yml .taskcluster.yml # Docker docker-compose.yml Dockerfile .docker .dockerignore # Byte-compiled / optimized / DLL files **/__pycache__/ **/*.py[cod] # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .cache nosetests.xml coverage.xml # Translations *.mo *.pot # Django stuff: *.log # Sphinx documentation docs/_build/ # PyBuilder target/ # Virtual environment .env .venv/ venv/ # PyCharm .idea # Python mode for VIM .ropeproject **/.ropeproject # Vim swap files **/*.swp # VS Code .vscode/ ================================================ FILE: .github/workflows/publish.yml ================================================ name: publish on: push: branches: - main jobs: publish: runs-on: ubuntu-latest permissions: packages: write contents: read steps: - uses: actions/checkout@main - name: Set up QEMU uses: docker/setup-qemu-action@master with: platforms: all - name: Set up Docker Buildx id: buildx uses: docker/setup-buildx-action@master - name: Log in to Github's container registry run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $ --password-stdin # Docker images have to have lowercase names, and Github Actions doesn't # have template functions - name: Collect image metadata id: meta uses: docker/metadata-action@master with: images: ghcr.io/${{ github.repository_owner }}/${{ github.event.repository.name }} - name: Build uses: docker/build-push-action@master with: context: . 
file: ./Dockerfile platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.meta.outputs.tags }} ================================================ FILE: .gitignore ================================================ # Environment variables .env # Python __pycache__/ *.py[cod] *$py.class *.so .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg # Virtual environments venv/ env/ ENV/ # Logs *.log # IDE specific files .idea/ .vscode/ *.swp *.swo ================================================ FILE: .python-version ================================================ 3.10 ================================================ FILE: Dockerfile ================================================ FROM python:latest WORKDIR /claude-code-proxy # Copy package specifications COPY pyproject.toml uv.lock ./ # Install uv and project dependencies RUN pip install --upgrade uv && uv sync --locked # Copy project code to current directory COPY . . # Start the proxy EXPOSE 8082 CMD uv run uvicorn server:app --host 0.0.0.0 --port 8082 --reload ================================================ FILE: README.md ================================================ # Anthropic API Proxy for Gemini & OpenAI Models 🔄 **Use Anthropic clients (like Claude Code) with Gemini, OpenAI, or direct Anthropic backends.** 🤝 A proxy server that lets you use Anthropic clients with Gemini, OpenAI, or Anthropic models themselves (a transparent proxy of sorts), all via LiteLLM. 🌉 ![Anthropic API Proxy](pic.png) ## Quick Start ⚡ ### Prerequisites - OpenAI API key 🔑 - Google AI Studio (Gemini) API key (if using Google provider) 🔑 - Google Cloud Project with Vertex AI API enabled (if using Application Default Credentials for Gemini) ☁️ - [uv](https://github.com/astral-sh/uv) installed. ### Setup 🛠️ #### From source 1. **Clone this repository**: ```bash git clone https://github.com/1rgs/claude-code-proxy.git cd claude-code-proxy ``` 2. 
**Install uv** (if you haven't already): ```bash curl -LsSf https://astral.sh/uv/install.sh | sh ``` *(`uv` will handle dependencies based on `pyproject.toml` when you run the server)* 3. **Configure Environment Variables**: Copy the example environment file: ```bash cp .env.example .env ``` Edit `.env` and fill in your API keys and model configurations: * `ANTHROPIC_API_KEY`: (Optional) Needed only if proxying *to* Anthropic models. * `OPENAI_API_KEY`: Your OpenAI API key (Required if using the default OpenAI preference or as fallback). * `GEMINI_API_KEY`: Your Google AI Studio (Gemini) API key (Required if `PREFERRED_PROVIDER=google` and `USE_VERTEX_AUTH` is not `true`). * `USE_VERTEX_AUTH` (Optional): Set to `true` to authenticate with Application Default Credentials (ADC) instead of a static API key. Note: when `USE_VERTEX_AUTH=true`, you must configure `VERTEX_PROJECT` and `VERTEX_LOCATION`. * `VERTEX_PROJECT` (Optional): Your Google Cloud Project ID (Required if `PREFERRED_PROVIDER=google` and `USE_VERTEX_AUTH=true`). * `VERTEX_LOCATION` (Optional): The Google Cloud region for Vertex AI (e.g., `us-central1`) (Required if `PREFERRED_PROVIDER=google` and `USE_VERTEX_AUTH=true`). * `PREFERRED_PROVIDER` (Optional): Set to `openai` (default), `google`, or `anthropic`. This determines the primary backend for mapping `haiku`/`sonnet`. * `BIG_MODEL` (Optional): The model to map `sonnet` requests to. Defaults to `gpt-4.1` (if `PREFERRED_PROVIDER=openai`) or `gemini-2.5-pro-preview-03-25`. Ignored when `PREFERRED_PROVIDER=anthropic`. * `SMALL_MODEL` (Optional): The model to map `haiku` requests to. Defaults to `gpt-4.1-mini` (if `PREFERRED_PROVIDER=openai`) or `gemini-2.0-flash`. Ignored when `PREFERRED_PROVIDER=anthropic`. **Mapping Logic:** - If `PREFERRED_PROVIDER=openai` (default), `haiku`/`sonnet` map to `SMALL_MODEL`/`BIG_MODEL` prefixed with `openai/`. 
- If `PREFERRED_PROVIDER=google`, `haiku`/`sonnet` map to `SMALL_MODEL`/`BIG_MODEL` prefixed with `gemini/` *if* those models are in the server's known `GEMINI_MODELS` list (otherwise falls back to OpenAI mapping). - If `PREFERRED_PROVIDER=anthropic`, `haiku`/`sonnet` requests are passed directly to Anthropic with the `anthropic/` prefix without remapping to different models. 4. **Run the server**: ```bash uv run uvicorn server:app --host 0.0.0.0 --port 8082 --reload ``` *(`--reload` is optional, for development)* #### Docker If using docker, download the example environment file to `.env` and edit it as described above. ```bash curl -o .env https://raw.githubusercontent.com/1rgs/claude-code-proxy/refs/heads/main/.env.example ``` Then, you can either start the container with [docker compose](https://docs.docker.com/compose/) (preferred): ```yml services: proxy: image: ghcr.io/1rgs/claude-code-proxy:latest restart: unless-stopped env_file: .env ports: - 8082:8082 ``` Or with a command: ```bash docker run -d --env-file .env -p 8082:8082 ghcr.io/1rgs/claude-code-proxy:latest ``` ### Using with Claude Code 🎮 1. **Install Claude Code** (if you haven't already): ```bash npm install -g @anthropic-ai/claude-code ``` 2. **Connect to your proxy**: ```bash ANTHROPIC_BASE_URL=http://localhost:8082 claude ``` 3. **That's it!** Your Claude Code client will now use the configured backend models (defaulting to OpenAI) through the proxy. 
🎯 ## Model Mapping 🗺️ The proxy automatically maps Claude models to either OpenAI or Gemini models based on the configured model: | Claude Model | Default Mapping | When BIG_MODEL/SMALL_MODEL is a Gemini model | |--------------|--------------|---------------------------| | haiku | openai/gpt-4.1-mini | gemini/[model-name] | | sonnet | openai/gpt-4.1 | gemini/[model-name] | ### Supported Models #### OpenAI Models The following OpenAI models are supported with automatic `openai/` prefix handling: - o3-mini - o1 - o1-mini - o1-pro - gpt-4.5-preview - gpt-4o - gpt-4o-audio-preview - chatgpt-4o-latest - gpt-4o-mini - gpt-4o-mini-audio-preview - gpt-4.1 - gpt-4.1-mini #### Gemini Models The following Gemini models are supported with automatic `gemini/` prefix handling: - gemini-2.5-pro - gemini-2.5-flash ### Model Prefix Handling The proxy automatically adds the appropriate prefix to model names: - OpenAI models get the `openai/` prefix - Gemini models get the `gemini/` prefix - The BIG_MODEL and SMALL_MODEL will get the appropriate prefix based on whether they're in the OpenAI or Gemini model lists For example: - `gpt-4o` becomes `openai/gpt-4o` - `gemini-2.5-pro-preview-03-25` becomes `gemini/gemini-2.5-pro-preview-03-25` - When BIG_MODEL is set to a Gemini model, Claude Sonnet will map to `gemini/[model-name]` ### Customizing Model Mapping Control the mapping using environment variables in your `.env` file or directly: **Example 1: Default (Use OpenAI)** No changes needed in `.env` beyond API keys, or ensure: ```dotenv OPENAI_API_KEY="your-openai-key" GEMINI_API_KEY="your-google-key" # Needed if PREFERRED_PROVIDER=google # PREFERRED_PROVIDER="openai" # Optional, it's the default # BIG_MODEL="gpt-4.1" # Optional, it's the default # SMALL_MODEL="gpt-4.1-mini" # Optional, it's the default ``` **Example 2a: Prefer Google (using GEMINI_API_KEY)** ```dotenv GEMINI_API_KEY="your-google-key" OPENAI_API_KEY="your-openai-key" # Needed for fallback PREFERRED_PROVIDER="google" # 
BIG_MODEL="gemini-2.5-pro" # Optional, it's the default for Google pref # SMALL_MODEL="gemini-2.5-flash" # Optional, it's the default for Google pref ``` **Example 2b: Prefer Google (using Vertex AI with Application Default Credentials)** ```dotenv OPENAI_API_KEY="your-openai-key" # Needed for fallback PREFERRED_PROVIDER="google" VERTEX_PROJECT="your-gcp-project-id" VERTEX_LOCATION="us-central1" USE_VERTEX_AUTH=true # BIG_MODEL="gemini-2.5-pro" # Optional, it's the default for Google pref # SMALL_MODEL="gemini-2.5-flash" # Optional, it's the default for Google pref ``` **Example 3: Use Direct Anthropic ("Just an Anthropic Proxy" Mode)** ```dotenv ANTHROPIC_API_KEY="sk-ant-..." PREFERRED_PROVIDER="anthropic" # BIG_MODEL and SMALL_MODEL are ignored in this mode # haiku/sonnet requests are passed directly to Anthropic models ``` *Use case: This mode enables you to use the proxy infrastructure (for logging, middleware, request/response processing, etc.) while still using actual Anthropic models rather than being forced to remap to OpenAI or Gemini.* **Example 4: Use Specific OpenAI Models** ```dotenv OPENAI_API_KEY="your-openai-key" GEMINI_API_KEY="your-google-key" PREFERRED_PROVIDER="openai" BIG_MODEL="gpt-4o" # Example specific model SMALL_MODEL="gpt-4o-mini" # Example specific model ``` ## How It Works 🧩 This proxy works by: 1. **Receiving requests** in Anthropic's API format 📥 2. **Translating** the requests to OpenAI format via LiteLLM 🔄 3. **Sending** the translated request to OpenAI 📤 4. **Converting** the response back to Anthropic format 🔄 5. **Returning** the formatted response to the client ✅ The proxy handles both streaming and non-streaming responses, maintaining compatibility with all Claude clients. 🌊 ## Contributing 🤝 Contributions are welcome! Please feel free to submit a Pull Request. 
🎁 ================================================ FILE: pyproject.toml ================================================ [project] name = "anthropic-proxy" version = "0.1.0" description = "Proxy that translates between Anthropic API and LiteLLM" readme = "README.md" requires-python = ">=3.10" dependencies = [ "fastapi[standard]>=0.115.11", "uvicorn>=0.34.0", "httpx>=0.25.0", "pydantic>=2.0.0", "litellm>=1.77.7", "python-dotenv>=1.0.0", "google-auth>=2.41.1", "google-cloud-aiplatform>=1.120.0", ] ================================================ FILE: server.py ================================================ from fastapi import FastAPI, Request, HTTPException import uvicorn import logging import json from pydantic import BaseModel, Field, field_validator from typing import List, Dict, Any, Optional, Union, Literal import httpx import os from fastapi.responses import JSONResponse, StreamingResponse import litellm import uuid import time from dotenv import load_dotenv import re from datetime import datetime import sys # Load environment variables from .env file load_dotenv() # Configure logging logging.basicConfig( level=logging.WARN, # Change to INFO level to show more details format='%(asctime)s - %(levelname)s - %(message)s', ) logger = logging.getLogger(__name__) # Configure uvicorn to be quieter import uvicorn # Tell uvicorn's loggers to be quiet logging.getLogger("uvicorn").setLevel(logging.WARNING) logging.getLogger("uvicorn.access").setLevel(logging.WARNING) logging.getLogger("uvicorn.error").setLevel(logging.WARNING) # Create a filter to block any log messages containing specific strings class MessageFilter(logging.Filter): def filter(self, record): # Block messages containing these strings blocked_phrases = [ "LiteLLM completion()", "HTTP Request:", "selected model name for cost calculation", "utils.py", "cost_calculator" ] if hasattr(record, 'msg') and isinstance(record.msg, str): for phrase in blocked_phrases: if phrase in record.msg: return False return 
class ColorizedFormatter(logging.Formatter):
    """Formatter that highlights model-mapping debug records.

    A record at DEBUG level whose rendered message contains the text
    ``MODEL MAPPING`` is returned as the bare message wrapped in ANSI
    bold/green escape codes (no timestamp or level prefix). Every other
    record is formatted by ``logging.Formatter`` with the configured
    format string.
    """

    # ANSI escape sequences for terminal styling.
    BLUE = "\033[94m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    RED = "\033[91m"
    RESET = "\033[0m"
    BOLD = "\033[1m"

    def format(self, record):
        """Return the text to emit for ``record``."""
        # Compare against the DEBUG level constant. ``logging.debug`` is the
        # module-level logging *function* and never equals an integer level,
        # which made this branch unreachable.
        if record.levelno == logging.DEBUG:
            # getMessage() applies %-style args and stringifies non-str
            # messages, so the substring test cannot raise TypeError.
            message = record.getMessage()
            if "MODEL MAPPING" in message:
                return f"{self.BOLD}{self.GREEN}{message}{self.RESET}"
        return super().format(record)
def clean_gemini_schema(schema: Any) -> Any:
    """Recursively remove JSON-schema fields unsupported by Gemini tool parameters.

    Dicts are cleaned **in place** and returned; lists are rebuilt with each
    element cleaned; any other value is returned unchanged.

    From every schema dict this removes ``additionalProperties`` and
    ``default``, and removes ``format`` from string schemas unless it is
    ``enum`` or ``date-time``.

    Maps whose keys are user-chosen names rather than schema keywords
    (``properties``, ``patternProperties``, ``$defs``, ``definitions``) are
    not treated as schemas themselves: only their values are cleaned.
    Otherwise a tool parameter that happens to be named ``default`` or
    ``additionalProperties`` would be deleted from the tool definition.
    """
    if isinstance(schema, list):
        return [clean_gemini_schema(item) for item in schema]
    if not isinstance(schema, dict):
        return schema

    # Remove specific keys unsupported by Gemini tool parameters.
    schema.pop("additionalProperties", None)
    schema.pop("default", None)

    # Check for unsupported 'format' in string types.
    if schema.get("type") == "string" and "format" in schema:
        allowed_formats = {"enum", "date-time"}
        if schema["format"] not in allowed_formats:
            logger.debug(f"Removing unsupported format '{schema['format']}' for string type in Gemini schema.")
            schema.pop("format")

    name_keyed_maps = ("properties", "patternProperties", "$defs", "definitions")
    # list() snapshots the items so values can be reassigned while looping.
    for key, value in list(schema.items()):
        if key in name_keyed_maps and isinstance(value, dict):
            # Keys here are property/definition names: keep every key and
            # clean only the sub-schema stored under it.
            for name, sub_schema in list(value.items()):
                value[name] = clean_gemini_schema(sub_schema)
        else:
            schema[key] = clean_gemini_schema(value)
    return schema
str max_tokens: int messages: List[Message] system: Optional[Union[str, List[SystemContent]]] = None stop_sequences: Optional[List[str]] = None stream: Optional[bool] = False temperature: Optional[float] = 1.0 top_p: Optional[float] = None top_k: Optional[int] = None metadata: Optional[Dict[str, Any]] = None tools: Optional[List[Tool]] = None tool_choice: Optional[Dict[str, Any]] = None thinking: Optional[ThinkingConfig] = None original_model: Optional[str] = None # Will store the original model name @field_validator('model') def validate_model_field(cls, v, info): # Renamed to avoid conflict original_model = v new_model = v # Default to original value logger.debug(f"📋 MODEL VALIDATION: Original='{original_model}', Preferred='{PREFERRED_PROVIDER}', BIG='{BIG_MODEL}', SMALL='{SMALL_MODEL}'") # Remove provider prefixes for easier matching clean_v = v if clean_v.startswith('anthropic/'): clean_v = clean_v[10:] elif clean_v.startswith('openai/'): clean_v = clean_v[7:] elif clean_v.startswith('gemini/'): clean_v = clean_v[7:] # --- Mapping Logic --- START --- mapped = False if PREFERRED_PROVIDER == "anthropic": # Don't remap to big/small models, just add the prefix new_model = f"anthropic/{clean_v}" mapped = True # Map Haiku to SMALL_MODEL based on provider preference elif 'haiku' in clean_v.lower(): if PREFERRED_PROVIDER == "google" and SMALL_MODEL in GEMINI_MODELS: new_model = f"gemini/{SMALL_MODEL}" mapped = True else: new_model = f"openai/{SMALL_MODEL}" mapped = True # Map Sonnet to BIG_MODEL based on provider preference elif 'sonnet' in clean_v.lower(): if PREFERRED_PROVIDER == "google" and BIG_MODEL in GEMINI_MODELS: new_model = f"gemini/{BIG_MODEL}" mapped = True else: new_model = f"openai/{BIG_MODEL}" mapped = True # Add prefixes to non-mapped models if they match known lists elif not mapped: if clean_v in GEMINI_MODELS and not v.startswith('gemini/'): new_model = f"gemini/{clean_v}" mapped = True # Technically mapped to add prefix elif clean_v in OPENAI_MODELS 
class TokenCountRequest(BaseModel):
    """Request body for token counting in Anthropic format.

    The ``model`` field is rewritten by ``validate_model_token_count`` using
    the same routing rules as ``MessagesRequest`` so that a token count is
    computed for the backend model that would actually serve the request.
    """
    model: str
    messages: List[Message]
    system: Optional[Union[str, List[SystemContent]]] = None
    tools: Optional[List[Tool]] = None
    thinking: Optional[ThinkingConfig] = None
    tool_choice: Optional[Dict[str, Any]] = None
    original_model: Optional[str] = None  # Will store the original model name

    @field_validator('model')
    def validate_model_token_count(cls, v, info):
        """Map the requested model name to a provider-prefixed backend model.

        Rules, in order:
        1. ``PREFERRED_PROVIDER == "anthropic"``: keep the model and add the
           ``anthropic/`` prefix (no remapping), as ``MessagesRequest`` does.
        2. Names containing ``haiku`` map to ``SMALL_MODEL``; names containing
           ``sonnet`` map to ``BIG_MODEL``. The ``gemini/`` prefix is used when
           the preferred provider is ``google`` and the target is listed in
           ``GEMINI_MODELS``; otherwise ``openai/``.
        3. Un-prefixed names listed in ``GEMINI_MODELS`` / ``OPENAI_MODELS``
           get the matching prefix.
        4. Anything else is returned unchanged (with a warning when it has no
           known provider prefix).
        """
        original_model = v
        new_model = v  # Default to original value

        logger.debug(f"📋 TOKEN COUNT VALIDATION: Original='{original_model}', Preferred='{PREFERRED_PROVIDER}', BIG='{BIG_MODEL}', SMALL='{SMALL_MODEL}'")

        # Remove at most one provider prefix for easier matching.
        clean_v = v
        for prefix in ('anthropic/', 'openai/', 'gemini/'):
            if clean_v.startswith(prefix):
                clean_v = clean_v[len(prefix):]
                break

        # --- Mapping Logic --- START ---
        mapped = False
        lowered = clean_v.lower()
        if PREFERRED_PROVIDER == "anthropic":
            # Previously missing here: without it, token counts for
            # haiku/sonnet were routed to OpenAI while the messages
            # themselves were sent to Anthropic.
            new_model = f"anthropic/{clean_v}"
            mapped = True
        elif 'haiku' in lowered:
            # Map Haiku to SMALL_MODEL based on provider preference.
            if PREFERRED_PROVIDER == "google" and SMALL_MODEL in GEMINI_MODELS:
                new_model = f"gemini/{SMALL_MODEL}"
            else:
                new_model = f"openai/{SMALL_MODEL}"
            mapped = True
        elif 'sonnet' in lowered:
            # Map Sonnet to BIG_MODEL based on provider preference.
            if PREFERRED_PROVIDER == "google" and BIG_MODEL in GEMINI_MODELS:
                new_model = f"gemini/{BIG_MODEL}"
            else:
                new_model = f"openai/{BIG_MODEL}"
            mapped = True
        elif clean_v in GEMINI_MODELS and not v.startswith('gemini/'):
            # Known model given without a prefix: only the prefix is added.
            new_model = f"gemini/{clean_v}"
            mapped = True
        elif clean_v in OPENAI_MODELS and not v.startswith('openai/'):
            new_model = f"openai/{clean_v}"
            mapped = True
        # --- Mapping Logic --- END ---

        if mapped:
            logger.debug(f"📌 TOKEN COUNT MAPPING: '{original_model}' ➡️ '{new_model}'")
        elif not v.startswith(('openai/', 'gemini/', 'anthropic/')):
            logger.warning(f"⚠️ No prefix or mapping rule for token count model: '{original_model}'. Using as is.")

        # Record the requested name alongside the already-validated fields.
        # NOTE(review): whether a value written into info.data survives into
        # the final model depends on pydantic internals - confirm.
        values = info.data
        if isinstance(values, dict):
            values['original_model'] = original_model

        return new_model
def _tool_result_item_to_text(item: Any) -> str:
    """Render one element of a list-valued tool result as plain text."""
    if isinstance(item, str):
        return item
    if isinstance(item, dict):
        if item.get("type") == "text" or "text" in item:
            text = item.get("text", "")
            return "" if text is None else str(text)
        try:
            return json.dumps(item)
        except (TypeError, ValueError):
            # Not JSON-serializable (or circular): fall back to repr-style text.
            return str(item)
    # Object-style text block (anything exposing .type == "text" and .text).
    if getattr(item, "type", None) == "text" and hasattr(item, "text"):
        return str(item.text)
    try:
        return str(item)
    except Exception:
        return "Unparseable content"


def parse_tool_result_content(content):
    """Normalize the ``content`` of an Anthropic ``tool_result`` block to a string.

    * ``None`` -> ``"No content provided"``
    * ``str`` -> returned unchanged
    * ``list`` -> each item rendered as text (text blocks contribute their
      ``text``, strings are used as-is, other dicts are JSON-encoded, anything
      else is ``str()``-ed), joined with newlines and stripped
    * ``dict`` -> its ``text`` when ``type == "text"``, otherwise JSON
    * anything else -> ``str(content)``
    """
    if content is None:
        return "No content provided"
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        return "\n".join(_tool_result_item_to_text(item) for item in content).strip()
    if isinstance(content, dict):
        if content.get("type") == "text":
            return content.get("text", "")
        try:
            return json.dumps(content)
        except (TypeError, ValueError):
            return str(content)
    # Fallback for any other type.
    try:
        return str(content)
    except Exception:
        return "Unparseable content"


def _flatten_tool_result_blocks(blocks) -> str:
    """Collapse a user message containing tool results into one text string."""
    text = ""
    for block in blocks:
        block_type = getattr(block, "type", None)
        if block_type == "text":
            text += block.text + "\n"
        elif block_type == "tool_result":
            tool_id = getattr(block, "tool_use_id", "")
            # Same normalization as parse_tool_result_content, so both code
            # paths render tool results identically.
            result = parse_tool_result_content(block.content) if hasattr(block, "content") else ""
            # In OpenAI format, tool results come from the user as plain text
            # (rather than being content blocks).
            text += f"Tool result for {tool_id}:\n{result}\n"
    return text.strip()


def _convert_content_blocks(blocks) -> list:
    """Convert typed content blocks into plain dict blocks, dropping unknown ones."""
    converted = []
    for block in blocks:
        block_type = getattr(block, "type", None)
        if block_type == "text":
            converted.append({"type": "text", "text": block.text})
        elif block_type == "image":
            converted.append({"type": "image", "source": block.source})
        elif block_type == "tool_use":
            converted.append({
                "type": "tool_use",
                "id": block.id,
                "name": block.name,
                "input": block.input,
            })
        elif block_type == "tool_result":
            result_block = {
                "type": "tool_result",
                "tool_use_id": getattr(block, "tool_use_id", ""),
            }
            if hasattr(block, "content"):
                if isinstance(block.content, str):
                    # A simple string becomes a single text block.
                    result_block["content"] = [{"type": "text", "text": block.content}]
                elif isinstance(block.content, list):
                    # Already a list of blocks: keep it.
                    result_block["content"] = block.content
                else:
                    result_block["content"] = [{"type": "text", "text": str(block.content)}]
            else:
                result_block["content"] = [{"type": "text", "text": ""}]
            converted.append(result_block)
    return converted


def convert_anthropic_to_litellm(anthropic_request: "MessagesRequest") -> Dict[str, Any]:
    """Convert an Anthropic-format request into LiteLLM (OpenAI-style) kwargs.

    Args:
        anthropic_request: The validated request; ``model`` is expected to
            already carry its provider prefix (``openai/``, ``gemini/`` or
            ``anthropic/``).

    Returns:
        A dict with ``model``, ``messages``, ``max_completion_tokens``,
        ``temperature`` and ``stream``, plus ``stop``, ``top_p``, ``top_k``,
        ``thinking``, ``tools`` and ``tool_choice`` when applicable.

    Notes:
        * User messages that contain ``tool_result`` blocks are flattened to
          a single plain-text user message.
        * ``max_tokens`` is capped at 16384 for ``openai/`` and ``gemini/``
          models.
        * Anthropic ``tool_choice`` types map to OpenAI values:
          ``auto`` -> ``"auto"``, ``any`` -> ``"required"``,
          ``none`` -> ``"none"``, ``tool`` -> a named function choice.
          Unrecognized shapes fall back to ``"auto"``.
    """
    # LiteLLM already handles Anthropic models when the model is given as
    # "anthropic/<name>", so only the message/tool shapes need converting.
    messages = []

    # System prompt: either a plain string or a list of text blocks.
    system = anthropic_request.system
    if system:
        if isinstance(system, str):
            messages.append({"role": "system", "content": system})
        elif isinstance(system, list):
            parts = []
            for block in system:
                if hasattr(block, "type") and block.type == "text":
                    parts.append(block.text)
                elif isinstance(block, dict) and block.get("type") == "text":
                    parts.append(block.get("text", ""))
            if parts:
                messages.append({"role": "system", "content": "\n\n".join(parts).strip()})

    # Conversation messages.
    for msg in anthropic_request.messages:
        content = msg.content
        if isinstance(content, str):
            messages.append({"role": msg.role, "content": content})
            continue
        has_tool_result = msg.role == "user" and any(
            getattr(block, "type", None) == "tool_result" for block in content
        )
        if has_tool_result:
            messages.append({"role": "user", "content": _flatten_tool_result_blocks(content)})
        else:
            messages.append({"role": msg.role, "content": _convert_content_blocks(content)})

    # Cap max_tokens for OpenAI/Gemini models to the limit of 16384.
    max_tokens = anthropic_request.max_tokens
    if anthropic_request.model.startswith(("openai/", "gemini/")):
        max_tokens = min(max_tokens, 16384)
        logger.debug(f"Capping max_tokens to 16384 for OpenAI/Gemini model (original value: {anthropic_request.max_tokens})")

    litellm_request = {
        "model": anthropic_request.model,  # it understands "anthropic/claude-x" format
        "messages": messages,
        "max_completion_tokens": max_tokens,
        "temperature": anthropic_request.temperature,
        "stream": anthropic_request.stream,
    }

    # Only include the thinking field for Anthropic models.
    # NOTE(review): this forwards the request's thinking object unchanged -
    # confirm LiteLLM accepts it in that form.
    if anthropic_request.thinking and anthropic_request.model.startswith("anthropic/"):
        litellm_request["thinking"] = anthropic_request.thinking

    # Optional sampling parameters. Use explicit None checks so that a
    # caller-supplied 0 / 0.0 is forwarded instead of silently dropped.
    if anthropic_request.stop_sequences:
        litellm_request["stop"] = anthropic_request.stop_sequences
    if anthropic_request.top_p is not None:
        litellm_request["top_p"] = anthropic_request.top_p
    if anthropic_request.top_k is not None:
        litellm_request["top_k"] = anthropic_request.top_k

    # Convert tools to OpenAI function-tool format.
    if anthropic_request.tools:
        openai_tools = []
        is_gemini_model = anthropic_request.model.startswith("gemini/")
        for tool in anthropic_request.tools:
            if hasattr(tool, "model_dump"):
                tool_dict = tool.model_dump()
            elif hasattr(tool, "dict"):
                tool_dict = tool.dict()
            elif isinstance(tool, dict):
                tool_dict = tool
            else:
                try:
                    tool_dict = dict(tool)
                except (TypeError, ValueError):
                    logger.error(f"Could not convert tool to dict: {tool}")
                    continue  # Skip this tool if conversion fails

            input_schema = tool_dict.get("input_schema", {})
            if is_gemini_model:
                logger.debug(f"Cleaning schema for Gemini tool: {tool_dict.get('name')}")
                input_schema = clean_gemini_schema(input_schema)

            openai_tools.append({
                "type": "function",
                "function": {
                    "name": tool_dict["name"],
                    # A description of None is sent as an empty string.
                    "description": tool_dict.get("description") or "",
                    "parameters": input_schema,  # Use potentially cleaned schema
                },
            })
        litellm_request["tools"] = openai_tools

    # Convert tool_choice to OpenAI format if present.
    if anthropic_request.tool_choice:
        tool_choice = anthropic_request.tool_choice
        if hasattr(tool_choice, "model_dump"):
            tool_choice = tool_choice.model_dump()
        elif hasattr(tool_choice, "dict"):
            tool_choice = tool_choice.dict()

        choice_type = tool_choice.get("type")
        if choice_type == "any":
            # OpenAI-format spelling of "must call some tool".
            litellm_request["tool_choice"] = "required"
        elif choice_type == "none":
            litellm_request["tool_choice"] = "none"
        elif choice_type == "tool" and "name" in tool_choice:
            litellm_request["tool_choice"] = {
                "type": "function",
                "function": {"name": tool_choice["name"]},
            }
        else:
            # "auto", or anything unrecognized, defaults to auto.
            litellm_request["tool_choice"] = "auto"

    return litellm_request
if choices and len(choices) > 0 else "stop" usage_info = litellm_response.usage response_id = getattr(litellm_response, 'id', f"msg_{uuid.uuid4()}") else: # For backward compatibility - handle dict responses # If response is a dict, use it, otherwise try to convert to dict try: response_dict = litellm_response if isinstance(litellm_response, dict) else litellm_response.dict() except AttributeError: # If .dict() fails, try to use model_dump or __dict__ try: response_dict = litellm_response.model_dump() if hasattr(litellm_response, 'model_dump') else litellm_response.__dict__ except AttributeError: # Fallback - manually extract attributes response_dict = { "id": getattr(litellm_response, 'id', f"msg_{uuid.uuid4()}"), "choices": getattr(litellm_response, 'choices', [{}]), "usage": getattr(litellm_response, 'usage', {}) } # Extract the content from the response dict choices = response_dict.get("choices", [{}]) message = choices[0].get("message", {}) if choices and len(choices) > 0 else {} content_text = message.get("content", "") tool_calls = message.get("tool_calls", None) finish_reason = choices[0].get("finish_reason", "stop") if choices and len(choices) > 0 else "stop" usage_info = response_dict.get("usage", {}) response_id = response_dict.get("id", f"msg_{uuid.uuid4()}") # Create content list for Anthropic format content = [] # Add text content block if present (text might be None or empty for pure tool call responses) if content_text is not None and content_text != "": content.append({"type": "text", "text": content_text}) # Add tool calls if present (tool_use in Anthropic format) - only for Claude models if tool_calls and is_claude_model: logger.debug(f"Processing tool calls: {tool_calls}") # Convert to list if it's not already if not isinstance(tool_calls, list): tool_calls = [tool_calls] for idx, tool_call in enumerate(tool_calls): logger.debug(f"Processing tool call {idx}: {tool_call}") # Extract function data based on whether it's a dict or object if 
isinstance(tool_call, dict): function = tool_call.get("function", {}) tool_id = tool_call.get("id", f"tool_{uuid.uuid4()}") name = function.get("name", "") arguments = function.get("arguments", "{}") else: function = getattr(tool_call, "function", None) tool_id = getattr(tool_call, "id", f"tool_{uuid.uuid4()}") name = getattr(function, "name", "") if function else "" arguments = getattr(function, "arguments", "{}") if function else "{}" # Convert string arguments to dict if needed if isinstance(arguments, str): try: arguments = json.loads(arguments) except json.JSONDecodeError: logger.warning(f"Failed to parse tool arguments as JSON: {arguments}") arguments = {"raw": arguments} logger.debug(f"Adding tool_use block: id={tool_id}, name={name}, input={arguments}") content.append({ "type": "tool_use", "id": tool_id, "name": name, "input": arguments }) elif tool_calls and not is_claude_model: # For non-Claude models, convert tool calls to text format logger.debug(f"Converting tool calls to text for non-Claude model: {clean_model}") # We'll append tool info to the text content tool_text = "\n\nTool usage:\n" # Convert to list if it's not already if not isinstance(tool_calls, list): tool_calls = [tool_calls] for idx, tool_call in enumerate(tool_calls): # Extract function data based on whether it's a dict or object if isinstance(tool_call, dict): function = tool_call.get("function", {}) tool_id = tool_call.get("id", f"tool_{uuid.uuid4()}") name = function.get("name", "") arguments = function.get("arguments", "{}") else: function = getattr(tool_call, "function", None) tool_id = getattr(tool_call, "id", f"tool_{uuid.uuid4()}") name = getattr(function, "name", "") if function else "" arguments = getattr(function, "arguments", "{}") if function else "{}" # Convert string arguments to dict if needed if isinstance(arguments, str): try: args_dict = json.loads(arguments) arguments_str = json.dumps(args_dict, indent=2) except json.JSONDecodeError: arguments_str = arguments else: 
arguments_str = json.dumps(arguments, indent=2) tool_text += f"Tool: {name}\nArguments: {arguments_str}\n\n" # Add or append tool text to content if content and content[0]["type"] == "text": content[0]["text"] += tool_text else: content.append({"type": "text", "text": tool_text}) # Get usage information - extract values safely from object or dict if isinstance(usage_info, dict): prompt_tokens = usage_info.get("prompt_tokens", 0) completion_tokens = usage_info.get("completion_tokens", 0) else: prompt_tokens = getattr(usage_info, "prompt_tokens", 0) completion_tokens = getattr(usage_info, "completion_tokens", 0) # Map OpenAI finish_reason to Anthropic stop_reason stop_reason = None if finish_reason == "stop": stop_reason = "end_turn" elif finish_reason == "length": stop_reason = "max_tokens" elif finish_reason == "tool_calls": stop_reason = "tool_use" else: stop_reason = "end_turn" # Default # Make sure content is never empty if not content: content.append({"type": "text", "text": ""}) # Create Anthropic-style response anthropic_response = MessagesResponse( id=response_id, model=original_request.model, role="assistant", content=content, stop_reason=stop_reason, stop_sequence=None, usage=Usage( input_tokens=prompt_tokens, output_tokens=completion_tokens ) ) return anthropic_response except Exception as e: import traceback error_traceback = traceback.format_exc() error_message = f"Error converting response: {str(e)}\n\nFull traceback:\n{error_traceback}" logger.error(error_message) # In case of any error, create a fallback response return MessagesResponse( id=f"msg_{uuid.uuid4()}", model=original_request.model, role="assistant", content=[{"type": "text", "text": f"Error converting response: {str(e)}. 
def _sse_event(payload):
    """Format one Anthropic streaming payload as a Server-Sent Events frame.

    The SSE ``event:`` name is taken from the payload's own ``type`` field,
    which is how every frame emitted by ``handle_streaming`` is labelled.
    """
    return f"event: {payload['type']}\ndata: {json.dumps(payload)}\n\n"


def _read_field(obj, key, default=None):
    """Read ``key`` from a dict-style or attribute-style streaming object."""
    if isinstance(obj, dict):
        return obj.get(key, default)
    return getattr(obj, key, default)


async def handle_streaming(response_generator, original_request: "MessagesRequest"):
    """Handle streaming responses from LiteLLM and convert to Anthropic format.

    Emits ``message_start``, a text ``content_block_start`` at index 0 and a
    ``ping``; then text deltas on block 0 and one ``tool_use`` block (indices
    1, 2, ...) per streamed tool call; and finally ``message_delta``,
    ``message_stop`` and a ``data: [DONE]`` marker.

    Args:
        response_generator: Async iterable of streamed chunks. Each chunk is
            read through ``choices[0].delta`` (falling back to ``.message``)
            and an optional ``usage`` attribute.
        original_request: The incoming request; only ``model`` is read, to
            echo it back in ``message_start``.

    Yields:
        str: SSE frames in Anthropic's streaming event format.
    """
    try:
        # Send message_start event
        message_id = f"msg_{uuid.uuid4().hex[:24]}"  # Format similar to Anthropic's IDs
        yield _sse_event({
            'type': 'message_start',
            'message': {
                'id': message_id,
                'type': 'message',
                'role': 'assistant',
                'model': original_request.model,
                'content': [],
                'stop_reason': None,
                'stop_sequence': None,
                'usage': {
                    'input_tokens': 0,
                    'cache_creation_input_tokens': 0,
                    'cache_read_input_tokens': 0,
                    'output_tokens': 0,
                },
            },
        })

        # Content block index 0 is always the text block
        yield _sse_event({
            'type': 'content_block_start',
            'index': 0,
            'content_block': {'type': 'text', 'text': ''},
        })

        # Send a ping to keep the connection alive (Anthropic does this)
        yield _sse_event({'type': 'ping'})

        tool_index = None          # OpenAI-side index of the tool call being streamed
        anthropic_tool_index = 0   # Anthropic-side block index of that tool call
        last_tool_index = 0        # Highest tool_use block index opened so far
        accumulated_text = ""      # All text content seen so far
        text_sent = False          # Whether any text delta has been emitted
        text_block_closed = False  # Whether block 0 already received its stop
        output_tokens = 0
        has_sent_stop_reason = False

        async for chunk in response_generator:
            try:
                # Usage may arrive on any chunk (typically the last one)
                usage_info = getattr(chunk, 'usage', None)
                if usage_info is not None and hasattr(usage_info, 'completion_tokens'):
                    output_tokens = usage_info.completion_tokens

                choices = getattr(chunk, 'choices', None)
                if not choices:
                    continue

                choice = choices[0]
                # Prefer the streaming delta; fall back to a full message
                if hasattr(choice, 'delta'):
                    delta = choice.delta
                else:
                    delta = getattr(choice, 'message', {})
                finish_reason = getattr(choice, 'finish_reason', None)

                # ---- text content ----
                delta_content = _read_field(delta, 'content')
                if delta_content is not None and delta_content != "":
                    accumulated_text += delta_content
                    # Text is only streamed while no tool call has started
                    if tool_index is None and not text_block_closed:
                        text_sent = True
                        yield _sse_event({
                            'type': 'content_block_delta',
                            'index': 0,
                            'delta': {'type': 'text_delta', 'text': delta_content},
                        })

                # ---- tool calls ----
                delta_tool_calls = _read_field(delta, 'tool_calls')
                if delta_tool_calls:
                    # The first tool call ends the text block (even an empty one)
                    if tool_index is None and not text_block_closed:
                        if accumulated_text and not text_sent:
                            text_sent = True
                            yield _sse_event({
                                'type': 'content_block_delta',
                                'index': 0,
                                'delta': {'type': 'text_delta', 'text': accumulated_text},
                            })
                        text_block_closed = True
                        yield _sse_event({'type': 'content_block_stop', 'index': 0})

                    if not isinstance(delta_tool_calls, list):
                        delta_tool_calls = [delta_tool_calls]

                    for tool_call in delta_tool_calls:
                        # Index of this tool call within the OpenAI stream
                        if isinstance(tool_call, dict) and 'index' in tool_call:
                            current_index = tool_call['index']
                        elif hasattr(tool_call, 'index'):
                            current_index = tool_call.index
                        else:
                            current_index = 0

                        function = _read_field(tool_call, 'function')

                        # A changed index means a new tool call: open a new block
                        if tool_index is None or current_index != tool_index:
                            tool_index = current_index
                            last_tool_index += 1
                            anthropic_tool_index = last_tool_index

                            # Fall back when the provider sends a missing/None id or name
                            tool_id = _read_field(tool_call, 'id') or f"toolu_{uuid.uuid4().hex[:24]}"
                            name = _read_field(function, 'name', '') or ''

                            yield _sse_event({
                                'type': 'content_block_start',
                                'index': anthropic_tool_index,
                                'content_block': {
                                    'type': 'tool_use',
                                    'id': tool_id,
                                    'name': name,
                                    'input': {},
                                },
                            })

                        # Forward argument fragments verbatim as partial JSON
                        arguments = _read_field(function, 'arguments', '')
                        if arguments:
                            if isinstance(arguments, dict):
                                partial_json = json.dumps(arguments)
                            else:
                                partial_json = arguments
                            yield _sse_event({
                                'type': 'content_block_delta',
                                'index': anthropic_tool_index,
                                'delta': {'type': 'input_json_delta', 'partial_json': partial_json},
                            })

                # ---- end of response ----
                if finish_reason and not has_sent_stop_reason:
                    has_sent_stop_reason = True

                    # Close any open tool call blocks
                    if tool_index is not None:
                        for block_index in range(1, last_tool_index + 1):
                            yield _sse_event({'type': 'content_block_stop', 'index': block_index})

                    # Close the text block if no tool call closed it earlier
                    if not text_block_closed:
                        if accumulated_text and not text_sent:
                            yield _sse_event({
                                'type': 'content_block_delta',
                                'index': 0,
                                'delta': {'type': 'text_delta', 'text': accumulated_text},
                            })
                        text_block_closed = True
                        yield _sse_event({'type': 'content_block_stop', 'index': 0})

                    # Map OpenAI finish_reason to Anthropic stop_reason
                    stop_reason = {
                        "length": "max_tokens",
                        "tool_calls": "tool_use",
                    }.get(finish_reason, "end_turn")

                    yield _sse_event({
                        'type': 'message_delta',
                        'delta': {'stop_reason': stop_reason, 'stop_sequence': None},
                        'usage': {"output_tokens": output_tokens},
                    })
                    yield _sse_event({'type': 'message_stop'})

                    # Send final [DONE] marker to match Anthropic's behavior
                    yield "data: [DONE]\n\n"
                    return
            except Exception as e:
                # Log error but continue processing other chunks
                logger.error(f"Error processing chunk: {str(e)}")
                continue

        # The stream ended without a finish_reason: close whatever is still open
        if not has_sent_stop_reason:
            if tool_index is not None:
                for block_index in range(1, last_tool_index + 1):
                    yield _sse_event({'type': 'content_block_stop', 'index': block_index})

            # Only stop the text block once; the first tool call may already
            # have closed it, and a second stop for index 0 is a protocol error.
            if not text_block_closed:
                text_block_closed = True
                yield _sse_event({'type': 'content_block_stop', 'index': 0})

            yield _sse_event({
                'type': 'message_delta',
                'delta': {'stop_reason': 'end_turn', 'stop_sequence': None},
                'usage': {"output_tokens": output_tokens},
            })
            yield _sse_event({'type': 'message_stop'})

            # Send final [DONE] marker to match Anthropic's behavior
            yield "data: [DONE]\n\n"

    except Exception as e:
        import traceback
        error_traceback = traceback.format_exc()
        error_message = f"Error in streaming: {str(e)}\n\nFull traceback:\n{error_traceback}"
        logger.error(error_message)

        # Terminate the stream so the client is not left waiting
        yield _sse_event({
            'type': 'message_delta',
            'delta': {'stop_reason': 'error', 'stop_sequence': None},
            'usage': {'output_tokens': 0},
        })
        yield _sse_event({'type': 'message_stop'})

        # Send final [DONE] marker
        yield "data: [DONE]\n\n"
without provider prefix display_model = original_model if "/" in display_model: display_model = display_model.split("/")[-1] # Clean model name for capability check clean_model = request.model if clean_model.startswith("anthropic/"): clean_model = clean_model[len("anthropic/"):] elif clean_model.startswith("openai/"): clean_model = clean_model[len("openai/"):] logger.debug(f"📊 PROCESSING REQUEST: Model={request.model}, Stream={request.stream}") # Convert Anthropic request to LiteLLM format litellm_request = convert_anthropic_to_litellm(request) # Determine which API key to use based on the model if request.model.startswith("openai/"): litellm_request["api_key"] = OPENAI_API_KEY # Use custom OpenAI base URL if configured if OPENAI_BASE_URL: litellm_request["api_base"] = OPENAI_BASE_URL logger.debug(f"Using OpenAI API key and custom base URL {OPENAI_BASE_URL} for model: {request.model}") else: logger.debug(f"Using OpenAI API key for model: {request.model}") elif request.model.startswith("gemini/"): if USE_VERTEX_AUTH: litellm_request["vertex_project"] = VERTEX_PROJECT litellm_request["vertex_location"] = VERTEX_LOCATION litellm_request["custom_llm_provider"] = "vertex_ai" logger.debug(f"Using Gemini ADC with project={VERTEX_PROJECT}, location={VERTEX_LOCATION} and model: {request.model}") else: litellm_request["api_key"] = GEMINI_API_KEY logger.debug(f"Using Gemini API key for model: {request.model}") else: litellm_request["api_key"] = ANTHROPIC_API_KEY logger.debug(f"Using Anthropic API key for model: {request.model}") # For OpenAI models - modify request format to work with limitations if "openai" in litellm_request["model"] and "messages" in litellm_request: logger.debug(f"Processing OpenAI model request: {litellm_request['model']}") # For OpenAI models, we need to convert content blocks to simple strings # and handle other requirements for i, msg in enumerate(litellm_request["messages"]): # Special case - handle message content directly when it's a list of 
tool_result # This is a specific case we're seeing in the error if "content" in msg and isinstance(msg["content"], list): is_only_tool_result = True for block in msg["content"]: if not isinstance(block, dict) or block.get("type") != "tool_result": is_only_tool_result = False break if is_only_tool_result and len(msg["content"]) > 0: logger.warning(f"Found message with only tool_result content - special handling required") # Extract the content from all tool_result blocks all_text = "" for block in msg["content"]: all_text += "Tool Result:\n" result_content = block.get("content", []) # Handle different formats of content if isinstance(result_content, list): for item in result_content: if isinstance(item, dict) and item.get("type") == "text": all_text += item.get("text", "") + "\n" elif isinstance(item, dict): # Fall back to string representation of any dict try: item_text = item.get("text", json.dumps(item)) all_text += item_text + "\n" except: all_text += str(item) + "\n" elif isinstance(result_content, str): all_text += result_content + "\n" else: try: all_text += json.dumps(result_content) + "\n" except: all_text += str(result_content) + "\n" # Replace the list with extracted text litellm_request["messages"][i]["content"] = all_text.strip() or "..." logger.warning(f"Converted tool_result to plain text: {all_text.strip()[:200]}...") continue # Skip normal processing for this message # 1. 
Handle content field - normal case if "content" in msg: # Check if content is a list (content blocks) if isinstance(msg["content"], list): # Convert complex content blocks to simple string text_content = "" for block in msg["content"]: if isinstance(block, dict): # Handle different content block types if block.get("type") == "text": text_content += block.get("text", "") + "\n" # Handle tool_result content blocks - extract nested text elif block.get("type") == "tool_result": tool_id = block.get("tool_use_id", "unknown") text_content += f"[Tool Result ID: {tool_id}]\n" # Extract text from the tool_result content result_content = block.get("content", []) if isinstance(result_content, list): for item in result_content: if isinstance(item, dict) and item.get("type") == "text": text_content += item.get("text", "") + "\n" elif isinstance(item, dict): # Handle any dict by trying to extract text or convert to JSON if "text" in item: text_content += item.get("text", "") + "\n" else: try: text_content += json.dumps(item) + "\n" except: text_content += str(item) + "\n" elif isinstance(result_content, dict): # Handle dictionary content if result_content.get("type") == "text": text_content += result_content.get("text", "") + "\n" else: try: text_content += json.dumps(result_content) + "\n" except: text_content += str(result_content) + "\n" elif isinstance(result_content, str): text_content += result_content + "\n" else: try: text_content += json.dumps(result_content) + "\n" except: text_content += str(result_content) + "\n" # Handle tool_use content blocks elif block.get("type") == "tool_use": tool_name = block.get("name", "unknown") tool_id = block.get("id", "unknown") tool_input = json.dumps(block.get("input", {})) text_content += f"[Tool: {tool_name} (ID: {tool_id})]\nInput: {tool_input}\n\n" # Handle image content blocks elif block.get("type") == "image": text_content += "[Image content - not displayed in text format]\n" # Make sure content is never empty for OpenAI models 
if not text_content.strip(): text_content = "..." litellm_request["messages"][i]["content"] = text_content.strip() # Also check for None or empty string content elif msg["content"] is None: litellm_request["messages"][i]["content"] = "..." # Empty content not allowed # 2. Remove any fields OpenAI doesn't support in messages for key in list(msg.keys()): if key not in ["role", "content", "name", "tool_call_id", "tool_calls"]: logger.warning(f"Removing unsupported field from message: {key}") del msg[key] # 3. Final validation - check for any remaining invalid values and dump full message details for i, msg in enumerate(litellm_request["messages"]): # Log the message format for debugging logger.debug(f"Message {i} format check - role: {msg.get('role')}, content type: {type(msg.get('content'))}") # If content is still a list or None, replace with placeholder if isinstance(msg.get("content"), list): logger.warning(f"CRITICAL: Message {i} still has list content after processing: {json.dumps(msg.get('content'))}") # Last resort - stringify the entire content as JSON litellm_request["messages"][i]["content"] = f"Content as JSON: {json.dumps(msg.get('content'))}" elif msg.get("content") is None: logger.warning(f"Message {i} has None content - replacing with placeholder") litellm_request["messages"][i]["content"] = "..." 
# Fallback placeholder # Only log basic info about the request, not the full details logger.debug(f"Request for model: {litellm_request.get('model')}, stream: {litellm_request.get('stream', False)}") # Handle streaming mode if request.stream: # Use LiteLLM for streaming num_tools = len(request.tools) if request.tools else 0 log_request_beautifully( "POST", raw_request.url.path, display_model, litellm_request.get('model'), len(litellm_request['messages']), num_tools, 200 # Assuming success at this point ) # Ensure we use the async version for streaming response_generator = await litellm.acompletion(**litellm_request) return StreamingResponse( handle_streaming(response_generator, request), media_type="text/event-stream" ) else: # Use LiteLLM for regular completion num_tools = len(request.tools) if request.tools else 0 log_request_beautifully( "POST", raw_request.url.path, display_model, litellm_request.get('model'), len(litellm_request['messages']), num_tools, 200 # Assuming success at this point ) start_time = time.time() litellm_response = litellm.completion(**litellm_request) logger.debug(f"✅ RESPONSE RECEIVED: Model={litellm_request.get('model')}, Time={time.time() - start_time:.2f}s") # Convert LiteLLM response to Anthropic format anthropic_response = convert_litellm_to_anthropic(litellm_response, request) return anthropic_response except Exception as e: import traceback error_traceback = traceback.format_exc() # Capture as much info as possible about the error error_details = { "error": str(e), "type": type(e).__name__, "traceback": error_traceback } # Check for LiteLLM-specific attributes for attr in ['message', 'status_code', 'response', 'llm_provider', 'model']: if hasattr(e, attr): error_details[attr] = getattr(e, attr) # Check for additional exception details in dictionaries if hasattr(e, '__dict__'): for key, value in e.__dict__.items(): if key not in error_details and key not in ['args', '__traceback__']: error_details[key] = str(value) # Helper function 
@app.post("/v1/messages/count_tokens")
async def count_tokens(
    request: TokenCountRequest,
    raw_request: Request
):
    """Count input tokens for an Anthropic-style request.

    The request is converted to LiteLLM format with
    ``convert_anthropic_to_litellm`` and the resulting messages are counted
    with ``litellm.token_counter``.

    Args:
        request: Parsed token-count request body.
        raw_request: The underlying HTTP request; only its URL path is read,
            for logging.

    Returns:
        TokenCountResponse: ``input_tokens`` holds the count, or the fixed
        fallback ``1000`` when an ``ImportError`` is raised while importing
        or running the counter.

    Raises:
        HTTPException: 500 on any other failure.
    """
    try:
        # Display name for logging: the model name without provider prefix
        original_model = request.original_model or request.model
        display_model = original_model
        if "/" in display_model:
            display_model = display_model.split("/")[-1]

        # Convert the messages to a format LiteLLM can understand
        converted_request = convert_anthropic_to_litellm(
            MessagesRequest(
                model=request.model,
                max_tokens=100,  # Arbitrary value not used for token counting
                messages=request.messages,
                system=request.system,
                tools=request.tools,
                tool_choice=request.tool_choice,
                thinking=request.thinking
            )
        )

        try:
            from litellm import token_counter

            num_tools = len(request.tools) if request.tools else 0
            log_request_beautifully(
                "POST",
                raw_request.url.path,
                display_model,
                converted_request.get('model'),
                len(converted_request['messages']),
                num_tools,
                200  # Assuming success at this point
            )

            # Tokenization happens locally, so no endpoint override is
            # forwarded: token_counter's documented parameters do not include
            # api_base, and an unexpected keyword would turn every count for
            # a custom OpenAI base URL into a 500.
            token_count = token_counter(
                model=converted_request["model"],
                messages=converted_request["messages"],
            )

            # Return Anthropic-style response
            return TokenCountResponse(input_tokens=token_count)

        except ImportError:
            logger.error("Could not import token_counter from litellm")
            # Fallback to a simple approximation
            return TokenCountResponse(input_tokens=1000)  # Default fallback

    except Exception as e:
        import traceback
        error_traceback = traceback.format_exc()
        logger.error(f"Error counting tokens: {str(e)}\n{error_traceback}")
        raise HTTPException(status_code=500, detail=f"Error counting tokens: {str(e)}") from e
if __name__ == "__main__":
    import sys

    # `--help` as the first CLI argument prints the recommended uvicorn
    # command line and exits without starting the server.
    cli_args = sys.argv[1:]
    wants_help = bool(cli_args) and cli_args[0] == "--help"
    if wants_help:
        print("Run with: uvicorn server:app --reload --host 0.0.0.0 --port 8082")
        sys.exit(0)

    # Start the app directly, keeping uvicorn's own logging down to errors
    uvicorn.run(app, host="0.0.0.0", port=8082, log_level="error")
# Load environment variables
load_dotenv()

# Configuration
# Both keys are read from the same ANTHROPIC_API_KEY environment variable;
# either is None when the variable is unset.
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")
PROXY_API_KEY = os.environ.get("ANTHROPIC_API_KEY")  # Using same key for proxy
# Endpoints compared by the tests: the real Anthropic API and the local proxy.
ANTHROPIC_API_URL = "https://api.anthropic.com/v1/messages"
PROXY_API_URL = "http://localhost:8082/v1/messages"
ANTHROPIC_VERSION = "2023-06-01"  # Sent as the `anthropic-version` header
MODEL = "claude-3-sonnet-20240229"  # Change to your preferred model

# Headers
# Request headers for the Anthropic API.
anthropic_headers = {
    "x-api-key": ANTHROPIC_API_KEY,
    "anthropic-version": ANTHROPIC_VERSION,
    "content-type": "application/json",
}

# Request headers for the proxy; same shape, keyed with PROXY_API_KEY.
proxy_headers = {
    "x-api-key": PROXY_API_KEY,
    "anthropic-version": ANTHROPIC_VERSION,
    "content-type": "application/json",
}

# Tool definitions
# Each tool is a dict with `name`, `description` and a JSON-Schema
# `input_schema` describing the tool's arguments.

# Single required string argument: `expression`.
calculator_tool = {
    "name": "calculator",
    "description": "Evaluate mathematical expressions",
    "input_schema": {
        "type": "object",
        "properties": {
            "expression": {
                "type": "string",
                "description": "The mathematical expression to evaluate"
            }
        },
        "required": ["expression"]
    }
}

# Required `location`; optional `units` restricted to celsius/fahrenheit.
weather_tool = {
    "name": "weather",
    "description": "Get weather information for a location",
    "input_schema": {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The city or location to get weather for"
            },
            "units": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
                "description": "Temperature units"
            }
        },
        "required": ["location"]
    }
}

# Single required string argument: `query`.
search_tool = {
    "name": "search",
    "description": "Search for information on the web",
    "input_schema": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "The search query"
            }
        },
        "required": ["query"]
    }
}
= { # Simple text response "simple": { "model": MODEL, "max_tokens": 300, "messages": [ {"role": "user", "content": "Hello, world! Can you tell me about Paris in 2-3 sentences?"} ] }, # Basic tool use "calculator": { "model": MODEL, "max_tokens": 300, "messages": [ {"role": "user", "content": "What is 135 + 7.5 divided by 2.5?"} ], "tools": [calculator_tool], "tool_choice": {"type": "auto"} }, # Multiple tools "multi_tool": { "model": MODEL, "max_tokens": 500, "temperature": 0.7, "top_p": 0.95, "system": "You are a helpful assistant that uses tools when appropriate. Be concise and precise.", "messages": [ {"role": "user", "content": "I'm planning a trip to New York next week. What's the weather like and what are some interesting places to visit?"} ], "tools": [weather_tool, search_tool], "tool_choice": {"type": "auto"} }, # Multi-turn conversation "multi_turn": { "model": MODEL, "max_tokens": 500, "messages": [ {"role": "user", "content": "Let's do some math. What is 240 divided by 8?"}, {"role": "assistant", "content": "To calculate 240 divided by 8, I'll perform the division:\n\n240 ÷ 8 = 30\n\nSo the result is 30."}, {"role": "user", "content": "Now multiply that by 4 and tell me the result."} ], "tools": [calculator_tool], "tool_choice": {"type": "auto"} }, # Content blocks "content_blocks": { "model": MODEL, "max_tokens": 500, "messages": [ {"role": "user", "content": [ {"type": "text", "text": "I need to know the weather in Los Angeles and calculate 75.5 / 5. 
def _first_block_of_type(blocks, block_type):
    """Return the first content block whose ``type`` is *block_type*, or None."""
    return next((blk for blk in blocks if blk.get("type") == block_type), None)


def compare_responses(anthropic_response, proxy_response, check_tools=False):
    """Check that the proxy reply is structurally a valid Anthropic message.

    Both arguments are response objects exposing ``.json()``. The Anthropic
    reply is only used as a reference for printing and for the tool/text
    presence checks; wording is never compared.

    Args:
        anthropic_response: Response returned by the Anthropic API.
        proxy_response: Response returned by the proxy under test.
        check_tools: When true, report tool_use blocks and tolerate replies
            that carry no text block.

    Returns:
        True when every structural check passed.

    Raises:
        AssertionError: If a structural expectation is violated.
    """
    reference = anthropic_response.json()
    candidate = proxy_response.json()

    # Dump everything except the (potentially long) content list.
    for banner, payload in (
        ("\n--- Anthropic Response Structure ---", reference),
        ("\n--- Proxy Response Structure ---", candidate),
    ):
        print(banner)
        print(json.dumps({key: val for key, val in payload.items() if key != "content"}, indent=2))

    # Envelope checks are applied to the proxy only; it may map values
    # differently from Anthropic, so these are intentionally lenient.
    assert candidate.get("role") == "assistant", "Proxy role is not 'assistant'"
    assert candidate.get("type") == "message", "Proxy type is not 'message'"

    allowed_stop_reasons = ("end_turn", "max_tokens", "stop_sequence", "tool_use", None)
    assert candidate.get("stop_reason") in allowed_stop_reasons, "Invalid stop reason"

    assert "content" in reference, "No content in Anthropic response"
    assert "content" in candidate, "No content in Proxy response"

    reference_blocks = reference["content"]
    candidate_blocks = candidate["content"]

    assert isinstance(reference_blocks, list), "Anthropic content is not a list"
    assert isinstance(candidate_blocks, list), "Proxy content is not a list"
    assert len(candidate_blocks) > 0, "Proxy content is empty"

    if check_tools:
        reference_tool = _first_block_of_type(reference_blocks, "tool_use")
        candidate_tool = _first_block_of_type(candidate_blocks, "tool_use")

        if reference_tool is None and candidate_tool is None:
            print("\n⚠️ Neither response contains tool use")
        elif reference_tool is None:
            print("\n---------- PROXY TOOL USE ----------")
            print(json.dumps(candidate_tool, indent=2))
            print("\n⚠️ Proxy response contains tool use, but Anthropic does not")
        else:
            print("\n---------- ANTHROPIC TOOL USE ----------")
            print(json.dumps(reference_tool, indent=2))

            if candidate_tool is None:
                print("\n⚠️ Proxy response does not contain tool use, but Anthropic does")
            else:
                print("\n---------- PROXY TOOL USE ----------")
                print(json.dumps(candidate_tool, indent=2))

                # Only the proxy's tool block is validated.
                assert candidate_tool.get("name") is not None, "Proxy tool has no name"
                assert candidate_tool.get("input") is not None, "Proxy tool has no input"

                print("\n✅ Both responses contain tool use")

    reference_text_block = _first_block_of_type(reference_blocks, "text")
    candidate_text_block = _first_block_of_type(candidate_blocks, "text")
    anthropic_text = None if reference_text_block is None else reference_text_block.get("text")
    proxy_text = None if candidate_text_block is None else candidate_text_block.get("text")

    # A tool-only reply legitimately has no text block.
    if check_tools and (anthropic_text is None or proxy_text is None):
        print("\n⚠️ One or both responses don't have text content (expected for tool-only responses)")
        return True

    assert anthropic_text is not None, "No text found in Anthropic response"
    assert proxy_text is not None, "No text found in Proxy response"

    preview_limit = 5
    anthropic_lines = anthropic_text.strip().split("\n")
    proxy_lines = proxy_text.strip().split("\n")

    print("\n---------- ANTHROPIC TEXT PREVIEW ----------")
    print("\n".join(anthropic_lines[:preview_limit]))

    print("\n---------- PROXY TEXT PREVIEW ----------")
    print("\n".join(proxy_lines[:preview_limit]))

    # Similarity of wording is deliberately not enforced, only structure.
    return True
class StreamStats:
    """Accumulate statistics about one streamed (server-sent events) response.

    Each parsed event payload is passed to :meth:`add_event`, which updates
    counters, the concatenated text, per-block text, tool-use detection and
    error state. ``has_error`` / ``error_message`` may also be set directly by
    the code that drives the HTTP stream.
    """

    def __init__(self):
        self.event_types = set()         # distinct "type" values seen
        self.event_counts = {}           # event type -> number of occurrences
        self.first_event_time = None     # datetime of the first add_event call
        self.last_event_time = None      # datetime of the latest add_event call
        self.total_chunks = 0            # number of events added
        self.events = []                 # every raw event, kept for debugging
        self.text_content = ""           # all text_delta text, concatenated
        self.content_blocks = {}         # block index -> content_block payload
        self.has_tool_use = False        # a tool_use content block was started
        self.has_error = False
        self.error_message = ""
        self.text_content_by_block = {}  # block index -> text of that block

    def add_event(self, event_data):
        """Record one parsed event payload.

        Args:
            event_data: The decoded JSON payload of a single event.
        """
        now = datetime.now()
        if self.first_event_time is None:
            self.first_event_time = now
        self.last_event_time = now
        self.total_chunks += 1

        if "type" in event_data:
            event_type = event_data["type"]
            self.event_types.add(event_type)
            self.event_counts[event_type] = self.event_counts.get(event_type, 0) + 1

            if event_type == "content_block_start":
                block_idx = event_data.get("index")
                content_block = event_data.get("content_block", {})
                if content_block.get("type") == "tool_use":
                    self.has_tool_use = True
                self.content_blocks[block_idx] = content_block
                self.text_content_by_block[block_idx] = ""

            elif event_type == "content_block_delta":
                block_idx = event_data.get("index")
                delta = event_data.get("delta", {})
                if delta.get("type") == "text_delta":
                    text = delta.get("text", "")
                    self.text_content += text
                    # Per-block text is only tracked for blocks that were
                    # announced by a content_block_start event.
                    if block_idx in self.text_content_by_block:
                        self.text_content_by_block[block_idx] += text

            elif event_type == "error":
                # An error reported inside the stream arrives over an HTTP 200
                # response, so this is the only place it can be detected.
                self._record_error(event_data.get("error"))

        # Keep track of all events for debugging
        self.events.append(event_data)

    def _record_error(self, error):
        """Flag the stream as failed and keep a printable description."""
        self.has_error = True
        if isinstance(error, dict):
            kind = error.get("type", "error")
            detail = error.get("message", "")
            self.error_message = f"{kind}: {detail}"
        elif error is not None:
            self.error_message = str(error)
        else:
            self.error_message = "error event received"

    def get_duration(self):
        """Return seconds between the first and last event (0 if none)."""
        if self.first_event_time is None or self.last_event_time is None:
            return 0
        return (self.last_event_time - self.first_event_time).total_seconds()

    def summarize(self):
        """Print a summary of the stream statistics."""
        print(f"Total chunks: {self.total_chunks}")
        print(f"Unique event types: {sorted(self.event_types)}")
        print(f"Event counts: {json.dumps(self.event_counts, indent=2)}")
        print(f"Duration: {self.get_duration():.2f} seconds")
        print(f"Has tool use: {self.has_tool_use}")

        # Print the first few lines of content
        if self.text_content:
            max_preview_lines = 5
            text_preview = "\n".join(self.text_content.strip().split("\n")[:max_preview_lines])
            print(f"Text preview:\n{text_preview}")
        else:
            print("No text content extracted")

        if self.has_error:
            print(f"Error: {self.error_message}")
def compare_stream_stats(anthropic_stats, proxy_stats):
    """Compare two stream statistics objects and decide whether the proxy passed.

    Prints which required event types each stream is missing, a short preview
    of both texts, and whether tool use matched. Missing event types and
    tool-use mismatches are reported as warnings only.

    Args:
        anthropic_stats: Statistics collected from the Anthropic stream.
        proxy_stats: Statistics collected from the proxy stream.

    Returns:
        True when the proxy stream had no error AND produced either some text
        or a tool_use block; False otherwise.
    """
    print("\n--- Stream Comparison ---")

    # Required events
    anthropic_missing = REQUIRED_EVENT_TYPES - anthropic_stats.event_types
    proxy_missing = REQUIRED_EVENT_TYPES - proxy_stats.event_types

    print(f"Anthropic missing event types: {anthropic_missing}")
    print(f"Proxy missing event types: {proxy_missing}")

    # Check if proxy has the required events
    if proxy_missing:
        print(f"⚠️ Proxy is missing required event types: {proxy_missing}")
    else:
        print("✅ Proxy has all required event types")

    # Compare content
    if anthropic_stats.text_content and proxy_stats.text_content:
        anthropic_preview = "\n".join(anthropic_stats.text_content.strip().split("\n")[:5])
        proxy_preview = "\n".join(proxy_stats.text_content.strip().split("\n")[:5])

        print("\n--- Anthropic Content Preview ---")
        print(anthropic_preview)

        print("\n--- Proxy Content Preview ---")
        print(proxy_preview)

    # Compare tool use
    if anthropic_stats.has_tool_use and proxy_stats.has_tool_use:
        print("✅ Both have tool use")
    elif anthropic_stats.has_tool_use and not proxy_stats.has_tool_use:
        print("⚠️ Anthropic has tool use but proxy does not")
    elif not anthropic_stats.has_tool_use and proxy_stats.has_tool_use:
        print("⚠️ Proxy has tool use but Anthropic does not")

    # Success requires no error AND some output. The parentheses matter:
    # `and` binds tighter than `or`, so without them an errored stream that
    # happened to emit a tool_use block would be reported as a pass.
    produced_output = bool(proxy_stats.text_content) or bool(proxy_stats.has_tool_use)
    return (not proxy_stats.has_error) and produced_output
def _is_selected(args, test_data):
    """Apply the --simple / --tools-only filters to one scenario."""
    uses_tools = "tools" in test_data
    if args.simple and uses_tools:
        return False
    if args.tools_only and not uses_tools:
        return False
    return True


async def run_tests(args):
    """Run the scenarios selected by the command-line flags and print a summary.

    Args:
        args: Parsed arguments exposing ``streaming_only``, ``no_streaming``,
            ``simple`` and ``tools_only`` booleans.

    Returns:
        True only when at least one test ran and every test passed. A run in
        which the flags selected no test at all returns False, so it can never
        be mistaken for a green run.
    """
    # Track test results
    results = {}

    # First run non-streaming tests
    if not args.streaming_only:
        print("\n\n=========== RUNNING NON-STREAMING TESTS ===========\n")
        for test_name, test_data in TEST_SCENARIOS.items():
            # Streaming scenarios are handled by the second pass.
            if test_data.get("stream") or not _is_selected(args, test_data):
                continue

            check_tools = "tools" in test_data
            results[test_name] = test_request(test_name, test_data, check_tools=check_tools)

    # Now run streaming tests
    if not args.no_streaming:
        print("\n\n=========== RUNNING STREAMING TESTS ===========\n")
        for test_name, test_data in TEST_SCENARIOS.items():
            # A scenario is streamed if it sets "stream" or is named *_stream.
            is_streaming = test_data.get("stream") or test_name.endswith("_stream")
            if not is_streaming or not _is_selected(args, test_data):
                continue

            results[f"{test_name}_streaming"] = await test_streaming(test_name, test_data)

    # Print summary
    print("\n\n=========== TEST SUMMARY ===========\n")
    total = len(results)
    passed = sum(1 for outcome in results.values() if outcome)

    for test, result in results.items():
        print(f"{test}: {'✅ PASS' if result else '❌ FAIL'}")

    print(f"\nTotal: {passed}/{total} tests passed")

    if total == 0:
        # e.g. --no-streaming together with --streaming-only
        print("\n⚠️ No tests were selected; check the command-line flags")
        return False

    if passed == total:
        print("\n🎉 All tests passed!")
        return True

    print(f"\n⚠️ {total - passed} tests failed")
    return False


async def main():
    """Parse the command line, run the selected tests and exit with a status.

    Exits with status 0 when all tests pass and 1 otherwise, including when
    the API key is not configured.
    """
    # Parse first so that --help works even without an API key.
    parser = argparse.ArgumentParser(description="Test the Claude-on-OpenAI proxy")
    parser.add_argument("--no-streaming", action="store_true", help="Skip streaming tests")
    parser.add_argument("--streaming-only", action="store_true", help="Only run streaming tests")
    parser.add_argument("--simple", action="store_true", help="Only run simple tests (no tools)")
    parser.add_argument("--tools-only", action="store_true", help="Only run tool tests")
    args = parser.parse_args()

    # Check that API key is set; a missing key is a failure, not a clean exit.
    if not ANTHROPIC_API_KEY:
        print("Error: ANTHROPIC_API_KEY not set in .env file")
        sys.exit(1)

    # Run tests
    success = await run_tests(args)
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    asyncio.run(main())