# src/providers/anthropic_provider.py
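"""Anthropic Claude provider.

Implements the BaseProvider interface on top of the official ``anthropic``
SDK: message conversion, context-window truncation, streaming, and tool-call
handling for Claude models.
"""
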
import json
import logging
import math
from collections.abc import Generator
from typing import Any
from anthropic import Anthropic, APIError, Stream
from anthropic.types import Message, MessageStreamEvent, TextDelta
from src.providers.base import BaseProvider
from src.llm_models import MODELS
from src.tools.conversion import convert_to_anthropic_tools
logger = logging.getLogger(__name__)


class AnthropicProvider(BaseProvider):
    """Provider implementation for Anthropic Claude models."""

    def __init__(self, api_key: str, base_url: str | None = None):
        # Fall back to the endpoint from the MODELS config if no base_url is
        # given; the parent stores it even though the Anthropic client targets
        # the official endpoint by default.
        effective_base_url = base_url or MODELS.get("anthropic", {}).get("endpoint")
        super().__init__(api_key, effective_base_url)
        logger.info("Initializing AnthropicProvider")
        try:
            # The Anthropic client also accepts a base_url keyword; we rely on
            # its default endpoint here.
            self.client = Anthropic(api_key=self.api_key)
        except Exception as e:
            logger.error(f"Failed to initialize Anthropic client: {e}", exc_info=True)
            raise

    def _get_context_window(self, model: str) -> int:
        """Retrieves the context window size for a given Anthropic model."""
        default_window = 100000  # Default fallback for Anthropic
        try:
            provider_models = MODELS.get("anthropic", {}).get("models", [])
            for m in provider_models:
                if m.get("id") == model:
                    return m.get("context_window", default_window)
            logger.warning(f"Context window for Anthropic model '{model}' not found in MODELS config. Using default: {default_window}")
            return default_window
        except Exception as e:
            logger.error(f"Error retrieving context window for model {model}: {e}. Using default: {default_window}", exc_info=True)
            return default_window
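
    # Assumed shape of the MODELS config consulted above (defined in
    # src/llm_models.py; keys inferred from usage in this file, values are
    # illustrative only):
    #   MODELS = {
    #       "anthropic": {
    #           "endpoint": "https://api.anthropic.com",
    #           "models": [{"id": "claude-3-5-sonnet-latest", "context_window": 200000}],
    #       },
    #   }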

    def _count_anthropic_tokens(self, messages: list[dict[str, Any]], system_prompt: str | None) -> int:
        """Counts tokens for Anthropic messages using the official client.

        Note: the client's count_tokens expects plain text rather than the
        structured message list used for creation, so messages are flattened
        into a single string first. This is a simplification and may be
        inaccurate for complex content such as tool calls/results.
        """
        text_to_count = ""
        if system_prompt:
            text_to_count += f"System: {system_prompt}\n\n"
        for message in messages:
            role = message.get("role")
            content = message.get("content")
            # Simple concatenation; may need refinement for complex content types.
            if isinstance(content, str):
                text_to_count += f"{role}: {content}\n"
            elif isinstance(content, list):  # Tool results/calls represented as a block list
                try:
                    content_str = json.dumps(content)
                    text_to_count += f"{role}: {content_str}\n"
                except Exception:
                    text_to_count += f"{role}: [Unserializable Content]\n"
        try:
            # Older SDK versions expose a text-based count_tokens on the client.
            count = self.client.count_tokens(text=text_to_count)
            logger.debug(f"Counted Anthropic tokens using client.count_tokens: {count}")
            return count
        except APIError as api_err:
            logger.error(f"Anthropic API error during token count: {api_err}", exc_info=True)
            estimated_tokens = math.ceil(len(text_to_count) / 4.0)  # Same ~4 chars/token approximation as the OpenAI provider
            logger.warning(f"Falling back to character count approximation for Anthropic: {estimated_tokens}")
            return estimated_tokens
        except AttributeError:
            # count_tokens is not available on newer SDK clients.
            logger.warning("self.client.count_tokens not available. Falling back to character count approximation.")
            estimated_tokens = math.ceil(len(text_to_count) / 4.0)
            return estimated_tokens
        except Exception as e:
            logger.error(f"Unexpected error during Anthropic token count: {e}", exc_info=True)
            estimated_tokens = math.ceil(len(text_to_count) / 4.0)  # Fallback approximation
            logger.warning(f"Falling back to character count approximation due to unexpected error: {estimated_tokens}")
            return estimated_tokens
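
    # Newer anthropic SDK versions count tokens via the Messages API instead;
    # a sketch (assumes a model id is available at the call site):
    #   count = self.client.messages.count_tokens(
    #       model=model,
    #       messages=[{"role": "user", "content": text_to_count}],
    #   ).input_tokens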

    def _truncate_messages(self, messages: list[dict[str, Any]], system_prompt: str | None, model: str) -> tuple[list[dict[str, Any]], str | None, int, int]:
        """
        Truncates messages for Anthropic, preserving the system prompt.

        Returns:
            - Potentially truncated list of messages.
            - Original system prompt (or None).
            - Initial token count.
            - Final token count.
        """
        context_limit = self._get_context_window(model)
        buffer = 200  # Safety buffer
        effective_limit = context_limit - buffer
        initial_token_count = self._count_anthropic_tokens(messages, system_prompt)
        final_token_count = initial_token_count
        truncated_messages = list(messages)  # Copy
        # Anthropic requires alternating user/assistant messages, so truncation
        # needs care. We remove from the beginning (the system prompt is kept
        # separate), always dropping the oldest message first.
        while final_token_count > effective_limit and len(truncated_messages) > 0:
            removed_message = truncated_messages.pop(0)
            logger.debug(f"Truncating Anthropic message at index 0 (Role: {removed_message.get('role')}) due to context limit.")
            # For simplicity, remove one message and recount. A more robust
            # approach would remove user/assistant pairs to preserve alternation.
            final_token_count = self._count_anthropic_tokens(truncated_messages, system_prompt)
            logger.debug(f"Recalculated Anthropic tokens: {final_token_count}")
            if not truncated_messages:
                logger.warning("Truncation resulted in empty message list for Anthropic.")
                break
        if initial_token_count != final_token_count:
            logger.info(
                f"Truncated messages for Anthropic model {model}. Initial tokens: {initial_token_count}, Final tokens: {final_token_count}, Limit: {context_limit} (Effective: {effective_limit})"
            )
        else:
            logger.debug(f"No truncation needed for Anthropic model {model}. Tokens: {final_token_count}, Limit: {context_limit} (Effective: {effective_limit})")
        # Single-message removal can leave the list starting with an assistant
        # turn, but the API requires the first message to be from the user (the
        # system prompt is passed separately, so this applies either way). Note
        # the placeholder is not recounted and could, in principle, push the
        # total back over the limit.
        if truncated_messages and truncated_messages[0].get("role") != "user":
            logger.warning("First message after truncation is not 'user'. Prepending placeholder.")
            truncated_messages.insert(0, {"role": "user", "content": "[Context truncated]"})
        return truncated_messages, system_prompt, initial_token_count, final_token_count
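
    # Sketch of the pair-wise removal mentioned above (assumed alternative,
    # not wired in): dropping the two oldest turns together keeps the list
    # starting on a "user" turn without needing a placeholder:
    #   while over_limit and len(truncated_messages) >= 2:
    #       truncated_messages.pop(0)  # oldest user turn
    #       truncated_messages.pop(0)  # its assistant reply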

    def _convert_messages(self, messages: list[dict[str, Any]]) -> tuple[str | None, list[dict[str, Any]]]:
        """Converts standard message format to Anthropic's format, extracting the system prompt."""
        anthropic_messages = []
        system_prompt = None
        for i, message in enumerate(messages):
            role = message.get("role")
            content = message.get("content")
            if role == "system":
                if i == 0:
                    system_prompt = content
                    logger.debug("Extracted system prompt for Anthropic.")
                else:
                    # A system message not at the start cannot map to Anthropic's
                    # single system parameter; treat it as a user message instead.
                    logger.warning("System message found not at the beginning. Treating as user message.")
                    anthropic_messages.append({"role": "user", "content": f"[System Note]\n{content}"})
                continue
            if role == "tool":
                # Tool results become a tool_result content block inside a user
                # message, keyed to the originating tool_use block's id.
                tool_use_id = message.get("tool_call_id")
                anthropic_messages.append({"role": "user", "content": [{"type": "tool_result", "tool_use_id": tool_use_id, "content": content}]})
                continue
            if role == "assistant":
                # Content may be plain text or a list of content blocks (e.g.
                # tool_use blocks from a previous turn); pass through either way.
                anthropic_messages.append({"role": "assistant", "content": content})
                continue
            if role == "user":
                anthropic_messages.append({"role": "user", "content": content})
                continue
            logger.warning(f"Unsupported role '{role}' in message conversion for Anthropic.")
        # The Messages API expects the conversation to start with a user turn
        # (the system prompt is passed separately).
        if anthropic_messages and anthropic_messages[0]["role"] != "user":
            logger.warning("Anthropic conversation must start with a user message. Prepending placeholder user message.")
            anthropic_messages.insert(0, {"role": "user", "content": "[Start of conversation]"})
        return system_prompt, anthropic_messages
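
    # Illustrative conversion (values are examples, not from this codebase):
    #   input:  [{"role": "system", "content": "Be terse."},
    #            {"role": "user", "content": "Hi"}]
    #   output: system_prompt = "Be terse."
    #           anthropic_messages = [{"role": "user", "content": "Hi"}]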

    def create_chat_completion(
        self,
        messages: list[dict[str, Any]],
        model: str,
        temperature: float = 0.4,
        max_tokens: int | None = None,  # Anthropic requires max_tokens
        stream: bool = True,
        tools: list[dict[str, Any]] | None = None,
    ) -> Stream[MessageStreamEvent] | Message:
        """Creates a chat completion using the Anthropic API, handling context truncation."""
        logger.debug(f"Anthropic create_chat_completion called. Model: {model}, Stream: {stream}, Tools: {bool(tools)}")
        # --- Context Truncation ---
        # First convert to Anthropic format to separate out the system prompt,
        # then truncate based on token count.
        temp_system_prompt, temp_anthropic_messages = self._convert_messages(messages)
        truncated_anthropic_msgs, final_system_prompt, _, _ = self._truncate_messages(temp_anthropic_messages, temp_system_prompt, model)
        # --------------------------
        # Anthropic requires max_tokens
        if max_tokens is None:
            max_tokens = 4096  # Default value if not provided
            logger.warning(f"max_tokens not provided for Anthropic, defaulting to {max_tokens}")
        try:
            completion_params = {
                "model": model,
                "messages": truncated_anthropic_msgs,
                "temperature": temperature,
                "max_tokens": max_tokens,
                "stream": stream,
            }
            if final_system_prompt:
                completion_params["system"] = final_system_prompt
            if tools:
                completion_params["tools"] = tools
                # tool_choice is left unset; the API defaults to automatic tool
                # selection when tools are provided.
            # Remove None values (though Anthropic requires max_tokens)
            completion_params = {k: v for k, v in completion_params.items() if v is not None}
            # Summarize the last two messages for logging, truncating long strings.
            log_params = completion_params.copy()
            if "messages" in log_params:
                log_params["messages"] = [
                    {k: (v[:100] + "..." if isinstance(v, str) and len(v) > 100 else v) for k, v in msg.items()}
                    for msg in log_params["messages"][-2:]
                ]
            tools_log = log_params.get("tools", "Not Present")
            logger.debug(f"Calling Anthropic API. Model: {log_params.get('model')}, Stream: {log_params.get('stream')}, System: {bool(log_params.get('system'))}, Tools: {tools_log}")
            logger.debug(f"Full API Params (messages summarized): {log_params}")
            response = self.client.messages.create(**completion_params)
            logger.debug("Anthropic API call successful.")
            # --- Capture Actual Usage (non-streaming only) ---
            if isinstance(response, Message) and response.usage:
                actual_usage = {
                    "prompt_tokens": response.usage.input_tokens,
                    "completion_tokens": response.usage.output_tokens,
                    # Anthropic does not report total_tokens directly; derive it.
                    "total_tokens": response.usage.input_tokens + response.usage.output_tokens,
                }
                logger.info(f"Actual Anthropic API usage: {actual_usage}")
            # TODO: For streaming responses, usage arrives in stream events
            # (input_tokens on message_start, output_tokens on message_delta);
            # it is not captured here yet.
            return response
        except Exception as e:
            logger.error(f"Anthropic API error: {e}", exc_info=True)
            raise
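
    # Sketch of capturing usage from a stream, per the TODO above (assumed
    # approach, not wired in):
    #   input_tokens = output_tokens = 0
    #   for event in response:
    #       if event.type == "message_start":
    #           input_tokens = event.message.usage.input_tokens
    #       elif event.type == "message_delta":
    #           output_tokens = event.usage.output_tokens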

    def get_streaming_content(self, response: Stream[MessageStreamEvent]) -> Generator[str, None, None]:
        """Yields content chunks from an Anthropic streaming response."""
        logger.debug("Processing Anthropic stream...")
        full_delta = ""
        try:
            for event in response:
                if event.type == "content_block_delta":
                    # Check that the delta is text before accessing .text; other
                    # delta types such as InputJSONDelta (tool arguments) are
                    # ignored for text streaming.
                    if isinstance(event.delta, TextDelta):
                        delta_text = event.delta.text
                        if delta_text:
                            full_delta += delta_text
                            yield delta_text
                elif event.type == "message_start":
                    logger.debug(f"Anthropic stream started. Model: {event.message.model}")
                elif event.type == "message_stop":
                    # The stop_reason lives on the accumulated message state, not
                    # on this event itself; just log that the stream ended.
                    logger.debug("Anthropic stream message_stop event received.")
                elif event.type == "content_block_start":
                    if event.content_block.type == "tool_use":
                        logger.debug(f"Anthropic stream detected tool use start: ID {event.content_block.id}, Name: {event.content_block.name}")
                elif event.type == "content_block_stop":
                    logger.debug(f"Anthropic stream detected content block stop. Index: {event.index}")
            logger.debug(f"Anthropic stream finished. Total delta length: {len(full_delta)}")
        except Exception as e:
            logger.error(f"Error processing Anthropic stream: {e}", exc_info=True)
            yield json.dumps({"error": f"Stream processing error: {str(e)}"})

    def get_content(self, response: Message) -> str:
        """Extracts content from a non-streaming Anthropic response."""
        try:
            # Combine text content from all text blocks
            text_content = "".join(block.text for block in response.content if block.type == "text")
            logger.debug(f"Extracted content (length {len(text_content)}) from non-streaming Anthropic response.")
            return text_content
        except Exception as e:
            logger.error(f"Error extracting content from Anthropic response: {e}", exc_info=True)
            return f"[Error extracting content: {str(e)}]"

    def has_tool_calls(self, response: Stream[MessageStreamEvent] | Message) -> bool:
        """Checks if the Anthropic response contains tool calls."""
        try:
            if isinstance(response, Message):  # Non-streaming
                # Check both the stop reason and the content blocks.
                has_tool_use_block = any(block.type == "tool_use" for block in response.content)
                has_calls = response.stop_reason == "tool_use" or has_tool_use_block
                logger.debug(f"Non-streaming Anthropic response check: stop_reason='{response.stop_reason}', has_tool_use_block={has_tool_use_block}. Result: {has_calls}")
                return has_calls
            elif isinstance(response, Stream):
                # An unconsumed stream cannot be checked reliably without
                # consuming it; the LLMClient should check after consumption.
                logger.warning("has_tool_calls check on an Anthropic stream is unreliable before consumption.")
                return False
            else:
                logger.warning(f"has_tool_calls received unexpected type for Anthropic: {type(response)}")
                return False
        except Exception as e:
            logger.error(f"Error checking for Anthropic tool calls: {e}", exc_info=True)
            return False

    def parse_tool_calls(self, response: Message) -> list[dict[str, Any]]:
        """Parses tool calls from a non-streaming Anthropic response."""
        parsed_calls = []
        try:
            if not isinstance(response, Message):
                logger.error(f"parse_tool_calls expects Anthropic Message, got {type(response)}")
                return []
            if response.stop_reason != "tool_use":
                # Check the content blocks anyway; tool_use blocks may appear
                # even when stop_reason is something else.
                logger.debug("No tool use indicated by stop_reason.")
            tool_use_blocks = [block for block in response.content if block.type == "tool_use"]
            if not tool_use_blocks:
                logger.debug("No 'tool_use' content blocks found in Anthropic response.")
                return []
            logger.debug(f"Parsing {len(tool_use_blocks)} 'tool_use' blocks from Anthropic response.")
            for block in tool_use_blocks:
                # Tool names are assumed to be prefixed "server__tool", matching
                # the convention used by the OpenAI provider.
                parts = block.name.split("__", 1)
                if len(parts) == 2:
                    server_name, func_name = parts
                else:
                    logger.warning(f"Could not determine server_name from Anthropic tool name '{block.name}'.")
                    server_name = None
                    func_name = block.name
                parsed_calls.append({
                    "id": block.id,
                    "server_name": server_name,
                    "function_name": func_name,
                    # block.input is already a dict; serialize it to a JSON
                    # string to match the format the OpenAI provider produces.
                    "arguments": json.dumps(block.input),
                })
            return parsed_calls
        except Exception as e:
            logger.error(f"Error parsing Anthropic tool calls: {e}", exc_info=True)
            return []
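
    # Illustrative parse result (ids and names are examples only):
    #   [{"id": "toolu_01A", "server_name": "weather",
    #     "function_name": "get_forecast", "arguments": '{"city": "Oslo"}'}]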

    def format_tool_results(self, tool_call_id: str, result: Any) -> dict[str, Any]:
        """Formats a tool result for an Anthropic follow-up request."""
        # Anthropic expects a 'tool_result' content block whose content is
        # typically a string.
        try:
            if isinstance(result, dict):
                content_str = json.dumps(result)
            else:
                content_str = str(result)
        except Exception as e:
            logger.error(f"Error encoding tool result for Anthropic {tool_call_id}: {e}")
            content_str = json.dumps({"error": "Failed to encode tool result", "original_type": str(type(result))})
        logger.debug(f"Formatting Anthropic tool result for call ID {tool_call_id}")
        # This block must be placed inside a "user" role message's content list.
        return {
            "type": "tool_result",
            "tool_use_id": tool_call_id,
            "content": content_str,
            # Optionally add is_error=True if the result indicates an error
        }
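
    # The block returned above is meant to be wrapped by the caller, e.g.:
    #   {"role": "user",
    #    "content": [provider.format_tool_results(call_id, result)]}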

    def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Converts internal tool format to Anthropic's format."""
        logger.debug(f"Converting {len(tools)} tools to Anthropic format.")
        try:
            # The conversion function is expected to handle the server__tool
            # name prefixing.
            anthropic_tools = convert_to_anthropic_tools(tools)
            logger.debug(f"Tool conversion result: {anthropic_tools}")
            return anthropic_tools
        except Exception as e:
            logger.error(f"Error during Anthropic tool conversion: {e}", exc_info=True)
            return []

    # Helper needed by LLMClient's current tool handling logic (mirrors the
    # OpenAI provider's pattern).
    def get_original_message_with_calls(self, response: Message) -> dict[str, Any]:
        """Extracts the assistant's message containing tool calls for Anthropic."""
        try:
            if isinstance(response, Message) and any(block.type == "tool_use" for block in response.content):
                # In Anthropic's structure the Message itself is the assistant's
                # turn; return a dict representation of it, including the
                # tool_use blocks, with the Pydantic content models dumped to dicts.
                content_list = [block.model_dump(exclude_unset=True) for block in response.content]
                return {"role": "assistant", "content": content_list}
            else:
                logger.warning("Could not extract original message with tool calls from Anthropic response.")
                return {"role": "assistant", "content": "[Could not extract tool calls message]"}
        except Exception as e:
            logger.error(f"Error extracting original Anthropic message with calls: {e}", exc_info=True)
            return {"role": "assistant", "content": f"[Error extracting tool calls message: {str(e)}]"}

    def get_usage(self, response: Any) -> dict[str, int] | None:
        """Extracts token usage from a non-streaming Anthropic response."""
        try:
            if isinstance(response, Message) and response.usage:
                usage = {
                    "prompt_tokens": response.usage.input_tokens,
                    "completion_tokens": response.usage.output_tokens,
                    # "total_tokens" could be derived as input + output if needed.
                }
                logger.debug(f"Extracted usage from Anthropic response: {usage}")
                return usage
            else:
                logger.warning(f"Could not extract usage from Anthropic response object of type {type(response)}")
                return None
        except Exception as e:
            logger.error(f"Error extracting usage from Anthropic response: {e}", exc_info=True)
            return None
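

# Example usage (sketch; the model id and API key are placeholders, not
# confirmed by this module):
#   provider = AnthropicProvider(api_key="sk-ant-...")
#   stream = provider.create_chat_completion(
#       messages=[{"role": "user", "content": "Hello"}],
#       model="claude-3-5-sonnet-latest",
#   )
#   for chunk in provider.get_streaming_content(stream):
#       print(chunk, end="")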