# src/providers/anthropic_provider.py
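"""Anthropic Claude provider.

Implements the BaseProvider interface on top of the official ``anthropic``
SDK: message conversion, context-window truncation, streaming, and tool-call
handling for Claude models.
"""
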
import json
import logging
import math
from collections.abc import Generator
from typing import Any
from anthropic import Anthropic, APIError, Stream
from anthropic.types import Message, MessageStreamEvent, TextDelta
from src.providers.base import BaseProvider
from src.llm_models import MODELS
from src.tools.conversion import convert_to_anthropic_tools
logger = logging.getLogger(__name__)


class AnthropicProvider(BaseProvider):
    """Provider implementation for Anthropic Claude models."""

    def __init__(self, api_key: str, base_url: str | None = None):
        # Fall back to the endpoint from the MODELS config if no base_url is
        # given; the parent stores it even though the Anthropic client targets
        # the official endpoint by default.
        effective_base_url = base_url or MODELS.get("anthropic", {}).get("endpoint")
        super().__init__(api_key, effective_base_url)
        logger.info("Initializing AnthropicProvider")
        try:
            # The Anthropic client also accepts a base_url keyword; we rely on
            # its default endpoint here.
            self.client = Anthropic(api_key=self.api_key)
        except Exception as e:
            logger.error(f"Failed to initialize Anthropic client: {e}", exc_info=True)
            raise

    def _get_context_window(self, model: str) -> int:
        """Retrieves the context window size for a given Anthropic model."""
        default_window = 100000  # Default fallback for Anthropic
        try:
            provider_models = MODELS.get("anthropic", {}).get("models", [])
            for m in provider_models:
                if m.get("id") == model:
                    return m.get("context_window", default_window)
            logger.warning(f"Context window for Anthropic model '{model}' not found in MODELS config. Using default: {default_window}")
            return default_window
        except Exception as e:
            logger.error(f"Error retrieving context window for model {model}: {e}. Using default: {default_window}", exc_info=True)
            return default_window
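
    # Assumed shape of the MODELS config consulted above (defined in
    # src/llm_models.py; keys inferred from usage in this file, values are
    # illustrative only):
    #   MODELS = {
    #       "anthropic": {
    #           "endpoint": "https://api.anthropic.com",
    #           "models": [{"id": "claude-3-5-sonnet-latest", "context_window": 200000}],
    #       },
    #   }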

    def _count_anthropic_tokens(self, messages: list[dict[str, Any]], system_prompt: str | None) -> int:
        """Counts tokens for Anthropic messages using the official client.

        Note: the client's count_tokens expects plain text rather than the
        structured message list used for creation, so messages are flattened
        into a single string first. This is a simplification and may be
        inaccurate for complex content such as tool calls/results.
        """
        text_to_count = ""
        if system_prompt:
            text_to_count += f"System: {system_prompt}\n\n"
        for message in messages:
            role = message.get("role")
            content = message.get("content")
            # Simple concatenation; may need refinement for complex content types.
            if isinstance(content, str):
                text_to_count += f"{role}: {content}\n"
            elif isinstance(content, list):  # Tool results/calls represented as a block list
                try:
                    content_str = json.dumps(content)
                    text_to_count += f"{role}: {content_str}\n"
                except Exception:
                    text_to_count += f"{role}: [Unserializable Content]\n"
        try:
            # Older SDK versions expose a text-based count_tokens on the client.
            count = self.client.count_tokens(text=text_to_count)
            logger.debug(f"Counted Anthropic tokens using client.count_tokens: {count}")
            return count
        except APIError as api_err:
            logger.error(f"Anthropic API error during token count: {api_err}", exc_info=True)
            estimated_tokens = math.ceil(len(text_to_count) / 4.0)  # Same ~4 chars/token approximation as the OpenAI provider
            logger.warning(f"Falling back to character count approximation for Anthropic: {estimated_tokens}")
            return estimated_tokens
        except AttributeError:
            # count_tokens is not available on newer SDK clients.
            logger.warning("self.client.count_tokens not available. Falling back to character count approximation.")
            estimated_tokens = math.ceil(len(text_to_count) / 4.0)
            return estimated_tokens
        except Exception as e:
            logger.error(f"Unexpected error during Anthropic token count: {e}", exc_info=True)
            estimated_tokens = math.ceil(len(text_to_count) / 4.0)  # Fallback approximation
            logger.warning(f"Falling back to character count approximation due to unexpected error: {estimated_tokens}")
            return estimated_tokens
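
    # Newer anthropic SDK versions count tokens via the Messages API instead;
    # a sketch (assumes a model id is available at the call site):
    #   count = self.client.messages.count_tokens(
    #       model=model,
    #       messages=[{"role": "user", "content": text_to_count}],
    #   ).input_tokens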

    def _truncate_messages(self, messages: list[dict[str, Any]], system_prompt: str | None, model: str) -> tuple[list[dict[str, Any]], str | None, int, int]:
        """
        Truncates messages for Anthropic, preserving the system prompt.

        Returns:
            - Potentially truncated list of messages.
            - Original system prompt (or None).
            - Initial token count.
            - Final token count.
        """
        context_limit = self._get_context_window(model)
        buffer = 200  # Safety buffer
        effective_limit = context_limit - buffer
        initial_token_count = self._count_anthropic_tokens(messages, system_prompt)
        final_token_count = initial_token_count
        truncated_messages = list(messages)  # Copy
        # Anthropic requires alternating user/assistant messages, so truncation
        # needs care. We remove from the beginning (the system prompt is kept
        # separate), always dropping the oldest message first.
        while final_token_count > effective_limit and len(truncated_messages) > 0:
            removed_message = truncated_messages.pop(0)
            logger.debug(f"Truncating Anthropic message at index 0 (Role: {removed_message.get('role')}) due to context limit.")
            # For simplicity, remove one message and recount. A more robust
            # approach would remove user/assistant pairs to preserve alternation.
            final_token_count = self._count_anthropic_tokens(truncated_messages, system_prompt)
            logger.debug(f"Recalculated Anthropic tokens: {final_token_count}")
            if not truncated_messages:
                logger.warning("Truncation resulted in empty message list for Anthropic.")
                break
        if initial_token_count != final_token_count:
            logger.info(
                f"Truncated messages for Anthropic model {model}. Initial tokens: {initial_token_count}, Final tokens: {final_token_count}, Limit: {context_limit} (Effective: {effective_limit})"
            )
        else:
            logger.debug(f"No truncation needed for Anthropic model {model}. Tokens: {final_token_count}, Limit: {context_limit} (Effective: {effective_limit})")
        # Single-message removal can leave the list starting with an assistant
        # turn, but the API requires the first message to be from the user (the
        # system prompt is passed separately, so this applies either way). Note
        # the placeholder is not recounted and could, in principle, push the
        # total back over the limit.
        if truncated_messages and truncated_messages[0].get("role") != "user":
            logger.warning("First message after truncation is not 'user'. Prepending placeholder.")
            truncated_messages.insert(0, {"role": "user", "content": "[Context truncated]"})
        return truncated_messages, system_prompt, initial_token_count, final_token_count
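
    # Sketch of the pair-wise removal mentioned above (assumed alternative,
    # not wired in): dropping the two oldest turns together keeps the list
    # starting on a "user" turn without needing a placeholder:
    #   while over_limit and len(truncated_messages) >= 2:
    #       truncated_messages.pop(0)  # oldest user turn
    #       truncated_messages.pop(0)  # its assistant reply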

    def _convert_messages(self, messages: list[dict[str, Any]]) -> tuple[str | None, list[dict[str, Any]]]:
        """Converts standard message format to Anthropic's format, extracting the system prompt."""
        anthropic_messages = []
        system_prompt = None
        for i, message in enumerate(messages):
            role = message.get("role")
            content = message.get("content")
            if role == "system":
                if i == 0:
                    system_prompt = content
                    logger.debug("Extracted system prompt for Anthropic.")
                else:
                    # A system message not at the start cannot map to Anthropic's
                    # single system parameter; treat it as a user message instead.
                    logger.warning("System message found not at the beginning. Treating as user message.")
                    anthropic_messages.append({"role": "user", "content": f"[System Note]\n{content}"})
                continue
            if role == "tool":
                # Tool results become a tool_result content block inside a user
                # message, keyed to the originating tool_use block's id.
                tool_use_id = message.get("tool_call_id")
                anthropic_messages.append({"role": "user", "content": [{"type": "tool_result", "tool_use_id": tool_use_id, "content": content}]})
                continue
            if role == "assistant":
                # Content may be plain text or a list of content blocks (e.g.
                # tool_use blocks from a previous turn); pass through either way.
                anthropic_messages.append({"role": "assistant", "content": content})
                continue
            if role == "user":
                anthropic_messages.append({"role": "user", "content": content})
                continue
            logger.warning(f"Unsupported role '{role}' in message conversion for Anthropic.")
        # The Messages API expects the conversation to start with a user turn
        # (the system prompt is passed separately).
        if anthropic_messages and anthropic_messages[0]["role"] != "user":
            logger.warning("Anthropic conversation must start with a user message. Prepending placeholder user message.")
            anthropic_messages.insert(0, {"role": "user", "content": "[Start of conversation]"})
        return system_prompt, anthropic_messages
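
    # Illustrative conversion (values are examples, not from this codebase):
    #   input:  [{"role": "system", "content": "Be terse."},
    #            {"role": "user", "content": "Hi"}]
    #   output: system_prompt = "Be terse."
    #           anthropic_messages = [{"role": "user", "content": "Hi"}]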

    def create_chat_completion(
        self,
        messages: list[dict[str, Any]],
        model: str,
        temperature: float = 0.4,
        max_tokens: int | None = None,  # Anthropic requires max_tokens
        stream: bool = True,
        tools: list[dict[str, Any]] | None = None,
    ) -> Stream[MessageStreamEvent] | Message:
        """Creates a chat completion using the Anthropic API, handling context truncation."""
        logger.debug(f"Anthropic create_chat_completion called. Model: {model}, Stream: {stream}, Tools: {bool(tools)}")
        # --- Context Truncation ---
        # First convert to Anthropic format to separate out the system prompt,
        # then truncate based on token count.
        temp_system_prompt, temp_anthropic_messages = self._convert_messages(messages)
        truncated_anthropic_msgs, final_system_prompt, _, _ = self._truncate_messages(temp_anthropic_messages, temp_system_prompt, model)
        # --------------------------
        # Anthropic requires max_tokens
        if max_tokens is None:
            max_tokens = 4096  # Default value if not provided
            logger.warning(f"max_tokens not provided for Anthropic, defaulting to {max_tokens}")
        try:
            completion_params = {
                "model": model,
                "messages": truncated_anthropic_msgs,
                "temperature": temperature,
                "max_tokens": max_tokens,
                "stream": stream,
            }
            if final_system_prompt:
                completion_params["system"] = final_system_prompt
            if tools:
                completion_params["tools"] = tools
                # tool_choice is left unset; the API defaults to automatic tool
                # selection when tools are provided.
            # Remove None values (though Anthropic requires max_tokens)
            completion_params = {k: v for k, v in completion_params.items() if v is not None}
            # Summarize the last two messages for logging, truncating long strings.
            log_params = completion_params.copy()
            if "messages" in log_params:
                log_params["messages"] = [
                    {k: (v[:100] + "..." if isinstance(v, str) and len(v) > 100 else v) for k, v in msg.items()}
                    for msg in log_params["messages"][-2:]
                ]
            tools_log = log_params.get("tools", "Not Present")
            logger.debug(f"Calling Anthropic API. Model: {log_params.get('model')}, Stream: {log_params.get('stream')}, System: {bool(log_params.get('system'))}, Tools: {tools_log}")
            logger.debug(f"Full API Params (messages summarized): {log_params}")
            response = self.client.messages.create(**completion_params)
            logger.debug("Anthropic API call successful.")
            # --- Capture Actual Usage (non-streaming only) ---
            if isinstance(response, Message) and response.usage:
                actual_usage = {
                    "prompt_tokens": response.usage.input_tokens,
                    "completion_tokens": response.usage.output_tokens,
                    # Anthropic does not report total_tokens directly; derive it.
                    "total_tokens": response.usage.input_tokens + response.usage.output_tokens,
                }
                logger.info(f"Actual Anthropic API usage: {actual_usage}")
            # TODO: For streaming responses, usage arrives in stream events
            # (input_tokens on message_start, output_tokens on message_delta);
            # it is not captured here yet.
            return response
        except Exception as e:
            logger.error(f"Anthropic API error: {e}", exc_info=True)
            raise
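
    # Sketch of capturing usage from a stream, per the TODO above (assumed
    # approach, not wired in):
    #   input_tokens = output_tokens = 0
    #   for event in response:
    #       if event.type == "message_start":
    #           input_tokens = event.message.usage.input_tokens
    #       elif event.type == "message_delta":
    #           output_tokens = event.usage.output_tokens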

    def get_streaming_content(self, response: Stream[MessageStreamEvent]) -> Generator[str, None, None]:
        """Yields content chunks from an Anthropic streaming response."""
        logger.debug("Processing Anthropic stream...")
        full_delta = ""
        try:
            for event in response:
                if event.type == "content_block_delta":
                    # Check that the delta is text before accessing .text; other
                    # delta types such as InputJSONDelta (tool arguments) are
                    # ignored for text streaming.
                    if isinstance(event.delta, TextDelta):
                        delta_text = event.delta.text
                        if delta_text:
                            full_delta += delta_text
                            yield delta_text
                elif event.type == "message_start":
                    logger.debug(f"Anthropic stream started. Model: {event.message.model}")
                elif event.type == "message_stop":
                    # The stop_reason lives on the accumulated message state, not
                    # on this event itself; just log that the stream ended.
                    logger.debug("Anthropic stream message_stop event received.")
                elif event.type == "content_block_start":
                    if event.content_block.type == "tool_use":
                        logger.debug(f"Anthropic stream detected tool use start: ID {event.content_block.id}, Name: {event.content_block.name}")
                elif event.type == "content_block_stop":
                    logger.debug(f"Anthropic stream detected content block stop. Index: {event.index}")
            logger.debug(f"Anthropic stream finished. Total delta length: {len(full_delta)}")
        except Exception as e:
            logger.error(f"Error processing Anthropic stream: {e}", exc_info=True)
            yield json.dumps({"error": f"Stream processing error: {str(e)}"})

    def get_content(self, response: Message) -> str:
        """Extracts content from a non-streaming Anthropic response."""
        try:
            # Combine text content from all text blocks
            text_content = "".join(block.text for block in response.content if block.type == "text")
            logger.debug(f"Extracted content (length {len(text_content)}) from non-streaming Anthropic response.")
            return text_content
        except Exception as e:
            logger.error(f"Error extracting content from Anthropic response: {e}", exc_info=True)
            return f"[Error extracting content: {str(e)}]"

    def has_tool_calls(self, response: Stream[MessageStreamEvent] | Message) -> bool:
        """Checks if the Anthropic response contains tool calls."""
        try:
            if isinstance(response, Message):  # Non-streaming
                # Check both the stop reason and the content blocks.
                has_tool_use_block = any(block.type == "tool_use" for block in response.content)
                has_calls = response.stop_reason == "tool_use" or has_tool_use_block
                logger.debug(f"Non-streaming Anthropic response check: stop_reason='{response.stop_reason}', has_tool_use_block={has_tool_use_block}. Result: {has_calls}")
                return has_calls
            elif isinstance(response, Stream):
                # An unconsumed stream cannot be checked reliably without
                # consuming it; the LLMClient should check after consumption.
                logger.warning("has_tool_calls check on an Anthropic stream is unreliable before consumption.")
                return False
            else:
                logger.warning(f"has_tool_calls received unexpected type for Anthropic: {type(response)}")
                return False
        except Exception as e:
            logger.error(f"Error checking for Anthropic tool calls: {e}", exc_info=True)
            return False

    def parse_tool_calls(self, response: Message) -> list[dict[str, Any]]:
        """Parses tool calls from a non-streaming Anthropic response."""
        parsed_calls = []
        try:
            if not isinstance(response, Message):
                logger.error(f"parse_tool_calls expects Anthropic Message, got {type(response)}")
                return []
            if response.stop_reason != "tool_use":
                # Check the content blocks anyway; tool_use blocks may appear
                # even when stop_reason is something else.
                logger.debug("No tool use indicated by stop_reason.")
            tool_use_blocks = [block for block in response.content if block.type == "tool_use"]
            if not tool_use_blocks:
                logger.debug("No 'tool_use' content blocks found in Anthropic response.")
                return []
            logger.debug(f"Parsing {len(tool_use_blocks)} 'tool_use' blocks from Anthropic response.")
            for block in tool_use_blocks:
                # Tool names are assumed to be prefixed "server__tool", matching
                # the convention used by the OpenAI provider.
                parts = block.name.split("__", 1)
                if len(parts) == 2:
                    server_name, func_name = parts
                else:
                    logger.warning(f"Could not determine server_name from Anthropic tool name '{block.name}'.")
                    server_name = None
                    func_name = block.name
                parsed_calls.append({
                    "id": block.id,
                    "server_name": server_name,
                    "function_name": func_name,
                    # block.input is already a dict; serialize it to a JSON
                    # string to match the format the OpenAI provider produces.
                    "arguments": json.dumps(block.input),
                })
            return parsed_calls
        except Exception as e:
            logger.error(f"Error parsing Anthropic tool calls: {e}", exc_info=True)
            return []
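
    # Illustrative parse result (ids and names are examples only):
    #   [{"id": "toolu_01A", "server_name": "weather",
    #     "function_name": "get_forecast", "arguments": '{"city": "Oslo"}'}]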

    def format_tool_results(self, tool_call_id: str, result: Any) -> dict[str, Any]:
        """Formats a tool result for an Anthropic follow-up request."""
        # Anthropic expects a 'tool_result' content block whose content is
        # typically a string.
        try:
            if isinstance(result, dict):
                content_str = json.dumps(result)
            else:
                content_str = str(result)
        except Exception as e:
            logger.error(f"Error encoding tool result for Anthropic {tool_call_id}: {e}")
            content_str = json.dumps({"error": "Failed to encode tool result", "original_type": str(type(result))})
        logger.debug(f"Formatting Anthropic tool result for call ID {tool_call_id}")
        # This block must be placed inside a "user" role message's content list.
        return {
            "type": "tool_result",
            "tool_use_id": tool_call_id,
            "content": content_str,
            # Optionally add is_error=True if the result indicates an error
        }
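
    # The block returned above is meant to be wrapped by the caller, e.g.:
    #   {"role": "user",
    #    "content": [provider.format_tool_results(call_id, result)]}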

    def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Converts internal tool format to Anthropic's format."""
        logger.debug(f"Converting {len(tools)} tools to Anthropic format.")
        try:
            # The conversion function is expected to handle the server__tool
            # name prefixing.
            anthropic_tools = convert_to_anthropic_tools(tools)
            logger.debug(f"Tool conversion result: {anthropic_tools}")
            return anthropic_tools
        except Exception as e:
            logger.error(f"Error during Anthropic tool conversion: {e}", exc_info=True)
            return []

    # Helper needed by LLMClient's current tool handling logic (mirrors the
    # OpenAI provider's pattern).
    def get_original_message_with_calls(self, response: Message) -> dict[str, Any]:
        """Extracts the assistant's message containing tool calls for Anthropic."""
        try:
            if isinstance(response, Message) and any(block.type == "tool_use" for block in response.content):
                # In Anthropic's structure the Message itself is the assistant's
                # turn; return a dict representation of it, including the
                # tool_use blocks, with the Pydantic content models dumped to dicts.
                content_list = [block.model_dump(exclude_unset=True) for block in response.content]
                return {"role": "assistant", "content": content_list}
            else:
                logger.warning("Could not extract original message with tool calls from Anthropic response.")
                return {"role": "assistant", "content": "[Could not extract tool calls message]"}
        except Exception as e:
            logger.error(f"Error extracting original Anthropic message with calls: {e}", exc_info=True)
            return {"role": "assistant", "content": f"[Error extracting tool calls message: {str(e)}]"}

    def get_usage(self, response: Any) -> dict[str, int] | None:
        """Extracts token usage from a non-streaming Anthropic response."""
        try:
            if isinstance(response, Message) and response.usage:
                usage = {
                    "prompt_tokens": response.usage.input_tokens,
                    "completion_tokens": response.usage.output_tokens,
                    # "total_tokens" could be derived as input + output if needed.
                }
                logger.debug(f"Extracted usage from Anthropic response: {usage}")
                return usage
            else:
                logger.warning(f"Could not extract usage from Anthropic response object of type {type(response)}")
                return None
        except Exception as e:
            logger.error(f"Error extracting usage from Anthropic response: {e}", exc_info=True)
            return None
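

# Example usage (sketch; the model id and API key are placeholders, not
# confirmed by this module):
#   provider = AnthropicProvider(api_key="sk-ant-...")
#   stream = provider.create_chat_completion(
#       messages=[{"role": "user", "content": "Hello"}],
#       model="claude-3-5-sonnet-latest",
#   )
#   for chunk in provider.get_streaming_content(stream):
#       print(chunk, end="")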