# Source code for pipecat.adapters.base_llm_adapter

#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""Base adapter for LLM provider integration.

This module provides the abstract base class for implementing LLM provider-specific
adapters that handle tool format conversion and standardization.
"""

import warnings
from abc import ABC, abstractmethod
from collections.abc import Mapping
from typing import Any, Generic, TypeVar

from loguru import logger

from pipecat.adapters.schemas.function_schema import FunctionSchema
from pipecat.adapters.schemas.tools_schema import ToolsSchema
from pipecat.processors.aggregators.llm_context import (
    LLMContext,
    LLMContextMessage,
    LLMSpecificMessage,
    NotGiven,
)

# Type variable for the provider-specific invocation parameters that an adapter's
# ``get_llm_invocation_params`` returns. Each concrete adapter should bind this to
# a TypedDict; the bound here is only ``Mapping[str, Any]``.
TLLMInvocationParams = TypeVar("TLLMInvocationParams", bound=Mapping[str, Any])


class BaseLLMAdapter(ABC, Generic[TLLMInvocationParams]):
    """Abstract base class for LLM provider adapters.

    Defines the common interface used to translate universal structures into
    whatever a specific LLM provider expects.

    Responsibilities:

    - Deriving provider-specific invocation parameters from a universal
      LLM context.
    - Converting the standardized tools schema into the provider's tool format.
    - Producing a loggable view of the context messages for the provider.
    - Resolving conflicts between ``system_instruction`` and an initial
      system/developer message found in the conversation context.

    Subclasses supply the provider-specific conversion logic.
    """

    def __init__(self):
        """Initialize the adapter."""
        # Tools the service wants sent on every request, keyed by tool name.
        self._builtin_tools: dict[str, FunctionSchema] = {}
        # Shared by the system-instruction helpers so that the conflict
        # warning is logged at most once per adapter instance.
        self._warned_system_instruction = False

    @property
    def builtin_tools(self) -> dict[str, FunctionSchema]:
        """Built-in tools that are merged into every inference request.

        The mapping is keyed by tool name, giving O(1) lookup, insertion and
        removal. The service registers tools here so they are sent on every
        inference request without the user having to list them in their own
        ``ToolsSchema``.

        Returns:
            The mutable dict mapping tool name to ``FunctionSchema``.
        """
        return self._builtin_tools

    @property
    @abstractmethod
    def id_for_llm_specific_messages(self) -> str:
        """Get the identifier used in LLMSpecificMessage instances for this LLM provider.

        Returns:
            The identifier string.
        """

    @abstractmethod
    def get_llm_invocation_params(self, context: LLMContext, **kwargs) -> TLLMInvocationParams:
        """Build provider-specific LLM invocation parameters from a universal LLM context.

        Args:
            context: The LLM context containing messages, tools, etc.
            **kwargs: Extra provider-specific arguments for subclasses to use.

        Returns:
            Provider-specific parameters for invoking the LLM.
        """

    @abstractmethod
    def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[Any]:
        """Convert a tools schema into the provider's own tool format.

        Args:
            tools_schema: The standardized tools schema to convert.

        Returns:
            List of tools in the provider's expected format.
        """

    @abstractmethod
    def get_messages_for_logging(self, context: LLMContext) -> list[dict[str, Any]]:
        """Get the context messages in a form suitable for logging about this provider.

        Args:
            context: The LLM context containing messages.

        Returns:
            List of messages in a format ready for logging about this provider.
        """

    def create_llm_specific_message(self, message: Any) -> LLMSpecificMessage:
        """Wrap content in an LLM-specific message for use in an LLMContext.

        The result is tagged with this adapter's
        ``id_for_llm_specific_messages``, as opposed to a standard message.

        Args:
            message: The message content.

        Returns:
            A LLMSpecificMessage instance.
        """
        provider_id = self.id_for_llm_specific_messages
        return LLMSpecificMessage(llm=provider_id, message=message)

    def get_messages(
        self, context: LLMContext, *, truncate_large_values: bool = False
    ) -> list[LLMContextMessage]:
        """Fetch messages from the LLM context for this adapter's provider.

        Delegates to ``context.get_messages`` with this adapter's
        ``id_for_llm_specific_messages``.

        Args:
            context: The LLM context containing messages.
            truncate_large_values: If True, return deep copies of messages with
                large values replaced by short placeholders.

        Returns:
            List of messages including standard and LLM-specific messages.
        """
        provider_id = self.id_for_llm_specific_messages
        return context.get_messages(provider_id, truncate_large_values=truncate_large_values)

    def from_standard_tools(self, tools: Any) -> list[Any] | NotGiven:
        """Convert tools from standard format to provider format.

        When built-in tools are registered and ``tools`` is a ``ToolsSchema``,
        the built-ins are appended to its standard tools before conversion, so
        they reach every inference request without being declared explicitly.

        Args:
            tools: Tools in standard format or provider-specific format.

        Returns:
            List of tools converted to provider format, or the original
            ``tools`` value unchanged if it is not a ``ToolsSchema``.
        """
        builtins = self._builtin_tools
        is_schema = isinstance(tools, ToolsSchema)

        if builtins and is_schema:
            tools = ToolsSchema(
                standard_tools=tools.standard_tools + list(builtins.values()),
                custom_tools=tools.custom_tools,
            )
        elif builtins and tools is not None:
            # The caller passed tools in a legacy/provider-specific format, so
            # built-ins cannot be merged safely and are not injected. The
            # remedy is to move to ToolsSchema, using custom_tools for anything
            # that does not fit the standard schema.
            # warn() stays in this frame so that stacklevel=2 keeps pointing
            # at the caller of from_standard_tools.
            warnings.warn(
                "Built-in tools (e.g. async tool cancellation) could not be injected "
                "because the supplied tools are not a ToolsSchema instance. "
                "Migrate to ToolsSchema to enable built-in tool support. "
                "Use ToolsSchema(custom_tools=...) as an escape hatch for any "
                "provider-specific tools that don't fit the standard schema.",
                DeprecationWarning,
                stacklevel=2,
            )

        if not is_schema:
            # Not in the standard format: hand back exactly what was given.
            return tools
        return self.to_provider_tools_format(tools)

    def _extract_initial_system(
        self,
        messages: list,
        *,
        system_instruction: str | None = None,
    ) -> str | None:
        """Pull a leading ``"system"`` message out of ``messages``.

        Intended for services that take the system instruction as a separate
        parameter rather than inline in the conversation history.
        ``"developer"`` messages are not handled here.

        Only ``messages[0]`` is inspected. If its role is ``"system"`` and other
        messages follow, it is removed from the list and its content returned.
        If it is the only message, it is instead re-labelled as ``"user"`` and
        nothing is extracted, so the conversation history is never left empty.

        Args:
            messages: Message list in standard format (mutated in-place).
            system_instruction: The system instruction from service settings or
                ``run_inference``. Only used to decide whether to warn about a
                conflict in the single-message case.

        Returns:
            The extracted system message content, or ``None`` if nothing was
            extracted.
        """
        if not messages or messages[0].get("role") != "system":
            return None

        head = messages[0]

        if len(messages) == 1:
            # Extracting would empty the list; demote to "user" instead.
            if system_instruction and not self._warned_system_instruction:
                self._warned_system_instruction = True
                logger.warning(
                    "Both system_instruction and an initial system message in"
                    " context are set. Using system_instruction. The context"
                    " system message is being converted to a user message to"
                    " avoid sending an empty conversation history."
                )
            head["role"] = "user"
            return None

        extracted = head.get("content", "")
        if isinstance(extracted, list):
            # Flatten multi-part content to a single string by joining the
            # text parts with spaces; non-text parts are dropped.
            text_parts = [
                part.get("text", "") for part in extracted if part.get("type") == "text"
            ]
            extracted = " ".join(text_parts)

        # Remove the message only after its content was read successfully.
        messages.pop(0)
        return extracted

    def _resolve_system_instruction(
        self,
        system_from_context: str | None,
        system_instruction: str | None,
        *,
        discard_context_system: bool,
    ) -> str | None:
        """Decide which system instruction wins when two sources may exist.

        Logs a warning, at most once per adapter instance, when both a context
        system message and ``system_instruction`` are present.

        Args:
            system_from_context: Content extracted from an initial ``"system"``
                message by :meth:`_extract_initial_system`, or detected inline
                (OpenAI adapters).
            system_instruction: From service settings or ``run_inference`` param.
            discard_context_system: If ``True`` (non-OpenAI adapters), the
                context system message is discarded when ``system_instruction``
                is also present. If ``False`` (OpenAI adapters), both are kept.

        Returns:
            ``system_instruction`` when it is set; otherwise
            ``system_from_context`` when it is set and
            ``discard_context_system`` is ``True``; otherwise ``None`` (with
            ``discard_context_system`` ``False`` the content already lives in
            the messages, so there is nothing to prepend).
        """
        conflict = bool(system_from_context and system_instruction)
        if conflict and not self._warned_system_instruction:
            self._warned_system_instruction = True
            if discard_context_system:
                notice = (
                    "Both system_instruction and an initial system message"
                    " in context are set. Using system_instruction."
                )
            else:
                notice = (
                    "Both system_instruction and an initial system message"
                    " in context are set, which may be unintended. Keeping"
                    " both, but consider using system_instruction for"
                    " system-level instructions and developer messages in"
                    " context for supplementary guidance."
                )
            logger.warning(notice)

        if system_instruction:
            return system_instruction
        if system_from_context and discard_context_system:
            return system_from_context
        return None