Source code for pipecat.adapters.services.open_ai_responses_adapter

#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""OpenAI Responses API adapter for Pipecat."""

from typing import Any, TypedDict

from openai._types import NotGiven as OpenAINotGiven
from openai.types.responses import FunctionToolParam, ResponseInputItemParam, ToolParam

from pipecat.adapters.base_llm_adapter import BaseLLMAdapter
from pipecat.adapters.schemas.tools_schema import AdapterType, ToolsSchema
from pipecat.processors.aggregators.llm_context import (
    LLMContext,
    LLMContextMessage,
    LLMSpecificMessage,
)



[docs]
class OpenAIResponsesLLMInvocationParams(TypedDict, total=False):
    """Context-based parameters for invoking OpenAI Responses API.

    Declared with ``total=False``, so every key is optional and a given
    instance may carry only a subset of them.
    """

    # Conversation expressed as Responses API input items.
    input: list[ResponseInputItemParam]
    # Tool definitions, or the OpenAI SDK's ``NotGiven`` sentinel.
    tools: list[ToolParam] | OpenAINotGiven
    # System-level instructions string.
    instructions: str




[docs]
class OpenAIResponsesLLMAdapter(BaseLLMAdapter[OpenAIResponsesLLMInvocationParams]):
    """OpenAI Responses API adapter for Pipecat.

    Handles:

    - Converting LLMContext messages to Responses API input items
    - Converting Pipecat's standardized tools schema to Responses API function tool format
    - Extracting and sanitizing messages from the LLM context for logging
    """

    @property
    def id_for_llm_specific_messages(self) -> str:
        """Get the identifier used in LLMSpecificMessage instances.

        Returns:
            The constant string ``"openai_responses"``.
        """
        return "openai_responses"


[docs]
    def get_llm_invocation_params(
        self,
        context: LLMContext,
        *,
        system_instruction: str | None = None,
    ) -> OpenAIResponsesLLMInvocationParams:
        """Build Responses API invocation parameters from a universal LLM context.

        The result always carries ``input`` and ``tools``. When a system
        instruction is supplied it is sent as ``instructions``, unless the
        converted input is empty, in which case it becomes the sole
        ``developer`` input message instead.

        Args:
            context: The LLM context containing messages, tools, etc.
            system_instruction: Optional system instruction from service settings.

        Returns:
            Dictionary of parameters for the Responses API.
        """
        messages = self.get_messages(context)

        # A system_instruction combined with a leading system message in the
        # context is a potential conflict; hand both to the resolver. The
        # context's system message is kept (discard_context_system=False).
        if system_instruction and messages:
            head = messages[0]
            if (
                not isinstance(head, LLMSpecificMessage)
                and head
                and head.get("role") == "system"
            ):
                self._resolve_system_instruction(
                    head.get("content", ""),
                    system_instruction,
                    discard_context_system=False,
                )

        converted = self._convert_messages_to_input(messages)
        tools = self.from_standard_tools(context.tools)

        if not system_instruction:
            return {"input": converted, "tools": tools}

        if converted:
            return {
                "input": converted,
                "tools": tools,
                "instructions": system_instruction,
            }

        # Compatibility: the Responses API requires at least one input message
        # when instructions are provided. Contexts that worked with
        # OpenAILLMService (system_instruction + empty messages) therefore get
        # the instructions converted to an initial developer message.
        #
        # NOTE: The service layer (OpenAIResponsesLLMService) internally
        # manages `previous_response_id` for incremental context delivery over
        # WebSocket. That happens post-adapter: the adapter always produces the
        # full input list and the service decides which subset to send, so
        # this empty-input fallback only matters for one-shot or initial calls.
        #
        # If user-provided explicit `previous_response_id` and/or
        # `conversation_id` (overriding internal management) were ever
        # supported, this logic would need revisiting, because sending
        # instructions without input items would then be legitimate. Over
        # HTTP, `previous_response_id` requires `store=True` (30-day
        # OpenAI-side storage), which is why the HTTP variant doesn't use it.
        # The WebSocket variant avoids this via a connection-local in-memory
        # cache; see the class docstrings in llm.py.
        return {
            "input": [{"role": "developer", "content": system_instruction}],
            "tools": tools,
        }



[docs]
    def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[ToolParam]:
        """Translate a Pipecat tools schema into Responses API tool definitions.

        Every standard function schema becomes a ``function`` tool. Custom
        tools registered under ``AdapterType.OPENAI`` are appended after them
        unchanged.

        Args:
            tools_schema: The Pipecat tools schema to convert.

        Returns:
            List of Responses API function tool definitions.
        """

        def as_function_tool(spec):
            # Build one Responses API function tool from a schema's default dict.
            entry: FunctionToolParam = {
                "type": "function",
                "name": spec["name"],
                "parameters": spec.get("parameters", {}),
                "strict": spec.get("strict"),
            }
            # Only include a description when the schema provides one.
            if "description" in spec:
                entry["description"] = spec["description"]
            return entry

        function_tools = [
            as_function_tool(schema.to_default_dict()) for schema in tools_schema.standard_tools
        ]

        custom = tools_schema.custom_tools
        extras = custom.get(AdapterType.OPENAI, []) if custom else []
        return function_tools + extras



[docs]
    def get_messages_for_logging(self, context: LLMContext) -> list[dict[str, Any]]:
        """Get messages from context in a format ready for logging.

        Binary data (images, audio) is replaced with short placeholders.

        Args:
            context: The LLM context containing messages.

        Returns:
            List of messages in a format ready for logging.
        """
        # Delegate to the regular message accessor, asking it to truncate
        # large values so the result is suitable for log output.
        return self.get_messages(context, truncate_large_values=True)


    def _convert_messages_to_input(
        self, messages: list[LLMContextMessage]
    ) -> list[ResponseInputItemParam]:
        """Convert LLMContext messages to Responses API input items.

        Mapping by message kind:

        - ``LLMSpecificMessage``: the wrapped ``message`` is passed through
          unchanged.
        - ``system`` / ``developer``: emitted as a ``developer`` message.
        - ``user``: emitted as a ``user`` message.
        - ``assistant``: any content is emitted as an ``assistant`` message,
          followed by one ``function_call`` item per entry in ``tool_calls``.
          Text parts of list content are typed ``output_text``.
        - ``tool``: emitted as a ``function_call_output`` item; non-string
          content is stringified with ``str()``.
        - Any other role is skipped.

        Args:
            messages: Messages from the LLMContext.

        Returns:
            List of Responses API input items.
        """
        result: list[ResponseInputItemParam] = []

        for message in messages:
            if isinstance(message, LLMSpecificMessage):
                result.append(message.message)
                continue

            role = message.get("role")

            if role in ("system", "developer"):
                content = message.get("content", "")
                if isinstance(content, list):
                    content = self._convert_multimodal_content(content)
                result.append({"role": "developer", "content": content})

            elif role == "user":
                content = message.get("content", "")
                if isinstance(content, list):
                    content = self._convert_multimodal_content(content)
                result.append({"role": "user", "content": content})

            elif role == "assistant":
                content = message.get("content")
                tool_calls = message.get("tool_calls")

                # Emit the assistant's content whenever there is some, even
                # alongside tool calls, so it is not silently dropped. With
                # no tool calls a message is always emitted.
                if content or not tool_calls:
                    if isinstance(content, list):
                        # Assistant messages take output-style parts: the
                        # Responses API rejects `input_text` on the assistant
                        # role, so retag text parts as `output_text`.
                        content = [
                            {**part, "type": "output_text"}
                            if isinstance(part, dict) and part.get("type") == "input_text"
                            else part
                            for part in self._convert_multimodal_content(content)
                        ]
                    elif content is None:
                        # An explicit `content: None` is not a valid message
                        # body; send an empty string instead.
                        content = ""
                    result.append({"role": "assistant", "content": content})

                for tc in tool_calls or []:
                    func = tc.get("function", {})
                    result.append(
                        {
                            "type": "function_call",
                            "call_id": tc.get("id", ""),
                            "name": func.get("name", ""),
                            "arguments": func.get("arguments", ""),
                        }
                    )

            elif role == "tool":
                content = message.get("content", "")
                if not isinstance(content, str):
                    content = str(content)
                result.append(
                    {
                        "type": "function_call_output",
                        "call_id": message.get("tool_call_id", ""),
                        "output": content,
                    }
                )

        return result

    def _convert_multimodal_content(self, content: list) -> list:
        """Convert multimodal content parts to Responses API format.

        Args:
            content: List of content parts from the LLMContext message.

        Returns:
            List of content parts in Responses API format.
        """
        result = []
        for part in content:
            part_type = part.get("type")
            if part_type == "text":
                result.append({"type": "input_text", "text": part.get("text", "")})
            elif part_type == "image_url":
                image_url_obj = part.get("image_url", {})
                result.append(
                    {
                        "type": "input_image",
                        "image_url": image_url_obj.get("url", ""),
                        "detail": image_url_obj.get("detail", "auto"),
                    }
                )
            else:
                # Pass through other types as-is. Note: "input_audio" is not
                # yet supported by the Responses API (coming soon per OpenAI
                # docs) but the LLMContext format already matches the expected
                # shape, so it should work once support is enabled.
                result.append(part)
        return result