Source code for pipecat.services.openai.realtime.events

#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""Event models and data structures for OpenAI Realtime API communication."""

import json
import uuid
from typing import Any, Literal

from pydantic import BaseModel, ConfigDict, Field

from pipecat.adapters.schemas.tools_schema import ToolsSchema

#
# session properties
#


class AudioFormat(BaseModel):
    """Common parent for the audio format models.

    Parameters:
        type: Audio format type identifier.
    """

    type: str
class PCMAudioFormat(AudioFormat):
    """Audio format settings for PCM audio.

    Parameters:
        type: Audio format type, fixed to "audio/pcm".
        rate: Sample rate, fixed to 24000 for PCM.
    """

    type: Literal["audio/pcm"] = "audio/pcm"
    rate: Literal[24000] = 24000
class PCMUAudioFormat(AudioFormat):
    """Audio format settings for PCMU (G.711 μ-law) audio.

    Parameters:
        type: Audio format type, fixed to "audio/pcmu".
    """

    type: Literal["audio/pcmu"] = "audio/pcmu"
class PCMAAudioFormat(AudioFormat):
    """Audio format settings for PCMA (G.711 A-law) audio.

    Parameters:
        type: Audio format type, fixed to "audio/pcma".
    """

    type: Literal["audio/pcma"] = "audio/pcma"
class InputAudioTranscription(BaseModel):
    """Configuration for audio transcription settings.

    Parameters:
        model: Transcription model to use. Defaults to "gpt-4o-transcribe".
        language: Optional language code for transcription. Defaults to None.
        prompt: Optional transcription hint text. Defaults to None.
    """

    model: str = "gpt-4o-transcribe"
    # Explicit None defaults keep the field declarations in agreement with the
    # constructor below, which already treats both values as optional.
    language: str | None = None
    prompt: str | None = None

    def __init__(
        self,
        model: str | None = "gpt-4o-transcribe",
        language: str | None = None,
        prompt: str | None = None,
    ):
        """Initialize InputAudioTranscription.

        Args:
            model: Transcription model to use (e.g., "gpt-4o-transcribe", "whisper-1").
            language: Optional language code for transcription.
            prompt: Optional transcription hint text.
        """
        # NOTE(review): the signature accepts ``model=None`` but the field is
        # declared as ``str``, so None is presumably rejected by validation —
        # confirm whether that is intended.
        super().__init__(model=model, language=language, prompt=prompt)
class TurnDetection(BaseModel):
    """Settings for server-side voice activity detection (VAD).

    Parameters:
        type: Detection type, must be "server_vad".
        threshold: VAD threshold in the range 0.0-1.0. Defaults to 0.5.
        prefix_padding_ms: Milliseconds of padding kept before speech starts.
            Defaults to 300.
        silence_duration_ms: Milliseconds of silence used to detect the end of
            speech. Defaults to 500.
    """

    type: Literal["server_vad"] | None = "server_vad"
    threshold: float | None = 0.5
    prefix_padding_ms: int | None = 300
    silence_duration_ms: int | None = 500
class SemanticTurnDetection(BaseModel):
    """Settings for semantic-based turn detection.

    Parameters:
        type: Detection type, must be "semantic_vad".
        eagerness: How eagerly turns are detected: "low", "medium", "high",
            or "auto".
        create_response: Whether a response is created automatically when a
            turn is detected.
        interrupt_response: Whether an ongoing response is interrupted when a
            turn is detected.
    """

    type: Literal["semantic_vad"] | None = "semantic_vad"
    eagerness: Literal["low", "medium", "high", "auto"] | None = None
    create_response: bool | None = None
    interrupt_response: bool | None = None
class InputAudioNoiseReduction(BaseModel):
    """Noise reduction settings applied to input audio.

    Parameters:
        type: Noise reduction type for different microphone scenarios
            ("near_field" or "far_field"). Declared without a default.
    """

    type: Literal["near_field", "far_field"] | None
class AudioInput(BaseModel):
    """Settings for the audio input side of a session.

    Parameters:
        format: Format of the input audio.
        transcription: Input audio transcription settings.
        noise_reduction: Input audio noise reduction settings.
        turn_detection: Turn detection settings, or False to disable.
    """

    format: PCMAudioFormat | PCMUAudioFormat | PCMAAudioFormat | None = None
    transcription: InputAudioTranscription | None = None
    noise_reduction: InputAudioNoiseReduction | None = None
    turn_detection: TurnDetection | SemanticTurnDetection | bool | None = None
class AudioOutput(BaseModel):
    """Settings for the audio output side of a session.

    Parameters:
        format: Format of the output audio.
        voice: Voice the model uses to respond.
        speed: Speed of the model's spoken response.
    """

    format: PCMAudioFormat | PCMUAudioFormat | PCMAAudioFormat | None = None
    voice: str | None = None
    speed: float | None = None
class AudioConfiguration(BaseModel):
    """Combined input and output audio settings.

    Parameters:
        input: Settings for input audio.
        output: Settings for output audio.
    """

    input: AudioInput | None = None
    output: AudioOutput | None = None
class SessionProperties(BaseModel):
    """Settings describing an OpenAI Realtime session.

    Parameters:
        type: Session type, always "realtime".
        object: Object type identifier, always "realtime.session".
        id: Unique identifier of the session.
        model: Realtime model used for this session.
            Note: The model is set at connection time via model arg in __init__
            and cannot be changed during the session.
        output_modalities: Modalities the model can respond with.
        instructions: System instructions for the assistant.
        audio: Input and output audio settings.
        tools: Function tools available to the assistant.
        tool_choice: Tool usage strategy ("auto", "none", or "required").
        max_output_tokens: Maximum tokens in a response, or "inf" for unlimited.
        tracing: Tracing configuration options.
        prompt: Reference to a prompt template and its variables.
        expires_at: Session expiration timestamp.
        include: Additional fields to include in server outputs.
    """

    # Arbitrary types must be allowed so the ``tools`` field can hold a ToolsSchema.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    type: Literal["realtime"] | None = "realtime"
    object: Literal["realtime.session"] | None = None
    id: str | None = None
    model: str | None = None
    output_modalities: list[Literal["text", "audio"]] | None = None
    instructions: str | None = None
    audio: AudioConfiguration | None = None
    # A ToolsSchema value only ever comes from the user, via either the
    # OpenAIRealtimeLLMService constructor or an LLMUpdateSettingsFrame.
    # ToolsSchema is never serialized to, or deserialized from, the server.
    tools: ToolsSchema | list[dict] | None = None
    tool_choice: Literal["auto", "none", "required"] | None = None
    max_output_tokens: int | Literal["inf"] | None = None
    tracing: Literal["auto"] | dict | None = None
    prompt: dict | None = None
    expires_at: int | None = None
    include: list[str] | None = None
#
# context
#
class ItemContent(BaseModel):
    """One piece of content inside a conversation item.

    Parameters:
        type: Content type (text, audio, input_text, input_audio, input_image,
            output_text, or output_audio).
        text: Text for text-based content.
        audio: Base64-encoded audio data for audio content.
        transcript: Transcribed text for audio content.
        image_url: Base64-encoded image data as a data URI for input_image content.
        detail: Detail level for image processing ("auto", "low", or "high").
    """

    type: Literal[
        "text",
        "audio",
        "input_text",
        "input_audio",
        "input_image",
        "output_text",
        "output_audio",
    ]
    text: str | None = None
    # Audio payload, base64-encoded.
    audio: str | None = None
    transcript: str | None = None
    # Image payload, base64-encoded and wrapped in a data URI.
    image_url: str | None = None
    detail: Literal["auto", "low", "high"] | None = None
class ConversationItem(BaseModel):
    """An item within the realtime session's conversation.

    Parameters:
        id: Unique identifier for the item; generated automatically when omitted.
        object: Object type identifier for the realtime API.
        type: Item type (message, function_call, or function_call_output).
        status: Current status of the item.
        role: Speaker role for message items (user, assistant, or system).
        content: Content list for message items.
        call_id: Function call identifier for function_call items.
        name: Function name for function_call items.
        arguments: Function arguments as a JSON string for function_call items.
        output: Function output as a JSON string for function_call_output items.
    """

    # ``UUID.hex`` is already a string (32 hex digits, no dashes).
    id: str = Field(default_factory=lambda: uuid.uuid4().hex)
    object: Literal["realtime.item"] | None = None
    type: Literal["message", "function_call", "function_call_output"]
    status: Literal["completed", "in_progress", "incomplete"] | None = None

    # Populated on message items.
    role: Literal["user", "assistant", "system"] | None = None
    content: list[ItemContent] | None = None

    # Populated on function_call / function_call_output items.
    call_id: str | None = None
    name: str | None = None
    arguments: str | None = None
    output: str | None = None
class RealtimeConversation(BaseModel):
    """Identifies a conversation in a realtime session.

    Parameters:
        id: Unique identifier of the conversation.
        object: Object type identifier, always "realtime.conversation".
    """

    id: str
    object: Literal["realtime.conversation"]
class ResponseProperties(BaseModel):
    """Per-response settings for the assistant.

    Parameters:
        output_modalities: Output modalities for the response. Must be either
            ["text"] or ["audio"]. Defaults to ["audio"].
        instructions: Instructions specific to this response.
        audio: Audio settings for this response.
        tools: Tools available for this response.
        tool_choice: Tool usage strategy for this response.
        temperature: Sampling temperature for this response.
        max_output_tokens: Maximum tokens for this response.
    """

    output_modalities: list[Literal["text", "audio"]] | None = ["audio"]
    instructions: str | None = None
    audio: AudioConfiguration | None = None
    tools: list[dict] | None = None
    tool_choice: Literal["auto", "none", "required"] | None = None
    temperature: float | None = None
    max_output_tokens: int | Literal["inf"] | None = None
#
# error class
#
class RealtimeError(BaseModel):
    """Details of an error reported by the realtime API.

    Parameters:
        type: Error type identifier.
        code: Specific error code. Defaults to an empty string.
        message: Human-readable error message.
        param: Name of the parameter that caused the error, if applicable.
        event_id: ID of the event associated with the error, if applicable.
    """

    type: str
    code: str | None = ""
    message: str
    param: str | None = None
    event_id: str | None = None
#
# client events
#
class ClientEvent(BaseModel):
    """Common parent for events the client sends to the realtime API.

    Parameters:
        event_id: Unique identifier for the event; a UUID4 string is generated
            when none is supplied.
    """

    event_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
class SessionUpdateEvent(ClientEvent):
    """Event to update session properties.

    Parameters:
        type: Event type, always "session.update".
        session: Updated session properties.
    """

    type: Literal["session.update"] = "session.update"
    session: SessionProperties

    def model_dump(self, *args, **kwargs) -> dict[str, Any]:
        """Serialize the event to a dictionary.

        Handles special serialization for turn_detection where False becomes
        null: a ``False`` value at ``session.audio.input.turn_detection`` is
        rewritten to ``None`` in the returned dictionary.

        Args:
            *args: Positional arguments passed to parent model_dump.
            **kwargs: Keyword arguments passed to parent model_dump.

        Returns:
            Dictionary representation of the event.
        """
        dump = super().model_dump(*args, **kwargs)

        # Walk down to session.audio.input tolerantly: any level may be absent
        # (e.g. filtered out through ``include``/``exclude``) or None, in which
        # case there is nothing to rewrite.
        session = dump.get("session") or {}
        audio = session.get("audio") or {}
        audio_input = audio.get("input")

        # Identity check on purpose: only a literal False is rewritten; a
        # missing key, None, or a turn-detection dict is left untouched.
        if audio_input and audio_input.get("turn_detection") is False:
            audio_input["turn_detection"] = None

        return dump
class InputAudioBufferAppendEvent(ClientEvent):
    """Client event that appends audio data to the input buffer.

    Parameters:
        type: Event type, always "input_audio_buffer.append".
        audio: Base64-encoded audio data to append.
    """

    type: Literal["input_audio_buffer.append"] = "input_audio_buffer.append"
    # Audio payload, base64-encoded.
    audio: str
class InputAudioBufferCommitEvent(ClientEvent):
    """Client event that commits the current input audio buffer.

    Parameters:
        type: Event type, always "input_audio_buffer.commit".
    """

    type: Literal["input_audio_buffer.commit"] = "input_audio_buffer.commit"
class InputAudioBufferClearEvent(ClientEvent):
    """Client event that clears the input audio buffer.

    Parameters:
        type: Event type, always "input_audio_buffer.clear".
    """

    type: Literal["input_audio_buffer.clear"] = "input_audio_buffer.clear"
class ConversationItemCreateEvent(ClientEvent):
    """Client event that creates a new conversation item.

    Parameters:
        type: Event type, always "conversation.item.create".
        previous_item_id: ID of the item to insert after, if any.
        item: Conversation item to create.
    """

    type: Literal["conversation.item.create"] = "conversation.item.create"
    previous_item_id: str | None = None
    item: ConversationItem
class ConversationItemTruncateEvent(ClientEvent):
    """Client event that truncates a conversation item's audio content.

    Parameters:
        type: Event type, always "conversation.item.truncate".
        item_id: ID of the item to truncate.
        content_index: Index of the content to truncate within the item.
        audio_end_ms: End time, in milliseconds, for the truncated audio.
    """

    type: Literal["conversation.item.truncate"] = "conversation.item.truncate"
    item_id: str
    content_index: int
    audio_end_ms: int
class ConversationItemDeleteEvent(ClientEvent):
    """Client event that deletes a conversation item.

    Parameters:
        type: Event type, always "conversation.item.delete".
        item_id: ID of the item to delete.
    """

    type: Literal["conversation.item.delete"] = "conversation.item.delete"
    item_id: str
class ConversationItemRetrieveEvent(ClientEvent):
    """Client event that retrieves a conversation item by ID.

    Parameters:
        type: Event type, always "conversation.item.retrieve".
        item_id: ID of the item to retrieve.
    """

    type: Literal["conversation.item.retrieve"] = "conversation.item.retrieve"
    item_id: str
class ResponseCreateEvent(ClientEvent):
    """Client event that creates a new assistant response.

    Parameters:
        type: Event type, always "response.create".
        response: Optional configuration properties for the response.
    """

    type: Literal["response.create"] = "response.create"
    response: ResponseProperties | None = None
class ResponseCancelEvent(ClientEvent):
    """Client event that cancels the current assistant response.

    Parameters:
        type: Event type, always "response.cancel".
    """

    type: Literal["response.cancel"] = "response.cancel"
#
# server events
#
class ServerEvent(BaseModel):
    """Common parent for events received from the realtime API.

    Parameters:
        event_id: Unique identifier of the event.
        type: Type of the server event.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    event_id: str
    type: str
class SessionCreatedEvent(ServerEvent):
    """Server event reporting that a session was created.

    Parameters:
        type: Event type, always "session.created".
        session: Properties of the created session.
    """

    type: Literal["session.created"]
    session: SessionProperties
class SessionUpdatedEvent(ServerEvent):
    """Server event reporting that a session was updated.

    Parameters:
        type: Event type, always "session.updated".
        session: Properties of the updated session.
    """

    type: Literal["session.updated"]
    session: SessionProperties
class ConversationCreated(ServerEvent):
    """Server event reporting that a conversation was created.

    Parameters:
        type: Event type, always "conversation.created".
        conversation: The conversation that was created.
    """

    type: Literal["conversation.created"]
    conversation: RealtimeConversation
class ConversationItemAdded(ServerEvent):
    """Server event reporting that a conversation item was added.

    Parameters:
        type: Event type, always "conversation.item.added".
        previous_item_id: ID of the previous item, if any.
        item: The conversation item that was added.
    """

    type: Literal["conversation.item.added"]
    previous_item_id: str | None = None
    item: ConversationItem
class ConversationItemDone(ServerEvent):
    """Server event reporting that a conversation item finished processing.

    Parameters:
        type: Event type, always "conversation.item.done".
        previous_item_id: ID of the previous item, if any.
        item: The completed conversation item.
    """

    type: Literal["conversation.item.done"]
    previous_item_id: str | None = None
    item: ConversationItem
class ConversationItemInputAudioTranscriptionDelta(ServerEvent):
    """Server event carrying an incremental piece of input audio transcription.

    Parameters:
        type: Event type, always "conversation.item.input_audio_transcription.delta".
        item_id: ID of the conversation item being transcribed.
        content_index: Index of the content within the item.
        delta: Incremental transcription text.
    """

    type: Literal["conversation.item.input_audio_transcription.delta"]
    item_id: str
    content_index: int
    delta: str
class ConversationItemInputAudioTranscriptionCompleted(ServerEvent):
    """Server event reporting that input audio transcription finished.

    Parameters:
        type: Event type, always "conversation.item.input_audio_transcription.completed".
        item_id: ID of the conversation item that was transcribed.
        content_index: Index of the content within the item.
        transcript: Complete transcription text.
    """

    type: Literal["conversation.item.input_audio_transcription.completed"]
    item_id: str
    content_index: int
    transcript: str
class ConversationItemInputAudioTranscriptionFailed(ServerEvent):
    """Server event reporting that input audio transcription failed.

    Parameters:
        type: Event type, always "conversation.item.input_audio_transcription.failed".
        item_id: ID of the conversation item whose transcription failed.
        content_index: Index of the content within the item.
        error: Details of the transcription failure.
    """

    type: Literal["conversation.item.input_audio_transcription.failed"]
    item_id: str
    content_index: int
    error: RealtimeError
class ConversationItemTruncated(ServerEvent):
    """Server event reporting that a conversation item was truncated.

    Parameters:
        type: Event type, always "conversation.item.truncated".
        item_id: ID of the truncated conversation item.
        content_index: Index of the content within the item.
        audio_end_ms: End time, in milliseconds, for the truncated audio.
    """

    type: Literal["conversation.item.truncated"]
    item_id: str
    content_index: int
    audio_end_ms: int
class ConversationItemDeleted(ServerEvent):
    """Server event reporting that a conversation item was deleted.

    Parameters:
        type: Event type, always "conversation.item.deleted".
        item_id: ID of the deleted conversation item.
    """

    type: Literal["conversation.item.deleted"]
    item_id: str
class ConversationItemRetrieved(ServerEvent):
    """Server event carrying a retrieved conversation item.

    Parameters:
        type: Event type, always "conversation.item.retrieved".
        item: The conversation item that was retrieved.
    """

    type: Literal["conversation.item.retrieved"]
    item: ConversationItem
class ResponseCreated(ServerEvent):
    """Server event reporting that an assistant response was created.

    Parameters:
        type: Event type, always "response.created".
        response: The response object that was created.
    """

    type: Literal["response.created"]
    # Forward reference by name to the Response model.
    response: "Response"
class ResponseDone(ServerEvent):
    """Server event reporting that an assistant response is complete.

    Parameters:
        type: Event type, always "response.done".
        response: The completed response object.
    """

    type: Literal["response.done"]
    # Forward reference by name to the Response model.
    response: "Response"
class ResponseOutputItemAdded(ServerEvent):
    """Server event reporting that an output item was added to a response.

    Parameters:
        type: Event type, always "response.output_item.added".
        response_id: ID of the response.
        output_index: Index of the output item.
        item: The conversation item that was added.
    """

    type: Literal["response.output_item.added"]
    response_id: str
    output_index: int
    item: ConversationItem
class ResponseOutputItemDone(ServerEvent):
    """Server event reporting that an output item is complete.

    Parameters:
        type: Event type, always "response.output_item.done".
        response_id: ID of the response.
        output_index: Index of the output item.
        item: The completed conversation item.
    """

    type: Literal["response.output_item.done"]
    response_id: str
    output_index: int
    item: ConversationItem
class ResponseContentPartAdded(ServerEvent):
    """Server event reporting that a content part was added to a response.

    Parameters:
        type: Event type, always "response.content_part.added".
        response_id: ID of the response.
        item_id: ID of the conversation item.
        output_index: Index of the output item.
        content_index: Index of the content part.
        part: The content part that was added.
    """

    type: Literal["response.content_part.added"]
    response_id: str
    item_id: str
    output_index: int
    content_index: int
    part: ItemContent
class ResponseContentPartDone(ServerEvent):
    """Server event reporting that a content part is complete.

    Parameters:
        type: Event type, always "response.content_part.done".
        response_id: ID of the response.
        item_id: ID of the conversation item.
        output_index: Index of the output item.
        content_index: Index of the content part.
        part: The completed content part.
    """

    type: Literal["response.content_part.done"]
    response_id: str
    item_id: str
    output_index: int
    content_index: int
    part: ItemContent
class ResponseTextDelta(ServerEvent):
    """Server event carrying an incremental piece of response text.

    Parameters:
        type: Event type, always "response.output_text.delta".
        response_id: ID of the response.
        item_id: ID of the conversation item.
        output_index: Index of the output item.
        content_index: Index of the content part.
        delta: Incremental text content.
    """

    type: Literal["response.output_text.delta"]
    response_id: str
    item_id: str
    output_index: int
    content_index: int
    delta: str
class ResponseTextDone(ServerEvent):
    """Server event reporting that text content is complete.

    Parameters:
        type: Event type, always "response.output_text.done".
        response_id: ID of the response.
        item_id: ID of the conversation item.
        output_index: Index of the output item.
        content_index: Index of the content part.
        text: Complete text content.
    """

    type: Literal["response.output_text.done"]
    response_id: str
    item_id: str
    output_index: int
    content_index: int
    text: str
class ResponseAudioTranscriptDelta(ServerEvent):
    """Server event carrying an incremental piece of a response's audio transcript.

    Parameters:
        type: Event type, always "response.output_audio_transcript.delta".
        response_id: ID of the response.
        item_id: ID of the conversation item.
        output_index: Index of the output item.
        content_index: Index of the content part.
        delta: Incremental transcript text.
    """

    type: Literal["response.output_audio_transcript.delta"]
    response_id: str
    item_id: str
    output_index: int
    content_index: int
    delta: str
class ResponseAudioTranscriptDone(ServerEvent):
    """Server event reporting that the audio transcript is complete.

    Parameters:
        type: Event type, always "response.output_audio_transcript.done".
        response_id: ID of the response.
        item_id: ID of the conversation item.
        output_index: Index of the output item.
        content_index: Index of the content part.
        transcript: Complete transcript text.
    """

    type: Literal["response.output_audio_transcript.done"]
    response_id: str
    item_id: str
    output_index: int
    content_index: int
    transcript: str
class ResponseAudioDelta(ServerEvent):
    """Server event carrying an incremental chunk of response audio.

    Parameters:
        type: Event type, always "response.output_audio.delta".
        response_id: ID of the response.
        item_id: ID of the conversation item.
        output_index: Index of the output item.
        content_index: Index of the content part.
        delta: Base64-encoded incremental audio data.
    """

    type: Literal["response.output_audio.delta"]
    response_id: str
    item_id: str
    output_index: int
    content_index: int
    # Audio payload, base64-encoded.
    delta: str
class ResponseAudioDone(ServerEvent):
    """Server event reporting that audio content is complete.

    Parameters:
        type: Event type, always "response.output_audio.done".
        response_id: ID of the response.
        item_id: ID of the conversation item.
        output_index: Index of the output item.
        content_index: Index of the content part.
    """

    type: Literal["response.output_audio.done"]
    response_id: str
    item_id: str
    output_index: int
    content_index: int
class ResponseFunctionCallArgumentsDelta(ServerEvent):
    """Server event carrying an incremental piece of function call arguments.

    Parameters:
        type: Event type, always "response.function_call_arguments.delta".
        response_id: ID of the response.
        item_id: ID of the conversation item.
        output_index: Index of the output item.
        call_id: ID of the function call.
        delta: Incremental function arguments as JSON.
    """

    type: Literal["response.function_call_arguments.delta"]
    response_id: str
    item_id: str
    output_index: int
    call_id: str
    delta: str
class ResponseFunctionCallArgumentsDone(ServerEvent):
    """Server event reporting that function call arguments are complete.

    Parameters:
        type: Event type, always "response.function_call_arguments.done".
        response_id: ID of the response.
        item_id: ID of the conversation item.
        output_index: Index of the output item.
        call_id: ID of the function call.
        arguments: Complete function arguments as a JSON string.
    """

    type: Literal["response.function_call_arguments.done"]
    response_id: str
    item_id: str
    output_index: int
    call_id: str
    arguments: str
class InputAudioBufferSpeechStarted(ServerEvent):
    """Server event reporting that speech started in the input audio buffer.

    Parameters:
        type: Event type, always "input_audio_buffer.speech_started".
        audio_start_ms: Start time of the speech, in milliseconds.
        item_id: ID of the associated conversation item.
    """

    type: Literal["input_audio_buffer.speech_started"]
    audio_start_ms: int
    item_id: str
class InputAudioBufferSpeechStopped(ServerEvent):
    """Server event reporting that speech stopped in the input audio buffer.

    Parameters:
        type: Event type, always "input_audio_buffer.speech_stopped".
        audio_end_ms: End time of the speech, in milliseconds.
        item_id: ID of the associated conversation item.
    """

    type: Literal["input_audio_buffer.speech_stopped"]
    audio_end_ms: int
    item_id: str
class InputAudioBufferCommitted(ServerEvent):
    """Server event reporting that the input audio buffer was committed.

    Parameters:
        type: Event type, always "input_audio_buffer.committed".
        previous_item_id: ID of the previous item, if any.
        item_id: ID of the committed conversation item.
    """

    type: Literal["input_audio_buffer.committed"]
    previous_item_id: str | None = None
    item_id: str
class InputAudioBufferCleared(ServerEvent):
    """Server event reporting that the input audio buffer was cleared.

    Parameters:
        type: Event type, always "input_audio_buffer.cleared".
    """

    type: Literal["input_audio_buffer.cleared"]
class ErrorEvent(ServerEvent):
    """Server event reporting that an error occurred.

    Parameters:
        type: Event type, always "error".
        error: Details of the error.
    """

    type: Literal["error"]
    error: RealtimeError
class RateLimitsUpdated(ServerEvent):
    """Server event reporting that rate limits were updated.

    Parameters:
        type: Event type, always "rate_limits.updated".
        rate_limits: List of rate limit entries, each a string-keyed dictionary.
    """

    type: Literal["rate_limits.updated"]
    rate_limits: list[dict[str, Any]]
class CachedTokensDetails(BaseModel):
    """Breakdown of cached token counts.

    Parameters:
        text_tokens: Number of cached text tokens. Defaults to 0.
        audio_tokens: Number of cached audio tokens. Defaults to 0.
    """

    text_tokens: int | None = 0
    audio_tokens: int | None = 0
class TokenDetails(BaseModel):
    """Fine-grained token usage counts.

    Parameters:
        cached_tokens: Number of cached tokens used. Defaults to 0.
        text_tokens: Number of text tokens used. Defaults to 0.
        audio_tokens: Number of audio tokens used. Defaults to 0.
        cached_tokens_details: Detailed breakdown of cached tokens.
        image_tokens: Number of image tokens used (for input only). Defaults to 0.
    """

    # Fields not declared below are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")

    cached_tokens: int | None = 0
    text_tokens: int | None = 0
    audio_tokens: int | None = 0
    cached_tokens_details: CachedTokensDetails | None = None
    image_tokens: int | None = 0
class Usage(BaseModel):
    """Token usage figures for a single response.

    Parameters:
        total_tokens: Total number of tokens used.
        input_tokens: Number of input tokens used.
        output_tokens: Number of output tokens used.
        input_token_details: Detailed breakdown of input token usage.
        output_token_details: Detailed breakdown of output token usage.
    """

    total_tokens: int
    input_tokens: int
    output_tokens: int
    input_token_details: TokenDetails
    output_token_details: TokenDetails
class Response(BaseModel):
    """A complete assistant response.

    Parameters:
        id: Unique identifier for the response.
        object: Object type, always "realtime.response".
        status: Current status of the response.
        status_details: Additional status information.
        output: List of conversation items in the response.
        conversation_id: Which conversation the response is added to.
        output_modalities: The set of modalities the model used to respond.
        max_output_tokens: Maximum number of output tokens used.
        audio: Audio configuration for the response.
        usage: Token usage statistics for the response.
        voice: The voice the model used to respond.
        temperature: Sampling temperature used for the response.
        output_audio_format: The format of output audio.
    """

    id: str
    object: Literal["realtime.response"]
    status: Literal["completed", "in_progress", "incomplete", "cancelled", "failed"]
    status_details: Any
    output: list[ConversationItem]
    # Documented in the parameter list above but previously not declared, so
    # the value was not kept on the parsed model. Optional, so payloads that
    # omit it still validate.
    conversation_id: str | None = None
    output_modalities: list[Literal["text", "audio"]] | None = None
    max_output_tokens: int | Literal["inf"] | None = None
    audio: AudioConfiguration | None = None
    usage: Usage | None = None
    voice: str | None = None
    temperature: float | None = None
    output_audio_format: str | None = None
# Dispatch table used by parse_server_event: maps the "type" string of an
# incoming server event to the model class used to validate that event.
# Event types missing from this table are reported as unimplemented.
_server_event_types: dict[str, type[ServerEvent]] = {
    "error": ErrorEvent,
    # session lifecycle
    "session.created": SessionCreatedEvent,
    "session.updated": SessionUpdatedEvent,
    "conversation.created": ConversationCreated,
    # input audio buffer
    "input_audio_buffer.committed": InputAudioBufferCommitted,
    "input_audio_buffer.cleared": InputAudioBufferCleared,
    "input_audio_buffer.speech_started": InputAudioBufferSpeechStarted,
    "input_audio_buffer.speech_stopped": InputAudioBufferSpeechStopped,
    # conversation items
    "conversation.item.added": ConversationItemAdded,
    "conversation.item.done": ConversationItemDone,
    "conversation.item.input_audio_transcription.delta": ConversationItemInputAudioTranscriptionDelta,
    "conversation.item.input_audio_transcription.completed": ConversationItemInputAudioTranscriptionCompleted,
    "conversation.item.input_audio_transcription.failed": ConversationItemInputAudioTranscriptionFailed,
    "conversation.item.truncated": ConversationItemTruncated,
    "conversation.item.deleted": ConversationItemDeleted,
    "conversation.item.retrieved": ConversationItemRetrieved,
    # responses
    "response.created": ResponseCreated,
    "response.done": ResponseDone,
    "response.output_item.added": ResponseOutputItemAdded,
    "response.output_item.done": ResponseOutputItemDone,
    "response.content_part.added": ResponseContentPartAdded,
    "response.content_part.done": ResponseContentPartDone,
    "response.output_text.delta": ResponseTextDelta,
    "response.output_text.done": ResponseTextDone,
    "response.output_audio_transcript.delta": ResponseAudioTranscriptDelta,
    "response.output_audio_transcript.done": ResponseAudioTranscriptDone,
    "response.output_audio.delta": ResponseAudioDelta,
    "response.output_audio.done": ResponseAudioDone,
    "response.function_call_arguments.delta": ResponseFunctionCallArgumentsDelta,
    "response.function_call_arguments.done": ResponseFunctionCallArgumentsDone,
    # rate limits
    "rate_limits.updated": RateLimitsUpdated,
}
def parse_server_event(str):
    """Parse a server event from JSON string.

    The JSON is decoded, its "type" entry is looked up in
    ``_server_event_types``, and the matching model class validates the
    decoded payload.

    Args:
        str: JSON string containing the server event. NOTE: this name shadows
            the builtin ``str`` inside the function; it is kept unchanged so
            existing keyword callers continue to work.

    Returns:
        Parsed server event object of the appropriate type.

    Raises:
        Exception: If the event type is unimplemented or parsing fails. The
            message is the underlying error text followed by the raw input,
            and the underlying error is attached as ``__cause__``.
    """
    try:
        event = json.loads(str)
        event_type = event["type"]
        if event_type not in _server_event_types:
            raise Exception(f"Unimplemented server event type: {event_type}")
        return _server_event_types[event_type].model_validate(event)
    except Exception as e:
        # Explicit chaining keeps the original failure (JSON decode error,
        # missing "type" key, validation error, ...) reachable via __cause__.
        raise Exception(f"{e} \n\n{str}") from e