Source code for pipecat.services.groq.tts

#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""Groq text-to-speech service implementation."""

import io
import wave
from collections.abc import AsyncGenerator
from dataclasses import dataclass, field

from loguru import logger
from pydantic import BaseModel

from pipecat.frames.frames import (
    ErrorFrame,
    Frame,
    TTSAudioRawFrame,
)
from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, assert_given
from pipecat.services.tts_service import TTSService
from pipecat.transcriptions.language import Language
from pipecat.utils.tracing.service_decorators import traced_tts

try:
    from groq import AsyncGroq
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error("In order to use Groq, you need to `pip install pipecat-ai[groq]`.")
    raise Exception(f"Missing module: {e}")


[docs] @dataclass class GroqTTSSettings(TTSSettings): """Settings for GroqTTSService. Parameters: speed: Speech speed multiplier. Defaults to 1.0. """ speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
[docs] class GroqTTSService(TTSService): """Groq text-to-speech service implementation. Provides text-to-speech synthesis using Groq's TTS API. The service operates at a fixed 48kHz sample rate and supports various voices and output formats. """ Settings = GroqTTSSettings _settings: Settings
[docs] class InputParams(BaseModel): """Input parameters for Groq TTS configuration. .. deprecated:: 0.0.105 Use ``settings=GroqTTSService.Settings(...)`` instead. Parameters: language: Language for speech synthesis. Defaults to English. speed: Speech speed multiplier. Defaults to 1.0. """ language: Language | None = Language.EN speed: float | None = 1.0
GROQ_SAMPLE_RATE = 48000 # Groq TTS only supports 48kHz sample rate
[docs] def __init__( self, *, api_key: str, output_format: str = "wav", params: InputParams | None = None, model_name: str | None = None, voice_id: str | None = None, sample_rate: int | None = GROQ_SAMPLE_RATE, settings: Settings | None = None, **kwargs, ): """Initialize Groq TTS service. Args: api_key: Groq API key for authentication. output_format: Audio output format. Defaults to "wav". params: Additional input parameters for voice customization. .. deprecated:: 0.0.105 Use ``settings=GroqTTSService.Settings(...)`` instead. model_name: TTS model to use. .. deprecated:: 0.0.105 Use ``settings=GroqTTSService.Settings(model=...)`` instead. voice_id: Voice identifier to use. .. deprecated:: 0.0.105 Use ``settings=GroqTTSService.Settings(voice=...)`` instead. sample_rate: Audio sample rate. Must be 48000 Hz for Groq TTS. settings: Runtime-updatable settings. When provided alongside deprecated parameters, ``settings`` values take precedence. **kwargs: Additional arguments passed to parent TTSService class. """ if sample_rate != self.GROQ_SAMPLE_RATE: logger.warning(f"Groq TTS only supports {self.GROQ_SAMPLE_RATE}Hz sample rate. ") # 1. Initialize default_settings with hardcoded defaults default_settings = self.Settings( model="canopylabs/orpheus-v1-english", voice="autumn", language="en", speed=1.0, ) # 2. Apply direct init arg overrides (deprecated) if model_name is not None: self._warn_init_param_moved_to_settings("model_name", "model") default_settings.model = model_name if voice_id is not None: self._warn_init_param_moved_to_settings("voice_id", "voice") default_settings.voice = voice_id # 3. Apply params overrides — only if settings not provided if params is not None: self._warn_init_param_moved_to_settings("params") if not settings: default_settings.language = str(params.language) if params.language else "en" default_settings.speed = params.speed # 4. Apply settings delta (canonical API, always wins) if settings is not None: default_settings.apply_update(settings) super().__init__( pause_frame_processing=True, push_start_frame=True, push_stop_frames=True, sample_rate=sample_rate, settings=default_settings, **kwargs, ) self._api_key = api_key self._output_format = output_format self._client = AsyncGroq(api_key=self._api_key)
[docs] def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. Returns: True, as Groq TTS service supports metrics generation. """ return True
[docs] @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: """Generate speech from text using Groq's TTS API. Args: text: The text to synthesize into speech. context_id: The context ID for tracking audio frames. Yields: Frame: Audio frames containing the synthesized speech data. """ logger.debug(f"{self}: Generating TTS [{text}]") measuring_ttfb = True try: model = assert_given(self._settings.model) voice = assert_given(self._settings.voice) speed = assert_given(self._settings.speed) if model is None: raise ValueError("Groq TTS model must be specified") if speed is None: raise ValueError("Groq TTS speed must be specified") response = await self._client.audio.speech.create( model=model, voice=voice, response_format=self._output_format, # Note: as of 2026-02-25, only a speed of 1.0 is supported, but # here we pass it for completeness and future-proofing speed=speed, input=text, ) async for data in response.iter_bytes(): if measuring_ttfb: await self.stop_ttfb_metrics() measuring_ttfb = False with wave.open(io.BytesIO(data)) as w: channels = w.getnchannels() frame_rate = w.getframerate() num_frames = w.getnframes() bytes = w.readframes(num_frames) yield TTSAudioRawFrame(bytes, frame_rate, channels, context_id=context_id) except Exception as e: yield ErrorFrame(error=f"Unknown error occurred: {e}")