Source code for pipecat.services.whisper.utils

#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""Utility functions for extracting probability metrics from STT services."""

import math

from pipecat.frames.frames import TranscriptionFrame


def extract_whisper_probability(frame: TranscriptionFrame) -> float | None:
    """Extract probability from Whisper-based TranscriptionFrame result.

    Works with Groq, OpenAI Whisper, or other Whisper-based services that use
    the verbose_json format with segments containing ``avg_logprob``. The
    ``avg_logprob`` of the first segment is converted to a probability with
    ``math.exp``; later segments are not consulted.

    Both attribute-style payloads (``result.segments[0].avg_logprob``) and
    mapping-style payloads (``result["segments"][0]["avg_logprob"]``) are
    accepted, so segments delivered as plain dicts are handled as well.

    Args:
        frame: TranscriptionFrame with result from GroqSTTService or
            OpenAISTTService (when include_prob_metrics=True and using
            Whisper models).

    Returns:
        Probability (0-1) if available, None otherwise.

    Example::

        from pipecat.services.groq.stt import GroqSTTService
        from pipecat.services.whisper.utils import extract_whisper_probability

        stt = GroqSTTService(include_prob_metrics=True)
        # ... use stt in pipeline ...

        # In your frame processor:
        if isinstance(frame, TranscriptionFrame):
            prob = extract_whisper_probability(frame)
            if prob is not None:
                print(f"Transcription confidence: {prob:.2%}")
    """
    result = frame.result
    if not result:
        return None

    # Whisper verbose_json format: response.segments[0].avg_logprob
    if isinstance(result, dict):
        segments = result.get("segments")
    else:
        segments = getattr(result, "segments", None)
    if not segments:
        return None

    first_segment = segments[0]
    # A segment may be a typed object or the raw JSON dict; a plain getattr()
    # on a dict would silently miss the value.
    if isinstance(first_segment, dict):
        avg_logprob = first_segment.get("avg_logprob")
    else:
        avg_logprob = getattr(first_segment, "avg_logprob", None)

    # Compare against None explicitly: 0.0 is a valid log-probability (p == 1).
    if avg_logprob is None:
        return None
    return math.exp(avg_logprob)
def extract_openai_gpt4o_probability(frame: TranscriptionFrame) -> float | None:
    """Extract probability from OpenAI GPT-4o-transcribe TranscriptionFrame result.

    Reads ``result.logprobs``, averages the per-token log-probabilities and
    converts the mean to a probability with ``math.exp`` (i.e. the geometric
    mean of the token probabilities).

    Each entry of ``logprobs`` may be a bare number, an object exposing a
    ``logprob`` attribute, or a dict with a ``"logprob"`` key. Entries that
    carry no log-probability are skipped.

    Args:
        frame: TranscriptionFrame with result from OpenAISTTService using
            GPT-4o-transcribe model (when include_prob_metrics=True).

    Returns:
        Probability (0-1) if available, None otherwise.

    Example::

        from pipecat.services.openai.stt import OpenAISTTService
        from pipecat.services.whisper.utils import extract_openai_gpt4o_probability

        stt = OpenAISTTService(model="gpt-4o-transcribe", include_prob_metrics=True)
        # ... use stt in pipeline ...

        # In your frame processor:
        if isinstance(frame, TranscriptionFrame):
            prob = extract_openai_gpt4o_probability(frame)
            if prob is not None:
                print(f"Transcription confidence: {prob:.2%}")
    """
    result = frame.result
    if not result:
        return None

    # OpenAI GPT-4o-transcribe format: response.logprobs
    if isinstance(result, dict):
        logprobs = result.get("logprobs")
    else:
        logprobs = getattr(result, "logprobs", None)
    if not logprobs:
        return None

    # Token entries are structured records rather than bare floats, so pull the
    # numeric value out of each one; summing the entries directly would raise
    # TypeError.
    values = []
    for entry in logprobs:
        if isinstance(entry, (int, float)):
            value = entry
        elif isinstance(entry, dict):
            value = entry.get("logprob")
        else:
            value = getattr(entry, "logprob", None)
        if value is not None:
            values.append(value)

    if not values:
        return None

    # Calculate average logprob and convert to probability
    avg_logprob = sum(values) / len(values)
    return math.exp(avg_logprob)
def extract_deepgram_probability(frame: TranscriptionFrame) -> float | None:
    """Extract probability from Deepgram TranscriptionFrame result.

    Looks at the first entry of ``result.channel.alternatives``. Its own
    ``confidence`` is preferred; when that is absent, the mean of the
    ``confidence`` values found on its ``words`` is used instead.

    Args:
        frame: TranscriptionFrame with result from DeepgramSTTService.

    Returns:
        Probability (0-1) if available, None otherwise. Returns
        alternative-level confidence if available, otherwise calculates
        average confidence from word-level confidences.

    Example::

        from pipecat.services.deepgram.stt import DeepgramSTTService
        from pipecat.services.whisper.utils import extract_deepgram_probability

        stt = DeepgramSTTService()
        # ... use stt in pipeline ...

        # In your frame processor:
        if isinstance(frame, TranscriptionFrame):
            prob = extract_deepgram_probability(frame)
            if prob is not None:
                print(f"Transcription confidence: {prob:.2%}")
    """
    payload = frame.result
    if not payload:
        return None

    channel = getattr(payload, "channel", None)
    if not channel:
        return None

    alternatives = getattr(channel, "alternatives", None)
    if not alternatives:
        return None

    top_alternative = alternatives[0]

    # Alternative-level confidence wins whenever it is present (0.0 included).
    overall = getattr(top_alternative, "confidence", None)
    if overall is not None:
        return float(overall)

    # Fall back to averaging whatever word-level confidences exist.
    word_list = getattr(top_alternative, "words", None) or ()
    scores = [
        score
        for score in (getattr(word, "confidence", None) for word in word_list)
        if score is not None
    ]
    if not scores:
        return None
    return float(sum(scores) / len(scores))