#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
"""OpenAI image generation service implementation.
This module provides integration with OpenAI's DALL-E image generation API
for creating images from text prompts.
"""
import io
from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
from typing import Literal
import aiohttp
from loguru import logger
from openai import AsyncOpenAI
from PIL import Image
from pipecat.frames.frames import (
ErrorFrame,
Frame,
URLImageRawFrame,
)
from pipecat.services.image_service import ImageGenService
from pipecat.services.settings import NOT_GIVEN, ImageGenSettings, _NotGiven, assert_given
@dataclass
class OpenAIImageGenSettings(ImageGenSettings):
    """Settings for the OpenAI image generation service.

    Parameters:
        model: DALL-E model identifier. Not declared here; the field comes
            from the ``ImageGenSettings`` base class.
        image_size: Target size for generated images (for example
            ``"1024x1024"``). Defaults to the ``NOT_GIVEN`` sentinel, which
            marks the field as unset; ``None`` is a distinct, allowed value.
    """

    # A default_factory is used rather than a plain default so the sentinel is
    # supplied per instance at construction time.
    image_size: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
class OpenAIImageGenService(ImageGenService):
    """OpenAI DALL-E image generation service.

    Requests an image for a text prompt from the OpenAI images API, downloads
    the generated image over HTTP with the supplied ``aiohttp`` session, and
    emits it as a ``URLImageRawFrame``. The model and image size are read from
    the service settings on every request.
    """

    Settings = OpenAIImageGenSettings
    _settings: Settings

    def __init__(
        self,
        *,
        api_key: str,
        base_url: str | None = None,
        aiohttp_session: aiohttp.ClientSession,
        image_size: Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]
        | None = None,
        model: str | None = None,
        settings: Settings | None = None,
    ):
        """Initialize the OpenAI image generation service.

        Args:
            api_key: OpenAI API key for authentication.
            base_url: Custom base URL for the OpenAI API. If None, the client's
                default is used.
            aiohttp_session: HTTP session used to download generated images.
            image_size: Target size for generated images. If omitted, ``None``
                is stored and no explicit size is configured by this class.

                .. deprecated:: 0.0.105
                    Use ``settings=OpenAIImageGenService.Settings(image_size=...)`` instead.

            model: DALL-E model to use for generation. Defaults to "dall-e-3".

                .. deprecated:: 0.0.105
                    Use ``settings=OpenAIImageGenService.Settings(model=...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
        """
        # 1. Start from the hardcoded defaults.
        default_settings = self.Settings(
            model="dall-e-3",
            image_size=None,
        )

        # 2. Apply the deprecated direct init arguments, warning on each use.
        if model is not None:
            self._warn_init_param_moved_to_settings("model", "model")
            default_settings.model = model
        if image_size is not None:
            self._warn_init_param_moved_to_settings("image_size", "image_size")
            default_settings.image_size = image_size

        # 3. Apply the settings delta last (canonical API, always wins).
        if settings is not None:
            default_settings.apply_update(settings)

        super().__init__(settings=default_settings)

        self._client = AsyncOpenAI(api_key=api_key, base_url=base_url)
        self._aiohttp_session = aiohttp_session

    async def run_image_gen(self, prompt: str) -> AsyncGenerator[Frame, None]:
        """Generate an image from a text prompt using OpenAI's DALL-E.

        Args:
            prompt: Text description of the image to generate.

        Yields:
            Frame: A ``URLImageRawFrame`` holding the decoded image bytes, size,
            format and source URL on success. An ``ErrorFrame`` is yielded
            instead when the API response carries no image URL or when the
            image download returns an HTTP error status.
        """
        logger.debug(f"Generating image from prompt: {prompt}")

        result = await self._client.images.generate(
            prompt=prompt,
            model=assert_given(self._settings.model),
            n=1,
            size=assert_given(self._settings.image_size),
        )

        # Guard against a missing or empty result list as well as a missing
        # URL, so a malformed response becomes an ErrorFrame, not an exception.
        image_url = result.data[0].url if result.data else None
        if not image_url:
            yield ErrorFrame("Image generation failed")
            return

        # Download the image. Only the raw bytes are read while the response is
        # open; decoding and yielding happen after the connection is released.
        async with self._aiohttp_session.get(image_url) as response:
            status = response.status
            if status < 400:
                encoded = await response.content.read()
            else:
                # Do not hand an HTTP error body to the image decoder.
                encoded = None

        if encoded is None:
            yield ErrorFrame(f"Image download failed with HTTP status {status}")
            return

        # The frame copies out everything it needs, so the PIL image can be
        # closed as soon as the frame has been built.
        with Image.open(io.BytesIO(encoded)) as image:
            frame = URLImageRawFrame(
                image=image.tobytes(),
                size=image.size,
                format=image.format,
                url=image_url,
            )
        yield frame