Realtime Configuration

Run Configuration

Bases: TypedDict

Configuration for running a realtime agent session.

Source code in src/agents/realtime/config.py

class RealtimeRunConfig(TypedDict):
    """Run-level options for a realtime agent session.

    Every key is declared ``NotRequired``, so any subset of them may be supplied.
    """

    model_settings: NotRequired[RealtimeSessionModelSettings]
    """Settings applied to the realtime model session."""

    output_guardrails: NotRequired[list[OutputGuardrail[Any]]]
    """Output guardrails that are run on the agent's responses."""

    guardrails_settings: NotRequired[RealtimeGuardrailsSettings]
    """Settings that control guardrail execution."""

    tracing_disabled: NotRequired[bool]
    """Flag indicating whether tracing is disabled for this run."""

model_settings `instance-attribute`

model_settings: NotRequired[RealtimeSessionModelSettings]

Settings for the realtime model session.

output_guardrails `instance-attribute`

output_guardrails: NotRequired[list[OutputGuardrail[Any]]]

List of output guardrails to run on the agent's responses.

guardrails_settings `instance-attribute`

guardrails_settings: NotRequired[RealtimeGuardrailsSettings]

Settings for guardrail execution.

tracing_disabled `instance-attribute`

tracing_disabled: NotRequired[bool]

Whether tracing is disabled for this run.

Model Settings

Bases: TypedDict

Model settings for a realtime model session.

Source code in src/agents/realtime/config.py

class RealtimeSessionModelSettings(TypedDict):
    """Per-session model settings for a realtime model.

    Every key is declared ``NotRequired``, so any subset of them may be supplied.
    """

    model_name: NotRequired[RealtimeModelName]
    """Name of the realtime model to use."""

    instructions: NotRequired[str]
    """System instructions given to the model."""

    modalities: NotRequired[
        list[Literal["text", "audio"]]
    ]
    """Modalities the model should support."""

    voice: NotRequired[str]
    """Voice used for audio output."""

    speed: NotRequired[float]
    """Speed of the model's responses."""

    input_audio_format: NotRequired[RealtimeAudioFormat]
    """Format of input audio streams."""

    output_audio_format: NotRequired[RealtimeAudioFormat]
    """Format of output audio streams."""

    input_audio_transcription: NotRequired[RealtimeInputAudioTranscriptionConfig]
    """Configuration used to transcribe input audio."""

    turn_detection: NotRequired[RealtimeTurnDetectionConfig]
    """Configuration used to detect conversation turns."""

    tool_choice: NotRequired[ToolChoice]
    """How the model should choose which tools to call."""

    tools: NotRequired[list[Tool]]
    """Tools available to the model."""

    handoffs: NotRequired[list[Handoff]]
    """Handoff configurations."""

    tracing: NotRequired[RealtimeModelTracingConfig | None]
    """Configuration for request tracing; the annotation also permits ``None``."""

model_name `instance-attribute`

model_name: NotRequired[RealtimeModelName]

The name of the realtime model to use.

instructions `instance-attribute`

instructions: NotRequired[str]

System instructions for the model.

modalities `instance-attribute`

modalities: NotRequired[list[Literal['text', 'audio']]]

The modalities the model should support.

voice `instance-attribute`

voice: NotRequired[str]

The voice to use for audio output.

speed `instance-attribute`

speed: NotRequired[float]

The speed of the model's responses.

input_audio_format `instance-attribute`

input_audio_format: NotRequired[RealtimeAudioFormat]

The format for input audio streams.

output_audio_format `instance-attribute`

output_audio_format: NotRequired[RealtimeAudioFormat]

The format for output audio streams.

input_audio_transcription `instance-attribute`

input_audio_transcription: NotRequired[
    RealtimeInputAudioTranscriptionConfig
]

Configuration for transcribing input audio.

turn_detection `instance-attribute`

turn_detection: NotRequired[RealtimeTurnDetectionConfig]

Configuration for detecting conversation turns.

tool_choice `instance-attribute`

tool_choice: NotRequired[ToolChoice]

How the model should choose which tools to call.

tools `instance-attribute`

tools: NotRequired[list[Tool]]

List of tools available to the model.

handoffs `instance-attribute`

handoffs: NotRequired[list[Handoff]]

List of handoff configurations.

tracing `instance-attribute`

tracing: NotRequired[RealtimeModelTracingConfig | None]

Configuration for request tracing.

Audio Configuration

Bases: TypedDict

Configuration for audio transcription in realtime sessions.

Source code in src/agents/realtime/config.py

class RealtimeInputAudioTranscriptionConfig(TypedDict):
    """Configuration for audio transcription in realtime sessions."""

    language: NotRequired[str]
    """The language code for transcription."""

    model: NotRequired[Literal["gpt-4o-transcribe", "gpt-4o-mini-transcribe", "whisper-1"] | str]
    """The transcription model to use."""

    prompt: NotRequired[str]
    """An optional prompt to guide transcription."""

language `instance-attribute`

language: NotRequired[str]

The language code for transcription.

model `instance-attribute`

model: NotRequired[
    Literal[
        "gpt-4o-transcribe",
        "gpt-4o-mini-transcribe",
        "whisper-1",
    ]
    | str
]

The transcription model to use.

prompt `instance-attribute`

prompt: NotRequired[str]

An optional prompt to guide transcription.

Bases: TypedDict

Turn detection config. Allows extra vendor keys if needed.

Source code in src/agents/realtime/config.py

class RealtimeTurnDetectionConfig(TypedDict):
    """Turn detection config. Allows extra vendor keys if needed."""

    type: NotRequired[Literal["semantic_vad", "server_vad"]]
    """The type of voice activity detection to use."""

    create_response: NotRequired[bool]
    """Whether to create a response when a turn is detected."""

    eagerness: NotRequired[Literal["auto", "low", "medium", "high"]]
    """How eagerly to detect turn boundaries."""

    interrupt_response: NotRequired[bool]
    """Whether to allow interrupting the assistant's response."""

    prefix_padding_ms: NotRequired[int]
    """Padding time in milliseconds before turn detection."""

    silence_duration_ms: NotRequired[int]
    """Duration of silence in milliseconds to trigger turn detection."""

    threshold: NotRequired[float]
    """The threshold for voice activity detection."""

type `instance-attribute`

type: NotRequired[Literal['semantic_vad', 'server_vad']]

The type of voice activity detection to use.

create_response `instance-attribute`

create_response: NotRequired[bool]

Whether to create a response when a turn is detected.

eagerness `instance-attribute`

eagerness: NotRequired[
    Literal["auto", "low", "medium", "high"]
]

How eagerly to detect turn boundaries.

interrupt_response `instance-attribute`

interrupt_response: NotRequired[bool]

Whether to allow interrupting the assistant's response.

prefix_padding_ms `instance-attribute`

prefix_padding_ms: NotRequired[int]

Padding time in milliseconds before turn detection.

silence_duration_ms `instance-attribute`

silence_duration_ms: NotRequired[int]

Duration of silence in milliseconds to trigger turn detection.

threshold `instance-attribute`

threshold: NotRequired[float]

The threshold for voice activity detection.

Guardrails Settings

Bases: TypedDict

Settings for output guardrails in realtime sessions.

Source code in src/agents/realtime/config.py

class RealtimeGuardrailsSettings(TypedDict):
    """Settings for output guardrails in realtime sessions."""

    debounce_text_length: NotRequired[int]
    """
    The minimum number of characters to accumulate before running guardrails on transcript
    deltas. Defaults to 100. Guardrails run every time the accumulated text reaches
    1x, 2x, 3x, etc. times this threshold.
    """

debounce_text_length `instance-attribute`

debounce_text_length: NotRequired[int]

The minimum number of characters to accumulate before running guardrails on transcript deltas. Defaults to 100. Guardrails run every time the accumulated text reaches 1x, 2x, 3x, etc. times this threshold.

Model Configuration

Bases: TypedDict

Options for connecting to a realtime model.

Source code in src/agents/realtime/model.py

class RealtimeModelConfig(TypedDict):
    """Options for connecting to a realtime model.

    Every key is declared ``NotRequired``, so any subset of them may be supplied.
    """

    api_key: NotRequired[str | Callable[[], MaybeAwaitable[str]]]
    """The API key (or function that returns a key) to use when connecting. If unset, the model will
    try to use a sane default. For example, the OpenAI Realtime model will try to use the
    `OPENAI_API_KEY` environment variable.
    """

    url: NotRequired[str]
    """The URL to use when connecting. If unset, the model will use a sane default. For example,
    the OpenAI Realtime model will use the default OpenAI WebSocket URL.
    """

    initial_model_settings: NotRequired[RealtimeSessionModelSettings]
    """The initial model settings to use when connecting."""

    playback_tracker: NotRequired[RealtimePlaybackTracker]
    """The playback tracker to use when tracking audio playback progress. If not set, the model will
    use a default implementation that assumes audio is played immediately, at realtime speed.

    A playback tracker is useful for interruptions. The model generates audio much faster than
    realtime playback speed. So if there's an interruption, it's useful for the model to know how
    much of the audio has been played by the user. In low-latency scenarios, it's fine to assume
    that audio is played back immediately at realtime speed. But in scenarios like phone calls or
    other remote interactions, you can set a playback tracker that lets the model know when audio
    is played to the user.
    """

api_key `instance-attribute`

api_key: NotRequired[
    str | Callable[[], MaybeAwaitable[str]]
]

The API key (or function that returns a key) to use when connecting. If unset, the model will try to use a sane default. For example, the OpenAI Realtime model will try to use the OPENAI_API_KEY environment variable.

url `instance-attribute`

url: NotRequired[str]

The URL to use when connecting. If unset, the model will use a sane default. For example, the OpenAI Realtime model will use the default OpenAI WebSocket URL.

initial_model_settings `instance-attribute`

initial_model_settings: NotRequired[
    RealtimeSessionModelSettings
]

The initial model settings to use when connecting.

playback_tracker `instance-attribute`

playback_tracker: NotRequired[RealtimePlaybackTracker]

The playback tracker to use when tracking audio playback progress. If not set, the model will use a default implementation that assumes audio is played immediately, at realtime speed.

A playback tracker is useful for interruptions. The model generates audio much faster than realtime playback speed. So if there's an interruption, it's useful for the model to know how much of the audio has been played by the user. In low-latency scenarios, it's fine to assume that audio is played back immediately at realtime speed. But in scenarios like phone calls or other remote interactions, you can set a playback tracker that lets the model know when audio is played to the user.

Tracing Configuration

Bases: TypedDict

Configuration for tracing in realtime model sessions.

Source code in src/agents/realtime/config.py

class RealtimeModelTracingConfig(TypedDict):
    """Configuration for tracing in realtime model sessions."""

    workflow_name: NotRequired[str]
    """The workflow name to use for tracing."""

    group_id: NotRequired[str]
    """A group identifier to use for tracing, to link multiple traces together."""

    metadata: NotRequired[dict[str, Any]]
    """Additional metadata to include with the trace."""

workflow_name `instance-attribute`

workflow_name: NotRequired[str]

The workflow name to use for tracing.

group_id `instance-attribute`

group_id: NotRequired[str]

A group identifier to use for tracing, to link multiple traces together.

metadata `instance-attribute`

metadata: NotRequired[dict[str, Any]]

Additional metadata to include with the trace.

User Input Types

User input that can be a string or structured message.

Bases: TypedDict

A text input from the user.

Source code in src/agents/realtime/config.py

class RealtimeUserInputText(TypedDict):
    """Text input supplied by the user.

    Both keys are required.
    """

    type: Literal["input_text"]
    """Type identifier for text input; always ``"input_text"``."""

    text: str
    """Text content supplied by the user."""

type `instance-attribute`

type: Literal['input_text']

The type identifier for text input.

text `instance-attribute`

text: str

The text content from the user.

Bases: TypedDict

A message input from the user.

Source code in src/agents/realtime/config.py

class RealtimeUserInputMessage(TypedDict):
    """Message input supplied by the user.

    All three keys are required.
    """

    type: Literal["message"]
    """Type identifier for message inputs; always ``"message"``."""

    role: Literal["user"]
    """Role identifier for user messages; always ``"user"``."""

    content: list[RealtimeUserInputText]
    """Text content items that make up the message."""

type `instance-attribute`

type: Literal['message']

The type identifier for message inputs.

role `instance-attribute`

role: Literal['user']

The role identifier for user messages.

content `instance-attribute`

content: list[RealtimeUserInputText]

List of text content items in the message.

Client Messages

Bases: TypedDict

A raw message to be sent to the model.

Source code in src/agents/realtime/config.py

class RealtimeClientMessage(TypedDict):
    """A raw message to be sent to the model."""

    type: str  # explicitly required
    """The type of the message."""

    other_data: NotRequired[dict[str, Any]]
    """Merged into the message body."""

type `instance-attribute`

type: str

The type of the message.

other_data `instance-attribute`

other_data: NotRequired[dict[str, Any]]

Merged into the message body.

Type Aliases

The name of a realtime model.

The audio format for realtime audio streams.

Realtime Configuration

Run Configuration

model_settings instance-attribute

output_guardrails instance-attribute

guardrails_settings instance-attribute

tracing_disabled instance-attribute

Model Settings

model_name instance-attribute

instructions instance-attribute

modalities instance-attribute

voice instance-attribute

speed instance-attribute

input_audio_format instance-attribute

output_audio_format instance-attribute

input_audio_transcription instance-attribute

turn_detection instance-attribute

tool_choice instance-attribute

tools instance-attribute

handoffs instance-attribute

tracing instance-attribute

Audio Configuration

language instance-attribute

model instance-attribute

prompt instance-attribute

type instance-attribute

create_response instance-attribute

eagerness instance-attribute

interrupt_response instance-attribute

prefix_padding_ms instance-attribute

silence_duration_ms instance-attribute

threshold instance-attribute

Guardrails Settings

debounce_text_length instance-attribute

Model Configuration

api_key instance-attribute

url instance-attribute

initial_model_settings instance-attribute

playback_tracker instance-attribute

Tracing Configuration

workflow_name instance-attribute

group_id instance-attribute

metadata instance-attribute

User Input Types

type instance-attribute

text instance-attribute

type instance-attribute

role instance-attribute

content instance-attribute

Client Messages

type instance-attribute

other_data instance-attribute

Type Aliases

model_settings `instance-attribute`

output_guardrails `instance-attribute`

guardrails_settings `instance-attribute`

tracing_disabled `instance-attribute`

model_name `instance-attribute`

instructions `instance-attribute`

modalities `instance-attribute`

voice `instance-attribute`

speed `instance-attribute`

input_audio_format `instance-attribute`

output_audio_format `instance-attribute`

input_audio_transcription `instance-attribute`

turn_detection `instance-attribute`

tool_choice `instance-attribute`

tools `instance-attribute`

handoffs `instance-attribute`

tracing `instance-attribute`

language `instance-attribute`

model `instance-attribute`

prompt `instance-attribute`

type `instance-attribute`

create_response `instance-attribute`

eagerness `instance-attribute`

interrupt_response `instance-attribute`

prefix_padding_ms `instance-attribute`

silence_duration_ms `instance-attribute`

threshold `instance-attribute`

debounce_text_length `instance-attribute`

api_key `instance-attribute`

url `instance-attribute`

initial_model_settings `instance-attribute`

playback_tracker `instance-attribute`

workflow_name `instance-attribute`

group_id `instance-attribute`

metadata `instance-attribute`

type `instance-attribute`

text `instance-attribute`

type `instance-attribute`

role `instance-attribute`

content `instance-attribute`

type `instance-attribute`

other_data `instance-attribute`