`OpenAI Responses model`

OpenAIResponsesModel

Bases: Model

Implementation of Model that uses the OpenAI Responses API.

Source code in src/cai/sdk/agents/models/openai_responses.py

class OpenAIResponsesModel(Model):
    """
    Implementation of `Model` that uses the OpenAI Responses API.

    In addition to issuing requests, each instance keeps an interaction counter and
    cumulative token totals, which are handed to `cli_print_agent_messages` for CLI
    display, plus a rough token breakdown of the most recent non-streamed request.
    """

    def __init__(
        self,
        model: str | ChatModel,
        openai_client: AsyncOpenAI,
    ) -> None:
        """Store the model identifier and client and reset all bookkeeping.

        Args:
            model: Model identifier sent with every Responses API request.
            openai_client: Client used to issue the requests.
        """
        # Logged through the module logger instead of print() so that creating a
        # model never writes to stdout.
        logger.debug("OpenAIResponsesModel initialized with model: %s", model)
        self.model = model
        self._client = openai_client

        # Track interaction counter and token totals for cli display
        self.interaction_counter = 0
        self.total_input_tokens = 0
        self.total_output_tokens = 0
        self.total_reasoning_tokens = 0
        self.agent_name = "Agent"  # Default name

        # Token-accounting state. Defined up front so that stream_response() can
        # read it even when get_response() has not run yet.
        self._last_request_breakdown: dict[str, Any] | None = None
        self._last_request_actual_input_tokens = 0
        self._last_request_breakdown_overhead = 0

    def set_agent_name(self, name: str) -> None:
        """Set the agent name for CLI display purposes."""
        self.agent_name = name

    def _non_null_or_not_given(self, value: Any) -> Any:
        """Return `value` unchanged, or `NOT_GIVEN` when it is `None`."""
        return value if value is not None else NOT_GIVEN

    def _snapshot_request_breakdown(
        self,
        system_instructions: str | None,
        tools: list[Tool],
    ) -> None:
        """Estimate system-prompt and tool-definition tokens for the upcoming request.

        The estimate is stored in `self._last_request_breakdown`; if it cannot be
        built the attribute is set to `None`. This is best-effort bookkeeping and
        never raises.
        """
        sys_tokens = 0
        tool_defs_tokens = 0
        try:
            # Count system tokens dynamically
            if system_instructions:
                try:
                    from cai.sdk.agents.models.openai_chatcompletions import (
                        count_tokens_with_tiktoken,
                    )

                    sys_tokens, _ = count_tokens_with_tiktoken(str(system_instructions))
                except Exception:
                    # Fall back to a rough four-characters-per-token estimate.
                    sys_tokens = len(str(system_instructions)) // 4

            # Count tool definition tokens from params_json_schema
            if tools:
                try:
                    import tiktoken

                    try:
                        enc = tiktoken.get_encoding("cl100k_base")
                    except Exception:
                        enc = tiktoken.get_encoding("gpt2")
                    for tool in tools:
                        schema = getattr(tool, "params_json_schema", None)
                        if schema:
                            as_str = json.dumps(schema)
                            tool_defs_tokens += (
                                len(enc.encode(as_str)) if enc else len(as_str) // 4
                            )
                except Exception:
                    # Keep whatever was counted so far; the breakdown is advisory.
                    logger.debug("Could not count tool definition tokens", exc_info=True)

            self._last_request_breakdown = {
                "system_tokens": int(sys_tokens or 0),
                "tool_definitions_tokens": int(tool_defs_tokens or 0),
                "messages_breakdown": {
                    "user": 0,
                    "assistant": 0,
                    "tool_calls": 0,
                    "tool_results": 0,
                },
            }
        except Exception:
            self._last_request_breakdown = None

    @staticmethod
    def _build_cli_message(output: Any) -> Any:
        """Build the message-like object that `cli_print_agent_messages` receives.

        `content` joins, one per line, the `content` of items exposing `.get` and the
        `text` of items exposing `.text`; `tool_calls` holds one wrapper per item that
        has both `name` and `arguments`.
        """
        content_parts = [
            str(item.get("content", ""))
            if hasattr(item, "get")
            else str(getattr(item, "text", ""))
            for item in output
            if hasattr(item, "get") or hasattr(item, "text")
        ]
        tool_calls = [
            type(
                "ToolCallWrapper",
                (),
                {"name": item.name, "arguments": item.arguments},
            )
            for item in output
            if hasattr(item, "name") and hasattr(item, "arguments")
        ]
        return type(
            "ResponseWrapper",
            (),
            {"content": "\n".join(content_parts), "tool_calls": tool_calls},
        )

    def _record_actual_input_tokens(self, input_tokens: int) -> None:
        """Store the reported input tokens and the part not covered by the breakdown.

        The overhead is the reported input-token count minus everything listed in
        `self._last_request_breakdown`, clamped at zero.
        """
        try:
            self._last_request_actual_input_tokens = int(input_tokens or 0)
            # Compute overhead to reconcile totals
            br = getattr(self, "_last_request_breakdown", None) or {}
            sys_t = int(br.get("system_tokens", 0) or 0)
            tool_t = int(br.get("tool_definitions_tokens", 0) or 0)
            msg_map = br.get("messages_breakdown", {}) or {}
            msg_sum = sum(int(v or 0) for v in msg_map.values())
            overhead = self._last_request_actual_input_tokens - (sys_t + tool_t + msg_sum)
            self._last_request_breakdown_overhead = max(int(overhead), 0)
        except Exception:
            self._last_request_breakdown_overhead = 0

    def _report_interaction(self, output: Any, input_tokens: int, output_tokens: int) -> None:
        """Show a finished interaction in the CLI and add its tokens to the totals.

        Printing is best-effort: any failure (including a failed import of the CLI
        helper) is logged and does not prevent the token totals from being updated.
        """
        input_tokens = int(input_tokens or 0)
        output_tokens = int(output_tokens or 0)

        try:
            from cai.util import cli_print_agent_messages

            cli_print_agent_messages(
                agent_name=getattr(self, "agent_name", "Agent"),
                message=self._build_cli_message(output),
                counter=getattr(self, "interaction_counter", 0),
                model=str(self.model),
                debug=False,
                interaction_input_tokens=input_tokens,
                interaction_output_tokens=output_tokens,
                interaction_reasoning_tokens=0,
                # Totals are passed as they stood *before* this interaction.
                total_input_tokens=getattr(self, "total_input_tokens", 0),
                total_output_tokens=getattr(self, "total_output_tokens", 0),
                total_reasoning_tokens=getattr(self, "total_reasoning_tokens", 0),
                interaction_cost=None,
                total_cost=None,
            )
        except Exception as e:
            logger.error(f"Error printing agent message: {e}")

        # Update token totals
        self.total_input_tokens += input_tokens
        self.total_output_tokens += output_tokens
        self._record_actual_input_tokens(input_tokens)

    async def get_response(
        self,
        system_instructions: str | None,
        input: str | list[TResponseInputItem],
        model_settings: ModelSettings,
        tools: list[Tool],
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
    ) -> ModelResponse:
        """Issue one non-streamed Responses API request and return its result.

        Returns:
            A `ModelResponse` carrying the response output, its usage (an empty
            `Usage()` when the API reported none) and the response id.

        Raises:
            Exception: Whatever the request raises; the error is recorded on the
                tracing span and logged before being re-raised.
        """
        # Increment the interaction counter for CLI display
        self.interaction_counter += 1

        # Snapshot inputs for token accounting before the request is sent.
        self._snapshot_request_breakdown(system_instructions, tools)

        with response_span(disabled=tracing.is_disabled()) as span_response:
            try:
                async with model_wait_hints():
                    response = await self._fetch_response(
                        system_instructions,
                        input,
                        model_settings,
                        tools,
                        output_schema,
                        handoffs,
                        stream=False,
                    )

                if _debug.DONT_LOG_MODEL_DATA:
                    logger.debug("LLM responded")
                else:
                    logger.debug(
                        "LLM resp:\n"
                        f"{json.dumps([x.model_dump() for x in response.output], indent=2)}\n"
                    )

                usage = (
                    Usage(
                        requests=1,
                        input_tokens=response.usage.input_tokens,
                        output_tokens=response.usage.output_tokens,
                        total_tokens=response.usage.total_tokens,
                    )
                    if response.usage
                    else Usage()
                )

                if tracing.include_data():
                    span_response.span_data.response = response
                    span_response.span_data.input = input

                # Print the agent message for CLI display and update token totals
                self._report_interaction(
                    response.output, usage.input_tokens, usage.output_tokens
                )

            except Exception as e:
                span_response.set_error(
                    SpanError(
                        message="Error getting response",
                        data={
                            "error": str(e) if tracing.include_data() else e.__class__.__name__,
                        },
                    )
                )
                request_id = e.request_id if isinstance(e, APIStatusError) else None
                logger.error(f"Error getting response: {e}. (request_id: {request_id})")
                raise

        return ModelResponse(
            output=response.output,
            usage=usage,
            referenceable_id=response.id,
        )

    async def stream_response(
        self,
        system_instructions: str | None,
        input: str | list[TResponseInputItem],
        model_settings: ModelSettings,
        tools: list[Tool],
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
    ) -> AsyncIterator[ResponseStreamEvent]:
        """
        Yields a partial message as it is generated, as well as the usage information.

        Every event received from the API is yielded unchanged. Once the stream is
        exhausted, the response carried by the `ResponseCompletedEvent` (if one was
        seen) is used for tracing, CLI display and token accounting.
        """
        # Increment the interaction counter for CLI display
        self.interaction_counter += 1

        # NOTE(review): unlike get_response(), this path does not refresh
        # self._last_request_breakdown, so the overhead computed below is relative to
        # the snapshot of the last non-streamed request (or to nothing).

        with response_span(disabled=tracing.is_disabled()) as span_response:
            stream_wait_hints = ModelStreamWaitHints()
            await stream_wait_hints.start()
            try:
                stream = await self._fetch_response(
                    system_instructions,
                    input,
                    model_settings,
                    tools,
                    output_schema,
                    handoffs,
                    stream=True,
                )

                final_response: Response | None = None

                async for chunk in stream:
                    await stream_wait_hints.stop()
                    if isinstance(chunk, ResponseCompletedEvent):
                        final_response = chunk.response
                    yield chunk

                if final_response and tracing.include_data():
                    span_response.span_data.response = final_response
                    span_response.span_data.input = input

                if final_response is None:
                    # Nothing to display or account for; say so explicitly instead of
                    # failing on a None dereference further down.
                    logger.warning(
                        "Stream ended without a completed response; "
                        "skipping CLI display and token accounting"
                    )
                else:
                    final_usage = final_response.usage
                    self._report_interaction(
                        final_response.output,
                        final_usage.input_tokens if final_usage else 0,
                        final_usage.output_tokens if final_usage else 0,
                    )

            except Exception as e:
                span_response.set_error(
                    SpanError(
                        message="Error streaming response",
                        data={
                            "error": str(e) if tracing.include_data() else e.__class__.__name__,
                        },
                    )
                )
                logger.error(f"Error streaming response: {e}")
                raise
            finally:
                try:
                    await stream_wait_hints.stop()
                except Exception:
                    # Cleanup is best-effort and must not mask the real outcome.
                    logger.debug("Failed to stop stream wait hints", exc_info=True)

    @overload
    async def _fetch_response(
        self,
        system_instructions: str | None,
        input: str | list[TResponseInputItem],
        model_settings: ModelSettings,
        tools: list[Tool],
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
        stream: Literal[True],
    ) -> AsyncStream[ResponseStreamEvent]: ...

    @overload
    async def _fetch_response(
        self,
        system_instructions: str | None,
        input: str | list[TResponseInputItem],
        model_settings: ModelSettings,
        tools: list[Tool],
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
        stream: Literal[False],
    ) -> Response: ...

    async def _fetch_response(
        self,
        system_instructions: str | None,
        input: str | list[TResponseInputItem],
        model_settings: ModelSettings,
        tools: list[Tool],
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
        stream: Literal[True] | Literal[False] = False,
    ) -> Response | AsyncStream[ResponseStreamEvent]:
        """Convert the SDK-level arguments and call `responses.create`.

        Optional settings that are `None` are sent as `NOT_GIVEN`.

        Returns:
            The awaited result of `responses.create`; the overloads type it as an
            `AsyncStream` of events for `stream=True` and a `Response` otherwise.
        """
        list_input = ItemHelpers.input_to_new_input_list(input)

        # True only when requested and there are tools; an explicit False is
        # forwarded; anything else is left unset.
        parallel_tool_calls: bool | NotGiven
        if model_settings.parallel_tool_calls and tools:
            parallel_tool_calls = True
        elif model_settings.parallel_tool_calls is False:
            parallel_tool_calls = False
        else:
            parallel_tool_calls = NOT_GIVEN

        tool_choice = Converter.convert_tool_choice(model_settings.tool_choice)
        converted_tools = Converter.convert_tools(tools, handoffs)
        response_format = Converter.get_response_format(output_schema)

        if _debug.DONT_LOG_MODEL_DATA:
            logger.debug("Calling LLM")
        else:
            logger.debug(
                f"Calling LLM {self.model} with input:\n"
                f"{json.dumps(list_input, indent=2)}\n"
                f"Tools:\n{json.dumps(converted_tools.tools, indent=2)}\n"
                f"Stream: {stream}\n"
                f"Tool choice: {tool_choice}\n"
                f"Response format: {response_format}\n"
            )

        # Go through _get_client() so a missing client is created lazily (or
        # reported as a UserError) instead of failing on `None.responses`.
        return await self._get_client().responses.create(
            instructions=self._non_null_or_not_given(system_instructions),
            model=self.model,
            input=list_input,
            include=converted_tools.includes,
            tools=converted_tools.tools,
            temperature=self._non_null_or_not_given(model_settings.temperature),
            top_p=self._non_null_or_not_given(model_settings.top_p),
            truncation=self._non_null_or_not_given(model_settings.truncation),
            max_output_tokens=self._non_null_or_not_given(model_settings.max_tokens),
            tool_choice=tool_choice,
            parallel_tool_calls=parallel_tool_calls,
            stream=stream,
            extra_headers=_HEADERS,
            text=response_format,
            store=self._non_null_or_not_given(model_settings.store),
        )

    def _get_client(self) -> AsyncOpenAI:
        """Return the client, creating one from the resolved API key if none is set.

        Raises:
            UserError: If no client is set and no API key can be resolved for the model.
        """
        if self._client is None:
            api_key = resolve_llm_openai_compatible_api_key(str(self.model))
            if not api_key:
                raise UserError(
                    "Missing API key for selected model. "
                    "For alias-family models (alias*/cai*/csi*), set ALIAS_API_KEY. "
                    "For OpenAI models, set OPENAI_API_KEY."
                )
            self._client = AsyncOpenAI(api_key=api_key)
        return self._client

set_agent_name

set_agent_name(name: str) -> None

Set the agent name for CLI display purposes.

Source code in src/cai/sdk/agents/models/openai_responses.py

def set_agent_name(self, name: str) -> None:
    """Record the name under which this model's output is shown in the CLI.

    Args:
        name: Value stored in ``self.agent_name``.
    """
    self.agent_name = name

stream_response `async`

stream_response(
    system_instructions: str | None,
    input: str | list[TResponseInputItem],
    model_settings: ModelSettings,
    tools: list[Tool],
    output_schema: AgentOutputSchema | None,
    handoffs: list[Handoff],
    tracing: ModelTracing,
) -> AsyncIterator[ResponseStreamEvent]

Streams the model's response, yielding each `ResponseStreamEvent` as it is received; usage information is taken from the response carried by the final `ResponseCompletedEvent`.

Source code in src/cai/sdk/agents/models/openai_responses.py

async def stream_response(
    self,
    system_instructions: str | None,
    input: str | list[TResponseInputItem],
    model_settings: ModelSettings,
    tools: list[Tool],
    output_schema: AgentOutputSchema | None,
    handoffs: list[Handoff],
    tracing: ModelTracing,
) -> AsyncIterator[ResponseStreamEvent]:
    """
    Yields a partial message as it is generated, as well as the usage information.

    Every event received from the API is yielded unchanged. Once the stream is
    exhausted, the response carried by the `ResponseCompletedEvent` (if one was
    seen) is used for tracing, CLI display and token accounting.

    Raises:
        Exception: Whatever the request or the stream raises; the error is recorded
            on the tracing span and logged before being re-raised.
    """
    # Increment the interaction counter for CLI display
    self.interaction_counter += 1

    with response_span(disabled=tracing.is_disabled()) as span_response:
        stream_wait_hints = ModelStreamWaitHints()
        await stream_wait_hints.start()
        try:
            stream = await self._fetch_response(
                system_instructions,
                input,
                model_settings,
                tools,
                output_schema,
                handoffs,
                stream=True,
            )

            final_response: Response | None = None

            async for chunk in stream:
                await stream_wait_hints.stop()
                if isinstance(chunk, ResponseCompletedEvent):
                    final_response = chunk.response
                yield chunk

            if final_response and tracing.include_data():
                span_response.span_data.response = final_response
                span_response.span_data.input = input

            if final_response is None:
                # Nothing to display or account for; say so explicitly instead of
                # failing on a None dereference further down.
                logger.warning(
                    "Stream ended without a completed response; "
                    "skipping CLI display and token accounting"
                )
            else:
                final_usage = final_response.usage
                input_tokens = int(final_usage.input_tokens or 0) if final_usage else 0
                output_tokens = int(final_usage.output_tokens or 0) if final_usage else 0

                # Print the agent message for CLI display. Best-effort: a failure
                # here (including a failed import) is logged and must not break a
                # stream that has already been delivered.
                try:
                    from cai.util import cli_print_agent_messages

                    # Create a message-like object to display
                    content_parts = [
                        str(item.get("content", ""))
                        if hasattr(item, "get")
                        else str(getattr(item, "text", ""))
                        for item in final_response.output
                        if hasattr(item, "get") or hasattr(item, "text")
                    ]
                    tool_calls = [
                        type(
                            "ToolCallWrapper",
                            (),
                            {"name": item.name, "arguments": item.arguments},
                        )
                        for item in final_response.output
                        if hasattr(item, "name") and hasattr(item, "arguments")
                    ]
                    message_obj = type(
                        "ResponseWrapper",
                        (),
                        {"content": "\n".join(content_parts), "tool_calls": tool_calls},
                    )

                    cli_print_agent_messages(
                        agent_name=getattr(self, "agent_name", "Agent"),
                        message=message_obj,
                        counter=getattr(self, "interaction_counter", 0),
                        model=str(self.model),
                        debug=False,
                        interaction_input_tokens=input_tokens,
                        interaction_output_tokens=output_tokens,
                        interaction_reasoning_tokens=0,
                        # Totals are passed as they stood *before* this interaction.
                        total_input_tokens=getattr(self, "total_input_tokens", 0),
                        total_output_tokens=getattr(self, "total_output_tokens", 0),
                        total_reasoning_tokens=getattr(self, "total_reasoning_tokens", 0),
                        interaction_cost=None,
                        total_cost=None,
                    )
                except Exception as e:
                    logger.error(f"Error printing agent message: {e}")

                # Update token totals (independent of whether printing succeeded)
                self.total_input_tokens += input_tokens
                self.total_output_tokens += output_tokens

                # Store actual tokens to align usage metrics with IN: for streamed path
                # NOTE(review): this method does not refresh _last_request_breakdown
                # itself, so the overhead is relative to whatever snapshot (if any)
                # an earlier request left behind.
                try:
                    self._last_request_actual_input_tokens = input_tokens
                    br = getattr(self, "_last_request_breakdown", None) or {}
                    sys_t = int(br.get("system_tokens", 0) or 0)
                    tool_t = int(br.get("tool_definitions_tokens", 0) or 0)
                    msg_map = br.get("messages_breakdown", {}) or {}
                    msg_sum = sum(int(v or 0) for v in msg_map.values())
                    overhead = input_tokens - (sys_t + tool_t + msg_sum)
                    self._last_request_breakdown_overhead = max(int(overhead), 0)
                except Exception:
                    self._last_request_breakdown_overhead = 0

        except Exception as e:
            span_response.set_error(
                SpanError(
                    message="Error streaming response",
                    data={
                        "error": str(e) if tracing.include_data() else e.__class__.__name__,
                    },
                )
            )
            logger.error(f"Error streaming response: {e}")
            raise
        finally:
            try:
                await stream_wait_hints.stop()
            except Exception:
                # Cleanup is best-effort and must not mask the real outcome.
                pass

Converter

Source code in src/cai/sdk/agents/models/openai_responses.py

class Converter:
    """Translate SDK-level tool, handoff and output settings into Responses API parameters."""

    @classmethod
    def convert_tool_choice(
        cls, tool_choice: Literal["auto", "required", "none"] | str | None
    ) -> response_create_params.ToolChoice | NotGiven:
        """Map a tool-choice setting onto the value sent in the request.

        Returns:
            `NOT_GIVEN` for `None`; the string itself for "auto", "required" and
            "none"; `{"type": <name>}` for the hosted tools "file_search",
            "web_search_preview" and "computer_use_preview"; otherwise a function
            choice `{"type": "function", "name": tool_choice}`.
        """
        if tool_choice is None:
            return NOT_GIVEN
        if tool_choice == "required":
            return "required"
        if tool_choice == "auto":
            return "auto"
        if tool_choice == "none":
            return "none"
        if tool_choice == "file_search":
            return {"type": "file_search"}
        if tool_choice == "web_search_preview":
            return {"type": "web_search_preview"}
        if tool_choice == "computer_use_preview":
            return {"type": "computer_use_preview"}
        # Any other string is treated as the name of a function tool.
        return {"type": "function", "name": tool_choice}

    @classmethod
    def get_response_format(
        cls, output_schema: AgentOutputSchema | None
    ) -> ResponseTextConfigParam | NotGiven:
        """Build the `text` request parameter for a structured output schema.

        Returns:
            `NOT_GIVEN` when there is no schema or the schema is plain text;
            otherwise a `json_schema` format named "final_output".
        """
        if output_schema is None or output_schema.is_plain_text():
            return NOT_GIVEN
        return {
            "format": {
                "type": "json_schema",
                "name": "final_output",
                "schema": output_schema.json_schema(),
                "strict": output_schema.strict_json_schema,
            }
        }

    @classmethod
    def convert_tools(
        cls,
        tools: list[Tool],
        handoffs: list[Handoff[Any]],
    ) -> ConvertedTools:
        """Convert tools and handoffs into request tool params plus `include` values.

        Converted tools come first, in input order, followed by one function tool
        per handoff.

        Raises:
            UserError: If more than one `ComputerTool` is supplied, or a tool has an
                unsupported type.
        """
        computer_tools = [tool for tool in tools if isinstance(tool, ComputerTool)]
        if len(computer_tools) > 1:
            raise UserError(f"You can only provide one computer tool. Got {len(computer_tools)}")

        converted_tools: list[ToolParam] = []
        includes: list[IncludeLiteral] = []

        for tool in tools:
            converted_tool, include = cls._convert_tool(tool)
            converted_tools.append(converted_tool)
            if include:
                includes.append(include)

        converted_tools.extend(cls._convert_handoff_tool(handoff) for handoff in handoffs)

        return ConvertedTools(tools=converted_tools, includes=includes)

    @classmethod
    def _convert_tool(cls, tool: Tool) -> tuple[ToolParam, IncludeLiteral | None]:
        """Returns converted tool and includes

        Raises:
            UserError: If `tool` is not a function, web-search, file-search or
                computer tool.
        """
        includes: IncludeLiteral | None = None

        if isinstance(tool, FunctionTool):
            converted_tool: ToolParam = {
                "name": tool.name,
                "parameters": tool.params_json_schema,
                "strict": tool.strict_json_schema,
                "type": "function",
                "description": tool.description,
            }
        elif isinstance(tool, WebSearchTool):
            ws: WebSearchToolParam = {
                "type": "web_search_preview",
                "user_location": tool.user_location,
                "search_context_size": tool.search_context_size,
            }
            converted_tool = ws
        elif isinstance(tool, FileSearchTool):
            converted_tool = {
                "type": "file_search",
                "vector_store_ids": tool.vector_store_ids,
            }
            # Optional settings are only sent when they are set (truthy).
            if tool.max_num_results:
                converted_tool["max_num_results"] = tool.max_num_results
            if tool.ranking_options:
                converted_tool["ranking_options"] = tool.ranking_options
            if tool.filters:
                converted_tool["filters"] = tool.filters

            if tool.include_search_results:
                includes = "file_search_call.results"
        elif isinstance(tool, ComputerTool):
            converted_tool = {
                "type": "computer_use_preview",
                "environment": tool.computer.environment,
                "display_width": tool.computer.dimensions[0],
                "display_height": tool.computer.dimensions[1],
            }
        else:
            # The message previously ended with the literal text ", tool"; the tool
            # itself is now interpolated.
            raise UserError(f"Unknown tool type: {type(tool)}, tool: {tool}")

        return converted_tool, includes

    @classmethod
    def _convert_handoff_tool(cls, handoff: Handoff) -> ToolParam:
        """Describe a handoff as a function tool built from its name, schema and description."""
        return {
            "name": handoff.tool_name,
            "parameters": handoff.input_json_schema,
            "strict": handoff.strict_json_schema,
            "type": "function",
            "description": handoff.tool_description,
        }

OpenAI Responses model

OpenAIResponsesModel

set_agent_name

stream_response async

Converter

`OpenAI Responses model`

stream_response `async`