
vllm.entrypoints.responses_utils

_construct_single_message_from_response_item

_construct_single_message_from_response_item(
    item: ResponseInputOutputItem,
) -> ChatCompletionMessageParam
Source code in vllm/entrypoints/responses_utils.py
def _construct_single_message_from_response_item(
    item: ResponseInputOutputItem,
) -> ChatCompletionMessageParam:
    if isinstance(item, ResponseFunctionToolCall):
        # Append the function call as a tool call.
        return ChatCompletionAssistantMessageParam(
            role="assistant",
            tool_calls=[
                ChatCompletionMessageToolCallParam(
                    id=item.call_id,
                    function=FunctionCallTool(
                        name=item.name,
                        arguments=item.arguments,
                    ),
                    type="function",
                )
            ],
        )
    elif isinstance(item, ResponseReasoningItem):
        reasoning_content = ""
        if item.encrypted_content:
            raise ValueError("Encrypted content is not supported.")
        if len(item.summary) == 1:
            reasoning_content = item.summary[0].text
        elif item.content and len(item.content) == 1:
            reasoning_content = item.content[0].text
        return {
            "role": "assistant",
            "reasoning": reasoning_content,
        }
    elif isinstance(item, ResponseOutputMessage):
        return {
            "role": "assistant",
            "content": item.content[0].text,
        }
    elif isinstance(item, ResponseFunctionToolCallOutputItem):
        return ChatCompletionToolMessageParam(
            role="tool",
            content=item.output,
            tool_call_id=item.call_id,
        )
    elif isinstance(item, dict) and item.get("type") == "function_call_output":
        # Append the function call output as a tool message.
        return ChatCompletionToolMessageParam(
            role="tool",
            content=item.get("output"),
            tool_call_id=item.get("call_id"),
        )
    return item  # type: ignore
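
Example (illustrative, not part of the module): converting a function tool call and a function_call_output dict into Chat Completions messages. The tool name, arguments, and ids are made up, and the response types are assumed to be importable from the openai SDK.

from openai.types.responses import ResponseFunctionToolCall

from vllm.entrypoints.responses_utils import (
    _construct_single_message_from_response_item,
)

# A function tool call becomes an assistant message carrying a "tool_calls" entry.
tool_call = ResponseFunctionToolCall(
    id="fc_1",
    call_id="call_1",
    name="get_weather",
    arguments='{"city": "Paris"}',
    type="function_call",
)
assistant_msg = _construct_single_message_from_response_item(tool_call)
# {"role": "assistant", "tool_calls": [{"id": "call_1", "type": "function",
#  "function": {"name": "get_weather", "arguments": '{"city": "Paris"}'}}]}

# A function_call_output dict becomes a tool message.
tool_output = {
    "type": "function_call_output",
    "call_id": "call_1",
    "output": '{"temp_c": 21}',
}
tool_msg = _construct_single_message_from_response_item(tool_output)
# {"role": "tool", "tool_call_id": "call_1", "content": '{"temp_c": 21}'}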

_maybe_combine_reasoning_and_tool_call

_maybe_combine_reasoning_and_tool_call(
    item: ResponseInputOutputItem,
    messages: list[ChatCompletionMessageParam],
) -> ChatCompletionMessageParam | None

Many models treat MCP calls and reasoning as a single message. This function checks whether the last message is a reasoning message and the current item is an MCP tool call, and if so attaches the tool call to that reasoning message.

Source code in vllm/entrypoints/responses_utils.py
def _maybe_combine_reasoning_and_tool_call(
    item: ResponseInputOutputItem, messages: list[ChatCompletionMessageParam]
) -> ChatCompletionMessageParam | None:
    """Many models treat MCP calls and reasoning as a single message.
    This function checks if the last message is a reasoning message and
    the current message is a tool call"""
    if not (
        isinstance(item, ResponseFunctionToolCall) and item.id.startswith(MCP_PREFIX)
    ):
        return None
    if len(messages) == 0:
        return None
    last_message = messages[-1]
    if not (
        last_message.get("role") == "assistant"
        and last_message.get("reasoning") is not None
    ):
        return None

    last_message["tool_calls"] = [
        ChatCompletionMessageToolCallParam(
            id=item.call_id,
            function=FunctionCallTool(
                name=item.name,
                arguments=item.arguments,
            ),
            type="function",
        )
    ]
    return last_message
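
Example (illustrative): a reasoning message followed by an MCP-originated tool call is collapsed into a single assistant message. The "mcp_" id prefix is an assumption based on how MCP_PREFIX is used elsewhere in this module; names and ids are made up.

from openai.types.responses import ResponseFunctionToolCall

from vllm.entrypoints.responses_utils import _maybe_combine_reasoning_and_tool_call

messages = [{"role": "assistant", "reasoning": "I should look up the weather."}]
mcp_call = ResponseFunctionToolCall(
    id="mcp_1",  # MCP-prefixed id (assumed prefix) triggers the merge
    call_id="call_1",
    name="get_weather",
    arguments='{"city": "Paris"}',
    type="function_call",
)
combined = _maybe_combine_reasoning_and_tool_call(mcp_call, messages)
# combined is the last message with "tool_calls" attached; the function
# returns None for non-MCP calls or when the last message has no reasoning.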

construct_chat_messages_with_tool_call

construct_chat_messages_with_tool_call(
    input_messages: list[ResponseInputOutputItem],
) -> list[ChatCompletionMessageParam]

This function wraps _construct_single_message_from_response_item because some chat messages are built from multiple response items; for example, a reasoning item and an MCP tool call are two response items but form a single chat message.

Source code in vllm/entrypoints/responses_utils.py
def construct_chat_messages_with_tool_call(
    input_messages: list[ResponseInputOutputItem],
) -> list[ChatCompletionMessageParam]:
    """This function wraps _construct_single_message_from_response_item
    Because some chatMessages come from multiple response items
    for example a reasoning item and a MCP tool call are two response items
    but are one chat message
    """
    messages: list[ChatCompletionMessageParam] = []
    for item in input_messages:
        maybe_combined_message = _maybe_combine_reasoning_and_tool_call(item, messages)
        if maybe_combined_message is not None:
            messages[-1] = maybe_combined_message
        else:
            messages.append(_construct_single_message_from_response_item(item))

    return messages
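
Example (illustrative): two response items, a reasoning item followed by an MCP tool call, become one assistant chat message. The payloads are made up, and the "mcp_" id prefix is assumed to match MCP_PREFIX.

from openai.types.responses import ResponseFunctionToolCall, ResponseReasoningItem

from vllm.entrypoints.responses_utils import construct_chat_messages_with_tool_call

items = [
    ResponseReasoningItem(
        id="rs_1",
        type="reasoning",
        summary=[{"text": "Need to call the weather tool.", "type": "summary_text"}],
    ),
    ResponseFunctionToolCall(
        id="mcp_1",
        call_id="call_1",
        name="get_weather",
        arguments='{"city": "Paris"}',
        type="function_call",
    ),
]
messages = construct_chat_messages_with_tool_call(items)
# len(messages) == 1: a single assistant message carrying both "reasoning"
# and "tool_calls".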

construct_input_messages

construct_input_messages(
    *,
    request_instructions: str | None = None,
    request_input: str | list[ResponseInputOutputItem],
    prev_msg: list[ChatCompletionMessageParam] | None = None,
    prev_response_output: list[ResponseOutputItem] | None = None,
)
Source code in vllm/entrypoints/responses_utils.py
def construct_input_messages(
    *,
    request_instructions: str | None = None,
    request_input: str | list[ResponseInputOutputItem],
    prev_msg: list[ChatCompletionMessageParam] | None = None,
    prev_response_output: list[ResponseOutputItem] | None = None,
):
    messages: list[ChatCompletionMessageParam] = []
    if request_instructions:
        messages.append(
            {
                "role": "system",
                "content": request_instructions,
            }
        )

    # Prepend the conversation history.
    if prev_msg is not None:
        # Add the previous messages.
        messages.extend(prev_msg)
    if prev_response_output is not None:
        # Add the previous output.
        for output_item in prev_response_output:
            # NOTE: We skip the reasoning output.
            if isinstance(output_item, ResponseOutputMessage):
                for content in output_item.content:
                    messages.append(
                        {
                            "role": "assistant",
                            "content": content.text,
                        }
                    )

    # Append the new input.
    # Responses API supports simple text inputs without chat format.
    if isinstance(request_input, str):
        messages.append({"role": "user", "content": request_input})
    else:
        input_messages = construct_chat_messages_with_tool_call(request_input)
        messages.extend(input_messages)
    return messages
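
Example (illustrative): assembling the chat prompt for a follow-up turn from system instructions, prior conversation, and the new user input. The message contents are made up.

from vllm.entrypoints.responses_utils import construct_input_messages

messages = construct_input_messages(
    request_instructions="You are a helpful assistant.",
    request_input="What about tomorrow?",
    prev_msg=[
        {"role": "user", "content": "What's the weather in Paris today?"},
        {"role": "assistant", "content": "Sunny, around 21 degrees Celsius."},
    ],
)
# [system, user, assistant, user] in that order.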

construct_tool_dicts

construct_tool_dicts(
    tools: list[Tool], tool_choice: ToolChoice
) -> list[dict[str, Any]] | None
Source code in vllm/entrypoints/responses_utils.py
def construct_tool_dicts(
    tools: list[Tool], tool_choice: ToolChoice
) -> list[dict[str, Any]] | None:
    if tools is None or (tool_choice == "none"):
        tool_dicts = None
    else:
        tool_dicts = [
            convert_tool_responses_to_completions_format(tool.model_dump())
            for tool in tools
        ]
    return tool_dicts
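
Example (illustrative): converting Responses API tools into the nested Chat Completions tool format. FunctionTool is assumed to be importable from the openai SDK; the tool definition is made up.

from openai.types.responses import FunctionTool

from vllm.entrypoints.responses_utils import construct_tool_dicts

tools = [
    FunctionTool(
        type="function",
        name="get_weather",
        description="Look up the current weather for a city.",
        parameters={"type": "object", "properties": {"city": {"type": "string"}}},
        strict=True,
    )
]
tool_dicts = construct_tool_dicts(tools, tool_choice="auto")
# Each entry is {"type": "function", "function": {...flat schema...}};
# with tool_choice="none" the result is None.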

convert_tool_responses_to_completions_format

convert_tool_responses_to_completions_format(
    tool: dict,
) -> dict
Convert a flat tool schema:

{"type": "function", "name": "...", "description": "...", "parameters": {...}}

into: {"type": "function", "function": {...}}

Source code in vllm/entrypoints/responses_utils.py
def convert_tool_responses_to_completions_format(tool: dict) -> dict:
    """
    Convert a flat tool schema:
        {"type": "function", "name": "...", "description": "...", "parameters": {...}}
    into:
        {"type": "function", "function": {...}}
    """
    return {
        "type": "function",
        "function": tool,
    }
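
Example (illustrative) of the wrapping described above, with a made-up tool schema:

from vllm.entrypoints.responses_utils import (
    convert_tool_responses_to_completions_format,
)

flat = {"type": "function", "name": "get_weather", "parameters": {"type": "object"}}
nested = convert_tool_responses_to_completions_format(flat)
# {"type": "function", "function": {"type": "function", "name": "get_weather",
#  "parameters": {"type": "object"}}}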

extract_tool_types

extract_tool_types(tools: list[Tool]) -> set[str]

Extracts the tool types from the given tools.

Source code in vllm/entrypoints/responses_utils.py
def extract_tool_types(tools: list[Tool]) -> set[str]:
    """
    Extracts the tool types from the given tools.
    """
    tool_types: set[str] = set()
    for tool in tools:
        if tool.type == "mcp":
            # Allow the MCP Tool type to enable built in tools if the
            # server_label is allowlisted in
            # envs.VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS
            if tool.server_label in envs.VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS:
                tool_types.add(tool.server_label)
        else:
            tool_types.add(tool.type)
    return tool_types
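
Example (illustrative): an MCP tool contributes its server_label only when that label is allowlisted in envs.VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS; other tools contribute their type. The import path for Mcp and the tool definitions below are assumptions.

from openai.types.responses import FunctionTool
from openai.types.responses.tool import Mcp  # assumed import path

from vllm.entrypoints.responses_utils import extract_tool_types

tools = [
    FunctionTool(
        type="function",
        name="get_weather",
        description=None,
        parameters={},
        strict=False,
    ),
    Mcp(type="mcp", server_label="deepwiki", server_url="https://example.com/mcp"),
]
print(extract_tool_types(tools))
# The result contains "function", plus "deepwiki" if that server_label is allowlisted.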

make_response_output_items_from_parsable_context

make_response_output_items_from_parsable_context(
    response_messages: list[ResponseInputOutputItem],
) -> list[ResponseOutputItem]

Given a list of response messages, construct ResponseOutputItem objects.

Source code in vllm/entrypoints/responses_utils.py
def make_response_output_items_from_parsable_context(
    response_messages: list[ResponseInputOutputItem],
) -> list[ResponseOutputItem]:
    """Given a list of sentences, construct ResponseOutput Items."""
    output_messages: list[ResponseOutputItem] = []
    for message in response_messages:
        if not isinstance(message, ResponseFunctionToolCallOutputItem):
            output_messages.append(message)
        else:
            if len(output_messages) == 0:
                raise ValueError(
                    "Cannot have a FunctionToolCallOutput before FunctionToolCall."
                )
            if isinstance(output_messages[-1], ResponseFunctionToolCall):
                mcp_message = McpCall(
                    id=f"{MCP_PREFIX}{random_uuid()}",
                    arguments=output_messages[-1].arguments,
                    name=output_messages[-1].name,
                    server_label=output_messages[
                        -1
                    ].name,  # TODO: store the server label
                    type=f"{MCP_PREFIX}call",
                    status="completed",
                    output=message.output,
                    # TODO: support error output
                )
                output_messages[-1] = mcp_message

    return output_messages
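
Example (illustrative): a function tool call followed by its output collapses into a single completed McpCall output item. Ids and payloads are made up, and ResponseFunctionToolCallOutputItem is assumed to be importable from the openai SDK.

from openai.types.responses import (
    ResponseFunctionToolCall,
    ResponseFunctionToolCallOutputItem,
)

from vllm.entrypoints.responses_utils import (
    make_response_output_items_from_parsable_context,
)

context = [
    ResponseFunctionToolCall(
        id="fc_1",
        call_id="call_1",
        name="get_weather",
        arguments='{"city": "Paris"}',
        type="function_call",
    ),
    ResponseFunctionToolCallOutputItem(
        id="fco_1",
        call_id="call_1",
        output='{"temp_c": 21}',
        status="completed",
        type="function_call_output",
    ),
]
items = make_response_output_items_from_parsable_context(context)
# len(items) == 1: the call/output pair is merged into a completed McpCall
# whose output field holds the tool result.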