63 changes: 49 additions & 14 deletions cli/serve/app.py
@@ -7,13 +7,15 @@
import sys
import time
import uuid
from typing import Any

try:
import typer
import uvicorn
from fastapi import FastAPI, Request
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel
except ImportError as e:
raise ImportError(
"The 'm serve' command requires extra dependencies. "
@@ -31,7 +33,9 @@
OpenAIError,
OpenAIErrorResponse,
)
from .schema_converter import json_schema_to_pydantic
from .streaming import stream_chat_completion_chunks
from .utils import extract_finish_reason

app = FastAPI(
title="M serve OpenAI API Compatible Server",
@@ -108,7 +112,7 @@ def _build_model_options(request: ChatCompletionRequest) -> dict:
"presence_penalty", # Presence penalty - not yet implemented
"frequency_penalty", # Frequency penalty - not yet implemented
"logit_bias", # Logit bias - not yet implemented
"response_format", # Response format (json_object) - not yet implemented
"response_format", # Response format - handled separately
"functions", # Legacy function calling - not yet implemented
"function_call", # Legacy function calling - not yet implemented
"tools", # Tool calling - not yet implemented
@@ -137,6 +141,10 @@ def _build_model_options(request: ChatCompletionRequest) -> dict:

def make_chat_endpoint(module):
"""Makes a chat endpoint using a custom module."""
# Inspect serve function once at endpoint creation time
serve_sig = inspect.signature(module.serve)
accepts_format = "format" in serve_sig.parameters
is_async = inspect.iscoroutinefunction(module.serve)

async def endpoint(request: ChatCompletionRequest):
try:
@@ -154,22 +162,49 @@ async def endpoint(request: ChatCompletionRequest):

model_options = _build_model_options(request)

# Handle response_format
format_model: type[BaseModel] | None = None
if request.response_format is not None:
if request.response_format.type == "json_schema":
if request.response_format.json_schema is None:
return create_openai_error_response(
status_code=400,
message="json_schema field is required when response_format.type is 'json_schema'",
error_type="invalid_request_error",
param="response_format.json_schema",
)
try:
format_model = json_schema_to_pydantic(
request.response_format.json_schema.schema_,
request.response_format.json_schema.name,
)
except ValueError as e:
return create_openai_error_response(
status_code=400,
message=f"Invalid JSON schema: {e!s}",
error_type="invalid_request_error",
Contributor

Scoped nit on the schema-conversion error handling (the broader except Exception handler at L252 is pre-existing, not in scope for this PR).

L181 catches only ValueError from json_schema_to_pydantic, but the converter can also surface non-ValueError exceptions for pathological schemas that should still be client errors:

  • create_model(...) can raise PydanticUserError / PydanticSchemaGenerationError on schemas that produce an internally-inconsistent model.
  • Enum(...) in _enum_annotation can raise TypeError for certain member-name edge cases (see the enum-collision comment above).

These currently fall through to the generic 500 handler and surface to the client as "Internal server error: ..." rather than a clean 400 "Invalid JSON schema".

Minimal fix — widen this block only:

except (ValueError, TypeError) as e:
    return create_openai_error_response(
        status_code=400,
        message=f"Invalid JSON schema: {e!s}",
        error_type="invalid_request_error",
        param="response_format.json_schema.schema",
    )

Or import the Pydantic error types explicitly and include them in the tuple. Either way keeps the scope tight to schema-conversion failures and leaves the outer handler alone.
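For the explicit-import variant, a rough sketch of the widened block (assuming Pydantic v2, where PydanticSchemaGenerationError subclasses PydanticUserError, so listing both is just belt and braces):

from pydantic import PydanticSchemaGenerationError, PydanticUserError

try:
    format_model = json_schema_to_pydantic(
        request.response_format.json_schema.schema_,
        request.response_format.json_schema.name,
    )
except (ValueError, TypeError, PydanticUserError, PydanticSchemaGenerationError) as e:
    # Any schema-conversion failure becomes a 400 client error instead of a 500
    return create_openai_error_response(
        status_code=400,
        message=f"Invalid JSON schema: {e!s}",
        error_type="invalid_request_error",
        param="response_format.json_schema.schema",
    )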

param="response_format.json_schema.schema",
)
# For "json_object" and "text", format_model remains None
# Note: "json_object" mode is not yet implemented - the backend
# receives no signal to produce JSON output (same as "text" mode)

# Build kwargs for serve call
serve_kwargs: dict[str, Any] = {
"input": request.messages,
"requirements": request.requirements,
"model_options": model_options,
}
if accepts_format:
serve_kwargs["format"] = format_model

# Detect if serve is async or sync and handle accordingly
if inspect.iscoroutinefunction(module.serve):
if is_async:
# It's async, await it directly
output = await module.serve(
input=request.messages,
requirements=request.requirements,
model_options=model_options,
)
output = await module.serve(**serve_kwargs)
else:
# It's sync, run in thread pool to avoid blocking event loop
output = await asyncio.to_thread(
module.serve,
input=request.messages,
requirements=request.requirements,
model_options=model_options,
)
output = await asyncio.to_thread(module.serve, **serve_kwargs)

# system_fingerprint represents backend config hash, not model name
# The model name is already in response.model (line 73)
@@ -200,7 +235,7 @@ async def endpoint(request: ChatCompletionRequest):
message=ChatCompletionMessage(
content=output.value, role="assistant"
),
finish_reason="stop",
finish_reason=extract_finish_reason(output),
)
],
object="chat.completion", # type: ignore
20 changes: 19 additions & 1 deletion cli/serve/models.py
@@ -29,8 +29,26 @@ class ToolFunction(BaseModel):
function: FunctionDefinition


class JsonSchemaFormat(BaseModel):
"""JSON Schema definition for structured output."""

name: str
"""Name of the schema."""

schema_: dict[str, Any] = Field(alias="schema")
"""JSON Schema definition."""

strict: bool | None = None
"""Accepted for OpenAI compatibility; currently ignored by ``m serve``."""

model_config = {"populate_by_name": True}


class ResponseFormat(BaseModel):
type: Literal["text", "json_object"]
type: Literal["text", "json_object", "json_schema"]

json_schema: JsonSchemaFormat | None = None
"""JSON Schema definition when type is 'json_schema'."""


class StreamOptions(BaseModel):
Contributor

Minor: ResponseFormat accepts {"type": "json_schema", "json_schema": null} at Pydantic-parse time and defers the "json_schema is required when type=json_schema" check to the endpoint at app.py:169-175. The endpoint does catch it and returns a clean 400, so this is a code-organisation concern rather than a functional bug.

A model_validator on ResponseFormat would co-locate the invariant with the type it constrains and shrink the endpoint body by a branch:

from pydantic import model_validator

class ResponseFormat(BaseModel):
    type: Literal["text", "json_object", "json_schema"]
    json_schema: JsonSchemaFormat | None = None

    @model_validator(mode="after")
    def _require_json_schema(self) -> "ResponseFormat":
        if self.type == "json_schema" and self.json_schema is None:
            raise ValueError("json_schema is required when type is 'json_schema'")
        return self

The endpoint's existing except RequestValidationError handler (via validation_exception_handler) will then turn the ValueError into a 400 with the same shape as other validation errors, and the explicit block at app.py:169-175 can go away.

Not blocking — happy to ship as-is if the author prefers endpoint-level validation.
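For context, a request body that the updated ResponseFormat / JsonSchemaFormat models are intended to parse could look like the sketch below (schema contents, model name, and field values here are illustrative placeholders, not taken from this PR):

payload = {
    "model": "my-model",  # placeholder model name
    "messages": [{"role": "user", "content": "Give me a user profile as JSON."}],
    "response_format": {
        "type": "json_schema",
        "json_schema": {
            "name": "UserProfile",
            "schema": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "age": {"type": "integer"},
                },
                "required": ["name", "age"],
            },
            "strict": True,  # accepted for compatibility; currently ignored per the docstring above
        },
    },
}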
