LocalAI version:
localai/localai:latest-gpu-nvidia-cuda-13
Environment, CPU architecture, OS, and Version:
K8S
Model: https://huggingface.co/unsloth/Qwen3.5-9B-GGUF (but I don't think the problem is model-related)
Given the v1/completions request below, the streamed chunks start with the following deltas (note the two `{` at the beginning: the content starts with `{{`).
The resulting output does not conform to the schema (it is not even valid JSON) and trips my application logic, which can't handle broken JSON.
When stream is set to false this does not happen; the response is valid JSON in that case.
BTW I don't know if all chunks are supposed to have the same id.
data: {"created":1776253697,"object":"chat.completion.chunk","id":"1763134f-d0b0-4cae-9900-9840c49de2bc","model":"Qwen3.5-9B-GGUF","choices":[{"index":0,"finish_reason":null,"delta":{"role":"assistant","content":null}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
data: {"created":1776253697,"object":"chat.completion.chunk","id":"1763134f-d0b0-4cae-9900-9840c49de2bc","model":"Qwen3.5-9B-GGUF","choices":[{"index":0,"finish_reason":null,"delta":{"content":"{"}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
data: {"created":1776253697,"object":"chat.completion.chunk","id":"1763134f-d0b0-4cae-9900-9840c49de2bc","model":"Qwen3.5-9B-GGUF","choices":[{"index":0,"finish_reason":null,"delta":{"content":"{"}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
data: {"created":1776253697,"object":"chat.completion.chunk","id":"1763134f-d0b0-4cae-9900-9840c49de2bc","model":"Qwen3.5-9B-GGUF","choices":[{"index":0,"finish_reason":null,"delta":{"content":" \""}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
data: {"created":1776253697,"object":"chat.completion.chunk","id":"1763134f-d0b0-4cae-9900-9840c49de2bc","model":"Qwen3.5-9B-GGUF","choices":[{"index":0,"finish_reason":null,"delta":{"content":"m"}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
{
"model": "Qwen3.5-9B-GGUF",
"messages": [
{
"id": "fc92663c-ad66-4834-b0a8-94d017816552",
"created_at": -1863054981,
"completed_at": -1863054980,
"role": "system",
"content": "say hello",
"metadata": {}
},
{
"id": "3dad2b20-bb46-466e-ad39-2240a6c4f974",
"created_at": -1863054977,
"completed_at": -1863054847,
"role": "user",
"content": "hi?",
"metadata": {}
}
],
"metadata": {
"enable_thinking": "false"
},
"logprobs": false,
"temperature": 0.7,
"response_format": {
"type": "json_schema",
"json_schema": {
"name": "result",
"schema": {
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object",
"properties": {
"markdownAnswer": {
"type": "string"
},
"references": {
"type": "array",
"items": {
"type": "object",
"properties": {
"originIdentifier": {
"type": "string",
"pattern": "^[0-9]{1,4}-[0-9]{1,2}$"
},
"originMediaType": {
"type": "string"
}
},
"required": [
"originIdentifier",
"originMediaType"
]
},
"minItems": 1
}
},
"required": [
"markdownAnswer",
"references"
]
}
}
},
"stream": true,
"reasoning": {
"effort": "none"
},
"stream_options": {
"include_usage": true
},
"reasoning_effort": "none"
}
LocalAI version:
localai/localai:latest-gpu-nvidia-cuda-13
Environment, CPU architecture, OS, and Version:
K8S
Model: https://huggingface.co/unsloth/Qwen3.5-9B-GGUF (but I don't think the problem is model-related)
Given the v1/completions request below, the streamed chunks start with the following deltas (note the two `{` at the beginning: the content starts with `{{`).
The resulting output does not conform to the schema (it is not even valid JSON) and trips my application logic, which can't handle broken JSON.
When stream is set to false this does not happen; the response is valid JSON in that case.
BTW I don't know if all chunks are supposed to have the same id.