From 0b097169cfb5aa35b2b0a7b869c12b9320de480f Mon Sep 17 00:00:00 2001
From: 1zzxy1 <1535960104@qq.com>
Date: Wed, 25 Mar 2026 18:18:22 +0800
Subject: [PATCH 1/3] fix: raise MiMo TTS legacy timeout default
---
astrbot/core/config/default.py | 2 +-
astrbot/core/provider/sources/mimo_api_common.py | 1 +
.../core/provider/sources/mimo_tts_api_source.py | 9 ++++++++-
tests/test_mimo_api_sources.py | 16 ++++++++++++++++
4 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py
index 0f43dbd06d..bc847134e7 100644
--- a/astrbot/core/config/default.py
+++ b/astrbot/core/config/default.py
@@ -1558,7 +1558,7 @@ class ChatProviderTemplate(TypedDict):
"mimo-tts-style-prompt": "",
"mimo-tts-dialect": "",
"mimo-tts-seed-text": "Hello, MiMo, have you had lunch?",
- "timeout": "20",
+ "timeout": "60",
"proxy": "",
},
"Genie TTS": {
diff --git a/astrbot/core/provider/sources/mimo_api_common.py b/astrbot/core/provider/sources/mimo_api_common.py
index d3bf75e66d..71120aebcd 100644
--- a/astrbot/core/provider/sources/mimo_api_common.py
+++ b/astrbot/core/provider/sources/mimo_api_common.py
@@ -17,6 +17,7 @@
DEFAULT_MIMO_TTS_MODEL = "mimo-v2-tts"
DEFAULT_MIMO_TTS_VOICE = "mimo_default"
DEFAULT_MIMO_TTS_SEED_TEXT = "Hello, MiMo, have you had lunch?"
+DEFAULT_MIMO_TTS_TIMEOUT = 60
DEFAULT_MIMO_STT_MODEL = "mimo-v2-omni"
DEFAULT_MIMO_STT_SYSTEM_PROMPT = (
"You are a speech transcription assistant. "
diff --git a/astrbot/core/provider/sources/mimo_tts_api_source.py b/astrbot/core/provider/sources/mimo_tts_api_source.py
index 2966bfb7d8..86db0d3152 100644
--- a/astrbot/core/provider/sources/mimo_tts_api_source.py
+++ b/astrbot/core/provider/sources/mimo_tts_api_source.py
@@ -8,6 +8,7 @@
DEFAULT_MIMO_API_BASE,
DEFAULT_MIMO_TTS_MODEL,
DEFAULT_MIMO_TTS_SEED_TEXT,
+ DEFAULT_MIMO_TTS_TIMEOUT,
DEFAULT_MIMO_TTS_VOICE,
MiMoAPIError,
build_api_url,
@@ -33,7 +34,13 @@ def __init__(
self.chosen_api_key = provider_config.get("api_key", "")
self.api_base = provider_config.get("api_base", DEFAULT_MIMO_API_BASE)
self.proxy = provider_config.get("proxy", "")
- self.timeout = normalize_timeout(provider_config.get("timeout", 20))
+ timeout = normalize_timeout(
+ provider_config.get("timeout", DEFAULT_MIMO_TTS_TIMEOUT)
+ )
+ # MiMo TTS often needs longer than the legacy 20s default for large outputs.
+ if timeout in (None, 20):
+ timeout = DEFAULT_MIMO_TTS_TIMEOUT
+ self.timeout = timeout
self.voice = provider_config.get("mimo-tts-voice", DEFAULT_MIMO_TTS_VOICE)
self.audio_format = provider_config.get("mimo-tts-format", "wav")
self.style_prompt = provider_config.get("mimo-tts-style-prompt", "")
diff --git a/tests/test_mimo_api_sources.py b/tests/test_mimo_api_sources.py
index c2b02aa136..d74a969e3a 100644
--- a/tests/test_mimo_api_sources.py
+++ b/tests/test_mimo_api_sources.py
@@ -136,6 +136,22 @@ def test_mimo_headers_use_single_authorization_method():
}
+def test_mimo_tts_raises_legacy_default_timeout_to_60_seconds():
+ provider = _make_tts_provider({"timeout": "20"})
+ try:
+ assert provider.timeout == 60
+ finally:
+ asyncio.run(provider.terminate())
+
+
+def test_mimo_tts_preserves_explicit_custom_timeout():
+ provider = _make_tts_provider({"timeout": 90})
+ try:
+ assert provider.timeout == 90
+ finally:
+ asyncio.run(provider.terminate())
+
+
@pytest.mark.asyncio
async def test_mimo_tts_get_audio_handles_empty_choices():
provider = _make_tts_provider()
From acdc0913d9b7dfec0d91d515a8a912d4eaffbb3b Mon Sep 17 00:00:00 2001
From: Soulter <37870767+Soulter@users.noreply.github.com>
Date: Wed, 25 Mar 2026 18:48:42 +0800
Subject: [PATCH 2/3] Delete tests/test_mimo_api_sources.py
---
tests/test_mimo_api_sources.py | 253 ---------------------------------
1 file changed, 253 deletions(-)
delete mode 100644 tests/test_mimo_api_sources.py
diff --git a/tests/test_mimo_api_sources.py b/tests/test_mimo_api_sources.py
deleted file mode 100644
index d74a969e3a..0000000000
--- a/tests/test_mimo_api_sources.py
+++ /dev/null
@@ -1,253 +0,0 @@
-import asyncio
-from types import SimpleNamespace
-
-import pytest
-
-from astrbot.core.provider.sources.mimo_api_common import MiMoAPIError, build_headers
-from astrbot.core.provider.sources.mimo_stt_api_source import ProviderMiMoSTTAPI
-from astrbot.core.provider.sources.mimo_tts_api_source import ProviderMiMoTTSAPI
-
-
-def _make_tts_provider(overrides: dict | None = None) -> ProviderMiMoTTSAPI:
- provider_config = {
- "id": "test-mimo-tts",
- "type": "mimo_tts_api",
- "model": "mimo-v2-tts",
- "api_key": "test-key",
- "mimo-tts-voice": "mimo_default",
- "mimo-tts-format": "wav",
- "mimo-tts-seed-text": "seed text",
- }
- if overrides:
- provider_config.update(overrides)
- return ProviderMiMoTTSAPI(provider_config=provider_config, provider_settings={})
-
-
-def _make_stt_provider(overrides: dict | None = None) -> ProviderMiMoSTTAPI:
- provider_config = {
- "id": "test-mimo-stt",
- "type": "mimo_stt_api",
- "model": "mimo-v2-omni",
- "api_key": "test-key",
- }
- if overrides:
- provider_config.update(overrides)
- return ProviderMiMoSTTAPI(provider_config=provider_config, provider_settings={})
-
-
-def test_mimo_tts_user_prompt_returns_seed_text():
- provider = _make_tts_provider()
- try:
- assert provider._build_user_prompt() == "seed text"
- finally:
- asyncio.run(provider.terminate())
-
-
-def test_mimo_tts_assistant_content_prefixes_style_and_dialect():
- provider = _make_tts_provider(
- {
- "mimo-tts-style-prompt": "开心",
- "mimo-tts-dialect": "四川话",
- "mimo-tts-seed-text": "You are chatting with a close friend.",
- }
- )
- try:
- payload = provider._build_payload("hello")
- assert payload["messages"][0] == {
- "role": "user",
- "content": "You are chatting with a close friend.",
- }
- assert payload["messages"][1]["content"] == "hello"
- finally:
- asyncio.run(provider.terminate())
-
-
-def test_mimo_tts_payload_omits_user_message_without_seed_text():
- provider = _make_tts_provider(
- {
- "mimo-tts-seed-text": "",
- "mimo-tts-style-prompt": "开心",
- }
- )
- try:
- payload = provider._build_payload("hello")
- assert payload["messages"] == [
- {
- "role": "assistant",
- "content": "hello",
- }
- ]
- finally:
- asyncio.run(provider.terminate())
-
-
-def test_mimo_tts_singing_style_uses_single_style_tag():
- provider = _make_tts_provider(
- {
- "mimo-tts-style-prompt": "唱歌 开心",
- "mimo-tts-dialect": "粤语",
- }
- )
- try:
- payload = provider._build_payload("歌词")
- assert payload["messages"][1]["content"] == "歌词"
- finally:
- asyncio.run(provider.terminate())
-
-
-def test_mimo_tts_plain_text_stays_in_assistant_message_when_no_style():
- provider = _make_tts_provider(
- {
- "mimo-tts-seed-text": "",
- }
- )
- try:
- payload = provider._build_payload("hello")
- assert payload["messages"] == [
- {
- "role": "assistant",
- "content": "hello",
- }
- ]
- finally:
- asyncio.run(provider.terminate())
-
-
-def test_mimo_tts_seed_text_is_not_prepended_to_assistant_content():
- provider = _make_tts_provider(
- {
- "mimo-tts-style-prompt": "开心",
- "mimo-tts-seed-text": "reference text",
- }
- )
- try:
- payload = provider._build_payload("明天就是周五了")
- assert payload["messages"][0]["content"] == "reference text"
- assert payload["messages"][1]["content"] == "明天就是周五了"
- assert "reference text" not in payload["messages"][1]["content"]
- finally:
- asyncio.run(provider.terminate())
-
-
-def test_mimo_headers_use_single_authorization_method():
- assert build_headers("test-key") == {
- "Content-Type": "application/json",
- "Authorization": "Bearer test-key",
- }
-
-
-def test_mimo_tts_raises_legacy_default_timeout_to_60_seconds():
- provider = _make_tts_provider({"timeout": "20"})
- try:
- assert provider.timeout == 60
- finally:
- asyncio.run(provider.terminate())
-
-
-def test_mimo_tts_preserves_explicit_custom_timeout():
- provider = _make_tts_provider({"timeout": 90})
- try:
- assert provider.timeout == 90
- finally:
- asyncio.run(provider.terminate())
-
-
-@pytest.mark.asyncio
-async def test_mimo_tts_get_audio_handles_empty_choices():
- provider = _make_tts_provider()
-
- class _Response:
- status_code = 200
- text = '{"choices":[]}'
-
- def raise_for_status(self):
- return None
-
- def json(self):
- return {"choices": []}
-
- provider.client = SimpleNamespace(post=_fake_post(_Response()))
-
- with pytest.raises(MiMoAPIError, match="returned no audio payload"):
- await provider.get_audio("hello")
-
-
-@pytest.mark.asyncio
-async def test_mimo_stt_payload_includes_audio_and_prompt(monkeypatch):
- provider = _make_stt_provider(
- {
- "mimo-stt-system-prompt": "system prompt",
- "mimo-stt-user-prompt": "user prompt",
- }
- )
-
- captured: dict = {}
-
- async def fake_prepare_audio_input(_audio_source: str):
- return "ZmFrZQ==", []
-
- class _Response:
- status_code = 200
- text = '{"choices":[{"message":{"content":"transcribed text"}}]}'
-
- def raise_for_status(self):
- return None
-
- def json(self):
- return {"choices": [{"message": {"content": "transcribed text"}}]}
-
- async def fake_post(_url, headers=None, json=None):
- captured["headers"] = headers
- captured["json"] = json
- return _Response()
-
- monkeypatch.setattr(
- "astrbot.core.provider.sources.mimo_stt_api_source.prepare_audio_input",
- fake_prepare_audio_input,
- )
- provider.client = SimpleNamespace(post=fake_post)
-
- result = await provider.get_text("/tmp/test.wav")
-
- assert result == "transcribed text"
- assert captured["json"]["messages"][0]["content"] == "system prompt"
- assert captured["json"]["messages"][1]["content"][0]["type"] == "input_audio"
- assert (
- captured["json"]["messages"][1]["content"][0]["input_audio"]["data"]
- == "ZmFrZQ=="
- )
- assert captured["json"]["messages"][1]["content"][1]["text"] == "user prompt"
-
-
-@pytest.mark.asyncio
-async def test_mimo_stt_get_text_handles_empty_choices(monkeypatch):
- provider = _make_stt_provider()
-
- async def fake_prepare_audio_input(_audio_source: str):
- return "ZmFrZQ==", []
-
- class _Response:
- status_code = 200
- text = '{"choices":[]}'
-
- def raise_for_status(self):
- return None
-
- def json(self):
- return {"choices": []}
-
- monkeypatch.setattr(
- "astrbot.core.provider.sources.mimo_stt_api_source.prepare_audio_input",
- fake_prepare_audio_input,
- )
- provider.client = SimpleNamespace(post=_fake_post(_Response()))
-
- with pytest.raises(MiMoAPIError, match="returned empty transcription"):
- await provider.get_text("/tmp/test.wav")
-
-
-def _fake_post(response):
- async def _post(*_args, **_kwargs):
- return response
-
- return _post
From 7e5cc21ae2e5f349efdc1a3ba5b5b510491f216d Mon Sep 17 00:00:00 2001
From: 1zzxy1 <1535960104@qq.com>
Date: Wed, 25 Mar 2026 19:05:12 +0800
Subject: [PATCH 3/3] fix: preserve explicit MiMo TTS timeout settings
# Conflicts:
# tests/test_mimo_api_sources.py
---
astrbot/core/provider/sources/mimo_tts_api_source.py | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/astrbot/core/provider/sources/mimo_tts_api_source.py b/astrbot/core/provider/sources/mimo_tts_api_source.py
index 86db0d3152..7040370885 100644
--- a/astrbot/core/provider/sources/mimo_tts_api_source.py
+++ b/astrbot/core/provider/sources/mimo_tts_api_source.py
@@ -34,13 +34,9 @@ def __init__(
self.chosen_api_key = provider_config.get("api_key", "")
self.api_base = provider_config.get("api_base", DEFAULT_MIMO_API_BASE)
self.proxy = provider_config.get("proxy", "")
- timeout = normalize_timeout(
+ self.timeout = normalize_timeout(
provider_config.get("timeout", DEFAULT_MIMO_TTS_TIMEOUT)
)
- # MiMo TTS often needs longer than the legacy 20s default for large outputs.
- if timeout in (None, 20):
- timeout = DEFAULT_MIMO_TTS_TIMEOUT
- self.timeout = timeout
self.voice = provider_config.get("mimo-tts-voice", DEFAULT_MIMO_TTS_VOICE)
self.audio_format = provider_config.get("mimo-tts-format", "wav")
self.style_prompt = provider_config.get("mimo-tts-style-prompt", "")