From 0b097169cfb5aa35b2b0a7b869c12b9320de480f Mon Sep 17 00:00:00 2001 From: 1zzxy1 <1535960104@qq.com> Date: Wed, 25 Mar 2026 18:18:22 +0800 Subject: [PATCH 1/3] fix: raise MiMo TTS legacy timeout default --- astrbot/core/config/default.py | 2 +- astrbot/core/provider/sources/mimo_api_common.py | 1 + .../core/provider/sources/mimo_tts_api_source.py | 9 ++++++++- tests/test_mimo_api_sources.py | 16 ++++++++++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index 0f43dbd06d..bc847134e7 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -1558,7 +1558,7 @@ class ChatProviderTemplate(TypedDict): "mimo-tts-style-prompt": "", "mimo-tts-dialect": "", "mimo-tts-seed-text": "Hello, MiMo, have you had lunch?", - "timeout": "20", + "timeout": "60", "proxy": "", }, "Genie TTS": { diff --git a/astrbot/core/provider/sources/mimo_api_common.py b/astrbot/core/provider/sources/mimo_api_common.py index d3bf75e66d..71120aebcd 100644 --- a/astrbot/core/provider/sources/mimo_api_common.py +++ b/astrbot/core/provider/sources/mimo_api_common.py @@ -17,6 +17,7 @@ DEFAULT_MIMO_TTS_MODEL = "mimo-v2-tts" DEFAULT_MIMO_TTS_VOICE = "mimo_default" DEFAULT_MIMO_TTS_SEED_TEXT = "Hello, MiMo, have you had lunch?" +DEFAULT_MIMO_TTS_TIMEOUT = 60 DEFAULT_MIMO_STT_MODEL = "mimo-v2-omni" DEFAULT_MIMO_STT_SYSTEM_PROMPT = ( "You are a speech transcription assistant. " diff --git a/astrbot/core/provider/sources/mimo_tts_api_source.py b/astrbot/core/provider/sources/mimo_tts_api_source.py index 2966bfb7d8..86db0d3152 100644 --- a/astrbot/core/provider/sources/mimo_tts_api_source.py +++ b/astrbot/core/provider/sources/mimo_tts_api_source.py @@ -8,6 +8,7 @@ DEFAULT_MIMO_API_BASE, DEFAULT_MIMO_TTS_MODEL, DEFAULT_MIMO_TTS_SEED_TEXT, + DEFAULT_MIMO_TTS_TIMEOUT, DEFAULT_MIMO_TTS_VOICE, MiMoAPIError, build_api_url, @@ -33,7 +34,13 @@ def __init__( self.chosen_api_key = provider_config.get("api_key", "") self.api_base = provider_config.get("api_base", DEFAULT_MIMO_API_BASE) self.proxy = provider_config.get("proxy", "") - self.timeout = normalize_timeout(provider_config.get("timeout", 20)) + timeout = normalize_timeout( + provider_config.get("timeout", DEFAULT_MIMO_TTS_TIMEOUT) + ) + # MiMo TTS often needs longer than the legacy 20s default for large outputs. + if timeout in (None, 20): + timeout = DEFAULT_MIMO_TTS_TIMEOUT + self.timeout = timeout self.voice = provider_config.get("mimo-tts-voice", DEFAULT_MIMO_TTS_VOICE) self.audio_format = provider_config.get("mimo-tts-format", "wav") self.style_prompt = provider_config.get("mimo-tts-style-prompt", "") diff --git a/tests/test_mimo_api_sources.py b/tests/test_mimo_api_sources.py index c2b02aa136..d74a969e3a 100644 --- a/tests/test_mimo_api_sources.py +++ b/tests/test_mimo_api_sources.py @@ -136,6 +136,22 @@ def test_mimo_headers_use_single_authorization_method(): } +def test_mimo_tts_raises_legacy_default_timeout_to_60_seconds(): + provider = _make_tts_provider({"timeout": "20"}) + try: + assert provider.timeout == 60 + finally: + asyncio.run(provider.terminate()) + + +def test_mimo_tts_preserves_explicit_custom_timeout(): + provider = _make_tts_provider({"timeout": 90}) + try: + assert provider.timeout == 90 + finally: + asyncio.run(provider.terminate()) + + @pytest.mark.asyncio async def test_mimo_tts_get_audio_handles_empty_choices(): provider = _make_tts_provider() From acdc0913d9b7dfec0d91d515a8a912d4eaffbb3b Mon Sep 17 00:00:00 2001 From: Soulter <37870767+Soulter@users.noreply.github.com> Date: Wed, 25 Mar 2026 18:48:42 +0800 Subject: [PATCH 2/3] Delete tests/test_mimo_api_sources.py --- tests/test_mimo_api_sources.py | 253 --------------------------------- 1 file changed, 253 deletions(-) delete mode 100644 tests/test_mimo_api_sources.py diff --git a/tests/test_mimo_api_sources.py b/tests/test_mimo_api_sources.py deleted file mode 100644 index d74a969e3a..0000000000 --- a/tests/test_mimo_api_sources.py +++ /dev/null @@ -1,253 +0,0 @@ -import asyncio -from types import SimpleNamespace - -import pytest - -from astrbot.core.provider.sources.mimo_api_common import MiMoAPIError, build_headers -from astrbot.core.provider.sources.mimo_stt_api_source import ProviderMiMoSTTAPI -from astrbot.core.provider.sources.mimo_tts_api_source import ProviderMiMoTTSAPI - - -def _make_tts_provider(overrides: dict | None = None) -> ProviderMiMoTTSAPI: - provider_config = { - "id": "test-mimo-tts", - "type": "mimo_tts_api", - "model": "mimo-v2-tts", - "api_key": "test-key", - "mimo-tts-voice": "mimo_default", - "mimo-tts-format": "wav", - "mimo-tts-seed-text": "seed text", - } - if overrides: - provider_config.update(overrides) - return ProviderMiMoTTSAPI(provider_config=provider_config, provider_settings={}) - - -def _make_stt_provider(overrides: dict | None = None) -> ProviderMiMoSTTAPI: - provider_config = { - "id": "test-mimo-stt", - "type": "mimo_stt_api", - "model": "mimo-v2-omni", - "api_key": "test-key", - } - if overrides: - provider_config.update(overrides) - return ProviderMiMoSTTAPI(provider_config=provider_config, provider_settings={}) - - -def test_mimo_tts_user_prompt_returns_seed_text(): - provider = _make_tts_provider() - try: - assert provider._build_user_prompt() == "seed text" - finally: - asyncio.run(provider.terminate()) - - -def test_mimo_tts_assistant_content_prefixes_style_and_dialect(): - provider = _make_tts_provider( - { - "mimo-tts-style-prompt": "开心", - "mimo-tts-dialect": "四川话", - "mimo-tts-seed-text": "You are chatting with a close friend.", - } - ) - try: - payload = provider._build_payload("hello") - assert payload["messages"][0] == { - "role": "user", - "content": "You are chatting with a close friend.", - } - assert payload["messages"][1]["content"] == "hello" - finally: - asyncio.run(provider.terminate()) - - -def test_mimo_tts_payload_omits_user_message_without_seed_text(): - provider = _make_tts_provider( - { - "mimo-tts-seed-text": "", - "mimo-tts-style-prompt": "开心", - } - ) - try: - payload = provider._build_payload("hello") - assert payload["messages"] == [ - { - "role": "assistant", - "content": "hello", - } - ] - finally: - asyncio.run(provider.terminate()) - - -def test_mimo_tts_singing_style_uses_single_style_tag(): - provider = _make_tts_provider( - { - "mimo-tts-style-prompt": "唱歌 开心", - "mimo-tts-dialect": "粤语", - } - ) - try: - payload = provider._build_payload("歌词") - assert payload["messages"][1]["content"] == "歌词" - finally: - asyncio.run(provider.terminate()) - - -def test_mimo_tts_plain_text_stays_in_assistant_message_when_no_style(): - provider = _make_tts_provider( - { - "mimo-tts-seed-text": "", - } - ) - try: - payload = provider._build_payload("hello") - assert payload["messages"] == [ - { - "role": "assistant", - "content": "hello", - } - ] - finally: - asyncio.run(provider.terminate()) - - -def test_mimo_tts_seed_text_is_not_prepended_to_assistant_content(): - provider = _make_tts_provider( - { - "mimo-tts-style-prompt": "开心", - "mimo-tts-seed-text": "reference text", - } - ) - try: - payload = provider._build_payload("明天就是周五了") - assert payload["messages"][0]["content"] == "reference text" - assert payload["messages"][1]["content"] == "明天就是周五了" - assert "reference text" not in payload["messages"][1]["content"] - finally: - asyncio.run(provider.terminate()) - - -def test_mimo_headers_use_single_authorization_method(): - assert build_headers("test-key") == { - "Content-Type": "application/json", - "Authorization": "Bearer test-key", - } - - -def test_mimo_tts_raises_legacy_default_timeout_to_60_seconds(): - provider = _make_tts_provider({"timeout": "20"}) - try: - assert provider.timeout == 60 - finally: - asyncio.run(provider.terminate()) - - -def test_mimo_tts_preserves_explicit_custom_timeout(): - provider = _make_tts_provider({"timeout": 90}) - try: - assert provider.timeout == 90 - finally: - asyncio.run(provider.terminate()) - - -@pytest.mark.asyncio -async def test_mimo_tts_get_audio_handles_empty_choices(): - provider = _make_tts_provider() - - class _Response: - status_code = 200 - text = '{"choices":[]}' - - def raise_for_status(self): - return None - - def json(self): - return {"choices": []} - - provider.client = SimpleNamespace(post=_fake_post(_Response())) - - with pytest.raises(MiMoAPIError, match="returned no audio payload"): - await provider.get_audio("hello") - - -@pytest.mark.asyncio -async def test_mimo_stt_payload_includes_audio_and_prompt(monkeypatch): - provider = _make_stt_provider( - { - "mimo-stt-system-prompt": "system prompt", - "mimo-stt-user-prompt": "user prompt", - } - ) - - captured: dict = {} - - async def fake_prepare_audio_input(_audio_source: str): - return "ZmFrZQ==", [] - - class _Response: - status_code = 200 - text = '{"choices":[{"message":{"content":"transcribed text"}}]}' - - def raise_for_status(self): - return None - - def json(self): - return {"choices": [{"message": {"content": "transcribed text"}}]} - - async def fake_post(_url, headers=None, json=None): - captured["headers"] = headers - captured["json"] = json - return _Response() - - monkeypatch.setattr( - "astrbot.core.provider.sources.mimo_stt_api_source.prepare_audio_input", - fake_prepare_audio_input, - ) - provider.client = SimpleNamespace(post=fake_post) - - result = await provider.get_text("/tmp/test.wav") - - assert result == "transcribed text" - assert captured["json"]["messages"][0]["content"] == "system prompt" - assert captured["json"]["messages"][1]["content"][0]["type"] == "input_audio" - assert ( - captured["json"]["messages"][1]["content"][0]["input_audio"]["data"] - == "ZmFrZQ==" - ) - assert captured["json"]["messages"][1]["content"][1]["text"] == "user prompt" - - -@pytest.mark.asyncio -async def test_mimo_stt_get_text_handles_empty_choices(monkeypatch): - provider = _make_stt_provider() - - async def fake_prepare_audio_input(_audio_source: str): - return "ZmFrZQ==", [] - - class _Response: - status_code = 200 - text = '{"choices":[]}' - - def raise_for_status(self): - return None - - def json(self): - return {"choices": []} - - monkeypatch.setattr( - "astrbot.core.provider.sources.mimo_stt_api_source.prepare_audio_input", - fake_prepare_audio_input, - ) - provider.client = SimpleNamespace(post=_fake_post(_Response())) - - with pytest.raises(MiMoAPIError, match="returned empty transcription"): - await provider.get_text("/tmp/test.wav") - - -def _fake_post(response): - async def _post(*_args, **_kwargs): - return response - - return _post From 7e5cc21ae2e5f349efdc1a3ba5b5b510491f216d Mon Sep 17 00:00:00 2001 From: 1zzxy1 <1535960104@qq.com> Date: Wed, 25 Mar 2026 19:05:12 +0800 Subject: [PATCH 3/3] fix: preserve explicit mimo tts timeout settings # Conflicts: # tests/test_mimo_api_sources.py --- astrbot/core/provider/sources/mimo_tts_api_source.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/astrbot/core/provider/sources/mimo_tts_api_source.py b/astrbot/core/provider/sources/mimo_tts_api_source.py index 86db0d3152..7040370885 100644 --- a/astrbot/core/provider/sources/mimo_tts_api_source.py +++ b/astrbot/core/provider/sources/mimo_tts_api_source.py @@ -34,13 +34,9 @@ def __init__( self.chosen_api_key = provider_config.get("api_key", "") self.api_base = provider_config.get("api_base", DEFAULT_MIMO_API_BASE) self.proxy = provider_config.get("proxy", "") - timeout = normalize_timeout( + self.timeout = normalize_timeout( provider_config.get("timeout", DEFAULT_MIMO_TTS_TIMEOUT) ) - # MiMo TTS often needs longer than the legacy 20s default for large outputs. - if timeout in (None, 20): - timeout = DEFAULT_MIMO_TTS_TIMEOUT - self.timeout = timeout self.voice = provider_config.get("mimo-tts-voice", DEFAULT_MIMO_TTS_VOICE) self.audio_format = provider_config.get("mimo-tts-format", "wav") self.style_prompt = provider_config.get("mimo-tts-style-prompt", "")