diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c134d8661..8c0c44ab2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -151,6 +151,7 @@ Breaking changes in this release: - 👷🏻 Added `npm run build-browser` script for building test harness package only, in PR [#5667](https://github.com/microsoft/BotFramework-WebChat/pull/5667), by [@compulim](https://github.com/compulim) - Added pull-based capabilities system for dynamically discovering adapter capabilities at runtime, in PR [#5679](https://github.com/microsoft/BotFramework-WebChat/pull/5679), by [@pranavjoshi001](https://github.com/pranavjoshi001) - Added Speech-to-Speech (S2S) support for real-time voice conversations, in PR [#5654](https://github.com/microsoft/BotFramework-WebChat/pull/5654), by [@pranavjoshi](https://github.com/pranavjoshi001) +- Added mute/unmute functionality for speech-to-speech, which sends silent audio chunks while muted to keep the server connection alive, in PR [#5688](https://github.com/microsoft/BotFramework-WebChat/pull/5688), by [@pranavjoshi](https://github.com/pranavjoshi001) ### Changed diff --git a/__tests__/html2/speechToSpeech/barge.in.html b/__tests__/html2/speechToSpeech/barge.in.html index d12f20c51f..43c75264f9 100644 --- a/__tests__/html2/speechToSpeech/barge.in.html +++ b/__tests__/html2/speechToSpeech/barge.in.html @@ -21,7 +21,10 @@ 3. User barges in (server detects) → "Listening..." (user speaking) 4. Server processes → "Processing..." 5. Bot responds with new audio → "Talk to interrupt..." (bot speaking again) - 6. User toggles mic off + 6. User clicks the dismiss button to stop the voice session + + Note: The mic button toggles between listening and muted states. + Use the dismiss button to completely stop the voice session. --> diff --git a/__tests__/html2/speechToSpeech/dtmf.input.html.snap-1.png b/__tests__/html2/speechToSpeech/dtmf.input.html.snap-1.png index c29b07065a..93636cfc8e 100644 Binary files a/__tests__/html2/speechToSpeech/dtmf.input.html.snap-1.png and b/__tests__/html2/speechToSpeech/dtmf.input.html.snap-1.png differ diff --git a/__tests__/html2/speechToSpeech/dtmf.input.html.snap-2.png b/__tests__/html2/speechToSpeech/dtmf.input.html.snap-2.png index be9f100b54..55a4c1ccad 100644 Binary files a/__tests__/html2/speechToSpeech/dtmf.input.html.snap-2.png and b/__tests__/html2/speechToSpeech/dtmf.input.html.snap-2.png differ diff --git a/__tests__/html2/speechToSpeech/happy.path.html b/__tests__/html2/speechToSpeech/happy.path.html index e326982a9c..4b1046371a 100644 --- a/__tests__/html2/speechToSpeech/happy.path.html +++ b/__tests__/html2/speechToSpeech/happy.path.html @@ -32,12 +32,18 @@ // Set voice configuration capability to enable microphone button directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false }); + // Enable voice-only mode (hides send button, shows mic + dismiss buttons) + directLine.setCapability('getIsVoiceOnlyMode', true, { emitEvent: false }); render( , document.getElementById('webchat') @@ -152,8 +158,10 @@ expect(botActivityStatus.innerText).toContain('|'); expect(botActivityStatus.innerText).toContain('Just now'); - // WHEN: User stops recording by clicking microphone button again - await host.click(micButton); + // WHEN: User stops voice recording by clicking the dismiss button + const dismissButton = document.querySelector(`[data-testid="${testIds.sendBoxDismissButton}"]`); + expect(dismissButton).toBeTruthy(); + await host.click(dismissButton); // THEN: Button should change to not-recording state await pageConditions.became( diff --git
a/__tests__/html2/speechToSpeech/happy.path.html.snap-1.png b/__tests__/html2/speechToSpeech/happy.path.html.snap-1.png index f9e16e326a..7846167c8d 100644 Binary files a/__tests__/html2/speechToSpeech/happy.path.html.snap-1.png and b/__tests__/html2/speechToSpeech/happy.path.html.snap-1.png differ diff --git a/__tests__/html2/speechToSpeech/happy.path.html.snap-2.png b/__tests__/html2/speechToSpeech/happy.path.html.snap-2.png index d3a3724d22..d1bf547291 100644 Binary files a/__tests__/html2/speechToSpeech/happy.path.html.snap-2.png and b/__tests__/html2/speechToSpeech/happy.path.html.snap-2.png differ diff --git a/__tests__/html2/speechToSpeech/multiple.turns.html b/__tests__/html2/speechToSpeech/multiple.turns.html index 7a5ccc5971..d88ae5069a 100644 --- a/__tests__/html2/speechToSpeech/multiple.turns.html +++ b/__tests__/html2/speechToSpeech/multiple.turns.html @@ -41,12 +41,18 @@ // Set voice configuration capability to enable microphone button directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false }); + // Enable voice-only mode (hides send button, shows mic + dismiss buttons) + directLine.setCapability('getIsVoiceOnlyMode', true, { emitEvent: false }); render( , document.getElementById('webchat') @@ -313,8 +319,10 @@ expect(activities[4]).toHaveProperty('textContent', 'Thank you!'); expect(activities[5]).toHaveProperty('textContent', "You're welcome! Have a safe flight."); - // ===== END: Turn off mic ===== - await host.click(micButton); + // ===== END: Stop voice recording using dismiss button ===== + const dismissButton = document.querySelector(`[data-testid="${testIds.sendBoxDismissButton}"]`); + expect(dismissButton).toBeTruthy(); + await host.click(dismissButton); await pageConditions.became( 'Recording stopped', diff --git a/__tests__/html2/speechToSpeech/multiple.turns.html.snap-1.png b/__tests__/html2/speechToSpeech/multiple.turns.html.snap-1.png index 65531ebe9c..ede648a441 100644 Binary files a/__tests__/html2/speechToSpeech/multiple.turns.html.snap-1.png and b/__tests__/html2/speechToSpeech/multiple.turns.html.snap-1.png differ diff --git a/__tests__/html2/speechToSpeech/mute.functionality.html b/__tests__/html2/speechToSpeech/mute.functionality.html new file mode 100644 index 0000000000..b2e3d53064 --- /dev/null +++ b/__tests__/html2/speechToSpeech/mute.functionality.html @@ -0,0 +1,173 @@ + + + + + + + + + + + + + +
+ + + + diff --git a/__tests__/html2/speechToSpeech/mute.functionality.html.snap-1.png b/__tests__/html2/speechToSpeech/mute.functionality.html.snap-1.png new file mode 100644 index 0000000000..3d816b4884 Binary files /dev/null and b/__tests__/html2/speechToSpeech/mute.functionality.html.snap-1.png differ diff --git a/__tests__/html2/speechToSpeech/outgoing.audio.interval.html b/__tests__/html2/speechToSpeech/outgoing.audio.interval.html index d2167aba2a..f8761f7f4b 100644 --- a/__tests__/html2/speechToSpeech/outgoing.audio.interval.html +++ b/__tests__/html2/speechToSpeech/outgoing.audio.interval.html @@ -43,6 +43,8 @@ // Set voice configuration capability to enable microphone button directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false }); + // Enable voice-only mode (hides send button, shows mic + dismiss buttons) + directLine.setCapability('getIsVoiceOnlyMode', true, { emitEvent: false }); // Intercept postActivity to capture outgoing voice chunks const capturedChunks = []; @@ -98,8 +100,10 @@ 2000 ); - // ===== STEP 3: Stop recording ===== - await host.click(micButton); + // ===== STEP 3: Stop voice recording using dismiss button ===== + const dismissButton = document.querySelector(`[data-testid="${testIds.sendBoxDismissButton}"]`); + expect(dismissButton).toBeTruthy(); + await host.click(dismissButton); await pageConditions.became( 'Recording stopped', diff --git a/packages/api/src/boot/internal.ts b/packages/api/src/boot/internal.ts index 1deec63da8..6d786c0376 100644 --- a/packages/api/src/boot/internal.ts +++ b/packages/api/src/boot/internal.ts @@ -2,5 +2,6 @@ export { default as LowPriorityDecoratorComposer } from '../decorator/internal/L export { default as usePostVoiceActivity } from '../hooks/internal/usePostVoiceActivity'; export { default as useSetDictateState } from '../hooks/internal/useSetDictateState'; export { default as useShouldShowMicrophoneButton } from '../hooks/internal/useShouldShowMicrophoneButton'; +export { default as useVoiceStateWritable } from '../hooks/internal/useVoiceStateWritable'; export { LegacyActivityContextProvider, type LegacyActivityContextType } from '../legacy/LegacyActivityBridgeContext'; export { default as StyleOptionsComposer } from '../providers/StyleOptions/StyleOptionsComposer'; diff --git a/packages/api/src/localization/en-US.json b/packages/api/src/localization/en-US.json index 7e22ba4c18..871b526eaa 100644 --- a/packages/api/src/localization/en-US.json +++ b/packages/api/src/localization/en-US.json @@ -123,6 +123,8 @@ "_SPEECH_INPUT_MICROPHONE_BUTTON_CLOSE_ALT.comment": "This is for screen reader and is the label of the microphone button, when clicked, will close microphone.", "SPEECH_INPUT_MICROPHONE_BUTTON_OPEN_ALT": "Microphone on", "_SPEECH_INPUT_MICROPHONE_BUTTON_OPEN_ALT.comment": "This is for screen reader and is the label of the microphone button, when clicked, will open microphone.", + "SPEECH_INPUT_STOP_RECORDING_ALT": "Stop recording", + "_SPEECH_INPUT_STOP_RECORDING_ALT.comment": "This is for screen reader and is the label of the dismiss button that stops recording in voice only mode.", "SPEECH_INPUT_STARTING": "Starting…", "SUGGESTED_ACTIONS_FLIPPER_NEXT_ALT": "Next", "_SUGGESTED_ACTIONS_FLIPPER_NEXT_ALT.comment": "This is for screen reader for the label of the right flipper button for suggested actions. 
Probably can re-use the value from CAROUSEL_FLIPPER_NEXT_ALT.", @@ -133,10 +135,12 @@ "TEXT_INPUT_ALT": "Message input box", "_TEXT_INPUT_ALT.comment": "This is for screen reader for the label of the message input box.", "TEXT_INPUT_PLACEHOLDER": "Type your message", - "TEXT_INPUT_SPEECH_IDLE_PLACEHOLDER": "Start talking...", + "TEXT_INPUT_SPEECH_IDLE_PLACEHOLDER": "Click mic to start", "_TEXT_INPUT_SPEECH_IDLE_PLACEHOLDER.comment": "This is the placeholder text shown in the message input box when speech-to-speech is enabled and in idle state.", "TEXT_INPUT_SPEECH_LISTENING_PLACEHOLDER": "Listening...", "_TEXT_INPUT_SPEECH_LISTENING_PLACEHOLDER.comment": "This is the placeholder text shown in the message input box when speech-to-speech is enabled and actively listening to user speech.", + "TEXT_INPUT_SPEECH_MUTED_PLACEHOLDER": "Muted", + "_TEXT_INPUT_SPEECH_MUTED_PLACEHOLDER.comment": "This is the placeholder text shown in the message input box when speech-to-speech is enabled and the microphone is muted.", "TEXT_INPUT_SPEECH_PROCESSING_PLACEHOLDER": "Processing...", "_TEXT_INPUT_SPEECH_PROCESSING_PLACEHOLDER.comment": "This is the placeholder text shown in the message input box when speech-to-speech is enabled and processing the user's speech input.", "TEXT_INPUT_SPEECH_BOT_SPEAKING_PLACEHOLDER": "Talk to interrupt...", diff --git a/packages/api/src/providers/Capabilities/private/capabilityRegistry.ts b/packages/api/src/providers/Capabilities/private/capabilityRegistry.ts index c0c6bdc3f6..061eb72395 100644 --- a/packages/api/src/providers/Capabilities/private/capabilityRegistry.ts +++ b/packages/api/src/providers/Capabilities/private/capabilityRegistry.ts @@ -39,6 +39,10 @@ const CAPABILITY_REGISTRY: readonly CapabilityDescriptor[] = { key: 'voiceConfiguration', getterName: 'getVoiceConfiguration' + }, + { + key: 'isVoiceOnlyMode', + getterName: 'getIsVoiceOnlyMode' } ]); diff --git a/packages/api/src/providers/Capabilities/types/Capabilities.ts b/packages/api/src/providers/Capabilities/types/Capabilities.ts index c72d96bd12..a186f9b9ea 100644 --- a/packages/api/src/providers/Capabilities/types/Capabilities.ts +++ b/packages/api/src/providers/Capabilities/types/Capabilities.ts @@ -3,6 +3,7 @@ */ type Capabilities = Readonly<{ voiceConfiguration?: VoiceConfiguration | undefined; + isVoiceOnlyMode?: boolean | undefined; }>; /** diff --git a/packages/api/src/providers/SpeechToSpeech/private/VoiceRecorderBridge.tsx b/packages/api/src/providers/SpeechToSpeech/private/VoiceRecorderBridge.tsx index ea00dbe81d..757f9e5940 100644 --- a/packages/api/src/providers/SpeechToSpeech/private/VoiceRecorderBridge.tsx +++ b/packages/api/src/providers/SpeechToSpeech/private/VoiceRecorderBridge.tsx @@ -11,6 +11,7 @@ export function VoiceRecorderBridge(): null { const [voiceState] = useVoiceState(); const postVoiceActivity = usePostVoiceActivity(); + const muted = voiceState === 'muted'; // Derive recording state from voiceState - recording is active when not idle const recording = voiceState !== 'idle'; @@ -29,7 +30,13 @@ export function VoiceRecorderBridge(): null { [postVoiceActivity] ); - const { record } = useRecorder(handleAudioChunk); + const { record, mute } = useRecorder(handleAudioChunk); + + useEffect(() => { + if (muted) { + return mute(); + } + }, [mute, muted]); useEffect(() => { if (recording) { diff --git a/packages/api/src/providers/SpeechToSpeech/private/useRecorder.spec.tsx b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.spec.tsx index 6bb47cfa14..2b65f9195b 100644 --- 
a/packages/api/src/providers/SpeechToSpeech/private/useRecorder.spec.tsx +++ b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.spec.tsx @@ -36,13 +36,16 @@ const mockWorkletNode = { port: mockWorkletPort }; +const mockSourceNode = { + connect: jest.fn(), + disconnect: jest.fn() +}; + const mockAudioContext = { audioWorklet: { addModule: jest.fn().mockResolvedValue(undefined) }, - createMediaStreamSource: jest.fn(() => ({ - connect: jest.fn() - })), + createMediaStreamSource: jest.fn(() => mockSourceNode), destination: {}, resume: jest.fn().mockResolvedValue(undefined), state: 'running' @@ -218,4 +221,74 @@ describe('useRecorder', () => { }); }); }); + + test('should return mute function', () => { + render(); + expect(typeof hookData?.mute).toBe('function'); + }); + + test('should send MUTE command and stop media stream when mute is called', async () => { + render(); + + // Start recording first + act(() => { + hookData?.record(); + }); + + await waitFor(() => { + expect(mockWorkletPort.postMessage).toHaveBeenCalledWith({ command: 'START' }); + }); + + // Clear mocks to isolate mute behavior + mockWorkletPort.postMessage.mockClear(); + mockTrack.stop.mockClear(); + mockSourceNode.disconnect.mockClear(); + + // Call mute + act(() => { + hookData?.mute(); + }); + + // Should send MUTE command to worklet + expect(mockWorkletPort.postMessage).toHaveBeenCalledWith({ command: 'MUTE' }); + // Should stop media stream tracks (mic indicator OFF) + expect(mockTrack.stop).toHaveBeenCalledTimes(1); + // Should disconnect source node + expect(mockSourceNode.disconnect).toHaveBeenCalledTimes(1); + }); + + test('should return unmute function from mute() that sends UNMUTE and restarts media stream', async () => { + render(); + + // Start recording first + act(() => { + hookData?.record(); + }); + + await waitFor(() => { + expect(mockWorkletPort.postMessage).toHaveBeenCalledWith({ command: 'START' }); + }); + + // Call mute and get unmute function + let unmute: (() => void) | undefined; + act(() => { + unmute = hookData?.mute(); + }); + + // Clear mocks to isolate unmute behavior + mockWorkletPort.postMessage.mockClear(); + mockMediaDevices.getUserMedia.mockClear(); + + // Call unmute + act(() => { + unmute?.(); + }); + + // Should send UNMUTE command to worklet + expect(mockWorkletPort.postMessage).toHaveBeenCalledWith({ command: 'UNMUTE' }); + // Should restart media stream + await waitFor(() => { + expect(mockMediaDevices.getUserMedia).toHaveBeenCalledTimes(1); + }); + }); }); diff --git a/packages/api/src/providers/SpeechToSpeech/private/useRecorder.ts b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.ts index 05ed029003..f67e07d241 100644 --- a/packages/api/src/providers/SpeechToSpeech/private/useRecorder.ts +++ b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.ts @@ -8,9 +8,11 @@ declare class AudioWorkletProcessor { buffer: number[]; bufferSize: number; constructor(options?: AudioWorkletNodeOptions); + muted: boolean; process(inputs: Float32Array[][], outputs: Float32Array[][], parameters: Record): boolean; readonly port: MessagePort; recording: boolean; + silentFrame: Float32Array; } declare function registerProcessor(name: string, processorCtor: typeof AudioWorkletProcessor): void; @@ -20,12 +22,16 @@ declare function registerProcessor(name: string, processorCtor: typeof AudioWork * without any TypeScript annotations that could be transformed by the compiler. 
*/ const audioProcessorCode = `(${function () { + const RENDER_QUANTUM = 128; + class AudioRecorderProcessor extends AudioWorkletProcessor { constructor(options: AudioWorkletNodeOptions) { super(); this.buffer = []; this.bufferSize = options.processorOptions.bufferSize; + this.muted = false; this.recording = false; + this.silentFrame = new Float32Array(RENDER_QUANTUM); // Pre-allocated zeros this.port.onmessage = e => { if (e.data.command === 'START') { @@ -33,13 +39,20 @@ } else if (e.data.command === 'STOP') { this.recording = false; this.buffer = []; + } else if (e.data.command === 'MUTE') { + this.muted = true; + } else if (e.data.command === 'UNMUTE') { + this.muted = false; } }; } process(inputs: Float32Array[][]) { - if (inputs[0] && inputs[0].length && this.recording) { - this.buffer.push(...inputs[0][0]); + if (this.recording) { + // Use real audio when not muted; otherwise push a silent frame (all zeros) to keep the connection alive. + const audioData = !this.muted && inputs[0] && inputs[0].length ? inputs[0][0] : this.silentFrame; + this.buffer.push(...audioData); + while (this.buffer.length >= this.bufferSize) { const chunk = this.buffer.splice(0, this.bufferSize); this.port.postMessage({ eventType: 'audio', audioData: new Float32Array(chunk) }); @@ -62,6 +75,7 @@ const MS_IN_SECOND = 1000; export function useRecorder(onAudioChunk: (base64: string, timestamp: string) => void) { const [{ Date }] = usePonyfill(); const audioCtxRef = useRef(undefined); + const sourceRef = useRef(undefined); const streamRef = useRef(undefined); const voiceConfiguration = useCapabilities(caps => caps.voiceConfiguration); const workletRef = useRef(undefined); @@ -69,17 +83,44 @@ const chunkIntervalMs = voiceConfiguration?.chunkIntervalMs ?? DEFAULT_CHUNK_SIZE_IN_MS; const sampleRate = voiceConfiguration?.sampleRate ??
DEFAULT_SAMPLE_RATE; + const stopMediaStream = useCallback(() => { + if (streamRef.current) { + streamRef.current.getTracks().forEach(track => track.stop()); + streamRef.current = undefined; + } + }, [streamRef]); + + // Acquire MediaStream and connect source to worklet + const acquireAndConnectMediaStream = useCallback(async () => { + const audioCtx = audioCtxRef.current; + if (!audioCtx) { + return; + } + + const stream = await navigator.mediaDevices.getUserMedia({ + audio: { + channelCount: 1, + echoCancellation: true, + sampleRate + } + }); + streamRef.current = stream; + + const source = audioCtx.createMediaStreamSource(stream); + if (workletRef.current) { + source.connect(workletRef.current); + } + sourceRef.current = source; + }, [audioCtxRef, sampleRate, sourceRef, streamRef, workletRef]); + const stopRecording = useCallback(() => { if (workletRef.current) { workletRef.current.port.postMessage({ command: 'STOP' }); workletRef.current.disconnect(); workletRef.current = undefined; } - if (streamRef.current) { - streamRef.current.getTracks().forEach(track => track.stop()); - streamRef.current = undefined; - } - }, [streamRef, workletRef]); + stopMediaStream(); + }, [stopMediaStream, workletRef]); const initAudio = useCallback(async () => { if (audioCtxRef.current) { @@ -103,15 +144,7 @@ export function useRecorder(onAudioChunk: (base64: string, timestamp: string) => if (audioCtx.state === 'suspended') { await audioCtx.resume(); } - const stream = await navigator.mediaDevices.getUserMedia({ - audio: { - channelCount: 1, - echoCancellation: true, - sampleRate - } - }); - streamRef.current = stream; - const source = audioCtx.createMediaStreamSource(stream); + const worklet = new AudioWorkletNode(audioCtx, 'audio-recorder', { processorOptions: { bufferSize: (sampleRate * chunkIntervalMs) / MS_IN_SECOND @@ -131,16 +164,57 @@ export function useRecorder(onAudioChunk: (base64: string, timestamp: string) => } }; - source.connect(worklet); worklet.connect(audioCtx.destination); - worklet.port.postMessage({ command: 'START' }); workletRef.current = worklet; - }, [audioCtxRef, chunkIntervalMs, Date, initAudio, onAudioChunk, sampleRate]); + + await acquireAndConnectMediaStream(); + + worklet.port.postMessage({ command: 'START' }); + }, [ + Date, + acquireAndConnectMediaStream, + audioCtxRef, + chunkIntervalMs, + initAudio, + onAudioChunk, + sampleRate, + workletRef + ]); + + const muteRecording = useCallback(() => { + // Stop MediaStream (mic indicator OFF) and disconnect source + stopMediaStream(); + + if (sourceRef.current) { + sourceRef.current.disconnect(); + sourceRef.current = undefined; + } + + // Tell worklet to output silence + if (workletRef.current) { + workletRef.current.port.postMessage({ command: 'MUTE' }); + } + + // Return unmute function + return () => { + if (!audioCtxRef.current || !workletRef.current) { + return; + } + + // Tell worklet to use real audio + workletRef.current.port.postMessage({ command: 'UNMUTE' }); + + // Restart MediaStream and reconnect source (fire and forget) + acquireAndConnectMediaStream(); + }; + }, [acquireAndConnectMediaStream, audioCtxRef, sourceRef, stopMediaStream, workletRef]); const record = useCallback(() => { startRecording(); return stopRecording; }, [startRecording, stopRecording]); - return useMemo(() => ({ record }), [record]); + const mute = useCallback(() => muteRecording(), [muteRecording]); + + return useMemo(() => ({ record, mute }), [record, mute]); } diff --git a/packages/bundle/src/boot/actual/internal.ts 
b/packages/bundle/src/boot/actual/internal.ts index 5949642426..3996862fea 100644 --- a/packages/bundle/src/boot/actual/internal.ts +++ b/packages/bundle/src/boot/actual/internal.ts @@ -4,7 +4,11 @@ export { type ActivityStatusMiddleware, type TypingIndicatorMiddleware } from 'botframework-webchat-api'; -export { usePostVoiceActivity, useShouldShowMicrophoneButton } from 'botframework-webchat-api/internal'; +export { + usePostVoiceActivity, + useShouldShowMicrophoneButton, + useVoiceStateWritable +} from 'botframework-webchat-api/internal'; export { CodeHighlighterComposer, createIconComponent, diff --git a/packages/core/src/actions/setVoiceState.ts b/packages/core/src/actions/setVoiceState.ts index 53fc12b7c2..70feef25c3 100644 --- a/packages/core/src/actions/setVoiceState.ts +++ b/packages/core/src/actions/setVoiceState.ts @@ -1,6 +1,6 @@ const VOICE_SET_STATE = 'WEB_CHAT/VOICE_SET_STATE' as const; -type VoiceState = 'idle' | 'listening' | 'user_speaking' | 'processing' | 'bot_speaking'; +type VoiceState = 'idle' | 'listening' | 'muted' | 'user_speaking' | 'processing' | 'bot_speaking'; type VoiceSetStateAction = { type: typeof VOICE_SET_STATE; diff --git a/packages/fluent-theme/src/components/icon/FluentIcon.module.css b/packages/fluent-theme/src/components/icon/FluentIcon.module.css index f6f274c23a..172435c04c 100644 --- a/packages/fluent-theme/src/components/icon/FluentIcon.module.css +++ b/packages/fluent-theme/src/components/icon/FluentIcon.module.css @@ -50,11 +50,24 @@ --webchat__fluent-icon--mask: url('data:image/svg+xml;utf8,'); } -:global(.webchat) .icon--microphone { +:global(.webchat) .icon--microphone-active { --webchat__fluent-icon--mask: url('data:image/svg+xml;utf8,'); } +:global(.webchat) .icon--microphone-idle { + --webchat__fluent-icon--mask: url('data:image/svg+xml;utf8,'); +} + +:global(.webchat) .icon--microphone-mute { + --webchat__fluent-icon--mask: url('data:image/svg+xml;utf8,'); +} + :global(.webchat) .icon--audio-playing { --webchat__fluent-icon--mask: url('data:image/svg+xml;utf8,'); } + +:global(.webchat) .icon--dismiss { + --webchat__fluent-icon--mask: url('data:image/svg+xml;utf8,'); +} + /* #endregion */ diff --git a/packages/fluent-theme/src/components/sendBox/DismissToolbarButton.tsx b/packages/fluent-theme/src/components/sendBox/DismissToolbarButton.tsx new file mode 100644 index 0000000000..07f4160de0 --- /dev/null +++ b/packages/fluent-theme/src/components/sendBox/DismissToolbarButton.tsx @@ -0,0 +1,38 @@ +import { hooks } from 'botframework-webchat'; +import React, { memo, useCallback } from 'react'; + +import testIds from '../../testIds'; +import { FluentIcon } from '../icon'; +import { ToolbarButton } from './Toolbar'; + +const { useStopVoice, useLocalizer } = hooks; + +/** + * Dismiss button that stops the voice session and returns to the idle state. + * This is needed because the mic button only starts and mutes/unmutes recording, so it cannot be used to stop the recording.
+ */ +function DismissToolbarButton() { + const localize = useLocalizer(); + const stopVoice = useStopVoice(); + + const ariaLabel = localize('SPEECH_INPUT_STOP_RECORDING_ALT'); + + const handleDismissClick = useCallback(() => { + stopVoice(); + }, [stopVoice]); + + return ( + + + + ); +} + +DismissToolbarButton.displayName = 'SendBox.DismissToolbarButton'; + +export default memo(DismissToolbarButton); diff --git a/packages/fluent-theme/src/components/sendBox/MicrophoneToolbarButton.tsx b/packages/fluent-theme/src/components/sendBox/MicrophoneToolbarButton.tsx index 946f2a5e88..bda230760d 100644 --- a/packages/fluent-theme/src/components/sendBox/MicrophoneToolbarButton.tsx +++ b/packages/fluent-theme/src/components/sendBox/MicrophoneToolbarButton.tsx @@ -1,6 +1,7 @@ import { hooks } from 'botframework-webchat'; +import { useVoiceStateWritable } from 'botframework-webchat/internal'; import cx from 'classnames'; -import React, { memo, useCallback } from 'react'; +import React, { memo, useCallback, useMemo } from 'react'; import { useStyles } from '../../styles'; import testIds from '../../testIds'; @@ -9,24 +10,39 @@ import { ToolbarButton } from './Toolbar'; import styles from './Toolbar.module.css'; -const { useVoiceState, useStartVoice, useStopVoice, useLocalizer } = hooks; +const { useLocalizer, useStartVoice } = hooks; function MicrophoneToolbarButton() { - const [voiceState] = useVoiceState(); + const [voiceState, setVoiceState] = useVoiceStateWritable(); const classNames = useStyles(styles); const localize = useLocalizer(); const startVoice = useStartVoice(); - const stopVoice = useStopVoice(); const recording = voiceState !== 'idle'; + const icon = useMemo(() => { + switch (voiceState) { + case 'muted': + return 'microphone-mute'; + case 'bot_speaking': + return 'audio-playing'; + case 'idle': + return 'microphone-idle'; + default: + return 'microphone-active'; + } + }, [voiceState]); + const handleMicrophoneClick = useCallback(() => { - if (recording) { - stopVoice(); // Stop recognition and synthesis. - } else { + if (voiceState === 'idle') { startVoice(); // If it was stopped, will start recognition. It will synthesize when the bot respond. + } else if (voiceState === 'listening') { + setVoiceState('muted'); // listening <-> muted (VoiceRecorderBridge handles silent chunks) + } else if (voiceState === 'muted') { + setVoiceState('listening'); // listening <-> muted } - }, [recording, startVoice, stopVoice]); + // Other states (user_speaking, processing, bot_speaking) are non-interactive + }, [startVoice, setVoiceState, voiceState]); const ariaLabel = localize( recording ? 
'SPEECH_INPUT_MICROPHONE_BUTTON_OPEN_ALT' : 'SPEECH_INPUT_MICROPHONE_BUTTON_CLOSE_ALT' @@ -39,7 +55,7 @@ function MicrophoneToolbarButton() { - + ); } diff --git a/packages/fluent-theme/src/components/sendBox/SendBox.tsx b/packages/fluent-theme/src/components/sendBox/SendBox.tsx index 05fb2bf184..e738a4bc93 100644 --- a/packages/fluent-theme/src/components/sendBox/SendBox.tsx +++ b/packages/fluent-theme/src/components/sendBox/SendBox.tsx @@ -20,6 +20,7 @@ import { FluentIcon } from '../icon'; import { SuggestedActions } from '../suggestedActions'; import { TelephoneKeypadSurrogate, useTelephoneKeypadShown, type DTMF } from '../telephoneKeypad'; import AddAttachmentButton from './AddAttachmentButton'; +import DismissToolbarButton from './DismissToolbarButton'; import ErrorMessage from './ErrorMessage'; import useSpeechStateMessage from './private/useSpeechStateMessage'; import useSubmitError from './private/useSubmitError'; @@ -31,6 +32,7 @@ import { Toolbar, ToolbarButton, ToolbarSeparator } from './Toolbar'; import MicrophoneToolbarButton from './MicrophoneToolbarButton'; const { + useCapabilities, useFocus, useLocalizer, useMakeThumbnail, @@ -64,6 +66,7 @@ function SendBox(props: Props) { const variantClassName = useVariantClassName(styles); const errorMessageId = useUniqueId('sendbox__error-message-id'); const inputRef = useRef(null); + const isVoiceOnlyMode = useCapabilities(caps => caps.isVoiceOnlyMode); const localize = useLocalizer(); const makeThumbnail = useMakeThumbnail(); const postVoiceActivity = usePostVoiceActivity(); @@ -257,9 +260,10 @@ function SendBox(props: Props) { {!hideTelephoneKeypadButton && } {!disableFileUpload && } + {showMicrophoneButton && } - {showMicrophoneButton ? ( - + {showMicrophoneButton && isVoiceOnlyMode ? ( + ) : (