Add temporalio.contrib.pubsub module #1423
base: main
@@ -1,14 +1,31 @@

```python
import json
import logging
from collections.abc import AsyncGenerator, Callable
from datetime import datetime, timedelta, timezone

from google.adk.models import BaseLlm, LLMRegistry
from google.adk.models.llm_request import LlmRequest
from google.adk.models.llm_response import LlmResponse

import temporalio.workflow
from temporalio import activity, workflow
from temporalio.contrib.pubsub import PubSubClient
from temporalio.workflow import ActivityConfig

logger = logging.getLogger(__name__)

EVENTS_TOPIC = "events"


def _make_event(event_type: str, **data: object) -> bytes:
    return json.dumps(
        {
            "type": event_type,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "data": data,
        }
    ).encode()


@activity.defn
async def invoke_model(llm_request: LlmRequest) -> list[LlmResponse]:
```

Comment on lines +20 to +27 (Contributor): I think this is a bit of a tricky interface to publish to users that want to consume these events. Here, you have to read both this function def and all the call sites to see what shape the event payload takes.
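One way to address that concern, sketched below, is to define the event envelope once as a small typed structure so consumers have a single definition to read. The `StreamEvent` name and its fields are illustrative only, not something this PR provides:

```python
# Illustrative sketch, not part of this PR: a shared, typed envelope lets event
# consumers see the payload shape in one place instead of inferring it from
# _make_event and its call sites.
import json
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone


@dataclass
class StreamEvent:
    """Hypothetical typed event published to EVENTS_TOPIC."""

    type: str  # e.g. "TEXT_DELTA", "TOOL_CALL_START"
    data: dict[str, object] = field(default_factory=dict)
    timestamp: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )

    def to_bytes(self) -> bytes:
        return json.dumps(asdict(self)).encode()

    @classmethod
    def from_bytes(cls, raw: bytes) -> "StreamEvent":
        return cls(**json.loads(raw))
```

Publishers would then call something like `pubsub.publish(EVENTS_TOPIC, StreamEvent("TEXT_DELTA", {"delta": part.text}).to_bytes())`, and consumers round-trip the same type with `StreamEvent.from_bytes`.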
@@ -36,13 +53,78 @@ async def invoke_model(llm_request: LlmRequest) -> list[LlmResponse]:

```python
    ]


@activity.defn
async def invoke_model_streaming(llm_request: LlmRequest) -> list[LlmResponse]:
    """Streaming-aware model activity.

    Calls the LLM with stream=True, publishes TEXT_DELTA events via
    PubSubClient as tokens arrive, and returns the collected responses.

    The PubSubClient auto-detects the activity context to find the parent
    workflow for publishing.

    Args:
        llm_request: The LLM request containing model name and parameters.

    Returns:
        List of LLM responses from the model.
    """
    if llm_request.model is None:
        raise ValueError("No model name provided, could not create LLM.")

    llm = LLMRegistry.new_llm(llm_request.model)
    if not llm:
        raise ValueError(f"Failed to create LLM for model: {llm_request.model}")

    pubsub = PubSubClient.from_activity(batch_interval=0.1)
```
Contributor: Include units? I suspect the typical use case will be on the order of milliseconds, not seconds.
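For illustration only, that suggestion presumably points toward passing an explicit duration; assuming `from_activity` accepted a `timedelta` rather than a bare float of seconds (which this PR does not define), the call above might read:

```python
# Hypothetical variant of the call above: makes the units explicit.
# Assumes from_activity accepted a timedelta, which is not what this PR implements.
pubsub = PubSubClient.from_activity(batch_interval=timedelta(milliseconds=100))
```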
```python
    responses: list[LlmResponse] = []
    text_buffer = ""

    async with pubsub:
        pubsub.publish(EVENTS_TOPIC, _make_event("LLM_CALL_START"), force_flush=True)

        async for response in llm.generate_content_async(
            llm_request=llm_request, stream=True
        ):
            activity.heartbeat()
            responses.append(response)

            if response.content and response.content.parts:
```
Contributor: In the openai agents activity below you publish the full stream event; you don't unpack and inspect each event and publish specially named events. I think that's actually the better approach. I'd suggest just …
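A rough sketch of that suggestion (not this PR's code): publish each streamed response wholesale instead of unpacking it into named events. Serializing it this way assumes `LlmResponse` is a pydantic model exposing `model_dump_json()`, which is an assumption about ADK, not something shown in this diff:

```python
# Sketch of the reviewer's suggestion: forward the raw streaming response.
# Assumes LlmResponse supports pydantic's model_dump_json(); adjust if it does not.
pubsub.publish(EVENTS_TOPIC, response.model_dump_json().encode())
```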
```python
                for part in response.content.parts:
                    if part.text:
                        text_buffer += part.text
                        pubsub.publish(
                            EVENTS_TOPIC,
                            _make_event("TEXT_DELTA", delta=part.text),
                        )
                    if part.function_call:
                        pubsub.publish(
                            EVENTS_TOPIC,
                            _make_event(
                                "TOOL_CALL_START",
```
Contributor: I absolutely agree that we should be emitting lifecycle events like this. E.g. for this one in particular (…). Of course, this is moot if you accept the suggestion in my comment immediately above this one.
```python
                                tool_name=part.function_call.name,
                            ),
                        )

        if text_buffer:
            pubsub.publish(
                EVENTS_TOPIC,
                _make_event("TEXT_COMPLETE", text=text_buffer),
                force_flush=True,
```
Contributor: Do we need `force_flush=True` here? On a related note, I don't see any analogous …
```python
            )
        pubsub.publish(EVENTS_TOPIC, _make_event("LLM_CALL_COMPLETE"), force_flush=True)

    return responses


class TemporalModel(BaseLlm):
    """A Temporal-based LLM model that executes model invocations as activities."""

    def __init__(
        self,
        model_name: str,
        activity_config: ActivityConfig | None = None,
        streaming: bool = False,
        *,
        summary_fn: Callable[[LlmRequest], str | None] | None = None,
    ) -> None:
```
@@ -51,6 +133,9 @@ def __init__(

```python
        Args:
            model_name: The name of the model to use.
            activity_config: Configuration options for the activity execution.
            streaming: When True, the model activity uses the streaming LLM
                endpoint and publishes token events via PubSubClient. The
                workflow is unaffected -- it still receives complete responses.
            summary_fn: Optional callable that receives the LlmRequest and
                returns a summary string (or None) for the activity. Must be
                deterministic as it is called during workflow execution. If
```
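For context, a minimal usage sketch of the `streaming` flag as documented above; the model name and timeout value are placeholders, and the surrounding agent wiring is omitted:

```python
# Illustrative only: opt in to streaming so the model activity publishes token
# events over PubSubClient while the workflow still receives complete responses.
model = TemporalModel(
    "gemini-2.0-flash",  # placeholder model name
    activity_config=ActivityConfig(start_to_close_timeout=timedelta(minutes=2)),
    streaming=True,
)
```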
@@ -62,6 +147,7 @@ def __init__(

```python
        """
        super().__init__(model=model_name)
        self._model_name = model_name
        self._streaming = streaming
```
Contributor: Not always clear that this variable is a bool; renaming could help.
```python
        self._summary_fn = summary_fn
        self._activity_config = ActivityConfig(
            start_to_close_timeout=timedelta(seconds=60)
```
@@ -80,7 +166,8 @@ async def generate_content_async(

```python
        Args:
            llm_request: The LLM request containing model parameters and content.
            stream: Whether to stream the response (currently ignored; use the
                ``streaming`` constructor parameter instead).
```
Comment on lines +169 to +170 (Contributor): Seems like this could be safely honored since it's so seamless to swap between the underlying activities.

Contributor: It doesn't actually do the thing you would expect though, as it doesn't stream back.
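If the per-call `stream` argument were honored as the first comment suggests, the dispatch further down might look roughly like this; as the second comment notes, it still would not stream results back to the workflow, so this is purely illustrative:

```python
# Hypothetical: let a per-call stream=True select the streaming activity even
# when the model was constructed with streaming=False.
activity_fn = invoke_model_streaming if (stream or self._streaming) else invoke_model
```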
```python
        Yields:
            The responses from the model.
```
@@ -103,8 +190,9 @@ async def generate_content_async(

```python
        agent_name = llm_request.config.labels.get("adk_agent_name")
        if agent_name:
            config["summary"] = agent_name
        activity_fn = invoke_model_streaming if self._streaming else invoke_model
        responses = await workflow.execute_activity(
            activity_fn,
            args=[llm_request],
            **config,
        )
```
I see these `logger = ...` lines added in several files, but I don't see them used. Were these loggers for your local debugging?