13 | 13 | # limitations under the License. |
14 | 14 |
15 | 15 |
16 | | -import asyncio |
17 | | -import inspect |
18 | | -from collections.abc import AsyncIterator, Iterator |
19 | 16 | from timeit import default_timer |
20 | | -from typing import Any, Optional, cast |
| 17 | +from typing import Any, Optional |
21 | 18 |
22 | | -from opentelemetry._logs import Logger, LogRecord |
23 | | -from opentelemetry.context import get_current |
| 19 | +from opentelemetry._logs import Logger |
24 | 20 | from opentelemetry.semconv._incubating.attributes import ( |
25 | 21 | gen_ai_attributes as GenAIAttributes, |
26 | 22 | ) |
27 | 23 | from opentelemetry.semconv._incubating.attributes import ( |
28 | 24 | server_attributes as ServerAttributes, |
29 | 25 | ) |
30 | 26 | from opentelemetry.trace import Span, SpanKind, Tracer |
31 | | -from opentelemetry.trace.propagation import set_span_in_context |
32 | 27 |
33 | 28 | from .instruments import Instruments |
| 29 | +from .streaming import ( |
| 30 | + AsyncStreamWrapper, |
| 31 | + StreamWrapper, |
| 32 | + SyncStreamWrapper, |
| 33 | +) |
| 34 | + |
| 35 | +# Re-export StreamWrapper for backwards compatibility |
| 36 | +__all__ = ["StreamWrapper"] |
| 37 | + |
34 | 38 | from .utils import ( |
35 | 39 | choice_to_event, |
36 | 40 | get_llm_request_attributes, |
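The `__all__` re-export added above keeps older imports working after the classes move to the new `.streaming` module. A minimal sketch of the consumer-side effect; the dotted module path is an assumption inferred from the package layout, not shown in this diff:

```python
# Assumed consumer-side effect of the re-export above: an import that
# previously resolved against this module keeps working after the move.
# The dotted path below is assumed, not confirmed by this diff.
from opentelemetry.instrumentation.openai_v2.patch import StreamWrapper
```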
@@ -73,9 +77,7 @@ def traced_method(wrapped, instance, args, kwargs): |
73 | 77 | else: |
74 | 78 | parsed_result = result |
75 | 79 | if is_streaming(kwargs): |
76 | | - return StreamWrapper( |
77 | | - parsed_result, span, logger, capture_content |
78 | | - ) |
| 80 | + return SyncStreamWrapper(parsed_result, span, logger, capture_content) |
79 | 81 |
80 | 82 | if span.is_recording(): |
81 | 83 | _set_response_attributes( |
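Caller-visible behavior of the sync path, as a hedged sketch assuming the standard openai-python v1 client: with instrumentation applied, `create(stream=True)` now returns a `SyncStreamWrapper` that acts as a drop-in replacement for the raw stream.

```python
from openai import OpenAI

# Sketch (assumes openai-python v1): the instrumented create(stream=True)
# returns a SyncStreamWrapper, which iterates like the raw stream.
client = OpenAI()
stream = client.chat.completions.create(
    model="gpt-4o-mini",  # hypothetical model choice
    messages=[{"role": "user", "content": "hi"}],
    stream=True,
)
for chunk in stream:  # each chunk is recorded by the wrapper, then passed through
    print(chunk.choices[0].delta.content or "", end="")
# exhausting the stream triggers cleanup(): response attributes are set and the span ends
```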
@@ -137,9 +139,7 @@ async def traced_method(wrapped, instance, args, kwargs): |
137 | 139 | else: |
138 | 140 | parsed_result = result |
139 | 141 | if is_streaming(kwargs): |
140 | | - return StreamWrapper( |
141 | | - parsed_result, span, logger, capture_content |
142 | | - ) |
| 142 | + return AsyncStreamWrapper(parsed_result, span, logger, capture_content) |
143 | 143 |
144 | 144 | if span.is_recording(): |
145 | 145 | _set_response_attributes( |
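The async path mirrors this. A sketch under the assumption that `AsyncStreamWrapper` implements `__aiter__`/`__anext__` plus the async context-manager protocol, like the removed class below did:

```python
import asyncio

from openai import AsyncOpenAI

# Async analog (sketch): the instrumented stream still works with
# `async for`, flowing through AsyncStreamWrapper.__anext__.
async def main() -> None:
    client = AsyncOpenAI()
    stream = await client.chat.completions.create(
        model="gpt-4o-mini",  # hypothetical model choice
        messages=[{"role": "user", "content": "hi"}],
        stream=True,
    )
    async for chunk in stream:
        print(chunk.choices[0].delta.content or "", end="")

asyncio.run(main())
```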
@@ -487,290 +487,3 @@ def _set_embeddings_response_attributes( |
487 | 487 | result.usage.prompt_tokens, |
488 | 488 | ) |
489 | 489 | # Don't set output tokens for embeddings as all tokens are input tokens |
490 | | - |
491 | | - |
492 | | -class ToolCallBuffer: |
493 | | - def __init__(self, index, tool_call_id, function_name): |
494 | | - self.index = index |
495 | | - self.function_name = function_name |
496 | | - self.tool_call_id = tool_call_id |
497 | | - self.arguments = [] |
498 | | - |
499 | | - def append_arguments(self, arguments): |
500 | | - self.arguments.append(arguments) |
501 | | - |
502 | | - |
503 | | -class ChoiceBuffer: |
504 | | - def __init__(self, index): |
505 | | - self.index = index |
506 | | - self.finish_reason = None |
507 | | - self.text_content = [] |
508 | | - self.tool_calls_buffers = [] |
509 | | - |
510 | | - def append_text_content(self, content): |
511 | | - self.text_content.append(content) |
512 | | - |
513 | | - def append_tool_call(self, tool_call): |
514 | | - idx = tool_call.index |
515 | | - # make sure we have enough tool call buffers |
516 | | - for _ in range(len(self.tool_calls_buffers), idx + 1): |
517 | | - self.tool_calls_buffers.append(None) |
518 | | - |
519 | | - if not self.tool_calls_buffers[idx]: |
520 | | - self.tool_calls_buffers[idx] = ToolCallBuffer( |
521 | | - idx, tool_call.id, tool_call.function.name |
522 | | - ) |
523 | | - self.tool_calls_buffers[idx].append_arguments( |
524 | | - tool_call.function.arguments |
525 | | - ) |
526 | | - |
527 | | - |
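For context on the two buffer classes being moved: OpenAI streams a function call's JSON arguments fragmented across many chunks that share the same `index`, and only the first fragment carries the id and name, which is why `append_tool_call` creates the `ToolCallBuffer` once and then only appends. A worked example using `SimpleNamespace` stand-ins for the delta objects:

```python
from types import SimpleNamespace as NS

# Worked example of how ChoiceBuffer reassembles a streamed tool call
# (hypothetical fragments; continuation deltas carry id=None, name=None).
buf = ChoiceBuffer(0)
first = NS(index=0, id="call_1", function=NS(name="get_weather", arguments='{"ci'))
rest = NS(index=0, id=None, function=NS(name=None, arguments='ty": "Paris"}'))
buf.append_tool_call(first)
buf.append_tool_call(rest)
assert "".join(buf.tool_calls_buffers[0].arguments) == '{"city": "Paris"}'
```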
528 | | -class StreamWrapper: |
529 | | - span: Span |
530 | | - response_id: Optional[str] = None |
531 | | - response_model: Optional[str] = None |
532 | | - service_tier: Optional[str] = None |
533 | | - finish_reasons: list = [] |
534 | | - prompt_tokens: Optional[int] = 0 |
535 | | - completion_tokens: Optional[int] = 0 |
536 | | - |
537 | | - def __init__( |
538 | | - self, |
539 | | - stream: Iterator[Any] | AsyncIterator[Any], |
540 | | - span: Span, |
541 | | - logger: Logger, |
542 | | - capture_content: bool, |
543 | | - ): |
544 | | - self.stream = stream |
545 | | - self.span = span |
546 | | - self.choice_buffers = [] |
547 | | - self._span_started = False |
548 | | - self.capture_content = capture_content |
549 | | - |
550 | | - self.logger = logger |
551 | | - self.setup() |
552 | | - |
553 | | - def setup(self): |
554 | | - if not self._span_started: |
555 | | - self._span_started = True |
556 | | - |
557 | | - def cleanup(self): |
558 | | - if self._span_started: |
559 | | - if self.span.is_recording(): |
560 | | - if self.response_model: |
561 | | - set_span_attribute( |
562 | | - self.span, |
563 | | - GenAIAttributes.GEN_AI_RESPONSE_MODEL, |
564 | | - self.response_model, |
565 | | - ) |
566 | | - |
567 | | - if self.response_id: |
568 | | - set_span_attribute( |
569 | | - self.span, |
570 | | - GenAIAttributes.GEN_AI_RESPONSE_ID, |
571 | | - self.response_id, |
572 | | - ) |
573 | | - |
574 | | - set_span_attribute( |
575 | | - self.span, |
576 | | - GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS, |
577 | | - self.prompt_tokens, |
578 | | - ) |
579 | | - set_span_attribute( |
580 | | - self.span, |
581 | | - GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS, |
582 | | - self.completion_tokens, |
583 | | - ) |
584 | | - |
585 | | - set_span_attribute( |
586 | | - self.span, |
587 | | - GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER, |
588 | | - self.service_tier, |
589 | | - ) |
590 | | - |
591 | | - set_span_attribute( |
592 | | - self.span, |
593 | | - GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, |
594 | | - self.finish_reasons, |
595 | | - ) |
596 | | - |
597 | | - for idx, choice in enumerate(self.choice_buffers): |
598 | | - message: dict[str, Any] = {"role": "assistant"} |
599 | | - if self.capture_content and choice.text_content: |
600 | | - message["content"] = "".join(choice.text_content) |
601 | | - if choice.tool_calls_buffers: |
602 | | - tool_calls = [] |
603 | | - for tool_call in choice.tool_calls_buffers: |
604 | | - function = {"name": tool_call.function_name} |
605 | | - if self.capture_content: |
606 | | - function["arguments"] = "".join( |
607 | | - tool_call.arguments |
608 | | - ) |
609 | | - tool_call_dict = { |
610 | | - "id": tool_call.tool_call_id, |
611 | | - "type": "function", |
612 | | - "function": function, |
613 | | - } |
614 | | - tool_calls.append(tool_call_dict) |
615 | | - message["tool_calls"] = tool_calls |
616 | | - |
617 | | - body = { |
618 | | - "index": idx, |
619 | | - "finish_reason": choice.finish_reason or "error", |
620 | | - "message": message, |
621 | | - } |
622 | | - |
623 | | - event_attributes = { |
624 | | - GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value |
625 | | - } |
626 | | - context = set_span_in_context(self.span, get_current()) |
627 | | - self.logger.emit( |
628 | | - LogRecord( |
629 | | - event_name="gen_ai.choice", |
630 | | - attributes=event_attributes, |
631 | | - body=body, |
632 | | - context=context, |
633 | | - ) |
634 | | - ) |
635 | | - |
636 | | - self.span.end() |
637 | | - self._span_started = False |
638 | | - |
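For reference, `cleanup()` above assembles one `gen_ai.choice` log body per buffered choice. With content capture enabled and a single completed tool call, the emitted body would look like this (illustrative values):

```python
# Illustrative gen_ai.choice log body built by cleanup() for one buffered
# choice that finished in a tool call (values are made up):
body = {
    "index": 0,
    "finish_reason": "tool_calls",
    "message": {
        "role": "assistant",
        "tool_calls": [
            {
                "id": "call_1",
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "arguments": '{"city": "Paris"}',
                },
            }
        ],
    },
}
```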
639 | | - def __enter__(self): |
640 | | - self.setup() |
641 | | - return self |
642 | | - |
643 | | - def __exit__(self, exc_type, exc_val, exc_tb): |
644 | | - try: |
645 | | - if exc_type is not None: |
646 | | - handle_span_exception(self.span, exc_val) |
647 | | - finally: |
648 | | - self.cleanup() |
649 | | - return False # Propagate the exception |
650 | | - |
651 | | - async def __aenter__(self): |
652 | | - self.setup() |
653 | | - return self |
654 | | - |
655 | | - async def __aexit__(self, exc_type, exc_val, exc_tb): |
656 | | - try: |
657 | | - if exc_type is not None: |
658 | | - handle_span_exception(self.span, exc_val) |
659 | | - finally: |
660 | | - self.cleanup() |
661 | | - return False # Propagate the exception |
662 | | - |
663 | | - def close(self): |
664 | | - try: |
665 | | - close_fn = getattr(self.stream, "close", None) |
666 | | - if not callable(close_fn): |
667 | | - return |
668 | | - |
669 | | - close_result = close_fn() |
670 | | - if inspect.isawaitable(close_result): |
671 | | - try: |
672 | | - loop = asyncio.get_running_loop() |
673 | | - except RuntimeError: |
674 | | - asyncio.run(cast(Any, close_result)) |
675 | | - else: |
676 | | - loop.create_task(cast(Any, close_result)) |
677 | | - finally: |
678 | | - self.cleanup() |
679 | | - |
680 | | - def __iter__(self): |
681 | | - return self |
682 | | - |
683 | | - def __aiter__(self): |
684 | | - return self |
685 | | - |
686 | | - def __next__(self): |
687 | | - try: |
688 | | - chunk = next(cast(Iterator[Any], self.stream)) |
689 | | - self.process_chunk(chunk) |
690 | | - return chunk |
691 | | - except StopIteration: |
692 | | - self.cleanup() |
693 | | - raise |
694 | | - except Exception as error: |
695 | | - handle_span_exception(self.span, error) |
696 | | - self.cleanup() |
697 | | - raise |
698 | | - |
699 | | - async def __anext__(self): |
700 | | - try: |
701 | | - chunk = await anext(cast(AsyncIterator[Any], self.stream)) |
702 | | - self.process_chunk(chunk) |
703 | | - return chunk |
704 | | - except StopAsyncIteration: |
705 | | - self.cleanup() |
706 | | - raise |
707 | | - except Exception as error: |
708 | | - handle_span_exception(self.span, error) |
709 | | - self.cleanup() |
710 | | - raise |
711 | | - |
712 | | - def set_response_model(self, chunk): |
713 | | - if self.response_model: |
714 | | - return |
715 | | - |
716 | | - if getattr(chunk, "model", None): |
717 | | - self.response_model = chunk.model |
718 | | - |
719 | | - def set_response_id(self, chunk): |
720 | | - if self.response_id: |
721 | | - return |
722 | | - |
723 | | - if getattr(chunk, "id", None): |
724 | | - self.response_id = chunk.id |
725 | | - |
726 | | - def set_response_service_tier(self, chunk): |
727 | | - if self.service_tier: |
728 | | - return |
729 | | - |
730 | | - if getattr(chunk, "service_tier", None): |
731 | | - self.service_tier = chunk.service_tier |
732 | | - |
733 | | - def build_streaming_response(self, chunk): |
734 | | - if getattr(chunk, "choices", None) is None: |
735 | | - return |
736 | | - |
737 | | - choices = chunk.choices |
738 | | - for choice in choices: |
739 | | - if not choice.delta: |
740 | | - continue |
741 | | - |
742 | | - # make sure we have enough choice buffers |
743 | | - for idx in range(len(self.choice_buffers), choice.index + 1): |
744 | | - self.choice_buffers.append(ChoiceBuffer(idx)) |
745 | | - |
746 | | - if choice.finish_reason: |
747 | | - self.choice_buffers[ |
748 | | - choice.index |
749 | | - ].finish_reason = choice.finish_reason |
750 | | - |
751 | | - if choice.delta.content is not None: |
752 | | - self.choice_buffers[choice.index].append_text_content( |
753 | | - choice.delta.content |
754 | | - ) |
755 | | - |
756 | | - if choice.delta.tool_calls is not None: |
757 | | - for tool_call in choice.delta.tool_calls: |
758 | | - self.choice_buffers[choice.index].append_tool_call( |
759 | | - tool_call |
760 | | - ) |
761 | | - |
762 | | - def set_usage(self, chunk): |
763 | | - if getattr(chunk, "usage", None): |
764 | | - self.completion_tokens = chunk.usage.completion_tokens |
765 | | - self.prompt_tokens = chunk.usage.prompt_tokens |
766 | | - |
767 | | - def process_chunk(self, chunk): |
768 | | - self.set_response_id(chunk) |
769 | | - self.set_response_model(chunk) |
770 | | - self.set_response_service_tier(chunk) |
771 | | - self.build_streaming_response(chunk) |
772 | | - self.set_usage(chunk) |
773 | | - |
774 | | - def parse(self): |
775 | | - """Called when using with_raw_response with stream=True""" |
776 | | - return self |
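The removed classes presumably land in the new `.streaming` module largely unchanged, split so each subclass keeps only the protocol methods that match its client type. A plausible shape inferred from the names imported at the top of this diff; this is an assumption, as the file itself is not shown here:

```python
# Assumed shape of the new `.streaming` module (not shown in this diff).
class StreamWrapper:
    """Shared span/log bookkeeping and chunk processing (as removed above)."""


class SyncStreamWrapper(StreamWrapper):
    def __iter__(self): ...
    def __next__(self): ...            # sync-only iteration
    def __enter__(self): ...
    def __exit__(self, *exc): ...


class AsyncStreamWrapper(StreamWrapper):
    def __aiter__(self): ...
    async def __anext__(self): ...     # async-only iteration
    async def __aenter__(self): ...
    async def __aexit__(self, *exc): ...
```

Splitting the wrapper this way removes the `inspect.isawaitable` juggling in `close()` above: each subclass knows statically whether its underlying stream is sync or async.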