1313# limitations under the License.
1414
1515
16+ import asyncio
17+ import inspect
1618from timeit import default_timer
1719from typing import Any , Optional
1820
@@ -336,28 +338,39 @@ def _record_metrics(
336338 )
337339
338340 if result and getattr (result , "usage" , None ):
339- # Always record input tokens
340- input_attributes = {
341- ** common_attributes ,
342- GenAIAttributes .GEN_AI_TOKEN_TYPE : GenAIAttributes .GenAiTokenTypeValues .INPUT .value ,
343- }
344- instruments .token_usage_histogram .record (
345- result .usage .prompt_tokens ,
346- attributes = input_attributes ,
341+ # Get input tokens - Responses API uses input_tokens, Chat Completions uses prompt_tokens
342+ input_tokens = getattr (result .usage , "input_tokens" , None ) or getattr (
343+ result .usage , "prompt_tokens" , None
347344 )
348345
346+ if input_tokens is not None :
347+ input_attributes = {
348+ ** common_attributes ,
349+ GenAIAttributes .GEN_AI_TOKEN_TYPE : GenAIAttributes .GenAiTokenTypeValues .INPUT .value ,
350+ }
351+ instruments .token_usage_histogram .record (
352+ input_tokens ,
353+ attributes = input_attributes ,
354+ )
355+
349356 # For embeddings, don't record output tokens as all tokens are input tokens
350357 if (
351358 operation_name
352359 != GenAIAttributes .GenAiOperationNameValues .EMBEDDINGS .value
353360 ):
354- output_attributes = {
355- ** common_attributes ,
356- GenAIAttributes .GEN_AI_TOKEN_TYPE : GenAIAttributes .GenAiTokenTypeValues .COMPLETION .value ,
357- }
358- instruments .token_usage_histogram .record (
359- result .usage .completion_tokens , attributes = output_attributes
360- )
361+ # Get output tokens - Responses API uses output_tokens, Chat Completions uses completion_tokens
362+ output_tokens = getattr (
363+ result .usage , "output_tokens" , None
364+ ) or getattr (result .usage , "completion_tokens" , None )
365+
366+ if output_tokens is not None :
367+ output_attributes = {
368+ ** common_attributes ,
369+ GenAIAttributes .GEN_AI_TOKEN_TYPE : GenAIAttributes .GenAiTokenTypeValues .COMPLETION .value ,
370+ }
371+ instruments .token_usage_histogram .record (
372+ output_tokens , attributes = output_attributes
373+ )
361374
362375
363376def _set_response_attributes (
@@ -403,6 +416,50 @@ def _set_response_attributes(
403416 )
404417
405418
def _set_responses_response_attributes(
    span, result, logger: Logger, capture_content: bool
):
    """Populate *span* with attributes taken from a Responses API result.

    The Responses API differs from Chat Completions: results carry an
    ``output`` list (each item with a ``status``) instead of ``choices``
    with a ``finish_reason``, and usage counts are named
    ``input_tokens`` / ``output_tokens``.
    """
    set_span_attribute(
        span,
        GenAIAttributes.GEN_AI_RESPONSE_MODEL,
        getattr(result, "model", None),
    )

    response_id = getattr(result, "id", None)
    if response_id:
        set_span_attribute(
            span, GenAIAttributes.GEN_AI_RESPONSE_ID, response_id
        )

    # Map each output item's status onto the finish-reasons attribute,
    # falling back to "error" when an item reports no status.
    output_items = getattr(result, "output", None)
    if output_items:
        finish_reasons = [
            getattr(item, "status", None) or "error" for item in output_items
        ]
        set_span_attribute(
            span,
            GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS,
            finish_reasons,
        )

    # Usage block: Responses API naming is input_tokens / output_tokens.
    usage = getattr(result, "usage", None)
    if usage:
        input_tokens = getattr(usage, "input_tokens", None)
        if input_tokens is not None:
            set_span_attribute(
                span,
                GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS,
                input_tokens,
            )

        output_tokens = getattr(usage, "output_tokens", None)
        if output_tokens is not None:
            set_span_attribute(
                span,
                GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS,
                output_tokens,
            )
462+
406463def _set_embeddings_response_attributes (
407464 span : Span ,
408465 result : Any ,
@@ -539,7 +596,7 @@ def cleanup(self):
539596 )
540597
541598 for idx , choice in enumerate (self .choice_buffers ):
542- message = {"role" : "assistant" }
599+ message : dict [ str , Any ] = {"role" : "assistant" }
543600 if self .capture_content and choice .text_content :
544601 message ["content" ] = "" .join (choice .text_content )
545602 if choice .tool_calls_buffers :
@@ -605,8 +662,17 @@ async def __aexit__(self, exc_type, exc_val, exc_tb):
605662 return False # Propagate the exception
606663
607664 def close (self ):
608- self .stream .close ()
609- self .cleanup ()
665+ try :
666+ close_result = self .stream .close ()
667+ if inspect .isawaitable (close_result ):
668+ try :
669+ loop = asyncio .get_running_loop ()
670+ except RuntimeError :
671+ asyncio .run (close_result )
672+ else :
673+ loop .create_task (close_result )
674+ finally :
675+ self .cleanup ()
610676
611677 def __iter__ (self ):
612678 return self
@@ -629,7 +695,7 @@ def __next__(self):
629695
630696 async def __anext__ (self ):
631697 try :
632- chunk = await self .stream .__anext__ ()
698+ chunk = await self .stream .__anext__ () # type: ignore[attr-defined]
633699 self .process_chunk (chunk )
634700 return chunk
635701 except StopAsyncIteration :
0 commit comments