Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 64 additions & 1 deletion sentry_sdk/integrations/litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from sentry_sdk.utils import event_from_exception

if TYPE_CHECKING:
from typing import Any, Dict
from typing import Any, Dict, List
from datetime import datetime

try:
Expand All @@ -35,6 +35,68 @@ def _get_metadata_dict(kwargs: "Dict[str, Any]") -> "Dict[str, Any]":
return metadata


def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
"""
Convert the message parts from OpenAI format to the `gen_ai.request.messages` format.
e.g:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "image_url",
"image_url": {
"url": "data:image/jpeg;base64,...",
"detail": "high"
}
}
]
}
becomes:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,..."
}
]
}
"""

def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]":
if item.get("type") == "image_url":
image_url = item.get("image_url") or {}
if image_url.get("url", "").startswith("data:"):
return {
"type": "blob",
"modality": "image",
"mime_type": item["image_url"]["url"].split(";base64,")[0],
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Incorrect mime_type includes data: prefix

The mime_type extraction is incorrect. Using split(";base64,")[0] on a URL like "data:image/jpeg;base64,..." returns "data:image/jpeg" instead of "image/jpeg" as documented in the function's docstring. The "data:" prefix needs to be removed to get the proper MIME type.

Fix in Cursor Fix in Web

This comment was marked as outdated.

"content": item["image_url"]["url"].split(";base64,")[1],
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: IndexError when data URL lacks base64 encoding marker

The code assumes all data URLs contain ";base64," after checking only that they start with "data:". Data URLs can omit base64 encoding (e.g., "data:image/jpeg,rawdata"). In such cases, split(";base64,") returns a single-element list, and accessing [1] will raise an IndexError.

Fix in Cursor Fix in Web

}
Comment on lines +84 to +85
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The data URI parsing in _convert_message_parts is unsafe. It can cause an IndexError with malformed URIs and incorrectly extracts the mime_type with the "data:" prefix.
Severity: HIGH | Confidence: High

🔍 Detailed Analysis

In the _convert_message_parts function, a data URI from item["image_url"]["url"] is parsed by splitting on ";base64,". This implementation has two flaws. First, if the URI is malformed and does not contain ";base64,", accessing the second element of the split result will raise an IndexError, crashing the callback as it is not wrapped in a try-except block. Second, the first element of the split is assigned to mime_type, which incorrectly includes the "data:" prefix (e.g., "data:image/png" instead of "image/png"), contradicting docstrings and other tests in the codebase.

💡 Suggested Fix

Implement robust parsing for the data URI. First, validate that the url string contains ";base64,". If not, handle the error gracefully. If it is valid, extract the MIME type by taking the part before ";base64," and removing the "data:" prefix. Consider using a regular expression for more reliable parsing.

🤖 Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: sentry_sdk/integrations/litellm.py#L84-L85

Potential issue: In the `_convert_message_parts` function, a data URI from
`item["image_url"]["url"]` is parsed by splitting on ";base64,". This implementation has
two flaws. First, if the URI is malformed and does not contain ";base64,", accessing the
second element of the split result will raise an `IndexError`, crashing the callback as
it is not wrapped in a `try-except` block. Second, the first element of the split is
assigned to `mime_type`, which incorrectly includes the "data:" prefix (e.g.,
"data:image/png" instead of "image/png"), contradicting docstrings and other tests in
the codebase.

Did we get this right? 👍 / 👎 to inform future reviews.
Reference ID: 7657057

else:
return {
"type": "uri",
"uri": item["image_url"]["url"],
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Unsafe direct access bypasses null-safe variable check

Line 78 creates a null-safe image_url variable using item.get("image_url") or {}, but lines 83, 84, and 89 bypass this by directly accessing item["image_url"]["url"]. If item["image_url"] is None, missing, or an empty dict, the code enters the else branch (since empty string doesn't start with data:), and line 89 crashes with TypeError or KeyError. The code should use the safe image_url variable instead of re-accessing item["image_url"].

Additional Locations (1)

Fix in Cursor Fix in Web

}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Else branch crashes when image_url is None or incomplete

The code on line 78 safely retrieves image_url = item.get("image_url") or {} to handle missing or None values, but the else branch on line 89 directly accesses item["image_url"]["url"] instead of using the safe image_url variable. When item["image_url"] is None, empty, or lacks a "url" key, the safe check on line 79 evaluates to False, routing to this else branch which then crashes with TypeError or KeyError.

Fix in Cursor Fix in Web

return item

for message in messages:
content = message.get("content")
if isinstance(content, list):
message["content"] = [_map_item(item) for item in content]
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Function mutates original kwargs messages corrupting API request

The _convert_message_parts function modifies message dictionaries in-place rather than creating copies. Since messages comes directly from kwargs.get("messages", []), this mutates the original request data that LiteLLM will use for the actual API call to the LLM provider, potentially corrupting the request. A deep copy of the messages should be made before modification.

Fix in Cursor Fix in Web

return messages


def _input_callback(kwargs: "Dict[str, Any]") -> None:
"""Handle the start of a request."""
integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
Expand Down Expand Up @@ -101,6 +163,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
messages = kwargs.get("messages", [])
if messages:
scope = sentry_sdk.get_current_scope()
messages = _convert_message_parts(messages)
messages_data = truncate_and_annotate_messages(messages, span, scope)
if messages_data is not None:
set_data_normalized(
Expand Down
161 changes: 161 additions & 0 deletions tests/integrations/litellm/test_litellm.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import json
import pytest
import time
Expand All @@ -23,6 +24,7 @@ async def __call__(self, *args, **kwargs):
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.integrations.litellm import (
LiteLLMIntegration,
_convert_message_parts,
_input_callback,
_success_callback,
_failure_callback,
Expand Down Expand Up @@ -753,3 +755,162 @@ def test_litellm_message_truncation(sentry_init, capture_events):
assert "small message 4" in str(parsed_messages[0])
assert "small message 5" in str(parsed_messages[1])
assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5


IMAGE_DATA = b"fake_image_data_12345"
IMAGE_B64 = base64.b64encode(IMAGE_DATA).decode("utf-8")
IMAGE_DATA_URI = f"data:image/png;base64,{IMAGE_B64}"


def test_binary_content_encoding_image_url(sentry_init, capture_events):
sentry_init(
integrations=[LiteLLMIntegration(include_prompts=True)],
traces_sample_rate=1.0,
send_default_pii=True,
)
events = capture_events()

messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "Look at this image:"},
{
"type": "image_url",
"image_url": {"url": IMAGE_DATA_URI, "detail": "high"},
},
],
}
]
mock_response = MockCompletionResponse()

with start_transaction(name="litellm test"):
kwargs = {"model": "gpt-4-vision-preview", "messages": messages}
_input_callback(kwargs)
_success_callback(kwargs, mock_response, datetime.now(), datetime.now())

(event,) = events
(span,) = event["spans"]
messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])

blob_item = next(
(
item
for msg in messages_data
if "content" in msg
for item in msg["content"]
if item.get("type") == "blob"
),
None,
)
assert blob_item is not None
assert blob_item["modality"] == "image"
assert blob_item["mime_type"] == "data:image/png"
assert IMAGE_B64 in blob_item["content"] or "[Filtered]" in str(
blob_item["content"]
)


def test_binary_content_encoding_mixed_content(sentry_init, capture_events):
sentry_init(
integrations=[LiteLLMIntegration(include_prompts=True)],
traces_sample_rate=1.0,
send_default_pii=True,
)
events = capture_events()

messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "Here is an image:"},
{
"type": "image_url",
"image_url": {"url": IMAGE_DATA_URI},
},
{"type": "text", "text": "What do you see?"},
],
}
]
mock_response = MockCompletionResponse()

with start_transaction(name="litellm test"):
kwargs = {"model": "gpt-4-vision-preview", "messages": messages}
_input_callback(kwargs)
_success_callback(kwargs, mock_response, datetime.now(), datetime.now())

(event,) = events
(span,) = event["spans"]
messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])

content_items = [
item for msg in messages_data if "content" in msg for item in msg["content"]
]
assert any(item.get("type") == "text" for item in content_items)
assert any(item.get("type") == "blob" for item in content_items)


def test_binary_content_encoding_uri_type(sentry_init, capture_events):
sentry_init(
integrations=[LiteLLMIntegration(include_prompts=True)],
traces_sample_rate=1.0,
send_default_pii=True,
)
events = capture_events()

messages = [
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {"url": "https://example.com/image.jpg"},
}
],
}
]
mock_response = MockCompletionResponse()

with start_transaction(name="litellm test"):
kwargs = {"model": "gpt-4-vision-preview", "messages": messages}
_input_callback(kwargs)
_success_callback(kwargs, mock_response, datetime.now(), datetime.now())

(event,) = events
(span,) = event["spans"]
messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])

uri_item = next(
(
item
for msg in messages_data
if "content" in msg
for item in msg["content"]
if item.get("type") == "uri"
),
None,
)
assert uri_item is not None
assert uri_item["uri"] == "https://example.com/image.jpg"


def test_convert_message_parts_direct():
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "Hello"},
{
"type": "image_url",
"image_url": {"url": IMAGE_DATA_URI},
},
],
}
]
converted = _convert_message_parts(messages)
blob_item = next(
item for item in converted[0]["content"] if item.get("type") == "blob"
)
assert blob_item["modality"] == "image"
assert blob_item["mime_type"] == "data:image/png"
assert IMAGE_B64 in blob_item["content"]
Loading