Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ option(
"Whether to include gzip compression for the OTLP http exporter in the SDK"
OFF)

option(
WITH_OTLP_UTF8_VALIDITY
"Whether to enable UTF-8 validity checks for the OTLP exporter in the SDK" ON)

option(WITH_CURL_LOGGING "Whether to enable select CURL verbosity in OTel logs"
OFF)

Expand Down
29 changes: 29 additions & 0 deletions cmake/protobuf.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,32 @@ endif()
set(PROTOBUF_PROTOC_EXECUTABLE "${Protobuf_PROTOC_EXECUTABLE}")

message(STATUS "PROTOBUF_PROTOC_EXECUTABLE=${PROTOBUF_PROTOC_EXECUTABLE}")

# When protobuf is fetched or built as a gRPC submodule utf8_range is compiled
# but may need the alias target created
if(WITH_OTLP_UTF8_VALIDITY)
if(TARGET utf8_validity AND NOT TARGET utf8_range::utf8_validity)
add_library(utf8_range::utf8_validity ALIAS utf8_validity)
if(protobuf_SOURCE_DIR AND EXISTS
"${protobuf_SOURCE_DIR}/third_party/utf8_range")
target_include_directories(
utf8_validity
PUBLIC
"$<BUILD_INTERFACE:${protobuf_SOURCE_DIR}/third_party/utf8_range>")
endif()
endif()

# For the find_package(Protobuf) path, utf8_range can also be found
if(NOT TARGET utf8_range::utf8_validity)
find_package(utf8_range CONFIG QUIET)
endif()

# The utf8_range target should now be available for use in otel-cpp
if(NOT TARGET utf8_range::utf8_validity)
message(
WARNING
"Target (utf8_range::utf8_validity) was not found. "
"We will not validate UTF-8 strings in the OTLP exporter."
"Which may lead to record drops when sending non UTF-8 data as string.")
endif()
endif()
14 changes: 14 additions & 0 deletions exporters/otlp/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,19 @@ load("//bazel:otel_cc_benchmark.bzl", "otel_cc_benchmark")

package(default_visibility = ["//visibility:public"])

# utf8_range from protobuf source tree for UTF-8 validation in OTLP
# value conversion. This mirrors the CMake opentelemetry_otlp_utf8_validity
# target.
cc_library(
name = "otlp_utf8_validity",
defines = ["ENABLE_OTLP_UTF8_VALIDITY"],
tags = ["otlp"],
deps = [
"@com_google_protobuf//third_party/utf8_range",
"@com_google_protobuf//third_party/utf8_range:utf8_validity",
],
)

cc_library(
name = "otlp_recordable",
srcs = [
Expand All @@ -32,6 +45,7 @@ cc_library(
strip_include_prefix = "include",
tags = ["otlp"],
deps = [
":otlp_utf8_validity",
"//sdk/src/logs",
"//sdk/src/resource",
"//sdk/src/trace",
Expand Down
18 changes: 15 additions & 3 deletions exporters/otlp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,17 @@ target_include_directories(

set(OPENTELEMETRY_OTLP_TARGETS opentelemetry_otlp_recordable)

target_link_libraries(opentelemetry_otlp_recordable PUBLIC opentelemetry_logs)
target_link_libraries(opentelemetry_otlp_recordable
PUBLIC opentelemetry_metrics)
if(WITH_OTLP_UTF8_VALIDITY AND TARGET utf8_range::utf8_validity)
target_link_libraries(
opentelemetry_otlp_recordable
PUBLIC opentelemetry_logs opentelemetry_metrics
PRIVATE utf8_range::utf8_validity)
target_compile_definitions(opentelemetry_otlp_recordable
PRIVATE ENABLE_OTLP_UTF8_VALIDITY)
else()
target_link_libraries(opentelemetry_otlp_recordable
PUBLIC opentelemetry_logs opentelemetry_metrics)
endif()

#
# opentelemetry_exporter_otlp_builder_utils
Expand Down Expand Up @@ -875,6 +883,10 @@ if(BUILD_TESTING)
add_executable(otlp_log_recordable_test test/otlp_log_recordable_test.cc)
target_link_libraries(otlp_log_recordable_test ${GTEST_BOTH_LIBRARIES}
${CMAKE_THREAD_LIBS_INIT} opentelemetry_otlp_recordable)
if(WITH_OTLP_UTF8_VALIDITY AND TARGET utf8_range::utf8_validity)
target_compile_definitions(otlp_log_recordable_test
PRIVATE ENABLE_OTLP_UTF8_VALIDITY)
endif()
gtest_add_tests(
TARGET otlp_log_recordable_test
TEST_PREFIX exporter.otlp.
Expand Down
69 changes: 65 additions & 4 deletions exporters/otlp/src/otlp_populate_attribute_utils.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

#if defined(ENABLE_OTLP_UTF8_VALIDITY)
# include <utf8_validity.h>
#endif

#include <stdint.h>
#include <string>
#include <unordered_map>
Expand Down Expand Up @@ -81,12 +85,35 @@ void OtlpPopulateAttributeUtils::PopulateAnyValue(
}
else if (nostd::holds_alternative<const char *>(value))
{
proto_value->set_string_value(nostd::get<const char *>(value));
const char *str_value = nostd::get<const char *>(value);
#if defined(ENABLE_OTLP_UTF8_VALIDITY)
if (utf8_range::IsStructurallyValid(str_value))
Comment thread
owent marked this conversation as resolved.
{
proto_value->set_string_value(str_value);
}
else
{
proto_value->set_bytes_value(str_value, strlen(str_value));
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here, strlen means the instrumented application can send binary in a string, as long as the binary does not contains a nul character ...

A properly instrumented application will need to provide a sequence of bytes instead of a string anyway, to work properly.

But this change (which faithfully implement the specs) makes every application slower, due to the added UTF-8 validation, to try to accommodate poorly instrumented applications using the wrong types.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We receive a const char* that we assume is null-terminated, but it may contain invalid UTF-8.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should include <cstring> for strlen then?

}
#else
proto_value->set_string_value(str_value);
#endif
}
else if (nostd::holds_alternative<nostd::string_view>(value))
{
proto_value->set_string_value(nostd::get<nostd::string_view>(value).data(),
nostd::get<nostd::string_view>(value).size());
nostd::string_view str_value = nostd::get<nostd::string_view>(value);
#if defined(ENABLE_OTLP_UTF8_VALIDITY)
if (utf8_range::IsStructurallyValid({str_value.data(), str_value.size()}))
{
proto_value->set_string_value(str_value.data(), str_value.size());
}
else
{
proto_value->set_bytes_value(str_value.data(), str_value.size());
}
#else
proto_value->set_string_value(str_value.data(), str_value.size());
#endif
}
else if (nostd::holds_alternative<nostd::span<const uint8_t>>(value))
{
Expand Down Expand Up @@ -159,7 +186,18 @@ void OtlpPopulateAttributeUtils::PopulateAnyValue(
auto array_value = proto_value->mutable_array_value();
for (const auto &val : nostd::get<nostd::span<const nostd::string_view>>(value))
{
#if defined(ENABLE_OTLP_UTF8_VALIDITY)
if (utf8_range::IsStructurallyValid({val.data(), val.size()}))
{
array_value->add_values()->set_string_value(val.data(), val.size());
}
else
{
array_value->add_values()->set_bytes_value(val.data(), val.size());
}
#else
array_value->add_values()->set_string_value(val.data(), val.size());
#endif
}
}
}
Expand Down Expand Up @@ -224,7 +262,19 @@ void OtlpPopulateAttributeUtils::PopulateAnyValue(
}
else if (nostd::holds_alternative<std::string>(value))
{
proto_value->set_string_value(nostd::get<std::string>(value));
const std::string &str_value = nostd::get<std::string>(value);
#if defined(ENABLE_OTLP_UTF8_VALIDITY)
if (utf8_range::IsStructurallyValid(str_value))
{
proto_value->set_string_value(str_value);
}
else
{
proto_value->set_bytes_value(str_value);
}
#else
proto_value->set_string_value(str_value);
#endif
}
else if (nostd::holds_alternative<std::vector<bool>>(value))
{
Expand Down Expand Up @@ -281,7 +331,18 @@ void OtlpPopulateAttributeUtils::PopulateAnyValue(
auto array_value = proto_value->mutable_array_value();
for (const auto &val : nostd::get<std::vector<std::string>>(value))
{
#if defined(ENABLE_OTLP_UTF8_VALIDITY)
if (utf8_range::IsStructurallyValid(val))
{
array_value->add_values()->set_string_value(val);
}
else
{
array_value->add_values()->set_bytes_value(val);
}
#else
array_value->add_values()->set_string_value(val);
#endif
}
}
}
Expand Down
33 changes: 33 additions & 0 deletions exporters/otlp/test/otlp_log_recordable_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,17 @@ TEST(OtlpLogRecordable, Basic)
memcmp(reinterpret_cast<const void *>(rec.log_record().body().bytes_value().data()),
reinterpret_cast<const void *>(byte_arr), 5));
EXPECT_EQ(rec.log_record().body().bytes_value().size(), 5);

#if defined(ENABLE_OTLP_UTF8_VALIDITY)
// Test bytes body: non-UTF8 string_view is automatically converted to bytes_value
const char byte_body[] = {'\x80', '\x81', '\x82', '\x83', '\x84'};
nostd::string_view byte_body_view(byte_body, 5);
rec.SetBody(byte_body_view);
EXPECT_TRUE(0 ==
memcmp(reinterpret_cast<const void *>(rec.log_record().body().bytes_value().data()),
reinterpret_cast<const void *>(byte_body), 5));
EXPECT_EQ(rec.log_record().body().bytes_value().size(), 5);
#endif
}

TEST(OtlpLogRecordable, GetResource)
Expand Down Expand Up @@ -128,6 +139,14 @@ TEST(OtlpLogRecordable, SetSingleAttribute)
nostd::span<const uint8_t>{reinterpret_cast<const uint8_t *>(byte_arr), 4});
rec.SetAttribute(byte_key, byte_val);

#if defined(ENABLE_OTLP_UTF8_VALIDITY)
// Non-UTF8 string_view is automatically converted to bytes_value
nostd::string_view non_utf8_string_key = "non_utf8_string_attr";
const char non_utf8_string_arr[] = {'\x80', '\x81', '\x82', '\x83'};
common::AttributeValue non_utf8_string_val(nostd::string_view(non_utf8_string_arr, 4));
rec.SetAttribute(non_utf8_string_key, non_utf8_string_val);
#endif

int checked_attributes = 0;
for (auto &attribute : rec.log_record().attributes())
{
Expand All @@ -154,8 +173,22 @@ TEST(OtlpLogRecordable, SetSingleAttribute)
reinterpret_cast<const void *>(byte_arr), 4));
EXPECT_EQ(attribute.value().bytes_value().size(), 4);
}
#if defined(ENABLE_OTLP_UTF8_VALIDITY)
else if (attribute.key() == non_utf8_string_key)
{
++checked_attributes;
EXPECT_TRUE(0 ==
memcmp(reinterpret_cast<const void *>(attribute.value().bytes_value().data()),
reinterpret_cast<const void *>(non_utf8_string_arr), 4));
EXPECT_EQ(attribute.value().bytes_value().size(), 4);
}
#endif
}
#if defined(ENABLE_OTLP_UTF8_VALIDITY)
EXPECT_EQ(5, checked_attributes);
#else
EXPECT_EQ(4, checked_attributes);
#endif
}

// Test non-int array types. Int array types are tested using templates (see IntAttributeTest)
Expand Down
Loading