diff --git a/cmd/root.go b/cmd/root.go index bf90c017..e8b298c7 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -7,6 +7,10 @@ import ( "path/filepath" "time" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + "github.com/localstack/lstk/internal/api" "github.com/localstack/lstk/internal/auth" "github.com/localstack/lstk/internal/config" @@ -16,6 +20,7 @@ import ( "github.com/localstack/lstk/internal/output" "github.com/localstack/lstk/internal/runtime" "github.com/localstack/lstk/internal/telemetry" + "github.com/localstack/lstk/internal/tracing" "github.com/localstack/lstk/internal/ui" "github.com/localstack/lstk/internal/update" "github.com/localstack/lstk/internal/version" @@ -73,11 +78,23 @@ func Execute(ctx context.Context) error { tel := telemetry.New(cfg.AnalyticsEndpoint, cfg.DisableEvents) defer tel.Close() + shutdownTracing := tracing.Init(ctx, cfg.OTLPEndpoint) + logger, cleanup, err := newLogger() if err != nil { logger = log.Nop() } defer cleanup() + defer func() { + // Use a fresh context: the parent ctx may already be cancelled (e.g. Ctrl+C) + // by the time this defer runs, which would cause Shutdown to return immediately + // without flushing buffered spans to the collector. + shutCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := shutdownTracing(shutCtx); err != nil { + logger.Error("failed to shut down tracing: %v", err) + } + }() logger.Info("lstk %s starting", version.Version()) // Resolve auth token for telemetry: keyring first, then env var. 
@@ -93,6 +110,7 @@ func Execute(ctx context.Context) error { root := NewRootCmd(cfg, tel, logger) root.SilenceErrors = true root.SilenceUsage = true + wrapCommandsWithTracing(root) if err := root.ExecuteContext(ctx); err != nil { if !output.IsSilent(err) { @@ -204,6 +222,34 @@ func commandWithTelemetry(name string, tel *telemetry.Client, fn func(*cobra.Com } } +// wrapCommandsWithTracing walks the Cobra command tree and wraps every RunE with +// an OTel span. This is done once after the tree is built so individual commands +// don't need to know about tracing at all. +func wrapCommandsWithTracing(cmd *cobra.Command) { + if cmd.RunE != nil { + original := cmd.RunE + name := cmd.Name() + cmd.RunE = func(c *cobra.Command, args []string) error { + ctx, span := otel.Tracer("github.com/localstack/lstk").Start(c.Context(), "lstk."+name) + defer span.End() + c.SetContext(ctx) + + err := original(c, args) + if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) + span.SetAttributes(attribute.Int("lstk.exit_code", 1)) + } else { + span.SetAttributes(attribute.Int("lstk.exit_code", 0)) + } + return err + } + } + for _, child := range cmd.Commands() { + wrapCommandsWithTracing(child) + } +} + func isInteractiveMode(cfg *env.Env) bool { return !cfg.NonInteractive && ui.IsInteractive() } diff --git a/cmd/status.go b/cmd/status.go index a89a5904..c7f3d874 100644 --- a/cmd/status.go +++ b/cmd/status.go @@ -2,7 +2,6 @@ package cmd import ( "fmt" - "net/http" "os" "github.com/localstack/lstk/internal/config" @@ -33,7 +32,7 @@ func newStatusCmd(cfg *env.Env, tel *telemetry.Client) *cobra.Command { return fmt.Errorf("failed to get config: %w", err) } - awsClient := aws.NewClient(&http.Client{}) + awsClient := aws.NewClient() if isInteractiveMode(cfg) { return ui.RunStatus(cmd.Context(), rt, appCfg.Containers, cfg.LocalStackHost, awsClient) diff --git a/docker-compose.tracing.yaml b/docker-compose.tracing.yaml new file mode 100644 index 00000000..bc201c97 
--- /dev/null +++ b/docker-compose.tracing.yaml @@ -0,0 +1,17 @@ +# Starts Jaeger for local trace collection and visualization. +# UI: http://localhost:16686 +# OTLP HTTP: http://localhost:4318 (used by lstk) +# OTLP gRPC: localhost:4317 +# +# Usage: +# docker compose -f docker-compose.tracing.yaml up -d + +services: + jaeger: + image: jaegertracing/all-in-one:latest + environment: + COLLECTOR_OTLP_ENABLED: "true" + ports: + - "4317:4317" # OTLP gRPC + - "4318:4318" # OTLP HTTP + - "16686:16686" # UI diff --git a/env.example b/env.example index e5fb9860..d6aeec53 100644 --- a/env.example +++ b/env.example @@ -9,3 +9,6 @@ export LSTK_WEB_APP_URL=https://app.staging.aws.localstack.cloud # Force file-based keyring backend instead of system keychain (optional) # export LSTK_KEYRING=file + +# OTLP trace collector endpoint (defaults to http://localhost:4318) +# export LSTK_OTLP_ENDPOINT=http://localhost:4318 diff --git a/go.mod b/go.mod index 8b7f4576..e23ca9ef 100644 --- a/go.mod +++ b/go.mod @@ -20,6 +20,11 @@ require ( github.com/spf13/viper v1.21.0 github.com/stretchr/testify v1.11.1 github.com/zalando/go-keyring v0.2.8 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 + go.opentelemetry.io/otel v1.40.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.39.0 + go.opentelemetry.io/otel/sdk v1.40.0 + go.opentelemetry.io/otel/trace v1.40.0 go.uber.org/mock v0.6.0 golang.org/x/term v0.42.0 gopkg.in/ini.v1 v1.67.1 @@ -30,6 +35,7 @@ require ( github.com/Microsoft/go-winio v0.6.2 // indirect github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect github.com/aymanbagabas/go-udiff v0.3.1 // indirect + github.com/cenkalti/backoff/v5 v5.0.3 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/charmbracelet/colorprofile v0.4.2 // indirect github.com/charmbracelet/harmonica v0.2.0 // indirect @@ -54,6 +60,7 @@ require ( github.com/go-viper/mapstructure/v2 v2.5.0 // indirect github.com/godbus/dbus/v5 v5.2.2 // indirect 
github.com/google/go-cmp v0.7.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/lucasb-eyer/go-colorful v1.3.0 // indirect github.com/mattn/go-isatty v0.0.20 // indirect @@ -78,14 +85,17 @@ require ( github.com/subosito/gotenv v1.6.0 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 // indirect - go.opentelemetry.io/otel v1.40.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.39.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0 // indirect go.opentelemetry.io/otel/metric v1.40.0 // indirect - go.opentelemetry.io/otel/trace v1.40.0 // indirect + go.opentelemetry.io/proto/otlp v1.9.0 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/net v0.47.0 // indirect golang.org/x/sys v0.43.0 // indirect golang.org/x/text v0.34.0 // indirect golang.org/x/time v0.14.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect + google.golang.org/grpc v1.77.0 // indirect + google.golang.org/protobuf v1.36.10 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 6719517f..38278cb1 100644 --- a/go.sum +++ b/go.sum @@ -72,6 +72,8 @@ github.com/go-viper/mapstructure/v2 v2.5.0 h1:vM5IJoUAy3d7zRSVtIwQgBj7BiWtMPfmPE github.com/go-viper/mapstructure/v2 v2.5.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/godbus/dbus/v5 v5.2.2 h1:TUR3TgtSVDmjiXOgAAyaZbYmIeP3DPkld3jgKGV8mXQ= github.com/godbus/dbus/v5 v5.2.2/go.mod h1:3AAv2+hPq5rdnr5txxxRwiGjPXamgoIHgz9FPBfOp3c= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod 
h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= @@ -177,6 +179,8 @@ go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZY go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA= go.opentelemetry.io/proto/otlp v1.9.0 h1:l706jCMITVouPOqEnii2fIAuO3IVGBRPV5ICjceRb/A= go.opentelemetry.io/proto/otlp v1.9.0/go.mod h1:xE+Cx5E/eEHw+ISFkwPLwCZefwVjY+pqKg1qcK03+/4= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/mock v0.6.0 h1:hyF9dfmbgIX5EfOdasqLsWD6xqpNZlXblLB/Dbnwv3Y= go.uber.org/mock v0.6.0/go.mod h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= @@ -196,6 +200,8 @@ golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk= golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA= golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 h1:fCvbg86sFXwdrl5LgVcTEvNC+2txB5mgROGmRL5mrls= google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:+rXWjjaukWZun3mLfjmVnQi18E1AsFbDN9QdJ5YXLto= google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 h1:gRkg/vSppuSQoDjxyiGfN4Upv/h/DQmIR10ZU8dh4Ww= diff --git a/internal/api/client.go b/internal/api/client.go index 
e496c8bd..1266db7f 100644 --- a/internal/api/client.go +++ b/internal/api/client.go @@ -10,6 +10,8 @@ import ( "net/url" "time" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" + "github.com/localstack/lstk/internal/log" "github.com/localstack/lstk/internal/version" ) @@ -113,9 +115,17 @@ type PlatformClient struct { func NewPlatformClient(apiEndpoint string, logger log.Logger) *PlatformClient { return &PlatformClient{ - baseURL: apiEndpoint, - httpClient: &http.Client{Timeout: 30 * time.Second}, - logger: logger, + baseURL: apiEndpoint, + httpClient: &http.Client{ + Timeout: 30 * time.Second, + Transport: otelhttp.NewTransport( + http.DefaultTransport, + otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { + return "platform " + r.Method + " " + r.URL.Path + }), + ), + }, + logger: logger, } } diff --git a/internal/container/start.go b/internal/container/start.go index 1fb0abf2..3d6acf05 100644 --- a/internal/container/start.go +++ b/internal/container/start.go @@ -14,6 +14,8 @@ import ( "time" "github.com/containerd/errdefs" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" + "github.com/localstack/lstk/internal/api" "github.com/localstack/lstk/internal/auth" "github.com/localstack/lstk/internal/awsconfig" @@ -322,7 +324,7 @@ func startContainers(ctx context.Context, rt runtime.Runtime, sink output.Sink, output.EmitStatus(sink, "waiting", c.Name, "") healthURL := fmt.Sprintf("http://localhost:%s%s", c.Port, c.HealthPath) - if err := awaitStartup(ctx, rt, sink, containerID, "LocalStack", healthURL); err != nil { + if err := awaitStartup(ctx, rt, sink, containerID, c.EmulatorType, "LocalStack", healthURL); err != nil { emitEmulatorStartError(ctx, tel, c, telemetry.ErrCodeStartFailed, err.Error()) return err } @@ -408,8 +410,16 @@ func validateLicense(ctx context.Context, sink output.Sink, opts StartOptions, t // - Failure: container stops running (e.g., license activation failed), returns error with container logs 
// // TODO: move to Runtime interface if other runtimes (k8s?) need native readiness probes -func awaitStartup(ctx context.Context, rt runtime.Runtime, sink output.Sink, containerID, name, healthURL string) error { - client := &http.Client{Timeout: 2 * time.Second} +func awaitStartup(ctx context.Context, rt runtime.Runtime, sink output.Sink, containerID, emulatorType, name, healthURL string) error { + client := &http.Client{ + Timeout: 2 * time.Second, + Transport: otelhttp.NewTransport( + http.DefaultTransport, + otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { + return emulatorType + " " + r.Method + " " + r.URL.Path + }), + ), + } for { running, err := rt.IsRunning(ctx, containerID) diff --git a/internal/emulator/aws/client.go b/internal/emulator/aws/client.go index cd31a723..e0122213 100644 --- a/internal/emulator/aws/client.go +++ b/internal/emulator/aws/client.go @@ -9,6 +9,8 @@ import ( "sort" "strings" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" + "github.com/localstack/lstk/internal/emulator" ) @@ -16,8 +18,17 @@ type Client struct { http *http.Client } -func NewClient(httpClient *http.Client) *Client { - return &Client{http: httpClient} +func NewClient() *Client { + return &Client{ + http: &http.Client{ + Transport: otelhttp.NewTransport( + http.DefaultTransport, + otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { + return "aws " + r.Method + " " + r.URL.Path + }), + ), + }, + } } type healthResponse struct { diff --git a/internal/emulator/aws/client_test.go b/internal/emulator/aws/client_test.go index d9fc014c..cbb915fc 100644 --- a/internal/emulator/aws/client_test.go +++ b/internal/emulator/aws/client_test.go @@ -23,7 +23,7 @@ func TestFetchVersion(t *testing.T) { })) defer server.Close() - c := NewClient(&http.Client{}) + c := NewClient() version, err := c.FetchVersion(context.Background(), server.Listener.Addr().String()) require.NoError(t, err) assert.Equal(t, "4.14.1", version) @@ 
-36,7 +36,7 @@ func TestFetchVersion(t *testing.T) { })) defer server.Close() - c := NewClient(&http.Client{}) + c := NewClient() _, err := c.FetchVersion(context.Background(), server.Listener.Addr().String()) require.Error(t, err) }) @@ -54,7 +54,7 @@ func TestFetchResources(t *testing.T) { })) defer server.Close() - c := NewClient(&http.Client{}) + c := NewClient() rows, err := c.FetchResources(context.Background(), server.Listener.Addr().String()) require.NoError(t, err) require.Len(t, rows, 2) @@ -74,7 +74,7 @@ func TestFetchResources(t *testing.T) { })) defer server.Close() - c := NewClient(&http.Client{}) + c := NewClient() rows, err := c.FetchResources(context.Background(), server.Listener.Addr().String()) require.NoError(t, err) require.Len(t, rows, 1) @@ -88,7 +88,7 @@ func TestFetchResources(t *testing.T) { })) defer server.Close() - c := NewClient(&http.Client{}) + c := NewClient() rows, err := c.FetchResources(context.Background(), server.Listener.Addr().String()) require.NoError(t, err) assert.Empty(t, rows) @@ -101,7 +101,7 @@ func TestFetchResources(t *testing.T) { })) defer server.Close() - c := NewClient(&http.Client{}) + c := NewClient() _, err := c.FetchResources(context.Background(), server.Listener.Addr().String()) require.Error(t, err) }) diff --git a/internal/env/env.go b/internal/env/env.go index c1775c0b..5283d6f1 100644 --- a/internal/env/env.go +++ b/internal/env/env.go @@ -17,9 +17,10 @@ type Env struct { WebAppURL string ForceFileKeyring bool AnalyticsEndpoint string + OTLPEndpoint string - NonInteractive bool - GitHubToken string + NonInteractive bool + GitHubToken string } // Init initializes environment variable configuration and returns the result. 
@@ -31,6 +32,7 @@ func Init() *Env { viper.SetDefault("api_endpoint", "https://api.localstack.cloud") viper.SetDefault("web_app_url", "https://app.localstack.cloud") viper.SetDefault("analytics_endpoint", "https://analytics.localstack.cloud/v1/events") + viper.SetDefault("otlp_endpoint", "http://localhost:4318") // LOCALSTACK_* variables are not prefixed with LSTK_ so they work seamlessly // across all LocalStack tools without per-tool configuration return &Env{ @@ -42,6 +44,7 @@ func Init() *Env { WebAppURL: viper.GetString("web_app_url"), ForceFileKeyring: viper.GetString("keyring") == "file", AnalyticsEndpoint: viper.GetString("analytics_endpoint"), + OTLPEndpoint: viper.GetString("otlp_endpoint"), GitHubToken: viper.GetString("github_token"), } diff --git a/internal/runtime/docker.go b/internal/runtime/docker.go index 3686657b..39fe5a7d 100644 --- a/internal/runtime/docker.go +++ b/internal/runtime/docker.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "log" + "net/http" "os" "os/exec" "path/filepath" @@ -20,6 +21,8 @@ import ( "github.com/docker/docker/client" "github.com/docker/docker/pkg/stdcopy" "github.com/docker/go-connections/nat" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" + "github.com/localstack/lstk/internal/output" ) @@ -29,7 +32,18 @@ type DockerRuntime struct { } func NewDockerRuntime(dockerHost string) (*DockerRuntime, error) { - opts := []client.Opt{client.FromEnv, client.WithAPIVersionNegotiation()} + // The Docker SDK always wraps its transport with otelhttp (client.go line 238). + // WithTraceOptions appends to the SDK's traceOpts, and the last WithSpanNameFormatter + // wins, so this overrides the default "METHOD PATH" naming with our "docker " prefix. 
+ opts := []client.Opt{ + client.FromEnv, + client.WithAPIVersionNegotiation(), + client.WithTraceOptions( + otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { + return "docker " + r.Method + " " + r.URL.Path + }), + ), + } // When DOCKER_HOST is not set, the Docker SDK defaults to /var/run/docker.sock. // If that socket doesn't exist, probe known alternative locations (e.g. Colima). diff --git a/internal/tracing/tracing.go b/internal/tracing/tracing.go new file mode 100644 index 00000000..01be5ddb --- /dev/null +++ b/internal/tracing/tracing.go @@ -0,0 +1,68 @@ +// Package tracing configures OpenTelemetry distributed tracing for lstk. +// Spans are exported via OTLP/HTTP to localhost:4318 by default. +// +// To start a local trace backend (Jaeger): +// +// docker compose -f docker-compose.tracing.yaml up -d +// +// Then open http://localhost:16686 to browse traces. +// Override the endpoint with LSTK_OTLP_ENDPOINT (e.g. "http://localhost:4318"). +// Export errors (e.g. no collector running) are silently ignored. +package tracing + +import ( + "context" + stdruntime "runtime" + "strings" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" + "go.opentelemetry.io/otel/propagation" + "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + + "github.com/localstack/lstk/internal/version" +) + +// Init configures the global OpenTelemetry TracerProvider and returns a shutdown +// function that must be called before process exit to flush pending spans. +// If initialisation fails, a no-op shutdown is returned. +func Init(ctx context.Context, otlpEndpoint string) func(context.Context) error { + noop := func(context.Context) error { return nil } + + // Suppress export errors (e.g. "connection refused" when no collector is running). + // Tracing is best-effort: lstk works normally without a running collector. 
+ otel.SetErrorHandler(otel.ErrorHandlerFunc(func(error) {})) + + // Default to plain HTTP for local collectors (Jaeger, OTel Collector on localhost). + // Use TLS when the endpoint is an https:// URL. + var exporterOpts []otlptracehttp.Option + exporterOpts = append(exporterOpts, otlptracehttp.WithEndpointURL(otlpEndpoint)) + if !strings.HasPrefix(otlpEndpoint, "https://") { + exporterOpts = append(exporterOpts, otlptracehttp.WithInsecure()) + } + exp, err := otlptracehttp.New(ctx, exporterOpts...) + if err != nil { + return noop + } + + res := resource.NewWithAttributes("", + attribute.String("service.name", "lstk"), + attribute.String("service.version", version.Version()), + attribute.String("os.type", stdruntime.GOOS), + attribute.String("host.arch", stdruntime.GOARCH), + ) + + tp := sdktrace.NewTracerProvider( + sdktrace.WithBatcher(exp), + sdktrace.WithResource(res), + ) + otel.SetTracerProvider(tp) + otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator( + propagation.TraceContext{}, + propagation.Baggage{}, + )) + + return tp.Shutdown +}