diff --git a/bridge.go b/bridge.go
index 355a61ea..511a5edc 100644
--- a/bridge.go
+++ b/bridge.go
@@ -190,6 +190,7 @@ func newInterceptionProcessor(p provider.Provider, cbs *circuitbreaker.ProviderC
 	asyncRecorder.WithProvider(p.Name())
 	asyncRecorder.WithModel(interceptor.Model())
 	asyncRecorder.WithInitiatorID(actor.ID)
+	asyncRecorder.WithClient(string(client))
 	interceptor.Setup(logger, asyncRecorder, mcpProxy)
 
 	if err := rec.RecordInterception(ctx, &recorder.InterceptionRecord{
@@ -231,13 +232,13 @@ func newInterceptionProcessor(p provider.Provider, cbs *circuitbreaker.ProviderC
 		return interceptor.ProcessRequest(rw, r)
 	}); err != nil {
 		if m != nil {
-			m.InterceptionCount.WithLabelValues(p.Name(), interceptor.Model(), metrics.InterceptionCountStatusFailed, route, r.Method, actor.ID).Add(1)
+			m.InterceptionCount.WithLabelValues(p.Name(), interceptor.Model(), metrics.InterceptionCountStatusFailed, route, r.Method, actor.ID, string(client)).Add(1)
 		}
 		span.SetStatus(codes.Error, fmt.Sprintf("interception failed: %v", err))
 		log.Warn(ctx, "interception failed", slog.Error(err))
 	} else {
 		if m != nil {
-			m.InterceptionCount.WithLabelValues(p.Name(), interceptor.Model(), metrics.InterceptionCountStatusCompleted, route, r.Method, actor.ID).Add(1)
+			m.InterceptionCount.WithLabelValues(p.Name(), interceptor.Model(), metrics.InterceptionCountStatusCompleted, route, r.Method, actor.ID, string(client)).Add(1)
 		}
 		log.Debug(ctx, "interception ended")
 	}
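Note: this hunk assumes a `client` value (an `aibridge.Client`) is already in scope in `newInterceptionProcessor`; the detection logic itself is not part of this diff. A minimal sketch of what such a mapping could look like, inferred only from the headers exercised in `metrics_test.go` below — the helper name and label strings are illustrative assumptions, not the actual aibridge API, and the `Client` type and constants are redefined here only to keep the sketch self-contained:

```go
package aibridge

import (
	"net/http"
	"strings"
)

// Client identifies the coding agent that issued an intercepted request.
type Client string

// Hypothetical label values; the real constants live elsewhere in aibridge.
const (
	ClientUnknown    Client = "unknown"
	ClientClaudeCode Client = "claude-code"
	ClientKilo       Client = "kilo"
	ClientCursor     Client = "cursor"
	ClientRoo        Client = "roo"
)

// detectClient is an illustrative helper: Cursor and Roo identify themselves
// via dedicated headers, the rest via a User-Agent product prefix.
func detectClient(h http.Header) Client {
	if h.Get("X-Cursor-Client-Version") != "" {
		return ClientCursor
	}
	if strings.EqualFold(h.Get("Originator"), "roo-code") {
		return ClientRoo
	}
	ua := strings.ToLower(h.Get("User-Agent"))
	switch {
	case strings.HasPrefix(ua, "claude-code/"):
		return ClientClaudeCode
	case strings.HasPrefix(ua, "kilo-code/"):
		return ClientKilo
	}
	return ClientUnknown
}
```

Dedicated headers are checked before the generic User-Agent prefix match so that clients which send both still identify correctly.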
"gpt-4.1", expectRoute: "/v1/chat/completions", expectProvider: config.ProviderOpenAI, + expectClient: aibridge.ClientCopilotVSC, allowOverflow: true, }, { name: "oai_responses_blocking_simple", fixture: fixtures.OaiResponsesBlockingSimple, path: pathOpenAIResponses, + headers: http.Header{"X-Cursor-Client-Version": []string{"0.50.0"}}, expectStatus: metrics.InterceptionCountStatusCompleted, expectModel: "gpt-4o-mini", expectRoute: "/v1/responses", expectProvider: config.ProviderOpenAI, + expectClient: aibridge.ClientCursor, }, { name: "oai_responses_blocking_error", fixture: fixtures.OaiResponsesBlockingHttpErr, path: pathOpenAIResponses, + headers: http.Header{"User-Agent": []string{"codex/1.0.0"}}, expectStatus: metrics.InterceptionCountStatusFailed, expectModel: "gpt-4o-mini", expectRoute: "/v1/responses", expectProvider: config.ProviderOpenAI, + expectClient: aibridge.ClientCodex, allowOverflow: true, }, { name: "oai_responses_streaming_simple", fixture: fixtures.OaiResponsesStreamingSimple, path: pathOpenAIResponses, + headers: http.Header{"User-Agent": []string{"zed/0.200.0"}}, expectStatus: metrics.InterceptionCountStatusCompleted, expectModel: "gpt-4o-mini", expectRoute: "/v1/responses", expectProvider: config.ProviderOpenAI, + expectClient: aibridge.ClientZed, }, { name: "oai_responses_streaming_error", fixture: fixtures.OaiResponsesStreamingHttpErr, path: pathOpenAIResponses, + headers: http.Header{"Originator": []string{"roo-code"}}, expectStatus: metrics.InterceptionCountStatusFailed, expectModel: "gpt-4o-mini", expectRoute: "/v1/responses", expectProvider: config.ProviderOpenAI, + expectClient: aibridge.ClientRoo, allowOverflow: true, }, } @@ -125,12 +153,12 @@ func TestMetrics_Interception(t *testing.T) { withMetrics(m), ) - resp := bridgeServer.makeRequest(t, http.MethodPost, tc.path, fix.Request()) + resp := bridgeServer.makeRequest(t, http.MethodPost, tc.path, fix.Request(), tc.headers) _, err := io.ReadAll(resp.Body) require.NoError(t, err) count := promtest.ToFloat64(m.InterceptionCount.WithLabelValues( - tc.expectProvider, tc.expectModel, tc.expectStatus, tc.expectRoute, "POST", defaultActorID)) + tc.expectProvider, tc.expectModel, tc.expectStatus, tc.expectRoute, "POST", defaultActorID, string(tc.expectClient))) require.Equal(t, 1.0, count) require.Equal(t, 1, promtest.CollectAndCount(m.InterceptionDuration)) require.Equal(t, 1, promtest.CollectAndCount(m.InterceptionCount)) @@ -229,16 +257,51 @@ func TestMetrics_PromptCount(t *testing.T) { withMetrics(m), ) - resp := bridgeServer.makeRequest(t, http.MethodPost, pathOpenAIChatCompletions, fix.Request()) + resp := bridgeServer.makeRequest(t, http.MethodPost, pathOpenAIChatCompletions, fix.Request(), http.Header{"User-Agent": []string{"claude-code/1.0.0"}}) require.Equal(t, http.StatusOK, resp.StatusCode) _, err := io.ReadAll(resp.Body) require.NoError(t, err) prompts := promtest.ToFloat64(m.PromptCount.WithLabelValues( - config.ProviderOpenAI, "gpt-4.1", defaultActorID)) + config.ProviderOpenAI, "gpt-4.1", defaultActorID, string(aibridge.ClientClaudeCode))) require.Equal(t, 1.0, prompts) } +func TestMetrics_TokenUseCount(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) + t.Cleanup(cancel) + + fix := fixtures.Parse(t, fixtures.OaiResponsesBlockingCachedInputTokens) + upstream := newMockUpstream(t, ctx, newFixtureResponse(fix)) + + m := aibridge.NewMetrics(prometheus.NewRegistry()) + bridgeServer := newBridgeTestServer(t, ctx, upstream.URL, + withMetrics(m), + ) + + resp 
diff --git a/metrics/metrics.go b/metrics/metrics.go
index 0c5f5f8b..6d14ab9d 100644
--- a/metrics/metrics.go
+++ b/metrics/metrics.go
@@ -42,20 +42,20 @@ func NewMetrics(reg prometheus.Registerer) *Metrics {
 	return &Metrics{
 		// Interception-related metrics.
 
-		// Pessimistic cardinality: 2 providers, 5 models, 2 statuses, 2 routes, 3 methods = up to 120 PER INITIATOR.
+		// Pessimistic cardinality: 3 providers, 5 models, 2 statuses, 3 routes, 3 methods, 10 clients = up to 2700 PER INITIATOR.
 		InterceptionCount: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
 			Subsystem: "interceptions",
 			Name:      "total",
 			Help:      "The count of intercepted requests.",
-		}, append(baseLabels, "status", "route", "method", "initiator_id")),
-		// Pessimistic cardinality: 2 providers, 5 models, 2 routes = up to 20.
+		}, append(baseLabels, "status", "route", "method", "initiator_id", "client")),
+		// Pessimistic cardinality: 3 providers, 5 models, 3 routes = up to 45.
 		// NOTE: route is not unbounded because this is only for intercepted routes.
 		InterceptionsInflight: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
 			Subsystem: "interceptions",
 			Name:      "inflight",
 			Help:      "The number of intercepted requests which are being processed.",
 		}, append(baseLabels, "route")),
-		// Pessimistic cardinality: 2 providers, 5 models, 7 buckets + 3 extra series (count, sum, +Inf) = up to 100.
+		// Pessimistic cardinality: 3 providers, 5 models, 7 buckets + 3 extra series (count, sum, +Inf) = up to 150.
 		InterceptionDuration: promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
 			Subsystem: "interceptions",
 			Name:      "duration_seconds",
@@ -67,7 +67,7 @@ func NewMetrics(reg prometheus.Registerer) *Metrics {
 			Buckets: []float64{0.5, 2, 5, 15, 30, 60, 120},
 		}, baseLabels),
 
-		// Pessimistic cardinality: 2 providers, 10 routes, 3 methods = up to 60.
+		// Pessimistic cardinality: 3 providers, 10 routes, 3 methods = up to 90.
 		// NOTE: route is not unbounded because PassthroughRoutes (see provider.go) is a static list.
 		PassthroughCount: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
 			Subsystem: "passthrough",
@@ -77,31 +77,31 @@ func NewMetrics(reg prometheus.Registerer) *Metrics {
 		// Prompt-related metrics.
 
-		// Pessimistic cardinality: 2 providers, 5 models = up to 10 PER INITIATOR.
+		// Pessimistic cardinality: 3 providers, 5 models, 10 clients = up to 150 PER INITIATOR.
 		PromptCount: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
 			Subsystem: "prompts",
 			Name:      "total",
 			Help:      "The number of prompts issued by users (initiators).",
-		}, append(baseLabels, "initiator_id")),
+		}, append(baseLabels, "initiator_id", "client")),
 
 		// Token-related metrics.
 
-		// Pessimistic cardinality: 2 providers, 5 models, 10 types = up to 100 PER INITIATOR.
+		// Pessimistic cardinality: 3 providers, 5 models, 10 types, 10 clients = up to 1500 PER INITIATOR.
 		TokenUseCount: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
 			Subsystem: "tokens",
 			Name:      "total",
 			Help:      "The number of tokens used by intercepted requests.",
-		}, append(baseLabels, "type", "initiator_id")),
+		}, append(baseLabels, "type", "initiator_id", "client")),
 
 		// Tool-related metrics.
 
-		// Pessimistic cardinality: 2 providers, 5 models, 3 servers, 30 tools = up to 900.
+		// Pessimistic cardinality: 3 providers, 5 models, 3 servers, 30 tools = up to 1350.
 		InjectedToolUseCount: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
 			Subsystem: "injected_tool_invocations",
 			Name:      "total",
 			Help:      "The number of times an injected MCP tool was invoked by aibridge.",
 		}, append(baseLabels, "server", "name")),
 
-		// Pessimistic cardinality: 2 providers, 5 models, 30 tools = up to 300.
+		// Pessimistic cardinality: 3 providers, 5 models, 30 tools = up to 450.
 		NonInjectedToolUseCount: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
 			Subsystem: "non_injected_tool_selections",
 			Name:      "total",
@@ -110,19 +110,19 @@ func NewMetrics(reg prometheus.Registerer) *Metrics {
 
 		// Circuit breaker metrics.
 
-		// Pessimistic cardinality: 2 providers, 2 endpoints, 5 models = up to 20.
+		// Pessimistic cardinality: 3 providers, 2 endpoints, 5 models = up to 30.
 		CircuitBreakerState: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
 			Subsystem: "circuit_breaker",
 			Name:      "state",
 			Help:      "Current state of the circuit breaker (0=closed, 0.5=half-open, 1=open).",
 		}, []string{"provider", "endpoint", "model"}),
-		// Pessimistic cardinality: 2 providers, 2 endpoints, 5 models = up to 20.
+		// Pessimistic cardinality: 3 providers, 2 endpoints, 5 models = up to 30.
 		CircuitBreakerTrips: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
 			Subsystem: "circuit_breaker",
 			Name:      "trips_total",
 			Help:      "Total number of times the circuit breaker transitioned to open state.",
 		}, []string{"provider", "endpoint", "model"}),
-		// Pessimistic cardinality: 2 providers, 2 endpoints, 5 models = up to 20.
+		// Pessimistic cardinality: 3 providers, 2 endpoints, 5 models = up to 30.
 		CircuitBreakerRejects: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
 			Subsystem: "circuit_breaker",
 			Name:      "rejects_total",
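Worked example for the headline bound above: `InterceptionCount` now spans 3 providers × 5 models × 2 statuses × 3 routes × 3 methods × 10 clients = 2,700 series per initiator. The new `client` label multiplies the per-initiator cardinality of every counter it is appended to by the assumed 10 clients, which is why those pessimistic estimates jump by an order of magnitude.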
diff --git a/recorder/recorder.go b/recorder/recorder.go
index 38144886..6e37b632 100644
--- a/recorder/recorder.go
+++ b/recorder/recorder.go
@@ -131,7 +131,10 @@ type AsyncRecorder struct {
 	timeout time.Duration
 	metrics *metrics.Metrics
 
-	provider, model, initiatorID string
+	provider    string
+	model       string
+	initiatorID string
+	client      string
 
 	wg sync.WaitGroup
 }
@@ -158,6 +161,10 @@ func (a *AsyncRecorder) WithInitiatorID(initiatorID string) {
 	a.initiatorID = initiatorID
 }
 
+func (a *AsyncRecorder) WithClient(client string) {
+	a.client = client
+}
+
 // RecordInterception must NOT be called asynchronously.
 // If an interception cannot be recorded, the whole request should fail.
 func (a *AsyncRecorder) RecordInterception(ctx context.Context, req *InterceptionRecord) error {
@@ -193,7 +200,7 @@ func (a *AsyncRecorder) RecordPromptUsage(ctx context.Context, req *PromptUsageR
 		}
 		if a.metrics != nil && req.Prompt != "" {
 			// TODO: will be irrelevant once https://github.com/coder/aibridge/issues/55 is fixed.
-			a.metrics.PromptCount.WithLabelValues(a.provider, a.model, a.initiatorID).Add(1)
+			a.metrics.PromptCount.WithLabelValues(a.provider, a.model, a.initiatorID, a.client).Add(1)
 		}
 	}()
 
@@ -213,10 +220,10 @@ func (a *AsyncRecorder) RecordTokenUsage(ctx context.Context, req *TokenUsageRec
 		}
 
 		if a.metrics != nil {
-			a.metrics.TokenUseCount.WithLabelValues(a.provider, a.model, "input", a.initiatorID).Add(float64(req.Input))
-			a.metrics.TokenUseCount.WithLabelValues(a.provider, a.model, "output", a.initiatorID).Add(float64(req.Output))
+			a.metrics.TokenUseCount.WithLabelValues(a.provider, a.model, "input", a.initiatorID, a.client).Add(float64(req.Input))
+			a.metrics.TokenUseCount.WithLabelValues(a.provider, a.model, "output", a.initiatorID, a.client).Add(float64(req.Output))
 			for k, v := range req.ExtraTokenTypes {
-				a.metrics.TokenUseCount.WithLabelValues(a.provider, a.model, k, a.initiatorID).Add(float64(v))
+				a.metrics.TokenUseCount.WithLabelValues(a.provider, a.model, k, a.initiatorID, a.client).Add(float64(v))
 			}
 		}
 	}()
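As a standalone illustration (not aibridge code) of what the added label means operationally: every distinct `client` value materialises a separate time series, and all `WithLabelValues` call sites must pass the new value in lockstep, since client_golang's `WithLabelValues` panics on a label-count mismatch. A minimal runnable sketch, with label values assumed for illustration:

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	// Same label scheme as TokenUseCount after this change.
	tokens := prometheus.NewCounterVec(prometheus.CounterOpts{
		Subsystem: "tokens",
		Name:      "total",
	}, []string{"provider", "model", "type", "initiator_id", "client"})

	reg := prometheus.NewRegistry()
	reg.MustRegister(tokens)

	// The same usage reported by two different clients lands in two series.
	tokens.WithLabelValues("openai", "gpt-4.1", "input", "user-1", "claude-code").Add(129)
	tokens.WithLabelValues("openai", "gpt-4.1", "input", "user-1", "cursor").Add(129)

	fmt.Println(testutil.CollectAndCount(tokens)) // prints 2: one series per client
}
```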