Response status updates #531
base: master
AnalyticsClient.java

@@ -46,6 +46,7 @@ public class AnalyticsClient {
   private static final String instanceId = UUID.randomUUID().toString();
   private static final int WAIT_FOR_THREAD_COMPLETE_S = 5;
   private static final int TERMINATION_TIMEOUT_S = 1;
+  private static final long MAX_RETRY_AFTER_SECONDS = 300L;

   static {
     Map<String, String> library = new LinkedHashMap<>();
@@ -409,8 +410,8 @@ public void run() {
   static class BatchUploadTask implements Runnable {
     private static final Backo BACKO =
         Backo.builder() //
-            .base(TimeUnit.SECONDS, 15) //
-            .cap(TimeUnit.HOURS, 1) //
+            .base(TimeUnit.MILLISECONDS, 100) //
+            .cap(TimeUnit.MINUTES, 1) //
             .jitter(1) //
             .build();
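For context on the new settings: ignoring jitter, the delay roughly doubles from 100 ms per attempt and plateaus at the 1-minute cap after about ten attempts. Below is a minimal sketch of that arithmetic, assuming Backo's usual exponential doubling; the helper is illustrative and not part of this change.

```java
import java.util.concurrent.TimeUnit;

public class BackoffScheduleSketch {
  // Illustrative only: nominal delay for an exponential backoff with a
  // 100 ms base and a 1-minute cap, matching the new BACKO settings above.
  static long nominalDelayMs(int attempt) {
    long baseMs = 100L;
    long capMs = TimeUnit.MINUTES.toMillis(1);
    double delay = baseMs * Math.pow(2, attempt); // base * 2^attempt
    return (long) Math.min(capMs, delay);
  }

  public static void main(String[] args) {
    for (int attempt = 0; attempt < 12; attempt++) {
      // Prints 100, 200, 400, ... and stays at 60000 once the cap is reached.
      System.out.printf("attempt %d -> %d ms%n", attempt, nominalDelayMs(attempt));
    }
  }
}
```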
@@ -438,12 +439,37 @@ private void notifyCallbacksWithException(Batch batch, Exception exception) {
       }
     }

-    /** Returns {@code true} to indicate a batch should be retried. {@code false} otherwise. */
-    boolean upload() {
+    private enum RetryStrategy {
+      NONE,
+      BACKOFF,
+      RETRY_AFTER
+    }
+
+    private static final class UploadResult {
+      final RetryStrategy strategy;
+      final long retryAfterSeconds;
+
+      UploadResult(RetryStrategy strategy) {
+        this(strategy, 0L);
+      }
+
+      UploadResult(RetryStrategy strategy, long retryAfterSeconds) {
+        this.strategy = strategy;
+        this.retryAfterSeconds = retryAfterSeconds;
+      }
+    }
+
+    /**
+     * Perform a single upload attempt.
+     *
+     * @param attempt overall number of attempts so far (1-based)
+     */
+    UploadResult upload(int attempt) {
       client.log.print(VERBOSE, "Uploading batch %s.", batch.sequence());

       try {
-        Call<UploadResponse> call = client.service.upload(client.uploadUrl, batch);
+        Call<UploadResponse> call;
+        call = client.service.upload(attempt - 1, client.uploadUrl, batch);
         Response<UploadResponse> response = call.execute();

         if (response.isSuccessful()) {
@@ -455,59 +481,148 @@ boolean upload() {
           }
         }

-        return false;
+        return new UploadResult(RetryStrategy.NONE);
       }

       int status = response.code();
-      if (is5xx(status)) {
-        client.log.print(
-            DEBUG, "Could not upload batch %s due to server error. Retrying.", batch.sequence());
-        return true;
-      } else if (status == 429) {
-        client.log.print(
-            DEBUG, "Could not upload batch %s due to rate limiting. Retrying.", batch.sequence());
-        return true;
+
+      if (isStatusRetryAfterEligible(status)) {
+        String retryAfterHeader = response.headers().get("Retry-After");
+        Long retryAfterSeconds = parseRetryAfterSeconds(retryAfterHeader);
+        if (retryAfterSeconds != null) {
+          client.log.print(
+              DEBUG,
+              "Could not upload batch %s due to status %s with Retry-After %s seconds. Retrying after delay.",
+              batch.sequence(),
+              status,
+              retryAfterSeconds);
+          return new UploadResult(RetryStrategy.RETRY_AFTER, retryAfterSeconds);
+        }
+        client.log.print(
+            DEBUG,
+            "Status %s did not have a valid Retry-After header for batch %s.",
+            status,
+            batch.sequence());
+      }
+
+      if (isStatusRetryWithBackoff(status)) {
+        client.log.print(
+            DEBUG,
+            "Could not upload batch %s due to retryable status %s. Retrying with backoff.",
+            batch.sequence(),
+            status);
+        return new UploadResult(RetryStrategy.BACKOFF);
       }

-      client.log.print(DEBUG, "Could not upload batch %s. Giving up.", batch.sequence());
+      client.log.print(
+          DEBUG,
+          "Could not upload batch %s due to non-retryable status %s. Giving up.",
+          batch.sequence(),
+          status);
       notifyCallbacksWithException(batch, new IOException(response.errorBody().string()));

-      return false;
+      return new UploadResult(RetryStrategy.NONE);
     } catch (IOException error) {
       client.log.print(DEBUG, error, "Could not upload batch %s. Retrying.", batch.sequence());

-      return true;
+      return new UploadResult(RetryStrategy.BACKOFF);
     } catch (Exception exception) {
       client.log.print(DEBUG, "Could not upload batch %s. Giving up.", batch.sequence());

       notifyCallbacksWithException(batch, exception);

-      return false;
+      return new UploadResult(RetryStrategy.NONE);
     }
   }

+    private static boolean isStatusRetryAfterEligible(int status) {
+      return status == 429 || status == 408 || status == 503;
+    }
+
+    private static Long parseRetryAfterSeconds(String headerValue) {
+      if (headerValue == null) {
+        return null;
+      }
+      headerValue = headerValue.trim();
+      if (headerValue.isEmpty()) {
+        return null;
+      }
+      try {
+        long seconds = Long.parseLong(headerValue);
+        if (seconds <= 0L) {
+          return null;
+        }
+        if (seconds > MAX_RETRY_AFTER_SECONDS) {
+          return MAX_RETRY_AFTER_SECONDS;
+        }
+        return seconds;
+      } catch (NumberFormatException ignored) {
+        return null;
+      }
+    }
+
     @Override
     public void run() {
-      int attempt = 0;
-      for (; attempt <= maxRetries; attempt++) {
-        boolean retry = upload();
-        if (!retry) return;
+      int totalAttempts = 0; // counts every HTTP attempt (for header and error message)
+      int backoffAttempts = 0; // counts attempts that consume backoff-based retries
+      int maxBackoffAttempts = maxRetries + 1; // preserve existing semantics
+
+      while (true) {
+        totalAttempts++;
+        UploadResult result = upload(totalAttempts);
+
+        if (result.strategy == RetryStrategy.NONE) {
+          return;
+        }
+
+        if (result.strategy == RetryStrategy.RETRY_AFTER) {
+          try {
+            TimeUnit.SECONDS.sleep(result.retryAfterSeconds);
+          } catch (InterruptedException e) {
+            client.log.print(
+                DEBUG,
+                "Thread interrupted while waiting for Retry-After for batch %s.",
+                batch.sequence());
+            Thread.currentThread().interrupt();
+            return;
+          }
+          // Do not count Retry-After based retries against maxRetries.
+          continue;
+        }
+
+        // BACKOFF strategy
+        backoffAttempts++;
+        if (backoffAttempts >= maxBackoffAttempts) {
+          break;
+        }

         try {
-          backo.sleep(attempt);
+          backo.sleep(backoffAttempts - 1);
         } catch (InterruptedException e) {
           client.log.print(
               DEBUG, "Thread interrupted while backing off for batch %s.", batch.sequence());
           Thread.currentThread().interrupt();
           return;
         }
       }

       client.log.print(ERROR, "Could not upload batch %s. Retries exhausted.", batch.sequence());
       notifyCallbacksWithException(
-          batch, new IOException(Integer.toString(attempt) + " retries exhausted"));
+          batch, new IOException(Integer.toString(totalAttempts) + " retries exhausted"));
     }

-    private static boolean is5xx(int status) {
-      return status >= 500 && status < 600;
+    private static boolean isStatusRetryWithBackoff(int status) {
+      // Explicitly retry these client errors
A review suggestion proposes documenting the non-standard 460 status in that comment:

-      // Explicitly retry these client errors
+      // Explicitly retry these client errors
+      // 460 is a non-standard, Segment-specific status code that represents a transient failure
+      // and is intentionally treated as retryable with backoff, similar to 408, 410, and 429.
Copilot AI commented on Feb 12, 2026:
HTTP status 410 (Gone) is being treated as retryable, but this status code typically indicates that a resource has been permanently removed and will never be available again. Retrying such requests is unlikely to succeed and wastes resources. Consider removing 410 from the list of retryable client errors unless there's a specific reason the Segment API uses 410 in a non-standard way to indicate a temporary condition.
Suggested change:

-      if (status == 408 || status == 410 || status == 429 || status == 460) {
+      if (status == 408 || status == 429 || status == 460) {
Copilot AI commented on Feb 12, 2026:
Missing test coverage for HTTP status codes 410 and 460, which are explicitly marked as retryable in the isStatusRetryWithBackoff method. Add tests to verify that these status codes are indeed retried with backoff as intended.
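One way to add that coverage, sketched minimally below. It assumes isStatusRetryWithBackoff is relaxed from private to package-private (e.g. marked @VisibleForTesting) so a test in the same package as AnalyticsClient can call it directly; the test class and method names are hypothetical.

```java
import static org.junit.Assert.assertTrue;

import org.junit.Test;

// Sketch only: lives in the same package as AnalyticsClient and assumes the
// status-classification method is made visible for testing.
public class RetryWithBackoffStatusTest {

  @Test
  public void status410And460AreRetriedWithBackoff() {
    assertTrue(AnalyticsClient.BatchUploadTask.isStatusRetryWithBackoff(410));
    assertTrue(AnalyticsClient.BatchUploadTask.isStatusRetryWithBackoff(460));
  }
}
```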
Copilot AI commented on Feb 12, 2026:
Missing test coverage for HTTP status codes 505 (HTTP Version Not Supported) and 511 (Network Authentication Required), which are explicitly excluded from retry logic in the isStatusRetryWithBackoff method. While 501 is tested, add tests for 505 and 511 to ensure these status codes are not retried as intended.
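The excluded codes could be asserted the same way. Again a sketch under the same package and visibility assumptions as above, with 501 included to mirror the existing test the comment mentions.

```java
import static org.junit.Assert.assertFalse;

import org.junit.Test;

// Sketch only: same assumptions as the retryable-status sketch above.
public class NonRetryableStatusTest {

  @Test
  public void status501And505And511AreNotRetried() {
    assertFalse(AnalyticsClient.BatchUploadTask.isStatusRetryWithBackoff(501));
    assertFalse(AnalyticsClient.BatchUploadTask.isStatusRetryWithBackoff(505));
    assertFalse(AnalyticsClient.BatchUploadTask.isStatusRetryWithBackoff(511));
  }
}
```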
A further review comment addresses the backoff configuration:
The backoff configuration change from 15s base/1h cap to 100ms base/1min cap makes retries significantly more aggressive. While faster initial retries can be beneficial for transient errors, this change combined with the maxFlushAttempts increase to 1000 creates potential issues: 1) The first ~10 attempts will happen very quickly (within ~1 minute total), which could overwhelm a struggling service or trigger rate limiting, 2) After the cap is reached, retries continue at 1-minute intervals indefinitely (up to 1000 attempts = 16+ hours), and 3) The faster retry cadence may not give the underlying issue enough time to resolve. Consider a more balanced approach, such as keeping a higher base (e.g., 1-5 seconds) or implementing a circuit breaker pattern to avoid excessive retry attempts when a service is clearly down.
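If that concern is taken up, one possible middle ground is a seconds-scale base with a cap well below an hour, using the same Backo builder calls shown in the diff. The specific values below are illustrative, not something this PR proposes.

```java
// Illustrative alternative to the new 100 ms base / 1 min cap: slower first
// retries, but a bounded tail. Values are hypothetical, not part of this PR.
private static final Backo BACKO =
    Backo.builder()
        .base(TimeUnit.SECONDS, 2) // first retry after roughly 2 s instead of 100 ms
        .cap(TimeUnit.MINUTES, 5) // delays stop growing at 5 minutes
        .jitter(1)
        .build();
```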