Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/monitor/sqsevent/spot-itn-event.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func (m SQSMonitor) spotITNTerminationToInterruptionEvent(event *EventBridgeEven
if err != nil {
log.Err(err).Msgf("Unable to taint node with taint %s:%s", node.SpotInterruptionTaint, interruptionEvent.EventID)
}
return nil
return err
}
return &interruptionEvent, nil
}
42 changes: 19 additions & 23 deletions pkg/node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ const (
)

// Retry tuning for node updates plus the instance-ID pattern.
// NOTE(review): this block is rendered from a diff hunk (-77,8 +77,8) without
// +/- markers, so maxRetryDeadline and conflictRetryInterval each appear twice.
var (
// NOTE(review): presumably the pre-change values (removed lines are listed first) — confirm.
maxRetryDeadline time.Duration = 5 * time.Second
conflictRetryInterval time.Duration = 750 * time.Millisecond
// NOTE(review): presumably the post-change values — confirm.
// maxRetryDeadline is added to time.Now() in addTaint to bound how long conflict retries continue.
maxRetryDeadline time.Duration = 15 * time.Second
// conflictRetryInterval is the sleep between attempts after a conflict error in addTaint.
conflictRetryInterval time.Duration = 500 * time.Millisecond
// instanceIDRegex matches any string that starts with "i-".
instanceIDRegex = regexp.MustCompile(`^i-.*`)
)

Expand Down Expand Up @@ -789,35 +789,31 @@ func addTaint(node *corev1.Node, nth Node, taintKey string, taintValue string, e
}

retryDeadline := time.Now().Add(maxRetryDeadline)
freshNode := node.DeepCopy()
client := nth.drainHelper.Client
var err error
refresh := false

for {
if refresh {
// Get the newest version of the node.
freshNode, err = client.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{})
if err != nil || freshNode == nil {
nodeErr := fmt.Errorf("failed to get node %v: %w", node.Name, err)
log.Err(nodeErr).
Str("taint_key", taintKey).
Str("node_name", node.Name).
Msg("Error while adding taint on node")
return nodeErr
}
freshNode, err := client.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{})
if err != nil || freshNode == nil {
nodeErr := fmt.Errorf("failed to get node %v: %w", node.Name, err)
log.Err(nodeErr).
Str("taint_key", taintKey).
Str("node_name", node.Name).
Msg("Error while adding taint on node")
return nodeErr
}

if !addTaintToSpec(freshNode, taintKey, taintValue, effect) {
if !refresh {
// Make sure we have the latest version before skipping update.
refresh = true
continue
}
return nil
}
_, err = client.CoreV1().Nodes().Update(context.TODO(), freshNode, metav1.UpdateOptions{})

taintsJSON, err := json.Marshal(freshNode.Spec.Taints)
if err != nil {
return fmt.Errorf("failed to marshal taints for node %s: %w", node.Name, err)
}
patchData := []byte(fmt.Sprintf(`{"spec":{"taints":%s}}`, taintsJSON))

_, err = client.CoreV1().Nodes().Patch(context.TODO(), node.Name, types.StrategicMergePatchType, patchData, metav1.PatchOptions{})
if err != nil && errors.IsConflict(err) && time.Now().Before(retryDeadline) {
refresh = true
time.Sleep(conflictRetryInterval)
continue
}
Expand Down