From 2de91cf495e517b9ba111058c4ba6989b0159314 Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Wed, 22 Apr 2026 18:04:11 +0000 Subject: [PATCH 1/3] Fix bb and rins from agent bug report --- cpp/src/branch_and_bound/branch_and_bound.cpp | 1 + cpp/src/mip_heuristics/diversity/lns/rins.cu | 58 +++++++++++++------ cpp/src/mip_heuristics/diversity/lns/rins.cuh | 1 + 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index e69ff7b9a5..7a200842e9 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1485,6 +1485,7 @@ void branch_and_bound_t::plunge_with(branch_and_bound_worker_t::new_best_incumbent_callback(const std::vector& solut template void rins_t::node_callback(const std::vector& solution, f_t objective) { - if (!enabled) return; + if (!enabled.load()) return; node_count++; if (node_count - node_count_at_last_improvement < settings.nodes_after_later_improvement) return; - if (node_count - node_count_at_last_rins > settings.node_freq) { - // opportunistic early test w/ atomic to avoid having to take the lock - if (!rins_thread->cpu_thread_done) return; - std::lock_guard lock(rins_mutex); - bool population_ready = false; - if (rins_thread->cpu_thread_done) { - std::lock_guard pop_lock(dm.population.write_mutex); - population_ready = dm.population.current_size() > 0 && dm.population.is_feasible(); - } - if (population_ready) { - lp_optimal_solution = solution; - rins_thread->start_cpu_solver(); - } + if (node_count - node_count_at_last_rins <= settings.node_freq) { return; } + + std::lock_guard lock(rins_mutex); + if (!enabled.load() || !rins_thread) { return; } + if (!rins_thread->cpu_thread_done.load()) { return; } + + bool population_ready = false; + { + std::lock_guard pop_lock(dm.population.write_mutex); + population_ready = dm.population.current_size() > 0 && dm.population.is_feasible(); } + if (!population_ready) { return; } + + refresh_problem_copy(); + if (solution.size() != static_cast::size_type>(problem_copy->n_variables)) { + CUOPT_LOG_DEBUG("Skipping RINS launch due to stale LP solution size (%zu vs %d)", + solution.size(), + problem_copy->n_variables); + return; + } + + lp_optimal_solution = solution; + rins_thread->start_cpu_solver(); } template void rins_t::enable() { + std::lock_guard lock(rins_mutex); rins_thread = std::make_unique>(); rins_thread->rins_ptr = this; seed = cuopt::seed_generator::get_seed(); - problem_ptr->handle_ptr->sync_stream(); - problem_copy = std::make_unique>(*problem_ptr, &rins_handle); + refresh_problem_copy(); enabled = true; } template void rins_t::stop_rins() { - enabled = false; - if (rins_thread) rins_thread->request_termination(); - rins_thread.reset(); + std::unique_ptr> local_thread; + { + std::lock_guard lock(rins_mutex); + enabled = false; + local_thread = std::move(rins_thread); + } + if (local_thread) { local_thread->request_termination(); } +} + +template +void rins_t::refresh_problem_copy() +{ + problem_ptr->handle_ptr->sync_stream(); + problem_copy = std::make_unique>(*problem_ptr, &rins_handle); } template diff --git a/cpp/src/mip_heuristics/diversity/lns/rins.cuh b/cpp/src/mip_heuristics/diversity/lns/rins.cuh index 0a9133f848..f25d793a7f 100644 --- a/cpp/src/mip_heuristics/diversity/lns/rins.cuh +++ b/cpp/src/mip_heuristics/diversity/lns/rins.cuh @@ -77,6 +77,7 @@ class rins_t { void stop_rins(); void run_rins(); + void refresh_problem_copy(); mip_solver_context_t& context; problem_t* problem_ptr; From 0fed0c7b06e2f6a4bd18af9cf4855477edf475ae Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Wed, 22 Apr 2026 21:36:49 +0000 Subject: [PATCH 2/3] More fixes --- cpp/src/branch_and_bound/branch_and_bound.cpp | 12 +++-- cpp/src/mip_heuristics/diversity/lns/rins.cu | 54 +++++++------------ cpp/src/mip_heuristics/diversity/lns/rins.cuh | 3 +- 3 files changed, 29 insertions(+), 40 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 7a200842e9..9e993d644a 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1436,6 +1436,7 @@ void branch_and_bound_t::plunge_with(branch_and_bound_worker_t*> stack; stack.push_front(worker->start_node); + bool requeue_pending_nodes = false; worker->recompute_basis = true; worker->recompute_bounds = true; @@ -1483,9 +1484,11 @@ void branch_and_bound_t::plunge_with(branch_and_bound_worker_t::plunge_with(branch_and_bound_worker_t 0 && - (rel_gap <= settings_.relative_mip_gap_tol || abs_gap <= settings_.absolute_mip_gap_tol)) { - // If the solver converged according to the gap rules, but we still have nodes to explore - // in the stack, then we should add all the pending nodes back to the heap so the lower - // bound of the solver is set to the correct value. + (requeue_pending_nodes || rel_gap <= settings_.relative_mip_gap_tol || + abs_gap <= settings_.absolute_mip_gap_tol)) { + // If the solver exits early without consuming the local stack, or converged according to + // the gap rules while nodes are still pending, put those nodes back into the global queue + // before returning. while (!stack.empty()) { auto node = stack.front(); stack.pop_front(); diff --git a/cpp/src/mip_heuristics/diversity/lns/rins.cu b/cpp/src/mip_heuristics/diversity/lns/rins.cu index 091f3fc455..e154ba578f 100644 --- a/cpp/src/mip_heuristics/diversity/lns/rins.cu +++ b/cpp/src/mip_heuristics/diversity/lns/rins.cu @@ -26,6 +26,12 @@ #include namespace cuopt::linear_programming::detail { +template +rins_t::~rins_t() +{ + stop_rins(); +} + template rins_t::rins_t(mip_solver_context_t& context_, diversity_manager_t& dm_, @@ -70,22 +76,16 @@ void rins_t::node_callback(const std::vector& solution, f_t objec if (!enabled.load() || !rins_thread) { return; } if (!rins_thread->cpu_thread_done.load()) { return; } - bool population_ready = false; { std::lock_guard pop_lock(dm.population.write_mutex); - population_ready = dm.population.current_size() > 0 && dm.population.is_feasible(); - } - if (!population_ready) { return; } + if (dm.population.current_size() == 0 || !dm.population.is_feasible()) { return; } - refresh_problem_copy(); - if (solution.size() != static_cast::size_type>(problem_copy->n_variables)) { - CUOPT_LOG_DEBUG("Skipping RINS launch due to stale LP solution size (%zu vs %d)", - solution.size(), - problem_copy->n_variables); - return; - } + auto& best_feasible_ref = dm.population.best_feasible(); + if (!best_feasible_ref.get_feasible()) { return; } - lp_optimal_solution = solution; + incumbent_solution_snapshot = best_feasible_ref.get_host_assignment(); + lp_optimal_solution = solution; + } rins_thread->start_cpu_solver(); } @@ -96,7 +96,8 @@ void rins_t::enable() rins_thread = std::make_unique>(); rins_thread->rins_ptr = this; seed = cuopt::seed_generator::get_seed(); - refresh_problem_copy(); + problem_ptr->handle_ptr->sync_stream(); + problem_copy = std::make_unique>(*problem_ptr, &rins_handle); enabled = true; } @@ -112,19 +113,11 @@ void rins_t::stop_rins() if (local_thread) { local_thread->request_termination(); } } -template -void rins_t::refresh_problem_copy() -{ - problem_ptr->handle_ptr->sync_stream(); - problem_copy = std::make_unique>(*problem_ptr, &rins_handle); -} - template void rins_t::run_rins() { if (total_calls == 0) RAFT_CUDA_TRY(cudaSetDevice(context.handle_ptr->get_device())); - cuopt_assert(lp_optimal_solution.size() == problem_copy->n_variables, "Assignment size mismatch"); cuopt_assert(problem_copy->handle_ptr == &rins_handle, "Handle mismatch"); // Do not make assertions based on problem_ptr. The original problem may have been modified within // the FP loop relaxing integers cuopt_assert(problem_copy->n_variables == @@ -137,20 +130,11 @@ void rins_t::run_rins() solution_t best_sol(*problem_copy); rins_handle.sync_stream(); - // copy the best from the population into a solution_t in the RINS stream - { - std::lock_guard lock(dm.population.write_mutex); - if (!dm.population.is_feasible()) return; - cuopt_assert(dm.population.current_size() > 0, "No solutions in population"); - auto& best_feasible_ref = dm.population.best_feasible(); - cuopt_assert(best_feasible_ref.assignment.size() == best_sol.assignment.size(), - "Assignment size mismatch"); - cuopt_assert(best_feasible_ref.get_feasible(), "Best feasible is not feasible"); - expand_device_copy(best_sol.assignment, best_feasible_ref.assignment, rins_handle.get_stream()); - best_sol.handle_ptr = &rins_handle; - best_sol.problem_ptr = problem_copy.get(); - best_sol.compute_feasibility(); - } + // Use the launch-time snapshot to keep the incumbent and problem model consistent. + best_sol.copy_new_assignment(incumbent_solution_snapshot); + best_sol.handle_ptr = &rins_handle; + best_sol.problem_ptr = problem_copy.get(); + best_sol.compute_feasibility(); cuopt_assert(best_sol.handle_ptr == &rins_handle, "Handle mismatch"); cuopt_assert(best_sol.get_feasible(), "Best solution is not feasible"); diff --git a/cpp/src/mip_heuristics/diversity/lns/rins.cuh b/cpp/src/mip_heuristics/diversity/lns/rins.cuh index f25d793a7f..3071b82ac2 100644 --- a/cpp/src/mip_heuristics/diversity/lns/rins.cuh +++ b/cpp/src/mip_heuristics/diversity/lns/rins.cuh @@ -67,6 +67,7 @@ struct rins_thread_t : public cpu_worker_thread_base_t> template class rins_t { public: + ~rins_t(); rins_t(mip_solver_context_t& context, diversity_manager_t& dm, rins_settings_t settings = rins_settings_t()); @@ -77,7 +78,6 @@ class rins_t { void stop_rins(); void run_rins(); - void refresh_problem_copy(); mip_solver_context_t& context; problem_t* problem_ptr; @@ -90,6 +90,7 @@ class rins_t { raft::handle_t rins_handle; std::vector lp_optimal_solution; + std::vector incumbent_solution_snapshot; f_t fixrate{0.5}; i_t total_calls{0}; From aee6ec83731a1acdf92e4f2fe88f83bdef1beb01 Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Mon, 4 May 2026 16:03:29 -0700 Subject: [PATCH 3/3] Fix style --- cpp/src/branch_and_bound/branch_and_bound.cpp | 9 ++++----- cpp/src/mip_heuristics/diversity/lns/rins.cu | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 9e993d644a..c2d16dac23 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1437,8 +1437,8 @@ void branch_and_bound_t::plunge_with(branch_and_bound_worker_t*> stack; stack.push_front(worker->start_node); bool requeue_pending_nodes = false; - worker->recompute_basis = true; - worker->recompute_bounds = true; + worker->recompute_basis = true; + worker->recompute_bounds = true; f_t lower_bound = get_lower_bound(); f_t upper_bound = upper_bound_; @@ -1545,9 +1545,8 @@ void branch_and_bound_t::plunge_with(branch_and_bound_worker_t 0 && - (requeue_pending_nodes || rel_gap <= settings_.relative_mip_gap_tol || - abs_gap <= settings_.absolute_mip_gap_tol)) { + if (stack.size() > 0 && (requeue_pending_nodes || rel_gap <= settings_.relative_mip_gap_tol || + abs_gap <= settings_.absolute_mip_gap_tol)) { // If the solver exits early without consuming the local stack, or converged according to // the gap rules while nodes are still pending, put those nodes back into the global queue // before returning. diff --git a/cpp/src/mip_heuristics/diversity/lns/rins.cu b/cpp/src/mip_heuristics/diversity/lns/rins.cu index e154ba578f..7b1db8452c 100644 --- a/cpp/src/mip_heuristics/diversity/lns/rins.cu +++ b/cpp/src/mip_heuristics/diversity/lns/rins.cu @@ -84,7 +84,7 @@ void rins_t::node_callback(const std::vector& solution, f_t objec if (!best_feasible_ref.get_feasible()) { return; } incumbent_solution_snapshot = best_feasible_ref.get_host_assignment(); - lp_optimal_solution = solution; + lp_optimal_solution = solution; } rins_thread->start_cpu_solver(); }