From 1004f743bf2f111e4abb7894dbe2bd6104d472d4 Mon Sep 17 00:00:00 2001 From: Leander Kohler Date: Tue, 17 Feb 2026 14:14:27 +0100 Subject: [PATCH 1/4] vmm: add post-migration event to VmSnapshot During live migration, VM ownership is moved away from the VMM thread. To preserve guest-triggered reboot and shutdown lifecycle intent across that ownership handover, we need a small lifecycle marker to travel with the migrated VM state. This change introduces `PostMigrationLifecycleEvent` and stores it in `VmSnapshot` with `#[serde(default)]` for backward compatibility. `Vm::snapshot()` now serializes the marker, and VM construction from a snapshot restores it. No control-loop behavior is changed in this commit. This is only the data model/plumbing needed by follow-up commits. On-behalf-of: SAP leander.kohler@sap.com Signed-off-by: Leander Kohler --- vmm/src/vm.rs | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 1a2e52f8cc..49402b7dcc 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -90,11 +90,9 @@ use crate::landlock::LandlockError; use crate::memory_manager::{ Error as MemoryManagerError, MemoryManager, MemoryManagerSnapshotData, }; -#[cfg(target_arch = "x86_64")] -use crate::migration::get_vm_snapshot; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use crate::migration::url_to_file; -use crate::migration::{SNAPSHOT_CONFIG_FILE, SNAPSHOT_STATE_FILE, url_to_path}; +use crate::migration::{SNAPSHOT_CONFIG_FILE, SNAPSHOT_STATE_FILE, get_vm_snapshot, url_to_path}; use crate::vcpu_throttling::ThrottleThreadHandle; #[cfg(feature = "fw_cfg")] use crate::vm_config::FwCfgConfig; @@ -529,6 +527,13 @@ pub struct Vm { stop_on_boot: bool, load_payload_handle: Option>>, vcpu_throttler: ThrottleThreadHandle, + post_migration_lifecycle_event: Option, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub enum PostMigrationLifecycleEvent { + VmReboot, + 
VmmShutdown, } impl Vm { @@ -814,6 +819,15 @@ impl Vm { } else { VmState::Created }; + let post_migration_lifecycle_event = snapshot + .as_ref() + .map(|snapshot| { + get_vm_snapshot(snapshot) + .map(|vm_snapshot| vm_snapshot.post_migration_lifecycle_event) + .map_err(Error::Restore) + }) + .transpose()? + .flatten(); // TODO we could also spawn the thread when a migration with auto-converge starts. // Probably this is the better design. @@ -839,6 +853,7 @@ impl Vm { stop_on_boot, load_payload_handle, vcpu_throttler, + post_migration_lifecycle_event, }) } @@ -1017,6 +1032,17 @@ impl Vm { self.vcpu_throttler.shutdown(); } + pub fn set_post_migration_lifecycle_event( + &mut self, + event: Option, + ) { + self.post_migration_lifecycle_event = event; + } + + pub fn post_migration_lifecycle_event(&self) -> Option { + self.post_migration_lifecycle_event + } + #[allow(clippy::too_many_arguments)] pub fn new( vm_config: Arc>, @@ -2818,6 +2844,8 @@ impl Pausable for Vm { #[derive(Serialize, Deserialize)] pub struct VmSnapshot { + #[serde(default)] + pub post_migration_lifecycle_event: Option, #[cfg(target_arch = "x86_64")] pub clock: Option, #[cfg(all(feature = "kvm", target_arch = "x86_64"))] @@ -2875,6 +2903,7 @@ impl Snapshottable for Vm { }; let vm_snapshot_state = VmSnapshot { + post_migration_lifecycle_event: self.post_migration_lifecycle_event(), #[cfg(target_arch = "x86_64")] clock: self.saved_clock, #[cfg(all(feature = "kvm", target_arch = "x86_64"))] From 00bb4bd2e8b6c996dfc803fe3b1cf05e9a5b8faa Mon Sep 17 00:00:00 2001 From: Leander Kohler Date: Tue, 17 Feb 2026 14:42:39 +0100 Subject: [PATCH 2/4] vmm: postpone reset/exit during migration While a live migration is running, the migration worker owns the VM and the VMM control loop cannot execute vm_reboot()/vmm_shutdown() directly. Guest-triggered reset/exit events in that window currently hit VmMigrating and fail. 
This change makes the control loop consume reset/exit as before, but when ownership is `MaybeVmOwnership::Migration` it postpones a post-migration lifecycle intent instead of calling lifecycle handlers directly. The postponed state is first-event-wins and is cleared when a new send migration starts, preventing stale lifecycle intent from leaking between migrations. This commit only introduces source-side postponing behavior and does not yet apply or replay the postponed event. On-behalf-of: SAP leander.kohler@sap.com Signed-off-by: Leander Kohler --- vmm/src/lib.rs | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 45e0e05963..c03ddaf5ba 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -87,7 +87,7 @@ use crate::migration::get_vm_snapshot; use crate::migration::{recv_vm_config, recv_vm_state}; use crate::seccomp_filters::{Thread, get_seccomp_filter}; use crate::sync_utils::Gate; -use crate::vm::{Error as VmError, Vm, VmState}; +use crate::vm::{Error as VmError, PostMigrationLifecycleEvent, Vm, VmState}; use crate::vm_config::{ DeviceConfig, DiskConfig, FsConfig, MemoryZoneConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig, @@ -920,6 +920,7 @@ pub struct Vmm { console_resize_pipe: Option>, console_info: Option, check_migration_evt: EventFd, + postponed_lifecycle_event: Arc>>, /// Handle to the [`MigrationWorker`] thread. /// /// The handle will return the [`Vm`] back in any case. Further, the underlying error (if any) is returned. 
@@ -1883,10 +1884,24 @@ impl Vmm { console_resize_pipe: None, console_info: None, check_migration_evt, + postponed_lifecycle_event: Arc::new(Mutex::new(None)), migration_thread_handle: None, }) } + fn postpone_lifecycle_event_during_migration(&self, event: PostMigrationLifecycleEvent) { + let mut postponed_event = self.postponed_lifecycle_event.lock().unwrap(); + if postponed_event.is_none() { + *postponed_event = Some(event); + info!("Postponed post-migration lifecycle event: {event:?}"); + } + } + + fn clear_postponed_lifecycle_event(&self) { + let mut postponed_event = self.postponed_lifecycle_event.lock().unwrap(); + *postponed_event = None; + } + + /// Try to receive a file descriptor from a socket. Returns the slot number and the file descriptor. fn vm_receive_memory_fd( socket: &mut SocketStream, @@ -2904,6 +2919,13 @@ impl Vmm { info!("VM exit event"); // Consume the event. self.exit_evt.read().map_err(Error::EventFdRead)?; + // Workaround for guest-induced shutdown during a live-migration. + if matches!(self.vm, MaybeVmOwnership::Migration) { + self.postpone_lifecycle_event_during_migration( + PostMigrationLifecycleEvent::VmmShutdown, + ); + continue; + } self.vmm_shutdown().map_err(Error::VmmShutdown)?; break 'outer; @@ -2912,6 +2934,13 @@ impl Vmm { info!("VM reset event"); // Consume the event. self.reset_evt.read().map_err(Error::EventFdRead)?; + // Workaround for guest-induced reset during a live-migration. + if matches!(self.vm, MaybeVmOwnership::Migration) { + self.postpone_lifecycle_event_during_migration( + PostMigrationLifecycleEvent::VmReboot, + ); + continue; + } self.vm_reboot().map_err(Error::VmReboot)?; } EpollDispatch::ActivateVirtioDevices => { @@ -3809,6 +3838,9 @@ impl RequestHandler for Vmm { send_data_migration.destination_url, send_data_migration.local ); + // New migration attempt: clear postponed lifecycle from any previous run. 
+ self.clear_postponed_lifecycle_event(); + if !self .vm_config .as_ref() From 177945565c2a10deadf8222a6067eddc86e087dd Mon Sep 17 00:00:00 2001 From: Leander Kohler Date: Tue, 17 Feb 2026 14:58:49 +0100 Subject: [PATCH 3/4] vmm: migration: propagate/replay postponed event Add migration plumbing to carry the postponed lifecycle intent from source to destination and replay it through the existing control-loop paths. The migration worker now passes the shared postponed lifecycle state into the send path, and the sender writes the selected `PostMigrationLifecycleEvent` into the VM snapshot before transmitting state. On the receiving side, migration state restore extracts that snapshot field and stores it in VMM state. After `Command::Complete`, the target resumes the VM and replays the lifecycle action by writing to the existing eventfds: - VmReboot -> reset_evt - VmmShutdown -> exit_evt On-behalf-of: SAP leander.kohler@sap.com Signed-off-by: Leander Kohler --- vmm/src/lib.rs | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index c03ddaf5ba..d7aaeffa17 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -82,9 +82,7 @@ use crate::coredump::GuestDebuggable; use crate::cpu::IS_IN_SHUTDOWN; use crate::landlock::Landlock; use crate::memory_manager::MemoryManager; -#[cfg(all(feature = "kvm", target_arch = "x86_64"))] -use crate::migration::get_vm_snapshot; -use crate::migration::{recv_vm_config, recv_vm_state}; +use crate::migration::{get_vm_snapshot, recv_vm_config, recv_vm_state}; use crate::seccomp_filters::{Thread, get_seccomp_filter}; use crate::sync_utils::Gate; use crate::vm::{Error as VmError, PostMigrationLifecycleEvent, Vm, VmState}; @@ -797,6 +795,8 @@ struct MigrationWorker { vm: Vm, check_migration_evt: EventFd, config: VmSendMigrationData, + // Shared with main VMM thread + postponed_lifecycle_event: Arc>>, #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 
hypervisor: Arc, } @@ -822,6 +822,7 @@ impl MigrationWorker { #[cfg(all(feature = "kvm", target_arch = "x86_64"))] self.hypervisor.as_ref(), &self.config, + self.postponed_lifecycle_event.as_ref(), ).inspect_err(|_| { let e = self.migrate_error_cleanup(); if let Err(e) = e { @@ -921,6 +922,7 @@ pub struct Vmm { console_info: Option, check_migration_evt: EventFd, postponed_lifecycle_event: Arc>>, + received_postponed_lifecycle_event: Option, /// Handle to the [`MigrationWorker`] thread. /// /// The handle will return the [`Vm`] back in any case. Further, the underlying error (if any) is returned. @@ -1885,6 +1887,7 @@ impl Vmm { console_info: None, check_migration_evt, postponed_lifecycle_event: Arc::new(Mutex::new(None)), + received_postponed_lifecycle_event: None, migration_thread_handle: None, }) } @@ -2039,7 +2042,29 @@ impl Vmm { // The unwrap is safe, because the state machine makes sure we called // vm_receive_state before, which creates the VM. let vm = self.vm.vm_mut().unwrap(); - vm.resume()?; + + // We are on the control-loop thread handling an API request, so + // there is no concurrent access from other VMM or migration + // threads. The VM is in the Paused state, which permits both + // the Running transition (resume) and the Shutdown transition (reboot / exit) + // triggered via the eventfds below. 
+ match self.received_postponed_lifecycle_event { + None => vm.resume()?, + Some(PostMigrationLifecycleEvent::VmReboot) => { + self.reset_evt + .write(1) + .context("Failed writing reset eventfd after migration") + .map_err(MigratableError::MigrateReceive)?; + } + Some(PostMigrationLifecycleEvent::VmmShutdown) => { + self.exit_evt + .write(1) + .context("Failed writing exit eventfd after migration") + .map_err(MigratableError::MigrateReceive)?; + } + } + self.received_postponed_lifecycle_event = None; + Ok(Completed) } _ => invalid_command(), @@ -2197,6 +2222,11 @@ impl Vmm { .context("Error deserialising snapshot") .map_err(MigratableError::MigrateReceive)?; + let vm_snapshot = get_vm_snapshot(&snapshot) + .context("Failed extracting VM snapshot data") + .map_err(MigratableError::MigrateReceive)?; + self.received_postponed_lifecycle_event = vm_snapshot.post_migration_lifecycle_event; + let exit_evt = self .exit_evt .try_clone() @@ -2516,6 +2546,7 @@ impl Vmm { #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: &dyn hypervisor::Hypervisor, send_data_migration: &VmSendMigrationData, + postponed_lifecycle_event: &Mutex>, ) -> result::Result<(), MigratableError> { let mut s = MigrationStateInternal::new(); @@ -2636,6 +2667,7 @@ impl Vmm { } // Capture snapshot and send it + vm.set_post_migration_lifecycle_event(*postponed_lifecycle_event.lock().unwrap()); let vm_snapshot = vm.snapshot()?; let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap(); Request::state(snapshot_data.len() as u64).write_to(&mut socket)?; @@ -3755,6 +3787,8 @@ impl RequestHandler for Vmm { &mut self, receive_data_migration: VmReceiveMigrationData, ) -> result::Result<(), MigratableError> { + // Prevent stale lifecycle intent from a previous failed receive attempt. 
+ self.received_postponed_lifecycle_event = None; info!( "Receiving migration: receiver_url = {}, net_fds={:?}, tcp_url={:?}, zones={:?}", receive_data_migration.receiver_url, @@ -3907,6 +3941,7 @@ impl RequestHandler for Vmm { vm, check_migration_evt: self.check_migration_evt.try_clone().unwrap(), config: send_data_migration, + postponed_lifecycle_event: self.postponed_lifecycle_event.clone(), #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: self.hypervisor.clone(), }; From 9872a8aa7e284aa5ea02ce19e6acee501f4bdd37 Mon Sep 17 00:00:00 2001 From: Leander Kohler Date: Tue, 17 Feb 2026 15:00:59 +0100 Subject: [PATCH 4/4] vmm: migration: switch downtime on postponed event When a lifecycle event like reset or shutdown is postponed during pre-copy, switch to downtime at the next iteration boundary. This keeps the current iteration send intact and then transitions into the existing graceful downtime path (`stop_vcpu_throttling()`, `pause()`, final transfer, snapshot). To keep behavior deterministic on source migration failure, replay the postponed lifecycle event locally after ownership is returned: - VmReboot -> reset_evt - VmmShutdown -> exit_evt Postponed state is cleared on both success and failure paths to avoid stale state across migrations. 
On-behalf-of: SAP leander.kohler@sap.com Signed-off-by: Leander Kohler --- vmm/src/lib.rs | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index d7aaeffa17..c0f290ca01 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -1900,6 +1900,10 @@ impl Vmm { } } + fn current_postponed_lifecycle_event(&self) -> Option { + *self.postponed_lifecycle_event.lock().unwrap() + } + fn clear_postponed_lifecycle_event(&self) { let mut postponed_event = self.postponed_lifecycle_event.lock().unwrap(); *postponed_event = None; @@ -2303,6 +2307,7 @@ impl Vmm { s: &mut MigrationStateInternal, migration_timeout: Duration, migrate_downtime_limit: Duration, + postponed_lifecycle_event: &Mutex>, ) -> result::Result { let mut iteration_table; let total_memory_size_bytes = vm @@ -2450,6 +2455,17 @@ impl Vmm { // Increment iteration counter s.iteration += 1; + + let event = *postponed_lifecycle_event.lock().unwrap(); + if let Some(event) = event { + info!( + "Lifecycle event postponed during migration ({event:?}), switching to downtime phase early" + ); + // The current iteration has already been sent, therefore no extra range + // needs to be carried into the final transfer batch. 
+ iteration_table = MemoryRangeTable::default(); + break; + } } Ok(iteration_table) @@ -2460,6 +2476,7 @@ impl Vmm { socket: &mut SocketStream, s: &mut MigrationStateInternal, send_data_migration: &VmSendMigrationData, + postponed_lifecycle_event: &Mutex>, ) -> result::Result<(), MigratableError> { let mem_send = SendAdditionalConnections::new(send_data_migration, &vm.guest_memory())?; @@ -2495,6 +2512,7 @@ impl Vmm { s, migration_timeout, migrate_downtime_limit, + postponed_lifecycle_event, )?; info!("Entering downtime phase"); @@ -2642,7 +2660,13 @@ impl Vmm { // Now pause VM vm.pause()?; } else { - Self::do_memory_migration(vm, &mut socket, &mut s, send_data_migration)?; + Self::do_memory_migration( + vm, + &mut socket, + &mut s, + send_data_migration, + postponed_lifecycle_event, + )?; } // Update migration progress snapshot @@ -2900,7 +2924,24 @@ impl Vmm { // Give VMM back control. self.vm = MaybeVmOwnership::Vmm(vm); - + if let Some(event) = self.current_postponed_lifecycle_event() { + match event { + PostMigrationLifecycleEvent::VmReboot => { + self.reset_evt + .write(1) + .context("Failed replaying reset event after failed migration") + .inspect_err(|write_err| error!("{write_err}")) + .ok(); + } + PostMigrationLifecycleEvent::VmmShutdown => { + self.exit_evt + .write(1) + .context("Failed replaying shutdown event after failed migration") + .inspect_err(|write_err| error!("{write_err}")) + .ok(); + } + } + } // Update migration progress snapshot { let mut lock = MIGRATION_PROGRESS_SNAPSHOT.lock().unwrap(); @@ -2910,6 +2951,7 @@ impl Vmm { } } } + self.clear_postponed_lifecycle_event(); } fn control_loop(