Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 116 additions & 7 deletions vmm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,10 @@ use crate::coredump::GuestDebuggable;
use crate::cpu::IS_IN_SHUTDOWN;
use crate::landlock::Landlock;
use crate::memory_manager::MemoryManager;
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
use crate::migration::get_vm_snapshot;
use crate::migration::{recv_vm_config, recv_vm_state};
use crate::migration::{get_vm_snapshot, recv_vm_config, recv_vm_state};
use crate::seccomp_filters::{Thread, get_seccomp_filter};
use crate::sync_utils::Gate;
use crate::vm::{Error as VmError, Vm, VmState};
use crate::vm::{Error as VmError, PostMigrationLifecycleEvent, Vm, VmState};
use crate::vm_config::{
DeviceConfig, DiskConfig, FsConfig, MemoryZoneConfig, NetConfig, PmemConfig, UserDeviceConfig,
VdpaConfig, VmConfig, VsockConfig,
Expand Down Expand Up @@ -797,6 +795,8 @@ struct MigrationWorker {
vm: Vm,
check_migration_evt: EventFd,
config: VmSendMigrationData,
// Shared with main VMM thread
postponed_lifecycle_event: Arc<Mutex<Option<PostMigrationLifecycleEvent>>>,
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
hypervisor: Arc<dyn hypervisor::Hypervisor>,
}
Expand All @@ -822,6 +822,7 @@ impl MigrationWorker {
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
self.hypervisor.as_ref(),
&self.config,
self.postponed_lifecycle_event.as_ref(),
).inspect_err(|_| {
let e = self.migrate_error_cleanup();
if let Err(e) = e {
Expand Down Expand Up @@ -920,6 +921,8 @@ pub struct Vmm {
console_resize_pipe: Option<Arc<File>>,
console_info: Option<ConsoleInfo>,
check_migration_evt: EventFd,
postponed_lifecycle_event: Arc<Mutex<Option<PostMigrationLifecycleEvent>>>,
received_postponed_lifecycle_event: Option<PostMigrationLifecycleEvent>,
/// Handle to the [`MigrationWorker`] thread.
///
/// The handle will return the [`Vm`] back in any case. Further, the underlying error (if any) is returned.
Expand Down Expand Up @@ -1883,10 +1886,29 @@ impl Vmm {
console_resize_pipe: None,
console_info: None,
check_migration_evt,
postponed_lifecycle_event: Arc::new(Mutex::new(None)),
received_postponed_lifecycle_event: None,
migration_thread_handle: None,
})
}

/// Record a guest-triggered lifecycle event (reboot / shutdown) that arrived
/// while a live migration owns the VM, so it can be replayed afterwards.
///
/// Only the first event wins: if a postponed event is already stored, any
/// later one is silently dropped.
fn postpone_lifecycle_event_during_migration(&self, event: PostMigrationLifecycleEvent) {
    let mut slot = self.postponed_lifecycle_event.lock().unwrap();
    match *slot {
        // An event was already postponed earlier in this migration; keep it.
        Some(_) => {}
        None => {
            *slot = Some(event);
            info!("Postponed post-migration lifecycle event: {event:?}");
        }
    }
}

/// Return a copy of the currently postponed lifecycle event, if any,
/// without clearing it.
fn current_postponed_lifecycle_event(&self) -> Option<PostMigrationLifecycleEvent> {
    let guard = self.postponed_lifecycle_event.lock().unwrap();
    *guard
}

/// Drop any postponed lifecycle event, e.g. once it has been replayed or a
/// new migration attempt begins.
fn clear_postponed_lifecycle_event(&self) {
    // `take()` swaps the slot back to `None`; the previous value is discarded.
    self.postponed_lifecycle_event.lock().unwrap().take();
}

/// Try to receive a file descriptor from a socket. Returns the slot number and the file descriptor.
fn vm_receive_memory_fd(
socket: &mut SocketStream,
Expand Down Expand Up @@ -2024,7 +2046,29 @@ impl Vmm {
// The unwrap is safe, because the state machine makes sure we called
// vm_receive_state before, which creates the VM.
let vm = self.vm.vm_mut().unwrap();
vm.resume()?;

// We are on the control-loop thread handling an API request, so
// there is no concurrent access from other VMM or migration
// threads. The VM is in the Paused state, which permits both
// the Running transition (resume) and the Shutdown transition (reboot / exit)
// triggered via the eventfds below.
match self.received_postponed_lifecycle_event {
None => vm.resume()?,
Some(PostMigrationLifecycleEvent::VmReboot) => {
self.reset_evt
.write(1)
.context("Failed writing reset eventfd after migration")
.map_err(MigratableError::MigrateReceive)?;
}
Some(PostMigrationLifecycleEvent::VmmShutdown) => {
self.exit_evt
.write(1)
.context("Failed writing exit eventfd after migration")
.map_err(MigratableError::MigrateReceive)?;
}
}
self.received_postponed_lifecycle_event = None;

Ok(Completed)
}
_ => invalid_command(),
Expand Down Expand Up @@ -2182,6 +2226,11 @@ impl Vmm {
.context("Error deserialising snapshot")
.map_err(MigratableError::MigrateReceive)?;

let vm_snapshot = get_vm_snapshot(&snapshot)
.context("Failed extracting VM snapshot data")
.map_err(MigratableError::MigrateReceive)?;
self.received_postponed_lifecycle_event = vm_snapshot.post_migration_lifecycle_event;

let exit_evt = self
.exit_evt
.try_clone()
Expand Down Expand Up @@ -2258,6 +2307,7 @@ impl Vmm {
s: &mut MigrationStateInternal,
migration_timeout: Duration,
migrate_downtime_limit: Duration,
postponed_lifecycle_event: &Mutex<Option<PostMigrationLifecycleEvent>>,
) -> result::Result<MemoryRangeTable, MigratableError> {
let mut iteration_table;
let total_memory_size_bytes = vm
Expand Down Expand Up @@ -2405,6 +2455,17 @@ impl Vmm {

// Increment iteration counter
s.iteration += 1;

let event = *postponed_lifecycle_event.lock().unwrap();
if let Some(event) = event {
info!(
"Lifecycle event postponed during migration ({event:?}), switching to downtime phase early"
);
// The current iteration has already been sent, therefore no extra range
// needs to be carried into the final transfer batch.
iteration_table = MemoryRangeTable::default();
break;
}
}

Ok(iteration_table)
Expand All @@ -2415,6 +2476,7 @@ impl Vmm {
socket: &mut SocketStream,
s: &mut MigrationStateInternal,
send_data_migration: &VmSendMigrationData,
postponed_lifecycle_event: &Mutex<Option<PostMigrationLifecycleEvent>>,
) -> result::Result<(), MigratableError> {
let mem_send = SendAdditionalConnections::new(send_data_migration, &vm.guest_memory())?;

Expand Down Expand Up @@ -2450,6 +2512,7 @@ impl Vmm {
s,
migration_timeout,
migrate_downtime_limit,
postponed_lifecycle_event,
)?;

info!("Entering downtime phase");
Expand Down Expand Up @@ -2501,6 +2564,7 @@ impl Vmm {
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
hypervisor: &dyn hypervisor::Hypervisor,
send_data_migration: &VmSendMigrationData,
postponed_lifecycle_event: &Mutex<Option<PostMigrationLifecycleEvent>>,
) -> result::Result<(), MigratableError> {
let mut s = MigrationStateInternal::new();

Expand Down Expand Up @@ -2596,7 +2660,13 @@ impl Vmm {
// Now pause VM
vm.pause()?;
} else {
Self::do_memory_migration(vm, &mut socket, &mut s, send_data_migration)?;
Self::do_memory_migration(
vm,
&mut socket,
&mut s,
send_data_migration,
postponed_lifecycle_event,
)?;
}

// Update migration progress snapshot
Expand All @@ -2621,6 +2691,7 @@ impl Vmm {
}

// Capture snapshot and send it
vm.set_post_migration_lifecycle_event(*postponed_lifecycle_event.lock().unwrap());
let vm_snapshot = vm.snapshot()?;
let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap();
Request::state(snapshot_data.len() as u64).write_to(&mut socket)?;
Expand Down Expand Up @@ -2853,7 +2924,24 @@ impl Vmm {

// Give VMM back control.
self.vm = MaybeVmOwnership::Vmm(vm);

if let Some(event) = self.current_postponed_lifecycle_event() {
match event {
PostMigrationLifecycleEvent::VmReboot => {
self.reset_evt
.write(1)
.context("Failed replaying reset event after failed migration")
.inspect_err(|write_err| error!("{write_err}"))
.ok();
}
PostMigrationLifecycleEvent::VmmShutdown => {
self.exit_evt
.write(1)
.context("Failed replaying shutdown event after failed migration")
.inspect_err(|write_err| error!("{write_err}"))
.ok();
}
}
}
// Update migration progress snapshot
{
let mut lock = MIGRATION_PROGRESS_SNAPSHOT.lock().unwrap();
Expand All @@ -2863,6 +2951,7 @@ impl Vmm {
}
}
}
self.clear_postponed_lifecycle_event();
}

fn control_loop(
Expand Down Expand Up @@ -2904,6 +2993,13 @@ impl Vmm {
info!("VM exit event");
// Consume the event.
self.exit_evt.read().map_err(Error::EventFdRead)?;
// Workaround for guest-induced shutdown during a live-migration.
if matches!(self.vm, MaybeVmOwnership::Migration) {
self.postpone_lifecycle_event_during_migration(
PostMigrationLifecycleEvent::VmmShutdown,
);
continue;
}
self.vmm_shutdown().map_err(Error::VmmShutdown)?;

break 'outer;
Expand All @@ -2912,6 +3008,13 @@ impl Vmm {
info!("VM reset event");
// Consume the event.
self.reset_evt.read().map_err(Error::EventFdRead)?;
// Workaround for guest-induced reboot during a live-migration.
if matches!(self.vm, MaybeVmOwnership::Migration) {
self.postpone_lifecycle_event_during_migration(
PostMigrationLifecycleEvent::VmReboot,
);
continue;
}
self.vm_reboot().map_err(Error::VmReboot)?;
}
EpollDispatch::ActivateVirtioDevices => {
Expand Down Expand Up @@ -3726,6 +3829,8 @@ impl RequestHandler for Vmm {
&mut self,
receive_data_migration: VmReceiveMigrationData,
) -> result::Result<(), MigratableError> {
// Prevent stale lifecycle intent from a previous failed receive attempt.
self.received_postponed_lifecycle_event = None;
info!(
"Receiving migration: receiver_url = {}, net_fds={:?}, tcp_url={:?}, zones={:?}",
receive_data_migration.receiver_url,
Expand Down Expand Up @@ -3809,6 +3914,9 @@ impl RequestHandler for Vmm {
send_data_migration.destination_url, send_data_migration.local
);

// New migration attempt: clear postponed lifecycle from any previous run.
self.clear_postponed_lifecycle_event();

if !self
.vm_config
.as_ref()
Expand Down Expand Up @@ -3875,6 +3983,7 @@ impl RequestHandler for Vmm {
vm,
check_migration_evt: self.check_migration_evt.try_clone().unwrap(),
config: send_data_migration,
postponed_lifecycle_event: self.postponed_lifecycle_event.clone(),
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
hypervisor: self.hypervisor.clone(),
};
Expand Down
35 changes: 32 additions & 3 deletions vmm/src/vm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,9 @@ use crate::landlock::LandlockError;
use crate::memory_manager::{
Error as MemoryManagerError, MemoryManager, MemoryManagerSnapshotData,
};
#[cfg(target_arch = "x86_64")]
use crate::migration::get_vm_snapshot;
#[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
use crate::migration::url_to_file;
use crate::migration::{SNAPSHOT_CONFIG_FILE, SNAPSHOT_STATE_FILE, url_to_path};
use crate::migration::{SNAPSHOT_CONFIG_FILE, SNAPSHOT_STATE_FILE, get_vm_snapshot, url_to_path};
use crate::vcpu_throttling::ThrottleThreadHandle;
#[cfg(feature = "fw_cfg")]
use crate::vm_config::FwCfgConfig;
Expand Down Expand Up @@ -529,6 +527,13 @@ pub struct Vm {
stop_on_boot: bool,
load_payload_handle: Option<thread::JoinHandle<Result<EntryPoint>>>,
vcpu_throttler: ThrottleThreadHandle,
post_migration_lifecycle_event: Option<PostMigrationLifecycleEvent>,
}

/// A guest-triggered lifecycle event that arrived while a live migration was
/// in progress and therefore had to be postponed until the migration either
/// completes (replayed on the destination) or fails (replayed on the source).
///
/// Serialized as part of [`VmSnapshot`] so the destination VMM learns about
/// the pending event when it restores the snapshot.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum PostMigrationLifecycleEvent {
    /// The guest requested a reboot (reset eventfd fired during migration).
    VmReboot,
    /// The guest requested a shutdown (exit eventfd fired during migration).
    VmmShutdown,
}

impl Vm {
Expand Down Expand Up @@ -814,6 +819,15 @@ impl Vm {
} else {
VmState::Created
};
let post_migration_lifecycle_event = snapshot
.as_ref()
.map(|snapshot| {
get_vm_snapshot(snapshot)
.map(|vm_snapshot| vm_snapshot.post_migration_lifecycle_event)
.map_err(Error::Restore)
})
.transpose()?
.flatten();

// TODO we could also spawn the thread when a migration with auto-converge starts.
// Probably this is the better design.
Expand All @@ -839,6 +853,7 @@ impl Vm {
stop_on_boot,
load_payload_handle,
vcpu_throttler,
post_migration_lifecycle_event,
})
}

Expand Down Expand Up @@ -1017,6 +1032,17 @@ impl Vm {
self.vcpu_throttler.shutdown();
}

/// Store the lifecycle event (if any) that was postponed during migration,
/// so it gets embedded in the next snapshot taken of this VM.
pub fn set_post_migration_lifecycle_event(
    &mut self,
    event: Option<PostMigrationLifecycleEvent>,
) {
    self.post_migration_lifecycle_event = event;
}

/// The lifecycle event postponed during migration, if one was recorded
/// (copied into [`VmSnapshot`] when this VM is snapshotted).
pub fn post_migration_lifecycle_event(&self) -> Option<PostMigrationLifecycleEvent> {
    self.post_migration_lifecycle_event
}

#[allow(clippy::too_many_arguments)]
pub fn new(
vm_config: Arc<Mutex<VmConfig>>,
Expand Down Expand Up @@ -2818,6 +2844,8 @@ impl Pausable for Vm {

#[derive(Serialize, Deserialize)]
pub struct VmSnapshot {
#[serde(default)]
pub post_migration_lifecycle_event: Option<PostMigrationLifecycleEvent>,
#[cfg(target_arch = "x86_64")]
pub clock: Option<hypervisor::ClockData>,
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
Expand Down Expand Up @@ -2875,6 +2903,7 @@ impl Snapshottable for Vm {
};

let vm_snapshot_state = VmSnapshot {
post_migration_lifecycle_event: self.post_migration_lifecycle_event(),
#[cfg(target_arch = "x86_64")]
clock: self.saved_clock,
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
Expand Down
Loading