Skip to content
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion docs/cancellation.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@ sequenceDiagram
end

deactivate IH
IH-->>Caller: sent_signal
deactivate IH
```

Expand Down
7 changes: 6 additions & 1 deletion src/hyperlight_host/Cargo.toml
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot don't make these changes

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverted Cargo.toml changes in commit 28890e7. The tracing-tracy dependency is now back to being a regular dev-dependency as it was originally.

Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ tracing = { version = "0.1.43", features = ["log"] }
tracing-log = "0.2.0"
tracing-core = "0.1.35"
tracing-opentelemetry = { version = "0.32.0", optional = true }
tracing-tracy = { version = "0.11.4", optional = true }
hyperlight-common = { workspace = true, default-features = true, features = [ "std" ] }
hyperlight-guest-tracing = { workspace = true, default-features = true, optional = true }
vmm-sys-util = "0.15.0"
Expand Down Expand Up @@ -104,7 +105,6 @@ criterion = "0.8.1"
tracing-chrome = "0.7.2"
metrics-util = "0.20.1"
metrics-exporter-prometheus = { version = "0.18.1", default-features = false }
tracing-tracy = "0.11.4"
serde_json = "1.0"
hyperlight-component-macro = { workspace = true }

Expand All @@ -130,6 +130,7 @@ print_debug = []
# Dumps the VM state to a file on unexpected errors or crashes. The path of the file will be printed on stdout and logged.
crashdump = ["dep:chrono"]
trace_guest = ["dep:opentelemetry", "dep:tracing-opentelemetry", "dep:hyperlight-guest-tracing", "hyperlight-common/trace_guest"]
trace_tracy = ["dep:tracing-tracy"]
mem_profile = [ "trace_guest", "dep:framehop", "dep:fallible-iterator", "hyperlight-common/mem_profile" ]
kvm = ["dep:kvm-bindings", "dep:kvm-ioctls"]
mshv3 = ["dep:mshv-bindings", "dep:mshv-ioctls"]
Expand All @@ -139,6 +140,10 @@ fuzzing = ["hyperlight-common/fuzzing"]
build-metadata = ["dep:built"]
init-paging = []

[[example]]
name = "tracing-tracy"
required-features = ["trace_tracy"]

[[bench]]
name = "benchmarks"
harness = false
2 changes: 1 addition & 1 deletion src/hyperlight_host/benches/benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ fn bench_guest_call_interrupt_latency(b: &mut criterion::Bencher, size: SandboxS
// Small delay to ensure the guest function is running in VM before interrupting
thread::sleep(std::time::Duration::from_millis(10));
let kill_start = Instant::now();
assert!(interrupt_handle.kill());
interrupt_handle.kill();
kill_start
});

Expand Down
31 changes: 21 additions & 10 deletions src/hyperlight_host/src/hypervisor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,12 +208,19 @@ pub(crate) trait InterruptHandleImpl: InterruptHandle {
pub trait InterruptHandle: Send + Sync + Debug {
/// Interrupt the corresponding sandbox from running.
///
/// - If this is called while the sandbox is currently executing a guest function call, it will interrupt the sandbox and return `true`.
/// - If this is called while the sandbox is not running (for example before or after calling a guest function), it will do nothing and return `false`.
/// This method sets a cancellation flag that prevents or stops the execution of guest code.
/// The effectiveness of this call depends on timing relative to the guest function call lifecycle:
///
/// - **Before guest call starts** (before `clear_cancel()` in `MultiUseSandbox::call()`):
/// The cancellation request will be cleared and ignored.
/// - **After guest call starts but before entering guest code** (after `clear_cancel()`, before `run_vcpu()`):
/// Will prevent the guest from executing.
/// - **While executing guest code**: Will interrupt the vCPU.
/// - **After guest call completes**: Has no effect (cancellation is cleared at the start of the next call).
Comment thread
jsturtevant marked this conversation as resolved.
Outdated
///
/// # Note
/// This function will block for the duration of the time it takes for the vcpu thread to be interrupted.
fn kill(&self) -> bool;
fn kill(&self);

/// Used by a debugger to interrupt the corresponding sandbox from running.
///
Expand Down Expand Up @@ -374,13 +381,13 @@ impl InterruptHandleImpl for LinuxInterruptHandle {

#[cfg(any(kvm, mshv3))]
impl InterruptHandle for LinuxInterruptHandle {
fn kill(&self) -> bool {
fn kill(&self) {
// Release ordering ensures that any writes before kill() are visible to the vcpu thread
// when it checks is_cancelled() with Acquire ordering
self.state.fetch_or(Self::CANCEL_BIT, Ordering::Release);

// Send signals to interrupt the vcpu if it's currently running
self.send_signal()
self.send_signal();
}

#[cfg(gdb)]
Expand Down Expand Up @@ -513,7 +520,7 @@ impl InterruptHandleImpl for WindowsInterruptHandle {

#[cfg(target_os = "windows")]
impl InterruptHandle for WindowsInterruptHandle {
fn kill(&self) -> bool {
fn kill(&self) {
use windows::Win32::System::Hypervisor::WHvCancelRunVirtualProcessor;

// Release ordering ensures that any writes before kill() are visible to the vcpu thread
Expand All @@ -524,7 +531,7 @@ impl InterruptHandle for WindowsInterruptHandle {
// This ensures we see the running state set by the vcpu thread
let state = self.state.load(Ordering::Acquire);
if state & Self::RUNNING_BIT == 0 {
return false;
return;
}

// Take read lock to prevent race with WHvDeletePartition in set_dropped().
Expand All @@ -534,15 +541,19 @@ impl InterruptHandle for WindowsInterruptHandle {
Ok(guard) => guard,
Err(e) => {
log::error!("Failed to acquire partition_state read lock: {}", e);
return false;
return;
}
};

if guard.dropped {
return false;
return;
}

unsafe { WHvCancelRunVirtualProcessor(guard.handle, 0, 0).is_ok() }
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot Let's skip this; we have logging in other places.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed the error logging for WHvCancelRunVirtualProcessor in commit 8b7a60c. The method now simply returns is_ok() without additional logging.

unsafe {
if let Err(e) = WHvCancelRunVirtualProcessor(guard.handle, 0, 0) {
log::error!("Failed to cancel running virtual processor: {}", e);
}
};
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ludfjig it seems the boolean here was useful information?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but it would return false when cancelling before the vCPU started executing.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am leaning towards keeping the bool and changing the wording in the docs, since the Linux implementation also returned a boolean before this. I am not a big fan of this change.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm open to that idea too.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot Let's keep the bool and update the comment to match the usage. Specifically, the return value does not indicate that the kill succeeded: on Linux, `true` means the signal was sent; on Windows, `true` means the vCPU was killed, but `false` could mean other things.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Restored the boolean return value and updated the documentation in commit 0daa0fd. The docs now clarify that:

  • On Linux: Returns true if a signal was sent to the vCPU thread, false if vCPU not running
  • On Windows: Returns true if WHvCancelRunVirtualProcessor was called successfully, false otherwise
  • A false return doesn't mean failure - the cancellation flag is always set

All test assertions have been restored.

}
#[cfg(gdb)]
fn kill_from_debugger(&self) -> bool {
Expand Down
2 changes: 1 addition & 1 deletion src/hyperlight_host/src/metrics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ mod tests {
// interrupt the guest function call to "Spin" after 1 second
let thread = thread::spawn(move || {
thread::sleep(Duration::from_secs(1));
assert!(interrupt_handle.kill());
interrupt_handle.kill();
});

multi
Expand Down
9 changes: 4 additions & 5 deletions src/hyperlight_host/tests/integration_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ fn interrupt_in_progress_guest_call() {
// kill vm after 1 second
let thread = thread::spawn(move || {
thread::sleep(Duration::from_secs(1));
assert!(interrupt_handle.kill());
interrupt_handle.kill();
barrier2.wait(); // wait here until main thread has returned from the interrupted guest call
barrier2.wait(); // wait here until main thread has dropped the sandbox
assert!(interrupt_handle.dropped());
Expand Down Expand Up @@ -122,7 +122,7 @@ fn interrupt_guest_call_in_advance() {

// kill vm before the guest call has started
let thread = thread::spawn(move || {
assert!(!interrupt_handle.kill()); // should return false since vcpu is not running yet
interrupt_handle.kill();
barrier2.wait();
barrier2.wait(); // wait here until main thread has dropped the sandbox
assert!(interrupt_handle.dropped());
Expand Down Expand Up @@ -274,10 +274,9 @@ fn interrupt_moved_sandbox() {
let thread2 = thread::spawn(move || {
barrier.wait();
thread::sleep(Duration::from_secs(1));
assert!(interrupt_handle.kill());
interrupt_handle.kill();

// make sure this returns true, which means the sandbox wasn't killed incorrectly before
assert!(interrupt_handle2.kill());
interrupt_handle2.kill();
});

let res = sbox2.call::<i32>("Spin", ()).unwrap_err();
Expand Down
Loading