diff --git a/neqo-common/src/qlog.rs b/neqo-common/src/qlog.rs index 88f995f7f8..3d2fbf9575 100644 --- a/neqo-common/src/qlog.rs +++ b/neqo-common/src/qlog.rs @@ -128,14 +128,18 @@ impl Qlog { return; }; - if let Err(e) = f(&mut shared_streamer.streamer) { - log::error!("Qlog event generation failed with error {e}; closing qlog."); - // Set the inner Option to None to disable future logging for other references. - *borrow = None; - // Explicitly drop the RefCell borrow to release the mutable borrow. - drop(borrow); - // Set the outer Option to None to prevent future dereferences. - self.inner = None; + match f(&mut shared_streamer.streamer) { + // `Error::Done` means "event was below the importance threshold" - not an actual error. + Ok(()) | Err(qlog::Error::Done) => (), + Err(e) => { + log::error!("Qlog event generation failed with error {e}; closing qlog."); + // Set the inner Option to None to disable future logging for other references. + *borrow = None; + // Explicitly drop the RefCell borrow to release the mutable borrow. + drop(borrow); + // Set the outer Option to None to prevent future dereferences. + self.inner = None; + } } } } diff --git a/neqo-transport/src/qlog.rs b/neqo-transport/src/qlog.rs index 7d401a139c..b50f897b36 100644 --- a/neqo-transport/src/qlog.rs +++ b/neqo-transport/src/qlog.rs @@ -20,9 +20,10 @@ use qlog::events::{ }, quic::{ AckedRanges, CongestionStateUpdated, CongestionStateUpdatedTrigger, ErrorSpace, - MetricsUpdated, PacketDropped, PacketDroppedTrigger, PacketHeader, PacketLost, - PacketNumberSpace as QlogPacketNumberSpace, PacketReceived, PacketSent, PacketsAcked, - QuicFrame, RecoveryParametersSet, StreamType, VersionInformation, + LossTimerEventType, LossTimerUpdated, MetricsUpdated, PacketDropped, PacketDroppedTrigger, + PacketHeader, PacketLost, PacketNumberSpace as QlogPacketNumberSpace, PacketReceived, + PacketSent, PacketsAcked, QuicFrame, RecoveryParametersSet, StreamType, TimerType, + VersionInformation, }, }; use smallvec::SmallVec; @@ -489,6 +490,65 @@ pub fn congestion_state_updated( ); } +/// The type of loss recovery timer that fired or was updated. +#[derive(Clone, Copy)] +pub enum LossTimerType { + /// The reordering/loss-detection timer (ACK-based). + Ack, + /// The Probe Timeout timer. + Pto, +} + +/// Emit a `loss_timer_updated` Set event. +/// +/// Only the PTO timer has explicit set/cancel lifecycle in neqo. The +/// loss-detection (Ack) timer is derived lazily from packet state on every +/// call to [`crate::recovery::Loss::next_timeout`] and has no single arm or +/// cancel point to instrument. +pub fn loss_timer_set(qlog: &mut Qlog, now: Instant) { + loss_timer_updated(qlog, LossTimerEventType::Set, Some(TimerType::Pto), now); +} + +pub fn loss_timer_expired(qlog: &mut Qlog, timer_type: LossTimerType, now: Instant) { + loss_timer_updated( + qlog, + LossTimerEventType::Expired, + Some(timer_type.into()), + now, + ); +} + +/// Emit a `loss_timer_updated` Cancelled event. +/// +/// See [`loss_timer_set`] for why only `TimerType::Pto` is used here. +pub fn loss_timer_cancelled(qlog: &mut Qlog, now: Instant) { + loss_timer_updated( + qlog, + LossTimerEventType::Cancelled, + Some(TimerType::Pto), + now, + ); +} + +fn loss_timer_updated( + qlog: &mut Qlog, + event_type: LossTimerEventType, + timer_type: Option, + now: Instant, +) { + qlog.add_event_at( + || { + Some(EventData::LossTimerUpdated(LossTimerUpdated { + timer_type, + packet_number_space: None, + event_type, + delta: None, + })) + }, + now, + ); +} + // Helper functions #[expect(clippy::too_many_lines, reason = "Yeah, but it's a nice match.")] @@ -748,6 +808,15 @@ impl From<&CloseReason> for ConnectionClosed { } } +impl From for TimerType { + fn from(value: LossTimerType) -> Self { + match value { + LossTimerType::Ack => Self::Ack, + LossTimerType::Pto => Self::Pto, + } + } +} + impl From for qlog::events::quic::PacketType { fn from(value: packet::Type) -> Self { match value { diff --git a/neqo-transport/src/recovery/mod.rs b/neqo-transport/src/recovery/mod.rs index a8cd7bb4e6..8e2789099b 100644 --- a/neqo-transport/src/recovery/mod.rs +++ b/neqo-transport/src/recovery/mod.rs @@ -694,6 +694,9 @@ impl Loss { &mut self.stats.borrow_mut(), ); + if self.pto_state.is_some() { + qlog::loss_timer_cancelled(&mut self.qlog, now); + } self.pto_state = None; (acked_packets, lost) @@ -702,6 +705,9 @@ impl Loss { /// When receiving a retry, get all the sent packets so that they can be flushed. /// We also need to pretend that they never happened for the purposes of congestion control. pub fn retry(&mut self, primary_path: &PathRef, now: Instant) -> Vec { + if self.pto_state.is_some() { + qlog::loss_timer_cancelled(&mut self.qlog, now); + } self.pto_state = None; let mut dropped = self .spaces @@ -749,6 +755,9 @@ impl Loss { // We just made progress, so discard PTO count. // The spec says that clients should not do this until confirming that // the server has completed address validation, but ignore that. + if self.pto_state.is_some() { + qlog::loss_timer_cancelled(&mut self.qlog, now); + } self.pto_state = None; if space == PacketNumberSpace::Handshake { @@ -850,6 +859,7 @@ impl Loss { st.count_pto(&mut self.stats.borrow_mut()); qlog::metrics_updated(&mut self.qlog, &[qlog::Metric::PtoCount(st.count())], now); } + qlog::loss_timer_set(&mut self.qlog, now); } /// This checks whether the PTO timer has fired and fires it if needed. @@ -927,6 +937,18 @@ impl Loss { has_handshake_keys: bool, ) -> Vec { qtrace!("[{self}] timeout {now:?}"); + let timer_type = { + let path = primary_path.borrow(); + if self + .earliest_loss_time(path.rtt()) + .is_some_and(|t| t <= now) + { + qlog::LossTimerType::Ack + } else { + qlog::LossTimerType::Pto + } + }; + qlog::loss_timer_expired(&mut self.qlog, timer_type, now); let loss_delay = primary_path.borrow().rtt().loss_delay(); let confirmed = self.confirmed(); @@ -1916,4 +1938,68 @@ mod tests { "Initial space must be able to send CRYPTO frames even when PTO is for Handshake" ); } + + /// Set up a qlog-instrumented fixture with a PTO already fired. + /// Returns the log contents and the PTO expiry time for use in follow-on + /// operations (e.g., acknowledging packets to trigger Cancelled). + fn fire_pto_log() -> (Fixture, test_fixture::SharedVec, Instant) { + let (log, contents) = test_fixture::new_neqo_qlog(); + let mut lr = Fixture::default(); + lr.lr.set_qlog(log); + lr.on_packet_sent( + sent::Packet::new( + packet::Type::Initial, + 0, + now(), + true, + recovery::Tokens::new(), + ON_SENT_SIZE, + ), + now(), + ); + let pto = lr.next_timeout().expect("PTO timer armed"); + lr.timeout(pto); + (lr, contents, pto) + } + + #[test] + fn loss_timer_set_on_pto() { + let (_, contents, _) = fire_pto_log(); + let log = contents.to_string(); + assert!( + log.contains(r#""event_type":"set""#), + "Expected loss_timer_updated Set event in qlog: {log}" + ); + assert!( + log.contains(r#""timer_type":"pto""#), + "Expected timer_type pto in qlog: {log}" + ); + } + + #[test] + fn loss_timer_expired_on_timeout() { + let (_, contents, _) = fire_pto_log(); + let log = contents.to_string(); + assert!( + log.contains(r#""event_type":"expired""#), + "Expected loss_timer_updated Expired event in qlog: {log}" + ); + } + + #[test] + fn loss_timer_cancelled_on_ack() { + let (mut lr, contents, pto) = fire_pto_log(); + lr.on_ack_received( + PacketNumberSpace::Initial, + vec![0..=0], + None, + Duration::ZERO, + pto + TEST_RTT, + ); + let log = contents.to_string(); + assert!( + log.contains(r#""event_type":"cancelled""#), + "Expected loss_timer_updated Cancelled event in qlog: {log}" + ); + } } diff --git a/test-fixture/src/lib.rs b/test-fixture/src/lib.rs index 55ac7b962c..64fae5cad3 100644 --- a/test-fixture/src/lib.rs +++ b/test-fixture/src/lib.rs @@ -553,7 +553,7 @@ pub fn new_neqo_qlog() -> (Qlog, SharedVec) { None, Instant::now(), trace, - EventImportance::Base, + EventImportance::Extra, Box::new(buf), ); let log = Qlog::enabled(streamer, PathBuf::from(""));