Skip to content

Commit 2f1389b

Browse files
SantiagoPittellaMirko-von-Leipzig
authored andcommitted
feat(ntx-builder): deactivate accounts which crash repeatedly (#1712)
* feat(ntx-builder): blacklist accounts whose actors crash repeatedly * add PR number to changelog entry * docs: add changelog entry & remove duplicated ones * review: replace blacklist with deactivated * review: update docs * review: rename actors with accounts * review: improve log format Co-authored-by: Mirko <48352201+Mirko-von-Leipzig@users.noreply.github.com> * review: improve traces * review: move tests helpers as methods * review: remove ShutdownReason struct, replace with Result * chore: remove duplicated changelog entries --------- Co-authored-by: Mirko <48352201+Mirko-von-Leipzig@users.noreply.github.com>
1 parent 48d8776 commit 2f1389b

7 files changed

Lines changed: 210 additions & 97 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
- [BREAKING] Modified `TransactionHeader` serialization to allow converting back into the native type after serialization ([#1759](https://github.com/0xMiden/node/issues/1759)).
3434
- Removed `chain_tip` requirement from mempool subscription request ([#1771](https://github.com/0xMiden/node/pull/1771)).
3535
- Moved bootstrap procedure to `miden-node validator bootstrap` command ([#1764](https://github.com/0xMiden/node/pull/1764)).
36+
- NTX Builder now deactivates network accounts which crash repeatedly (configurable via `--ntx-builder.max-account-crashes`, default 10) ([#1712](https://github.com/0xMiden/miden-node/pull/1712)).
37+
3638

3739
### Fixes
3840

bin/node/src/commands/mod.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,16 @@ pub struct NtxBuilderConfig {
184184
)]
185185
pub idle_timeout: Duration,
186186

187+
/// Maximum number of crashes before an account deactivated.
188+
///
189+
/// Once this limit is reached, no new transactions will be created for this account.
190+
#[arg(
191+
long = "ntx-builder.max-account-crashes",
192+
default_value_t = 10,
193+
value_name = "NUM"
194+
)]
195+
pub max_account_crashes: usize,
196+
187197
/// Directory for the ntx-builder's persistent database.
188198
///
189199
/// If not set, defaults to the node's data directory.
@@ -215,6 +225,7 @@ impl NtxBuilderConfig {
215225
.with_tx_prover_url(self.tx_prover_url)
216226
.with_script_cache_size(self.script_cache_size)
217227
.with_idle_timeout(self.idle_timeout)
228+
.with_max_account_crashes(self.max_account_crashes)
218229
}
219230
}
220231

crates/ntx-builder/src/actor/mod.rs

Lines changed: 56 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use std::num::NonZeroUsize;
55
use std::sync::Arc;
66
use std::time::Duration;
77

8+
use anyhow::Context;
89
use candidate::TransactionCandidate;
910
use futures::FutureExt;
1011
use miden_node_proto::clients::{Builder, ValidatorClient};
@@ -17,7 +18,7 @@ use miden_protocol::block::BlockNumber;
1718
use miden_protocol::note::{NoteScript, Nullifier};
1819
use miden_protocol::transaction::TransactionId;
1920
use miden_remote_prover_client::RemoteTransactionProver;
20-
use tokio::sync::{AcquireError, Notify, RwLock, Semaphore, mpsc};
21+
use tokio::sync::{Notify, RwLock, Semaphore, mpsc};
2122
use tokio_util::sync::CancellationToken;
2223
use url::Url;
2324

@@ -44,27 +45,6 @@ pub enum ActorRequest {
4445
CacheNoteScript { script_root: Word, script: NoteScript },
4546
}
4647

47-
// ACTOR SHUTDOWN REASON
48-
// ================================================================================================
49-
50-
/// The reason an actor has shut down.
51-
pub enum ActorShutdownReason {
52-
/// Occurs when an account actor detects failure in acquiring the rate-limiting semaphore.
53-
SemaphoreFailed(AcquireError),
54-
/// Occurs when an account actor detects its corresponding cancellation token has been triggered
55-
/// by the coordinator. Cancellation tokens are triggered by the coordinator to initiate
56-
/// graceful shutdown of actors.
57-
Cancelled(NetworkAccountId),
58-
/// Occurs when the actor encounters a database error it cannot recover from.
59-
DbError(NetworkAccountId, miden_node_db::DatabaseError),
60-
/// Occurs when an account actor detects that its account has been removed from the database
61-
/// (e.g. due to a reverted account creation).
62-
AccountRemoved(NetworkAccountId),
63-
/// Occurs when the actor has been idle for longer than the idle timeout and the builder
64-
/// has confirmed there are no available notes in the DB.
65-
IdleTimeout(NetworkAccountId),
66-
}
67-
6848
// ACCOUNT ACTOR CONFIG
6949
// ================================================================================================
7050

@@ -98,6 +78,43 @@ pub struct AccountActorContext {
9878
pub request_tx: mpsc::Sender<ActorRequest>,
9979
}
10080

81+
#[cfg(test)]
82+
impl AccountActorContext {
83+
/// Creates a minimal `AccountActorContext` suitable for unit tests.
84+
///
85+
/// The URLs are fake and actors spawned with this context will fail on their first gRPC call,
86+
/// but this is sufficient for testing coordinator logic (registry, deactivation, etc.).
87+
pub fn test(db: &crate::db::Db) -> Self {
88+
use miden_protocol::crypto::merkle::mmr::{Forest, MmrPeaks, PartialMmr};
89+
use tokio::sync::RwLock;
90+
use url::Url;
91+
92+
use crate::chain_state::ChainState;
93+
use crate::clients::StoreClient;
94+
use crate::test_utils::mock_block_header;
95+
96+
let url = Url::parse("http://127.0.0.1:1").unwrap();
97+
let block_header = mock_block_header(0_u32.into());
98+
let chain_mmr = PartialMmr::from_peaks(MmrPeaks::new(Forest::new(0), vec![]).unwrap());
99+
let chain_state = Arc::new(RwLock::new(ChainState::new(block_header, chain_mmr)));
100+
let (request_tx, _request_rx) = mpsc::channel(1);
101+
102+
Self {
103+
block_producer_url: url.clone(),
104+
validator_url: url.clone(),
105+
tx_prover_url: None,
106+
chain_state,
107+
store: StoreClient::new(url),
108+
script_cache: LruCache::new(NonZeroUsize::new(1).unwrap()),
109+
max_notes_per_tx: NonZeroUsize::new(1).unwrap(),
110+
max_note_attempts: 1,
111+
idle_timeout: Duration::from_secs(60),
112+
db: db.clone(),
113+
request_tx,
114+
}
115+
}
116+
}
117+
101118
// ACCOUNT ORIGIN
102119
// ================================================================================================
103120

@@ -239,9 +256,13 @@ impl AccountActor {
239256
}
240257
}
241258

242-
/// Runs the account actor, processing events and managing state until a reason to shutdown is
243-
/// encountered.
244-
pub async fn run(mut self, semaphore: Arc<Semaphore>) -> Result<(), ActorShutdownReason> {
259+
/// Runs the account actor, processing events and managing state until shutdown.
260+
///
261+
/// The return value signals the shutdown category to the coordinator:
262+
///
263+
/// - `Ok(())`: intentional shutdown (idle timeout, cancellation, or account removal).
264+
/// - `Err(_)`: crash (database error, semaphore failure, or any other bug).
265+
pub async fn run(mut self, semaphore: Arc<Semaphore>) -> anyhow::Result<()> {
245266
let account_id = self.origin.id();
246267

247268
// Determine initial mode by checking DB for available notes.
@@ -250,10 +271,7 @@ impl AccountActor {
250271
.db
251272
.has_available_notes(account_id, block_num, self.max_note_attempts)
252273
.await
253-
.map_err(|err| {
254-
tracing::error!(err = err.as_report(), account_id = %account_id, "failed to check for available notes");
255-
ActorShutdownReason::DbError(account_id, err)
256-
})?;
274+
.context("failed to check for available notes")?;
257275

258276
if has_notes {
259277
self.mode = ActorMode::NotesAvailable;
@@ -279,7 +297,7 @@ impl AccountActor {
279297

280298
tokio::select! {
281299
_ = self.cancel_token.cancelled() => {
282-
return Err(ActorShutdownReason::Cancelled(account_id));
300+
return Ok(());
283301
}
284302
// Handle coordinator notifications. On notification, re-evaluate state from DB.
285303
_ = self.notify.notified() => {
@@ -290,12 +308,7 @@ impl AccountActor {
290308
.db
291309
.transaction_exists(awaited_id)
292310
.await
293-
.inspect_err(|err| {
294-
tracing::error!(err = err.as_report(), account_id = %account_id, "failed to check transaction status");
295-
})
296-
.map_err(|err| {
297-
ActorShutdownReason::DbError(account_id, err)
298-
})?;
311+
.context("failed to check transaction status")?;
299312
if exists {
300313
self.mode = ActorMode::NotesAvailable;
301314
}
@@ -307,7 +320,7 @@ impl AccountActor {
307320
},
308321
// Execute transactions.
309322
permit = tx_permit_acquisition => {
310-
let _permit = permit.map_err(ActorShutdownReason::SemaphoreFailed)?;
323+
let _permit = permit.context("semaphore closed")?;
311324

312325
// Read the chain state.
313326
let chain_state = self.chain_state.read().await.clone();
@@ -327,7 +340,8 @@ impl AccountActor {
327340
}
328341
// Idle timeout: actor has been idle too long, deactivate account.
329342
_ = idle_timeout_sleep => {
330-
return Err(ActorShutdownReason::IdleTimeout(account_id));
343+
tracing::info!(%account_id, "Account actor deactivated due to idle timeout");
344+
return Ok(());
331345
}
332346
}
333347
}
@@ -338,21 +352,19 @@ impl AccountActor {
338352
&self,
339353
account_id: NetworkAccountId,
340354
chain_state: ChainState,
341-
) -> Result<Option<TransactionCandidate>, ActorShutdownReason> {
355+
) -> anyhow::Result<Option<TransactionCandidate>> {
342356
let block_num = chain_state.chain_tip_header.block_num();
343357
let max_notes = self.max_notes_per_tx.get();
344358

345359
let (latest_account, notes) = self
346360
.db
347361
.select_candidate(account_id, block_num, self.max_note_attempts)
348362
.await
349-
.map_err(|err| {
350-
tracing::error!(err = err.as_report(), account_id = %account_id, "failed to query DB for transaction candidate");
351-
ActorShutdownReason::DbError(account_id, err)
352-
})?;
363+
.context("failed to query DB for transaction candidate")?;
353364

354365
let Some(account) = latest_account else {
355-
return Err(ActorShutdownReason::AccountRemoved(account_id));
366+
tracing::info!(account_id = %account_id, "Account no longer exists in DB");
367+
return Ok(None);
356368
};
357369

358370
let notes: Vec<_> = notes.into_iter().take(max_notes).collect();

0 commit comments

Comments
 (0)