Skip to content

Commit 144d759

Browse files
kjw3ksapru
authored and committed
fix: harden installer and onboard resiliency (NVIDIA#961)
* fix: harden installer and onboard resiliency
* fix: address installer and debug review follow-ups
* fix: harden onboard resume across later setup steps
* test: simplify payload extraction in onboard tests
* test: keep onboard payload extraction target-compatible
* chore: align onboard session lint with complexity rule
* fix: harden onboard session safety and lock handling
* fix: tighten onboard session redaction and metadata handling
1 parent 5c269c1 commit 144d759

17 files changed

Lines changed: 3339 additions & 900 deletions

bin/lib/onboard-session.js

Lines changed: 432 additions & 0 deletions
Large diffs are not rendered by default.

bin/lib/onboard.js

Lines changed: 808 additions & 106 deletions
Large diffs are not rendered by default.

bin/lib/runtime-recovery.js

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
const onboardSession = require("./onboard-session");
5+
6+
/**
 * Remove ANSI CSI escape sequences from CLI output.
 *
 * Handles SGR colour/style codes (`ESC[…m`) as well as every other CSI
 * sequence, such as erase-line (`ESC[2K`) or cursor show/hide (`ESC[?25l`),
 * so that the line-oriented parsers in this module only ever see plain text.
 *
 * @param {*} text - Raw output; any falsy value is treated as an empty string.
 * @returns {string} The input coerced to a string with CSI sequences removed.
 */
function stripAnsi(text) {
  // CSI grammar: ESC "[" + parameter bytes (0x30-0x3F) + intermediate bytes
  // (0x20-0x2F) + exactly one final byte (0x40-0x7E).
  // eslint-disable-next-line no-control-regex
  return String(text || "").replace(/\x1b\[[0-?]*[ -\/]*[@-~]/g, "");
}
10+
11+
/**
 * Extract sandbox names from the tabular output of a sandbox listing command.
 *
 * ANSI escapes are stripped first. Blank lines, the table header (a row whose
 * first column is exactly `NAME`), the "No sandboxes found" notice, and lines
 * starting with `Error:` are ignored. For every remaining line the first
 * whitespace-delimited column is taken as the sandbox name.
 *
 * @param {string} [listOutput=""] - Raw listing output.
 * @returns {Set<string>} Unique sandbox names, in order of first appearance.
 */
function parseLiveSandboxNames(listOutput = "") {
  const names = new Set();
  for (const rawLine of stripAnsi(listOutput).split("\n")) {
    const line = rawLine.trim();
    if (!line) continue;
    // Match the header only when NAME is the entire first column; a bare
    // prefix match would also discard sandboxes such as "namespace-dev".
    if (/^NAME(?:\s|$)/i.test(line)) continue;
    if (/^No sandboxes found\.?$/i.test(line)) continue;
    if (/^Error:/i.test(line)) continue;
    // `line` is trimmed and non-empty, so the first column always exists.
    const [name] = line.split(/\s+/);
    names.add(name);
  }
  return names;
}
26+
27+
/**
 * Classify the output of a single-sandbox lookup.
 *
 * Rules are evaluated in order and the first match wins:
 *   1. empty output (after ANSI stripping and trimming) -> missing / empty
 *   2. a "not found" message                            -> missing / not_found
 *   3. a gateway or transport failure message           -> unavailable / gateway_unavailable
 *   4. anything else                                    -> present / ok
 *
 * @param {string} [output=""] - Raw lookup output.
 * @returns {{state: string, reason: string}} The classification.
 */
function classifySandboxLookup(output = "") {
  const text = stripAnsi(output).trim();
  if (text === "") {
    return { state: "missing", reason: "empty" };
  }

  // Ordered: a "not found" answer takes precedence over gateway errors.
  const rules = [
    {
      pattern: /sandbox not found|status:\s*NotFound/i,
      state: "missing",
      reason: "not_found",
    },
    {
      pattern:
        /transport error|client error|Connection reset by peer|Connection refused|No active gateway|Gateway: .*Error/i,
      state: "unavailable",
      reason: "gateway_unavailable",
    },
  ];

  const matched = rules.find(({ pattern }) => pattern.test(text));
  if (matched) {
    return { state: matched.state, reason: matched.reason };
  }
  return { state: "present", reason: "ok" };
}
44+
45+
/**
 * Classify the output of a gateway status command.
 *
 * Evaluation order (first match wins):
 *   1. empty output (after ANSI stripping and trimming) -> inactive / empty
 *   2. the standalone word "connected"                  -> connected / ok
 *   3. a gateway or transport failure message           -> unavailable / gateway_unavailable
 *   4. anything else                                    -> inactive / not_connected
 *
 * @param {string} [output=""] - Raw status output.
 * @returns {{state: string, reason: string}} The classification.
 */
function classifyGatewayStatus(output = "") {
  const clean = stripAnsi(output).trim();
  if (!clean) {
    return { state: "inactive", reason: "empty" };
  }
  // Require "connected" as a whole word that is not negated. A bare substring
  // match would also accept "Disconnected" and "not connected" and report a
  // down gateway as healthy.
  if (/(?<!\bnot\s+)\bconnected\b/i.test(clean)) {
    return { state: "connected", reason: "ok" };
  }
  if (
    /No active gateway|transport error|client error|Connection reset by peer|Connection refused|Gateway: .*Error/i.test(
      clean
    )
  ) {
    return { state: "unavailable", reason: "gateway_unavailable" };
  }
  return { state: "inactive", reason: "not_connected" };
}
62+
63+
/**
 * Decide whether gateway recovery should be attempted.
 *
 * Recovery is attempted only when the sandbox lookup state is "unavailable"
 * and the gateway state is anything other than "connected".
 *
 * @param {object} [states]
 * @param {string} [states.sandboxState="missing"] - Sandbox lookup state.
 * @param {string} [states.gatewayState="inactive"] - Gateway status state.
 * @returns {boolean} True when recovery should be attempted.
 */
function shouldAttemptGatewayRecovery({ sandboxState = "missing", gatewayState = "inactive" } = {}) {
  if (sandboxState !== "unavailable") {
    return false;
  }
  return gatewayState !== "connected";
}
66+
67+
/**
 * Pick the onboarding command to suggest to the user.
 *
 * Suggests the `--resume` variant when `onboardSession.loadSession()` returns
 * a truthy session whose `resumable` property is not strictly `false`;
 * otherwise suggests a plain onboard run.
 *
 * @returns {string} The command line to suggest.
 */
function getRecoveryCommand() {
  const session = onboardSession.loadSession();
  const canResume = Boolean(session) && session.resumable !== false;
  return canResume ? "nemoclaw onboard --resume" : "nemoclaw onboard";
}
74+
75+
module.exports = {
76+
classifyGatewayStatus,
77+
classifySandboxLookup,
78+
getRecoveryCommand,
79+
parseLiveSandboxNames,
80+
shouldAttemptGatewayRecovery,
81+
};

bin/nemoclaw.js

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ async function recoverNamedGatewayRuntime() {
134134
}
135135

136136
const shouldStartGateway = [before.state, after.state].some((state) =>
137-
["named_unhealthy", "named_unreachable", "connected_other"].includes(state)
137+
["missing_named", "named_unhealthy", "named_unreachable", "connected_other"].includes(state)
138138
);
139139

140140
if (shouldStartGateway) {
@@ -334,15 +334,16 @@ function exitWithSpawnResult(result) {
334334

335335
async function onboard(args) {
336336
const { onboard: runOnboard } = require("./lib/onboard");
337-
const allowedArgs = new Set(["--non-interactive"]);
337+
const allowedArgs = new Set(["--non-interactive", "--resume"]);
338338
const unknownArgs = args.filter((arg) => !allowedArgs.has(arg));
339339
if (unknownArgs.length > 0) {
340340
console.error(` Unknown onboard option(s): ${unknownArgs.join(", ")}`);
341-
console.error(" Usage: nemoclaw onboard [--non-interactive]");
341+
console.error(" Usage: nemoclaw onboard [--non-interactive] [--resume]");
342342
process.exit(1);
343343
}
344344
const nonInteractive = args.includes("--non-interactive");
345-
await runOnboard({ nonInteractive });
345+
const resume = args.includes("--resume");
346+
await runOnboard({ nonInteractive, resume });
346347
}
347348

348349
async function setup() {

install.sh

Lines changed: 105 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -124,18 +124,38 @@ print_banner() {
124124

125125
print_done() {
126126
local elapsed=$((SECONDS - _INSTALL_START))
127-
local sandbox_name
128-
sandbox_name="$(resolve_default_sandbox_name)"
129127
info "=== Installation complete ==="
130128
printf "\n"
131129
printf " ${C_GREEN}${C_BOLD}NemoClaw${C_RESET} ${C_DIM}(%ss)${C_RESET}\n" "$elapsed"
132130
printf "\n"
133-
printf " ${C_GREEN}Your OpenClaw Sandbox is live.${C_RESET}\n"
134-
printf " ${C_DIM}Sandbox in, break things, and tell us what you find.${C_RESET}\n"
135-
printf "\n"
136-
printf " ${C_GREEN}Next:${C_RESET}\n"
137-
printf " %s$%s nemoclaw %s connect\n" "$C_GREEN" "$C_RESET" "$sandbox_name"
138-
printf " %ssandbox@%s$%s openclaw tui\n" "$C_GREEN" "$sandbox_name" "$C_RESET"
131+
if [[ "$ONBOARD_RAN" == true ]]; then
132+
local sandbox_name
133+
sandbox_name="$(resolve_default_sandbox_name)"
134+
printf " ${C_GREEN}Your OpenClaw Sandbox is live.${C_RESET}\n"
135+
printf " ${C_DIM}Sandbox in, break things, and tell us what you find.${C_RESET}\n"
136+
printf "\n"
137+
printf " ${C_GREEN}Next:${C_RESET}\n"
138+
printf " %s$%s nemoclaw %s connect\n" "$C_GREEN" "$C_RESET" "$sandbox_name"
139+
printf " %ssandbox@%s$%s openclaw tui\n" "$C_GREEN" "$sandbox_name" "$C_RESET"
140+
elif [[ "$NEMOCLAW_READY_NOW" == true ]]; then
141+
printf " ${C_GREEN}NemoClaw CLI is ready in this shell.${C_RESET}\n"
142+
printf " ${C_DIM}Onboarding has not run yet.${C_RESET}\n"
143+
printf "\n"
144+
printf " ${C_GREEN}Next:${C_RESET}\n"
145+
printf " %s$%s nemoclaw onboard\n" "$C_GREEN" "$C_RESET"
146+
else
147+
printf " ${C_GREEN}NemoClaw CLI is installed.${C_RESET}\n"
148+
printf " ${C_DIM}Onboarding did not run because this shell cannot resolve 'nemoclaw' yet.${C_RESET}\n"
149+
printf "\n"
150+
printf " ${C_GREEN}Next:${C_RESET}\n"
151+
if [[ -n "$NEMOCLAW_RECOVERY_EXPORT_DIR" ]]; then
152+
printf " %s$%s export PATH=\"%s:\$PATH\"\n" "$C_GREEN" "$C_RESET" "$NEMOCLAW_RECOVERY_EXPORT_DIR"
153+
fi
154+
if [[ -n "$NEMOCLAW_RECOVERY_PROFILE" ]]; then
155+
printf " %s$%s source %s\n" "$C_GREEN" "$C_RESET" "$NEMOCLAW_RECOVERY_PROFILE"
156+
fi
157+
printf " %s$%s nemoclaw onboard\n" "$C_GREEN" "$C_RESET"
158+
fi
139159
printf "\n"
140160
printf " ${C_BOLD}GitHub${C_RESET} ${C_DIM}https://github.com/nvidia/nemoclaw${C_RESET}\n"
141161
printf " ${C_BOLD}Docs${C_RESET} ${C_DIM}https://docs.nvidia.com/nemoclaw/latest/${C_RESET}\n"
@@ -218,6 +238,10 @@ MIN_NPM_MAJOR=10
218238
RUNTIME_REQUIREMENT_MSG="NemoClaw requires Node.js >=${MIN_NODE_VERSION} and npm >=${MIN_NPM_MAJOR}."
219239
NEMOCLAW_SHIM_DIR="${HOME}/.local/bin"
220240
ORIGINAL_PATH="${PATH:-}"
241+
NEMOCLAW_READY_NOW=false
242+
NEMOCLAW_RECOVERY_PROFILE=""
243+
NEMOCLAW_RECOVERY_EXPORT_DIR=""
244+
ONBOARD_RAN=false
221245

222246
# Compare two semver strings (major.minor.patch). Returns 0 if $1 >= $2.
223247
# Rejects prerelease suffixes (e.g. "22.16.0-rc.1") to avoid arithmetic errors.
@@ -248,6 +272,30 @@ ensure_nvm_loaded() {
248272
fi
249273
}
250274

275+
# Print (without a trailing newline) the shell profile file that matches the
# user's login shell, keyed on the basename of $SHELL:
#   zsh  -> ~/.zshrc
#   fish -> ~/.config/fish/config.fish
#   tcsh -> ~/.tcshrc
#   csh  -> ~/.cshrc
# Any other (or unset) shell falls back to ~/.bashrc, or to ~/.profile when
# ~/.bashrc does not exist but ~/.profile does.
detect_shell_profile() {
  local rc_file
  case "$(basename "${SHELL:-}")" in
    zsh) rc_file="$HOME/.zshrc" ;;
    fish) rc_file="$HOME/.config/fish/config.fish" ;;
    tcsh) rc_file="$HOME/.tcshrc" ;;
    csh) rc_file="$HOME/.cshrc" ;;
    *)
      rc_file="$HOME/.bashrc"
      if [[ ! -f "$HOME/.bashrc" && -f "$HOME/.profile" ]]; then
        rc_file="$HOME/.profile"
      fi
      ;;
  esac
  printf "%s" "$rc_file"
}
298+
251299
# Refresh PATH so that npm global bin is discoverable.
252300
# After nvm installs Node.js the global bin lives under the nvm prefix,
253301
# which may not yet be on PATH in the current session.
@@ -509,30 +557,30 @@ install_nemoclaw() {
509557
# ---------------------------------------------------------------------------
510558
verify_nemoclaw() {
511559
if command_exists nemoclaw; then
560+
NEMOCLAW_READY_NOW=true
512561
info "Verified: nemoclaw is available at $(command -v nemoclaw)"
513562
return 0
514563
fi
515564

516-
# nemoclaw not on PATH — try to diagnose and suggest a fix
517-
warn "nemoclaw is not on PATH after installation."
518-
519565
local npm_bin
520566
npm_bin="$(npm config get prefix 2>/dev/null)/bin" || true
521567

522568
if [[ -n "$npm_bin" && -x "$npm_bin/nemoclaw" ]]; then
523569
ensure_nemoclaw_shim || true
524570
if command_exists nemoclaw; then
571+
NEMOCLAW_READY_NOW=true
525572
info "Verified: nemoclaw is available at $(command -v nemoclaw)"
526573
return 0
527574
fi
528575

529-
warn "Found nemoclaw at $npm_bin/nemoclaw but could not expose it on PATH."
530-
warn ""
531-
warn "Add one of these directories to your shell profile:"
532-
warn " $NEMOCLAW_SHIM_DIR"
533-
warn " $npm_bin"
534-
warn ""
535-
warn "Continuing — nemoclaw is installed but requires a PATH update."
576+
NEMOCLAW_RECOVERY_PROFILE="$(detect_shell_profile)"
577+
if [[ -x "$NEMOCLAW_SHIM_DIR/nemoclaw" ]]; then
578+
NEMOCLAW_RECOVERY_EXPORT_DIR="$NEMOCLAW_SHIM_DIR"
579+
else
580+
NEMOCLAW_RECOVERY_EXPORT_DIR="$npm_bin"
581+
fi
582+
warn "Found nemoclaw at $npm_bin/nemoclaw but this shell still cannot resolve it."
583+
warn "Onboarding will be skipped until PATH is updated."
536584
return 0
537585
else
538586
warn "Could not locate the nemoclaw executable."
@@ -547,14 +595,33 @@ verify_nemoclaw() {
547595
# ---------------------------------------------------------------------------
548596
run_onboard() {
549597
info "Running nemoclaw onboard…"
598+
local -a onboard_cmd=(onboard)
599+
if command_exists node && [[ -f "${HOME}/.nemoclaw/onboard-session.json" ]]; then
600+
if node -e '
601+
const fs = require("fs");
602+
const file = process.argv[1];
603+
try {
604+
const data = JSON.parse(fs.readFileSync(file, "utf8"));
605+
const resumable = data && data.resumable !== false;
606+
const status = data && data.status;
607+
process.exit(resumable && status && status !== "complete" ? 0 : 1);
608+
} catch {
609+
process.exit(1);
610+
}
611+
' "${HOME}/.nemoclaw/onboard-session.json"; then
612+
info "Found an interrupted onboarding session — resuming it."
613+
onboard_cmd+=(--resume)
614+
fi
615+
fi
550616
if [ "${NON_INTERACTIVE:-}" = "1" ]; then
551-
nemoclaw onboard --non-interactive
617+
onboard_cmd+=(--non-interactive)
618+
nemoclaw "${onboard_cmd[@]}"
552619
elif [ -t 0 ]; then
553-
nemoclaw onboard
620+
nemoclaw "${onboard_cmd[@]}"
554621
elif exec 3</dev/tty; then
555622
info "Installer stdin is piped; attaching onboarding to /dev/tty…"
556623
local status=0
557-
nemoclaw onboard <&3 || status=$?
624+
nemoclaw "${onboard_cmd[@]}" <&3 || status=$?
558625
exec 3<&-
559626
return "$status"
560627
else
@@ -565,30 +632,32 @@ run_onboard() {
565632
# 6. Post-install message (printed last — after onboarding — so PATH hints stay visible)
566633
# ---------------------------------------------------------------------------
567634
post_install_message() {
568-
# Only show shell reload instructions when Node was installed via a
569-
# version manager that modifies PATH in shell profile files.
570-
# nvm and fnm require sourcing the profile; nodesource/brew install to
571-
# system paths already on PATH.
572-
if [[ ! -s "${NVM_DIR:-$HOME/.nvm}/nvm.sh" ]]; then
635+
if [[ "$NEMOCLAW_READY_NOW" == true ]]; then
573636
return 0
574637
fi
575638

576-
local profile="$HOME/.bashrc"
577-
if [[ -n "${ZSH_VERSION:-}" ]] || [[ "$(basename "${SHELL:-}")" == "zsh" ]]; then
578-
profile="$HOME/.zshrc"
579-
elif [[ ! -f "$HOME/.bashrc" && -f "$HOME/.profile" ]]; then
580-
profile="$HOME/.profile"
639+
if [[ -z "$NEMOCLAW_RECOVERY_EXPORT_DIR" ]]; then
640+
return 0
641+
fi
642+
643+
if [[ -z "$NEMOCLAW_RECOVERY_PROFILE" ]]; then
644+
NEMOCLAW_RECOVERY_PROFILE="$(detect_shell_profile)"
581645
fi
582646

583647
echo ""
584648
echo " ──────────────────────────────────────────────────"
585-
warn "Your current shell may not have the updated PATH."
649+
warn "Your current shell cannot resolve 'nemoclaw' yet."
586650
echo ""
587651
echo " To use nemoclaw now, run:"
588652
echo ""
589-
echo " source $profile"
653+
echo " export PATH=\"${NEMOCLAW_RECOVERY_EXPORT_DIR}:\$PATH\""
654+
echo " source ${NEMOCLAW_RECOVERY_PROFILE}"
655+
echo ""
656+
echo " Then run:"
657+
echo ""
658+
echo " nemoclaw onboard"
590659
echo ""
591-
echo " Or open a new terminal window."
660+
echo " Or open a new terminal window after updating your shell profile."
592661
echo " ──────────────────────────────────────────────────"
593662
echo ""
594663
}
@@ -635,8 +704,9 @@ main() {
635704
step 3 "Onboarding"
636705
if command_exists nemoclaw; then
637706
run_onboard
707+
ONBOARD_RAN=true
638708
else
639-
warn "Skipping onboarding — nemoclaw is not on PATH. Run 'nemoclaw onboard' after updating your PATH."
709+
warn "Skipping onboarding — this shell still cannot resolve 'nemoclaw'."
640710
fi
641711

642712
print_done

scripts/debug.sh

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,16 @@ elif command -v gtimeout >/dev/null 2>&1; then
107107
TIMEOUT_BIN="gtimeout"
108108
fi
109109

110+
SCRIPT_DIR=""
111+
REPO_ROOT=""
112+
ONBOARD_SESSION_HELPER=""
113+
SCRIPT_PATH="${BASH_SOURCE[0]:-}"
114+
if [ -n "$SCRIPT_PATH" ] && [ -f "$SCRIPT_PATH" ]; then
115+
SCRIPT_DIR="$(cd "$(dirname "$SCRIPT_PATH")" && pwd)"
116+
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
117+
ONBOARD_SESSION_HELPER="${REPO_ROOT}/bin/lib/onboard-session.js"
118+
fi
119+
110120
# Redact known sensitive patterns (API keys, tokens, passwords in env/args).
111121
redact() {
112122
sed -E \
@@ -243,6 +253,24 @@ if [ "$QUICK" = false ]; then
243253
collect "openshell-gateway-info" openshell gateway info
244254
fi
245255

256+
# -- Onboard session state --
257+
258+
section "Onboard Session"
259+
if [ -n "$ONBOARD_SESSION_HELPER" ] && [ -f "$ONBOARD_SESSION_HELPER" ] && command -v node >/dev/null 2>&1; then
260+
# shellcheck disable=SC2016
261+
collect "onboard-session-summary" node -e '
262+
const helper = require(process.argv[1]);
263+
const summary = helper.summarizeForDebug();
264+
if (!summary) {
265+
process.stdout.write("No onboard session state found.\n");
266+
process.exit(0);
267+
}
268+
process.stdout.write(`${JSON.stringify(summary, null, 2)}\n`);
269+
' "$ONBOARD_SESSION_HELPER"
270+
else
271+
echo " (onboard session helper not available, skipping)"
272+
fi
273+
246274
# -- Sandbox internals (via SSH using openshell ssh-config) --
247275

248276
if command -v openshell &>/dev/null \

0 commit comments

Comments
 (0)