Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions src/auto-capture-cleanup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ const AUTO_CAPTURE_SESSION_RESET_PREFIX =
"A new session was started via /new or /reset. Execute your Session Startup sequence now";
// Matches a leading addressing token at the very start of the text: either a
// `<@digits>` / `<@!digits>` mention or an `@handle` (letters, digits, `_`, `.`, `-`),
// together with any whitespace that follows it.
const AUTO_CAPTURE_ADDRESSING_PREFIX_RE = /^(?:<@!?[0-9]+>|@[A-Za-z0-9_.-]+)\s*/;
// Matches every whole line of the form `System: [...] Exec completed|failed|started ...`
// (global, case-insensitive, multiline so `^`/`$` anchor per line).
const AUTO_CAPTURE_SYSTEM_EVENT_LINE_RE = /^System:\s*\[[^\n]*?\]\s*Exec\s+(?:completed|failed|started)\b.*$/gim;
// Matches a leading `[Subagent Context]` / `[Subagent Task]` tag plus the whitespace
// after it. Used with `replace` to remove the tag from the start of a line.
const AUTO_CAPTURE_RUNTIME_WRAPPER_LINE_RE = /^\[(?:Subagent Context|Subagent Task)\]\s*/i;
// Same tag as above without the trailing whitespace. Used with `test` to detect
// whether a line starts with a runtime wrapper tag.
const AUTO_CAPTURE_RUNTIME_WRAPPER_PREFIX_RE = /^\[(?:Subagent Context|Subagent Task)\]/i;
// Matches each known boilerplate sentence of the subagent wrapper (global,
// case-insensitive). The first alternative consumes "You are running as a subagent ..."
// lazily up to either the end of the input or the whitespace following a period
// (the `(?<=\.)` lookbehind); the remaining alternatives are fixed sentences with an
// optional trailing period and trailing whitespace.
const AUTO_CAPTURE_RUNTIME_WRAPPER_BOILERPLATE_RE =
/(?:You are running as a subagent\b.*?(?:$|(?<=\.)\s+)|Results auto-announce to your requester\.?\s*|do not busy-poll for status\.?\s*|Reply with a brief acknowledgment only\.?\s*|Do not use any memory tools\.?\s*)/gi;

function escapeRegExp(value: string): string {
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
Expand Down Expand Up @@ -62,6 +66,60 @@ function stripAutoCaptureAddressingPrefix(text: string): string {
return text.replace(AUTO_CAPTURE_ADDRESSING_PREFIX_RE, "").trim();
}

/**
 * Removes the known subagent boilerplate sentences from `text`.
 *
 * After removal, every run of two or more whitespace characters is collapsed
 * into a single space and the result is trimmed.
 *
 * @param text - Text with any leading wrapper tag already removed.
 * @returns The text without boilerplate sentences; may be an empty string.
 */
function stripRuntimeWrapperBoilerplate(text: string): string {
  const withoutBoilerplate = text.replace(AUTO_CAPTURE_RUNTIME_WRAPPER_BOILERPLATE_RE, "");
  // Deleting sentences can leave doubled whitespace behind; squeeze it.
  const squeezed = withoutBoilerplate.replace(/\s{2,}/g, " ");
  return squeezed.trim();
}

/**
 * Cleans a single line that may start with a `[Subagent Context]` or
 * `[Subagent Task]` tag.
 *
 * @param line - One line of text.
 * @returns The original `line` untouched when it does not start with a wrapper
 *   tag (ignoring surrounding whitespace). Otherwise the text after the tag
 *   with boilerplate sentences removed, or `""` when nothing follows the tag.
 */
function stripRuntimeWrapperLine(line: string): string {
  const candidate = line.trim();
  const startsWithWrapperTag = AUTO_CAPTURE_RUNTIME_WRAPPER_PREFIX_RE.test(candidate);
  if (!startsWithWrapperTag) {
    return line;
  }

  // Drop the tag (and the whitespace after it); what is left is the payload.
  const payload = candidate.replace(AUTO_CAPTURE_RUNTIME_WRAPPER_LINE_RE, "").trim();
  return payload === "" ? "" : stripRuntimeWrapperBoilerplate(payload);
}

/**
 * Strips leading subagent runtime wrapper lines (`[Subagent Context]`,
 * `[Subagent Task]`) from the start of `text`, keeping any real content.
 *
 * Lines are processed top-down while in the "lead-in" phase:
 * - blank lines are skipped;
 * - a line starting with a wrapper tag is cleaned; if content survives, it is
 *   kept and the lead-in phase ends, otherwise the line is dropped;
 * - once at least one wrapper tag has been seen, a line that consists solely
 *   of wrapper boilerplate (a wrapper sentence wrapped onto its own line, e.g.
 *   "Results auto-announce to your requester.") is dropped as well, so that
 *   multi-line wrappers do not leak through and do not end the lead-in phase
 *   before a following wrapper tag is reached;
 * - any other line ends the lead-in phase.
 *
 * After the lead-in phase every line is kept verbatim.
 *
 * @param text - Raw message text.
 * @returns The text without leading wrapper lines, trimmed. Returns `""` for
 *   empty / whitespace-only input or a wrapper-only payload.
 */
function stripLeadingRuntimeWrappers(text: string): string {
  const trimmed = text.trim();
  if (!trimmed) {
    return trimmed;
  }

  const cleanedLines: string[] = [];
  let strippingLeadIn = true;
  // Boilerplate-only lines are only treated as wrapper continuations after a
  // wrapper tag was actually seen, so untagged text is never dropped.
  let sawWrapperTag = false;

  for (const line of trimmed.split("\n")) {
    if (strippingLeadIn) {
      const current = line.trim();

      if (current === "") {
        continue;
      }

      if (AUTO_CAPTURE_RUNTIME_WRAPPER_PREFIX_RE.test(current)) {
        sawWrapperTag = true;
        const cleaned = stripRuntimeWrapperLine(current);
        if (cleaned) {
          // Real content shared a line with the tag: keep it and stop stripping.
          cleanedLines.push(cleaned);
          strippingLeadIn = false;
        }
        continue;
      }

      // Continuation of a multi-line wrapper: nothing but boilerplate remains.
      if (sawWrapperTag && stripRuntimeWrapperBoilerplate(current) === "") {
        continue;
      }

      strippingLeadIn = false;
    }

    cleanedLines.push(line);
  }

  return cleanedLines.join("\n").trim();
}

export function stripAutoCaptureInjectedPrefix(role: string, text: string): string {
if (role !== "user") {
return text.trim();
Expand All @@ -76,6 +134,7 @@ export function stripAutoCaptureInjectedPrefix(role: string, text: string): stri
normalized = stripAutoCaptureSessionResetPrefix(normalized);
normalized = stripLeadingInboundMetadata(normalized);
normalized = stripAutoCaptureAddressingPrefix(normalized);
normalized = stripLeadingRuntimeWrappers(normalized);
normalized = stripLeadingInboundMetadata(normalized);
normalized = normalized.replace(/\n{3,}/g, "\n\n");
return normalized.trim();
Expand Down
1 change: 1 addition & 0 deletions src/extraction-prompts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ ${conversationText}
- Temporary information: One-time questions or conversations
- Vague information: "User has questions about a feature" (no specific details)
- Tool output, error logs, or boilerplate
- Runtime scaffolding or orchestration wrappers such as "[Subagent Context]", "[Subagent Task]", bootstrap wrappers, task envelopes, or agent instructions — these are execution metadata, NEVER store them as memories
- Recall queries / meta-questions: "Do you remember X?", "你还记得X吗?", "你知道我喜欢什么吗" — these are retrieval requests, NOT new information to store
- Degraded or incomplete references: If the user mentions something vaguely ("that thing I said"), do NOT invent details or create a hollow memory
Expand Down
13 changes: 12 additions & 1 deletion src/smart-extractor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,19 @@ import { batchDedup } from "./batch-dedup.js";
* - Standalone JSON blocks containing message_id/sender_id fields
*/
export function stripEnvelopeMetadata(text: string): string {
// 1. Strip "System: [timestamp] Channel..." lines
// 0. Strip runtime orchestration wrappers that should never become memories
// (sub-agent task scaffolding is execution metadata, not conversation content).
let cleaned = text.replace(
/^\[(?:Subagent Context|Subagent Task)\]\s*(?:You are running as a subagent.*?(?:$|(?<=\.)\s+)|Results auto-announce to your requester\.?\s*|do not busy-poll for status\.?\s*|Reply with a brief acknowledgment only\.?\s*|Do not use any memory tools\.?\s*)?/gim,
"",
);
cleaned = cleaned.replace(
/^(?:Results auto-announce to your requester\.?|do not busy-poll for status\.?|Do not use any memory tools\.?)\s*$/gim,
"",
);

// 1. Strip "System: [timestamp] Channel..." lines
cleaned = cleaned.replace(
/^System:\s*\[[\d\-: +GMT]+\]\s+\S+\[.*?\].*$/gm,
"",
);
Expand Down
50 changes: 50 additions & 0 deletions test/auto-capture-cleanup.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { describe, it } from "node:test";
import assert from "node:assert/strict";
import jitiFactory from "jiti";

const jiti = jitiFactory(import.meta.url, { interopDefault: true });

const {
normalizeAutoCaptureText,
stripAutoCaptureInjectedPrefix,
} = jiti("../src/auto-capture-cleanup.ts");

// Covers how subagent runtime wrappers are removed from auto-captured user text.
describe("auto-capture cleanup", () => {
  it("preserves real content when wrapper lines are mixed with facts in the same payload", () => {
    // Two wrapper-tagged lines; the second one carries real facts between boilerplate.
    const payloadLines = [
      "[Subagent Context] You are running as a subagent (depth 1/1). Results auto-announce to your requester.",
      "[Subagent Task] Reply with a brief acknowledgment only. Facts for automatic memory extraction quality test: 1) Shen prefers concise blunt status updates. 2) Project Orion deploy window is Friday 21:00 Asia/Shanghai. 3) If a database migration touches billing tables, require a dry run first. Do not use any memory tools.",
    ];
    const expected =
      "Facts for automatic memory extraction quality test: 1) Shen prefers concise blunt status updates. 2) Project Orion deploy window is Friday 21:00 Asia/Shanghai. 3) If a database migration touches billing tables, require a dry run first.";

    const actual = normalizeAutoCaptureText("user", payloadLines.join("\n"));

    assert.equal(actual, expected);
  });

  it("drops wrapper-only payloads", () => {
    // Nothing but wrapper tags and boilerplate: the normalizer is expected to yield null.
    const payloadLines = [
      "[Subagent Context] You are running as a subagent (depth 1/1). Results auto-announce to your requester.",
      "[Subagent Task] Reply with a brief acknowledgment only.",
    ];

    const actual = normalizeAutoCaptureText("user", payloadLines.join("\n"));

    assert.equal(actual, null);
  });

  it("strips inbound metadata before preserving the remaining content", () => {
    // Metadata block first, then a wrapper-tagged line that ends with real content.
    const payloadLines = [
      "Conversation info (untrusted metadata):",
      "```json",
      '{"message_id":"om_123","sender_id":"ou_456"}',
      "```",
      "",
      "[Subagent Task] Reply with a brief acknowledgment only. Actual user content starts here.",
    ];

    const actual = stripAutoCaptureInjectedPrefix("user", payloadLines.join("\n"));

    assert.equal(actual, "Actual user content starts here.");
  });
});
24 changes: 24 additions & 0 deletions test/strip-envelope-metadata.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,30 @@ describe("stripEnvelopeMetadata", () => {
// -----------------------------------------------------------------------
// Edge cases
// -----------------------------------------------------------------------
it("strips subagent runtime wrapper lines but preserves real conversation that follows", () => {
  // Two single-line wrappers followed by one line of genuine content.
  const envelope = [
    "[Subagent Context] You are running as a subagent (depth 1/1). Results auto-announce to your requester.",
    "[Subagent Task] Reply with a brief acknowledgment only.",
    "Actual user content starts here.",
  ];

  const stripped = stripEnvelopeMetadata(envelope.join("\n"));

  assert.equal(stripped, "Actual user content starts here.");
});

it("strips multiline wrapper continuation text but preserves following conversation", () => {
  // Each wrapper is split over two lines: the tag line and a boilerplate-only continuation.
  const envelope = [
    "[Subagent Context] You are running as a subagent (depth 1/1).",
    "Results auto-announce to your requester.",
    "[Subagent Task] Reply with a brief acknowledgment only.",
    "Do not use any memory tools.",
    "Actual user content starts here.",
  ];

  const stripped = stripEnvelopeMetadata(envelope.join("\n"));

  assert.equal(stripped, "Actual user content starts here.");
});

it("handles Telegram-style envelope headers", () => {
const input = [
"System: [2026-03-18 14:21:36 GMT+8] Telegram[bot123] DM | user_456 [msg:12345]",
Expand Down
Loading