Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
]
},
"scripts": {
"test": "node test/embedder-error-hints.test.mjs && node test/cjk-recursion-regression.test.mjs && node test/migrate-legacy-schema.test.mjs && node --test test/config-session-strategy-migration.test.mjs && node --test test/scope-access-undefined.test.mjs && node --test test/reflection-bypass-hook.test.mjs && node --test test/smart-extractor-scope-filter.test.mjs && node --test test/store-empty-scope-filter.test.mjs && node --test test/recall-text-cleanup.test.mjs && node test/update-consistency-lancedb.test.mjs && node --test test/strip-envelope-metadata.test.mjs && node test/cli-smoke.mjs && node test/functional-e2e.mjs && node test/retriever-rerank-regression.mjs && node test/smart-memory-lifecycle.mjs && node test/smart-extractor-branches.mjs && node test/plugin-manifest-regression.mjs && node --test test/session-summary-before-reset.test.mjs && node --test test/sync-plugin-version.test.mjs && node test/smart-metadata-v2.mjs && node test/vector-search-cosine.test.mjs && node test/context-support-e2e.mjs && node test/temporal-facts.test.mjs && node test/memory-update-supersede.test.mjs && node test/memory-upgrader-diagnostics.test.mjs && node --test test/llm-api-key-client.test.mjs && node --test test/llm-oauth-client.test.mjs && node --test test/cli-oauth-login.test.mjs && node --test test/workflow-fork-guards.test.mjs && node --test test/clawteam-scope.test.mjs && node --test test/cross-process-lock.test.mjs && node --test test/preference-slots.test.mjs && node test/is-latest-auto-supersede.test.mjs && node --test test/temporal-awareness.test.mjs",
"test": "node test/embedder-error-hints.test.mjs && node test/cjk-recursion-regression.test.mjs && node test/migrate-legacy-schema.test.mjs && node --test test/config-session-strategy-migration.test.mjs && node --test test/scope-access-undefined.test.mjs && node --test test/reflection-bypass-hook.test.mjs && node --test test/smart-extractor-scope-filter.test.mjs && node --test test/store-empty-scope-filter.test.mjs && node --test test/recall-text-cleanup.test.mjs && node test/update-consistency-lancedb.test.mjs && node --test test/strip-envelope-metadata.test.mjs && node test/cli-smoke.mjs && node test/functional-e2e.mjs && node test/retriever-rerank-regression.mjs && node test/smart-memory-lifecycle.mjs && node test/smart-extractor-branches.mjs && node test/plugin-manifest-regression.mjs && node --test test/session-summary-before-reset.test.mjs && node --test test/sync-plugin-version.test.mjs && node test/smart-metadata-v2.mjs && node test/vector-search-cosine.test.mjs && node test/context-support-e2e.mjs && node test/temporal-facts.test.mjs && node test/memory-update-supersede.test.mjs && node test/memory-upgrader-diagnostics.test.mjs && node --test test/llm-api-key-client.test.mjs && node --test test/llm-oauth-client.test.mjs && node --test test/cli-oauth-login.test.mjs && node --test test/workflow-fork-guards.test.mjs && node --test test/clawteam-scope.test.mjs && node --test test/cross-process-lock.test.mjs && node --test test/preference-slots.test.mjs && node test/is-latest-auto-supersede.test.mjs && node --test test/temporal-awareness.test.mjs && node --test test/adaptive-retrieval.test.mjs && node --test test/noise-filter.test.mjs",
"test:openclaw-host": "node test/openclaw-host-functional.mjs",
"version": "node scripts/sync-plugin-version.mjs openclaw.plugin.json package.json && git add openclaw.plugin.json"
},
Expand Down
19 changes: 12 additions & 7 deletions src/adaptive-retrieval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ const SKIP_PATTERNS = [
// Greetings & pleasantries
/^(hi|hello|hey|good\s*(morning|afternoon|evening|night)|greetings|yo|sup|howdy|what'?s up)\b/i,
// System/bot commands
/^\//, // slash commands
/^\/[a-z][\w-]*(\s|$)/i, // slash commands like /help, /recall my name
/^(run|build|test|ls|cd|git|npm|pip|docker|curl|cat|grep|find|make|sudo)\b/i,
// Simple affirmations/negations
/^(yes|no|yep|nope|ok|okay|sure|fine|thanks|thank you|thx|ty|got it|understood|cool|nice|great|good|perfect|awesome|👍|👎|✅|❌)\s*[.!]?$/i,
// Continuation prompts
/^(go ahead|continue|proceed|do it|start|begin|next|实施|實施|开始|開始|继续|繼續|好的|可以|行)\s*[.!]?$/i,
// Pure emoji
/^[\p{Emoji}\s]+$/u,
/^[\p{Extended_Pictographic}\u200d\ufe0f\s]+$/u,
// Heartbeat/system (match anywhere, not just at start, to handle prefixed formats)
/HEARTBEAT/i,
/^\[System/i,
Expand Down Expand Up @@ -72,10 +72,15 @@ export function shouldSkipRetrieval(query: string, minLength?: number): boolean

// Force retrieve if query has memory-related intent (checked FIRST,
// before length check, so short CJK queries like "你记得吗" aren't skipped)
if (FORCE_RETRIEVE_PATTERNS.some(p => p.test(trimmed))) return false;
// 注意:slash 命令(如 /recall)优先走 SKIP 路径,不走 FORCE 路径
const isSlashCmd = /^\/[a-z][\w-]*(\s|$)/i.test(trimmed);
if (!isSlashCmd && FORCE_RETRIEVE_PATTERNS.some(p => p.test(trimmed))) return false;

// Too short to be meaningful
if (trimmed.length < 5) return true;
// 含数字的字符串(如端口号 8080、issue 号 #123)携带语义信息,豁免长度截断
const hasCJK = /[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/.test(trimmed);
const hasDigit = /\d/.test(trimmed);
if (!hasDigit && trimmed.length < (hasCJK ? 2 : 5)) return true;

// Skip if matches any skip pattern
if (SKIP_PATTERNS.some(p => p.test(trimmed))) return true;
Expand All @@ -88,9 +93,9 @@ export function shouldSkipRetrieval(query: string, minLength?: number): boolean

// Skip very short non-question messages (likely commands or affirmations)
// CJK characters carry more meaning per character, so use a lower threshold
const hasCJK = /[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/.test(trimmed);
const defaultMinLength = hasCJK ? 6 : 15;
if (trimmed.length < defaultMinLength && !trimmed.includes('?') && !trimmed.includes('?')) return true;
// 含数字的字符串豁免此规则(端口号、issue 号等均属有语义内容)
const defaultMinLength = hasCJK ? 3 : 13;
if (!hasDigit && trimmed.length < defaultMinLength && !trimmed.includes('?') && !trimmed.includes('?')) return true;

// Default: do retrieve
return false;
Expand Down
5 changes: 3 additions & 2 deletions src/noise-filter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ const META_QUESTION_PATTERNS = [

// Session boilerplate
const BOILERPLATE_PATTERNS = [
/^(hi|hello|hey|good morning|good evening|greetings)/i,
/^(hi|hello|hey|good morning|good evening|greetings)(\s+\w+)?[!,.]?\s*$/i,
/^fresh session/i,
/^new session/i,
/^HEARTBEAT/i,
Expand Down Expand Up @@ -73,7 +73,8 @@ export function isNoise(text: string, options: NoiseFilterOptions = {}): boolean
const opts = { ...DEFAULT_OPTIONS, ...options };
const trimmed = text.trim();

if (trimmed.length < 5) return true;
const hasCJK = /[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/.test(trimmed);
if (trimmed.length < (hasCJK ? 2 : 5)) return true;

if (opts.filterDenials && DENIAL_PATTERNS.some(p => p.test(trimmed))) return true;
if (opts.filterMetaQuestions && META_QUESTION_PATTERNS.some(p => p.test(trimmed))) return true;
Expand Down
112 changes: 112 additions & 0 deletions test/adaptive-retrieval.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import { describe, it } from "node:test";
import assert from "node:assert/strict";
import jitiFactory from "jiti";

const jiti = jitiFactory(import.meta.url, { interopDefault: true });
const { shouldSkipRetrieval } = jiti("../src/adaptive-retrieval.ts");

/**
 * Regression suite for `shouldSkipRetrieval`.
 *
 * Every case is a `[title, query, expectedSkip]` row: the test named `title`
 * asserts that `shouldSkipRetrieval(query)` returns exactly `expectedSkip`.
 */
describe("shouldSkipRetrieval", () => {
  // Registers one `it` per row, in table order, so test names and ordering
  // stay stable in the reporter output.
  const registerCases = (rows) => {
    for (const [title, query, expectedSkip] of rows) {
      it(title, () => {
        assert.equal(shouldSkipRetrieval(query), expectedSkip);
      });
    }
  };

  // --- emoji regex fix ---
  describe("emoji regex should not match digits", () => {
    registerCases([
      ["does not skip pure digit strings", "12345", false],
      ["does not skip port numbers", "8080", false],
      ["does not skip hash-prefixed numbers", "#123", false],
      // Three emoji written as surrogate-pair escapes, nothing else.
      ["skips pure emoji input", "\ud83d\udc4d\ud83c\udf89\ud83d\ude80", true],
      // A ZWJ-joined emoji sequence followed by ordinary words.
      [
        "does not skip emoji mixed with text",
        "\ud83d\udc68\u200d\ud83d\udc69\u200d\ud83d\udc67 family trip plan",
        false,
      ],
    ]);
  });

  // --- slash command regex fix ---
  describe("slash command regex should not match file paths", () => {
    registerCases([
      ["skips single-word slash commands", "/help", true],
      ["skips slash command with trailing space", "/recall ", true],
      ["does not skip file paths", "/usr/bin/node", false],
      [
        "does not skip path with question",
        "/etc/nginx/nginx.conf \u600e\u4e48\u914d\u7f6e",
        false,
      ],
      ["does not skip API paths", "/api/v2/users", false],
      ["skips slash commands with arguments", "/recall my name", true],
      [
        "skips slash commands with content arguments",
        "/remember user prefers dark mode",
        true,
      ],
      [
        "skips slash commands with lesson content",
        "/lesson always use strict mode",
        true,
      ],
    ]);
  });

  // --- CJK short text threshold fix ---
  describe("CJK short text should not be killed by hard threshold", () => {
    registerCases([
      ["does not skip 4-char CJK query", "\u4ed6\u559c\u6b22\u732b", false],
      [
        "does not skip 4-char CJK query (residence)",
        "\u6211\u4f4f\u5317\u4eac",
        false,
      ],
      ["does not skip 3-char mixed CJK query", "\u7528Go\u5199", false],
      // The query ends with the full-width question mark U+FF1F.
      [
        "does not skip CJK query with question mark",
        "\u5bc6\u7801\u662f\u5565\uff1f",
        false,
      ],
      ["skips single CJK character", "\u597d", true],
    ]);
  });

  // --- existing behavior preserved ---
  describe("existing skip/force behavior preserved", () => {
    registerCases([
      ["skips greetings", "hi", true],
      ["skips short English affirmations", "ok", true],
      [
        "does not skip memory-related queries (English)",
        "remember my name is Alice",
        false,
      ],
      [
        "does not skip memory-related queries (Chinese)",
        "\u4f60\u8bb0\u5f97\u5417",
        false,
      ],
      [
        "does not skip normal length queries",
        "what was the database schema we discussed",
        false,
      ],
    ]);
  });
});
96 changes: 96 additions & 0 deletions test/noise-filter.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import { describe, it } from "node:test";
import assert from "node:assert/strict";
import jitiFactory from "jiti";

const jiti = jitiFactory(import.meta.url, { interopDefault: true });
const { isNoise, filterNoise } = jiti("../src/noise-filter.ts");

/**
 * Regression suite for `isNoise`.
 *
 * Default-option cases are `[title, text, expected]` rows checked with a
 * single-argument call; the option-override cases pass an explicit options
 * object as the second argument.
 */
describe("isNoise", () => {
  // Registers one `it` per row; each calls `isNoise(text)` with no options.
  const registerDefaultCases = (rows) => {
    for (const [title, text, expected] of rows) {
      it(title, () => {
        assert.equal(isNoise(text), expected);
      });
    }
  };

  // --- CJK short text fix ---
  describe("CJK short text should not be marked as noise", () => {
    registerDefaultCases([
      ["4-char CJK is not noise", "\u4ed6\u559c\u6b22\u732b", false],
      ["3-char mixed CJK is not noise", "\u7528Go\u5199", false],
      ["2-char CJK is not noise", "\u5b66\u4e60", false],
      ["single CJK char is noise", "\u597d", true],
    ]);
  });

  // --- English short text preserved ---
  describe("English short text filtering preserved", () => {
    registerDefaultCases([
      ["marks 'ok' as noise", "ok", true],
      ["marks 'hi' as noise", "hi", true],
      ["marks 'test' as noise", "test", true],
      [
        "does not mark 5+ char English as noise by length alone",
        "hello world this is a real memory",
        false,
      ],
    ]);
  });

  // --- pattern filters ---
  describe("denial pattern filtering", () => {
    registerDefaultCases([
      ["marks agent denial as noise", "I don't have any information about that", true],
    ]);
  });

  describe("meta-question pattern filtering", () => {
    registerDefaultCases([
      ["marks meta-question as noise", "do you remember what I said", true],
    ]);
  });

  describe("boilerplate pattern filtering", () => {
    registerDefaultCases([
      ["marks greeting as noise", "hello there", true],
    ]);
  });

  // --- options control ---
  describe("options control", () => {
    // Each row turns one filter off; the text is then expected NOT to be
    // reported as noise.
    const overrideRows = [
      ["respects filterBoilerplate: false", "hello there", { filterBoilerplate: false }],
      ["respects filterDenials: false", "I don't have any information", { filterDenials: false }],
      ["respects filterMetaQuestions: false", "do you remember", { filterMetaQuestions: false }],
    ];

    for (const [title, text, options] of overrideRows) {
      it(title, () => {
        assert.equal(isNoise(text, options), false);
      });
    }
  });
});

/**
 * Checks `filterNoise` on an array of records, using an accessor callback to
 * pull the text out of each record.
 */
describe("filterNoise", () => {
  it("filters noise items from array", () => {
    const candidates = [
      { id: 1, text: "\u4ed6\u559c\u6b22\u732b" },
      { id: 2, text: "ok" },
      { id: 3, text: "I prefer dark mode for all editors" },
      { id: 4, text: "\u597d" },
    ];

    const kept = filterNoise(candidates, (entry) => entry.text);
    const keptIds = kept.map((entry) => entry.id);

    // Expected: records 2 ("ok") and 4 (a single CJK character) are removed,
    // and the survivors keep their original relative order.
    assert.deepEqual(keptIds, [1, 3]);
  });
});
Loading