flamboh · flamboh · Mar 27, 2026 · Mar 24, 2026 · Mar 24, 2026 · Mar 24, 2026
diff --git a/apps/web/src/lib/server/datasets.ts b/apps/web/src/lib/server/datasets.ts
@@ -1,5 +1,6 @@
 import fs from 'fs';
 import path from 'path';
+import { createRequire } from 'node:module';
 import Database from 'better-sqlite3';
 import type { DatasetSummary } from '$lib/types/types';
 import { getDatasetsConfigPath, getRepoRoot } from '$lib/server/paths';
@@ -17,9 +18,21 @@ export interface DatasetConfig {
 
 const repoRoot = getRepoRoot();
 const defaultRegistryPath = getDatasetsConfigPath();
+const require = createRequire(import.meta.url);
+
+/**
+ * Minimal prepared-statement surface used by dataset queries: `get` yields a
+ * single result and `all` yields an array of results. Both are typed as
+ * `unknown`, so callers have to narrow the rows themselves.
+ */
+type PreparedStatement = {
+	get(...params: unknown[]): unknown;
+	all(...params: unknown[]): unknown[];
+};
+
+/**
+ * Driver-agnostic database handle stored in `datasetDbCache` and returned by
+ * `getDatasetDb`. It exposes only statement preparation and `close`, so either
+ * a `better-sqlite3` connection or the `node:sqlite` wrapper can back it.
+ */
+export interface ReadonlyDatasetDb {
+	prepare(sql: string): PreparedStatement;
+	close(): void;
+}
+
 const datasetDbCache = new Map<
 	string,
-	{ db: Database.Database; dbPath: string; mtimeMs: number }
+	{ db: ReadonlyDatasetDb; dbPath: string; mtimeMs: number }
 >();
 const datasetDefaultStartCache = new Map<
 	string,
@@ -197,7 +210,25 @@ export function getDatasetDbPath(datasetId: string): string {
 	return getDatasetConfig(datasetId).db_path;
 }
 
-export function getDatasetDb(datasetId: string): Database.Database {
+/**
+ * Opens `dbPath` read-only through Node's built-in `node:sqlite` driver and
+ * adapts the connection to the `ReadonlyDatasetDb` shape.
+ *
+ * The driver is pulled in with `require` inside the function, so it is only
+ * resolved when this function is actually called.
+ *
+ * @param dbPath - Filesystem path of the SQLite database to open.
+ * @returns A handle exposing `prepare` and `close` backed by `DatabaseSync`.
+ */
+function openNodeSqliteDatabase(dbPath: string): ReadonlyDatasetDb {
+	type SQLInputValue = import('node:sqlite').SQLInputValue;
+	const sqlite = require('node:sqlite') as typeof import('node:sqlite');
+	const connection = new sqlite.DatabaseSync(dbPath, { open: true, readOnly: true });
+
+	const prepare = (sql: string): PreparedStatement => {
+		const statement = connection.prepare(sql);
+		return {
+			// Parameters arrive as `unknown[]`; the cast hands them to the driver unchanged.
+			get(...params: unknown[]) {
+				return statement.get(...(params as SQLInputValue[]));
+			},
+			all(...params: unknown[]) {
+				return statement.all(...(params as SQLInputValue[]));
+			}
+		};
+	};
+
+	return {
+		prepare,
+		close: () => connection.close()
+	};
+}
+
+export function getDatasetDb(datasetId: string): ReadonlyDatasetDb {
 	const dbPath = getDatasetDbPath(datasetId);
 	if (!fs.existsSync(dbPath)) {
 		throw new Error(`Dataset database not found for '${datasetId}' at ${dbPath}`);
@@ -217,7 +248,18 @@ export function getDatasetDb(datasetId: string): Database.Database {
 		}
 	}
 
-	const db = new Database(dbPath, { readonly: true });
+	let db: ReadonlyDatasetDb;
+	try {
+		db = new Database(dbPath, { readonly: true });
+	} catch (error) {
+		if (error instanceof Error && 'code' in error && error.code === 'ERR_DLOPEN_FAILED') {
+			console.warn(`better-sqlite3 failed to load for '${datasetId}', falling back to node:sqlite`);
+			db = openNodeSqliteDatabase(dbPath);
+		} else {
+			throw error;
+		}
+	}
+
 	datasetDbCache.set(datasetId, { db, dbPath, mtimeMs: stat.mtimeMs });
 	return db;
 }

diff --git a/apps/web/src/routes/api/netflow/stats/+server.ts b/apps/web/src/routes/api/netflow/stats/+server.ts
@@ -17,7 +17,7 @@ const BUCKET_SIZES: Record<string, number> = {
  */
 function getBucketStartQuery(groupBy: string): string {
 	const bucketSize = BUCKET_SIZES[groupBy] ?? BUCKET_SIZES.date;
-	return `(CAST(strftime('%s', datetime(timestamp, 'unixepoch', 'localtime')) AS integer) / ${bucketSize}) * ${bucketSize}`;
+	// Reading the modifier chain left to right: shift the row's epoch timestamp
+	// into local time, snap to local midnight, convert that midnight back to UTC,
+	// then add the bucket offset and emit the result as epoch seconds.
+	// The offset is (seconds since local midnight / bucketSize) * bucketSize,
+	// i.e. floored to a bucket boundary when bucketSize is a whole number.
+	// NOTE(review): the offset is measured in wall-clock seconds, so on a DST
+	// transition day it may differ from elapsed seconds — confirm this is acceptable.
+	return `(CAST(strftime('%s', datetime(timestamp, 'unixepoch', 'localtime', 'start of day', 'utc', printf('+%d seconds', ((CAST(strftime('%s', datetime(timestamp, 'unixepoch', 'localtime')) AS integer) - CAST(strftime('%s', datetime(timestamp, 'unixepoch', 'localtime', 'start of day')) AS integer)) / ${bucketSize}) * ${bucketSize}))) AS integer))`;
 }
 
 export const GET: RequestHandler = async ({ url }) => {

diff --git a/apps/web/tests/lib/server/datasets.test.ts b/apps/web/tests/lib/server/datasets.test.ts
@@ -4,25 +4,54 @@ import path from 'path';
 import { spawnSync } from 'child_process';
 import { afterEach, describe, expect, it, vi } from 'vitest';
 
-vi.mock('better-sqlite3', () => ({
-	default: class MockDatabase {
-		constructor(private readonly dbPath: string) {}
-
-		prepare(query: string) {
-			return {
-				get: () => {
-					const result = spawnSync('sqlite3', [this.dbPath, query], { encoding: 'utf-8' });
-					if (result.status !== 0) {
-						throw new Error(result.stderr || 'sqlite3 query failed');
-					}
+/**
+ * Test double for `node:sqlite`'s `DatabaseSync`. Instead of opening the file
+ * in-process, each `get()` shells out to the `sqlite3` CLI with the prepared
+ * query and reports the numeric stdout as `minTimestamp`.
+ */
+class MockDatabaseSync {
+	private readonly dbPath: string;
+
+	constructor(dbPath: string) {
+		this.dbPath = dbPath;
+	}
+
+	prepare(query: string) {
+		const runQuery = () => {
+			const { status, stderr, stdout } = spawnSync('sqlite3', [this.dbPath, query], {
+				encoding: 'utf-8'
+			});
+			if (status !== 0) {
+				throw new Error(stderr || 'sqlite3 query failed');
+			}
+
+			// Non-numeric CLI output becomes null rather than NaN.
+			const parsed = Number(stdout.trim());
+			return { minTimestamp: Number.isFinite(parsed) ? parsed : null };
+		};
+
+		return {
+			get: runQuery,
+			all: () => []
+		};
+	}
+
+	close() {}
+}
 
-					const minTimestamp = Number(result.stdout.trim());
-					return { minTimestamp: Number.isFinite(minTimestamp) ? minTimestamp : null };
+// Factory behind the mocked `better-sqlite3` default export. It is wrapped in
+// `vi.fn` so a test can swap in a one-off implementation (for example one that
+// throws). By default it returns a handle whose `get()` runs the query through
+// the `sqlite3` CLI and reports the numeric stdout as `minTimestamp`
+// (null when the output is not a finite number).
+const betterSqlite3Factory = vi.fn((dbPath: string) => ({
+	prepare(query: string) {
+		return {
+			get: () => {
+				const result = spawnSync('sqlite3', [dbPath, query], { encoding: 'utf-8' });
+				if (result.status !== 0) {
+					throw new Error(result.stderr || 'sqlite3 query failed');
 				}
-			};
-		}
 
-		close() {}
+				const minTimestamp = Number(result.stdout.trim());
+				return { minTimestamp: Number.isFinite(minTimestamp) ? minTimestamp : null };
+			}
+		};
+	},
+	close() {}
+}));
+
+// Route every construction of the `better-sqlite3` default export through
+// `betterSqlite3Factory`, so individual tests can override what it returns or throws.
+vi.mock('better-sqlite3', () => ({
+	default: vi.fn().mockImplementation((dbPath: string) => betterSqlite3Factory(dbPath))
+}));
+
+// Replace `createRequire` with a stub whose require function resolves only
+// 'node:sqlite' (to `MockDatabaseSync`); any other specifier throws, so an
+// unexpected require shows up as a test failure.
+vi.mock('node:module', async () => ({
+	createRequire: () => (specifier: string) => {
+		if (specifier === 'node:sqlite') {
+			return { DatabaseSync: MockDatabaseSync };
+		}
+		throw new Error(`Unexpected require: ${specifier}`);
 	}
 }));
 
@@ -34,6 +63,7 @@ async function loadDatasetsModule() {
 describe('dataset server helpers', () => {
 	afterEach(() => {
 		vi.unstubAllEnvs();
+		betterSqlite3Factory.mockClear();
 	});
 
 	it('lists dataset summaries from registry + sqlite min timestamp', async () => {
@@ -115,4 +145,43 @@ describe('dataset server helpers', () => {
 		expect(datasets.listDatasetSources('alpha')).toEqual(['r1', 'r2']);
 		expect(() => datasets.getDatasetConfig('missing')).toThrow(/Unknown dataset 'missing'/);
 	});
+
+	it('falls back to node:sqlite when better-sqlite3 fails to load', async () => {
+		const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'datasets-test-'));
+		// Capture the fallback warning: it keeps the expected message out of the
+		// test output and lets the test assert that the fallback branch really ran.
+		const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
+
+		try {
+			const dbPath = path.join(tempDir, 'netflow.sqlite');
+			const registryPath = path.join(tempDir, 'datasets.json');
+
+			// Seed a real SQLite file; the mocked drivers query it via the sqlite3 CLI.
+			const seedResult = spawnSync(
+				'sqlite3',
+				[
+					dbPath,
+					'CREATE TABLE netflow_stats (timestamp INTEGER NOT NULL); INSERT INTO netflow_stats (timestamp) VALUES (1740823200);'
+				],
+				{ encoding: 'utf-8' }
+			);
+			expect(seedResult.status).toBe(0);
+
+			fs.writeFileSync(
+				registryPath,
+				JSON.stringify([
+					{
+						dataset_id: 'alpha',
+						label: 'Alpha Label',
+						root_path: tempDir,
+						db_path: dbPath
+					}
+				])
+			);
+
+			// Simulate the native binding failing to load on the first open attempt.
+			betterSqlite3Factory.mockImplementationOnce(() => {
+				const error = new Error('Module did not self-register') as Error & { code: string };
+				error.code = 'ERR_DLOPEN_FAILED';
+				throw error;
+			});
+
+			vi.stubEnv('DATASETS_CONFIG_PATH', registryPath);
+
+			const datasets = await loadDatasetsModule();
+			expect(datasets.getDatasetDefaultStartDate('alpha')).toBe('2025-03-01');
+
+			// The correct date alone does not prove which driver served the query:
+			// also require that better-sqlite3 was attempted and the fallback warned.
+			expect(betterSqlite3Factory).toHaveBeenCalled();
+			expect(warnSpy).toHaveBeenCalledWith(
+				expect.stringContaining('falling back to node:sqlite')
+			);
+		} finally {
+			warnSpy.mockRestore();
+			// Do not leave the seeded database and registry behind in the temp dir.
+			fs.rmSync(tempDir, { recursive: true, force: true });
+		}
+	});
 });
diff --git a/apps/web/tests/routes/api-netflow-stats.test.ts b/apps/web/tests/routes/api-netflow-stats.test.ts
@@ -40,9 +40,10 @@ describe('/api/netflow/stats GET', () => {
 				bytesOther: 14
 			}
 		]);
+		const prepare = vi.fn().mockReturnValue({ all });
 		vi.mocked(getRequestedDataset).mockReturnValue('alpha');
 		vi.mocked(getDatasetDb).mockReturnValue({
-			prepare: vi.fn().mockReturnValue({ all })
+			prepare
 		} as never);
 
 		const response = await GET({
@@ -75,6 +76,7 @@ describe('/api/netflow/stats GET', () => {
 			]
 		});
 		expect(all).toHaveBeenCalledWith('r1', 'r2', '1', '2');
+		expect(prepare).toHaveBeenCalledWith(expect.stringContaining("'start of day', 'utc'"));
 	});
 
 	it('returns 500 when the database query fails', async () => {

diff --git a/term-update-accomplishments-2025-12-15.md b/term-update-accomplishments-2025-12-15.md
diff --git a/term-update.md b/term-update.md
diff --git a/tests/python/test_discovery.py b/tests/python/test_discovery.py
@@ -111,3 +111,69 @@ def test_scan_filesystem_skips_unparseable_and_pre_start_files(
     rows = list(discovery.scan_filesystem())
 
     assert rows == []
+
+
+def test_get_stale_days_uses_local_day_boundaries() -> None:
+    """Two files on the same local date must collapse into one stale day.
+
+    One row has ``flow_stats_status = 1`` and the other ``NULL``; the expected
+    result is a single ``(router, local-midnight timestamp)`` entry.
+    """
+    common, discovery = load_modules()
+    db = sqlite3.connect(':memory:')
+    common.init_processed_files_table(db)
+
+    def on_march_5(hour: int, minute: int) -> int:
+        return common.timestamp_to_unix(datetime(2025, 3, 5, hour, minute))
+
+    insert_sql = 'INSERT INTO processed_files (file_path, router, timestamp, flow_stats_status) VALUES (?, ?, ?, ?)'
+    # Early and late captures from the same local calendar day.
+    db.execute(
+        insert_sql,
+        ('/captures/r1/2025/03/05/nfcapd.202503050045', 'r1', on_march_5(0, 45), 1),
+    )
+    db.execute(
+        insert_sql,
+        ('/captures/r1/2025/03/05/nfcapd.202503052355', 'r1', on_march_5(23, 55), None),
+    )
+
+    stale_days = discovery.get_stale_days(db, 'flow_stats')
+
+    assert stale_days == {('r1', on_march_5(0, 0))}
+
+
+def test_sync_processed_files_table_updates_mirrored_path_without_duplication(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """A file rediscovered under a new root must update the existing row.
+
+    The table starts with one row under ``/old-root``; the stubbed scan reports
+    the same router and timestamp under ``/new-root``. The sync must report no
+    new files and leave exactly that row, now pointing at the new path.
+    """
+    common, discovery = load_modules()
+    db = sqlite3.connect(':memory:')
+    common.init_processed_files_table(db)
+
+    capture_time = datetime(2025, 3, 2, 0, 0)
+    ts = common.timestamp_to_unix(capture_time)
+    old_path = '/old-root/r1/2025/03/02/nfcapd.202503020000'
+    new_path = '/new-root/r1/2025/03/02/nfcapd.202503020000'
+
+    db.execute(
+        'INSERT INTO processed_files (file_path, router, timestamp, file_exists) VALUES (?, ?, ?, ?)',
+        (old_path, 'r1', ts, 1),
+    )
+
+    def fake_scan_filesystem(discovery_window_days=0):
+        # Same capture, now visible only under the mirrored root.
+        return iter([(new_path, 'r1', datetime(2025, 3, 2, 0, 0))])
+
+    monkeypatch.setattr(discovery, 'AVAILABLE_ROUTERS', ['r1'])
+    monkeypatch.setattr(discovery, 'DATA_START_DATE', datetime(2025, 3, 1, 0, 0))
+    monkeypatch.setattr(discovery, 'scan_filesystem', fake_scan_filesystem)
+
+    stats = discovery.sync_processed_files_table(
+        db,
+        include_gaps=False,
+        reprocess_window_days=0,
+        discovery_window_days=0,
+    )
+
+    stored = db.execute(
+        'SELECT file_path, router, timestamp, file_exists FROM processed_files'
+    ).fetchone()
+    assert stats == {'discovered': 1, 'new_files': 0, 'gaps': 0}
+    assert stored == (new_path, 'r1', ts, 1)
diff --git a/tests/python/test_flow_db.py b/tests/python/test_flow_db.py
@@ -56,3 +56,40 @@ def test_batch_insert_results_inserts_successful_rows() -> None:
     ).fetchone()
     assert inserted == 1
     assert row == ('/tmp/a', 'r1', 3, 4, 5, 0)
+
+
+def test_batch_insert_results_replaces_mirrored_path_duplicate() -> None:
+    """A result for an existing router/timestamp under a new root replaces the row.
+
+    After the batch insert the table must hold a single row carrying the new
+    file path and the new flow/packet/byte counters.
+    """
+    _, flow_db = load_modules()
+    db = sqlite3.connect(':memory:')
+    flow_db.init_netflow_stats_table(db)
+
+    # Pre-existing row recorded under the old root.
+    seed_row = ('/old-root/a', 'r1', 123, 3, 4, 5)
+    db.execute(
+        """
+        INSERT INTO netflow_stats (
+            file_path, router, timestamp,
+            flows, flows_tcp, flows_udp, flows_icmp, flows_other,
+            packets, packets_tcp, packets_udp, packets_icmp, packets_other,
+            bytes, bytes_tcp, bytes_udp, bytes_icmp, bytes_other,
+            first_timestamp, last_timestamp, msec_first, msec_last, sequence_failures
+        ) VALUES (?, ?, ?, ?, 0, 0, 0, 0, ?, 0, 0, 0, 0, ?, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+        """,
+        seed_row,
+    )
+
+    # Same router and timestamp, different root and counters.
+    mirrored_result = {
+        'file_path': '/new-root/a',
+        'router': 'r1',
+        'timestamp': 123,
+        'success': True,
+        'data': {'flows': 7, 'packets': 8, 'bytes': 9},
+    }
+    inserted = flow_db.batch_insert_results(db, [mirrored_result])
+
+    stored = db.execute(
+        'SELECT file_path, router, timestamp, flows, packets, bytes FROM netflow_stats'
+    ).fetchall()
+    assert inserted == 1
+    assert stored == [('/new-root/a', 'r1', 123, 7, 8, 9)]