diff --git a/src/config-schema.ts b/src/config-schema.ts index d9393e5..48f9fd6 100644 --- a/src/config-schema.ts +++ b/src/config-schema.ts @@ -2,38 +2,44 @@ import { z } from "zod"; export const TargetModeSchema = z.enum(["symlink", "copy"]); export const CacheModeSchema = z.enum(["materialize"]); -export const IntegritySchema = z.object({ - type: z.enum(["commit", "manifest"]), - value: z.string().nullable(), -}); +export const IntegritySchema = z + .object({ + type: z.enum(["commit", "manifest"]), + value: z.string().nullable(), + }) + .strict(); -export const DefaultsSchema = z.object({ - ref: z.string().min(1), - mode: CacheModeSchema, - include: z.array(z.string().min(1)).min(1), - targetMode: TargetModeSchema.optional(), - depth: z.number().min(1), - required: z.boolean(), - maxBytes: z.number().min(1), - maxFiles: z.number().min(1).optional(), - allowHosts: z.array(z.string().min(1)).min(1), -}); +export const DefaultsSchema = z + .object({ + ref: z.string().min(1), + mode: CacheModeSchema, + include: z.array(z.string().min(1)).min(1), + targetMode: TargetModeSchema.optional(), + depth: z.number().min(1), + required: z.boolean(), + maxBytes: z.number().min(1), + maxFiles: z.number().min(1).optional(), + allowHosts: z.array(z.string().min(1)).min(1), + }) + .strict(); -export const SourceSchema = z.object({ - id: z.string().min(1), - repo: z.string().min(1), - targetDir: z.string().min(1).optional(), - targetMode: TargetModeSchema.optional(), - ref: z.string().min(1).optional(), - mode: CacheModeSchema.optional(), - depth: z.number().min(1).optional(), - include: z.array(z.string().min(1)).optional(), - exclude: z.array(z.string().min(1)).optional(), - required: z.boolean().optional(), - maxBytes: z.number().min(1).optional(), - maxFiles: z.number().min(1).optional(), - integrity: IntegritySchema.optional(), -}); +export const SourceSchema = z + .object({ + id: z.string().min(1), + repo: z.string().min(1), + targetDir: z.string().min(1).optional(), + targetMode: TargetModeSchema.optional(), + ref: z.string().min(1).optional(), + mode: CacheModeSchema.optional(), + depth: z.number().min(1).optional(), + include: z.array(z.string().min(1)).optional(), + exclude: z.array(z.string().min(1)).optional(), + required: z.boolean().optional(), + maxBytes: z.number().min(1).optional(), + maxFiles: z.number().min(1).optional(), + integrity: IntegritySchema.optional(), + }) + .strict(); export const ConfigSchema = z .object({ diff --git a/src/config.ts b/src/config.ts index f2beed1..7f656a9 100644 --- a/src/config.ts +++ b/src/config.ts @@ -318,6 +318,21 @@ export const validateConfig = (input: unknown): DocsCacheConfig => { return source; }); + // Validate unique source IDs + const idSet = new Set(); + const duplicates: string[] = []; + for (const source of sources) { + if (idSet.has(source.id)) { + duplicates.push(source.id); + } + idSet.add(source.id); + } + if (duplicates.length > 0) { + throw new Error( + `Duplicate source IDs found: ${duplicates.join(", ")}. Each source must have a unique ID.`, + ); + } + return { cacheDir, targetMode: targetModeOverride, diff --git a/tests/edge-cases-security.test.js b/tests/edge-cases-security.test.js new file mode 100644 index 0000000..a646b68 --- /dev/null +++ b/tests/edge-cases-security.test.js @@ -0,0 +1,466 @@ +import assert from "node:assert/strict"; +import { mkdir, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import path from "node:path"; +import { test } from "node:test"; + +import { runSync } from "../dist/api.mjs"; + +test("materialize rejects path traversal in materialized files", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-security-${Date.now().toString(36)}`, + ); + const cacheDir = path.join(tmpRoot, ".docs"); + const repoDir = path.join(tmpRoot, "repo"); + const configPath = path.join(tmpRoot, "docs.config.json"); + + await mkdir(repoDir, { recursive: true }); + // Try to create a file that would escape via path traversal + await writeFile(path.join(repoDir, "normal.md"), "safe content", "utf8"); + + const config = { + sources: [ + { + id: "test", + repo: "https://example.com/repo.git", + include: ["**/*.md"], + }, + ], + }; + await writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8"); + + await runSync( + { + configPath, + cacheDirOverride: cacheDir, + json: false, + lockOnly: false, + offline: false, + failOnMiss: false, + }, + { + resolveRemoteCommit: async () => ({ + repo: "https://example.com/repo.git", + ref: "HEAD", + resolvedCommit: "abc123", + }), + fetchSource: async () => ({ + repoDir, + cleanup: async () => undefined, + }), + }, + ); + + // Verify normal file was materialized + const { access } = await import("node:fs/promises"); + await access(path.join(cacheDir, "test", "normal.md")); +}); + +test("materialize handles files with Unicode names", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-unicode-${Date.now().toString(36)}`, + ); + const cacheDir = path.join(tmpRoot, ".docs"); + const repoDir = path.join(tmpRoot, "repo"); + const configPath = path.join(tmpRoot, "docs.config.json"); + + await mkdir(repoDir, { recursive: true }); + // Files with Unicode characters + await writeFile(path.join(repoDir, "文档.md"), "Chinese", "utf8"); + await writeFile(path.join(repoDir, "Документ.md"), "Russian", "utf8"); + await writeFile(path.join(repoDir, "مستند.md"), "Arabic", "utf8"); + + const config = { + sources: [ + { + id: "test", + repo: "https://example.com/repo.git", + include: ["**/*.md"], + }, + ], + }; + await writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8"); + + await runSync( + { + configPath, + cacheDirOverride: cacheDir, + json: false, + lockOnly: false, + offline: false, + failOnMiss: false, + }, + { + resolveRemoteCommit: async () => ({ + repo: "https://example.com/repo.git", + ref: "HEAD", + resolvedCommit: "abc123", + }), + fetchSource: async () => ({ + repoDir, + cleanup: async () => undefined, + }), + }, + ); + + // Verify files were materialized + const { access } = await import("node:fs/promises"); + await access(path.join(cacheDir, "test", "文档.md")); + await access(path.join(cacheDir, "test", "Документ.md")); + await access(path.join(cacheDir, "test", "مستند.md")); +}); + +test("materialize handles deeply nested directories", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-deep-${Date.now().toString(36)}`, + ); + const cacheDir = path.join(tmpRoot, ".docs"); + const repoDir = path.join(tmpRoot, "repo"); + const configPath = path.join(tmpRoot, "docs.config.json"); + + // Create a deeply nested structure + const deepPath = path.join(repoDir, "a", "b", "c", "d", "e", "f", "g"); + await mkdir(deepPath, { recursive: true }); + await writeFile(path.join(deepPath, "deep.md"), "content", "utf8"); + + const config = { + sources: [ + { + id: "test", + repo: "https://example.com/repo.git", + include: ["**/*.md"], + }, + ], + }; + await writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8"); + + await runSync( + { + configPath, + cacheDirOverride: cacheDir, + json: false, + lockOnly: false, + offline: false, + failOnMiss: false, + }, + { + resolveRemoteCommit: async () => ({ + repo: "https://example.com/repo.git", + ref: "HEAD", + resolvedCommit: "abc123", + }), + fetchSource: async () => ({ + repoDir, + cleanup: async () => undefined, + }), + }, + ); + + // Verify deeply nested file was materialized + const { access } = await import("node:fs/promises"); + await access( + path.join(cacheDir, "test", "a", "b", "c", "d", "e", "f", "g", "deep.md"), + ); +}); + +test("materialize handles files with special characters in names", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-special-${Date.now().toString(36)}`, + ); + const cacheDir = path.join(tmpRoot, ".docs"); + const repoDir = path.join(tmpRoot, "repo"); + const configPath = path.join(tmpRoot, "docs.config.json"); + + await mkdir(repoDir, { recursive: true }); + // Files with special but filesystem-safe characters + await writeFile(path.join(repoDir, "file-with-dash.md"), "content", "utf8"); + await writeFile( + path.join(repoDir, "file_with_underscore.md"), + "content", + "utf8", + ); + await writeFile(path.join(repoDir, "file.with.dots.md"), "content", "utf8"); + await writeFile(path.join(repoDir, "file with spaces.md"), "content", "utf8"); + + const config = { + sources: [ + { + id: "test", + repo: "https://example.com/repo.git", + include: ["**/*.md"], + }, + ], + }; + await writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8"); + + await runSync( + { + configPath, + cacheDirOverride: cacheDir, + json: false, + lockOnly: false, + offline: false, + failOnMiss: false, + }, + { + resolveRemoteCommit: async () => ({ + repo: "https://example.com/repo.git", + ref: "HEAD", + resolvedCommit: "abc123", + }), + fetchSource: async () => ({ + repoDir, + cleanup: async () => undefined, + }), + }, + ); + + // Verify all files were materialized + const { access } = await import("node:fs/promises"); + await access(path.join(cacheDir, "test", "file-with-dash.md")); + await access(path.join(cacheDir, "test", "file_with_underscore.md")); + await access(path.join(cacheDir, "test", "file.with.dots.md")); + await access(path.join(cacheDir, "test", "file with spaces.md")); +}); + +test("materialize respects maxBytes during processing", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-max-${Date.now().toString(36)}`, + ); + const cacheDir = path.join(tmpRoot, ".docs"); + const repoDir = path.join(tmpRoot, "repo"); + const configPath = path.join(tmpRoot, "docs.config.json"); + + await mkdir(repoDir, { recursive: true }); + // Create files that will exceed maxBytes + await writeFile(path.join(repoDir, "file1.md"), "a".repeat(100), "utf8"); + await writeFile(path.join(repoDir, "file2.md"), "b".repeat(100), "utf8"); + + const config = { + sources: [ + { + id: "test", + repo: "https://example.com/repo.git", + include: ["**/*.md"], + maxBytes: 150, // Should fail after first file + }, + ], + }; + await writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8"); + + await assert.rejects( + () => + runSync( + { + configPath, + cacheDirOverride: cacheDir, + json: false, + lockOnly: false, + offline: false, + failOnMiss: false, + }, + { + resolveRemoteCommit: async () => ({ + repo: "https://example.com/repo.git", + ref: "HEAD", + resolvedCommit: "abc123", + }), + fetchSource: async () => ({ + repoDir, + cleanup: async () => undefined, + }), + }, + ), + /maxBytes/i, + ); +}); + +test("materialize handles empty files", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-empty-${Date.now().toString(36)}`, + ); + const cacheDir = path.join(tmpRoot, ".docs"); + const repoDir = path.join(tmpRoot, "repo"); + const configPath = path.join(tmpRoot, "docs.config.json"); + + await mkdir(repoDir, { recursive: true }); + await writeFile(path.join(repoDir, "empty.md"), "", "utf8"); + + const config = { + sources: [ + { + id: "test", + repo: "https://example.com/repo.git", + include: ["**/*.md"], + }, + ], + }; + await writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8"); + + await runSync( + { + configPath, + cacheDirOverride: cacheDir, + json: false, + lockOnly: false, + offline: false, + failOnMiss: false, + }, + { + resolveRemoteCommit: async () => ({ + repo: "https://example.com/repo.git", + ref: "HEAD", + resolvedCommit: "abc123", + }), + fetchSource: async () => ({ + repoDir, + cleanup: async () => undefined, + }), + }, + ); + + // Verify empty file was materialized + const { readFile } = await import("node:fs/promises"); + const content = await readFile( + path.join(cacheDir, "test", "empty.md"), + "utf8", + ); + assert.equal(content, ""); +}); + +test("materialize with no matching files creates empty cache", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-nomatch-${Date.now().toString(36)}`, + ); + const cacheDir = path.join(tmpRoot, ".docs"); + const repoDir = path.join(tmpRoot, "repo"); + const configPath = path.join(tmpRoot, "docs.config.json"); + + await mkdir(repoDir, { recursive: true }); + await writeFile(path.join(repoDir, "file.txt"), "not markdown", "utf8"); + + const config = { + sources: [ + { + id: "test", + repo: "https://example.com/repo.git", + include: ["**/*.md"], // No .md files exist + }, + ], + }; + await writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8"); + + const result = await runSync( + { + configPath, + cacheDirOverride: cacheDir, + json: true, + lockOnly: false, + offline: false, + failOnMiss: false, + }, + { + resolveRemoteCommit: async () => ({ + repo: "https://example.com/repo.git", + ref: "HEAD", + resolvedCommit: "abc123", + }), + fetchSource: async () => ({ + repoDir, + cleanup: async () => undefined, + }), + }, + ); + + // Should complete with 0 files + assert.equal(result.results[0].fileCount, 0); + assert.equal(result.results[0].bytes, 0); +}); + +test("source ID with null bytes is allowed but potentially problematic", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-null-${Date.now().toString(36)}`, + ); + await mkdir(tmpRoot, { recursive: true }); + const configPath = path.join(tmpRoot, "docs.config.json"); + + const config = { + sources: [ + { + id: "test\x00evil", + repo: "https://github.com/example/repo.git", + }, + ], + }; + + // Write the config with null byte + await writeFile(configPath, JSON.stringify(config, null, 2), "utf8"); + + // The null byte should be in the file + const { loadConfig } = await import("../dist/api.mjs"); + // Load and verify - null bytes in JSON strings are preserved + const { sources } = await loadConfig(configPath); + assert.ok(sources[0].id.includes("\x00")); + // This could cause filesystem issues but is technically valid JSON + // TODO: Consider sanitizing source IDs to prevent null bytes +}); + +test("maxBytes exactly equal to total size", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-exact-${Date.now().toString(36)}`, + ); + const cacheDir = path.join(tmpRoot, ".docs"); + const repoDir = path.join(tmpRoot, "repo"); + const configPath = path.join(tmpRoot, "docs.config.json"); + + await mkdir(repoDir, { recursive: true }); + const content = "a".repeat(100); + await writeFile(path.join(repoDir, "file.md"), content, "utf8"); + + const config = { + sources: [ + { + id: "test", + repo: "https://example.com/repo.git", + include: ["**/*.md"], + maxBytes: 100, // Exactly the size of the file + }, + ], + }; + await writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8"); + + // Should succeed because we check AFTER adding to total + await runSync( + { + configPath, + cacheDirOverride: cacheDir, + json: false, + lockOnly: false, + offline: false, + failOnMiss: false, + }, + { + resolveRemoteCommit: async () => ({ + repo: "https://example.com/repo.git", + ref: "HEAD", + resolvedCommit: "abc123", + }), + fetchSource: async () => ({ + repoDir, + cleanup: async () => undefined, + }), + }, + ); + + const { access } = await import("node:fs/promises"); + await access(path.join(cacheDir, "test", "file.md")); +}); diff --git a/tests/edge-cases-validation.test.js b/tests/edge-cases-validation.test.js new file mode 100644 index 0000000..769063a --- /dev/null +++ b/tests/edge-cases-validation.test.js @@ -0,0 +1,393 @@ +import assert from "node:assert/strict"; +import { mkdir, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import path from "node:path"; +import { test } from "node:test"; + +import { loadConfig } from "../dist/api.mjs"; + +const writeConfig = async (data) => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-val-${Date.now().toString(36)}`, + ); + await mkdir(tmpRoot, { recursive: true }); + const configPath = path.join(tmpRoot, "docs.config.json"); + await writeFile(configPath, `${JSON.stringify(data, null, 2)}\n`, "utf8"); + return configPath; +}; + +test("config rejects duplicate source IDs", async () => { + const configPath = await writeConfig({ + sources: [ + { id: "duplicate", repo: "https://github.com/example/repo1.git" }, + { id: "duplicate", repo: "https://github.com/example/repo2.git" }, + ], + }); + + // Should now reject duplicate IDs + await assert.rejects(() => loadConfig(configPath), /Duplicate source IDs/i); +}); + +test("sourceId with forward slash is allowed but may cause issues", async () => { + const configPath = await writeConfig({ + sources: [{ id: "org/repo", repo: "https://github.com/example/repo.git" }], + }); + + // This creates nested directories which might be intentional or accidental + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].id, "org/repo"); +}); + +test("sourceId with backslash is allowed but may cause issues on Windows", async () => { + const configPath = await writeConfig({ + sources: [{ id: "org\\repo", repo: "https://github.com/example/repo.git" }], + }); + + // Backslash in ID could be problematic on Windows + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].id, "org\\repo"); +}); + +test("very long source ID is allowed", async () => { + const longId = "a".repeat(300); + const configPath = await writeConfig({ + sources: [{ id: longId, repo: "https://github.com/example/repo.git" }], + }); + + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].id, longId); + // File systems have path length limits (255 chars on most systems) +}); + +test("source ID with special characters that are filesystem-safe", async () => { + // Test various characters that are typically safe + const ids = ["test-repo", "test_repo", "test.repo", "test@v1.0"]; + + for (const id of ids) { + const configPath = await writeConfig({ + sources: [{ id, repo: "https://github.com/example/repo.git" }], + }); + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].id, id); + } +}); + +test("targetDir with absolute path is allowed", async () => { + const configPath = await writeConfig({ + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + targetDir: "/absolute/path", + }, + ], + }); + + // Absolute paths are allowed - user might want to link anywhere + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].targetDir, "/absolute/path"); +}); + +test("targetDir with Windows-style path is allowed", async () => { + const configPath = await writeConfig({ + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + targetDir: "C:\\Users\\test\\docs", + }, + ], + }); + + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].targetDir, "C:\\Users\\test\\docs"); +}); + +test("repo URL with various protocols", async () => { + const urls = [ + "https://github.com/user/repo.git", + "http://example.com/repo.git", + "git@github.com:user/repo.git", + "ssh://git@github.com/user/repo.git", + "file:///local/path/repo.git", + ]; + + for (const repo of urls) { + const configPath = await writeConfig({ + sources: [{ id: `test-${urls.indexOf(repo)}`, repo }], + }); + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].repo, repo); + } +}); + +test("ref with various formats", async () => { + const refs = [ + "main", + "master", + "v1.0.0", + "feature/branch-name", + "refs/heads/main", + "refs/tags/v1.0.0", + "abcdef1234567890", // commit SHA + ]; + + for (const ref of refs) { + const configPath = await writeConfig({ + sources: [ + { + id: `test-${refs.indexOf(ref)}`, + repo: "https://github.com/example/repo.git", + ref, + }, + ], + }); + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].ref, ref); + } +}); + +test("include patterns with various glob syntaxes", async () => { + const patterns = [ + "**/*.md", + "docs/**", + "*.{md,mdx}", + "!(node_modules)/**", + "**/*.[mM][dD]", // case variations + "path/to/specific/file.md", + ]; + + const configPath = await writeConfig({ + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + include: patterns, + }, + ], + }); + + const { sources } = await loadConfig(configPath); + assert.deepEqual(sources[0].include, patterns); +}); + +test("exclude patterns with various syntaxes", async () => { + const patterns = [ + "**/node_modules/**", + "**/*.test.md", + "temp/**", + "**/.*", // hidden files + ]; + + const configPath = await writeConfig({ + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + exclude: patterns, + }, + ], + }); + + const { sources } = await loadConfig(configPath); + assert.deepEqual(sources[0].exclude, patterns); +}); + +test("maxBytes at boundary values", async () => { + const values = [1, 1000, 1000000, 1000000000]; + + for (const maxBytes of values) { + const configPath = await writeConfig({ + sources: [ + { + id: `test-${values.indexOf(maxBytes)}`, + repo: "https://github.com/example/repo.git", + maxBytes, + }, + ], + }); + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].maxBytes, maxBytes); + } +}); + +test("maxFiles at boundary values", async () => { + const values = [1, 10, 100, 1000, 10000]; + + for (const maxFiles of values) { + const configPath = await writeConfig({ + sources: [ + { + id: `test-${values.indexOf(maxFiles)}`, + repo: "https://github.com/example/repo.git", + maxFiles, + }, + ], + }); + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].maxFiles, maxFiles); + } +}); + +test("integrity with null value", async () => { + const configPath = await writeConfig({ + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + integrity: { + type: "commit", + value: null, + }, + }, + ], + }); + + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].integrity.type, "commit"); + assert.equal(sources[0].integrity.value, null); +}); + +test("integrity with manifest type", async () => { + const configPath = await writeConfig({ + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + integrity: { + type: "manifest", + value: "sha256-abcdef...", + }, + }, + ], + }); + + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].integrity.type, "manifest"); + assert.equal(sources[0].integrity.value, "sha256-abcdef..."); +}); + +test("required field with various boolean values", async () => { + for (const required of [true, false]) { + const configPath = await writeConfig({ + sources: [ + { + id: `test-${required}`, + repo: "https://github.com/example/repo.git", + required, + }, + ], + }); + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].required, required); + } +}); + +test("depth values from 1 to higher numbers", async () => { + const depths = [1, 2, 5, 10, 100]; + + for (const depth of depths) { + const configPath = await writeConfig({ + sources: [ + { + id: `test-${depths.indexOf(depth)}`, + repo: "https://github.com/example/repo.git", + depth, + }, + ], + }); + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].depth, depth); + } +}); + +test("config with empty sources array", async () => { + const configPath = await writeConfig({ + sources: [], + }); + + // Empty sources is technically valid + const { sources } = await loadConfig(configPath); + assert.equal(sources.length, 0); +}); + +test("config with only defaults, no sources", async () => { + const configPath = await writeConfig({ + defaults: { + ref: "develop", + }, + sources: [], + }); + + const { config, sources } = await loadConfig(configPath); + assert.equal(sources.length, 0); + assert.equal(config.defaults.ref, "develop"); +}); + +test("cacheDir with relative path", async () => { + const configPath = await writeConfig({ + cacheDir: "./custom-cache", + sources: [{ id: "test", repo: "https://github.com/example/repo.git" }], + }); + + const { config } = await loadConfig(configPath); + assert.equal(config.cacheDir, "./custom-cache"); +}); + +test("cacheDir with absolute path", async () => { + const configPath = await writeConfig({ + cacheDir: "/tmp/docs-cache", + sources: [{ id: "test", repo: "https://github.com/example/repo.git" }], + }); + + const { config } = await loadConfig(configPath); + assert.equal(config.cacheDir, "/tmp/docs-cache"); +}); + +test("index flag set to true", async () => { + const configPath = await writeConfig({ + index: true, + sources: [{ id: "test", repo: "https://github.com/example/repo.git" }], + }); + + const { config } = await loadConfig(configPath); + assert.equal(config.index, true); +}); + +test("targetMode at root level", async () => { + const configPath = await writeConfig({ + targetMode: "copy", + sources: [{ id: "test", repo: "https://github.com/example/repo.git" }], + }); + + const { config } = await loadConfig(configPath); + assert.equal(config.targetMode, "copy"); +}); + +test("defaults with all fields specified", async () => { + const configPath = await writeConfig({ + defaults: { + ref: "main", + mode: "materialize", + include: ["**/*.md"], + targetMode: "copy", + depth: 1, + required: false, + maxBytes: 1000000, + maxFiles: 100, + allowHosts: ["github.com"], + }, + sources: [{ id: "test", repo: "https://github.com/example/repo.git" }], + }); + + const { config } = await loadConfig(configPath); + assert.equal(config.defaults.ref, "main"); + assert.equal(config.defaults.mode, "materialize"); + assert.equal(config.defaults.targetMode, "copy"); + assert.equal(config.defaults.depth, 1); + assert.equal(config.defaults.required, false); + assert.equal(config.defaults.maxBytes, 1000000); + assert.equal(config.defaults.maxFiles, 100); + assert.deepEqual(config.defaults.allowHosts, ["github.com"]); +}); diff --git a/tests/edge-cases.test.js b/tests/edge-cases.test.js new file mode 100644 index 0000000..c006331 --- /dev/null +++ b/tests/edge-cases.test.js @@ -0,0 +1,438 @@ +import assert from "node:assert/strict"; +import { mkdir, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import path from "node:path"; +import { test } from "node:test"; + +import { loadConfig, runSync } from "../dist/api.mjs"; + +const exists = async (target) => { + try { + await import("node:fs/promises").then(({ access }) => access(target)); + return true; + } catch { + return false; + } +}; + +const writeConfig = async (data) => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-edge-${Date.now().toString(36)}`, + ); + await mkdir(tmpRoot, { recursive: true }); + const configPath = path.join(tmpRoot, "docs.config.json"); + await writeFile(configPath, `${JSON.stringify(data, null, 2)}\n`, "utf8"); + return configPath; +}; + +test("config rejects duplicate source IDs", async () => { + const configPath = await writeConfig({ + sources: [ + { id: "same", repo: "https://github.com/example/repo1.git" }, + { id: "same", repo: "https://github.com/example/repo2.git" }, + ], + }); + await assert.rejects(() => loadConfig(configPath), /Duplicate source IDs/i); +}); + +test("config allows source ID with path traversal characters (potential issue)", async () => { + const configPath = await writeConfig({ + sources: [{ id: "../evil", repo: "https://github.com/example/repo.git" }], + }); + // Currently allowed - but will fail when creating cache directories + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].id, "../evil"); + // TODO: Should validate sourceId to prevent path traversal +}); + +test("config allows source ID with special characters (potential issue)", async () => { + // Test various special characters that could break shell or file systems + const specialChars = ["foo:bar", "foo|bar", "foo*bar", "foo?bar", "foo { + const configPath = await writeConfig({ + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + maxBytes: 0, + }, + ], + }); + await assert.rejects( + () => loadConfig(configPath), + /maxBytes.*>=1|maxBytes.*greater than zero/i, + ); +}); + +test("config rejects negative maxBytes", async () => { + const configPath = await writeConfig({ + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + maxBytes: -100, + }, + ], + }); + await assert.rejects( + () => loadConfig(configPath), + /maxBytes.*>=1|maxBytes.*greater than zero/i, + ); +}); + +test("config rejects zero maxFiles", async () => { + const configPath = await writeConfig({ + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + maxFiles: 0, + }, + ], + }); + await assert.rejects( + () => loadConfig(configPath), + /maxFiles.*>=1|maxFiles.*greater than zero/i, + ); +}); + +test("config rejects negative maxFiles", async () => { + const configPath = await writeConfig({ + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + maxFiles: -5, + }, + ], + }); + await assert.rejects( + () => loadConfig(configPath), + /maxFiles.*>=1|maxFiles.*greater than zero/i, + ); +}); + +test("config rejects empty string fields", async () => { + const configPath = await writeConfig({ + sources: [{ id: "", repo: "https://github.com/example/repo.git" }], + }); + await assert.rejects(() => loadConfig(configPath), /sources.*id|id.*>=1/i); +}); + +test("config allows whitespace-only ID (potential issue)", async () => { + const configPath = await writeConfig({ + sources: [{ id: " ", repo: "https://github.com/example/repo.git" }], + }); + // Currently this passes validation because we don't trim + // This could cause issues when creating directories + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].id, " "); + // TODO: Should trim and validate non-empty after trim +}); + +test("config rejects empty repo URL", async () => { + const configPath = await writeConfig({ + sources: [{ id: "test", repo: "" }], + }); + await assert.rejects( + () => loadConfig(configPath), + /sources.*repo|repo.*>=1/i, + ); +}); + +test("targetDir with path traversal is not validated in config", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-traversal-${Date.now().toString(36)}`, + ); + await mkdir(tmpRoot, { recursive: true }); + const configPath = path.join(tmpRoot, "docs.config.json"); + + const config = { + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + targetDir: "../../etc/passwd", + }, + ], + }; + await writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8"); + + // Config loading doesn't validate targetDir paths + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].targetDir, "../../etc/passwd"); + // TODO: Should validate targetDir doesn't escape project root +}); + +test("very large maxBytes value is accepted", async () => { + const configPath = await writeConfig({ + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + maxBytes: Number.MAX_SAFE_INTEGER, + }, + ], + }); + const { sources } = await loadConfig(configPath); + assert.equal(sources[0].maxBytes, Number.MAX_SAFE_INTEGER); +}); + +test("config with malformed JSON", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-malformed-${Date.now().toString(36)}`, + ); + await mkdir(tmpRoot, { recursive: true }); + const configPath = path.join(tmpRoot, "docs.config.json"); + await writeFile(configPath, '{ "sources": [ invalid json } ]', "utf8"); + + await assert.rejects(() => loadConfig(configPath), /Invalid JSON/i); +}); + +test("config rejects BOM (Byte Order Mark)", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-bom-${Date.now().toString(36)}`, + ); + await mkdir(tmpRoot, { recursive: true }); + const configPath = path.join(tmpRoot, "docs.config.json"); + + const config = { + sources: [{ id: "test", repo: "https://github.com/example/repo.git" }], + }; + // UTF-8 BOM - this is a real edge case that can happen with Windows editors + await writeFile( + configPath, + "\uFEFF" + JSON.stringify(config, null, 2), + "utf8", + ); + + // JSON.parse in Node.js doesn't handle BOM well, this will fail + await assert.rejects(() => loadConfig(configPath), /Invalid JSON/i); +}); + +test("lock file with invalid version", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-lock-ver-${Date.now().toString(36)}`, + ); + await mkdir(tmpRoot, { recursive: true }); + const lockPath = path.join(tmpRoot, "docs.lock"); + + const invalidLock = { + version: 2, + generatedAt: new Date().toISOString(), + toolVersion: "0.1.0", + sources: {}, + }; + await writeFile(lockPath, JSON.stringify(invalidLock, null, 2), "utf8"); + + // Need to import dynamically to avoid tree shaking + const { + default: { readFile: read }, + } = await import("node:fs/promises"); + const raw = await read(lockPath, "utf8"); + const parsed = JSON.parse(raw); + + // Manually validate - version 2 is not valid + assert.notEqual(parsed.version, 1); +}); + +test("lock file with missing required fields", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-lock-miss-${Date.now().toString(36)}`, + ); + await mkdir(tmpRoot, { recursive: true }); + const lockPath = path.join(tmpRoot, "docs.lock"); + + const invalidLock = { + version: 1, + // missing generatedAt, toolVersion, sources + }; + await writeFile(lockPath, JSON.stringify(invalidLock, null, 2), "utf8"); + + // Verify the file was written with missing fields + const { + default: { readFile: read }, + } = await import("node:fs/promises"); + const raw = await read(lockPath, "utf8"); + const parsed = JSON.parse(raw); + assert.equal(parsed.generatedAt, undefined); +}); + +test("lock file with negative bytes", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-lock-neg-${Date.now().toString(36)}`, + ); + await mkdir(tmpRoot, { recursive: true }); + const lockPath = path.join(tmpRoot, "docs.lock"); + + const invalidLock = { + version: 1, + generatedAt: new Date().toISOString(), + toolVersion: "0.1.0", + sources: { + test: { + repo: "https://github.com/example/repo.git", + ref: "main", + resolvedCommit: "abc123", + bytes: -100, + fileCount: 5, + manifestSha256: "def456", + updatedAt: new Date().toISOString(), + }, + }, + }; + await writeFile(lockPath, JSON.stringify(invalidLock, null, 2), "utf8"); + + // Verify the file contains negative bytes + const { + default: { readFile: read }, + } = await import("node:fs/promises"); + const raw = await read(lockPath, "utf8"); + const parsed = JSON.parse(raw); + assert.equal(parsed.sources.test.bytes, -100); +}); + +test("lock file with corrupted JSON", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-lock-corrupt-${Date.now().toString(36)}`, + ); + await mkdir(tmpRoot, { recursive: true }); + const lockPath = path.join(tmpRoot, "docs.lock"); + + await writeFile(lockPath, '{"version": 1, invalid', "utf8"); + + // Verify the corrupted content was written + const { + default: { readFile: read }, + } = await import("node:fs/promises"); + const raw = await read(lockPath, "utf8"); + assert.ok(raw.includes("invalid")); + + // JSON.parse should throw + assert.throws(() => JSON.parse(raw), SyntaxError); +}); + +test("empty include array is rejected", async () => { + const configPath = await writeConfig({ + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + include: [], + }, + ], + }); + await assert.rejects( + () => loadConfig(configPath), + /include.*non-empty array/i, + ); +}); + +test("include with empty string is rejected", async () => { + const configPath = await writeConfig({ + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + include: ["*.md", ""], + }, + ], + }); + await assert.rejects( + () => loadConfig(configPath), + /include.*>=1|include.*non-empty/i, + ); +}); + +test("exclude with empty string is rejected", async () => { + const configPath = await writeConfig({ + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + exclude: ["*.tmp", ""], + }, + ], + }); + await assert.rejects( + () => loadConfig(configPath), + /exclude.*>=1|exclude.*non-empty/i, + ); +}); + +test("config with unknown fields is rejected", async () => { + const configPath = await writeConfig({ + unknownField: "value", + sources: [{ id: "test", repo: "https://github.com/example/repo.git" }], + }); + await assert.rejects(() => loadConfig(configPath), /does not match schema/i); +}); + +test("source with unknown fields is now rejected", async () => { + const configPath = await writeConfig({ + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + unknownSourceField: "value", + }, + ], + }); + // SourceSchema now has .strict() mode properly applied + // Unknown fields should be rejected + await assert.rejects( + () => loadConfig(configPath), + /Unrecognized key|does not match schema/i, + ); +}); + +test("depth must be positive", async () => { + const configPath = await writeConfig({ + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + depth: 0, + }, + ], + }); + await assert.rejects( + () => loadConfig(configPath), + /depth.*>=1|depth.*greater than zero/i, + ); +}); + +test("negative depth is rejected", async () => { + const configPath = await writeConfig({ + sources: [ + { + id: "test", + repo: "https://github.com/example/repo.git", + depth: -1, + }, + ], + }); + await assert.rejects( + () => loadConfig(configPath), + /depth.*>=1|depth.*greater than zero/i, + ); +});