Skip to content

Commit 27cf93f

Browse files
feat(#20): add Data Matrix C40/Text encoding modes with auto-selection
- encodeC40() — 3 chars → 2 codewords for uppercase + digits
- encodeTextMode() — 3 chars → 2 codewords for lowercase + digits
- encodeAuto() — auto-selects best mode (ASCII, C40, or Text)
- encodeDataMatrix() now uses auto mode for optimal encoding
- Shift sets for control chars, punctuation, and cross-case
- 9 tests added

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9afd905 commit 27cf93f

3 files changed

Lines changed: 199 additions & 3 deletions

File tree

src/encoders/datamatrix/encoder.ts

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,129 @@ function randomizePad(padValue: number, position: number): number {
8383
const result = padValue + pseudoRandom;
8484
return result <= 254 ? result : result - 254;
8585
}
86+
87+
/**
 * Classify a character code for C40 encodation.
 *
 * Character sets and the values assigned within them:
 *   Set 0 (basic):   space = 3, '0'-'9' = 4-13, 'A'-'Z' = 14-39
 *   Set 1 (shift 1): control characters 0-31, value = code
 *   Set 2 (shift 2): '!'..'/' = 0-14, ':'..'@' = 15-21, '['..'_' = 22-26
 *   Set 3 (shift 3): '`', 'a'-'z', '{', '|', '}', '~', DEL = 0-31
 *
 * @param ch - Character code to classify.
 * @returns The set index and the value inside that set; `set` is -1 (with
 *   value 0) when the code falls in none of the ranges above.
 */
function c40Value(ch: number): { set: number; value: number } {
  // Single-exit form: start from the "not encodable" answer and overwrite it
  // with the first (and only possible) matching range.
  let set = -1;
  let value = 0;

  if (ch === 32) {
    // space
    set = 0;
    value = 3;
  } else if (ch >= 48 && ch <= 57) {
    // 0-9
    set = 0;
    value = ch - 48 + 4;
  } else if (ch >= 65 && ch <= 90) {
    // A-Z
    set = 0;
    value = ch - 65 + 14;
  } else if (ch >= 0 && ch <= 31) {
    // control characters
    set = 1;
    value = ch;
  } else if (ch >= 33 && ch <= 47) {
    // !"#$%&'()*+,-./
    set = 2;
    value = ch - 33;
  } else if (ch >= 58 && ch <= 64) {
    // :;<=>?@
    set = 2;
    value = ch - 58 + 15;
  } else if (ch >= 91 && ch <= 95) {
    // [\]^_
    set = 2;
    value = ch - 91 + 22;
  } else if (ch >= 96 && ch <= 127) {
    // `a-z{|}~ and DEL
    set = 3;
    value = ch - 96;
  }

  return { set, value };
}
103+
104+
/**
 * Classify a character code for Text encodation.
 *
 * Mirrors the C40 mapping with the letter cases exchanged: lowercase letters
 * sit in the basic set (values 14-39) and uppercase letters move to shift 3
 * (values 1-26). Space, digits, control characters and the shift 2
 * punctuation keep the same values as in C40.
 *
 * @param ch - Character code to classify.
 * @returns The set index and the value inside that set; `set` is -1 (with
 *   value 0) when the code falls in none of the handled ranges.
 */
function textValue(ch: number): { set: number; value: number } {
  const basic = (value: number) => ({ set: 0, value });
  const shift2 = (value: number) => ({ set: 2, value });
  const shift3 = (value: number) => ({ set: 3, value });

  // Basic set: space, digits, lowercase letters
  if (ch === 32) return basic(3);
  if (ch >= 48 && ch <= 57) return basic(ch - 48 + 4);
  if (ch >= 97 && ch <= 122) return basic(ch - 97 + 14);

  // Shift 1: control characters map to their own code
  if (ch >= 0 && ch <= 31) return { set: 1, value: ch };

  // Shift 2: three punctuation runs laid out back to back
  if (ch >= 33 && ch <= 47) return shift2(ch - 33);
  if (ch >= 58 && ch <= 64) return shift2(ch - 58 + 15);
  if (ch >= 91 && ch <= 95) return shift2(ch - 91 + 22);

  // Shift 3: backtick, uppercase letters, then {|}~ and DEL
  if (ch === 96) return shift3(0);
  if (ch >= 65 && ch <= 90) return shift3(ch - 65 + 1);
  if (ch >= 123 && ch <= 127) return shift3(ch - 123 + 27);

  return { set: -1, value: 0 };
}
118+
119+
/**
 * Encode text in C40 mode, which suits uppercase letters and digits.
 *
 * Three C40 values are packed into two codewords. The work is delegated to
 * the shared C40/Text packer, configured with the C40 latch codeword and the
 * C40 character classifier.
 *
 * @param text - Text to encode.
 * @returns Codewords produced by the shared packer for latch codeword 230.
 */
export function encodeC40(text: string): number[] {
  const c40Latch = 230;
  return encodeC40Text(text, c40Latch, c40Value);
}
127+
128+
/**
 * Encode text in Text mode, which suits lowercase letters and digits.
 *
 * Three Text values are packed into two codewords. The work is delegated to
 * the shared C40/Text packer, configured with the Text latch codeword and the
 * Text character classifier.
 *
 * @param text - Text to encode.
 * @returns Codewords produced by the shared packer for latch codeword 239.
 */
export function encodeTextMode(text: string): number[] {
  const textLatch = 239;
  return encodeC40Text(text, textLatch, textValue);
}
136+
137+
/**
 * Shared packer for the C40 and Text encodation modes.
 *
 * Output layout: `[latchCW, ...packed pairs, 254, ...ASCII tail]`.
 *
 * Each character becomes one value (basic set) or two values (shift
 * indicator + value). Values are packed three at a time into two codewords
 * as `v1 * 1600 + v2 * 40 + v3 + 1`, split into high and low bytes.
 *
 * Only triplets that end exactly on a character boundary are packed. Every
 * character after the last such boundary — leftover characters that do not
 * fill a triplet, and everything from the first character `valueFn` cannot
 * classify — is emitted through `encodeASCII` after the unlatch codeword, so
 * no input character is ever dropped.
 *
 * The unlatch codeword (254) is always emitted: this function does not know
 * the final symbol size, and any codewords appended afterwards (such as
 * padding) must be read in ASCII mode.
 *
 * @param text - Text to encode.
 * @param latchCW - Codeword that latches into the target mode.
 * @param valueFn - Classifier returning the character set (0 = basic,
 *   1-3 = shift sets, -1 = not encodable) and the value inside that set.
 * @returns The codeword sequence, starting with `latchCW`.
 */
function encodeC40Text(
  text: string,
  latchCW: number,
  valueFn: (ch: number) => { set: number; value: number },
): number[] {
  const UNLATCH = 254;

  const values: number[] = [];
  // Number of values / characters covered by complete triplets that end on a
  // character boundary. A shift pair may straddle two triplets, so a boundary
  // only counts when the running value count is a multiple of three.
  let packableValues = 0;
  let packedChars = 0;

  for (let i = 0; i < text.length; i++) {
    const { set, value } = valueFn(text.charCodeAt(i));
    if (set === -1) break; // not encodable here: the rest goes to ASCII

    if (set > 0) values.push(set - 1); // shift indicator (0=shift1, 1=shift2, 2=shift3)
    values.push(value);

    if (values.length % 3 === 0) {
      packableValues = values.length;
      packedChars = i + 1;
    }
  }

  const codewords: number[] = [latchCW];

  // Pack each triplet into a 16-bit quantity, high byte first.
  for (let j = 0; j < packableValues; j += 3) {
    const packed = values[j]! * 1600 + values[j + 1]! * 40 + values[j + 2]! + 1;
    codewords.push(Math.floor(packed / 256), packed % 256);
  }

  codewords.push(UNLATCH);

  // Whatever the packed triplets did not cover is encoded in ASCII mode.
  if (packedChars < text.length) {
    codewords.push(...encodeASCII(text.substring(packedChars)));
  }

  return codewords;
}
180+
181+
/**
 * Choose between ASCII, C40 and Text encodation for the given text.
 *
 * The text is always encoded in ASCII as the baseline. C40 is attempted when
 * uppercase letters plus digits exceed 60% of `text.length`; Text mode is
 * attempted when lowercase letters plus digits do. A candidate is returned
 * only when it is strictly shorter than the ASCII baseline, and C40 is
 * checked before Text.
 *
 * @param text - Text to encode.
 * @returns The codewords of the first candidate that beats ASCII, otherwise
 *   the ASCII codewords.
 */
export function encodeAuto(text: string): number[] {
  const tally = { upper: 0, lower: 0, digits: 0 };
  for (const ch of text) {
    const code = ch.charCodeAt(0);
    if (code >= 65 && code <= 90) {
      tally.upper++;
    } else if (code >= 97 && code <= 122) {
      tally.lower++;
    } else if (code >= 48 && code <= 57) {
      tally.digits++;
    }
  }

  const threshold = text.length * 0.6;
  const baseline = encodeASCII(text);

  // Uppercase-heavy input: try C40
  if (tally.upper + tally.digits > threshold) {
    const viaC40 = encodeC40(text);
    if (viaC40.length < baseline.length) {
      return viaC40;
    }
  }

  // Lowercase-heavy input: try Text mode
  if (tally.lower + tally.digits > threshold) {
    const viaText = encodeTextMode(text);
    if (viaText.length < baseline.length) {
      return viaText;
    }
  }

  return baseline;
}

src/encoders/datamatrix/index.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
*/
77

88
import { InvalidInputError, CapacityError } from "../../errors";
9-
import { encodeASCII, padCodewords } from "./encoder";
9+
import { encodeASCII, encodeAuto, padCodewords } from "./encoder";
1010
import { selectSymbolSize } from "./tables";
1111
import { generateInterleavedEC } from "./reed-solomon";
1212
import { placeModules } from "./placement";
@@ -30,8 +30,8 @@ export function encodeDataMatrix(text: string): boolean[][] {
3030
throw new InvalidInputError("Data Matrix input must not be empty");
3131
}
3232

33-
// Step 1: Encode text to data codewords (ASCII mode)
34-
const dataCodewords = encodeASCII(text);
33+
// Step 1: Encode text to data codewords (auto-select best mode)
34+
const dataCodewords = encodeAuto(text);
3535

3636
// Step 2: Select the smallest symbol size that fits the data
3737
const symbol = selectSymbolSize(dataCodewords.length);
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import { describe, expect, it } from "vitest";
2+
import {
3+
encodeASCII,
4+
encodeC40,
5+
encodeTextMode,
6+
encodeAuto,
7+
} from "../src/encoders/datamatrix/encoder";
8+
9+
// C40 mode: latch codeword, size relative to ASCII, and presence of an unlatch.
describe("Data Matrix C40 encoding", () => {
  it("encodes uppercase text", () => {
    const input = "HELLO WORLD";
    const encoded = encodeC40(input);
    const [latch] = encoded;
    expect(latch).toBe(230); // C40 latch
    expect(encoded.length).toBeLessThan(encodeASCII(input).length + 1);
  });

  it("encodes digits efficiently", () => {
    const encoded = encodeC40("ABC 123");
    const [latch] = encoded;
    expect(latch).toBe(230);
    expect(encoded.length).toBeGreaterThan(1);
  });

  it("falls back for non-C40 characters", () => {
    // Lowercase letters are outside the C40 basic set (they are reached via
    // shift 3); the output is expected to contain the unlatch codeword 254.
    const encoded = encodeC40("hello");
    expect(encoded).toContain(254);
  });
});
28+
29+
// Text mode: latch codeword and size relative to ASCII.
describe("Data Matrix Text encoding", () => {
  it("encodes lowercase text", () => {
    const input = "hello world";
    const encoded = encodeTextMode(input);
    const [latch] = encoded;
    expect(latch).toBe(239); // Text latch
    expect(encoded.length).toBeLessThan(encodeASCII(input).length + 1);
  });

  it("handles uppercase via shift", () => {
    // The leading uppercase letter lives in shift 3 of the Text character set.
    const encoded = encodeTextMode("Hello");
    const [latch] = encoded;
    expect(latch).toBe(239);
    expect(encoded.length).toBeGreaterThan(1);
  });
});
42+
43+
// Auto mode: output is never longer than plain ASCII and stays within byte range.
describe("Data Matrix auto encoding", () => {
  it("uses ASCII for mixed content", () => {
    // Short mixed-case input; only a non-empty result is required here.
    const encoded = encodeAuto("Hi 123");
    expect(encoded.length).toBeGreaterThan(0);
  });

  it("uses C40 for uppercase-heavy text", () => {
    const sample = "HELLO WORLD ABC DEF";
    const autoLength = encodeAuto(sample).length;
    const asciiLength = encodeASCII(sample).length;
    // Auto selection must not be worse than the ASCII baseline.
    expect(autoLength).toBeLessThanOrEqual(asciiLength);
  });

  it("uses Text mode for lowercase-heavy text", () => {
    const sample = "hello world abc def";
    const autoLength = encodeAuto(sample).length;
    const asciiLength = encodeASCII(sample).length;
    expect(autoLength).toBeLessThanOrEqual(asciiLength);
  });

  it("produces valid codewords", () => {
    // Every codeword must fit in a single byte.
    for (const codeword of encodeAuto("Test Data 123")) {
      expect(codeword).toBeGreaterThanOrEqual(0);
      expect(codeword).toBeLessThanOrEqual(255);
    }
  });
});

0 commit comments

Comments
 (0)