Skip to content

Commit 4d9c4ab

Browse files
committed
More accurate argv-to-command-line serialization when spawning child processes on Windows
The old implementation had a bug: it didn't quote empty strings. It also didn't follow the special quoting rules required for the first argument (the executable name). The new implementation serializes argv correctly, so that the resulting command line can be parsed by the `CommandLineToArgvW` algorithm.
1 parent 13f78e2 commit 4d9c4ab

File tree

1 file changed

+150
-29
lines changed

1 file changed

+150
-29
lines changed

lib/std/child_process.zig

Lines changed: 150 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -744,9 +744,6 @@ pub const ChildProcess = struct {
744744
windowsDestroyPipe(g_hChildStd_ERR_Rd, g_hChildStd_ERR_Wr);
745745
};
746746

747-
const cmd_line = try windowsCreateCommandLine(self.allocator, self.argv);
748-
defer self.allocator.free(cmd_line);
749-
750747
var siStartInfo = windows.STARTUPINFOW{
751748
.cb = @sizeOf(windows.STARTUPINFOW),
752749
.hStdError = g_hChildStd_ERR_Wr,
@@ -818,7 +815,11 @@ pub const ChildProcess = struct {
818815
const app_name_w = try unicode.utf8ToUtf16LeWithNull(self.allocator, app_basename_utf8);
819816
defer self.allocator.free(app_name_w);
820817

821-
const cmd_line_w = try unicode.utf8ToUtf16LeWithNull(self.allocator, cmd_line);
818+
const cmd_line_w = argvToCommandLineWindows(self.allocator, self.argv) catch |err| switch (err) {
819+
// argv[0] contains unsupported characters that will never resolve to a valid exe.
820+
error.InvalidArg0 => return error.FileNotFound,
821+
else => |e| return e,
822+
};
822823
defer self.allocator.free(cmd_line_w);
823824

824825
run: {
@@ -1236,39 +1237,159 @@ test "windowsCreateProcessSupportsExtension" {
12361237
try std.testing.expect(windowsCreateProcessSupportsExtension(&[_]u16{ '.', 'e', 'X', 'e', 'c' }) == null);
12371238
}
12381239

pub const ArgvToCommandLineError = error{ OutOfMemory, InvalidUtf8, InvalidArg0 };

/// Joins `argv` into one space-separated Windows command line, quoted and escaped so that the
/// `CommandLineToArgvW` algorithm splits it back into the same arguments.
///
/// The line is assembled as UTF-8 and returned as a NUL-terminated UTF-16LE slice allocated with
/// `allocator`; the caller owns it and must free it. An empty `argv` produces an empty line.
///
/// Returns `error.InvalidArg0` when `argv[0]` contains a double quote. Allocation failures and
/// errors from the UTF-8 to UTF-16 conversion are passed through to the caller.
pub fn argvToCommandLineWindows(
    allocator: mem.Allocator,
    argv: []const []const u8,
) ArgvToCommandLineError![:0]u16 {
    var line = std.ArrayList(u8).init(allocator);
    defer line.deinit();

    for (argv, 0..) |arg, arg_index| {
        if (arg_index == 0) {
            // The executable name has its own rules: backslashes are never treated as escapes,
            // so a double quote inside it cannot be represented without part of the name
            // leaking into the following arguments (a potential security problem). Double
            // quotes are not legal in Windows paths anyway, so any name containing one is
            // rejected outright, even where an unquoted form would technically work.
            //
            // The name is wrapped in quotes when it holds a space or an ASCII control character
            // (every byte <= ' '; DEL is not included). An empty name is wrapped as well: not
            // strictly required, but the most compatible choice across implementations of the
            // 'CommandLineToArgvW' algorithm.
            var wrap_arg0 = arg.len == 0;
            for (arg) |ch| {
                if (ch == '"') return error.InvalidArg0;
                if (ch <= ' ') wrap_arg0 = true;
            }

            if (wrap_arg0) try line.append('"');
            try line.appendSlice(arg);
            if (wrap_arg0) try line.append('"');
            continue;
        }

        try line.append(' ');

        // Later arguments are wrapped in quotes when they are empty or contain a double quote,
        // a space, a tab or any other ASCII control character (again every byte <= ' ', which
        // leaves DEL out). Quoting all control characters is more than strictly needed, but
        // keeps the output compatible with different 'CommandLineToArgvW' implementations.
        var wrap = arg.len == 0;
        for (arg) |ch| {
            if (ch <= ' ' or ch == '"') {
                wrap = true;
                break;
            }
        }
        if (!wrap) {
            try line.appendSlice(arg);
            continue;
        }

        try line.append('"');
        var pending_backslashes: usize = 0;
        for (arg) |ch| {
            if (ch == '\\') {
                // Hold backslashes back until we know what follows the run.
                pending_backslashes += 1;
                continue;
            }
            // A run of backslashes is emitted as-is, unless a double quote follows it: then the
            // run is doubled and one more backslash is added to escape the quote itself.
            const backslashes_to_emit = if (ch == '"')
                pending_backslashes * 2 + 1
            else
                pending_backslashes;
            try line.appendNTimes('\\', backslashes_to_emit);
            try line.append(ch);
            pending_backslashes = 0;
        }
        // A trailing run sits directly before the closing quote, so it is doubled to keep that
        // quote from being escaped.
        try line.appendNTimes('\\', pending_backslashes * 2);
        try line.append('"');
    }

    return try unicode.utf8ToUtf16LeWithNull(allocator, line.items);
}
1325+
1326+
test "argvToCommandLineWindows" {
    const expectCommandLine = testArgvToCommandLineWindows;
    const gpa = std.testing.allocator;

    // A realistic invocation: the executable path contains a space and the last argument
    // contains both spaces and nested (already backslash-escaped) double quotes.
    const realistic_argv = [_][]const u8{
        \\C:\Program Files\zig\zig.exe
        ,
        \\run
        ,
        \\.\src\main.zig
        ,
        \\-target
        ,
        \\x86_64-windows-gnu
        ,
        \\-O
        ,
        \\ReleaseSafe
        ,
        \\--
        ,
        \\--emoji=🗿
        ,
        \\--eval=new Regex("Dwayne \"The Rock\" Johnson")
        ,
    };
    try expectCommandLine(
        &realistic_argv,
        \\"C:\Program Files\zig\zig.exe" run .\src\main.zig -target x86_64-windows-gnu -O ReleaseSafe -- --emoji=🗿 "--eval=new Regex(\"Dwayne \\\"The Rock\\\" Johnson\")"
    );

    // No arguments at all serializes to an empty command line.
    try expectCommandLine(&[_][]const u8{}, "");

    // A lone first argument: empty, whitespace and control characters get quoted, while
    // non-ASCII text is passed through unquoted.
    const SingleArgCase = struct { arg0: []const u8, expected: []const u8 };
    const single_arg_cases = [_]SingleArgCase{
        .{ .arg0 = "", .expected = "\"\"" },
        .{ .arg0 = " ", .expected = "\" \"" },
        .{ .arg0 = "\t", .expected = "\"\t\"" },
        .{ .arg0 = "\x07", .expected = "\"\x07\"" },
        .{ .arg0 = "🦎", .expected = "🦎" },
    };
    for (single_arg_cases) |case| {
        try expectCommandLine(&[_][]const u8{case.arg0}, case.expected);
    }

    // Subsequent arguments: whitespace and control characters force quoting, DEL does not.
    const whitespace_argv = [_][]const u8{ "zig", "aa aa", "bb\tbb", "cc\ncc", "dd\r\ndd", "ee\x7Fee" };
    try expectCommandLine(
        &whitespace_argv,
        "zig \"aa aa\" \"bb\tbb\" \"cc\ncc\" \"dd\r\ndd\" ee\x7Fee",
    );

    // Trailing backslashes: left untouched in the first argument, doubled before the closing
    // quote in later ones.
    const trailing_backslash_argv = [_][]const u8{ "\\\\foo bar\\foo bar\\", "\\\\zig zag\\zig zag\\" };
    try expectCommandLine(
        &trailing_backslash_argv,
        "\"\\\\foo bar\\foo bar\\\" \"\\\\zig zag\\zig zag\\\\\"",
    );

    // Any double quote in the first argument is rejected.
    const rejected_arg0s = [_][]const u8{
        "\"quotes\"quotes\"",
        "quotes\"quotes",
        "q u o t e s \" q u o t e s",
    };
    for (rejected_arg0s) |arg0| {
        try std.testing.expectError(
            error.InvalidArg0,
            argvToCommandLineWindows(gpa, &[_][]const u8{arg0}),
        );
    }
}
1384+
1385+
/// Test helper: serializes `argv` with `argvToCommandLineWindows`, converts the UTF-16 result
/// back to UTF-8 and checks that it equals `expected_cmd_line`. Both intermediate buffers are
/// allocated with and released to `std.testing.allocator`.
fn testArgvToCommandLineWindows(argv: []const []const u8, expected_cmd_line: []const u8) !void {
    const gpa = std.testing.allocator;

    const utf16_line = try argvToCommandLineWindows(gpa, argv);
    defer gpa.free(utf16_line);

    const utf8_line = try unicode.utf16leToUtf8Alloc(gpa, utf16_line);
    defer gpa.free(utf8_line);

    try std.testing.expectEqualStrings(expected_cmd_line, utf8_line);
}
12731394

12741395
fn windowsDestroyPipe(rd: ?windows.HANDLE, wr: ?windows.HANDLE) void {

0 commit comments

Comments
 (0)