Skip to content

Commit 291ba17

Browse files
authored
Merge pull request #388 from pjungkamp/fmts/raw0
Introduce `raw0` format for zero-terminated strings
2 parents 0e7172e + f4a467a commit 291ba17

7 files changed

Lines changed: 98 additions & 32 deletions

File tree

docs/cli.dj

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ For example,
4040
`jaq --from yaml . myfile.yml`
4141
parses `myfile.yml` as YAML.
4242
Possible values of _FORMAT_ include:
43-
`raw`, `json`, `yaml`, `cbor`, `toml`, `xml`.
43+
`raw`, `raw0`, `json`, `yaml`, `cbor`, `toml`, `xml`.
4444

4545
jaq automatically chooses the corresponding input format for
4646
files with the extensions
@@ -100,6 +100,41 @@ See [`--rawfile`](#--rawfile).
100100

101101
This is equivalent to `--from raw`.
102102

103+
{#--raw-input0}
104+
### `--raw-input0`
105+
106+
Read values like [`--raw-input`](#--raw-input), splitting by NUL (`\0`) instead of newlines.
107+
For example:
108+
109+
```
110+
$ printf "Hello\nWorld\0foo" | jaq --raw-input0
111+
"Hello\nWorld"
112+
"foo"
113+
```
114+
115+
When combined with [`--slurp`](#--slurp),
116+
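this collects all NUL-separated strings into a single array (unlike [`--raw-input`](#--raw-input), where `--slurp` reads the whole input into a single string).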
117+
For example:
118+
119+
```
120+
$ printf "Hello\nWorld\0foo" | jaq -sc --raw-input0
121+
["Hello\nWorld","foo"]
122+
```
123+
124+
This option can be used to round-trip output from [`--raw-output0`](#--raw-output0):
125+
126+
```
127+
$ printf '"some string\\nwith a newline" "another string"' | jaq --raw-output0 | jaq --raw-input0
128+
"some string\nwith a newline"
129+
"another string"
130+
```
131+
132+
This is equivalent to `--from raw0`.
133+
134+
::: Compatibility
135+
`jq` does not have this option.
136+
:::
137+
103138
{#--slurp}
104139
### `-s`, `--slurp`
105140

@@ -194,6 +229,26 @@ $ printf '["Hello\\nWorld"]' | jaq -rc
194229

195230
This is equivalent to `--to raw`.
196231

232+
{#--raw-output0}
233+
### `--raw-output0`
234+
235+
Output values like [`--raw-output`](#--raw-output), terminating them with NUL (`\0`) instead of LF (`\n`).
236+
237+
```
238+
$ printf '"some string\\nwith a newline" true' | jaq --raw-output0 | jaq -s --raw-input tobytes
239+
b"some string\nwith a newline\x00true\x00"
240+
```
241+
242+
Any output value that is a string containing NUL produces an error, because such a NUL could not be distinguished from the NUL that terminates each output.
243+
Note that this does not apply to strings contained in an array or an object, because arrays and objects are written as JSON rather than as raw strings.
244+
245+
```
246+
$ printf '"string with NUL \\u0000"' | jaq --raw-output0 2>/dev/null; echo $?
247+
2
248+
```
249+
250+
This is equivalent to `--to raw0`.
251+
197252
{#--join-output}
198253
### `-j`, `--join-output`
199254

jaq-fmts/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ pub enum Format {
2727
/// When the option `--slurp` is used additionally,
2828
/// then the whole input is read into a single string.
2929
Raw,
30+
/// Zero-terminated text strings
31+
Raw0,
3032
/// JavaScript Object Notation
3133
#[default]
3234
Json,
@@ -61,6 +63,7 @@ impl Format {
6163
match s {
6264
"cbor" => Some(Format::Cbor),
6365
"raw" => Some(Format::Raw),
66+
"raw0" => Some(Format::Raw0),
6467
"json" => Some(Format::Json),
6568
"toml" => Some(Format::Toml),
6669
"xml" => Some(Format::Xml),

jaq-fmts/src/read/formats.rs

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,34 +3,34 @@ use crate::{invalid_data, map_invalid_data, Format};
33
use bytes::Bytes;
44
use jaq_core::box_iter::{box_once, BoxIter};
55
use jaq_json::{Tag, Val};
6-
use std::io::{self, BufRead, Read};
6+
use std::io::{self, Read};
77

88
type Vals<'a> = BoxIter<'a, io::Result<Val>>;
99

1010
/// Read input to string for certain formats.
1111
///
12-
/// This has to be synchronised with [`from_bufread`].
12+
/// This has to be synchronised with [`read`].
1313
pub fn read_string(fmt: Format, read: impl Read) -> Result<String> {
1414
use Format::*;
1515
match fmt {
16-
Raw | Json | Cbor => Ok(String::new()),
16+
Raw | Raw0 | Json | Cbor => Ok(String::new()),
1717
Toml | Xml | Yaml => io::read_to_string(read),
1818
}
1919
}
2020

2121
/// Convert bytes to string for certain formats.
2222
///
23-
/// This has to be synchronised with [`from_bytes`].
23+
/// This has to be synchronised with [`parse`].
2424
pub fn bytes_str(fmt: Format, bytes: &[u8]) -> Result<&str> {
2525
use Format::*;
2626
Ok(match fmt {
27-
Raw | Json | Cbor => "",
27+
Raw | Raw0 | Json | Cbor => "",
2828
Toml | Xml | Yaml => core::str::from_utf8(bytes).map_err(invalid_data)?,
2929
})
3030
}
3131

32-
/// Read value from [`BufRead`] or [`&str`], depending on format.
33-
pub fn from_bufread<'a>(fmt: Format, read: impl BufRead + 'a, s: &'a str, slurp: bool) -> Vals<'a> {
32+
/// Read values from [`io::BufRead`] or [`&str`], depending on format.
33+
pub fn read<'a>(fmt: Format, read: impl io::BufRead + 'a, s: &'a str, slurp: bool) -> Vals<'a> {
3434
use bstr::io::BufReadExt;
3535
let mut read = read;
3636
match fmt {
@@ -40,6 +40,7 @@ pub fn from_bufread<'a>(fmt: Format, read: impl BufRead + 'a, s: &'a str, slurp:
4040
box_once(result.map(|_| Val::utf8_str(buf)))
4141
}
4242
Format::Raw => Box::new(read.byte_lines().map(|r| r.map(Val::utf8_str))),
43+
Format::Raw0 => collect_if(slurp, read.byte_records(0).map(|r| r.map(Val::utf8_str))),
4344
Format::Cbor => collect_if(slurp, cbor::read_many(read)),
4445
Format::Json => collect_if(slurp, json::read_many(read)),
4546
Format::Toml => box_once(toml::parse(s).map_err(invalid_data)),
@@ -48,16 +49,17 @@ pub fn from_bufread<'a>(fmt: Format, read: impl BufRead + 'a, s: &'a str, slurp:
4849
}
4950
}
5051

51-
/// Parse value from file or `s`, depending on format.
52-
pub fn from_bytes<'a>(fmt: Format, bytes: &'a Bytes, s: &'a str, slurp: bool) -> Vals<'a> {
52+
/// Parse values from [`Bytes`] or [`&str`], depending on format.
53+
pub fn parse<'a>(fmt: Format, bytes: &'a Bytes, s: &'a str, slurp: bool) -> Vals<'a> {
5354
use bstr::ByteSlice;
55+
let nul_sep = |s: &'a [u8]| s.strip_suffix(b"\0").unwrap_or(bytes).split_str("\0");
56+
let slice_to_str = |s| Ok(Val::Str(bytes.slice_ref(s), Tag::Utf8));
5457
match fmt {
5558
Format::Raw if slurp => box_once(Ok(Val::Str(bytes.clone(), Tag::Utf8))),
56-
Format::Raw => Box::new(
57-
ByteSlice::lines(&**bytes).map(|line| Ok(Val::Str(bytes.slice_ref(line), Tag::Utf8))),
58-
),
59+
Format::Raw => Box::new(bytes.lines().map(slice_to_str)),
60+
Format::Raw0 => collect_if(slurp, nul_sep(bytes).map(slice_to_str)),
5961
Format::Json => collect_if(slurp, json::parse_many(bytes).map(map_invalid_data)),
6062
Format::Cbor => collect_if(slurp, cbor::parse_many(bytes).map(map_invalid_data)),
61-
Format::Toml | Format::Xml | Format::Yaml => from_bufread(fmt, &[][..], s, slurp),
63+
Format::Toml | Format::Xml | Format::Yaml => read(fmt, &[][..], s, slurp),
6264
}
6365
}

jaq-fmts/src/write/formats.rs

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,30 +20,32 @@ pub fn write(w: &mut dyn Write, writer: &Writer, val: &Val) -> Result {
2020
}
2121

2222
match (val, format) {
23-
(Val::Str(b, _), Format::Raw) => w.write_all(b)?,
23+
(Val::Str(b, _), Format::Raw0) if b.contains(&b'\0') => {
24+
let nul_err = "cannot dump a string containing NUL with `--to raw0` or `--raw-output0`";
25+
return Err(io::Error::new(io::ErrorKind::InvalidData, nul_err));
26+
}
27+
(Val::Str(b, _), Format::Raw | Format::Raw0) => w.write_all(b)?,
2428
(_, Format::Cbor) => cbor::write(w, val)?,
25-
(_, Format::Json | Format::Raw) => jaq_json::write::write(w, pp, 0, val)?,
29+
(_, Format::Json | Format::Raw | Format::Raw0) => jaq_json::write::write(w, pp, 0, val)?,
2630
(_, Format::Yaml) => yaml::write(w, pp, 0, val)?,
2731
(_, Format::Toml) => write!(w, "{}", map_err_to_string(toml::Toml::try_from(val))?)?,
2832
(_, Format::Xml) => map_err_to_string(xml::Xml::try_from(val))?.write(w)?,
2933
};
3034

31-
if match format {
32-
Format::Cbor => false,
33-
Format::Yaml => true,
34-
_ => !join,
35-
} {
36-
// this flushes output, because stdout is line-buffered in Rust
37-
writeln!(w)?
38-
};
39-
40-
// when running `jaq -jn '"prompt> " | (., input)'`,
41-
// this flush is necessary to make "prompt> " appear first
42-
w.flush()?;
35+
w.write_all(match format {
36+
Format::Cbor => b"",
37+
Format::Raw0 => b"\0",
38+
Format::Yaml => b"\n",
39+
_ if *join => b"",
40+
_ => b"\n",
41+
})?;
4342

4443
if yaml_doc {
4544
// end of YAML document
46-
writeln!(w, "...")?
45+
writeln!(w, "...")?;
4746
}
48-
Ok(())
47+
48+
// when running `jaq -jn '"prompt> " | (., input)'`,
49+
// this flush is necessary to make "prompt> " appear first
50+
w.flush()
4951
}

jaq/src/cli.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,13 @@ impl Cli {
9191
"from" => self.from = Some(parse_format("--from", args)?),
9292
"null-input" => self.short('n', args)?,
9393
"raw-input" => self.short('R', args)?,
94+
"raw-input0" => self.from = Some(Format::Raw0),
9495
"slurp" => self.short('s', args)?,
9596

9697
"to" => self.to = Some(parse_format("--to", args)?),
9798
"compact-output" => self.short('c', args)?,
9899
"raw-output" => self.short('r', args)?,
100+
"raw-output0" => self.to = Some(Format::Raw0),
99101
"join-output" => self.short('j', args)?,
100102
"in-place" => self.short('i', args)?,
101103
"sort-keys" => self.short('S', args)?,

jaq/src/help.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@ Arguments:
99
Input options:
1010
-n, --null-input Use null as single input value
1111
-R, --raw-input Read lines of the input as sequence of strings
12+
--raw-input0 Read input like `-R`, splitting by NUL instead of newlines
1213
-s, --slurp Read all input values into one array (per file)
1314
--from <FORMAT> Read input in given format, e.g. yaml
1415

1516
Output options:
1617
-c, --compact-output Print JSON compactly, omitting whitespace
1718
-r, --raw-output Write strings without escaping them with quotes
19+
--raw-output0 Write output like `-r` and print NUL after each output
1820
-j, --join-output Do not print a newline after each value
1921
-i, --in-place Overwrite input file with its output
2022
-S, --sort-keys Print objects sorted by their keys

jaq/src/main.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ fn real_main(cli: &Cli) -> Result<ExitCode, Error> {
129129
let last = if cli.files.is_empty() {
130130
let format = unwrap_or_json(cli.from);
131131
let s = read::read_string(format, io::stdin().lock())?;
132-
let inputs = read::from_bufread(format, io::stdin().lock(), &s, cli.slurp);
132+
let inputs = read::read(format, io::stdin().lock(), &s, cli.slurp);
133133
with_stdout(|out| run(runner, &filter, vars, inputs, |v| write(out, writer, &v)))?
134134
} else {
135135
let mut last = None;
@@ -139,7 +139,7 @@ fn real_main(cli: &Cli) -> Result<ExitCode, Error> {
139139
.map_err(|e| Error::Io(Some(path.display().to_string()), e))?;
140140
let format = unwrap_or_json(cli.from.or_else(|| Format::determine(path)));
141141
let s = read::bytes_str(format, &bytes)?;
142-
let inputs = read::from_bytes(format, &bytes, s, cli.slurp);
142+
let inputs = read::parse(format, &bytes, s, cli.slurp);
143143

144144
if cli.in_place {
145145
// create a temporary file where output is written to

0 commit comments

Comments
 (0)