Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 65 additions & 6 deletions crates/anstyle-parse/benches/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use criterion::{black_box, Criterion};
use anstyle_parse::*;

struct BenchDispatcher;
impl Perform for BenchDispatcher {
impl Perform<char> for BenchDispatcher {
fn print(&mut self, c: char) {
black_box(c);
}
Expand Down Expand Up @@ -32,6 +32,35 @@ impl Perform for BenchDispatcher {
black_box((intermediates, ignore, byte));
}
}
/// Benchmark sink for the `&str`-based parsing path.
///
/// Each callback only hands its arguments to `black_box`; nothing is stored.
impl Perform<&'_ str> for BenchDispatcher {
    fn print(&mut self, text: &'_ str) {
        black_box(text);
    }

    fn execute(&mut self, control: u8) {
        black_box(control);
    }

    fn hook(&mut self, params: &Params, intermediates: &[u8], ignore: bool, action: u8) {
        let args = (params, intermediates, ignore, action);
        black_box(args);
    }

    fn put(&mut self, data: u8) {
        black_box(data);
    }

    fn osc_dispatch(&mut self, params: &[&[u8]], bell_terminated: bool) {
        let args = (params, bell_terminated);
        black_box(args);
    }

    fn csi_dispatch(&mut self, params: &Params, intermediates: &[u8], ignore: bool, action: u8) {
        let args = (params, intermediates, ignore, action);
        black_box(args);
    }

    fn esc_dispatch(&mut self, intermediates: &[u8], ignore: bool, final_byte: u8) {
        let args = (intermediates, ignore, final_byte);
        black_box(args);
    }
}

#[derive(Default)]
struct Strip(String);
Expand All @@ -40,14 +69,22 @@ impl Strip {
Self(String::with_capacity(capacity))
}
}
impl Perform for Strip {
/// `char`-at-a-time sink: appends every printed character to the wrapped `String`.
impl Perform<char> for Strip {
    /// ASCII whitespace control bytes are printed rather than executed.
    fn print_control(byte: u8) -> bool {
        u8::is_ascii_whitespace(&byte)
    }

    fn print(&mut self, c: char) {
        let Self(buffer) = self;
        buffer.push(c);
    }
}
/// Slice-at-a-time sink: appends every printed `&str` run to the wrapped `String`.
impl Perform<&'_ str> for Strip {
    /// ASCII whitespace control bytes are printed rather than executed.
    fn print_control(byte: u8) -> bool {
        u8::is_ascii_whitespace(&byte)
    }

    fn print(&mut self, text: &'_ str) {
        self.0 += text;
    }
}

fn parse(c: &mut Criterion) {
for (name, content) in [
Expand All @@ -61,28 +98,50 @@ fn parse(c: &mut Criterion) {
),
] {
let mut group = c.benchmark_group(name);
group.bench_function("advance", |b| {
group.bench_function("advance_byte", |b| {
b.iter(|| {
let mut dispatcher = BenchDispatcher;
let mut parser = Parser::<DefaultCharAccumulator>::new();

for byte in content {
parser.advance(&mut dispatcher, *byte);
parser.advance_byte(&mut dispatcher, *byte);
}
})
});
group.bench_function("advance_strip", |b| {
if let Ok(content) = std::str::from_utf8(content) {
group.bench_function("advance_str", |b| {
b.iter(|| {
let mut dispatcher = BenchDispatcher;
let mut parser = Parser::<DefaultCharAccumulator>::new();

parser.advance_str(&mut dispatcher, content);
})
});
}
group.bench_function("advance_byte(strip)", |b| {
b.iter(|| {
let mut stripped = Strip::with_capacity(content.len());
let mut parser = Parser::<DefaultCharAccumulator>::new();

for byte in content {
parser.advance(&mut stripped, *byte);
parser.advance_byte(&mut stripped, *byte);
}

black_box(stripped.0)
})
});
if let Ok(content) = std::str::from_utf8(content) {
group.bench_function("advance_str(strip)", |b| {
b.iter(|| {
let mut stripped = Strip::with_capacity(content.len());
let mut parser = Parser::<DefaultCharAccumulator>::new();

parser.advance_str(&mut stripped, content);

black_box(stripped.0)
})
});
}
}
}

Expand Down
4 changes: 2 additions & 2 deletions crates/anstyle-parse/examples/parselog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use anstyle_parse::{DefaultCharAccumulator, Params, Parser, Perform};
/// A type implementing Perform that just logs actions
struct Log;

impl Perform for Log {
impl Perform<char> for Log {
fn print(&mut self, c: char) {
println!("[print] {:?}", c);
}
Expand Down Expand Up @@ -66,7 +66,7 @@ fn main() {
Ok(0) => break,
Ok(n) => {
for byte in &buf[..n] {
statemachine.advance(&mut performer, *byte);
statemachine.advance_byte(&mut performer, *byte);
}
}
Err(err) => {
Expand Down
147 changes: 140 additions & 7 deletions crates/anstyle-parse/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,69 @@ where
///
/// Requires a [`Perform`] in case `byte` triggers an action
#[inline]
pub fn advance<P: Perform>(&mut self, performer: &mut P, byte: u8) {
// Advance the parser over a whole `&str`.
//
// Runs of bytes that are printable from the `Ground` state are passed to
// `performer.print` as a single `&str` slice. Every other byte is fed one at a
// time to `next_byte`, which drives `advance_byte` through a `Forward` adapter.
pub fn advance_str<'i, P>(&mut self, performer: &mut P, bytes: &'i str)
where
P: Perform<&'i str>,
{
let mut bytes = bytes.as_bytes();
'empty: while !bytes.is_empty() {
// Not in `Ground`: consume byte-by-byte until the state is `Ground` again, and
// leave the outer loop altogether if the input runs out first.
while !matches!(self.state, State::Ground) {
if !self.next_byte(performer, &mut bytes) {
break 'empty;
}
}
// Index of the first byte that is NOT printable from `Ground`; everything before
// it forms one printable run. `None` means the whole remainder is printable.
let offset = bytes.iter().copied().position(|b| {
let change = table::state_change(State::Ground, b);
let (_state, action) = unpack(change);
let printable = action == Action::Print
|| action == Action::BeginUtf8
// since we know the input is valid UTF-8, the only thing we can do with
// continuations is to print them
|| is_utf8_continuation(b)
|| (action == Action::Execute && P::print_control(b));
!printable
});
if let Some(offset) = offset {
// Emit the printable prefix (skipped when empty), then hand the byte that ended
// the run to the state machine.
if offset != 0 {
let (printable, next) = bytes.split_at(offset);
// The cut is at a non-continuation byte, so the run ends on a char boundary.
// NOTE(review): the `unwrap` also assumes the run *starts* on a char boundary —
// TODO confirm this holds when a sequence ends in the middle of a multi-byte char.
let printable = core::str::from_utf8(printable).unwrap();
performer.print(printable);
bytes = next;
}
self.next_byte(performer, &mut bytes);
} else {
// No terminating byte: print everything that is left and finish with an empty slice.
let (printable, next) = (bytes, b"");
let printable = core::str::from_utf8(printable).unwrap();
performer.print(printable);
bytes = next;
}
}
}

/// Pop the first byte off `bytes` and run it through [`Self::advance_byte`] via a
/// [`Forward`] adapter around `performer`.
///
/// Returns `true` when a byte was consumed (and `bytes` now points at the rest),
/// `false` when `bytes` was already empty.
#[inline]
fn next_byte<'i, P, O>(&mut self, performer: &mut P, bytes: &mut &'i [u8]) -> bool
where
    P: Perform<O>,
{
    match bytes.split_first() {
        Some((&head, rest)) => {
            let mut forward = Forward::new(performer);
            self.advance_byte(&mut forward, head);
            *bytes = rest;
            true
        }
        None => false,
    }
}

/// Advance the parser state
///
/// Requires a [`Perform`] in case `byte` triggers an action
///
/// [`Perform`]: trait.Perform.html
#[inline]
pub fn advance_byte<P>(&mut self, performer: &mut P, byte: u8)
where
P: Perform<char>,
{
// Utf8 characters are handled out-of-band.
if let State::Utf8 = self.state {
self.process_utf8(performer, byte);
Expand All @@ -120,7 +182,7 @@ where
#[inline]
fn process_utf8<P>(&mut self, performer: &mut P, byte: u8)
where
P: Perform,
P: Perform<char>,
{
if let Some(c) = self.utf8_parser.add(byte) {
performer.print(c);
Expand All @@ -131,7 +193,7 @@ where
#[inline]
fn perform_state_change<P>(&mut self, performer: &mut P, state: State, action: Action, byte: u8)
where
P: Perform,
P: Perform<char>,
{
match state {
State::Anywhere => {
Expand Down Expand Up @@ -179,7 +241,10 @@ where
///
/// The aliasing is needed here for multiple slices into self.osc_raw
#[inline]
fn osc_dispatch<P: Perform>(&self, performer: &mut P, byte: u8) {
fn osc_dispatch<P>(&self, performer: &mut P, byte: u8)
where
P: Perform<char>,
{
let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] =
unsafe { MaybeUninit::uninit().assume_init() };

Expand All @@ -196,7 +261,10 @@ where
}

#[inline]
fn perform_action<P: Perform>(&mut self, performer: &mut P, action: Action, byte: u8) {
fn perform_action<P>(&mut self, performer: &mut P, action: Action, byte: u8)
where
P: Perform<char>,
{
match action {
Action::Print => performer.print(byte as char),
Action::Execute if P::print_control(byte) => performer.print(byte as char),
Expand Down Expand Up @@ -392,15 +460,15 @@ impl<'a> utf8::Receiver for VtUtf8Receiver<'a> {
/// a useful way in my own words for completeness, but the site should be
/// referenced if something isn't clear. If the site disappears at some point in
/// the future, consider checking archive.org.
pub trait Perform {
pub trait Perform<P> {
/// Whether single-byte control characters should be [`Perform::execute`]d or
/// [`Perform::print`]ed.
fn print_control(_byte: u8) -> bool {
false
}

/// Draw a character to the screen and update states.
fn print(&mut self, _c: char) {}
fn print(&mut self, _c: P) {}

/// Execute a C0 or C1 control function.
fn execute(&mut self, _byte: u8) {}
Expand Down Expand Up @@ -449,3 +517,68 @@ pub trait Perform {
/// subsequent characters were ignored.
fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {}
}

/// Wrapper around a mutably borrowed performer `P`, tagged with the type `O` that
/// `P` prints.
struct Forward<'p, P, O> {
/// The wrapped performer.
inner: &'p mut P,
/// Records the `O` type parameter only; no value of type `O` is stored.
o: core::marker::PhantomData<O>,
}

impl<'p, P, O> Forward<'p, P, O>
where
    P: Perform<O>,
{
    /// Wrap `inner` so it can be passed where a `Perform<char>` is required.
    fn new(inner: &'p mut P) -> Self {
        let o = core::marker::PhantomData;
        Self { inner, o }
    }
}

/// Presents a `Perform<O>` as a `Perform<char>`.
///
/// Every callback except `print` is passed straight through to the wrapped
/// performer. `print` cannot be forwarded (a `char` is not an `O`), so it is
/// treated as a logic error in debug builds and ignored otherwise.
impl<'p, P, O> Perform<char> for Forward<'p, P, O>
where
    P: Perform<O>,
{
    /// Never asks for control bytes to be printed.
    fn print_control(_byte: u8) -> bool {
        false
    }

    /// Panics in debug builds; does nothing when debug assertions are disabled.
    fn print(&mut self, _c: char) {
        if cfg!(debug_assertions) {
            panic!("should not be printing {:?}", _c);
        }
    }

    fn execute(&mut self, control: u8) {
        self.inner.execute(control);
    }

    fn hook(&mut self, params: &Params, intermediates: &[u8], ignore: bool, final_byte: u8) {
        self.inner.hook(params, intermediates, ignore, final_byte);
    }

    fn put(&mut self, data: u8) {
        self.inner.put(data);
    }

    fn unhook(&mut self) {
        self.inner.unhook();
    }

    fn osc_dispatch(&mut self, params: &[&[u8]], bell_terminated: bool) {
        self.inner.osc_dispatch(params, bell_terminated);
    }

    fn csi_dispatch(&mut self, params: &Params, intermediates: &[u8], ignore: bool, final_byte: u8) {
        self.inner.csi_dispatch(params, intermediates, ignore, final_byte);
    }

    fn esc_dispatch(&mut self, intermediates: &[u8], ignore: bool, final_byte: u8) {
        self.inner.esc_dispatch(intermediates, ignore, final_byte);
    }
}

/// Returns `true` when `b` has the bit pattern `10xxxxxx` (`0x80..=0xbf`), i.e. it
/// is a UTF-8 continuation byte.
#[inline]
fn is_utf8_continuation(b: u8) -> bool {
    const TAG_MASK: u8 = 0b1100_0000;
    const CONTINUATION_TAG: u8 = 0b1000_0000;
    (b & TAG_MASK) == CONTINUATION_TAG
}
7 changes: 7 additions & 0 deletions crates/anstyle-parse/tests/testsuite.proptest-regressions
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 001bdfa800d266450c8dc1f7d2b63920b433ced8528f12f343c7925d1ab943c9 # shrinks to input = "𐂀"
Loading