bytecodealliance · fitzgen · Jul 25, 2024 · Jul 23, 2024 · Jul 25, 2024 · Jul 25, 2024
@@ -210,6 +210,7 @@ wasmtime-fuzzing = { path = "crates/fuzzing" }
 wasmtime-jit-icache-coherence = { path = "crates/jit-icache-coherence", version = "=24.0.0" }
 wasmtime-wit-bindgen = { path = "crates/wit-bindgen", version = "=24.0.0" }
 test-programs-artifacts = { path = 'crates/test-programs/artifacts' }
+pulley-interpreter-fuzz = { path = 'pulley/fuzz' }
 
 cranelift-wasm = { path = "cranelift/wasm", version = "0.111.0" }
 cranelift-codegen = { path = "cranelift/codegen", version = "0.111.0", default-features = false, features = ["std", "unwind", "trace-log"] }

@@ -24,6 +24,7 @@ cranelift-native = { workspace = true }
 cranelift-control = { workspace = true }
 libfuzzer-sys = { workspace = true, features = ["arbitrary-derive"] }
 target-lexicon = { workspace = true }
+pulley-interpreter-fuzz = { workspace = true }
 smallvec = { workspace = true }
 wasmparser = { workspace = true }
 wasmtime = { workspace = true, features = ["winch"] }
@@ -122,3 +123,10 @@ path = "fuzz_targets/memory_accesses.rs"
 test = false
 doc = false
 bench = false
+
+[[bin]]
+name = "pulley"
+path = "fuzz_targets/pulley.rs"
+test = false
+doc = false
+bench = false
@@ -0,0 +1,21 @@
+#![no_main]
+
+use libfuzzer_sys::{arbitrary::*, fuzz_target};
+use pulley_interpreter_fuzz::{interp, roundtrip};
+
+// libFuzzer entry point: forwards the raw input bytes to `fuzz`. The returned
+// `Result` is dropped on purpose -- `fuzz` only yields `Err` when the input
+// bytes could not be decoded into a test case.
+fuzz_target!(|data: &[u8]| {
+    let _ = fuzz(data);
+});
+
+/// Runs a single fuzz iteration over the raw `data` bytes.
+///
+/// The first value drawn from the input (either `0` or `1`) selects the
+/// oracle: `0` runs `roundtrip`, `1` runs `interp`. All remaining input is
+/// then consumed to build that oracle's argument.
+///
+/// Returns `Err` when the selector or the oracle's argument cannot be
+/// generated from `data`.
+fn fuzz(data: &[u8]) -> Result<()> {
+    // The logger-initialization result is deliberately ignored.
+    let _ = env_logger::try_init();
+
+    let mut input = Unstructured::new(data);
+    let oracle = input.int_in_range(0..=1)?;
+    if oracle == 0 {
+        roundtrip(Arbitrary::arbitrary_take_rest(input)?);
+    } else if oracle == 1 {
+        interp(Arbitrary::arbitrary_take_rest(input)?);
+    } else {
+        // `int_in_range(0..=1)` was asked for a value in `0..=1` only.
+        unreachable!();
+    }
+
+    Ok(())
+}
@@ -0,0 +1,30 @@
+[package]
+authors = ["The Pulley Project Developers"]
+description = "The Pulley interpreter, its bytecode definition, encoder, decoder, etc."
+edition.workspace = true
+license = "Apache-2.0 WITH LLVM-exception"
+name = "pulley-interpreter"
+readme = "./README.md"
+repository = "https://github.com/bytecodealliance/wasmtime/tree/main/pulley"
+version = "0.1.0"
+
+[lints]
+workspace = true
+
+[dependencies]
+arbitrary = { workspace = true, optional = true }
+log = { workspace = true }
+
+[dev-dependencies]
+env_logger = { workspace = true }
+
+[features]
+std = []
+arbitrary = ["dep:arbitrary", "arbitrary/derive", "std"]
+encode = []
+decode = []
+disas = ["decode"]
+interp = ["decode"]
+
+[package.metadata.docs.rs]
+all-features = true
@@ -0,0 +1,122 @@
+<div align="center">
+  <h1>Pulley</h1>
+
+  <h3>Portable, Universal, Low-Level Execution strategY</h3>
+
+  <p>
+    <strong>A portable bytecode and fast interpreter</strong>
+  </p>
+
+  <strong>A <a href="https://bytecodealliance.org/">Bytecode Alliance</a> project</strong>
+
+  <p>
+    <a href="https://github.com/bytecodealliance/wasmtime/actions?query=workflow%3ACI"><img src="https://github.com/bytecodealliance/wasmtime/workflows/CI/badge.svg" alt="build status" /></a>
+    <a href="https://bytecodealliance.zulipchat.com/#narrow/stream/217126-wasmtime"><img src="https://img.shields.io/badge/zulip-join_chat-brightgreen.svg" alt="zulip chat" /></a>
+    <img src="https://img.shields.io/badge/rustc-stable+-green.svg" alt="supported rustc stable" />
+    <a href="https://docs.rs/pulley-interpreter"><img src="https://docs.rs/pulley-interpreter/badge.svg" alt="Documentation Status" /></a>
+  </p>
+
+  <h3>
+    <a href="https://bytecodealliance.zulipchat.com/#narrow/stream/217126-wasmtime">Chat</a>
+  </h3>
+</div>
+
+## About
+
+Pulley is a portable bytecode and fast interpreter for use in Wasmtime.
+
+Pulley's primary goal is portability and its secondary goal is fast
+interpretation.
+
+Pulley is not intended to be a simple reference interpreter, to support
+dynamically switching to just-in-time compiled code, or even to be the very
+fastest interpreter in the world.
+
+For more details on Pulley's motivation, goals, and non-goals, see [the Bytecode
+Alliance RFC that originally proposed Pulley][rfc].
+
+[rfc]: https://github.com/bytecodealliance/rfcs/blob/main/accepted/pulley.md
+
+## Status
+
+Pulley is very much still a work in progress! Expect the details of the bytecode
+to change, instructions to appear and disappear, and APIs to be overhauled.
+
+## Example
+
+Here is the disassembly of `f(a, b) = a + b` in Pulley today:
+
+```
+       0: 11 1f f0 ff ff ff ff ff ff ff   xconst64 x31, 18446744073709551600
+       a: 12 20 20 1f                     xadd32 sp, sp, x31
+       e: 32 20 08 21                     store64_offset8 sp, 8, lr
+      12: 30 20 22                        store64 sp, fp
+      15: 0b 22 20                        xmov fp, sp
+      18: 12 00 00 01                     xadd32 x0, x0, x1
+      1c: 0b 20 22                        xmov sp, fp
+      1f: 25 21 20 08                     load64_offset8 lr, sp, 8
+      23: 22 22 20                        load64 fp, sp
+      26: 0e 1f 10                        xconst8 x31, 16
+      29: 12 20 20 1f                     xadd32 sp, sp, x31
+      2d: 00                              ret
+```
+
+Note that there are a number of things that could be improved here:
+
+* We could avoid allocating and deallocating a stack frame because this function's
+  body doesn't use any stack slots.
+* We could sign-extend, rather than zero-extend, constants so that `-16` has a
+  single-byte encoding instead of an eight-byte encoding.
+* We could collapse the whole prologue and epilogue instruction sequences into
+  super-instructions, since they are identical (modulo the frame size immediate)
+  for all functions.
+
+As mentioned above, Pulley is very much a work in progress.
+
+## Principles
+
+What follows are some general, incomplete, and sometimes-conflicting principles
+that we try to follow when designing the Pulley bytecode format and its
+interpreter:
+
+* The bytecode should be simple and fast to decode in software. For example, we
+  should avoid overly-complicated bitpacking, and only reach for that kind of
+  thing when benchmarks and profiles show it to be of benefit.
+
+* The interpreter never materializes `enum Instruction { .. }` values. Instead,
+  it decodes immediates and operands as needed in each opcode handler. This
+  avoids constructing unnecessary temporary storage and branching on opcode
+  multiple times.
+
+* Because we never materialize `enum Instruction { .. }` values, we don't have
+  to worry about unused padding or one very-large instruction inflating the size
+  of all the rest of our small instructions. To put it concisely: we can lean
+  into a variable-length encoding where some instructions require only a single
+  byte and others require many. This helps keep the bytecode compact and
+  cache-efficient.
+
+* We lean into defining super-instructions (sometimes called "macro ops") that
+  perform the work of multiple operations in a single instruction. The more work
+  we do in each turn of the interpreter loop the less we are impacted by its
+  overhead. Additionally, Cranelift, as the primary Pulley bytecode producer,
+  can leverage ISLE lowering patterns to easily identify opportunities for
+  emitting super-instructions.
+
+* We do not, in general, define sub-opcodes. There should be only one branch, on
+  the initial opcode, when evaluating any given instruction. For example, we do
+  *not* have a generic `load` instruction that is followed by a sub-opcode to
+  discriminate between different addressing modes. Instead, we have many
+  different kinds of `load` instructions, one for each of our addressing modes.
+
+  The one exception is the split between regular and extended ops. Regular ops
+  are a single `u8` opcode, where `255` is reserved for all extended ops, and a
+  `u16` opcode follows after the `255` regular opcode. This keeps the most
+  common instructions extra small, and provides a pressure release valve for
+  defining an unbounded number of additional, colder, ops.
+
+* We strive to cut down on boilerplate as much as possible, and try to avoid
+  matching on every opcode repeatedly throughout the whole code base. We do this
+  via heavy `macro_rules` usage where we define the bytecode inside a
+  higher-order macro and then automatically derive a disassembler, decoder,
+  encoder, etc. from that definition. This also avoids any kind of drift where
+  the encoder and decoder get out of sync with each other, for example.
@@ -0,0 +1,4 @@
+target
+corpus
+artifacts
+coverage
@@ -0,0 +1,13 @@
+[package]
+name = "pulley-interpreter-fuzz"
+version = "0.0.0"
+publish = false
+edition.workspace = true
+
+[lints]
+workspace = true
+
+[dependencies]
+pulley-interpreter = { path = "..", features = ["encode", "decode", "disas", "interp", "arbitrary"] }
+env_logger = { workspace = true }
+log = { workspace = true }
@@ -0,0 +1,112 @@
+use pulley_interpreter::{
+    interp::Vm,
+    op::{self, ExtendedOp, Op},
+    *,
+};
+use std::ptr::NonNull;
+
+/// Fuzz oracle: encodes `ops` and runs the resulting bytecode in a fresh `Vm`.
+///
+/// Ops rejected by `op_is_safe_for_fuzzing` are dropped first and a trailing
+/// `ret` is appended. The run must then either return zero values, or stop
+/// with a program counter that lies inside the encoded buffer and points at an
+/// encoded `trap` instruction.
+///
+/// # Panics
+///
+/// Panics (i.e. reports a fuzz failure) when either expectation is violated.
+pub fn interp(ops: Vec<Op>) {
+    // The logger-initialization result is deliberately ignored.
+    let _ = env_logger::try_init();
+
+    log::trace!("input: {ops:#?}");
+
+    let mut ops = ops;
+    ops.retain(op_is_safe_for_fuzzing);
+    // Make sure that we end with a `ret` so that the interpreter returns
+    // control to us instead of continuing off the end of the ops and into
+    // undefined memory.
+    ops.push(Op::Ret(op::Ret {}));
+
+    log::trace!("filtered to only safe ops: {ops:#?}");
+
+    let mut encoded = vec![];
+    for op in &ops {
+        op.encode(&mut encoded);
+    }
+    log::trace!("encoded: {encoded:?}");
+
+    let mut vm = Vm::new();
+    let args = &[];
+    let rets = &[];
+    // SAFETY: the entry pointer is the first byte of `encoded`, which is
+    // non-empty (a `ret` was pushed above) and is neither modified nor dropped
+    // until after the call. Ops rejected by `op_is_safe_for_fuzzing` were
+    // removed and the stream ends in `ret`.
+    // NOTE(review): `Vm::call`'s exact safety contract is not visible from
+    // this file -- confirm the above is sufficient.
+    let result = unsafe {
+        vm.call(NonNull::from(&encoded[0]), args, rets.into_iter().copied())
+    };
+
+    match result {
+        Ok(rets) => assert_eq!(rets.count(), 0),
+        Err(pc) => {
+            let pc = pc as usize;
+
+            // `end` is the address of the *last* encoded byte (the trailing
+            // `ret`), so a three-byte `trap` must start strictly before it.
+            let start = &encoded[0] as *const u8 as usize;
+            let end = encoded.last().unwrap() as *const u8 as usize;
+            assert!(
+                start <= pc && pc < end,
+                "pc should be in range {start:#018x}..{end:#018x}, got {pc:#018x}"
+            );
+
+            // The bytes at the reported pc must be the extended-op prefix
+            // followed by the little-endian `trap` extended opcode.
+            let index = pc - start;
+            assert_eq!(encoded[index], Opcode::ExtendedOp as u8);
+            let [a, b] = (ExtendedOpcode::Trap as u16).to_le_bytes();
+            assert_eq!(encoded[index + 1], a);
+            assert_eq!(encoded[index + 2], b);
+        }
+    }
+}
+
+/// Decides whether `op` may be kept in a fuzz-generated program run by
+/// `interp`.
+///
+/// * Jumps, conditional branches, calls, loads and stores are always rejected.
+/// * `ret`, float/vector moves and int-to-float bitcasts are always accepted.
+/// * Ops that write an `x` register are accepted only when the destination is
+///   not "special" according to `is_special()` (presumably registers such as
+///   `sp`/`fp`/`lr` -- confirm against the register definitions).
+/// * Extended ops are delegated to `extended_op_is_safe_for_fuzzing`.
+///
+/// The match has no `_` arm on purpose, so a newly added op fails to compile
+/// here until it has been classified.
+fn op_is_safe_for_fuzzing(op: &Op) -> bool {
+    match op {
+        // Always accepted.
+        Op::Ret(_)
+        | Op::Fmov(_)
+        | Op::Vmov(_)
+        | Op::BitcastFloatFromInt32(_)
+        | Op::BitcastFloatFromInt64(_) => true,
+
+        // Control flow: always rejected.
+        Op::Jump(_)
+        | Op::BrIf(_)
+        | Op::BrIfNot(_)
+        | Op::BrIfXeq32(_)
+        | Op::BrIfXneq32(_)
+        | Op::BrIfXult32(_)
+        | Op::BrIfXulteq32(_)
+        | Op::BrIfXslt32(_)
+        | Op::BrIfXslteq32(_)
+        | Op::Call(_) => false,
+
+        // Memory accesses: always rejected.
+        Op::Load32U(_)
+        | Op::Load32S(_)
+        | Op::Load64(_)
+        | Op::Load32UOffset8(_)
+        | Op::Load32SOffset8(_)
+        | Op::Load64Offset8(_)
+        | Op::Store32(_)
+        | Op::Store64(_)
+        | Op::Store32SOffset8(_)
+        | Op::Store64Offset8(_) => false,
+
+        Op::ExtendedOp(ext) => extended_op_is_safe_for_fuzzing(ext),
+
+        // Writes to an `x` register: accepted unless the destination is special.
+        Op::Xmov(inst) => !inst.dst.is_special(),
+        Op::Xconst8(inst) => !inst.dst.is_special(),
+        Op::Xconst16(inst) => !inst.dst.is_special(),
+        Op::Xconst32(inst) => !inst.dst.is_special(),
+        Op::Xconst64(inst) => !inst.dst.is_special(),
+        Op::Xadd32(inst) => !inst.dst.is_special(),
+        Op::Xadd64(inst) => !inst.dst.is_special(),
+        Op::BitcastIntFromFloat32(inst) => !inst.dst.is_special(),
+        Op::BitcastIntFromFloat64(inst) => !inst.dst.is_special(),
+        Op::Xeq64(inst) => !inst.dst.is_special(),
+        Op::Xneq64(inst) => !inst.dst.is_special(),
+        Op::Xslt64(inst) => !inst.dst.is_special(),
+        Op::Xslteq64(inst) => !inst.dst.is_special(),
+        Op::Xult64(inst) => !inst.dst.is_special(),
+        Op::Xulteq64(inst) => !inst.dst.is_special(),
+        Op::Xeq32(inst) => !inst.dst.is_special(),
+        Op::Xneq32(inst) => !inst.dst.is_special(),
+        Op::Xslt32(inst) => !inst.dst.is_special(),
+        Op::Xslteq32(inst) => !inst.dst.is_special(),
+        Op::Xult32(inst) => !inst.dst.is_special(),
+        Op::Xulteq32(inst) => !inst.dst.is_special(),
+    }
+}
+
+/// Extended-op counterpart of `op_is_safe_for_fuzzing`.
+///
+/// Every extended op listed here (`trap`, `nop`, `get_sp`) is accepted. The
+/// match has no `_` arm on purpose, so a newly added extended op fails to
+/// compile here until it has been classified.
+fn extended_op_is_safe_for_fuzzing(op: &ExtendedOp) -> bool {
+    match op {
+        ExtendedOp::Trap(_) | ExtendedOp::Nop(_) | ExtendedOp::GetSp(_) => true,
+    }
+}
@@ -0,0 +1,5 @@
+mod roundtrip;
+pub use roundtrip::*;
+
+mod interp;
+pub use interp::*;
@@ -0,0 +1,25 @@
+use pulley_interpreter::{
+    decode::{Decoder, SafeBytecodeStream},
+    op::{MaterializeOpsVisitor, Op},
+};
+
+/// Fuzz oracle: checks that decoding the encoding of `ops` yields `ops` again.
+///
+/// # Panics
+///
+/// Panics (i.e. reports a fuzz failure) when the encoded bytes fail to decode
+/// or when the decoded ops differ from the input.
+pub fn roundtrip(ops: Vec<Op>) {
+    // The logger-initialization result is deliberately ignored.
+    let _ = env_logger::try_init();
+
+    log::trace!("input: {ops:#?}");
+
+    // Encode every op back-to-back into one byte buffer.
+    let mut encoded: Vec<u8> = Vec::new();
+    ops.iter().for_each(|op| {
+        op.encode(&mut encoded);
+    });
+    log::trace!("encoded: {encoded:?}");
+
+    // Decode the whole buffer back into materialized ops.
+    let stream = SafeBytecodeStream::new(&encoded);
+    let mut materializer = MaterializeOpsVisitor::new(stream);
+    let decoded = Decoder::decode_all(&mut materializer).expect("should decode okay");
+    log::trace!("decoded: {decoded:#?}");
+
+    assert_eq!(
+        decoded, ops,
+        "`decode(encode(ops))` should be equal to the original `ops`"
+    );
+}