-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Introduce the pulley-interpreter crate
#9008
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
fitzgen
merged 14 commits into
bytecodealliance:main
from
fitzgen:add-pulley-interpreter-crate
Jul 25, 2024
Merged
Changes from all commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
53e6a13
Introduce the `pulley-interpreter` crate
fitzgen 857ea9c
remove stray fn main
fitzgen 74f962e
Add small tests for special x registers
fitzgen 32b4f42
Remove now-unused import
fitzgen 4fb7fb5
always generate 0 pc rel offsets in arbitrary
fitzgen 035c1da
Add doc_auto_cfg feature for docs.rs
fitzgen 4ae8959
enable all optional features for docs.rs
fitzgen 4cff7b8
Consolidate `BytecodeStream::{advance,get1,get2,...}` into `BytecodeS…
fitzgen d5b4b9c
fix fuzz targets build
fitzgen 41874e7
inherit workspace lints in pulley's fuzz crate
fitzgen ff88427
Merge fuzz targets into one target; fix a couple small fuzz bugs
fitzgen 2d65d50
Add Pulley to our cargo vet config
fitzgen f2f5187
Add pulley as a crate to publish
fitzgen 8366149
Move Pulley fuzz target into top level fuzz directory
fitzgen File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| #![no_main] | ||
|
|
||
| use libfuzzer_sys::{arbitrary::*, fuzz_target}; | ||
| use pulley_interpreter_fuzz::{interp, roundtrip}; | ||
|
|
||
| fuzz_target!(|data| { | ||
| let _ = fuzz(data); | ||
| }); | ||
|
|
||
| fn fuzz(data: &[u8]) -> Result<()> { | ||
| let _ = env_logger::try_init(); | ||
|
|
||
| let mut u = Unstructured::new(data); | ||
| match u.int_in_range(0..=1)? { | ||
| 0 => roundtrip(Arbitrary::arbitrary_take_rest(u)?), | ||
| 1 => interp(Arbitrary::arbitrary_take_rest(u)?), | ||
| _ => unreachable!(), | ||
| } | ||
|
|
||
| Ok(()) | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| [package] | ||
| authors = ["The Pulley Project Developers"] | ||
| description = "The Pulley interpreter, its bytecode definition, encoder, decoder, etc." | ||
| edition.workspace = true | ||
| license = "Apache-2.0 WITH LLVM-exception" | ||
| name = "pulley-interpreter" | ||
| readme = "./README.md" | ||
| repository = "https://github.com/bytecodealliance/wasmtime/tree/main/pulley" | ||
| version = "0.1.0" | ||
|
|
||
| [lints] | ||
| workspace = true | ||
|
|
||
| [dependencies] | ||
| arbitrary = { workspace = true, optional = true } | ||
| log = { workspace = true } | ||
|
|
||
| [dev-dependencies] | ||
| env_logger = { workspace = true } | ||
|
|
||
| [features] | ||
| std = [] | ||
| arbitrary = ["dep:arbitrary", "arbitrary/derive", "std"] | ||
| encode = [] | ||
| decode = [] | ||
| disas = ["decode"] | ||
| interp = ["decode"] | ||
|
|
||
| [package.metadata.docs.rs] | ||
| all-features = true |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,122 @@ | ||
| <div align="center"> | ||
| <h1>Pulley</h1> | ||
|
|
||
| <h3>Portable, Universal, Low-Level Execution strategY</h3> | ||
|
|
||
| <p> | ||
| <strong>A portable bytecode and fast interpreter</strong> | ||
| </p> | ||
|
|
||
| <strong>A <a href="https://bytecodealliance.org/">Bytecode Alliance</a> project</strong> | ||
|
|
||
| <p> | ||
| <a href="https://github.com/bytecodealliance/wasmtime/actions?query=workflow%3ACI"><img src="https://github.com/bytecodealliance/wasmtime/workflows/CI/badge.svg" alt="build status" /></a> | ||
| <a href="https://bytecodealliance.zulipchat.com/#narrow/stream/217126-wasmtime"><img src="https://img.shields.io/badge/zulip-join_chat-brightgreen.svg" alt="zulip chat" /></a> | ||
| <img src="https://img.shields.io/badge/rustc-stable+-green.svg" alt="supported rustc stable" /> | ||
| <a href="https://docs.rs/pulley-interpreter"><img src="https://docs.rs/pulley-interpreter/badge.svg" alt="Documentation Status" /></a> | ||
| </p> | ||
|
|
||
| <h3> | ||
| <a href="https://bytecodealliance.zulipchat.com/#narrow/stream/217126-wasmtime">Chat</a> | ||
| </h3> | ||
| </div> | ||
|
|
||
| ## About | ||
|
|
||
| Pulley is a portable bytecode and fast interpreter for use in Wasmtime. | ||
|
|
||
| Pulley's primary goal is portability and its secondary goal is fast | ||
| interpretation. | ||
|
|
||
| Pulley is not intended to be a simple reference interpreter, to support dynamically | ||
| switching to just-in-time compiled code, or even to be the very fastest | ||
| interpreter in the world. | ||
|
|
||
| For more details on Pulley's motivation, goals, and non-goals, see [the Bytecode | ||
| Alliance RFC that originally proposed Pulley][rfc]. | ||
|
|
||
| [rfc]: https://github.com/bytecodealliance/rfcs/blob/main/accepted/pulley.md | ||
|
|
||
| ## Status | ||
|
|
||
| Pulley is very much still a work in progress! Expect the details of the bytecode | ||
| to change, instructions to appear and disappear, and APIs to be overhauled. | ||
|
|
||
| ## Example | ||
|
|
||
| Here is the disassembly of `f(a, b) = a + b` in Pulley today: | ||
|
|
||
| ``` | ||
| 0: 11 1f f0 ff ff ff ff ff ff ff xconst64 x31, 18446744073709551600 | ||
| a: 12 20 20 1f xadd32 sp, sp, x31 | ||
| e: 32 20 08 21 store64_offset8 sp, 8, lr | ||
| 12: 30 20 22 store64 sp, fp | ||
| 15: 0b 22 20 xmov fp, sp | ||
| 18: 12 00 00 01 xadd32 x0, x0, x1 | ||
| 1c: 0b 20 22 xmov sp, fp | ||
| 1f: 25 21 20 08 load64_offset8 lr, sp, 8 | ||
| 23: 22 22 20 load64 fp, sp | ||
| 26: 0e 1f 10 xconst8 x31, 16 | ||
| 29: 12 20 20 1f xadd32 sp, sp, x31 | ||
| 2d: 00 ret | ||
| ``` | ||
|
|
||
| Note that there are a number of things that could be improved here: | ||
|
|
||
| * We could avoid allocating and deallocating a stack frame because this function's | ||
| body doesn't use any stack slots. | ||
| * We could sign-extend, rather than zero-extend, constants so that `-16` has a | ||
| single-byte encoding instead of an eight-byte encoding. | ||
| * We could collapse the whole prologue and epilogue instruction sequences into | ||
| super-instructions, since they are identical (modulo the frame size immediate) | ||
| for all functions. | ||
|
|
||
| As mentioned above, Pulley is very much a work in progress. | ||
|
|
||
| ## Principles | ||
|
|
||
| What follows are some general, incomplete, and sometimes-conflicting principles | ||
| that we try to follow when designing the Pulley bytecode format and its | ||
| interpreter: | ||
|
|
||
| * The bytecode should be simple and fast to decode in software. For example, we | ||
| should avoid overly-complicated bitpacking, and only reach for that kind of | ||
| thing when benchmarks and profiles show it to be of benefit. | ||
|
|
||
| * The interpreter never materializes `enum Instruction { .. }` values. Instead, | ||
| it decodes immediates and operands as needed in each opcode handler. This | ||
| avoids constructing unnecessary temporary storage and branching on opcode | ||
| multiple times. | ||
|
|
||
| * Because we never materialize `enum Instruction { .. }` values, we don't have | ||
| to worry about unused padding or one very-large instruction inflating the size | ||
| of all the rest of our small instructions. To put it concisely: we can lean | ||
| into a variable-length encoding where some instructions require only a single | ||
| byte and others require many. This helps keep the bytecode compact and | ||
| cache-efficient. | ||
|
|
||
| * We lean into defining super-instructions (sometimes called "macro ops") that | ||
| perform the work of multiple operations in a single instruction. The more work | ||
| we do in each turn of the interpreter loop the less we are impacted by its | ||
| overhead. Additionally, Cranelift, as the primary Pulley bytecode producer, | ||
| can leverage ISLE lowering patterns to easily identify opportunities for | ||
| emitting super-instructions. | ||
|
|
||
| * We do not, in general, define sub-opcodes. There should be only one branch, on | ||
| the initial opcode, when evaluating any given instruction. For example, we do | ||
| *not* have a generic `load` instruction that is followed by a sub-opcode to | ||
| discriminate between different addressing modes. Instead, we have many | ||
| different kinds of `load` instructions, one for each of our addressing modes. | ||
|
|
||
| The one exception is the split between regular and extended ops. Regular ops | ||
| are a single `u8` opcode, where `255` is reserved for all extended ops, and a | ||
| `u16` opcode follows after the `255` regular opcode. This keeps the most | ||
| common instructions extra small, and provides a pressure release valve for | ||
| defining an unbounded number of additional, colder, ops. | ||
|
|
||
| * We strive to cut down on boilerplate as much as possible, and try to avoid | ||
| matching on every opcode repeatedly throughout the whole code base. We do this | ||
| via heavy `macro_rules` usage where we define the bytecode inside a | ||
| higher-order macro and then automatically derive a disassembler, decoder, | ||
| encoder, etc... from that definition. This also avoids any kind of drift where | ||
| the encoder and decoder get out of sync with each other, for example. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| target | ||
| corpus | ||
| artifacts | ||
| coverage |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| [package] | ||
| name = "pulley-interpreter-fuzz" | ||
| version = "0.0.0" | ||
| publish = false | ||
| edition.workspace = true | ||
|
|
||
| [lints] | ||
| workspace = true | ||
|
|
||
| [dependencies] | ||
| pulley-interpreter = { path = "..", features = ["encode", "decode", "disas", "interp", "arbitrary"] } | ||
| env_logger = { workspace = true } | ||
| log = { workspace = true } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,112 @@ | ||
| use pulley_interpreter::{ | ||
| interp::Vm, | ||
| op::{self, ExtendedOp, Op}, | ||
| *, | ||
| }; | ||
| use std::ptr::NonNull; | ||
|
|
||
| pub fn interp(ops: Vec<Op>) { | ||
| let _ = env_logger::try_init(); | ||
|
|
||
| log::trace!("input: {ops:#?}"); | ||
|
|
||
| let mut ops = ops; | ||
| ops.retain(|op| op_is_safe_for_fuzzing(op)); | ||
| // Make sure that we end with a `ret` so that the interpreter returns | ||
| // control to us instead of continuing off the end of the ops and into | ||
| // undefined memory. | ||
| ops.push(Op::Ret(op::Ret {})); | ||
|
|
||
| log::trace!("filtered to only safe ops: {ops:#?}"); | ||
|
|
||
| let mut encoded = vec![]; | ||
| for op in &ops { | ||
| op.encode(&mut encoded); | ||
| } | ||
| log::trace!("encoded: {encoded:?}"); | ||
|
|
||
| let mut vm = Vm::new(); | ||
| unsafe { | ||
| let args = &[]; | ||
| let rets = &[]; | ||
| match vm.call(NonNull::from(&encoded[0]), args, rets.into_iter().copied()) { | ||
| Ok(rets) => assert_eq!(rets.count(), 0), | ||
| Err(pc) => { | ||
| let pc = pc as usize; | ||
|
|
||
| let start = &encoded[0] as *const u8 as usize; | ||
| let end = encoded.last().unwrap() as *const u8 as usize; | ||
| assert!( | ||
| start <= pc && pc < end, | ||
| "pc should be in range {start:#018x}..{end:#018x}, got {pc:#018x}" | ||
| ); | ||
|
|
||
| let index = pc - start; | ||
| assert_eq!(encoded[index], Opcode::ExtendedOp as u8); | ||
| let [a, b] = (ExtendedOpcode::Trap as u16).to_le_bytes(); | ||
| assert_eq!(encoded[index + 1], a); | ||
| assert_eq!(encoded[index + 2], b); | ||
| } | ||
| }; | ||
| } | ||
| } | ||
|
|
||
| fn op_is_safe_for_fuzzing(op: &Op) -> bool { | ||
| match op { | ||
| Op::Ret(_) => true, | ||
| Op::Jump(_) => false, | ||
| Op::BrIf(_) => false, | ||
| Op::BrIfNot(_) => false, | ||
| Op::BrIfXeq32(_) => false, | ||
| Op::BrIfXneq32(_) => false, | ||
| Op::BrIfXult32(_) => false, | ||
| Op::BrIfXulteq32(_) => false, | ||
| Op::BrIfXslt32(_) => false, | ||
| Op::BrIfXslteq32(_) => false, | ||
| Op::Xmov(op::Xmov { dst, .. }) => !dst.is_special(), | ||
| Op::Fmov(_) => true, | ||
| Op::Vmov(_) => true, | ||
| Op::Xconst8(op::Xconst8 { dst, .. }) => !dst.is_special(), | ||
| Op::Xconst16(op::Xconst16 { dst, .. }) => !dst.is_special(), | ||
| Op::Xconst32(op::Xconst32 { dst, .. }) => !dst.is_special(), | ||
| Op::Xconst64(op::Xconst64 { dst, .. }) => !dst.is_special(), | ||
| Op::Xadd32(op::Xadd32 { dst, .. }) => !dst.is_special(), | ||
| Op::Xadd64(op::Xadd64 { dst, .. }) => !dst.is_special(), | ||
| Op::Load32U(_) => false, | ||
| Op::Load32S(_) => false, | ||
| Op::Load64(_) => false, | ||
| Op::Load32UOffset8(_) => false, | ||
| Op::Load32SOffset8(_) => false, | ||
| Op::Load64Offset8(_) => false, | ||
| Op::Store32(_) => false, | ||
| Op::Store64(_) => false, | ||
| Op::Store32SOffset8(_) => false, | ||
| Op::Store64Offset8(_) => false, | ||
| Op::BitcastIntFromFloat32(op::BitcastIntFromFloat32 { dst, .. }) => !dst.is_special(), | ||
| Op::BitcastIntFromFloat64(op::BitcastIntFromFloat64 { dst, .. }) => !dst.is_special(), | ||
| Op::BitcastFloatFromInt32(_) => true, | ||
| Op::BitcastFloatFromInt64(_) => true, | ||
| Op::ExtendedOp(op) => extended_op_is_safe_for_fuzzing(op), | ||
| Op::Call(_) => false, | ||
| Op::Xeq64(Xeq64 { dst, .. }) => !dst.is_special(), | ||
| Op::Xneq64(Xneq64 { dst, .. }) => !dst.is_special(), | ||
| Op::Xslt64(Xslt64 { dst, .. }) => !dst.is_special(), | ||
| Op::Xslteq64(Xslteq64 { dst, .. }) => !dst.is_special(), | ||
| Op::Xult64(Xult64 { dst, .. }) => !dst.is_special(), | ||
| Op::Xulteq64(Xulteq64 { dst, .. }) => !dst.is_special(), | ||
| Op::Xeq32(Xeq32 { dst, .. }) => !dst.is_special(), | ||
| Op::Xneq32(Xneq32 { dst, .. }) => !dst.is_special(), | ||
| Op::Xslt32(Xslt32 { dst, .. }) => !dst.is_special(), | ||
| Op::Xslteq32(Xslteq32 { dst, .. }) => !dst.is_special(), | ||
| Op::Xult32(Xult32 { dst, .. }) => !dst.is_special(), | ||
| Op::Xulteq32(Xulteq32 { dst, .. }) => !dst.is_special(), | ||
| } | ||
| } | ||
|
|
||
| fn extended_op_is_safe_for_fuzzing(op: &ExtendedOp) -> bool { | ||
| match op { | ||
| ExtendedOp::Trap(_) => true, | ||
| ExtendedOp::Nop(_) => true, | ||
| ExtendedOp::GetSp(_) => true, | ||
| } | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| mod roundtrip; | ||
| pub use roundtrip::*; | ||
|
|
||
| mod interp; | ||
| pub use interp::*; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| use pulley_interpreter::{ | ||
| decode::{Decoder, SafeBytecodeStream}, | ||
| op::{MaterializeOpsVisitor, Op}, | ||
| }; | ||
|
|
||
| pub fn roundtrip(ops: Vec<Op>) { | ||
| let _ = env_logger::try_init(); | ||
|
|
||
| log::trace!("input: {ops:#?}"); | ||
|
|
||
| let mut encoded = vec![]; | ||
| for op in &ops { | ||
| op.encode(&mut encoded); | ||
| } | ||
| log::trace!("encoded: {encoded:?}"); | ||
|
|
||
| let mut materializer = MaterializeOpsVisitor::new(SafeBytecodeStream::new(&encoded)); | ||
| let decoded = Decoder::decode_all(&mut materializer).expect("should decode okay"); | ||
| log::trace!("decoded: {decoded:#?}"); | ||
|
|
||
| assert_eq!( | ||
| decoded, ops, | ||
| "`decode(encode(ops))` should be equal to the original `ops`" | ||
| ); | ||
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.