Skip to main content

foundry_evm_core/
bytecode.rs

1use revm::bytecode::{OpCode, opcode};
2use std::{fmt, slice};
3
/// Iterator over bytecode that yields each opcode together with its immediate data.
///
/// Malformed bytecode does not stop iteration: opcodes are still yielded, but the immediate data
/// attached to them may be wrong. For instance, the truncated sequence `PUSH2 0x69` is yielded as
/// `PUSH2, &[]`.
#[derive(Debug, Clone)]
pub struct InstIter<'a> {
    /// The bytes that have not been decoded yet.
    iter: slice::Iter<'a, u8>,
}
13
14impl fmt::Display for InstIter<'_> {
15    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
16        for (i, op) in self.clone().enumerate() {
17            if i > 0 {
18                f.write_str(" ")?;
19            }
20            write!(f, "{op}")?;
21        }
22        Ok(())
23    }
24}
25
26impl<'a> InstIter<'a> {
27    /// Create a new iterator over the given bytecode slice.
28    #[inline]
29    pub fn new(slice: &'a [u8]) -> Self {
30        Self { iter: slice.iter() }
31    }
32
33    /// Returns a new iterator that also yields the program counter alongside the opcode and
34    /// immediate data.
35    #[inline]
36    pub const fn with_pc(self) -> InstIterWithPc<'a> {
37        InstIterWithPc { iter: self, pc: 0 }
38    }
39
40    /// Returns the inner iterator.
41    #[inline]
42    pub const fn inner(&self) -> &slice::Iter<'a, u8> {
43        &self.iter
44    }
45
46    /// Returns the inner iterator.
47    #[inline]
48    pub const fn inner_mut(&mut self) -> &mut slice::Iter<'a, u8> {
49        &mut self.iter
50    }
51
52    /// Returns the inner iterator.
53    #[inline]
54    pub const fn into_inner(self) -> slice::Iter<'a, u8> {
55        self.iter
56    }
57}
58
59impl<'a> Iterator for InstIter<'a> {
60    type Item = Inst<'a>;
61
62    #[inline]
63    fn next(&mut self) -> Option<Self::Item> {
64        self.iter.next().map(|&opcode| {
65            // SAFETY: OpCode wraps a u8, unknown opcodes are valid to construct.
66            let opcode = unsafe { OpCode::new_unchecked(opcode) };
67            let len = imm_len(opcode.get()) as usize;
68            let (immediate, rest) = self.iter.as_slice().split_at_checked(len).unwrap_or_default();
69            self.iter = rest.iter();
70            Inst { opcode, immediate }
71        })
72    }
73
74    #[inline]
75    fn size_hint(&self) -> (usize, Option<usize>) {
76        let len = self.iter.len();
77        ((len != 0) as usize, Some(len))
78    }
79}
80
81impl std::iter::FusedIterator for InstIter<'_> {}
82
83/// A bytecode iterator that yields opcodes and their immediate data, alongside the program counter.
84///
85/// Created by calling [`InstIter::with_pc`].
86#[derive(Debug)]
87pub struct InstIterWithPc<'a> {
88    iter: InstIter<'a>,
89    pc: usize,
90}
91
92impl<'a> Iterator for InstIterWithPc<'a> {
93    type Item = (usize, Inst<'a>);
94
95    #[inline]
96    fn next(&mut self) -> Option<Self::Item> {
97        self.iter.next().map(|inst| {
98            let pc = self.pc;
99            self.pc += 1 + inst.immediate.len();
100            (pc, inst)
101        })
102    }
103
104    #[inline]
105    fn size_hint(&self) -> (usize, Option<usize>) {
106        self.iter.size_hint()
107    }
108}
109
110impl std::iter::FusedIterator for InstIterWithPc<'_> {}
111
112/// An opcode and its immediate data. Returned by [`InstIter`].
113#[derive(Clone, Copy, PartialEq, Eq)]
114pub struct Inst<'a> {
115    /// The opcode.
116    pub opcode: OpCode,
117    /// The immediate data, if any.
118    ///
119    /// If an opcode is missing immediate data, e.g. malformed or bytecode hash, this will be an
120    /// empty slice.
121    pub immediate: &'a [u8],
122}
123
124impl fmt::Debug for Inst<'_> {
125    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
126        fmt::Display::fmt(self, f)
127    }
128}
129
130impl fmt::Display for Inst<'_> {
131    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
132        write!(f, "{}", self.opcode)?;
133        match self.immediate {
134            [] => Ok(()),
135            imm => write!(f, " {:#x}", alloy_primitives::hex::display(imm)),
136        }
137    }
138}
139
140/// Returns the length of the immediate data for the given opcode, or `0` if none.
141#[inline]
142const fn imm_len(op: u8) -> u8 {
143    match op {
144        opcode::PUSH1..=opcode::PUSH32 => op - opcode::PUSH0,
145        _ => 0,
146    }
147}
148
149/// Returns a string representation of the given bytecode.
150pub fn format_bytecode(bytecode: &[u8]) -> String {
151    let mut w = String::new();
152    format_bytecode_to(bytecode, &mut w).unwrap();
153    w
154}
155
156/// Formats an EVM bytecode to the given writer.
157pub fn format_bytecode_to<W: fmt::Write + ?Sized>(bytecode: &[u8], w: &mut W) -> fmt::Result {
158    write!(w, "{}", InstIter::new(bytecode))
159}
160
#[cfg(test)]
mod tests {
    use super::*;
    use revm::bytecode::opcode as op;

    /// Wraps a raw byte into an [`OpCode`] without validating it.
    fn o(op: u8) -> OpCode {
        // SAFETY: OpCode wraps a u8, unknown opcodes are valid to construct.
        unsafe { OpCode::new_unchecked(op) }
    }

    /// Builds the [`Inst`] that is expected for `opcode` followed by `immediate`.
    fn inst(opcode: u8, immediate: &[u8]) -> Inst<'_> {
        Inst { opcode: o(opcode), immediate }
    }

    /// Decodes `bytecode` until the iterator is exhausted.
    fn decode(bytecode: &[u8]) -> Vec<Inst<'_>> {
        InstIter::new(bytecode).collect()
    }

    #[test]
    fn iter_basic() {
        let bytecode: [u8; 5] = [0x01, 0x02, 0x03, 0x04, 0x05];

        // None of these opcodes takes an immediate, so every byte is its own instruction.
        let expected = [
            inst(0x01, &[]),
            inst(0x02, &[]),
            inst(0x03, &[]),
            inst(0x04, &[]),
            inst(0x05, &[]),
        ];
        assert_eq!(decode(&bytecode), expected);
    }

    #[test]
    fn iter_with_imm() {
        let bytecode: [u8; 6] = [op::PUSH0, op::PUSH1, 0x69, op::PUSH2, 0x01, 0x02];

        let expected =
            [inst(op::PUSH0, &[]), inst(op::PUSH1, &[0x69]), inst(op::PUSH2, &[0x01, 0x02])];
        assert_eq!(decode(&bytecode), expected);
    }

    #[test]
    fn iter_with_imm_too_short() {
        let bytecode: [u8; 2] = [op::PUSH2, 0x69];

        // The truncated immediate is dropped and the iteration ends right after the opcode.
        let expected = [inst(op::PUSH2, &[])];
        assert_eq!(decode(&bytecode), expected);
    }

    #[test]
    fn display() {
        let bytecode: [u8; 6] = [op::PUSH0, op::PUSH1, 0x69, op::PUSH2, 0x01, 0x02];
        assert_eq!(format_bytecode(&bytecode), "PUSH0 PUSH1 0x69 PUSH2 0x0102");
    }

    #[test]
    fn decode_push2_and_stop() {
        // 0x61 0xAA 0xBB = PUSH2 0xAABB
        // 0x00           = STOP
        let code: [u8; 4] = [0x61, 0xAA, 0xBB, 0x00];
        let decoded: Vec<_> = InstIter::new(&code).with_pc().collect();

        // PUSH2 then STOP
        assert_eq!(decoded.len(), 2);

        // PUSH2 at pc = 0
        let (push_pc, push) = decoded[0];
        assert_eq!(push_pc, 0);
        assert_eq!(push.opcode, op::PUSH2);
        assert_eq!(push.immediate, &[0xAA, 0xBB]);

        // STOP at pc = 3
        let (stop_pc, stop) = decoded[1];
        assert_eq!(stop_pc, 3);
        assert_eq!(stop.opcode, op::STOP);
        assert!(stop.immediate.is_empty());
    }

    #[test]
    fn decode_arithmetic_ops() {
        // 0x01 = ADD, 0x02 = MUL, 0x03 = SUB, 0x04 = DIV
        let code: [u8; 4] = [0x01, 0x02, 0x03, 0x04];
        let decoded: Vec<_> = InstIter::new(&code).with_pc().collect();

        let expected_ops = [op::ADD, op::MUL, op::SUB, op::DIV];
        assert_eq!(decoded.len(), 4);

        // Without immediates, the program counter equals the index of the instruction.
        for (want_pc, (&(pc, inst), want_op)) in decoded.iter().zip(expected_ops).enumerate() {
            assert_eq!(pc, want_pc);
            assert_eq!(inst.opcode, want_op);
            assert!(inst.immediate.is_empty());
        }
    }
}