foundry_evm_core/
bytecode.rs

1use revm::bytecode::{OpCode, opcode};
2use std::{fmt, slice};
3
/// Iterator over raw EVM bytecode that decodes one instruction per step, pairing each opcode
/// with the immediate bytes that follow it.
///
/// Malformed bytecode is tolerated rather than rejected: opcodes are still yielded, but their
/// immediate data may be wrong. For instance, the truncated sequence `PUSH2 0x69` decodes to
/// `PUSH2, &[]`, after which the iterator is exhausted.
#[derive(Debug, Clone)]
pub struct InstIter<'a> {
    iter: slice::Iter<'a, u8>,
}
13
14impl fmt::Display for InstIter<'_> {
15    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
16        for (i, op) in self.clone().enumerate() {
17            if i > 0 {
18                f.write_str(" ")?;
19            }
20            write!(f, "{op}")?;
21        }
22        Ok(())
23    }
24}
25
26impl<'a> InstIter<'a> {
27    /// Create a new iterator over the given bytecode slice.
28    #[inline]
29    pub fn new(slice: &'a [u8]) -> Self {
30        Self { iter: slice.iter() }
31    }
32
33    /// Returns a new iterator that also yields the program counter alongside the opcode and
34    /// immediate data.
35    #[inline]
36    pub fn with_pc(self) -> InstIterWithPc<'a> {
37        InstIterWithPc { iter: self, pc: 0 }
38    }
39
40    /// Returns the inner iterator.
41    #[inline]
42    pub fn inner(&self) -> &slice::Iter<'a, u8> {
43        &self.iter
44    }
45
46    /// Returns the inner iterator.
47    #[inline]
48    pub fn inner_mut(&mut self) -> &mut slice::Iter<'a, u8> {
49        &mut self.iter
50    }
51
52    /// Returns the inner iterator.
53    #[inline]
54    pub fn into_inner(self) -> slice::Iter<'a, u8> {
55        self.iter
56    }
57}
58
59impl<'a> Iterator for InstIter<'a> {
60    type Item = Inst<'a>;
61
62    #[inline]
63    fn next(&mut self) -> Option<Self::Item> {
64        self.iter.next().map(|&opcode| {
65            let opcode = unsafe { OpCode::new_unchecked(opcode) };
66            let len = imm_len(opcode.get()) as usize;
67            let (immediate, rest) = self.iter.as_slice().split_at_checked(len).unwrap_or_default();
68            self.iter = rest.iter();
69            Inst { opcode, immediate }
70        })
71    }
72
73    #[inline]
74    fn size_hint(&self) -> (usize, Option<usize>) {
75        let len = self.iter.len();
76        ((len != 0) as usize, Some(len))
77    }
78}
79
80impl std::iter::FusedIterator for InstIter<'_> {}
81
82/// A bytecode iterator that yields opcodes and their immediate data, alongside the program counter.
83///
84/// Created by calling [`InstIter::with_pc`].
85#[derive(Debug)]
86pub struct InstIterWithPc<'a> {
87    iter: InstIter<'a>,
88    pc: usize,
89}
90
91impl<'a> Iterator for InstIterWithPc<'a> {
92    type Item = (usize, Inst<'a>);
93
94    #[inline]
95    fn next(&mut self) -> Option<Self::Item> {
96        self.iter.next().map(|inst| {
97            let pc = self.pc;
98            self.pc += 1 + inst.immediate.len();
99            (pc, inst)
100        })
101    }
102
103    #[inline]
104    fn size_hint(&self) -> (usize, Option<usize>) {
105        self.iter.size_hint()
106    }
107}
108
109impl std::iter::FusedIterator for InstIterWithPc<'_> {}
110
111/// An opcode and its immediate data. Returned by [`InstIter`].
112#[derive(Clone, Copy, PartialEq, Eq)]
113pub struct Inst<'a> {
114    /// The opcode.
115    pub opcode: OpCode,
116    /// The immediate data, if any.
117    ///
118    /// If an opcode is missing immediate data, e.g. malformed or bytecode hash, this will be an
119    /// empty slice.
120    pub immediate: &'a [u8],
121}
122
123impl fmt::Debug for Inst<'_> {
124    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
125        fmt::Display::fmt(self, f)
126    }
127}
128
129impl fmt::Display for Inst<'_> {
130    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
131        write!(f, "{}", self.opcode)?;
132        match self.immediate {
133            [] => Ok(()),
134            imm => write!(f, " {:#x}", alloy_primitives::hex::display(imm)),
135        }
136    }
137}
138
139/// Returns the length of the immediate data for the given opcode, or `0` if none.
140#[inline]
141const fn imm_len(op: u8) -> u8 {
142    match op {
143        opcode::PUSH1..=opcode::PUSH32 => op - opcode::PUSH0,
144        _ => 0,
145    }
146}
147
148/// Returns a string representation of the given bytecode.
149pub fn format_bytecode(bytecode: &[u8]) -> String {
150    let mut w = String::new();
151    format_bytecode_to(bytecode, &mut w).unwrap();
152    w
153}
154
155/// Formats an EVM bytecode to the given writer.
156pub fn format_bytecode_to<W: fmt::Write + ?Sized>(bytecode: &[u8], w: &mut W) -> fmt::Result {
157    write!(w, "{}", InstIter::new(bytecode))
158}
159
#[cfg(test)]
mod tests {
    use super::*;
    use revm::bytecode::opcode as op;

    /// Wraps a raw byte in an [`OpCode`] without validating it.
    fn o(op: u8) -> OpCode {
        unsafe { OpCode::new_unchecked(op) }
    }

    /// Builds the [`Inst`] the iterator is expected to yield for an opcode byte and its
    /// immediate bytes.
    fn inst(opcode: u8, immediate: &[u8]) -> Inst<'_> {
        Inst { opcode: o(opcode), immediate }
    }

    #[test]
    fn iter_basic() {
        let bytecode = [0x01u8, 0x02, 0x03, 0x04, 0x05];
        let mut iter = InstIter::new(&bytecode);

        // None of these opcodes takes an immediate, so every byte is its own instruction.
        for &byte in &bytecode {
            assert_eq!(iter.next(), Some(inst(byte, &[])));
        }
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn iter_with_imm() {
        let bytecode = [op::PUSH0, op::PUSH1, 0x69, op::PUSH2, 0x01, 0x02];
        let decoded: Vec<Inst<'_>> = InstIter::new(&bytecode).collect();

        let expected =
            [inst(op::PUSH0, &[]), inst(op::PUSH1, &[0x69]), inst(op::PUSH2, &[0x01, 0x02])];
        assert_eq!(decoded, expected);
    }

    #[test]
    fn iter_with_imm_too_short() {
        // PUSH2 needs two immediate bytes but only one is present.
        let bytecode = [op::PUSH2, 0x69];
        let decoded: Vec<Inst<'_>> = InstIter::new(&bytecode).collect();

        assert_eq!(decoded, [inst(op::PUSH2, &[])]);
    }

    #[test]
    fn display() {
        let bytecode = [op::PUSH0, op::PUSH1, 0x69, op::PUSH2, 0x01, 0x02];
        assert_eq!(format_bytecode(&bytecode), "PUSH0 PUSH1 0x69 PUSH2 0x0102");
    }

    #[test]
    fn decode_push2_and_stop() {
        // 0x61 0xAA 0xBB = PUSH2 0xAABB
        // 0x00           = STOP
        let code = [0x61u8, 0xAA, 0xBB, 0x00];
        let insns: Vec<(usize, Inst<'_>)> = InstIter::new(&code).with_pc().collect();

        // PUSH2 then STOP
        assert_eq!(insns.len(), 2);

        // PUSH2 at pc = 0
        let (push_pc, push) = insns[0];
        assert_eq!(push_pc, 0);
        assert_eq!(push.opcode, op::PUSH2);
        assert_eq!(push.immediate, &[0xAA, 0xBB]);

        // STOP at pc = 3
        let (stop_pc, stop) = insns[1];
        assert_eq!(stop_pc, 3);
        assert_eq!(stop.opcode, op::STOP);
        assert!(stop.immediate.is_empty());
    }

    #[test]
    fn decode_arithmetic_ops() {
        // 0x01 = ADD, 0x02 = MUL, 0x03 = SUB, 0x04 = DIV
        let code = [0x01u8, 0x02, 0x03, 0x04];
        let insns: Vec<(usize, Inst<'_>)> = InstIter::new(&code).with_pc().collect();

        assert_eq!(insns.len(), 4);

        // Single-byte instructions: the program counter equals the position in the list.
        let expected_ops = [op::ADD, op::MUL, op::SUB, op::DIV];
        for (idx, &(pc, decoded)) in insns.iter().enumerate() {
            assert_eq!(pc, idx);
            assert_eq!(decoded.opcode, expected_ops[idx]);
            assert!(decoded.immediate.is_empty());
        }
    }
}