Skip to main content

foundry_evm_fuzz/strategies/
state.rs

1use crate::{
2    BasicTxDetails, invariant::FuzzRunIdentifiedContracts, strategies::literals::LiteralsDictionary,
3};
4use alloy_dyn_abi::{DynSolType, DynSolValue, EventExt, FunctionExt};
5use alloy_json_abi::{Function, JsonAbi};
6use alloy_primitives::{
7    Address, B256, Bytes, Log, U256,
8    map::{AddressIndexSet, AddressMap, B256IndexSet, HashMap, IndexSet},
9};
10use foundry_common::{
11    ignore_metadata_hash, mapping_slots::MappingSlots, slot_identifier::SlotIdentifier,
12};
13use foundry_compilers::artifacts::StorageLayout;
14use foundry_config::FuzzDictionaryConfig;
15use foundry_evm_core::{bytecode::InstIter, utils::StateChangeset};
16use revm::{
17    database::{CacheDB, DatabaseRef, DbAccount},
18    state::AccountInfo,
19};
20use std::{cell::RefCell, collections::BTreeMap, fmt, rc::Rc, sync::Arc};
21
22/// The maximum number of bytes we will look at in bytecodes to find push bytes (24 KiB).
23///
24/// This is to limit the performance impact of fuzz tests that might deploy arbitrarily sized
25/// bytecode (as is the case with Solmate).
26const PUSH_BYTE_ANALYSIS_LIMIT: usize = 24 * 1024;
27
28/// Immutable fuzz dictionary seed used by parallel stateless fuzz workers.
29#[derive(Clone, Debug)]
30pub struct EvmFuzzState {
31    inner: Arc<FuzzDictionary>,
32    /// Addresses of external libraries deployed in test setup, excluded from fuzz test inputs.
33    pub deployed_libs: Vec<Address>,
34}
35
36/// Worker-local mutable fuzz dictionary used by invariant campaigns.
37#[derive(Clone, Debug)]
38pub struct InvariantFuzzState {
39    inner: Rc<RefCell<FuzzDictionary>>,
40    /// Addresses of external libraries deployed in test setup, excluded from fuzz test inputs.
41    pub deployed_libs: Vec<Address>,
42}
43
44pub trait FuzzStateReader: Clone + 'static {
45    fn deployed_libs(&self) -> &[Address];
46    fn with_dictionary<R>(&self, f: impl FnOnce(&FuzzDictionary) -> R) -> R;
47}
48
49impl EvmFuzzState {
50    #[cfg(test)]
51    pub(crate) fn test() -> Self {
52        Self::new(
53            &[],
54            &CacheDB::<revm::database::EmptyDB>::default(),
55            FuzzDictionaryConfig::default(),
56            None,
57        )
58    }
59
60    pub fn new<DB: DatabaseRef>(
61        deployed_libs: &[Address],
62        db: &CacheDB<DB>,
63        config: FuzzDictionaryConfig,
64        literals: Option<&LiteralsDictionary>,
65    ) -> Self {
66        // Sort accounts to ensure deterministic dictionary generation from the same setUp state.
67        let mut accs = db.cache.accounts.iter().collect::<Vec<_>>();
68        accs.sort_by_key(|(address, _)| *address);
69
70        // Create fuzz dictionary and insert values from db state.
71        let mut dictionary = FuzzDictionary::new(config);
72        dictionary.insert_db_values(accs);
73        if let Some(literals) = literals {
74            dictionary.literal_values = literals.clone();
75        }
76
77        Self { inner: Arc::new(dictionary), deployed_libs: deployed_libs.to_vec() }
78    }
79
80    pub fn into_invariant(self) -> InvariantFuzzState {
81        InvariantFuzzState {
82            inner: Rc::new(RefCell::new((*self.inner).clone())),
83            deployed_libs: self.deployed_libs,
84        }
85    }
86
87    pub fn fork(&self) -> Self {
88        Self { inner: Arc::clone(&self.inner), deployed_libs: self.deployed_libs.clone() }
89    }
90
91    pub fn collect_values(&mut self, values: impl IntoIterator<Item = B256>) {
92        let dict = Arc::make_mut(&mut self.inner);
93        for value in values {
94            dict.insert_value(value);
95        }
96    }
97
98    /// Logs stats about the current state.
99    pub fn log_stats(&self) {
100        self.inner.log_stats();
101    }
102
103    /// Test-only helper to seed the dictionary with literal values.
104    #[cfg(test)]
105    pub(crate) fn seed_literals(&mut self, map: super::LiteralMaps) {
106        Arc::make_mut(&mut self.inner).seed_literals(map);
107    }
108}
109
110impl FuzzStateReader for EvmFuzzState {
111    fn deployed_libs(&self) -> &[Address] {
112        &self.deployed_libs
113    }
114
115    fn with_dictionary<R>(&self, f: impl FnOnce(&FuzzDictionary) -> R) -> R {
116        f(&self.inner)
117    }
118}
119
120impl InvariantFuzzState {
121    pub fn snapshot(&self) -> EvmFuzzState {
122        EvmFuzzState {
123            inner: Arc::new(self.inner.borrow().clone()),
124            deployed_libs: self.deployed_libs.clone(),
125        }
126    }
127
128    pub fn collect_values(&self, values: impl IntoIterator<Item = B256>) {
129        let mut dict = self.inner.borrow_mut();
130        for value in values {
131            dict.insert_value(value);
132        }
133    }
134
135    /// Collects state changes from a [StateChangeset] and logs into an [InvariantFuzzState]
136    /// according to the given [FuzzDictionaryConfig].
137    #[allow(clippy::too_many_arguments)]
138    pub fn collect_values_from_call(
139        &self,
140        fuzzed_contracts: &FuzzRunIdentifiedContracts,
141        tx: &BasicTxDetails,
142        result: &Bytes,
143        logs: &[Log],
144        state_changeset: &StateChangeset,
145        run_depth: u32,
146        mapping_slots: Option<&AddressMap<MappingSlots>>,
147    ) {
148        let mut dict = self.inner.borrow_mut();
149        let targets = fuzzed_contracts.targets();
150        let (target_abi, target_function) = targets.fuzzed_artifacts(tx);
151        dict.insert_logs_values(target_abi, logs, run_depth);
152        dict.insert_result_values(target_function, result, run_depth);
153        // Get storage layouts for contracts in the state changeset
154        let storage_layouts = targets.get_storage_layouts();
155        dict.insert_new_state_values(state_changeset, &storage_layouts, mapping_slots);
156    }
157
158    /// Collects typed trace-cmp operands from sancov-instrumented code.
159    /// Values are inserted into both persistent state values (survive reverts) and typed
160    /// sample buckets (for ABI-aware mutation).
161    pub fn collect_typed_cmp_values(&self, values: impl IntoIterator<Item = (u8, B256)>) {
162        let mut dict = self.inner.borrow_mut();
163        for (width, value) in values {
164            dict.insert_persistent_value(value);
165            dict.insert_typed_cmp_value(width, value);
166        }
167    }
168
169    /// Removes all newly added entries from the dictionary.
170    ///
171    /// Should be called between fuzz/invariant runs to avoid accumulating data derived from fuzz
172    /// inputs.
173    pub fn revert(&self) {
174        self.inner.borrow_mut().revert();
175    }
176
177    /// Logs stats about the current state.
178    pub fn log_stats(&self) {
179        self.inner.borrow().log_stats();
180    }
181}
182
183impl FuzzStateReader for InvariantFuzzState {
184    fn deployed_libs(&self) -> &[Address] {
185        &self.deployed_libs
186    }
187
188    fn with_dictionary<R>(&self, f: impl FnOnce(&FuzzDictionary) -> R) -> R {
189        f(&self.inner.borrow())
190    }
191}
192
193impl From<EvmFuzzState> for InvariantFuzzState {
194    fn from(state: EvmFuzzState) -> Self {
195        state.into_invariant()
196    }
197}
198
199// We're using `IndexSet` to have a stable element order when restoring persisted state, as well as
200// for performance when iterating over the sets.
201/// Maximum number of persistent values from sancov trace-cmp.
202const MAX_PERSISTENT_VALUES: usize = 2048;
203
204#[derive(Clone)]
205pub struct FuzzDictionary {
206    /// Collected state values.
207    state_values: B256IndexSet,
208    /// Addresses that already had their PUSH bytes collected.
209    addresses: AddressIndexSet,
210    /// Configuration for the dictionary.
211    config: FuzzDictionaryConfig,
212    /// Number of state values initially collected from db.
213    /// Used to revert new collected values at the end of each run.
214    db_state_values: usize,
215    /// Number of address values initially collected from db.
216    /// Used to revert new collected addresses at the end of each run.
217    db_addresses: usize,
218    /// Typed runtime sample values persisted across invariant runs.
219    /// Initially seeded with literal values collected from the source code.
220    sample_values: HashMap<DynSolType, B256IndexSet>,
221    /// Lazily initialized dictionary of literal values collected from the source code.
222    literal_values: LiteralsDictionary,
223    /// Tracks whether literals from `literal_values` have been merged into `sample_values`.
224    ///
225    /// Set to `true` on first call to `seed_samples()`. Before seeding, `samples()` checks both
226    /// maps separately. After seeding, literals are merged in, so only `sample_values` is checked.
227    samples_seeded: bool,
228    /// Persistent values from sancov trace-cmp that survive `revert()` across runs.
229    persistent_values: B256IndexSet,
230
231    misses: usize,
232    hits: usize,
233}
234
235impl fmt::Debug for FuzzDictionary {
236    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
237        f.debug_struct("FuzzDictionary")
238            .field("state_values", &self.state_values.len())
239            .field("addresses", &self.addresses)
240            .field("persistent_values", &self.persistent_values.len())
241            .finish()
242    }
243}
244
245impl Default for FuzzDictionary {
246    fn default() -> Self {
247        Self::new(Default::default())
248    }
249}
250
251impl FuzzDictionary {
252    pub fn new(config: FuzzDictionaryConfig) -> Self {
253        let mut dictionary = Self {
254            config,
255            samples_seeded: false,
256
257            state_values: Default::default(),
258            addresses: Default::default(),
259            db_state_values: Default::default(),
260            db_addresses: Default::default(),
261            sample_values: Default::default(),
262            literal_values: Default::default(),
263            persistent_values: Default::default(),
264            misses: Default::default(),
265            hits: Default::default(),
266        };
267        dictionary.prefill();
268        dictionary
269    }
270
271    /// Insert common values into the dictionary at initialization.
272    fn prefill(&mut self) {
273        self.insert_value(B256::ZERO);
274    }
275
276    /// Seeds `sample_values` with all words from the [`LiteralsDictionary`].
277    /// Should only be called once per dictionary lifetime.
278    #[cold]
279    fn seed_samples(&mut self) {
280        trace!("seeding `sample_values` from literal dictionary");
281        self.sample_values
282            .extend(self.literal_values.get().words.iter().map(|(k, v)| (k.clone(), v.clone())));
283        self.samples_seeded = true;
284    }
285
286    /// Insert values from initial db state into fuzz dictionary.
287    /// These values are persisted across invariant runs.
288    fn insert_db_values(&mut self, db_state: Vec<(&Address, &DbAccount)>) {
289        for (address, account) in db_state {
290            // Insert basic account information
291            self.insert_value(address.into_word());
292            // Insert push bytes
293            self.insert_push_bytes_values(address, &account.info);
294            // Insert storage values.
295            if self.config.include_storage {
296                // Sort storage values before inserting to ensure deterministic dictionary.
297                let values = account.storage.iter().collect::<BTreeMap<_, _>>();
298                for (slot, value) in values {
299                    self.insert_storage_value(slot, value, None, None);
300                }
301            }
302        }
303
304        // We need at least some state data if DB is empty,
305        // otherwise we can't select random data for state fuzzing.
306        if self.values().is_empty() {
307            // Prefill with a random address.
308            self.insert_value(Address::random().into_word());
309        }
310
311        // Record number of values and addresses inserted from db to be used for reverting at the
312        // end of each run.
313        self.db_state_values = self.state_values.len();
314        self.db_addresses = self.addresses.len();
315    }
316
317    /// Insert values collected from call result into fuzz dictionary.
318    fn insert_result_values(
319        &mut self,
320        function: Option<&Function>,
321        result: &Bytes,
322        run_depth: u32,
323    ) {
324        if let Some(function) = function
325            && !function.outputs.is_empty()
326        {
327            // Decode result and collect samples to be used in subsequent fuzz runs.
328            if let Ok(decoded_result) = function.abi_decode_output(result) {
329                self.insert_sample_values(decoded_result, run_depth);
330            }
331        }
332    }
333
334    /// Insert values from call log topics and data into fuzz dictionary.
335    fn insert_logs_values(&mut self, abi: Option<&JsonAbi>, logs: &[Log], run_depth: u32) {
336        let mut samples = Vec::new();
337        // Decode logs with known events and collect samples from indexed fields and event body.
338        for log in logs {
339            let mut log_decoded = false;
340            // Try to decode log with events from contract abi.
341            if let Some(abi) = abi {
342                for event in abi.events() {
343                    if let Ok(decoded_event) = event.decode_log(log) {
344                        samples.extend(decoded_event.indexed);
345                        samples.extend(decoded_event.body);
346                        log_decoded = true;
347                        break;
348                    }
349                }
350            }
351
352            // If we weren't able to decode event then we insert raw data in fuzz dictionary.
353            if !log_decoded {
354                for &topic in log.topics() {
355                    self.insert_value(topic);
356                }
357                let chunks = log.data.data.chunks_exact(32);
358                let rem = chunks.remainder();
359                for chunk in chunks {
360                    self.insert_value(chunk.try_into().unwrap());
361                }
362                if !rem.is_empty() {
363                    self.insert_value(B256::right_padding_from(rem));
364                }
365            }
366        }
367
368        // Insert samples collected from current call in fuzz dictionary.
369        self.insert_sample_values(samples, run_depth);
370    }
371
372    /// Insert values from call state changeset into fuzz dictionary.
373    /// These values are removed at the end of current run.
374    fn insert_new_state_values(
375        &mut self,
376        state_changeset: &StateChangeset,
377        storage_layouts: &HashMap<Address, Arc<StorageLayout>>,
378        mapping_slots: Option<&AddressMap<MappingSlots>>,
379    ) {
380        for (address, account) in state_changeset {
381            // Insert basic account information.
382            self.insert_value(address.into_word());
383            // Insert push bytes.
384            self.insert_push_bytes_values(address, &account.info);
385            // Insert storage values.
386            if self.config.include_storage {
387                let slot_identifier =
388                    storage_layouts.get(address).map(|layout| SlotIdentifier::new(layout.clone()));
389                trace!(
390                    "{address:?} has mapping_slots {}",
391                    mapping_slots.is_some_and(|m| m.contains_key(address))
392                );
393                let mapping_slots = mapping_slots.and_then(|m| m.get(address));
394                for (slot, value) in &account.storage {
395                    self.insert_storage_value(
396                        slot,
397                        &value.present_value,
398                        slot_identifier.as_ref(),
399                        mapping_slots,
400                    );
401                }
402            }
403        }
404    }
405
406    /// Insert values from push bytes into fuzz dictionary.
407    /// Values are collected only once for a given address.
408    /// If values are newly collected then they are removed at the end of current run.
409    fn insert_push_bytes_values(&mut self, address: &Address, account_info: &AccountInfo) {
410        if self.config.include_push_bytes
411            && !self.addresses.contains(address)
412            && let Some(code) = &account_info.code
413        {
414            self.insert_address(*address);
415            if !self.values_full() {
416                self.collect_push_bytes(ignore_metadata_hash(code.original_byte_slice()));
417            }
418        }
419    }
420
421    fn collect_push_bytes(&mut self, code: &[u8]) {
422        let len = code.len().min(PUSH_BYTE_ANALYSIS_LIMIT);
423        let code = &code[..len];
424        for inst in InstIter::new(code) {
425            // Don't add 0 to the dictionary as it's already present.
426            if !inst.immediate.is_empty()
427                && let Some(push_value) = U256::try_from_be_slice(inst.immediate)
428                && push_value != U256::ZERO
429            {
430                self.insert_value_u256(push_value);
431            }
432        }
433    }
434
435    /// Insert values from single storage slot and storage value into fuzz dictionary.
436    /// Uses [`SlotIdentifier`] to identify storage slots types.
437    fn insert_storage_value(
438        &mut self,
439        slot: &U256,
440        value: &U256,
441        slot_identifier: Option<&SlotIdentifier>,
442        mapping_slots: Option<&MappingSlots>,
443    ) {
444        let slot = B256::from(*slot);
445        let value = B256::from(*value);
446
447        // Always insert the slot itself
448        self.insert_value(slot);
449
450        // If we have a storage layout, use SlotIdentifier for better type identification.
451        if let Some(slot_identifier) = slot_identifier
452            // Identify slot type.
453            && let Some(slot_info) = slot_identifier.identify(&slot, mapping_slots)
454            && slot_info.decode(value).is_some()
455        {
456            trace!(?slot_info, "inserting typed storage value");
457            if !self.samples_seeded {
458                self.seed_samples();
459            }
460            self.sample_values.entry(slot_info.slot_type.dyn_sol_type).or_default().insert(value);
461        } else {
462            self.insert_value_u256(value.into());
463        }
464    }
465
466    /// Insert address into fuzz dictionary.
467    /// If address is newly collected then it is removed by index at the end of current run.
468    fn insert_address(&mut self, address: Address) {
469        if self.addresses.len() < self.config.max_fuzz_dictionary_addresses {
470            self.addresses.insert(address);
471        }
472    }
473
474    /// Insert raw value into fuzz dictionary.
475    ///
476    /// If value is newly collected then it is removed by index at the end of current run.
477    ///
478    /// Returns true if the value was inserted.
479    fn insert_value(&mut self, value: B256) -> bool {
480        let insert = !self.values_full();
481        if insert {
482            let new_value = self.state_values.insert(value);
483            let counter = if new_value { &mut self.misses } else { &mut self.hits };
484            *counter += 1;
485        }
486        insert
487    }
488
489    /// Insert a persistent value that survives `revert()` across invariant runs.
490    /// Used for trace-cmp operands that should compound over time.
491    fn insert_persistent_value(&mut self, value: B256) {
492        if self.persistent_values.len() >= MAX_PERSISTENT_VALUES {
493            return;
494        }
495        if self.persistent_values.insert(value) && self.state_values.insert(value) {
496            self.db_state_values += 1;
497        }
498    }
499
500    /// Insert a typed trace-cmp value into the `sample_values` map.
501    /// Maps sancov width to `DynSolType` buckets and promotes to larger types.
502    fn insert_typed_cmp_value(&mut self, width: u8, value: B256) {
503        if !self.samples_seeded {
504            self.seed_samples();
505        }
506
507        const MAX_TYPED_CMP_PER_BUCKET: usize = 1024;
508
509        let native_type = match width {
510            8 => DynSolType::Uint(8),
511            16 => DynSolType::Uint(16),
512            32 => DynSolType::Uint(32),
513            64 => DynSolType::Uint(64),
514            _ => DynSolType::Uint(256),
515        };
516
517        let insert = |map: &mut HashMap<DynSolType, B256IndexSet>, ty: DynSolType, val: B256| {
518            let bucket = map.entry(ty).or_default();
519            if bucket.len() < MAX_TYPED_CMP_PER_BUCKET {
520                bucket.insert(val);
521            }
522        };
523
524        insert(&mut self.sample_values, native_type, value);
525
526        if width <= 64 {
527            insert(&mut self.sample_values, DynSolType::Uint(128), value);
528            insert(&mut self.sample_values, DynSolType::Uint(256), value);
529            insert(&mut self.sample_values, DynSolType::Int(256), value);
530        }
531    }
532
533    fn insert_value_u256(&mut self, value: U256) -> bool {
534        // Also add the value below and above the push value to the dictionary.
535        let one = U256::from(1);
536        self.insert_value(value.into())
537            | self.insert_value((value.wrapping_sub(one)).into())
538            | self.insert_value((value.wrapping_add(one)).into())
539    }
540
541    fn values_full(&self) -> bool {
542        self.state_values.len() >= self.config.max_fuzz_dictionary_values
543    }
544
545    /// Insert sample values that are reused across multiple runs.
546    /// The number of samples is limited to invariant run depth.
547    /// If collected samples limit is reached then values are inserted as regular values.
548    pub fn insert_sample_values(
549        &mut self,
550        sample_values: impl IntoIterator<Item = DynSolValue>,
551        limit: u32,
552    ) {
553        if !self.samples_seeded {
554            self.seed_samples();
555        }
556        for sample in sample_values {
557            if let (Some(sample_type), Some(sample_value)) = (sample.as_type(), sample.as_word()) {
558                if let Some(values) = self.sample_values.get_mut(&sample_type) {
559                    if values.len() < limit as usize {
560                        values.insert(sample_value);
561                    } else {
562                        // Insert as state value (will be removed at the end of the run).
563                        self.insert_value(sample_value);
564                    }
565                } else {
566                    self.sample_values.entry(sample_type).or_default().insert(sample_value);
567                }
568            }
569        }
570    }
571
572    pub const fn values(&self) -> &B256IndexSet {
573        &self.state_values
574    }
575
576    pub fn len(&self) -> usize {
577        self.state_values.len()
578    }
579
580    pub fn is_empty(&self) -> bool {
581        self.state_values.is_empty()
582    }
583
584    /// Returns sample values for a given type, checking both runtime samples and literals.
585    ///
586    /// Before `seed_samples()` is called, checks both `literal_values` and `sample_values`
587    /// separately. After seeding, all literal values are merged into `sample_values`.
588    #[inline]
589    pub fn samples(&self, param_type: &DynSolType) -> Option<&B256IndexSet> {
590        // If not seeded yet, return literals
591        if !self.samples_seeded {
592            return self.literal_values.get().words.get(param_type);
593        }
594
595        self.sample_values.get(param_type)
596    }
597
598    /// Returns the collected literal strings, triggering initialization if needed.
599    #[inline]
600    pub fn ast_strings(&self) -> &IndexSet<String> {
601        &self.literal_values.get().strings
602    }
603
604    /// Returns the collected literal bytes (hex strings), triggering initialization if needed.
605    #[inline]
606    pub fn ast_bytes(&self) -> &IndexSet<Bytes> {
607        &self.literal_values.get().bytes
608    }
609
610    #[inline]
611    pub const fn addresses(&self) -> &AddressIndexSet {
612        &self.addresses
613    }
614
615    /// Revert values and addresses collected during the run by truncating to initial db len.
616    pub fn revert(&mut self) {
617        self.state_values.truncate(self.db_state_values);
618        self.addresses.truncate(self.db_addresses);
619    }
620
621    pub fn log_stats(&self) {
622        trace!(
623            addresses.len = self.addresses.len(),
624            sample.len = self.sample_values.len(),
625            state.len = self.state_values.len(),
626            state.misses = self.misses,
627            state.hits = self.hits,
628            "FuzzDictionary stats",
629        );
630    }
631
632    #[cfg(test)]
633    /// Test-only helper to seed the dictionary with literal values.
634    pub(crate) fn seed_literals(&mut self, map: super::LiteralMaps) {
635        self.literal_values.set(map);
636    }
637}