Skip to main content

foundry_evm_fuzz/strategies/
state.rs

1use crate::{
2    BasicTxDetails, invariant::FuzzRunIdentifiedContracts, strategies::literals::LiteralsDictionary,
3};
4use alloy_dyn_abi::{DynSolType, DynSolValue, EventExt, FunctionExt};
5use alloy_json_abi::{Function, JsonAbi};
6use alloy_primitives::{
7    Address, B256, Bytes, Log, U256,
8    map::{AddressIndexSet, AddressMap, B256IndexSet, HashMap, IndexSet},
9};
10use foundry_common::{
11    ignore_metadata_hash, mapping_slots::MappingSlots, slot_identifier::SlotIdentifier,
12};
13use foundry_compilers::artifacts::StorageLayout;
14use foundry_config::FuzzDictionaryConfig;
15use foundry_evm_core::{bytecode::InstIter, utils::StateChangeset};
16use parking_lot::{RawRwLock, RwLock, lock_api::RwLockReadGuard};
17use revm::{
18    database::{CacheDB, DatabaseRef, DbAccount},
19    state::AccountInfo,
20};
21use std::{collections::BTreeMap, fmt, sync::Arc};
22
23/// The maximum number of bytes we will look at in bytecodes to find push bytes (24 KiB).
24///
25/// This is to limit the performance impact of fuzz tests that might deploy arbitrarily sized
26/// bytecode (as is the case with Solmate).
27const PUSH_BYTE_ANALYSIS_LIMIT: usize = 24 * 1024;
28
29/// A set of arbitrary 32 byte data from the VM used to generate values for the strategy.
30///
31/// Wrapped in a shareable container.
32#[derive(Clone, Debug)]
33pub struct EvmFuzzState {
34    inner: Arc<RwLock<FuzzDictionary>>,
35    /// Addresses of external libraries deployed in test setup, excluded from fuzz test inputs.
36    pub deployed_libs: Vec<Address>,
37    /// Records mapping accesses. Used to identify storage slots belonging to mappings and sampling
38    /// the values in the [`FuzzDictionary`].
39    ///
40    /// Only needed when [`StorageLayout`] is available.
41    pub(crate) mapping_slots: Option<AddressMap<MappingSlots>>,
42}
43
44impl EvmFuzzState {
45    #[cfg(test)]
46    pub(crate) fn test() -> Self {
47        Self::new(
48            &[],
49            &CacheDB::<revm::database::EmptyDB>::default(),
50            FuzzDictionaryConfig::default(),
51            None,
52        )
53    }
54
55    pub fn new<DB: DatabaseRef>(
56        deployed_libs: &[Address],
57        db: &CacheDB<DB>,
58        config: FuzzDictionaryConfig,
59        literals: Option<&LiteralsDictionary>,
60    ) -> Self {
61        // Sort accounts to ensure deterministic dictionary generation from the same setUp state.
62        let mut accs = db.cache.accounts.iter().collect::<Vec<_>>();
63        accs.sort_by_key(|(address, _)| *address);
64
65        // Create fuzz dictionary and insert values from db state.
66        let mut dictionary = FuzzDictionary::new(config);
67        dictionary.insert_db_values(accs);
68        if let Some(literals) = literals {
69            dictionary.literal_values = literals.clone();
70        }
71
72        Self {
73            inner: Arc::new(RwLock::new(dictionary)),
74            deployed_libs: deployed_libs.to_vec(),
75            mapping_slots: None,
76        }
77    }
78
79    pub fn with_mapping_slots(mut self, mapping_slots: AddressMap<MappingSlots>) -> Self {
80        self.mapping_slots = Some(mapping_slots);
81        self
82    }
83
84    pub fn collect_values(&self, values: impl IntoIterator<Item = B256>) {
85        let mut dict = self.inner.write();
86        for value in values {
87            dict.insert_value(value);
88        }
89    }
90
91    /// Collects state changes from a [StateChangeset] and logs into an [EvmFuzzState] according to
92    /// the given [FuzzDictionaryConfig].
93    pub fn collect_values_from_call(
94        &self,
95        fuzzed_contracts: &FuzzRunIdentifiedContracts,
96        tx: &BasicTxDetails,
97        result: &Bytes,
98        logs: &[Log],
99        state_changeset: &StateChangeset,
100        run_depth: u32,
101    ) {
102        let mut dict = self.inner.write();
103        {
104            let targets = fuzzed_contracts.targets.lock();
105            let (target_abi, target_function) = targets.fuzzed_artifacts(tx);
106            dict.insert_logs_values(target_abi, logs, run_depth);
107            dict.insert_result_values(target_function, result, run_depth);
108            // Get storage layouts for contracts in the state changeset
109            let storage_layouts = targets.get_storage_layouts();
110            dict.insert_new_state_values(
111                state_changeset,
112                &storage_layouts,
113                self.mapping_slots.as_ref(),
114            );
115        }
116    }
117
118    /// Collects typed trace-cmp operands from sancov-instrumented code.
119    /// Values are inserted into both persistent state values (survive reverts) and typed
120    /// sample buckets (for ABI-aware mutation).
121    pub fn collect_typed_cmp_values(&self, values: impl IntoIterator<Item = (u8, B256)>) {
122        let mut dict = self.inner.write();
123        for (width, value) in values {
124            dict.insert_persistent_value(value);
125            dict.insert_typed_cmp_value(width, value);
126        }
127    }
128
129    /// Removes all newly added entries from the dictionary.
130    ///
131    /// Should be called between fuzz/invariant runs to avoid accumulating data derived from fuzz
132    /// inputs.
133    pub fn revert(&self) {
134        self.inner.write().revert();
135    }
136
137    pub fn dictionary_read(&self) -> RwLockReadGuard<'_, RawRwLock, FuzzDictionary> {
138        self.inner.read()
139    }
140
141    /// Logs stats about the current state.
142    pub fn log_stats(&self) {
143        self.inner.read().log_stats();
144    }
145
146    /// Test-only helper to seed the dictionary with literal values.
147    #[cfg(test)]
148    pub(crate) fn seed_literals(&self, map: super::LiteralMaps) {
149        self.inner.write().seed_literals(map);
150    }
151}
152
153// We're using `IndexSet` to have a stable element order when restoring persisted state, as well as
154// for performance when iterating over the sets.
155/// Maximum number of persistent values from sancov trace-cmp.
156const MAX_PERSISTENT_VALUES: usize = 2048;
157
158pub struct FuzzDictionary {
159    /// Collected state values.
160    state_values: B256IndexSet,
161    /// Addresses that already had their PUSH bytes collected.
162    addresses: AddressIndexSet,
163    /// Configuration for the dictionary.
164    config: FuzzDictionaryConfig,
165    /// Number of state values initially collected from db.
166    /// Used to revert new collected values at the end of each run.
167    db_state_values: usize,
168    /// Number of address values initially collected from db.
169    /// Used to revert new collected addresses at the end of each run.
170    db_addresses: usize,
171    /// Typed runtime sample values persisted across invariant runs.
172    /// Initially seeded with literal values collected from the source code.
173    sample_values: HashMap<DynSolType, B256IndexSet>,
174    /// Lazily initialized dictionary of literal values collected from the source code.
175    literal_values: LiteralsDictionary,
176    /// Tracks whether literals from `literal_values` have been merged into `sample_values`.
177    ///
178    /// Set to `true` on first call to `seed_samples()`. Before seeding, `samples()` checks both
179    /// maps separately. After seeding, literals are merged in, so only `sample_values` is checked.
180    samples_seeded: bool,
181    /// Persistent values from sancov trace-cmp that survive `revert()` across runs.
182    persistent_values: B256IndexSet,
183
184    misses: usize,
185    hits: usize,
186}
187
188impl fmt::Debug for FuzzDictionary {
189    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
190        f.debug_struct("FuzzDictionary")
191            .field("state_values", &self.state_values.len())
192            .field("addresses", &self.addresses)
193            .field("persistent_values", &self.persistent_values.len())
194            .finish()
195    }
196}
197
198impl Default for FuzzDictionary {
199    fn default() -> Self {
200        Self::new(Default::default())
201    }
202}
203
204impl FuzzDictionary {
205    pub fn new(config: FuzzDictionaryConfig) -> Self {
206        let mut dictionary = Self {
207            config,
208            samples_seeded: false,
209
210            state_values: Default::default(),
211            addresses: Default::default(),
212            db_state_values: Default::default(),
213            db_addresses: Default::default(),
214            sample_values: Default::default(),
215            literal_values: Default::default(),
216            persistent_values: Default::default(),
217            misses: Default::default(),
218            hits: Default::default(),
219        };
220        dictionary.prefill();
221        dictionary
222    }
223
224    /// Insert common values into the dictionary at initialization.
225    fn prefill(&mut self) {
226        self.insert_value(B256::ZERO);
227    }
228
229    /// Seeds `sample_values` with all words from the [`LiteralsDictionary`].
230    /// Should only be called once per dictionary lifetime.
231    #[cold]
232    fn seed_samples(&mut self) {
233        trace!("seeding `sample_values` from literal dictionary");
234        self.sample_values
235            .extend(self.literal_values.get().words.iter().map(|(k, v)| (k.clone(), v.clone())));
236        self.samples_seeded = true;
237    }
238
239    /// Insert values from initial db state into fuzz dictionary.
240    /// These values are persisted across invariant runs.
241    fn insert_db_values(&mut self, db_state: Vec<(&Address, &DbAccount)>) {
242        for (address, account) in db_state {
243            // Insert basic account information
244            self.insert_value(address.into_word());
245            // Insert push bytes
246            self.insert_push_bytes_values(address, &account.info);
247            // Insert storage values.
248            if self.config.include_storage {
249                // Sort storage values before inserting to ensure deterministic dictionary.
250                let values = account.storage.iter().collect::<BTreeMap<_, _>>();
251                for (slot, value) in values {
252                    self.insert_storage_value(slot, value, None, None);
253                }
254            }
255        }
256
257        // We need at least some state data if DB is empty,
258        // otherwise we can't select random data for state fuzzing.
259        if self.values().is_empty() {
260            // Prefill with a random address.
261            self.insert_value(Address::random().into_word());
262        }
263
264        // Record number of values and addresses inserted from db to be used for reverting at the
265        // end of each run.
266        self.db_state_values = self.state_values.len();
267        self.db_addresses = self.addresses.len();
268    }
269
270    /// Insert values collected from call result into fuzz dictionary.
271    fn insert_result_values(
272        &mut self,
273        function: Option<&Function>,
274        result: &Bytes,
275        run_depth: u32,
276    ) {
277        if let Some(function) = function
278            && !function.outputs.is_empty()
279        {
280            // Decode result and collect samples to be used in subsequent fuzz runs.
281            if let Ok(decoded_result) = function.abi_decode_output(result) {
282                self.insert_sample_values(decoded_result, run_depth);
283            }
284        }
285    }
286
287    /// Insert values from call log topics and data into fuzz dictionary.
288    fn insert_logs_values(&mut self, abi: Option<&JsonAbi>, logs: &[Log], run_depth: u32) {
289        let mut samples = Vec::new();
290        // Decode logs with known events and collect samples from indexed fields and event body.
291        for log in logs {
292            let mut log_decoded = false;
293            // Try to decode log with events from contract abi.
294            if let Some(abi) = abi {
295                for event in abi.events() {
296                    if let Ok(decoded_event) = event.decode_log(log) {
297                        samples.extend(decoded_event.indexed);
298                        samples.extend(decoded_event.body);
299                        log_decoded = true;
300                        break;
301                    }
302                }
303            }
304
305            // If we weren't able to decode event then we insert raw data in fuzz dictionary.
306            if !log_decoded {
307                for &topic in log.topics() {
308                    self.insert_value(topic);
309                }
310                let chunks = log.data.data.chunks_exact(32);
311                let rem = chunks.remainder();
312                for chunk in chunks {
313                    self.insert_value(chunk.try_into().unwrap());
314                }
315                if !rem.is_empty() {
316                    self.insert_value(B256::right_padding_from(rem));
317                }
318            }
319        }
320
321        // Insert samples collected from current call in fuzz dictionary.
322        self.insert_sample_values(samples, run_depth);
323    }
324
325    /// Insert values from call state changeset into fuzz dictionary.
326    /// These values are removed at the end of current run.
327    fn insert_new_state_values(
328        &mut self,
329        state_changeset: &StateChangeset,
330        storage_layouts: &HashMap<Address, Arc<StorageLayout>>,
331        mapping_slots: Option<&AddressMap<MappingSlots>>,
332    ) {
333        for (address, account) in state_changeset {
334            // Insert basic account information.
335            self.insert_value(address.into_word());
336            // Insert push bytes.
337            self.insert_push_bytes_values(address, &account.info);
338            // Insert storage values.
339            if self.config.include_storage {
340                let slot_identifier =
341                    storage_layouts.get(address).map(|layout| SlotIdentifier::new(layout.clone()));
342                trace!(
343                    "{address:?} has mapping_slots {}",
344                    mapping_slots.is_some_and(|m| m.contains_key(address))
345                );
346                let mapping_slots = mapping_slots.and_then(|m| m.get(address));
347                for (slot, value) in &account.storage {
348                    self.insert_storage_value(
349                        slot,
350                        &value.present_value,
351                        slot_identifier.as_ref(),
352                        mapping_slots,
353                    );
354                }
355            }
356        }
357    }
358
359    /// Insert values from push bytes into fuzz dictionary.
360    /// Values are collected only once for a given address.
361    /// If values are newly collected then they are removed at the end of current run.
362    fn insert_push_bytes_values(&mut self, address: &Address, account_info: &AccountInfo) {
363        if self.config.include_push_bytes
364            && !self.addresses.contains(address)
365            && let Some(code) = &account_info.code
366        {
367            self.insert_address(*address);
368            if !self.values_full() {
369                self.collect_push_bytes(ignore_metadata_hash(code.original_byte_slice()));
370            }
371        }
372    }
373
374    fn collect_push_bytes(&mut self, code: &[u8]) {
375        let len = code.len().min(PUSH_BYTE_ANALYSIS_LIMIT);
376        let code = &code[..len];
377        for inst in InstIter::new(code) {
378            // Don't add 0 to the dictionary as it's already present.
379            if !inst.immediate.is_empty()
380                && let Some(push_value) = U256::try_from_be_slice(inst.immediate)
381                && push_value != U256::ZERO
382            {
383                self.insert_value_u256(push_value);
384            }
385        }
386    }
387
388    /// Insert values from single storage slot and storage value into fuzz dictionary.
389    /// Uses [`SlotIdentifier`] to identify storage slots types.
390    fn insert_storage_value(
391        &mut self,
392        slot: &U256,
393        value: &U256,
394        slot_identifier: Option<&SlotIdentifier>,
395        mapping_slots: Option<&MappingSlots>,
396    ) {
397        let slot = B256::from(*slot);
398        let value = B256::from(*value);
399
400        // Always insert the slot itself
401        self.insert_value(slot);
402
403        // If we have a storage layout, use SlotIdentifier for better type identification.
404        if let Some(slot_identifier) = slot_identifier
405            // Identify slot type.
406            && let Some(slot_info) = slot_identifier.identify(&slot, mapping_slots)
407            && slot_info.decode(value).is_some()
408        {
409            trace!(?slot_info, "inserting typed storage value");
410            if !self.samples_seeded {
411                self.seed_samples();
412            }
413            self.sample_values.entry(slot_info.slot_type.dyn_sol_type).or_default().insert(value);
414        } else {
415            self.insert_value_u256(value.into());
416        }
417    }
418
419    /// Insert address into fuzz dictionary.
420    /// If address is newly collected then it is removed by index at the end of current run.
421    fn insert_address(&mut self, address: Address) {
422        if self.addresses.len() < self.config.max_fuzz_dictionary_addresses {
423            self.addresses.insert(address);
424        }
425    }
426
427    /// Insert raw value into fuzz dictionary.
428    ///
429    /// If value is newly collected then it is removed by index at the end of current run.
430    ///
431    /// Returns true if the value was inserted.
432    fn insert_value(&mut self, value: B256) -> bool {
433        let insert = !self.values_full();
434        if insert {
435            let new_value = self.state_values.insert(value);
436            let counter = if new_value { &mut self.misses } else { &mut self.hits };
437            *counter += 1;
438        }
439        insert
440    }
441
442    /// Insert a persistent value that survives `revert()` across invariant runs.
443    /// Used for trace-cmp operands that should compound over time.
444    fn insert_persistent_value(&mut self, value: B256) {
445        if self.persistent_values.len() >= MAX_PERSISTENT_VALUES {
446            return;
447        }
448        if self.persistent_values.insert(value) && self.state_values.insert(value) {
449            self.db_state_values += 1;
450        }
451    }
452
453    /// Insert a typed trace-cmp value into the `sample_values` map.
454    /// Maps sancov width to `DynSolType` buckets and promotes to larger types.
455    fn insert_typed_cmp_value(&mut self, width: u8, value: B256) {
456        if !self.samples_seeded {
457            self.seed_samples();
458        }
459
460        const MAX_TYPED_CMP_PER_BUCKET: usize = 1024;
461
462        let native_type = match width {
463            8 => DynSolType::Uint(8),
464            16 => DynSolType::Uint(16),
465            32 => DynSolType::Uint(32),
466            64 => DynSolType::Uint(64),
467            _ => DynSolType::Uint(256),
468        };
469
470        let insert = |map: &mut HashMap<DynSolType, B256IndexSet>, ty: DynSolType, val: B256| {
471            let bucket = map.entry(ty).or_default();
472            if bucket.len() < MAX_TYPED_CMP_PER_BUCKET {
473                bucket.insert(val);
474            }
475        };
476
477        insert(&mut self.sample_values, native_type, value);
478
479        if width <= 64 {
480            insert(&mut self.sample_values, DynSolType::Uint(128), value);
481            insert(&mut self.sample_values, DynSolType::Uint(256), value);
482            insert(&mut self.sample_values, DynSolType::Int(256), value);
483        }
484    }
485
486    fn insert_value_u256(&mut self, value: U256) -> bool {
487        // Also add the value below and above the push value to the dictionary.
488        let one = U256::from(1);
489        self.insert_value(value.into())
490            | self.insert_value((value.wrapping_sub(one)).into())
491            | self.insert_value((value.wrapping_add(one)).into())
492    }
493
494    fn values_full(&self) -> bool {
495        self.state_values.len() >= self.config.max_fuzz_dictionary_values
496    }
497
498    /// Insert sample values that are reused across multiple runs.
499    /// The number of samples is limited to invariant run depth.
500    /// If collected samples limit is reached then values are inserted as regular values.
501    pub fn insert_sample_values(
502        &mut self,
503        sample_values: impl IntoIterator<Item = DynSolValue>,
504        limit: u32,
505    ) {
506        if !self.samples_seeded {
507            self.seed_samples();
508        }
509        for sample in sample_values {
510            if let (Some(sample_type), Some(sample_value)) = (sample.as_type(), sample.as_word()) {
511                if let Some(values) = self.sample_values.get_mut(&sample_type) {
512                    if values.len() < limit as usize {
513                        values.insert(sample_value);
514                    } else {
515                        // Insert as state value (will be removed at the end of the run).
516                        self.insert_value(sample_value);
517                    }
518                } else {
519                    self.sample_values.entry(sample_type).or_default().insert(sample_value);
520                }
521            }
522        }
523    }
524
525    pub const fn values(&self) -> &B256IndexSet {
526        &self.state_values
527    }
528
529    pub fn len(&self) -> usize {
530        self.state_values.len()
531    }
532
533    pub fn is_empty(&self) -> bool {
534        self.state_values.is_empty()
535    }
536
537    /// Returns sample values for a given type, checking both runtime samples and literals.
538    ///
539    /// Before `seed_samples()` is called, checks both `literal_values` and `sample_values`
540    /// separately. After seeding, all literal values are merged into `sample_values`.
541    #[inline]
542    pub fn samples(&self, param_type: &DynSolType) -> Option<&B256IndexSet> {
543        // If not seeded yet, return literals
544        if !self.samples_seeded {
545            return self.literal_values.get().words.get(param_type);
546        }
547
548        self.sample_values.get(param_type)
549    }
550
551    /// Returns the collected literal strings, triggering initialization if needed.
552    #[inline]
553    pub fn ast_strings(&self) -> &IndexSet<String> {
554        &self.literal_values.get().strings
555    }
556
557    /// Returns the collected literal bytes (hex strings), triggering initialization if needed.
558    #[inline]
559    pub fn ast_bytes(&self) -> &IndexSet<Bytes> {
560        &self.literal_values.get().bytes
561    }
562
563    #[inline]
564    pub const fn addresses(&self) -> &AddressIndexSet {
565        &self.addresses
566    }
567
568    /// Revert values and addresses collected during the run by truncating to initial db len.
569    pub fn revert(&mut self) {
570        self.state_values.truncate(self.db_state_values);
571        self.addresses.truncate(self.db_addresses);
572    }
573
574    pub fn log_stats(&self) {
575        trace!(
576            addresses.len = self.addresses.len(),
577            sample.len = self.sample_values.len(),
578            state.len = self.state_values.len(),
579            state.misses = self.misses,
580            state.hits = self.hits,
581            "FuzzDictionary stats",
582        );
583    }
584
585    #[cfg(test)]
586    /// Test-only helper to seed the dictionary with literal values.
587    pub(crate) fn seed_literals(&mut self, map: super::LiteralMaps) {
588        self.literal_values.set(map);
589    }
590}