foundry_evm_fuzz/strategies/
state.rs

1use crate::{
2    BasicTxDetails, invariant::FuzzRunIdentifiedContracts, strategies::literals::LiteralsDictionary,
3};
4use alloy_dyn_abi::{DynSolType, DynSolValue, EventExt, FunctionExt};
5use alloy_json_abi::{Function, JsonAbi};
6use alloy_primitives::{
7    Address, B256, Bytes, Log, U256,
8    map::{AddressIndexSet, AddressMap, B256IndexSet, HashMap, IndexSet},
9};
10use foundry_common::{
11    compile::Analysis, ignore_metadata_hash, mapping_slots::MappingSlots,
12    slot_identifier::SlotIdentifier,
13};
14use foundry_compilers::{ProjectPathsConfig, artifacts::StorageLayout};
15use foundry_config::FuzzDictionaryConfig;
16use foundry_evm_core::{bytecode::InstIter, utils::StateChangeset};
17use parking_lot::{RawRwLock, RwLock, lock_api::RwLockReadGuard};
18use revm::{
19    database::{CacheDB, DatabaseRef, DbAccount},
20    state::AccountInfo,
21};
22use std::{collections::BTreeMap, fmt, sync::Arc};
23
24/// The maximum number of bytes we will look at in bytecodes to find push bytes (24 KiB).
25///
26/// This is to limit the performance impact of fuzz tests that might deploy arbitrarily sized
27/// bytecode (as is the case with Solmate).
28const PUSH_BYTE_ANALYSIS_LIMIT: usize = 24 * 1024;
29
30/// A set of arbitrary 32 byte data from the VM used to generate values for the strategy.
31///
32/// Wrapped in a shareable container.
33#[derive(Clone, Debug)]
34pub struct EvmFuzzState {
35    inner: Arc<RwLock<FuzzDictionary>>,
36    /// Addresses of external libraries deployed in test setup, excluded from fuzz test inputs.
37    pub deployed_libs: Vec<Address>,
38    /// Records mapping accesses. Used to identify storage slots belonging to mappings and sampling
39    /// the values in the [`FuzzDictionary`].
40    ///
41    /// Only needed when [`StorageLayout`] is available.
42    pub(crate) mapping_slots: Option<AddressMap<MappingSlots>>,
43}
44
45impl EvmFuzzState {
46    pub fn new<DB: DatabaseRef>(
47        db: &CacheDB<DB>,
48        config: FuzzDictionaryConfig,
49        deployed_libs: &[Address],
50        analysis: Option<&Analysis>,
51        paths_config: Option<&ProjectPathsConfig>,
52    ) -> Self {
53        // Sort accounts to ensure deterministic dictionary generation from the same setUp state.
54        let mut accs = db.cache.accounts.iter().collect::<Vec<_>>();
55        accs.sort_by_key(|(address, _)| *address);
56
57        // Create fuzz dictionary and insert values from db state.
58        let mut dictionary = FuzzDictionary::new(config);
59        dictionary.insert_db_values(accs);
60        dictionary.literal_values = LiteralsDictionary::new(
61            analysis.cloned(),
62            paths_config.cloned(),
63            config.max_fuzz_dictionary_literals,
64        );
65
66        Self {
67            inner: Arc::new(RwLock::new(dictionary)),
68            deployed_libs: deployed_libs.to_vec(),
69            mapping_slots: None,
70        }
71    }
72
73    pub fn with_mapping_slots(mut self, mapping_slots: AddressMap<MappingSlots>) -> Self {
74        self.mapping_slots = Some(mapping_slots);
75        self
76    }
77
78    pub fn collect_values(&self, values: impl IntoIterator<Item = B256>) {
79        let mut dict = self.inner.write();
80        for value in values {
81            dict.insert_value(value);
82        }
83    }
84
85    /// Collects state changes from a [StateChangeset] and logs into an [EvmFuzzState] according to
86    /// the given [FuzzDictionaryConfig].
87    pub fn collect_values_from_call(
88        &self,
89        fuzzed_contracts: &FuzzRunIdentifiedContracts,
90        tx: &BasicTxDetails,
91        result: &Bytes,
92        logs: &[Log],
93        state_changeset: &StateChangeset,
94        run_depth: u32,
95    ) {
96        let mut dict = self.inner.write();
97        {
98            let targets = fuzzed_contracts.targets.lock();
99            let (target_abi, target_function) = targets.fuzzed_artifacts(tx);
100            dict.insert_logs_values(target_abi, logs, run_depth);
101            dict.insert_result_values(target_function, result, run_depth);
102            // Get storage layouts for contracts in the state changeset
103            let storage_layouts = targets.get_storage_layouts();
104            dict.insert_new_state_values(
105                state_changeset,
106                &storage_layouts,
107                self.mapping_slots.as_ref(),
108            );
109        }
110    }
111
112    /// Removes all newly added entries from the dictionary.
113    ///
114    /// Should be called between fuzz/invariant runs to avoid accumulating data derived from fuzz
115    /// inputs.
116    pub fn revert(&self) {
117        self.inner.write().revert();
118    }
119
120    pub fn dictionary_read(&self) -> RwLockReadGuard<'_, RawRwLock, FuzzDictionary> {
121        self.inner.read()
122    }
123
124    /// Logs stats about the current state.
125    pub fn log_stats(&self) {
126        self.inner.read().log_stats();
127    }
128
129    #[cfg(test)]
130    /// Test-only helper to seed the dictionary with literal values.
131    pub(crate) fn seed_literals(&self, map: super::LiteralMaps) {
132        self.inner.write().seed_literals(map);
133    }
134}
135
136// We're using `IndexSet` to have a stable element order when restoring persisted state, as well as
137// for performance when iterating over the sets.
138#[derive(Default)]
139pub struct FuzzDictionary {
140    /// Collected state values.
141    state_values: B256IndexSet,
142    /// Addresses that already had their PUSH bytes collected.
143    addresses: AddressIndexSet,
144    /// Configuration for the dictionary.
145    config: FuzzDictionaryConfig,
146    /// Number of state values initially collected from db.
147    /// Used to revert new collected values at the end of each run.
148    db_state_values: usize,
149    /// Number of address values initially collected from db.
150    /// Used to revert new collected addresses at the end of each run.
151    db_addresses: usize,
152    /// Typed runtime sample values persisted across invariant runs.
153    /// Initially seeded with literal values collected from the source code.
154    sample_values: HashMap<DynSolType, B256IndexSet>,
155    /// Lazily initialized dictionary of literal values collected from the source code.
156    literal_values: LiteralsDictionary,
157    /// Tracks whether literals from `literal_values` have been merged into `sample_values`.
158    ///
159    /// Set to `true` on first call to `seed_samples()`. Before seeding, `samples()` checks both
160    /// maps separately. After seeding, literals are merged in, so only `sample_values` is checked.
161    samples_seeded: bool,
162
163    misses: usize,
164    hits: usize,
165}
166
167impl fmt::Debug for FuzzDictionary {
168    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
169        f.debug_struct("FuzzDictionary")
170            .field("state_values", &self.state_values.len())
171            .field("addresses", &self.addresses)
172            .finish()
173    }
174}
175
176impl FuzzDictionary {
177    pub fn new(config: FuzzDictionaryConfig) -> Self {
178        let mut dictionary = Self { config, samples_seeded: false, ..Default::default() };
179        dictionary.prefill();
180        dictionary
181    }
182
183    /// Insert common values into the dictionary at initialization.
184    fn prefill(&mut self) {
185        self.insert_value(B256::ZERO);
186    }
187
188    /// Seeds `sample_values` with all words from the [`LiteralsDictionary`].
189    /// Should only be called once per dictionary lifetime.
190    #[cold]
191    fn seed_samples(&mut self) {
192        trace!("seeding `sample_values` from literal dictionary");
193        self.sample_values.extend(self.literal_values.take_words());
194        self.samples_seeded = true;
195    }
196
197    /// Insert values from initial db state into fuzz dictionary.
198    /// These values are persisted across invariant runs.
199    fn insert_db_values(&mut self, db_state: Vec<(&Address, &DbAccount)>) {
200        for (address, account) in db_state {
201            // Insert basic account information
202            self.insert_value(address.into_word());
203            // Insert push bytes
204            self.insert_push_bytes_values(address, &account.info);
205            // Insert storage values.
206            if self.config.include_storage {
207                // Sort storage values before inserting to ensure deterministic dictionary.
208                let values = account.storage.iter().collect::<BTreeMap<_, _>>();
209                for (slot, value) in values {
210                    self.insert_storage_value(slot, value, None, None);
211                }
212            }
213        }
214
215        // We need at least some state data if DB is empty,
216        // otherwise we can't select random data for state fuzzing.
217        if self.values().is_empty() {
218            // Prefill with a random address.
219            self.insert_value(Address::random().into_word());
220        }
221
222        // Record number of values and addresses inserted from db to be used for reverting at the
223        // end of each run.
224        self.db_state_values = self.state_values.len();
225        self.db_addresses = self.addresses.len();
226    }
227
228    /// Insert values collected from call result into fuzz dictionary.
229    fn insert_result_values(
230        &mut self,
231        function: Option<&Function>,
232        result: &Bytes,
233        run_depth: u32,
234    ) {
235        if let Some(function) = function
236            && !function.outputs.is_empty()
237        {
238            // Decode result and collect samples to be used in subsequent fuzz runs.
239            if let Ok(decoded_result) = function.abi_decode_output(result) {
240                self.insert_sample_values(decoded_result, run_depth);
241            }
242        }
243    }
244
245    /// Insert values from call log topics and data into fuzz dictionary.
246    fn insert_logs_values(&mut self, abi: Option<&JsonAbi>, logs: &[Log], run_depth: u32) {
247        let mut samples = Vec::new();
248        // Decode logs with known events and collect samples from indexed fields and event body.
249        for log in logs {
250            let mut log_decoded = false;
251            // Try to decode log with events from contract abi.
252            if let Some(abi) = abi {
253                for event in abi.events() {
254                    if let Ok(decoded_event) = event.decode_log(log) {
255                        samples.extend(decoded_event.indexed);
256                        samples.extend(decoded_event.body);
257                        log_decoded = true;
258                        break;
259                    }
260                }
261            }
262
263            // If we weren't able to decode event then we insert raw data in fuzz dictionary.
264            if !log_decoded {
265                for &topic in log.topics() {
266                    self.insert_value(topic);
267                }
268                let chunks = log.data.data.chunks_exact(32);
269                let rem = chunks.remainder();
270                for chunk in chunks {
271                    self.insert_value(chunk.try_into().unwrap());
272                }
273                if !rem.is_empty() {
274                    self.insert_value(B256::right_padding_from(rem));
275                }
276            }
277        }
278
279        // Insert samples collected from current call in fuzz dictionary.
280        self.insert_sample_values(samples, run_depth);
281    }
282
283    /// Insert values from call state changeset into fuzz dictionary.
284    /// These values are removed at the end of current run.
285    fn insert_new_state_values(
286        &mut self,
287        state_changeset: &StateChangeset,
288        storage_layouts: &HashMap<Address, Arc<StorageLayout>>,
289        mapping_slots: Option<&AddressMap<MappingSlots>>,
290    ) {
291        for (address, account) in state_changeset {
292            // Insert basic account information.
293            self.insert_value(address.into_word());
294            // Insert push bytes.
295            self.insert_push_bytes_values(address, &account.info);
296            // Insert storage values.
297            if self.config.include_storage {
298                let storage_layout = storage_layouts.get(address).cloned();
299                trace!(
300                    "{address:?} has mapping_slots {}",
301                    mapping_slots.is_some_and(|m| m.contains_key(address))
302                );
303                let mapping_slots = mapping_slots.and_then(|m| m.get(address));
304                for (slot, value) in &account.storage {
305                    self.insert_storage_value(
306                        slot,
307                        &value.present_value,
308                        storage_layout.as_deref(),
309                        mapping_slots,
310                    );
311                }
312            }
313        }
314    }
315
316    /// Insert values from push bytes into fuzz dictionary.
317    /// Values are collected only once for a given address.
318    /// If values are newly collected then they are removed at the end of current run.
319    fn insert_push_bytes_values(&mut self, address: &Address, account_info: &AccountInfo) {
320        if self.config.include_push_bytes
321            && !self.addresses.contains(address)
322            && let Some(code) = &account_info.code
323        {
324            self.insert_address(*address);
325            if !self.values_full() {
326                self.collect_push_bytes(ignore_metadata_hash(code.original_byte_slice()));
327            }
328        }
329    }
330
331    fn collect_push_bytes(&mut self, code: &[u8]) {
332        let len = code.len().min(PUSH_BYTE_ANALYSIS_LIMIT);
333        let code = &code[..len];
334        for inst in InstIter::new(code) {
335            // Don't add 0 to the dictionary as it's already present.
336            if !inst.immediate.is_empty()
337                && let Some(push_value) = U256::try_from_be_slice(inst.immediate)
338                && push_value != U256::ZERO
339            {
340                self.insert_value_u256(push_value);
341            }
342        }
343    }
344
345    /// Insert values from single storage slot and storage value into fuzz dictionary.
346    /// Uses [`SlotIdentifier`] to identify storage slots types.
347    fn insert_storage_value(
348        &mut self,
349        slot: &U256,
350        value: &U256,
351        layout: Option<&StorageLayout>,
352        mapping_slots: Option<&MappingSlots>,
353    ) {
354        let slot = B256::from(*slot);
355        let value = B256::from(*value);
356
357        // Always insert the slot itself
358        self.insert_value(slot);
359
360        // If we have a storage layout, use SlotIdentifier for better type identification
361        if let Some(slot_identifier) =
362            layout.map(|l| SlotIdentifier::new(l.clone().into()))
363            // Identify Slot Type
364            && let Some(slot_info) = slot_identifier.identify(&slot, mapping_slots) && slot_info.decode(value).is_some()
365        {
366            trace!(?slot_info, "inserting typed storage value");
367            if !self.samples_seeded {
368                self.seed_samples();
369            }
370            self.sample_values.entry(slot_info.slot_type.dyn_sol_type).or_default().insert(value);
371        } else {
372            self.insert_value_u256(value.into());
373        }
374    }
375
376    /// Insert address into fuzz dictionary.
377    /// If address is newly collected then it is removed by index at the end of current run.
378    fn insert_address(&mut self, address: Address) {
379        if self.addresses.len() < self.config.max_fuzz_dictionary_addresses {
380            self.addresses.insert(address);
381        }
382    }
383
384    /// Insert raw value into fuzz dictionary.
385    ///
386    /// If value is newly collected then it is removed by index at the end of current run.
387    ///
388    /// Returns true if the value was inserted.
389    fn insert_value(&mut self, value: B256) -> bool {
390        let insert = !self.values_full();
391        if insert {
392            let new_value = self.state_values.insert(value);
393            let counter = if new_value { &mut self.misses } else { &mut self.hits };
394            *counter += 1;
395        }
396        insert
397    }
398
399    fn insert_value_u256(&mut self, value: U256) -> bool {
400        // Also add the value below and above the push value to the dictionary.
401        let one = U256::from(1);
402        self.insert_value(value.into())
403            | self.insert_value((value.wrapping_sub(one)).into())
404            | self.insert_value((value.wrapping_add(one)).into())
405    }
406
407    fn values_full(&self) -> bool {
408        self.state_values.len() >= self.config.max_fuzz_dictionary_values
409    }
410
411    /// Insert sample values that are reused across multiple runs.
412    /// The number of samples is limited to invariant run depth.
413    /// If collected samples limit is reached then values are inserted as regular values.
414    pub fn insert_sample_values(
415        &mut self,
416        sample_values: impl IntoIterator<Item = DynSolValue>,
417        limit: u32,
418    ) {
419        if !self.samples_seeded {
420            self.seed_samples();
421        }
422        for sample in sample_values {
423            if let (Some(sample_type), Some(sample_value)) = (sample.as_type(), sample.as_word()) {
424                if let Some(values) = self.sample_values.get_mut(&sample_type) {
425                    if values.len() < limit as usize {
426                        values.insert(sample_value);
427                    } else {
428                        // Insert as state value (will be removed at the end of the run).
429                        self.insert_value(sample_value);
430                    }
431                } else {
432                    self.sample_values.entry(sample_type).or_default().insert(sample_value);
433                }
434            }
435        }
436    }
437
438    pub fn values(&self) -> &B256IndexSet {
439        &self.state_values
440    }
441
442    pub fn len(&self) -> usize {
443        self.state_values.len()
444    }
445
446    pub fn is_empty(&self) -> bool {
447        self.state_values.is_empty()
448    }
449
450    /// Returns sample values for a given type, checking both runtime samples and literals.
451    ///
452    /// Before `seed_samples()` is called, checks both `literal_values` and `sample_values`
453    /// separately. After seeding, all literal values are merged into `sample_values`.
454    #[inline]
455    pub fn samples(&self, param_type: &DynSolType) -> Option<&B256IndexSet> {
456        // If not seeded yet, return literals
457        if !self.samples_seeded {
458            return self.literal_values.get().words.get(param_type);
459        }
460
461        self.sample_values.get(param_type)
462    }
463
464    /// Returns the collected literal strings, triggering initialization if needed.
465    #[inline]
466    pub fn ast_strings(&self) -> &IndexSet<String> {
467        &self.literal_values.get().strings
468    }
469
470    /// Returns the collected literal bytes (hex strings), triggering initialization if needed.
471    #[inline]
472    pub fn ast_bytes(&self) -> &IndexSet<Bytes> {
473        &self.literal_values.get().bytes
474    }
475
476    #[inline]
477    pub fn addresses(&self) -> &AddressIndexSet {
478        &self.addresses
479    }
480
481    /// Revert values and addresses collected during the run by truncating to initial db len.
482    pub fn revert(&mut self) {
483        self.state_values.truncate(self.db_state_values);
484        self.addresses.truncate(self.db_addresses);
485    }
486
487    pub fn log_stats(&self) {
488        trace!(
489            addresses.len = self.addresses.len(),
490            sample.len = self.sample_values.len(),
491            state.len = self.state_values.len(),
492            state.misses = self.misses,
493            state.hits = self.hits,
494            "FuzzDictionary stats",
495        );
496    }
497
498    #[cfg(test)]
499    /// Test-only helper to seed the dictionary with literal values.
500    pub(crate) fn seed_literals(&mut self, map: super::LiteralMaps) {
501        self.literal_values.set(map);
502    }
503}