foundry_evm/executors/
corpus.rs

1use crate::executors::{Executor, RawCallResult};
2use alloy_dyn_abi::JsonAbiExt;
3use alloy_json_abi::Function;
4use alloy_primitives::{Bytes, U256};
5use eyre::eyre;
6use foundry_config::FuzzCorpusConfig;
7use foundry_evm_fuzz::{
8    BasicTxDetails,
9    invariant::FuzzRunIdentifiedContracts,
10    strategies::{EvmFuzzState, mutate_param_value},
11};
12use proptest::{
13    prelude::{Just, Rng, Strategy},
14    prop_oneof,
15    strategy::{BoxedStrategy, ValueTree},
16    test_runner::TestRunner,
17};
18use serde::Serialize;
19use std::{
20    fmt,
21    path::PathBuf,
22    time::{SystemTime, UNIX_EPOCH},
23};
24use uuid::Uuid;
25
/// Suffix of the metadata file written when a corpus entry is evicted. Files in the corpus dir
/// whose name contains it are skipped when the persisted corpus is loaded.
const METADATA_SUFFIX: &str = "metadata.json";
/// Extension of persisted corpus files (`.gz` is appended to it when gzip is enabled). Also
/// stripped from the file stem when the uuid of a persisted entry is read back.
const JSON_EXTENSION: &str = ".json";
/// Threshold the ratio of new finds to total mutations of a corpus entry is compared against
/// when deciding whether the entry is favored.
const FAVORABILITY_THRESHOLD: f64 = 0.3;
/// Length (in bytes) of the edge hitcount history map.
const COVERAGE_MAP_SIZE: usize = 65536;
30
/// Possible mutation strategies to apply on a call sequence.
///
/// One variant is drawn per generated sequence; every variant is offered to the generator as an
/// equally listed alternative.
#[derive(Debug, Clone)]
enum MutationType {
    /// Splice original call sequence: a random slice of the primary sequence followed by a
    /// random slice of the secondary sequence.
    Splice,
    /// Repeat selected call several times: a random range of the sequence is replaced by copies
    /// of one randomly chosen call of the same sequence.
    Repeat,
    /// Interleave calls from two random call sequences: for each position a call is picked from
    /// either sequence.
    Interleave,
    /// Replace prefix of the original call sequence with new calls.
    Prefix,
    /// Replace suffix of the original call sequence with new calls.
    Suffix,
    /// ABI mutate random args of selected call in sequence.
    Abi,
}
47
/// Holds Corpus information.
///
/// Serialized as eviction metadata; the call sequence itself is excluded from that output and is
/// persisted separately.
#[derive(Serialize)]
struct CorpusEntry {
    // Unique corpus identifier. Also used as the file name of the persisted sequence.
    uuid: Uuid,
    // Total mutations of corpus as primary source.
    total_mutations: usize,
    // New coverage found as a result of mutating this corpus.
    new_finds_produced: usize,
    // Corpus call sequence. Skipped when serializing the entry.
    #[serde(skip_serializing)]
    tx_seq: Vec<BasicTxDetails>,
    // Whether this corpus is favored, i.e. producing new finds more often than
    // `FAVORABILITY_THRESHOLD`. Favored entries are not evicted.
    is_favored: bool,
}
64
65impl CorpusEntry {
66    /// New corpus from given call sequence and corpus path to read uuid.
67    pub fn new(tx_seq: Vec<BasicTxDetails>, path: PathBuf) -> eyre::Result<Self> {
68        let uuid = if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
69            Uuid::try_from(stem.strip_suffix(JSON_EXTENSION).unwrap_or(stem).to_string())?
70        } else {
71            Uuid::new_v4()
72        };
73        Ok(Self { uuid, total_mutations: 0, new_finds_produced: 0, tx_seq, is_favored: false })
74    }
75
76    /// New corpus with given call sequence and new uuid.
77    pub fn from_tx_seq(tx_seq: &[BasicTxDetails]) -> Self {
78        Self {
79            uuid: Uuid::new_v4(),
80            total_mutations: 0,
81            new_finds_produced: 0,
82            tx_seq: tx_seq.into(),
83            is_favored: false,
84        }
85    }
86}
87
/// Counters describing the corpus and the coverage collected during a campaign.
/// All counters start at zero.
#[derive(Serialize, Default)]
pub(crate) struct CorpusMetrics {
    // Number of edges seen during the invariant run.
    cumulative_edges_seen: usize,
    // Number of features (new hitcount bin of previously hit edge) seen during the invariant run.
    cumulative_features_seen: usize,
    // Number of corpus entries. Incremented for every entry loaded from disk or added at runtime.
    corpus_count: usize,
    // Number of corpus entries that are favored.
    favored_items: usize,
}
99
100impl fmt::Display for CorpusMetrics {
101    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102        writeln!(f)?;
103        writeln!(f, "        - cumulative edges seen: {}", self.cumulative_edges_seen)?;
104        writeln!(f, "        - cumulative features seen: {}", self.cumulative_features_seen)?;
105        writeln!(f, "        - corpus count: {}", self.corpus_count)?;
106        write!(f, "        - favored items: {}", self.favored_items)?;
107        Ok(())
108    }
109}
110
111impl CorpusMetrics {
112    /// Records number of new edges or features explored during the campaign.
113    pub fn update_seen(&mut self, is_edge: bool) {
114        if is_edge {
115            self.cumulative_edges_seen += 1;
116        } else {
117            self.cumulative_features_seen += 1;
118        }
119    }
120
121    /// Updates campaign favored items.
122    pub fn update_favored(&mut self, is_favored: bool, corpus_favored: bool) {
123        if is_favored && !corpus_favored {
124            self.favored_items += 1;
125        } else if !is_favored && corpus_favored {
126            self.favored_items -= 1;
127        }
128    }
129}
130
/// Fuzz corpus manager, used in coverage guided fuzzing mode by both stateless and stateful tests.
///
/// Keeps the in-memory corpus, the coverage history map and the campaign metrics, and generates
/// new inputs either from the tx strategy or by mutating corpus entries.
pub(crate) struct CorpusManager {
    // Fuzzed calls generator.
    tx_generator: BoxedStrategy<BasicTxDetails>,
    // Call sequence mutation strategy type generator.
    mutation_generator: BoxedStrategy<MutationType>,
    // Corpus configuration.
    config: FuzzCorpusConfig,
    // In-memory corpus, populated from persisted files and current runs.
    // Mutation is performed on these.
    in_memory_corpus: Vec<CorpusEntry>,
    // Identifier of current mutated entry. Set when a new input is derived from a corpus entry
    // and cleared once the outcome of that input is processed.
    current_mutated: Option<Uuid>,
    // Number of failed replays from persisted corpus.
    failed_replays: usize,
    // History of binned hitcount of edges seen during fuzzing.
    history_map: Vec<u8>,
    // Corpus metrics.
    pub(crate) metrics: CorpusMetrics,
}
151
152impl CorpusManager {
    /// Creates a corpus manager and, if a corpus dir is configured, loads the persisted corpus.
    ///
    /// Every readable, non-empty call sequence found in the corpus dir is replayed on a clone of
    /// `executor` to warm up the coverage history map and is then added to the in-memory corpus.
    /// Calls that cannot be replayed are counted as failed replays.
    ///
    /// `fuzzed_function` is expected for stateless fuzz tests and `fuzzed_contracts` for
    /// invariant (stateful) tests; replayed calls are committed only when `fuzzed_contracts` is
    /// provided.
    ///
    /// # Errors
    ///
    /// Returns an error if the corpus dir cannot be created or listed, if a raw EVM call fails
    /// while replaying, or if a loaded corpus file name does not yield a valid uuid.
    pub fn new(
        config: FuzzCorpusConfig,
        tx_generator: BoxedStrategy<BasicTxDetails>,
        executor: &Executor,
        fuzzed_function: Option<&Function>,
        fuzzed_contracts: Option<&FuzzRunIdentifiedContracts>,
    ) -> eyre::Result<Self> {
        let mutation_generator = prop_oneof![
            Just(MutationType::Splice),
            Just(MutationType::Repeat),
            Just(MutationType::Interleave),
            Just(MutationType::Prefix),
            Just(MutationType::Suffix),
            Just(MutationType::Abi),
        ]
        .boxed();
        let mut history_map = vec![0u8; COVERAGE_MAP_SIZE];
        let mut metrics = CorpusMetrics::default();
        let mut in_memory_corpus = vec![];
        let mut failed_replays = 0;

        // Early return if corpus dir / coverage guided fuzzing not configured.
        let Some(corpus_dir) = &config.corpus_dir else {
            return Ok(Self {
                tx_generator,
                mutation_generator,
                config,
                in_memory_corpus,
                current_mutated: None,
                failed_replays,
                history_map,
                metrics,
            });
        };

        // Ensure corpus dir for current test is created.
        if !corpus_dir.is_dir() {
            foundry_common::fs::create_dir_all(corpus_dir)?;
        }

        // A persisted tx can be replayed if the targeted contracts accept it (invariant tests)
        // or if its calldata starts with the selector of the fuzzed function (fuzz tests).
        let can_replay_tx = |tx: &BasicTxDetails| -> bool {
            fuzzed_contracts.is_some_and(|contracts| contracts.targets.lock().can_replay(tx))
                || fuzzed_function.is_some_and(|function| {
                    tx.call_details
                        .calldata
                        .get(..4)
                        .is_some_and(|selector| function.selector() == selector)
                })
        };

        'corpus_replay: for entry in std::fs::read_dir(corpus_dir)? {
            let path = entry?.path();
            if path.is_file()
                && let Some(name) = path.file_name().and_then(|s| s.to_str())
                && name.contains(METADATA_SUFFIX)
            {
                // Ignore metadata files
                continue;
            }

            // Gzipped sequences are recognized by their extension; anything else is read as
            // plain JSON.
            let read_corpus_result = match path.extension().and_then(|ext| ext.to_str()) {
                Some("gz") => foundry_common::fs::read_json_gzip_file::<Vec<BasicTxDetails>>(&path),
                _ => foundry_common::fs::read_json_file::<Vec<BasicTxDetails>>(&path),
            };

            // Entries that cannot be read or parsed are skipped, not treated as errors.
            let Ok(tx_seq) = read_corpus_result else {
                trace!(target: "corpus", "failed to load corpus from {}", path.display());
                continue;
            };

            if !tx_seq.is_empty() {
                // Warm up history map from loaded sequences.
                // Each sequence is replayed on its own clone of the executor.
                let mut executor = executor.clone();
                for tx in &tx_seq {
                    if can_replay_tx(tx) {
                        let mut call_result = executor
                            .call_raw(
                                tx.sender,
                                tx.call_details.target,
                                tx.call_details.calldata.clone(),
                                U256::ZERO,
                            )
                            .map_err(|e| eyre!(format!("Could not make raw evm call: {e}")))?;

                        let (new_coverage, is_edge) =
                            call_result.merge_edge_coverage(&mut history_map);
                        if new_coverage {
                            metrics.update_seen(is_edge);
                        }

                        // Commit only when running invariant / stateful tests.
                        if fuzzed_contracts.is_some() {
                            executor.commit(&mut call_result);
                        }
                    } else {
                        failed_replays += 1;

                        // If the only input for fuzzed function cannot be replayed, then move to
                        // next one without adding it in memory.
                        if fuzzed_function.is_some() {
                            continue 'corpus_replay;
                        }
                    }
                }

                metrics.corpus_count += 1;

                trace!(
                    target: "corpus",
                    "load sequence with len {} from corpus file {}",
                    tx_seq.len(),
                    path.display()
                );

                // Populate in memory corpus with the sequence from corpus file.
                in_memory_corpus.push(CorpusEntry::new(tx_seq, path)?);
            }
        }

        Ok(Self {
            tx_generator,
            mutation_generator,
            config,
            in_memory_corpus,
            current_mutated: None,
            failed_replays,
            history_map,
            metrics,
        })
    }
283
284    /// Updates stats for the given call sequence, if new coverage produced.
285    /// Persists the call sequence (if corpus directory is configured and new coverage) and updates
286    /// in-memory corpus.
287    pub fn process_inputs(&mut self, inputs: &[BasicTxDetails], new_coverage: bool) {
288        // Early return if corpus dir / coverage guided fuzzing is not configured.
289        let Some(corpus_dir) = &self.config.corpus_dir else {
290            return;
291        };
292
293        // Update stats of current mutated primary corpus.
294        if let Some(uuid) = &self.current_mutated {
295            if let Some(corpus) =
296                self.in_memory_corpus.iter_mut().find(|corpus| corpus.uuid.eq(uuid))
297            {
298                corpus.total_mutations += 1;
299                if new_coverage {
300                    corpus.new_finds_produced += 1
301                }
302                let is_favored = (corpus.new_finds_produced as f64 / corpus.total_mutations as f64)
303                    < FAVORABILITY_THRESHOLD;
304                self.metrics.update_favored(is_favored, corpus.is_favored);
305                corpus.is_favored = is_favored;
306
307                trace!(
308                    target: "corpus",
309                    "updated corpus {}, total mutations: {}, new finds: {}",
310                    corpus.uuid, corpus.total_mutations, corpus.new_finds_produced
311                );
312            }
313
314            self.current_mutated = None;
315        }
316
317        // Collect inputs only if current run produced new coverage.
318        if !new_coverage {
319            return;
320        }
321
322        let corpus = CorpusEntry::from_tx_seq(inputs);
323        let corpus_uuid = corpus.uuid;
324
325        // Persist to disk if corpus dir is configured.
326        let write_result = if self.config.corpus_gzip {
327            foundry_common::fs::write_json_gzip_file(
328                corpus_dir.join(format!("{corpus_uuid}{JSON_EXTENSION}.gz")).as_path(),
329                &corpus.tx_seq,
330            )
331        } else {
332            foundry_common::fs::write_json_file(
333                corpus_dir.join(format!("{corpus_uuid}{JSON_EXTENSION}")).as_path(),
334                &corpus.tx_seq,
335            )
336        };
337
338        if let Err(err) = write_result {
339            debug!(target: "corpus", %err, "Failed to record call sequence {:?}", &corpus.tx_seq);
340        } else {
341            trace!(
342                target: "corpus",
343                "persisted {} inputs for new coverage in {corpus_uuid} corpus",
344                &corpus.tx_seq.len()
345            );
346        }
347
348        // This includes reverting txs in the corpus and `can_continue` removes
349        // them. We want this as it is new coverage and may help reach the other branch.
350        self.metrics.corpus_count += 1;
351        self.in_memory_corpus.push(corpus);
352    }
353
    /// Generates new call sequence from in memory corpus. Before mutating, evicts the oldest
    /// non-favored corpus entry mutated more than the configured `corpus_min_mutations` value.
    /// Used by invariant test campaigns.
    ///
    /// If coverage guided fuzzing is not enabled, or if the mutation yields an empty sequence
    /// (which includes the case of an empty in-memory corpus), the returned sequence consists of
    /// a single newly generated tx.
    ///
    /// The entry whose stats should be credited for the outcome of the returned sequence is
    /// recorded in `current_mutated`.
    ///
    /// # Errors
    ///
    /// Returns an error if a tx or a mutation type cannot be generated, if eviction metadata
    /// cannot be written or if ABI mutation of the selected call fails.
    pub fn new_inputs(
        &mut self,
        test_runner: &mut TestRunner,
        fuzz_state: &EvmFuzzState,
        targeted_contracts: &FuzzRunIdentifiedContracts,
    ) -> eyre::Result<Vec<BasicTxDetails>> {
        let mut new_seq = vec![];

        // Early return with first_input only if corpus dir / coverage guided fuzzing not
        // configured.
        if !self.config.is_coverage_guided() {
            new_seq.push(self.new_tx(test_runner)?);
            return Ok(new_seq);
        };

        if !self.in_memory_corpus.is_empty() {
            self.evict_oldest_corpus()?;

            let mutation_type = self
                .mutation_generator
                .new_tree(test_runner)
                .map_err(|err| eyre!("Could not generate mutation type {err}"))?
                .current();
            let rng = test_runner.rng();
            let corpus_len = self.in_memory_corpus.len();
            // Primary and secondary are drawn independently and may be the same entry.
            let primary = &self.in_memory_corpus[rng.random_range(0..corpus_len)];
            let secondary = &self.in_memory_corpus[rng.random_range(0..corpus_len)];

            match mutation_type {
                MutationType::Splice => {
                    trace!(target: "corpus", "splice {} and {}", primary.uuid, secondary.uuid);

                    self.current_mutated = Some(primary.uuid);

                    // Both slices are half-open (`start..end`) and may be empty.
                    let start1 = rng.random_range(0..primary.tx_seq.len());
                    let end1 = rng.random_range(start1..primary.tx_seq.len());

                    let start2 = rng.random_range(0..secondary.tx_seq.len());
                    let end2 = rng.random_range(start2..secondary.tx_seq.len());

                    for tx in primary.tx_seq.iter().take(end1).skip(start1) {
                        new_seq.push(tx.clone());
                    }
                    for tx in secondary.tx_seq.iter().take(end2).skip(start2) {
                        new_seq.push(tx.clone());
                    }
                }
                MutationType::Repeat => {
                    let corpus = if rng.random::<bool>() { primary } else { secondary };
                    trace!(target: "corpus", "repeat {}", corpus.uuid);

                    self.current_mutated = Some(corpus.uuid);

                    // Overwrite the range `start..end` with copies of the call at `item_idx`;
                    // the sequence length is unchanged.
                    new_seq = corpus.tx_seq.clone();
                    let start = rng.random_range(0..corpus.tx_seq.len());
                    let end = rng.random_range(start..corpus.tx_seq.len());
                    let item_idx = rng.random_range(0..corpus.tx_seq.len());
                    let repeated = vec![new_seq[item_idx].clone(); end - start];
                    new_seq.splice(start..end, repeated);
                }
                MutationType::Interleave => {
                    trace!(target: "corpus", "interleave {} with {}", primary.uuid, secondary.uuid);

                    self.current_mutated = Some(primary.uuid);

                    // The sequences are zipped, so the result is as long as the shorter one.
                    for (tx1, tx2) in primary.tx_seq.iter().zip(secondary.tx_seq.iter()) {
                        // chunks?
                        let tx = if rng.random::<bool>() { tx1.clone() } else { tx2.clone() };
                        new_seq.push(tx);
                    }
                }
                MutationType::Prefix => {
                    let corpus = if rng.random::<bool>() { primary } else { secondary };
                    trace!(target: "corpus", "overwrite prefix of {}", corpus.uuid);

                    self.current_mutated = Some(corpus.uuid);

                    // Overwrite between none and all of the calls, starting from the first one.
                    new_seq = corpus.tx_seq.clone();
                    for i in 0..rng.random_range(0..=new_seq.len()) {
                        new_seq[i] = self.new_tx(test_runner)?;
                    }
                }
                MutationType::Suffix => {
                    let corpus = if rng.random::<bool>() { primary } else { secondary };
                    trace!(target: "corpus", "overwrite suffix of {}", corpus.uuid);

                    self.current_mutated = Some(corpus.uuid);

                    // Overwrite the last `k` calls, `k` being drawn from `0..len` (so the first
                    // call is never overwritten).
                    new_seq = corpus.tx_seq.clone();
                    for i in new_seq.len() - rng.random_range(0..new_seq.len())..corpus.tx_seq.len()
                    {
                        new_seq[i] = self.new_tx(test_runner)?;
                    }
                }
                MutationType::Abi => {
                    let targets = targeted_contracts.targets.lock();
                    let corpus = if rng.random::<bool>() { primary } else { secondary };
                    trace!(target: "corpus", "ABI mutate args of {}", corpus.uuid);

                    self.current_mutated = Some(corpus.uuid);

                    new_seq = corpus.tx_seq.clone();

                    // Mutate a single, randomly chosen call; the call is left untouched if its
                    // function cannot be resolved or takes no inputs.
                    let idx = rng.random_range(0..new_seq.len());
                    let tx = new_seq.get_mut(idx).unwrap();
                    if let (_, Some(function)) = targets.fuzzed_artifacts(tx) {
                        // TODO add call_value to call details and mutate it as well as sender some
                        // of the time
                        if !function.inputs.is_empty() {
                            self.abi_mutate(tx, function, test_runner, fuzz_state)?;
                        }
                    }
                }
            }
        }

        // Make sure the new sequence contains at least one tx to start fuzzing from.
        if new_seq.is_empty() {
            new_seq.push(self.new_tx(test_runner)?);
        }
        trace!(target: "corpus", "new sequence of {} calls generated", new_seq.len());

        Ok(new_seq)
    }
480
481    /// Generates new input from in memory corpus. Evicts oldest corpus mutated more than
482    /// configured max mutations value. Used by fuzz test campaigns.
483    pub fn new_input(
484        &mut self,
485        test_runner: &mut TestRunner,
486        fuzz_state: &EvmFuzzState,
487        function: &Function,
488    ) -> eyre::Result<Bytes> {
489        // Early return if not running with coverage guided fuzzing.
490        if !self.config.is_coverage_guided() {
491            return Ok(self.new_tx(test_runner)?.call_details.calldata);
492        }
493
494        let tx = if !self.in_memory_corpus.is_empty() {
495            self.evict_oldest_corpus()?;
496
497            let corpus = &self.in_memory_corpus
498                [test_runner.rng().random_range(0..self.in_memory_corpus.len())];
499            self.current_mutated = Some(corpus.uuid);
500            let new_seq = corpus.tx_seq.clone();
501            let mut tx = new_seq.first().unwrap().clone();
502            self.abi_mutate(&mut tx, function, test_runner, fuzz_state)?;
503            tx
504        } else {
505            self.new_tx(test_runner)?
506        };
507
508        Ok(tx.call_details.calldata)
509    }
510
511    /// Returns the next call to be used in call sequence.
512    /// If coverage guided fuzzing is not configured or if previous input was discarded then this is
513    /// a new tx from strategy.
514    /// If running with coverage guided fuzzing it returns a new call only when sequence
515    /// does not have enough entries, or randomly. Otherwise, returns the next call from initial
516    /// sequence.
517    pub fn generate_next_input(
518        &mut self,
519        test_runner: &mut TestRunner,
520        sequence: &[BasicTxDetails],
521        discarded: bool,
522        depth: usize,
523    ) -> eyre::Result<BasicTxDetails> {
524        // Early return with new input if corpus dir / coverage guided fuzzing not configured or if
525        // call was discarded.
526        if self.config.corpus_dir.is_none() || discarded {
527            return self.new_tx(test_runner);
528        }
529
530        // When running with coverage guided fuzzing enabled then generate new sequence if initial
531        // sequence's length is less than depth or randomly, to occasionally intermix new txs.
532        if depth > sequence.len().saturating_sub(1) || test_runner.rng().random_ratio(1, 10) {
533            return self.new_tx(test_runner);
534        }
535
536        // Continue with the next call initial sequence
537        Ok(sequence[depth].clone())
538    }
539
540    /// Generates single call from corpus strategy.
541    pub fn new_tx(&mut self, test_runner: &mut TestRunner) -> eyre::Result<BasicTxDetails> {
542        Ok(self
543            .tx_generator
544            .new_tree(test_runner)
545            .map_err(|_| eyre!("Could not generate case"))?
546            .current())
547    }
548
549    /// Returns campaign failed replays.
550    pub fn failed_replays(self) -> usize {
551        self.failed_replays
552    }
553
554    /// Collects coverage from call result and updates metrics.
555    pub fn merge_edge_coverage(&mut self, call_result: &mut RawCallResult) -> bool {
556        if !self.config.collect_edge_coverage() {
557            return false;
558        }
559
560        let (new_coverage, is_edge) = call_result.merge_edge_coverage(&mut self.history_map);
561        if new_coverage {
562            self.metrics.update_seen(is_edge);
563        }
564        new_coverage
565    }
566
567    /// Flush the oldest corpus mutated more than configured max mutations unless they are
568    /// favored.
569    fn evict_oldest_corpus(&mut self) -> eyre::Result<()> {
570        if self.in_memory_corpus.len() > self.config.corpus_min_size.max(1)
571            && let Some(index) = self.in_memory_corpus.iter().position(|corpus| {
572                corpus.total_mutations > self.config.corpus_min_mutations && !corpus.is_favored
573            })
574        {
575            let corpus = self.in_memory_corpus.get(index).unwrap();
576
577            let uuid = corpus.uuid;
578            debug!(target: "corpus", "evict corpus {uuid}");
579
580            // Flush to disk the seed metadata at the time of eviction.
581            let eviction_time = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs();
582            foundry_common::fs::write_json_file(
583                self.config
584                    .corpus_dir
585                    .clone()
586                    .unwrap()
587                    .join(format!("{uuid}-{eviction_time}-{METADATA_SUFFIX}"))
588                    .as_path(),
589                &corpus,
590            )?;
591
592            // Remove corpus from memory.
593            self.in_memory_corpus.remove(index);
594        }
595        Ok(())
596    }
597
598    /// Mutates calldata of provided tx by abi decoding current values and randomly selecting the
599    /// inputs to change.
600    fn abi_mutate(
601        &self,
602        tx: &mut BasicTxDetails,
603        function: &Function,
604        test_runner: &mut TestRunner,
605        fuzz_state: &EvmFuzzState,
606    ) -> eyre::Result<()> {
607        // let rng = test_runner.rng();
608        let mut arg_mutation_rounds =
609            test_runner.rng().random_range(0..=function.inputs.len()).max(1);
610        let round_arg_idx: Vec<usize> = if function.inputs.len() <= 1 {
611            vec![0]
612        } else {
613            (0..arg_mutation_rounds)
614                .map(|_| test_runner.rng().random_range(0..function.inputs.len()))
615                .collect()
616        };
617        let mut prev_inputs = function
618            .abi_decode_input(&tx.call_details.calldata[4..])
619            .map_err(|err| eyre!("failed to load previous inputs: {err}"))?;
620
621        while arg_mutation_rounds > 0 {
622            let idx = round_arg_idx[arg_mutation_rounds - 1];
623            prev_inputs[idx] = mutate_param_value(
624                &function
625                    .inputs
626                    .get(idx)
627                    .expect("Could not get input to mutate")
628                    .selector_type()
629                    .parse()?,
630                prev_inputs[idx].clone(),
631                test_runner,
632                fuzz_state,
633            );
634            arg_mutation_rounds -= 1;
635        }
636
637        tx.call_details.calldata =
638            function.abi_encode_input(&prev_inputs).map_err(|e| eyre!(e.to_string()))?.into();
639        Ok(())
640    }
641}