foundry_evm/executors/invariant/
corpus.rs

1use crate::executors::{
2    Executor,
3    invariant::{InvariantTest, InvariantTestRun},
4};
5use alloy_dyn_abi::JsonAbiExt;
6use alloy_primitives::U256;
7use eyre::eyre;
8use foundry_config::InvariantConfig;
9use foundry_evm_fuzz::{
10    invariant::{BasicTxDetails, FuzzRunIdentifiedContracts},
11    strategies::fuzz_param_from_state,
12};
13use proptest::{
14    prelude::{Just, Rng, Strategy},
15    prop_oneof,
16    strategy::{BoxedStrategy, ValueTree},
17    test_runner::TestRunner,
18};
19use serde::Serialize;
20use std::{
21    fmt,
22    path::PathBuf,
23    time::{SystemTime, UNIX_EPOCH},
24};
25use uuid::Uuid;
26
/// Suffix of the metadata files written when a corpus entry is evicted from memory. Files whose
/// name contains it are skipped when the corpus is loaded from disk.
const METADATA_SUFFIX: &str = "metadata.json";
/// Extension appended to the entry uuid when naming persisted corpus files (followed by `.gz`
/// when gzip compression is enabled).
const JSON_EXTENSION: &str = ".json";
/// Value that the ratio of new finds to total mutations of a corpus entry is compared against
/// when deciding whether the entry is favored.
const FAVORABILITY_THRESHOLD: f64 = 0.3;
30
/// Possible mutation strategies to apply on a call sequence.
///
/// The enum carries no data, so it is `Copy` and comparable: values can be passed around and
/// matched or compared without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MutationType {
    /// Splice original call sequence.
    Splice,
    /// Repeat selected call several times.
    Repeat,
    /// Interleave calls from two random call sequences.
    Interleave,
    /// Replace prefix of the original call sequence with new calls.
    Prefix,
    /// Replace suffix of the original call sequence with new calls.
    Suffix,
    /// ABI mutate random args of selected call in sequence.
    Abi,
}
47
48/// Holds Corpus information.
49#[derive(Serialize)]
50struct CorpusEntry {
51    // Unique corpus identifier.
52    uuid: Uuid,
53    // Total mutations of corpus as primary source.
54    total_mutations: usize,
55    // New coverage found as a result of mutating this corpus.
56    new_finds_produced: usize,
57    // Corpus call sequence.
58    #[serde(skip_serializing)]
59    tx_seq: Vec<BasicTxDetails>,
60    // Whether this corpus is favored, i.e. producing new finds more often than
61    // `FAVORABILITY_THRESHOLD`.
62    is_favored: bool,
63}
64
65impl CorpusEntry {
66    /// New corpus from given call sequence and corpus path to read uuid.
67    pub fn new(tx_seq: Vec<BasicTxDetails>, path: PathBuf) -> eyre::Result<Self> {
68        let uuid = if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
69            Uuid::try_from(stem.strip_suffix(JSON_EXTENSION).unwrap_or(stem).to_string())?
70        } else {
71            Uuid::new_v4()
72        };
73        Ok(Self { uuid, total_mutations: 0, new_finds_produced: 0, tx_seq, is_favored: false })
74    }
75
76    /// New corpus with given call sequence and new uuid.
77    pub fn from_tx_seq(tx_seq: Vec<BasicTxDetails>) -> Self {
78        Self {
79            uuid: Uuid::new_v4(),
80            total_mutations: 0,
81            new_finds_produced: 0,
82            tx_seq,
83            is_favored: false,
84        }
85    }
86}
87
88#[derive(Serialize, Default)]
89pub(crate) struct CorpusMetrics {
90    // Number of edges seen during the invariant run.
91    cumulative_edges_seen: usize,
92    // Number of features (new hitcount bin of previously hit edge) seen during the invariant run.
93    cumulative_features_seen: usize,
94    // Number of corpus entries.
95    corpus_count: usize,
96    // Number of corpus entries that are favored.
97    favored_items: usize,
98}
99
100impl fmt::Display for CorpusMetrics {
101    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102        writeln!(f)?;
103        writeln!(f, "        - cumulative edges seen: {}", self.cumulative_edges_seen)?;
104        writeln!(f, "        - cumulative features seen: {}", self.cumulative_features_seen)?;
105        writeln!(f, "        - corpus count: {}", self.corpus_count)?;
106        write!(f, "        - favored items: {}", self.favored_items)?;
107        Ok(())
108    }
109}
110
111impl CorpusMetrics {
112    /// Records number of new edges or features explored during the campaign.
113    pub fn update_seen(&mut self, is_edge: bool) {
114        if is_edge {
115            self.cumulative_edges_seen += 1;
116        } else {
117            self.cumulative_features_seen += 1;
118        }
119    }
120
121    /// Updates campaign favored items.
122    pub fn update_favored(&mut self, is_favored: bool, corpus_favored: bool) {
123        if is_favored && !corpus_favored {
124            self.favored_items += 1;
125        } else if !is_favored && corpus_favored {
126            self.favored_items -= 1;
127        }
128    }
129}
130
/// Invariant corpus manager.
///
/// Holds the in-memory corpus of call sequences, generates new sequences by mutating corpus
/// entries and persists the sequences that produced new coverage.
pub struct TxCorpusManager {
    // Strategy generating new fuzzed calls.
    tx_generator: BoxedStrategy<BasicTxDetails>,
    // Strategy picking the mutation type to apply on a call sequence.
    mutation_generator: BoxedStrategy<MutationType>,
    // Path to invariant corpus directory. If None, sequences with new coverage are not persisted
    // and no corpus mutation is performed.
    corpus_dir: Option<PathBuf>, // TODO consolidate into config
    // Whether newly persisted corpus files are written gzip compressed.
    corpus_gzip: bool,
    // Number of mutations until entry marked as eligible to be flushed from in-memory corpus.
    // Mutations will be performed at least `corpus_min_mutations` times.
    corpus_min_mutations: usize,
    // Number of corpus entries that won't be evicted from memory: eviction only happens while
    // the in-memory corpus holds more entries than this.
    corpus_min_size: usize,
    // In-memory corpus, populated from persisted files and current runs.
    // Mutation is performed on these.
    in_memory_corpus: Vec<CorpusEntry>,
    // Identifier of the entry used as source of the current mutation, if any.
    current_mutated: Option<Uuid>,
    // Number of calls from the persisted corpus that could not be replayed.
    failed_replays: usize,
    // Corpus metrics.
    pub(crate) metrics: CorpusMetrics,
}
156
157impl TxCorpusManager {
158    pub fn new(
159        invariant_config: &InvariantConfig,
160        test_name: &String,
161        fuzzed_contracts: &FuzzRunIdentifiedContracts,
162        tx_generator: BoxedStrategy<BasicTxDetails>,
163        executor: &Executor,
164        history_map: &mut [u8],
165    ) -> eyre::Result<Self> {
166        let mutation_generator = prop_oneof![
167            Just(MutationType::Splice),
168            Just(MutationType::Repeat),
169            Just(MutationType::Interleave),
170            Just(MutationType::Prefix),
171            Just(MutationType::Suffix),
172            Just(MutationType::Abi),
173        ]
174        .boxed();
175        let mut in_memory_corpus = vec![];
176        let corpus_gzip = invariant_config.corpus_gzip;
177        let corpus_min_mutations = invariant_config.corpus_min_mutations;
178        let corpus_min_size = invariant_config.corpus_min_size;
179        let mut failed_replays = 0;
180
181        // Early return if corpus dir / coverage guided fuzzing not configured.
182        let Some(corpus_dir) = &invariant_config.corpus_dir else {
183            return Ok(Self {
184                tx_generator,
185                mutation_generator,
186                corpus_dir: None,
187                corpus_gzip,
188                corpus_min_mutations,
189                corpus_min_size,
190                in_memory_corpus,
191                current_mutated: None,
192                failed_replays,
193                metrics: CorpusMetrics::default(),
194            });
195        };
196
197        // Ensure corpus dir for invariant function is created.
198        let corpus_dir = corpus_dir.join(test_name);
199        if !corpus_dir.is_dir() {
200            foundry_common::fs::create_dir_all(&corpus_dir)?;
201        }
202
203        let fuzzed_contracts = fuzzed_contracts.targets.lock();
204        let mut metrics = CorpusMetrics::default();
205
206        for entry in std::fs::read_dir(&corpus_dir)? {
207            let path = entry?.path();
208            if path.is_file()
209                && let Some(name) = path.file_name().and_then(|s| s.to_str())
210            {
211                // Ignore metadata files
212                if name.contains(METADATA_SUFFIX) {
213                    continue;
214                }
215            }
216            metrics.corpus_count += 1;
217
218            let read_corpus_result = match path.extension().and_then(|ext| ext.to_str()) {
219                Some("gz") => foundry_common::fs::read_json_gzip_file::<Vec<BasicTxDetails>>(&path),
220                _ => foundry_common::fs::read_json_file::<Vec<BasicTxDetails>>(&path),
221            };
222
223            let Ok(tx_seq) = read_corpus_result else {
224                trace!(target: "corpus", "failed to load corpus from {}", path.display());
225                continue;
226            };
227
228            if !tx_seq.is_empty() {
229                // Warm up history map from loaded sequences.
230                let mut executor = executor.clone();
231                for tx in &tx_seq {
232                    let mut call_result = executor
233                        .call_raw(
234                            tx.sender,
235                            tx.call_details.target,
236                            tx.call_details.calldata.clone(),
237                            U256::ZERO,
238                        )
239                        .map_err(|e| eyre!(format!("Could not make raw evm call: {e}")))?;
240
241                    if fuzzed_contracts.can_replay(tx) {
242                        let (new_coverage, is_edge) = call_result.merge_edge_coverage(history_map);
243                        if new_coverage {
244                            metrics.update_seen(is_edge);
245                        }
246
247                        executor.commit(&mut call_result);
248                    } else {
249                        failed_replays += 1;
250                    }
251                }
252
253                trace!(
254                    target: "corpus",
255                    "load sequence with len {} from corpus file {}",
256                    tx_seq.len(),
257                    path.display()
258                );
259
260                // Populate in memory corpus with sequence from corpus file.
261                in_memory_corpus.push(CorpusEntry::new(tx_seq, path)?);
262            }
263        }
264
265        Ok(Self {
266            tx_generator,
267            mutation_generator,
268            corpus_dir: Some(corpus_dir),
269            corpus_gzip,
270            corpus_min_mutations,
271            corpus_min_size,
272            in_memory_corpus,
273            current_mutated: None,
274            failed_replays,
275            metrics,
276        })
277    }
278
279    /// Collects inputs from given invariant run, if new coverage produced.
280    /// Persists call sequence (if corpus directory is configured) and updates in-memory corpus.
281    pub fn collect_inputs(&mut self, test_run: &InvariantTestRun) {
282        // Early return if corpus dir / coverage guided fuzzing is not configured.
283        let Some(corpus_dir) = &self.corpus_dir else {
284            return;
285        };
286
287        // Update stats of current mutated primary corpus.
288        if let Some(uuid) = &self.current_mutated {
289            if let Some(corpus) =
290                self.in_memory_corpus.iter_mut().find(|corpus| corpus.uuid.eq(uuid))
291            {
292                corpus.total_mutations += 1;
293                if test_run.new_coverage {
294                    corpus.new_finds_produced += 1
295                }
296                let is_favored = (corpus.new_finds_produced as f64 / corpus.total_mutations as f64)
297                    < FAVORABILITY_THRESHOLD;
298                self.metrics.update_favored(is_favored, corpus.is_favored);
299                corpus.is_favored = is_favored;
300
301                trace!(
302                    target: "corpus",
303                    "updated corpus {}, total mutations: {}, new finds: {}",
304                    corpus.uuid, corpus.total_mutations, corpus.new_finds_produced
305                );
306            }
307
308            self.current_mutated = None;
309        }
310
311        // Collect inputs only if current run produced new coverage.
312        if !test_run.new_coverage {
313            return;
314        }
315
316        let corpus = CorpusEntry::from_tx_seq(test_run.inputs.clone());
317        let corpus_uuid = corpus.uuid;
318
319        // Persist to disk if corpus dir is configured.
320        let write_result = if self.corpus_gzip {
321            foundry_common::fs::write_json_gzip_file(
322                corpus_dir.join(format!("{corpus_uuid}{JSON_EXTENSION}.gz")).as_path(),
323                &corpus.tx_seq,
324            )
325        } else {
326            foundry_common::fs::write_json_file(
327                corpus_dir.join(format!("{corpus_uuid}{JSON_EXTENSION}")).as_path(),
328                &corpus.tx_seq,
329            )
330        };
331
332        if let Err(err) = write_result {
333            debug!(target: "corpus", %err, "Failed to record call sequence {:?}", &corpus.tx_seq);
334        } else {
335            trace!(
336                target: "corpus",
337                "persisted {} inputs for new coverage in {corpus_uuid} corpus",
338                &corpus.tx_seq.len()
339            );
340        }
341
342        // This includes reverting txs in the corpus and `can_continue` removes
343        // them. We want this as it is new coverage and may help reach the other branch.
344        self.metrics.corpus_count += 1;
345        self.in_memory_corpus.push(corpus);
346    }
347
348    /// Generates new call sequence from in memory corpus. Evicts oldest corpus mutated more than
349    /// configured max mutations value.
350    pub fn new_sequence(&mut self, test: &InvariantTest) -> eyre::Result<Vec<BasicTxDetails>> {
351        let mut new_seq = vec![];
352        let test_runner = &mut test.execution_data.borrow_mut().branch_runner;
353
354        // Early return with first_input only if corpus dir / coverage guided fuzzing not
355        // configured.
356        let Some(corpus_dir) = &self.corpus_dir else {
357            new_seq.push(self.new_tx(test_runner)?);
358            return Ok(new_seq);
359        };
360
361        if !self.in_memory_corpus.is_empty() {
362            // Flush oldest corpus mutated more than configured max mutations unless they are
363            // favored.
364            let should_evict = self.in_memory_corpus.len() > self.corpus_min_size.max(1);
365            if should_evict
366                && let Some(index) = self.in_memory_corpus.iter().position(|corpus| {
367                    corpus.total_mutations > self.corpus_min_mutations && !corpus.is_favored
368                })
369            {
370                let corpus = self.in_memory_corpus.get(index).unwrap();
371
372                let uuid = corpus.uuid;
373                debug!(target: "corpus", "evict corpus {uuid}");
374
375                // Flush to disk the seed metadata at the time of eviction.
376                let eviction_time = SystemTime::now()
377                    .duration_since(UNIX_EPOCH)
378                    .expect("Time went backwards")
379                    .as_secs();
380                foundry_common::fs::write_json_file(
381                    corpus_dir.join(format!("{uuid}-{eviction_time}-{METADATA_SUFFIX}")).as_path(),
382                    &corpus,
383                )?;
384
385                // Remove corpus from memory.
386                self.in_memory_corpus.remove(index);
387            }
388
389            let mutation_type = self
390                .mutation_generator
391                .new_tree(test_runner)
392                .expect("Could not generate mutation type")
393                .current();
394            let rng = test_runner.rng();
395            let corpus_len = self.in_memory_corpus.len();
396            let primary = &self.in_memory_corpus[rng.random_range(0..corpus_len)];
397            let secondary = &self.in_memory_corpus[rng.random_range(0..corpus_len)];
398
399            match mutation_type {
400                MutationType::Splice => {
401                    trace!(target: "corpus", "splice {} and {}", primary.uuid, secondary.uuid);
402
403                    self.current_mutated = Some(primary.uuid);
404
405                    let start1 = rng.random_range(0..primary.tx_seq.len());
406                    let end1 = rng.random_range(start1..primary.tx_seq.len());
407
408                    let start2 = rng.random_range(0..secondary.tx_seq.len());
409                    let end2 = rng.random_range(start2..secondary.tx_seq.len());
410
411                    for tx in primary.tx_seq.iter().take(end1).skip(start1) {
412                        new_seq.push(tx.clone());
413                    }
414                    for tx in secondary.tx_seq.iter().take(end2).skip(start2) {
415                        new_seq.push(tx.clone());
416                    }
417                }
418                MutationType::Repeat => {
419                    let corpus = if rng.random::<bool>() { primary } else { secondary };
420                    trace!(target: "corpus", "repeat {}", corpus.uuid);
421
422                    self.current_mutated = Some(corpus.uuid);
423
424                    new_seq = corpus.tx_seq.clone();
425                    let start = rng.random_range(0..corpus.tx_seq.len());
426                    let end = rng.random_range(start..corpus.tx_seq.len());
427                    let item_idx = rng.random_range(0..corpus.tx_seq.len());
428                    let repeated = vec![new_seq[item_idx].clone(); end - start];
429                    new_seq.splice(start..end, repeated);
430                }
431                MutationType::Interleave => {
432                    trace!(target: "corpus", "interleave {} with {}", primary.uuid, secondary.uuid);
433
434                    self.current_mutated = Some(primary.uuid);
435
436                    for (tx1, tx2) in primary.tx_seq.iter().zip(secondary.tx_seq.iter()) {
437                        // chunks?
438                        let tx = if rng.random::<bool>() { tx1.clone() } else { tx2.clone() };
439                        new_seq.push(tx);
440                    }
441                }
442                MutationType::Prefix => {
443                    let corpus = if rng.random::<bool>() { primary } else { secondary };
444                    trace!(target: "corpus", "overwrite prefix of {}", corpus.uuid);
445
446                    self.current_mutated = Some(corpus.uuid);
447
448                    new_seq = corpus.tx_seq.clone();
449                    for i in 0..rng.random_range(0..=new_seq.len()) {
450                        new_seq[i] = self.new_tx(test_runner)?;
451                    }
452                }
453                MutationType::Suffix => {
454                    let corpus = if rng.random::<bool>() { primary } else { secondary };
455                    trace!(target: "corpus", "overwrite suffix of {}", corpus.uuid);
456
457                    self.current_mutated = Some(corpus.uuid);
458
459                    new_seq = corpus.tx_seq.clone();
460                    for i in new_seq.len() - rng.random_range(0..new_seq.len())..corpus.tx_seq.len()
461                    {
462                        new_seq[i] = self.new_tx(test_runner)?;
463                    }
464                }
465                MutationType::Abi => {
466                    let targets = test.targeted_contracts.targets.lock();
467                    let corpus = if rng.random::<bool>() { primary } else { secondary };
468                    trace!(target: "corpus", "ABI mutate args of {}", corpus.uuid);
469
470                    self.current_mutated = Some(corpus.uuid);
471
472                    new_seq = corpus.tx_seq.clone();
473
474                    let idx = rng.random_range(0..new_seq.len());
475                    let tx = new_seq.get_mut(idx).unwrap();
476                    if let (_, Some(function)) = targets.fuzzed_artifacts(tx) {
477                        // TODO add call_value to call details and mutate it as well as sender some
478                        // of the time
479                        if !function.inputs.is_empty() {
480                            let mut new_function = function.clone();
481                            let mut arg_mutation_rounds =
482                                rng.random_range(0..=function.inputs.len()).max(1);
483                            let round_arg_idx: Vec<usize> = if function.inputs.len() <= 1 {
484                                vec![0]
485                            } else {
486                                (0..arg_mutation_rounds)
487                                    .map(|_| {
488                                        test_runner.rng().random_range(0..function.inputs.len())
489                                    })
490                                    .collect()
491                            };
492                            // TODO mutation strategy for individual ABI types
493                            let mut prev_inputs = function
494                                .abi_decode_input(&tx.call_details.calldata[4..])
495                                .expect("fuzzed_artifacts returned wrong sig");
496                            // For now, only new inputs are generated, no existing inputs are
497                            // mutated.
498                            let mut gen_input = |input: &alloy_json_abi::Param| {
499                                fuzz_param_from_state(
500                                    &input.selector_type().parse().unwrap(),
501                                    &test.fuzz_state,
502                                )
503                                .new_tree(test_runner)
504                                .expect("Could not generate case")
505                                .current()
506                            };
507
508                            while arg_mutation_rounds > 0 {
509                                let idx = round_arg_idx[arg_mutation_rounds - 1];
510                                let input = new_function
511                                    .inputs
512                                    .get_mut(idx)
513                                    .expect("Could not get input to mutate");
514                                let new_input = gen_input(input);
515                                prev_inputs[idx] = new_input;
516                                arg_mutation_rounds -= 1;
517                            }
518
519                            tx.call_details.calldata = new_function
520                                .abi_encode_input(&prev_inputs)
521                                .map_err(|e| eyre!(e.to_string()))?
522                                .into();
523                        }
524                    }
525                }
526            }
527        }
528
529        // Make sure sequence contains at least one tx to start fuzzing from.
530        if new_seq.is_empty() {
531            new_seq.push(self.new_tx(test_runner)?);
532        }
533        trace!(target: "corpus", "new sequence of {} calls generated", new_seq.len());
534
535        Ok(new_seq)
536    }
537
538    /// Returns the next call to be used in call sequence.
539    /// If coverage guided fuzzing is not configured or if previous input was discarded then this is
540    /// a new tx from strategy.
541    /// If running with coverage guided fuzzing it returns a new call only when sequence
542    /// does not have enough entries, or randomly. Otherwise, returns the next call from initial
543    /// sequence.
544    pub fn generate_next_input(
545        &mut self,
546        test: &InvariantTest,
547        sequence: &[BasicTxDetails],
548        discarded: bool,
549        depth: usize,
550    ) -> eyre::Result<BasicTxDetails> {
551        let test_runner = &mut test.execution_data.borrow_mut().branch_runner;
552
553        // Early return with new input if corpus dir / coverage guided fuzzing not configured or if
554        // call was discarded.
555        if self.corpus_dir.is_none() || discarded {
556            return self.new_tx(test_runner);
557        }
558
559        // When running with coverage guided fuzzing enabled then generate new sequence if initial
560        // sequence's length is less than depth or randomly, to occasionally intermix new txs.
561        if depth > sequence.len().saturating_sub(1) || test_runner.rng().random_ratio(1, 10) {
562            return self.new_tx(test_runner);
563        }
564
565        // Continue with the next call initial sequence
566        Ok(sequence[depth].clone())
567    }
568
569    /// Generates single call from invariant strategy.
570    pub fn new_tx(&mut self, test_runner: &mut TestRunner) -> eyre::Result<BasicTxDetails> {
571        Ok(self
572            .tx_generator
573            .new_tree(test_runner)
574            .map_err(|_| eyre!("Could not generate case"))?
575            .current())
576    }
577
578    /// Returns campaign failed replays.
579    pub fn failed_replays(self) -> usize {
580        self.failed_replays
581    }
582
583    /// Updates seen edges or features metrics.
584    pub fn update_seen_metrics(&mut self, is_edge: bool) {
585        self.metrics.update_seen(is_edge);
586    }
587}