Skip to main content

foundry_evm_traces/identifier/
external.rs

1use super::{IdentifiedAddress, TraceIdentifier};
2use crate::debug::ContractSources;
3use alloy_primitives::{
4    Address,
5    map::{Entry, HashMap, HashSet},
6};
7use eyre::WrapErr;
8use foundry_block_explorers::{contract::Metadata, errors::EtherscanError};
9use foundry_common::compile::etherscan_project;
10use foundry_config::{Chain, Config};
11use futures::{
12    future::join_all,
13    stream::{FuturesUnordered, Stream, StreamExt},
14    task::{Context, Poll},
15};
16use revm_inspectors::tracing::types::CallTraceNode;
17use serde::Deserialize;
18use std::{
19    borrow::Cow,
20    pin::Pin,
21    sync::{
22        Arc,
23        atomic::{AtomicBool, Ordering},
24    },
25};
26use tokio::time::{Duration, Interval};
27
/// A trace identifier that tries to identify addresses using external contract-verification
/// services (Sourcify and/or Etherscan, depending on which fetchers could be enabled).
pub struct ExternalIdentifier {
    /// The enabled fetchers. Every address that is not cached yet is requested from each of them.
    fetchers: Vec<Arc<dyn ExternalFetcherT>>,
    /// Cached contracts.
    ///
    /// Maps an address to the kind of fetcher that produced the entry and the metadata it
    /// returned. A `None` metadata records that the lookup yielded nothing, so the address is
    /// not fetched again.
    contracts: HashMap<Address, (FetcherKind, Option<Metadata>)>,
}
34
35impl ExternalIdentifier {
36    /// Creates a new external identifier with the given client
37    pub fn new(config: &Config, mut chain: Option<Chain>) -> eyre::Result<Option<Self>> {
38        if config.offline {
39            return Ok(None);
40        }
41
42        let no_proxy = config.eth_rpc_no_proxy;
43        let config = match config.get_etherscan_config_with_chain(chain) {
44            Ok(Some(config)) => {
45                chain = config.chain;
46                Some(config)
47            }
48            Ok(None) => {
49                warn!(target: "evm::traces::external", "etherscan config not found");
50                None
51            }
52            Err(err) => {
53                warn!(target: "evm::traces::external", ?err, "failed to get etherscan config");
54                None
55            }
56        };
57
58        let mut fetchers = Vec::<Arc<dyn ExternalFetcherT>>::new();
59        if let Some(chain) = chain {
60            debug!(target: "evm::traces::external", ?chain, "using sourcify identifier");
61            fetchers.push(Arc::new(SourcifyFetcher::new(chain)));
62        }
63        if let Some(config) = config {
64            debug!(target: "evm::traces::external", chain=?config.chain, url=?config.api_url, "using etherscan identifier");
65            match config.into_client_with_no_proxy(no_proxy) {
66                Ok(client) => {
67                    fetchers.push(Arc::new(EtherscanFetcher::new(client)));
68                }
69                Err(err) => {
70                    warn!(target: "evm::traces::external", ?err, "failed to create etherscan client");
71                }
72            }
73        }
74        if fetchers.is_empty() {
75            debug!(target: "evm::traces::external", "no fetchers enabled");
76            return Ok(None);
77        }
78
79        Ok(Some(Self { fetchers, contracts: Default::default() }))
80    }
81
82    /// Goes over the list of contracts we have pulled from the traces, clones their source from
83    /// Etherscan and compiles them locally, for usage in the debugger.
84    pub async fn get_compiled_contracts(&self) -> eyre::Result<ContractSources> {
85        // Collect contract info upfront so we can reference it in error messages
86        let contracts_info: Vec<_> = self
87            .contracts
88            .iter()
89            // filter out vyper files and contracts without metadata
90            .filter_map(|(addr, (_, metadata))| {
91                if let Some(metadata) = metadata.as_ref()
92                    && !metadata.is_vyper()
93                {
94                    Some((*addr, metadata))
95                } else {
96                    None
97                }
98            })
99            .collect();
100
101        let outputs_fut = contracts_info
102            .iter()
103            .map(|(addr, metadata)| async move {
104                sh_println!("Compiling: {} {addr}", metadata.contract_name)?;
105                let root = tempfile::tempdir()?;
106                let root_path = root.path();
107                let project = etherscan_project(metadata, root_path)?;
108                let output = project.compile()?;
109                if output.has_compiler_errors() {
110                    eyre::bail!("{output}")
111                }
112
113                Ok((project, output, root))
114            })
115            .collect::<Vec<_>>();
116
117        // poll all the futures concurrently
118        let outputs = join_all(outputs_fut).await;
119
120        let mut sources: ContractSources = Default::default();
121
122        // construct the map
123        for (idx, res) in outputs.into_iter().enumerate() {
124            let (addr, metadata) = &contracts_info[idx];
125            let name = &metadata.contract_name;
126            let (project, output, _) =
127                res.wrap_err_with(|| format!("Failed to compile contract {name} at {addr}"))?;
128            sources
129                .insert(&output, project.root(), None)
130                .wrap_err_with(|| format!("Failed to insert contract {name} at {addr}"))?;
131        }
132
133        Ok(sources)
134    }
135
136    fn identify_from_metadata(
137        &self,
138        address: Address,
139        metadata: &Metadata,
140    ) -> IdentifiedAddress<'static> {
141        let label = metadata.contract_name.clone();
142        let abi = metadata.abi().ok().map(Cow::Owned);
143        IdentifiedAddress {
144            address,
145            label: Some(label.clone()),
146            contract: Some(label),
147            abi,
148            artifact_id: None,
149        }
150    }
151}
152
153impl TraceIdentifier for ExternalIdentifier {
154    fn identify_addresses(&mut self, nodes: &[&CallTraceNode]) -> Vec<IdentifiedAddress<'_>> {
155        if nodes.is_empty() {
156            return Vec::new();
157        }
158
159        trace!(target: "evm::traces::external", "identify {} addresses", nodes.len());
160
161        let mut identities = Vec::new();
162        let mut to_fetch = HashSet::new();
163
164        // Check cache first.
165        for &node in nodes {
166            let address = node.trace.address;
167            if let Some((_, metadata)) = self.contracts.get(&address) {
168                if let Some(metadata) = metadata {
169                    identities.push(self.identify_from_metadata(address, metadata));
170                } else {
171                    // Do nothing. We know that this contract was not verified.
172                }
173            } else {
174                to_fetch.insert(address);
175            }
176        }
177
178        if to_fetch.is_empty() {
179            return identities;
180        }
181        trace!(target: "evm::traces::external", "fetching {} addresses", to_fetch.len());
182
183        let to_fetch = to_fetch.into_iter().collect::<Vec<_>>();
184        let fetchers =
185            self.fetchers.iter().map(|fetcher| ExternalFetcher::new(fetcher.clone(), &to_fetch));
186        let fetched_identities = foundry_common::block_on(
187            futures::stream::select_all(fetchers)
188                .filter_map(|(address, value)| {
189                    let addr = value
190                        .1
191                        .as_ref()
192                        .map(|metadata| self.identify_from_metadata(address, metadata));
193                    match self.contracts.entry(address) {
194                        Entry::Occupied(mut occupied_entry) => {
195                            let old = occupied_entry.get();
196                            // Only override when the new result is strictly better:
197                            // - new has metadata and old doesn't, OR
198                            // - both have metadata but new is from Etherscan and old is not.
199                            // Never downgrade a successful lookup to None.
200                            let should_replace = match (&old.1, &value.1) {
201                                (None, Some(_)) => true,
202                                (Some(_), None) => false,
203                                _ => {
204                                    matches!(value.0, FetcherKind::Etherscan)
205                                        && !matches!(old.0, FetcherKind::Etherscan)
206                                }
207                            };
208                            if should_replace {
209                                occupied_entry.insert(value);
210                            }
211                        }
212                        Entry::Vacant(vacant_entry) => {
213                            vacant_entry.insert(value);
214                        }
215                    }
216                    async move { addr }
217                })
218                .collect::<Vec<IdentifiedAddress<'_>>>(),
219        );
220        trace!(target: "evm::traces::external", "fetched {} addresses: {fetched_identities:#?}", fetched_identities.len());
221
222        identities.extend(fetched_identities);
223        identities
224    }
225}
226
/// A boxed in-flight request: resolves to the requested address together with the outcome of
/// fetching its metadata.
type FetchFuture =
    Pin<Box<dyn Future<Output = (Address, Result<Option<Metadata>, EtherscanError>)>>>;

/// Maximum number of times a single address is retried through a transient Cloudflare
/// block before we give up on it. Bounded so a persistent block can't loop forever.
const MAX_CLOUDFLARE_RETRIES: u32 = 5;
233
/// A rate limit aware fetcher.
///
/// Fetches information about multiple addresses concurrently, while respecting rate limits.
///
/// Driven as a stream that yields `(address, (fetcher kind, metadata))` items, one per address
/// it finishes with.
struct ExternalFetcher {
    /// The fetcher that performs the actual requests.
    fetcher: Arc<dyn ExternalFetcherT>,
    /// The time we wait if we hit the rate limit (taken from the fetcher on construction).
    timeout: Duration,
    /// The interval we are currently waiting for before making a new request.
    /// `None` when no backoff is active.
    backoff: Option<Interval>,
    /// The maximum amount of requests to send concurrently (taken from the fetcher on
    /// construction).
    concurrency: usize,
    /// The addresses we have yet to make requests for. Requests are taken from the end, and
    /// addresses that need a retry are pushed back here.
    queue: Vec<Address>,
    /// The in progress requests.
    in_progress: FuturesUnordered<FetchFuture>,
    /// Per-address retry counter for transient Cloudflare blocks.
    attempts: HashMap<Address, u32>,
}
253
254impl ExternalFetcher {
255    fn new(fetcher: Arc<dyn ExternalFetcherT>, to_fetch: &[Address]) -> Self {
256        Self {
257            timeout: fetcher.timeout(),
258            backoff: None,
259            concurrency: fetcher.concurrency(),
260            fetcher,
261            queue: to_fetch.to_vec(),
262            in_progress: FuturesUnordered::new(),
263            attempts: HashMap::default(),
264        }
265    }
266
267    fn queue_next_reqs(&mut self) {
268        while self.in_progress.len() < self.concurrency {
269            let Some(addr) = self.queue.pop() else { break };
270            let fetcher = Arc::clone(&self.fetcher);
271            self.in_progress.push(Box::pin(async move {
272                trace!(target: "evm::traces::external", ?addr, "fetching info");
273                let res = fetcher.fetch(addr).await;
274                (addr, res)
275            }));
276        }
277    }
278}
279
280impl Stream for ExternalFetcher {
281    type Item = (Address, (FetcherKind, Option<Metadata>));
282
283    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
284        let pin = self.get_mut();
285
286        let _guard =
287            info_span!("evm::traces::external", kind=?pin.fetcher.kind(), "ExternalFetcher")
288                .entered();
289
290        if pin.fetcher.invalid_api_key().load(Ordering::Relaxed) {
291            return Poll::Ready(None);
292        }
293
294        loop {
295            if let Some(mut backoff) = pin.backoff.take()
296                && backoff.poll_tick(cx).is_pending()
297            {
298                pin.backoff = Some(backoff);
299                return Poll::Pending;
300            }
301
302            pin.queue_next_reqs();
303
304            let mut made_progress_this_iter = false;
305            match pin.in_progress.poll_next_unpin(cx) {
306                Poll::Pending => {}
307                Poll::Ready(None) => return Poll::Ready(None),
308                Poll::Ready(Some((addr, res))) => {
309                    made_progress_this_iter = true;
310                    match res {
311                        Ok(metadata) => {
312                            return Poll::Ready(Some((addr, (pin.fetcher.kind(), metadata))));
313                        }
314                        Err(EtherscanError::ContractCodeNotVerified(_)) => {
315                            return Poll::Ready(Some((addr, (pin.fetcher.kind(), None))));
316                        }
317                        Err(EtherscanError::RateLimitExceeded) => {
318                            warn!(target: "evm::traces::external", "rate limit exceeded on attempt");
319                            pin.backoff = Some(tokio::time::interval(pin.timeout));
320                            pin.queue.push(addr);
321                        }
322                        Err(EtherscanError::InvalidApiKey) => {
323                            warn!(target: "evm::traces::external", "invalid api key");
324                            // mark key as invalid
325                            pin.fetcher.invalid_api_key().store(true, Ordering::Relaxed);
326                            return Poll::Ready(None);
327                        }
328                        Err(EtherscanError::BlockedByCloudflare) => {
329                            // A Cloudflare block is transient rate limiting (often triggered
330                            // by request bursts), not a permanent failure like an invalid key.
331                            // Back off and retry the address a bounded number of times instead
332                            // of aborting the whole stream, which would abandon every still-
333                            // queued address and leave traces only partially decoded (#9880).
334                            let attempts = {
335                                let entry = pin.attempts.entry(addr).or_default();
336                                *entry += 1;
337                                *entry
338                            };
339                            if attempts <= MAX_CLOUDFLARE_RETRIES {
340                                warn!(target: "evm::traces::external", attempts, "blocked by cloudflare, backing off");
341                                pin.backoff = Some(tokio::time::interval(pin.timeout));
342                                pin.queue.push(addr);
343                            } else {
344                                warn!(target: "evm::traces::external", "blocked by cloudflare, giving up on address");
345                                return Poll::Ready(Some((addr, (pin.fetcher.kind(), None))));
346                            }
347                        }
348                        Err(err) => {
349                            warn!(target: "evm::traces::external", ?err, "could not get info");
350                            // Cache the failure so we don't re-fetch on subsequent arenas.
351                            return Poll::Ready(Some((addr, (pin.fetcher.kind(), None))));
352                        }
353                    }
354                }
355            }
356
357            if !made_progress_this_iter {
358                return Poll::Pending;
359            }
360        }
361    }
362}
363
/// The external service a fetcher (and therefore a cached result) belongs to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FetcherKind {
    /// Result obtained through the Etherscan client.
    Etherscan,
    /// Result obtained from the Sourcify server.
    Sourcify,
}
369
/// A source of contract metadata that [`ExternalFetcher`] can drive.
#[async_trait::async_trait]
trait ExternalFetcherT: Send + Sync {
    /// The service this fetcher talks to.
    fn kind(&self) -> FetcherKind;
    /// Period of the backoff interval that [`ExternalFetcher`] installs after a rate-limit or
    /// Cloudflare-block response.
    fn timeout(&self) -> Duration;
    /// Maximum number of requests [`ExternalFetcher`] keeps in flight at once.
    fn concurrency(&self) -> usize;
    /// Flag that [`ExternalFetcher`] sets once a fetch reports an invalid API key; while it is
    /// set, streams over this fetcher end immediately.
    fn invalid_api_key(&self) -> &AtomicBool;
    /// Fetches the metadata of the contract at `address`.
    ///
    /// `Ok(None)` means the service answered without any metadata. Errors are interpreted by
    /// [`ExternalFetcher`] (e.g. `ContractCodeNotVerified` is treated as "no metadata").
    async fn fetch(&self, address: Address) -> Result<Option<Metadata>, EtherscanError>;
}
378
/// Fetches contract metadata through an Etherscan client.
struct EtherscanFetcher {
    /// The client used to query contract source code.
    client: foundry_block_explorers::Client,
    /// Set once a request reported an invalid API key.
    invalid_api_key: AtomicBool,
}
383
384impl EtherscanFetcher {
385    const fn new(client: foundry_block_explorers::Client) -> Self {
386        Self { client, invalid_api_key: AtomicBool::new(false) }
387    }
388}
389
390#[async_trait::async_trait]
391impl ExternalFetcherT for EtherscanFetcher {
392    fn kind(&self) -> FetcherKind {
393        FetcherKind::Etherscan
394    }
395
396    fn timeout(&self) -> Duration {
397        Duration::from_secs(1)
398    }
399
400    fn concurrency(&self) -> usize {
401        5
402    }
403
404    fn invalid_api_key(&self) -> &AtomicBool {
405        &self.invalid_api_key
406    }
407
408    async fn fetch(&self, address: Address) -> Result<Option<Metadata>, EtherscanError> {
409        self.client.contract_source_code(address).await.map(|mut metadata| metadata.items.pop())
410    }
411}
412
/// Fetches contract metadata from the Sourcify server.
struct SourcifyFetcher {
    /// HTTP client used for the requests.
    client: reqwest::Client,
    /// Base URL of the contract endpoint, already including the chain id; the contract address
    /// is appended per request.
    url: String,
    /// Only present to satisfy `ExternalFetcherT::invalid_api_key`; initialized to `false`.
    invalid_api_key: AtomicBool,
}
418
419impl SourcifyFetcher {
420    fn new(chain: Chain) -> Self {
421        Self {
422            client: reqwest::Client::new(),
423            url: format!("https://sourcify.dev/server/v2/contract/{}", chain.id()),
424            invalid_api_key: AtomicBool::new(false),
425        }
426    }
427}
428
429#[async_trait::async_trait]
430impl ExternalFetcherT for SourcifyFetcher {
431    fn kind(&self) -> FetcherKind {
432        FetcherKind::Sourcify
433    }
434
435    fn timeout(&self) -> Duration {
436        Duration::from_secs(1)
437    }
438
439    fn concurrency(&self) -> usize {
440        5
441    }
442
443    fn invalid_api_key(&self) -> &AtomicBool {
444        &self.invalid_api_key
445    }
446
447    async fn fetch(&self, address: Address) -> Result<Option<Metadata>, EtherscanError> {
448        let url = format!("{url}/{address}?fields=abi,compilation", url = self.url);
449        let response = self
450            .client
451            .get(url)
452            .send()
453            .await
454            .map_err(|e| EtherscanError::Unknown(e.to_string()))?;
455        let code = response.status();
456        match code.as_u16() {
457            // Not verified.
458            404 => return Err(EtherscanError::ContractCodeNotVerified(address)),
459            // Too many requests.
460            429 => return Err(EtherscanError::RateLimitExceeded),
461            _ => {}
462        }
463        let response: SourcifyResponse =
464            response.json().await.map_err(|e| EtherscanError::Unknown(e.to_string()))?;
465        trace!(target: "evm::traces::external", "Sourcify response for {address}: {response:#?}");
466        match response {
467            SourcifyResponse::Success(metadata) => Ok(Some(metadata.into())),
468            SourcifyResponse::Error(error) => Err(EtherscanError::Unknown(format!("{error:#?}"))),
469        }
470    }
471}
472
473/// Sourcify API response for `/v2/contract/{chainId}/{address}`.
474#[derive(Debug, Clone, Deserialize)]
475#[serde(untagged)]
476enum SourcifyResponse {
477    Success(SourcifyMetadata),
478    Error(SourcifyError),
479}
480
/// Error body of a Sourcify response. All three fields are required for the body to be
/// recognized as an error; they are only surfaced through the `Debug` output.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
#[expect(dead_code)] // Used in Debug.
struct SourcifyError {
    /// Deserialized from `customCode`.
    custom_code: String,
    /// Deserialized from `message`.
    message: String,
    /// Deserialized from `errorId`.
    error_id: String,
}
489
/// The subset of a Sourcify contract response that is used here. Both fields are optional and
/// default to `None` when missing from the body.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
struct SourcifyMetadata {
    /// The contract ABI, kept as raw (unparsed) JSON.
    #[serde(default)]
    abi: Option<Box<serde_json::value::RawValue>>,
    /// Compilation details; provides the contract name and compiler version.
    #[serde(default)]
    compilation: Option<Compilation>,
}
498
/// The `compilation` object of a Sourcify contract response. Missing fields default to an empty
/// string.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Compilation {
    /// Deserialized from `compilerVersion`.
    #[serde(default)]
    compiler_version: String,
    /// Deserialized from `name`; used as the contract name.
    #[serde(default)]
    name: String,
}
507
508impl From<SourcifyMetadata> for Metadata {
509    fn from(metadata: SourcifyMetadata) -> Self {
510        let SourcifyMetadata { abi, compilation } = metadata;
511        let (contract_name, compiler_version) = compilation
512            .map(|c| (c.name, c.compiler_version))
513            .unwrap_or_else(|| (String::new(), String::new()));
514        // Defaulted fields may be fetched from sourcify but we don't make use of them.
515        Self {
516            source_code: foundry_block_explorers::contract::SourceCodeMetadata::Sources(
517                Default::default(),
518            ),
519            abi: Box::<str>::from(abi.unwrap_or_default()).into(),
520            contract_name,
521            compiler_version,
522            optimization_used: 0,
523            runs: 0,
524            constructor_arguments: Default::default(),
525            evm_version: String::new(),
526            library: String::new(),
527            license_type: String::new(),
528            proxy: 0,
529            implementation: None,
530            swarm_source: String::new(),
531        }
532    }
533}
534
#[cfg(test)]
mod tests {
    use super::*;
    use std::{collections::HashSet as StdHashSet, sync::Mutex};

    /// Test fetcher that answers the first request for any address with a Cloudflare block and
    /// every later request for that address with an (empty) success. This imitates
    /// Etherscan/Cloudflare throttling a burst of concurrent requests.
    struct FlakyCloudflareFetcher {
        /// Addresses that have been requested at least once.
        seen: Mutex<StdHashSet<Address>>,
        invalid: AtomicBool,
    }

    #[async_trait::async_trait]
    impl ExternalFetcherT for FlakyCloudflareFetcher {
        fn kind(&self) -> FetcherKind {
            FetcherKind::Etherscan
        }

        fn timeout(&self) -> Duration {
            Duration::from_millis(1)
        }

        fn concurrency(&self) -> usize {
            1
        }

        fn invalid_api_key(&self) -> &AtomicBool {
            &self.invalid
        }

        async fn fetch(&self, address: Address) -> Result<Option<Metadata>, EtherscanError> {
            // `insert` reports `true` only when the address was not in the set yet.
            let newly_seen = self.seen.lock().unwrap().insert(address);
            match newly_seen {
                true => Err(EtherscanError::BlockedByCloudflare),
                false => Ok(None),
            }
        }
    }

    /// Regression test for #9880: a transient Cloudflare block on one address must not abandon
    /// the rest of the queue. Before the fix the fetcher returned `Poll::Ready(None)` on the
    /// first block, which ended the stream and left later addresses unidentified (partial trace
    /// decoding).
    #[tokio::test]
    async fn cloudflare_block_retries_instead_of_abandoning_queue() {
        let addrs: Vec<Address> = (1u8..=4).map(Address::with_last_byte).collect();
        let want: StdHashSet<Address> = addrs.iter().copied().collect();

        let fetcher: Arc<dyn ExternalFetcherT> = Arc::new(FlakyCloudflareFetcher {
            seen: Mutex::new(StdHashSet::new()),
            invalid: AtomicBool::new(false),
        });

        let yielded: Vec<_> = ExternalFetcher::new(fetcher, &addrs).collect().await;
        let got: StdHashSet<Address> = yielded.into_iter().map(|(addr, _)| addr).collect();

        assert_eq!(got, want, "every address must be yielded despite a transient cloudflare block");
    }
}