forge_doc/preprocessor/
infer_hyperlinks.rs

1use super::{Preprocessor, PreprocessorId};
2use crate::{Comments, Document, ParseItem, ParseSource, solang_ext::SafeUnwrap};
3use regex::{Captures, Match, Regex};
4use std::{
5    borrow::Cow,
6    path::{Path, PathBuf},
7    sync::LazyLock,
8};
9
10/// A regex that matches `{identifier-part}` placeholders
11///
12/// Overloaded functions are referenced by including the exact function arguments in the `part`
13/// section of the placeholder.
14static RE_INLINE_LINK: LazyLock<Regex> = LazyLock::new(|| {
15    Regex::new(r"(?m)(\{(?P<xref>xref-)?(?P<identifier>[a-zA-Z_][0-9a-zA-Z_]*)(-(?P<part>[a-zA-Z_][0-9a-zA-Z_-]*))?}(\[(?P<link>(.*?))\])?)").unwrap()
16});
17
18/// [InferInlineHyperlinks] preprocessor id.
19pub const INFER_INLINE_HYPERLINKS_ID: PreprocessorId = PreprocessorId("infer inline hyperlinks");
20
/// Preprocessor that turns inline item references in comments into markdown links.
///
/// It walks the comments of every item (and of every item's children) in all documents, looks
/// for `{identifier}` / `{identifier-part}` placeholders and tries to find the item each
/// placeholder refers to.
///
/// Every placeholder that can be resolved is replaced with a markdown link pointing at the
/// section of the referenced item; unresolved placeholders are left untouched.
#[non_exhaustive]
#[derive(Debug, Default)]
pub struct InferInlineHyperlinks;
29
30impl Preprocessor for InferInlineHyperlinks {
31    fn id(&self) -> PreprocessorId {
32        INFER_INLINE_HYPERLINKS_ID
33    }
34
35    fn preprocess(&self, mut documents: Vec<Document>) -> Result<Vec<Document>, eyre::Error> {
36        // traverse all comments and try to match inline links and replace with inline links for
37        // markdown
38        let mut docs = Vec::with_capacity(documents.len());
39        while !documents.is_empty() {
40            let mut document = documents.remove(0);
41            let target_path = document.relative_output_path().to_path_buf();
42            for idx in 0..document.content.len() {
43                let (mut comments, item_children_len) = {
44                    let item = document.content.get_mut(idx).unwrap();
45                    let comments = std::mem::take(&mut item.comments);
46                    let children = item.children.len();
47                    (comments, children)
48                };
49                Self::inline_doc_links(&documents, &target_path, &mut comments, &document);
50                document.content.get_mut(idx).unwrap().comments = comments;
51
52                // we also need to iterate over all child items
53                // This is a bit horrible but we need to traverse all items in all documents
54                for child_idx in 0..item_children_len {
55                    let mut comments = {
56                        let item = document.content.get_mut(idx).unwrap();
57
58                        std::mem::take(&mut item.children[child_idx].comments)
59                    };
60                    Self::inline_doc_links(&documents, &target_path, &mut comments, &document);
61                    document.content.get_mut(idx).unwrap().children[child_idx].comments = comments;
62                }
63            }
64
65            docs.push(document);
66        }
67
68        Ok(docs)
69    }
70}
71
72impl InferInlineHyperlinks {
73    /// Finds the first match for the given link.
74    ///
75    /// All items get their own section in the markdown file.
76    /// This section uses the identifier of the item: `#functionname`
77    ///
78    /// Note: the target path is the relative path to the markdown file.
79    fn find_match<'a>(
80        link: &InlineLink<'a>,
81        target_path: &Path,
82        items: impl Iterator<Item = &'a ParseItem>,
83    ) -> Option<InlineLinkTarget<'a>> {
84        for item in items {
85            match &item.source {
86                ParseSource::Contract(contract) => {
87                    let name = &contract.name.safe_unwrap().name;
88                    if name == link.identifier {
89                        if link.part.is_none() {
90                            return Some(InlineLinkTarget::borrowed(
91                                name,
92                                target_path.to_path_buf(),
93                            ));
94                        }
95                        // try to find the referenced item in the contract's children
96                        return Self::find_match(link, target_path, item.children.iter());
97                    }
98                }
99                ParseSource::Function(fun) => {
100                    // TODO: handle overloaded functions
101                    // functions can be overloaded so we need to keep track of how many matches we
102                    // have so we can match the correct one
103                    if let Some(id) = &fun.name {
104                        // Note: constructors don't have a name
105                        if id.name == link.ref_name() {
106                            return Some(InlineLinkTarget::borrowed(
107                                &id.name,
108                                target_path.to_path_buf(),
109                            ));
110                        }
111                    } else if link.ref_name() == "constructor" {
112                        return Some(InlineLinkTarget::borrowed(
113                            "constructor",
114                            target_path.to_path_buf(),
115                        ));
116                    }
117                }
118                ParseSource::Variable(_) => {}
119                ParseSource::Event(ev) => {
120                    let ev_name = &ev.name.safe_unwrap().name;
121                    if ev_name == link.ref_name() {
122                        return Some(InlineLinkTarget::borrowed(
123                            ev_name,
124                            target_path.to_path_buf(),
125                        ));
126                    }
127                }
128                ParseSource::Error(err) => {
129                    let err_name = &err.name.safe_unwrap().name;
130                    if err_name == link.ref_name() {
131                        return Some(InlineLinkTarget::borrowed(
132                            err_name,
133                            target_path.to_path_buf(),
134                        ));
135                    }
136                }
137                ParseSource::Struct(structdef) => {
138                    let struct_name = &structdef.name.safe_unwrap().name;
139                    if struct_name == link.ref_name() {
140                        return Some(InlineLinkTarget::borrowed(
141                            struct_name,
142                            target_path.to_path_buf(),
143                        ));
144                    }
145                }
146                ParseSource::Enum(_) => {}
147                ParseSource::Type(_) => {}
148            }
149        }
150
151        None
152    }
153
154    /// Attempts to convert inline links to markdown links.
155    fn inline_doc_links(
156        documents: &[Document],
157        target_path: &Path,
158        comments: &mut Comments,
159        parent: &Document,
160    ) {
161        // loop over all comments in the item
162        for comment in comments.iter_mut() {
163            let val = comment.value.clone();
164            // replace all links with inline markdown links
165            for link in InlineLink::captures(val.as_str()) {
166                let target = if link.is_external() {
167                    // find in all documents
168                    documents.iter().find_map(|doc| {
169                        Self::find_match(
170                            &link,
171                            doc.relative_output_path(),
172                            doc.content.iter_items().flat_map(|item| {
173                                Some(item).into_iter().chain(item.children.iter())
174                            }),
175                        )
176                    })
177                } else {
178                    // find matches in the document
179                    Self::find_match(
180                        &link,
181                        target_path,
182                        parent
183                            .content
184                            .iter_items()
185                            .flat_map(|item| Some(item).into_iter().chain(item.children.iter())),
186                    )
187                };
188                if let Some(target) = target {
189                    let display_value = link.markdown_link_display_value();
190                    let markdown_link = format!("[{display_value}]({target})");
191                    // replace the link with the markdown link
192                    comment.value =
193                        comment.value.as_str().replacen(link.as_str(), markdown_link.as_str(), 1);
194                }
195            }
196        }
197    }
198}
199
/// The destination of a resolved inline link.
struct InlineLinkTarget<'a> {
    /// Name of the section (the identifier of the referenced item) inside the markdown file.
    section: Cow<'a, str>,
    /// Relative path to the markdown file that contains the section.
    target_path: PathBuf,
}

impl<'a> InlineLinkTarget<'a> {
    /// Creates a target whose section name is borrowed from the referenced item.
    fn borrowed(section: &'a str, target_path: PathBuf) -> Self {
        Self { section: Cow::Borrowed(section), target_path }
    }
}

impl std::fmt::Display for InlineLinkTarget<'_> {
    /// Formats the target as `/<path>#<section>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let path = self.target_path.to_string_lossy();
        // The output is a URL, not a file system path: on Windows the path separator is a
        // backslash, which has to be turned into a forward slash to get a working link.
        let path = if cfg!(windows) { Cow::Owned(path.replace('\\', "/")) } else { path };
        // NOTE: the url should be absolute for markdown and section names are lowercase
        write!(f, "/{}#{}", path, self.section.to_lowercase())
    }
}
217
218/// A parsed link to an item.
219#[derive(Debug)]
220struct InlineLink<'a> {
221    outer: Match<'a>,
222    identifier: &'a str,
223    part: Option<&'a str>,
224    link: Option<&'a str>,
225}
226
227impl<'a> InlineLink<'a> {
228    fn from_capture(cap: Captures<'a>) -> Option<Self> {
229        Some(Self {
230            outer: cap.get(1)?,
231            identifier: cap.name("identifier")?.as_str(),
232            part: cap.name("part").map(|m| m.as_str()),
233            link: cap.name("link").map(|m| m.as_str()),
234        })
235    }
236
237    fn captures(s: &'a str) -> impl Iterator<Item = Self> + 'a {
238        RE_INLINE_LINK.captures_iter(s).filter_map(Self::from_capture)
239    }
240
241    /// Parses the first inline link.
242    #[allow(unused)]
243    fn capture(s: &'a str) -> Option<Self> {
244        let cap = RE_INLINE_LINK.captures(s)?;
245        Self::from_capture(cap)
246    }
247
248    /// Returns the name of the link
249    fn markdown_link_display_value(&self) -> Cow<'_, str> {
250        if let Some(link) = self.link {
251            Cow::Borrowed(link)
252        } else if let Some(part) = self.part {
253            Cow::Owned(format!("{}-{}", self.identifier, part))
254        } else {
255            Cow::Borrowed(self.identifier)
256        }
257    }
258
259    /// Returns the name of the referenced item.
260    fn ref_name(&self) -> &str {
261        self.exact_identifier().split('-').next().unwrap()
262    }
263
264    fn exact_identifier(&self) -> &str {
265        let mut name = self.identifier;
266        if let Some(part) = self.part {
267            name = part;
268        }
269        name
270    }
271
272    /// Returns the name of the referenced item and its arguments, if any.
273    ///
274    /// Eg: `safeMint-address-uint256-` returns `("safeMint", ["address", "uint256"])`
275    #[expect(unused)]
276    fn ref_name_exact(&self) -> (&str, impl Iterator<Item = &str> + '_) {
277        let identifier = self.exact_identifier();
278        let mut iter = identifier.split('-');
279        (iter.next().unwrap(), iter.filter(|s| !s.is_empty()))
280    }
281
282    /// Returns the content of the matched link.
283    fn as_str(&self) -> &str {
284        self.outer.as_str()
285    }
286
287    /// Returns true if the link is external.
288    fn is_external(&self) -> bool {
289        self.part.is_some()
290    }
291}
292
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the text of the named group of the first inline link in `haystack`.
    fn group<'h>(haystack: &'h str, name: &str) -> &'h str {
        RE_INLINE_LINK.captures(haystack).unwrap().name(name).unwrap().as_str()
    }

    #[test]
    fn parse_inline_links() {
        // identifier with a part
        let qualified = "    {IERC165-supportsInterface}   ";
        assert_eq!(group(qualified, "identifier"), "IERC165");
        assert_eq!(group(qualified, "part"), "supportsInterface");

        // identifier only
        let plain = "    {supportsInterface}   ";
        assert_eq!(group(plain, "identifier"), "supportsInterface");

        // xref link to an overloaded function
        let xref = "{xref-ERC721-_safeMint-address-uint256-}";
        assert_eq!(group(xref, "identifier"), "ERC721");
        assert_eq!(group(xref, "xref"), "xref-");
        assert_eq!(group(xref, "part"), "_safeMint-address-uint256-");

        let parsed = InlineLink::capture(xref).unwrap();
        assert_eq!(parsed.ref_name(), "_safeMint");
        assert_eq!(parsed.as_str(), "{xref-ERC721-_safeMint-address-uint256-}");

        // xref link with an explicit link text
        let named = "{xref-ERC721-_safeMint-address-uint256-}[`Named link`]";
        let parsed = InlineLink::capture(named).unwrap();
        assert_eq!(parsed.link, Some("`Named link`"));
        assert_eq!(parsed.markdown_link_display_value(), "`Named link`");
    }
}