forge_doc/preprocessor/
infer_hyperlinks.rs

1use super::{Preprocessor, PreprocessorId};
2use crate::{Comments, Document, ParseItem, ParseSource};
3use forge_fmt::solang_ext::SafeUnwrap;
4use regex::{Captures, Match, Regex};
5use std::{
6    borrow::Cow,
7    path::{Path, PathBuf},
8    sync::LazyLock,
9};
10
11/// A regex that matches `{identifier-part}` placeholders
12///
13/// Overloaded functions are referenced by including the exact function arguments in the `part`
14/// section of the placeholder.
15static RE_INLINE_LINK: LazyLock<Regex> = LazyLock::new(|| {
16    Regex::new(r"(?m)(\{(?P<xref>xref-)?(?P<identifier>[a-zA-Z_][0-9a-zA-Z_]*)(-(?P<part>[a-zA-Z_][0-9a-zA-Z_-]*))?}(\[(?P<link>(.*?))\])?)").unwrap()
17});
18
/// Identifier of the [`InferInlineHyperlinks`] preprocessor, as returned by its
/// `Preprocessor::id` implementation.
pub const INFER_INLINE_HYPERLINKS_ID: PreprocessorId = PreprocessorId("infer inline hyperlinks");
21
/// Preprocessor that turns inline `{identifier}` / `{identifier-part}` references found in
/// comments into markdown links.
///
/// It traverses the comments of every item, and of every item's children, in each document
/// and tries to find the item a reference points to.
///
/// A reference that can be resolved is replaced with a markdown link to the referenced item;
/// a reference that cannot be resolved is left untouched.
#[derive(Debug, Default)]
#[non_exhaustive]
pub struct InferInlineHyperlinks;
30
31impl Preprocessor for InferInlineHyperlinks {
32    fn id(&self) -> PreprocessorId {
33        INFER_INLINE_HYPERLINKS_ID
34    }
35
36    fn preprocess(&self, mut documents: Vec<Document>) -> Result<Vec<Document>, eyre::Error> {
37        // traverse all comments and try to match inline links and replace with inline links for
38        // markdown
39        let mut docs = Vec::with_capacity(documents.len());
40        while !documents.is_empty() {
41            let mut document = documents.remove(0);
42            let target_path = document.relative_output_path().to_path_buf();
43            for idx in 0..document.content.len() {
44                let (mut comments, item_children_len) = {
45                    let item = document.content.get_mut(idx).unwrap();
46                    let comments = std::mem::take(&mut item.comments);
47                    let children = item.children.len();
48                    (comments, children)
49                };
50                Self::inline_doc_links(&documents, &target_path, &mut comments, &document);
51                document.content.get_mut(idx).unwrap().comments = comments;
52
53                // we also need to iterate over all child items
54                // This is a bit horrible but we need to traverse all items in all documents
55                for child_idx in 0..item_children_len {
56                    let mut comments = {
57                        let item = document.content.get_mut(idx).unwrap();
58
59                        std::mem::take(&mut item.children[child_idx].comments)
60                    };
61                    Self::inline_doc_links(&documents, &target_path, &mut comments, &document);
62                    document.content.get_mut(idx).unwrap().children[child_idx].comments = comments;
63                }
64            }
65
66            docs.push(document);
67        }
68
69        Ok(docs)
70    }
71}
72
73impl InferInlineHyperlinks {
74    /// Finds the first match for the given link.
75    ///
76    /// All items get their own section in the markdown file.
77    /// This section uses the identifier of the item: `#functionname`
78    ///
79    /// Note: the target path is the relative path to the markdown file.
80    fn find_match<'a>(
81        link: &InlineLink<'a>,
82        target_path: &Path,
83        items: impl Iterator<Item = &'a ParseItem>,
84    ) -> Option<InlineLinkTarget<'a>> {
85        for item in items {
86            match &item.source {
87                ParseSource::Contract(contract) => {
88                    let name = &contract.name.safe_unwrap().name;
89                    if name == link.identifier {
90                        if link.part.is_none() {
91                            return Some(InlineLinkTarget::borrowed(name, target_path.to_path_buf()))
92                        }
93                        // try to find the referenced item in the contract's children
94                        return Self::find_match(link, target_path, item.children.iter())
95                    }
96                }
97                ParseSource::Function(fun) => {
98                    // TODO: handle overloaded functions
99                    // functions can be overloaded so we need to keep track of how many matches we
100                    // have so we can match the correct one
101                    if let Some(id) = &fun.name {
102                        // Note: constructors don't have a name
103                        if id.name == link.ref_name() {
104                            return Some(InlineLinkTarget::borrowed(
105                                &id.name,
106                                target_path.to_path_buf(),
107                            ))
108                        }
109                    } else if link.ref_name() == "constructor" {
110                        return Some(InlineLinkTarget::borrowed(
111                            "constructor",
112                            target_path.to_path_buf(),
113                        ))
114                    }
115                }
116                ParseSource::Variable(_) => {}
117                ParseSource::Event(ev) => {
118                    let ev_name = &ev.name.safe_unwrap().name;
119                    if ev_name == link.ref_name() {
120                        return Some(InlineLinkTarget::borrowed(ev_name, target_path.to_path_buf()))
121                    }
122                }
123                ParseSource::Error(err) => {
124                    let err_name = &err.name.safe_unwrap().name;
125                    if err_name == link.ref_name() {
126                        return Some(InlineLinkTarget::borrowed(err_name, target_path.to_path_buf()))
127                    }
128                }
129                ParseSource::Struct(structdef) => {
130                    let struct_name = &structdef.name.safe_unwrap().name;
131                    if struct_name == link.ref_name() {
132                        return Some(InlineLinkTarget::borrowed(
133                            struct_name,
134                            target_path.to_path_buf(),
135                        ))
136                    }
137                }
138                ParseSource::Enum(_) => {}
139                ParseSource::Type(_) => {}
140            }
141        }
142
143        None
144    }
145
146    /// Attempts to convert inline links to markdown links.
147    fn inline_doc_links(
148        documents: &[Document],
149        target_path: &Path,
150        comments: &mut Comments,
151        parent: &Document,
152    ) {
153        // loop over all comments in the item
154        for comment in comments.iter_mut() {
155            let val = comment.value.clone();
156            // replace all links with inline markdown links
157            for link in InlineLink::captures(val.as_str()) {
158                let target = if link.is_external() {
159                    // find in all documents
160                    documents.iter().find_map(|doc| {
161                        Self::find_match(
162                            &link,
163                            doc.relative_output_path(),
164                            doc.content.iter_items().flat_map(|item| {
165                                Some(item).into_iter().chain(item.children.iter())
166                            }),
167                        )
168                    })
169                } else {
170                    // find matches in the document
171                    Self::find_match(
172                        &link,
173                        target_path,
174                        parent
175                            .content
176                            .iter_items()
177                            .flat_map(|item| Some(item).into_iter().chain(item.children.iter())),
178                    )
179                };
180                if let Some(target) = target {
181                    let display_value = link.markdown_link_display_value();
182                    let markdown_link = format!("[{display_value}]({target})");
183                    // replace the link with the markdown link
184                    comment.value =
185                        comment.value.as_str().replacen(link.as_str(), markdown_link.as_str(), 1);
186                }
187            }
188        }
189    }
190}
191
/// The resolved destination of an inline link: a markdown file and a section within it.
struct InlineLinkTarget<'a> {
    /// Name of the section; it is lowercased when the target is formatted.
    section: Cow<'a, str>,
    /// Path of the markdown file, written after a leading `/` when the target is formatted.
    target_path: PathBuf,
}

impl<'a> InlineLinkTarget<'a> {
    /// Creates a target that borrows the section name instead of copying it.
    fn borrowed(section: &'a str, target_path: PathBuf) -> Self {
        let section = Cow::Borrowed(section);
        Self { section, target_path }
    }
}

impl std::fmt::Display for InlineLinkTarget<'_> {
    /// Writes the target as `/<target_path>#<section>`, with the section in lowercase.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // NOTE: the url should be absolute for markdown and section names are lowercase
        let anchor = self.section.to_lowercase();
        let file = self.target_path.display();
        write!(f, "/{file}#{anchor}")
    }
}
209
/// A parsed link to an item, built from a single match of `RE_INLINE_LINK`.
#[derive(Debug)]
struct InlineLink<'a> {
    /// The whole matched placeholder (capture group 1), including the braces and the optional
    /// trailing `[...]`.
    outer: Match<'a>,
    /// The `identifier` capture group: the first name inside the braces.
    identifier: &'a str,
    /// The optional `part` capture group: what follows the `-` after the identifier.
    part: Option<&'a str>,
    /// The optional `link` capture group: the text inside the trailing `[...]`.
    link: Option<&'a str>,
}
218
219impl<'a> InlineLink<'a> {
220    fn from_capture(cap: Captures<'a>) -> Option<Self> {
221        Some(Self {
222            outer: cap.get(1)?,
223            identifier: cap.name("identifier")?.as_str(),
224            part: cap.name("part").map(|m| m.as_str()),
225            link: cap.name("link").map(|m| m.as_str()),
226        })
227    }
228
229    fn captures(s: &'a str) -> impl Iterator<Item = Self> + 'a {
230        RE_INLINE_LINK.captures(s).map(Self::from_capture).into_iter().flatten()
231    }
232
233    /// Parses the first inline link.
234    #[allow(unused)]
235    fn capture(s: &'a str) -> Option<Self> {
236        let cap = RE_INLINE_LINK.captures(s)?;
237        Self::from_capture(cap)
238    }
239
240    /// Returns the name of the link
241    fn markdown_link_display_value(&self) -> Cow<'_, str> {
242        if let Some(link) = self.link {
243            Cow::Borrowed(link)
244        } else if let Some(part) = self.part {
245            Cow::Owned(format!("{}-{}", self.identifier, part))
246        } else {
247            Cow::Borrowed(self.identifier)
248        }
249    }
250
251    /// Returns the name of the referenced item.
252    fn ref_name(&self) -> &str {
253        self.exact_identifier().split('-').next().unwrap()
254    }
255
256    fn exact_identifier(&self) -> &str {
257        let mut name = self.identifier;
258        if let Some(part) = self.part {
259            name = part;
260        }
261        name
262    }
263
264    /// Returns the name of the referenced item and its arguments, if any.
265    ///
266    /// Eg: `safeMint-address-uint256-` returns `("safeMint", ["address", "uint256"])`
267    #[expect(unused)]
268    fn ref_name_exact(&self) -> (&str, impl Iterator<Item = &str> + '_) {
269        let identifier = self.exact_identifier();
270        let mut iter = identifier.split('-');
271        (iter.next().unwrap(), iter.filter(|s| !s.is_empty()))
272    }
273
274    /// Returns the content of the matched link.
275    fn as_str(&self) -> &str {
276        self.outer.as_str()
277    }
278
279    /// Returns true if the link is external.
280    fn is_external(&self) -> bool {
281        self.part.is_some()
282    }
283}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the raw capture groups and the `InlineLink` accessors for each supported link form.
    #[test]
    fn parse_inline_links() {
        // `{identifier-part}` surrounded by whitespace
        let caps = RE_INLINE_LINK.captures("    {IERC165-supportsInterface}   ").unwrap();
        assert_eq!(caps.name("identifier").unwrap().as_str(), "IERC165");
        assert_eq!(caps.name("part").unwrap().as_str(), "supportsInterface");

        // `{identifier}` only
        let caps = RE_INLINE_LINK.captures("    {supportsInterface}   ").unwrap();
        assert_eq!(caps.name("identifier").unwrap().as_str(), "supportsInterface");

        // `xref-` prefix with function arguments in the part
        let xref = "{xref-ERC721-_safeMint-address-uint256-}";
        let caps = RE_INLINE_LINK.captures(xref).unwrap();
        assert_eq!(caps.name("identifier").unwrap().as_str(), "ERC721");
        assert_eq!(caps.name("xref").unwrap().as_str(), "xref-");
        assert_eq!(caps.name("part").unwrap().as_str(), "_safeMint-address-uint256-");

        let parsed = InlineLink::capture(xref).unwrap();
        assert_eq!(parsed.ref_name(), "_safeMint");
        assert_eq!(parsed.as_str(), "{xref-ERC721-_safeMint-address-uint256-}");

        // explicit display text in trailing brackets
        let named = InlineLink::capture("{xref-ERC721-_safeMint-address-uint256-}[`Named link`]")
            .unwrap();
        assert_eq!(named.link, Some("`Named link`"));
        assert_eq!(named.markdown_link_display_value(), "`Named link`");
    }
}