forge_doc/preprocessor/
infer_hyperlinks.rs

1use super::{Preprocessor, PreprocessorId};
2use crate::{Comments, Document, ParseItem, ParseSource};
3use forge_fmt::solang_ext::SafeUnwrap;
4use regex::{Captures, Match, Regex};
5use std::{
6    borrow::Cow,
7    path::{Path, PathBuf},
8    sync::LazyLock,
9};
10
11/// A regex that matches `{identifier-part}` placeholders
12///
13/// Overloaded functions are referenced by including the exact function arguments in the `part`
14/// section of the placeholder.
15static RE_INLINE_LINK: LazyLock<Regex> = LazyLock::new(|| {
16    Regex::new(r"(?m)(\{(?P<xref>xref-)?(?P<identifier>[a-zA-Z_][0-9a-zA-Z_]*)(-(?P<part>[a-zA-Z_][0-9a-zA-Z_-]*))?}(\[(?P<link>(.*?))\])?)").unwrap()
17});
18
19/// [InferInlineHyperlinks] preprocessor id.
20pub const INFER_INLINE_HYPERLINKS_ID: PreprocessorId = PreprocessorId("infer inline hyperlinks");
21
/// Preprocessor that resolves inline link placeholders found in item comments.
///
/// The comments of every item, and of every child item, are scanned for `{identifier}` and
/// `{identifier-part}` placeholders. A placeholder whose referenced item can be located is
/// replaced with a markdown link to that item's section; placeholders that cannot be resolved
/// are left untouched.
#[derive(Debug, Default)]
#[non_exhaustive]
pub struct InferInlineHyperlinks;
30
31impl Preprocessor for InferInlineHyperlinks {
32    fn id(&self) -> PreprocessorId {
33        INFER_INLINE_HYPERLINKS_ID
34    }
35
36    fn preprocess(&self, mut documents: Vec<Document>) -> Result<Vec<Document>, eyre::Error> {
37        // traverse all comments and try to match inline links and replace with inline links for
38        // markdown
39        let mut docs = Vec::with_capacity(documents.len());
40        while !documents.is_empty() {
41            let mut document = documents.remove(0);
42            let target_path = document.relative_output_path().to_path_buf();
43            for idx in 0..document.content.len() {
44                let (mut comments, item_children_len) = {
45                    let item = document.content.get_mut(idx).unwrap();
46                    let comments = std::mem::take(&mut item.comments);
47                    let children = item.children.len();
48                    (comments, children)
49                };
50                Self::inline_doc_links(&documents, &target_path, &mut comments, &document);
51                document.content.get_mut(idx).unwrap().comments = comments;
52
53                // we also need to iterate over all child items
54                // This is a bit horrible but we need to traverse all items in all documents
55                for child_idx in 0..item_children_len {
56                    let mut comments = {
57                        let item = document.content.get_mut(idx).unwrap();
58
59                        std::mem::take(&mut item.children[child_idx].comments)
60                    };
61                    Self::inline_doc_links(&documents, &target_path, &mut comments, &document);
62                    document.content.get_mut(idx).unwrap().children[child_idx].comments = comments;
63                }
64            }
65
66            docs.push(document);
67        }
68
69        Ok(docs)
70    }
71}
72
73impl InferInlineHyperlinks {
74    /// Finds the first match for the given link.
75    ///
76    /// All items get their own section in the markdown file.
77    /// This section uses the identifier of the item: `#functionname`
78    ///
79    /// Note: the target path is the relative path to the markdown file.
80    fn find_match<'a>(
81        link: &InlineLink<'a>,
82        target_path: &Path,
83        items: impl Iterator<Item = &'a ParseItem>,
84    ) -> Option<InlineLinkTarget<'a>> {
85        for item in items {
86            match &item.source {
87                ParseSource::Contract(contract) => {
88                    let name = &contract.name.safe_unwrap().name;
89                    if name == link.identifier {
90                        if link.part.is_none() {
91                            return Some(InlineLinkTarget::borrowed(
92                                name,
93                                target_path.to_path_buf(),
94                            ));
95                        }
96                        // try to find the referenced item in the contract's children
97                        return Self::find_match(link, target_path, item.children.iter());
98                    }
99                }
100                ParseSource::Function(fun) => {
101                    // TODO: handle overloaded functions
102                    // functions can be overloaded so we need to keep track of how many matches we
103                    // have so we can match the correct one
104                    if let Some(id) = &fun.name {
105                        // Note: constructors don't have a name
106                        if id.name == link.ref_name() {
107                            return Some(InlineLinkTarget::borrowed(
108                                &id.name,
109                                target_path.to_path_buf(),
110                            ));
111                        }
112                    } else if link.ref_name() == "constructor" {
113                        return Some(InlineLinkTarget::borrowed(
114                            "constructor",
115                            target_path.to_path_buf(),
116                        ));
117                    }
118                }
119                ParseSource::Variable(_) => {}
120                ParseSource::Event(ev) => {
121                    let ev_name = &ev.name.safe_unwrap().name;
122                    if ev_name == link.ref_name() {
123                        return Some(InlineLinkTarget::borrowed(
124                            ev_name,
125                            target_path.to_path_buf(),
126                        ));
127                    }
128                }
129                ParseSource::Error(err) => {
130                    let err_name = &err.name.safe_unwrap().name;
131                    if err_name == link.ref_name() {
132                        return Some(InlineLinkTarget::borrowed(
133                            err_name,
134                            target_path.to_path_buf(),
135                        ));
136                    }
137                }
138                ParseSource::Struct(structdef) => {
139                    let struct_name = &structdef.name.safe_unwrap().name;
140                    if struct_name == link.ref_name() {
141                        return Some(InlineLinkTarget::borrowed(
142                            struct_name,
143                            target_path.to_path_buf(),
144                        ));
145                    }
146                }
147                ParseSource::Enum(_) => {}
148                ParseSource::Type(_) => {}
149            }
150        }
151
152        None
153    }
154
155    /// Attempts to convert inline links to markdown links.
156    fn inline_doc_links(
157        documents: &[Document],
158        target_path: &Path,
159        comments: &mut Comments,
160        parent: &Document,
161    ) {
162        // loop over all comments in the item
163        for comment in comments.iter_mut() {
164            let val = comment.value.clone();
165            // replace all links with inline markdown links
166            for link in InlineLink::captures(val.as_str()) {
167                let target = if link.is_external() {
168                    // find in all documents
169                    documents.iter().find_map(|doc| {
170                        Self::find_match(
171                            &link,
172                            doc.relative_output_path(),
173                            doc.content.iter_items().flat_map(|item| {
174                                Some(item).into_iter().chain(item.children.iter())
175                            }),
176                        )
177                    })
178                } else {
179                    // find matches in the document
180                    Self::find_match(
181                        &link,
182                        target_path,
183                        parent
184                            .content
185                            .iter_items()
186                            .flat_map(|item| Some(item).into_iter().chain(item.children.iter())),
187                    )
188                };
189                if let Some(target) = target {
190                    let display_value = link.markdown_link_display_value();
191                    let markdown_link = format!("[{display_value}]({target})");
192                    // replace the link with the markdown link
193                    comment.value =
194                        comment.value.as_str().replacen(link.as_str(), markdown_link.as_str(), 1);
195                }
196            }
197        }
198    }
199}
200
/// The resolved destination of an inline link.
struct InlineLinkTarget<'a> {
    /// Name of the section the link points to.
    section: Cow<'a, str>,
    /// Path of the file that contains the section.
    target_path: PathBuf,
}
205
206impl<'a> InlineLinkTarget<'a> {
207    fn borrowed(section: &'a str, target_path: PathBuf) -> Self {
208        Self { section: Cow::Borrowed(section), target_path }
209    }
210}
211
212impl std::fmt::Display for InlineLinkTarget<'_> {
213    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
214        // NOTE: the url should be absolute for markdown and section names are lowercase
215        write!(f, "/{}#{}", self.target_path.display(), self.section.to_lowercase())
216    }
217}
218
219/// A parsed link to an item.
220#[derive(Debug)]
221struct InlineLink<'a> {
222    outer: Match<'a>,
223    identifier: &'a str,
224    part: Option<&'a str>,
225    link: Option<&'a str>,
226}
227
228impl<'a> InlineLink<'a> {
229    fn from_capture(cap: Captures<'a>) -> Option<Self> {
230        Some(Self {
231            outer: cap.get(1)?,
232            identifier: cap.name("identifier")?.as_str(),
233            part: cap.name("part").map(|m| m.as_str()),
234            link: cap.name("link").map(|m| m.as_str()),
235        })
236    }
237
238    fn captures(s: &'a str) -> impl Iterator<Item = Self> + 'a {
239        RE_INLINE_LINK.captures(s).map(Self::from_capture).into_iter().flatten()
240    }
241
242    /// Parses the first inline link.
243    #[allow(unused)]
244    fn capture(s: &'a str) -> Option<Self> {
245        let cap = RE_INLINE_LINK.captures(s)?;
246        Self::from_capture(cap)
247    }
248
249    /// Returns the name of the link
250    fn markdown_link_display_value(&self) -> Cow<'_, str> {
251        if let Some(link) = self.link {
252            Cow::Borrowed(link)
253        } else if let Some(part) = self.part {
254            Cow::Owned(format!("{}-{}", self.identifier, part))
255        } else {
256            Cow::Borrowed(self.identifier)
257        }
258    }
259
260    /// Returns the name of the referenced item.
261    fn ref_name(&self) -> &str {
262        self.exact_identifier().split('-').next().unwrap()
263    }
264
265    fn exact_identifier(&self) -> &str {
266        let mut name = self.identifier;
267        if let Some(part) = self.part {
268            name = part;
269        }
270        name
271    }
272
273    /// Returns the name of the referenced item and its arguments, if any.
274    ///
275    /// Eg: `safeMint-address-uint256-` returns `("safeMint", ["address", "uint256"])`
276    #[expect(unused)]
277    fn ref_name_exact(&self) -> (&str, impl Iterator<Item = &str> + '_) {
278        let identifier = self.exact_identifier();
279        let mut iter = identifier.split('-');
280        (iter.next().unwrap(), iter.filter(|s| !s.is_empty()))
281    }
282
283    /// Returns the content of the matched link.
284    fn as_str(&self) -> &str {
285        self.outer.as_str()
286    }
287
288    /// Returns true if the link is external.
289    fn is_external(&self) -> bool {
290        self.part.is_some()
291    }
292}
293
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises the placeholder regex and the [InlineLink] accessors built on top of it.
    #[test]
    fn parse_inline_links() {
        // `{identifier-part}` surrounded by whitespace.
        let caps = RE_INLINE_LINK.captures("    {IERC165-supportsInterface}   ").unwrap();
        assert_eq!(&caps["identifier"], "IERC165");
        assert_eq!(&caps["part"], "supportsInterface");

        // Bare `{identifier}`.
        let caps = RE_INLINE_LINK.captures("    {supportsInterface}   ").unwrap();
        assert_eq!(&caps["identifier"], "supportsInterface");

        // `xref-` prefix, with the argument types of an overload encoded in the part.
        let xref = "{xref-ERC721-_safeMint-address-uint256-}";
        let caps = RE_INLINE_LINK.captures(xref).unwrap();
        assert_eq!(&caps["identifier"], "ERC721");
        assert_eq!(&caps["xref"], "xref-");
        assert_eq!(&caps["part"], "_safeMint-address-uint256-");

        let parsed = InlineLink::capture(xref).unwrap();
        assert_eq!(parsed.ref_name(), "_safeMint");
        assert_eq!(parsed.as_str(), "{xref-ERC721-_safeMint-address-uint256-}");

        // A trailing `[...]` supplies the display text of the link.
        let named = "{xref-ERC721-_safeMint-address-uint256-}[`Named link`]";
        let parsed = InlineLink::capture(named).unwrap();
        assert_eq!(parsed.link, Some("`Named link`"));
        assert_eq!(parsed.markdown_link_display_value(), "`Named link`");
    }
}
333}