Skip to main content

forge_doc/preprocessor/
infer_hyperlinks.rs

1use super::{Preprocessor, PreprocessorId};
2use crate::{Comments, Document, ParseItem, ParseSource};
3use regex::{Captures, Match, Regex};
4use std::{
5    borrow::Cow,
6    path::{Path, PathBuf},
7    sync::LazyLock,
8};
9
10/// A regex that matches `{identifier-part}` placeholders
11///
12/// Overloaded functions are referenced by including the exact function arguments in the `part`
13/// section of the placeholder.
14static RE_INLINE_LINK: LazyLock<Regex> = LazyLock::new(|| {
15    Regex::new(r"(?m)(\{(?P<xref>xref-)?(?P<identifier>[a-zA-Z_][0-9a-zA-Z_]*)(-(?P<part>[a-zA-Z_][0-9a-zA-Z_-]*))?}(\[(?P<link>(.*?))\])?)").unwrap()
16});
17
/// Identifier of the [InferInlineHyperlinks] preprocessor, as returned by its
/// [`Preprocessor::id`] implementation.
pub const INFER_INLINE_HYPERLINKS_ID: PreprocessorId = PreprocessorId("infer inline hyperlinks");
20
/// Preprocessor that turns inline link placeholders found in comments into markdown links.
///
/// It traverses the comments of every item (and of every child item) in all documents, looks
/// for placeholders such as `{identifier}` or `{identifier-part}`, and tries to find the
/// referenced item. Placeholders that resolve are replaced with a markdown link to the
/// referenced item's section; placeholders that cannot be resolved are left untouched.
#[derive(Debug, Default)]
#[non_exhaustive]
pub struct InferInlineHyperlinks;
29
30impl Preprocessor for InferInlineHyperlinks {
31    fn id(&self) -> PreprocessorId {
32        INFER_INLINE_HYPERLINKS_ID
33    }
34
35    fn preprocess(&self, mut documents: Vec<Document>) -> Result<Vec<Document>, eyre::Error> {
36        // traverse all comments and try to match inline links and replace with inline links for
37        // markdown
38        let mut docs = Vec::with_capacity(documents.len());
39        while !documents.is_empty() {
40            let mut document = documents.remove(0);
41            let target_path = document.relative_output_path().to_path_buf();
42            for idx in 0..document.content.len() {
43                let (mut comments, item_children_len) = {
44                    let item = document.content.get_mut(idx).unwrap();
45                    let comments = std::mem::take(&mut item.comments);
46                    let children = item.children.len();
47                    (comments, children)
48                };
49                Self::inline_doc_links(&documents, &target_path, &mut comments, &document);
50                document.content.get_mut(idx).unwrap().comments = comments;
51
52                // we also need to iterate over all child items
53                // This is a bit horrible but we need to traverse all items in all documents
54                for child_idx in 0..item_children_len {
55                    let mut comments = {
56                        let item = document.content.get_mut(idx).unwrap();
57
58                        std::mem::take(&mut item.children[child_idx].comments)
59                    };
60                    Self::inline_doc_links(&documents, &target_path, &mut comments, &document);
61                    document.content.get_mut(idx).unwrap().children[child_idx].comments = comments;
62                }
63            }
64
65            docs.push(document);
66        }
67
68        Ok(docs)
69    }
70}
71
72impl InferInlineHyperlinks {
73    /// Finds the first match for the given link.
74    ///
75    /// All items get their own section in the markdown file.
76    /// This section uses the identifier of the item: `#functionname`
77    ///
78    /// Note: the target path is the relative path to the markdown file.
79    fn find_match<'a>(
80        link: &InlineLink<'a>,
81        target_path: &Path,
82        items: impl Iterator<Item = &'a ParseItem>,
83    ) -> Option<InlineLinkTarget<'a>> {
84        for item in items {
85            match &item.source {
86                ParseSource::Contract(contract) => {
87                    let name = &contract.name;
88                    if name == link.identifier {
89                        if link.part.is_none() {
90                            return Some(InlineLinkTarget::borrowed(
91                                name,
92                                target_path.to_path_buf(),
93                            ));
94                        }
95                        // try to find the referenced item in the contract's children
96                        return Self::find_match(link, target_path, item.children.iter());
97                    }
98                }
99                ParseSource::Function(fun) => {
100                    // TODO: handle overloaded functions
101                    // functions can be overloaded so we need to keep track of how many matches we
102                    // have so we can match the correct one
103                    if let Some(id) = &fun.name {
104                        // Note: constructors don't have a name
105                        if id == link.ref_name() {
106                            return Some(InlineLinkTarget::borrowed(id, target_path.to_path_buf()));
107                        }
108                    } else if link.ref_name() == "constructor" {
109                        return Some(InlineLinkTarget::borrowed(
110                            "constructor",
111                            target_path.to_path_buf(),
112                        ));
113                    }
114                }
115                ParseSource::Variable(_) => {}
116                ParseSource::Event(ev) => {
117                    let ev_name = &ev.name;
118                    if ev_name == link.ref_name() {
119                        return Some(InlineLinkTarget::borrowed(
120                            ev_name,
121                            target_path.to_path_buf(),
122                        ));
123                    }
124                }
125                ParseSource::Error(err) => {
126                    let err_name = &err.name;
127                    if err_name == link.ref_name() {
128                        return Some(InlineLinkTarget::borrowed(
129                            err_name,
130                            target_path.to_path_buf(),
131                        ));
132                    }
133                }
134                ParseSource::Struct(structdef) => {
135                    let struct_name = &structdef.name;
136                    if struct_name == link.ref_name() {
137                        return Some(InlineLinkTarget::borrowed(
138                            struct_name,
139                            target_path.to_path_buf(),
140                        ));
141                    }
142                }
143                ParseSource::Enum(_) => {}
144                ParseSource::Type(_) => {}
145            }
146        }
147
148        None
149    }
150
151    /// Attempts to convert inline links to markdown links.
152    fn inline_doc_links(
153        documents: &[Document],
154        target_path: &Path,
155        comments: &mut Comments,
156        parent: &Document,
157    ) {
158        // loop over all comments in the item
159        for comment in comments.iter_mut() {
160            let val = comment.value.clone();
161            // replace all links with inline markdown links
162            for link in InlineLink::captures(val.as_str()) {
163                let target = if link.is_external() {
164                    // find in all documents
165                    documents.iter().find_map(|doc| {
166                        Self::find_match(
167                            &link,
168                            doc.relative_output_path(),
169                            doc.content.iter_items().flat_map(|item| {
170                                Some(item).into_iter().chain(item.children.iter())
171                            }),
172                        )
173                    })
174                } else {
175                    // find matches in the document
176                    Self::find_match(
177                        &link,
178                        target_path,
179                        parent
180                            .content
181                            .iter_items()
182                            .flat_map(|item| Some(item).into_iter().chain(item.children.iter())),
183                    )
184                };
185                if let Some(target) = target {
186                    let display_value = link.markdown_link_display_value();
187                    let markdown_link = format!("[{display_value}]({target})");
188                    // replace the link with the markdown link
189                    comment.value =
190                        comment.value.as_str().replacen(link.as_str(), markdown_link.as_str(), 1);
191                }
192            }
193        }
194    }
195}
196
/// The resolved destination of an inline link: a section inside a markdown file.
struct InlineLinkTarget<'a> {
    /// Name of the section the link points to.
    section: Cow<'a, str>,
    /// Path of the markdown file that contains the section.
    target_path: PathBuf,
}

impl<'a> InlineLinkTarget<'a> {
    /// Creates a target whose section name borrows from `section`.
    const fn borrowed(section: &'a str, target_path: PathBuf) -> Self {
        Self { section: Cow::Borrowed(section), target_path }
    }
}

impl std::fmt::Display for InlineLinkTarget<'_> {
    /// Writes the target as `/<target_path>#<lowercased section>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // NOTE: the url should be absolute for markdown and section names are lowercase
        let path = self.target_path.display();
        let anchor = self.section.to_lowercase();
        write!(f, "/{path}#{anchor}")
    }
}
214
215/// A parsed link to an item.
216#[derive(Debug)]
217struct InlineLink<'a> {
218    outer: Match<'a>,
219    identifier: &'a str,
220    part: Option<&'a str>,
221    link: Option<&'a str>,
222}
223
224impl<'a> InlineLink<'a> {
225    fn from_capture(cap: Captures<'a>) -> Option<Self> {
226        Some(Self {
227            outer: cap.get(1)?,
228            identifier: cap.name("identifier")?.as_str(),
229            part: cap.name("part").map(|m| m.as_str()),
230            link: cap.name("link").map(|m| m.as_str()),
231        })
232    }
233
234    fn captures(s: &'a str) -> impl Iterator<Item = Self> + 'a {
235        RE_INLINE_LINK.captures_iter(s).filter_map(Self::from_capture)
236    }
237
238    /// Parses the first inline link.
239    #[allow(unused)]
240    fn capture(s: &'a str) -> Option<Self> {
241        let cap = RE_INLINE_LINK.captures(s)?;
242        Self::from_capture(cap)
243    }
244
245    /// Returns the name of the link
246    fn markdown_link_display_value(&self) -> Cow<'_, str> {
247        if let Some(link) = self.link {
248            Cow::Borrowed(link)
249        } else if let Some(part) = self.part {
250            Cow::Owned(format!("{}-{}", self.identifier, part))
251        } else {
252            Cow::Borrowed(self.identifier)
253        }
254    }
255
256    /// Returns the name of the referenced item.
257    fn ref_name(&self) -> &str {
258        self.exact_identifier().split('-').next().unwrap()
259    }
260
261    const fn exact_identifier(&self) -> &str {
262        if let Some(part) = self.part { part } else { self.identifier }
263    }
264
265    /// Returns the content of the matched link.
266    fn as_str(&self) -> &str {
267        self.outer.as_str()
268    }
269
270    /// Returns true if the link is external.
271    const fn is_external(&self) -> bool {
272        self.part.is_some()
273    }
274}
275
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the capture groups of [`RE_INLINE_LINK`] and the accessors of [`InlineLink`].
    #[test]
    fn parse_inline_links() {
        // `{identifier-part}`
        let caps = RE_INLINE_LINK.captures("    {IERC165-supportsInterface}   ").unwrap();
        assert_eq!(caps.name("identifier").unwrap().as_str(), "IERC165");
        assert_eq!(caps.name("part").unwrap().as_str(), "supportsInterface");

        // `{identifier}`
        let caps = RE_INLINE_LINK.captures("    {supportsInterface}   ").unwrap();
        assert_eq!(caps.name("identifier").unwrap().as_str(), "supportsInterface");

        // `{xref-identifier-part}` where the part spells out the function arguments
        let xref_link = "{xref-ERC721-_safeMint-address-uint256-}";
        let caps = RE_INLINE_LINK.captures(xref_link).unwrap();
        assert_eq!(caps.name("identifier").unwrap().as_str(), "ERC721");
        assert_eq!(caps.name("xref").unwrap().as_str(), "xref-");
        assert_eq!(caps.name("part").unwrap().as_str(), "_safeMint-address-uint256-");

        let parsed = InlineLink::capture(xref_link).unwrap();
        assert_eq!(parsed.ref_name(), "_safeMint");
        assert_eq!(parsed.as_str(), "{xref-ERC721-_safeMint-address-uint256-}");

        // a trailing `[...]` provides the link text
        let named_link = "{xref-ERC721-_safeMint-address-uint256-}[`Named link`]";
        let parsed = InlineLink::capture(named_link).unwrap();
        assert_eq!(parsed.link, Some("`Named link`"));
        assert_eq!(parsed.markdown_link_display_value(), "`Named link`");
    }
}