Skip to main content

forge_doc/parser/
mod.rs

1//! The parser module.
2
3use solar::parse::ast;
4
5/// Parser error.
6pub mod error;
7
8/// Owned source types.
9pub mod source;
10
11/// Parser item.
12mod item;
13pub use item::{ParseItem, ParseSource};
14
15/// Doc comment.
16mod comment;
17pub use comment::{Comment, CommentTag, Comments, CommentsRef};
18
19use source::*;
20
21/// The documentation parser.
22///
23/// Walks the solar AST and extracts [`ParseItem`]s with owned source data and doc comments.
24#[derive(Debug)]
25pub struct Parser {
26    /// Parsed results.
27    items: Vec<ParseItem>,
28    /// The source code string of the file being parsed.
29    source: String,
30    /// The global byte offset of this file's first byte in solar's source map.
31    ///
32    /// Solar uses a global `SourceMap` where each file's `BytePos` values start at
33    /// `file.start_pos` rather than 0. All span `lo`/`hi` values must be offset by this
34    /// amount before indexing into `self.source`.
35    file_start: usize,
36    /// Tab width used to format code.
37    tab_width: usize,
38}
39
40impl Parser {
41    /// Create a new instance of [Parser].
42    ///
43    /// `file_start` is `ast_source.file.start_pos.to_usize()` — the offset of the file's
44    /// first byte in solar's global source map. Pass `0` in tests where you parse directly.
45    pub const fn new(source: String, file_start: usize, tab_width: usize) -> Self {
46        Self { items: Vec::new(), source, file_start, tab_width }
47    }
48
49    /// Parse a solar source unit and return the parsed items.
50    pub fn parse(mut self, source_unit: &ast::SourceUnit<'_>) -> Vec<ParseItem> {
51        for item in source_unit.items.iter() {
52            if let Some(parsed) = self.parse_item(item) {
53                self.items.push(parsed);
54            }
55        }
56        self.items
57    }
58
59    /// Parse a single solar AST item into a [ParseItem].
60    fn parse_item(&self, item: &ast::Item<'_>) -> Option<ParseItem> {
61        let docs = Self::parse_docs(&item.docs);
62        let span = item.span;
63
64        match &item.kind {
65            ast::ItemKind::Contract(contract) => {
66                let source = self.parse_contract(contract);
67                let code = self.extract_code(span);
68                let mut parse_item =
69                    ParseItem::new(ParseSource::Contract(source)).with_comments(docs);
70
71                // Parse children
72                let mut children = Vec::new();
73                for child in contract.body.iter() {
74                    if let Some(parsed) = self.parse_item(child) {
75                        children.push(parsed);
76                    }
77                }
78                parse_item.children = children;
79                parse_item.code = code;
80                Some(parse_item)
81            }
82            ast::ItemKind::Function(func) => {
83                let source = self.parse_function(func);
84                let code = self.extract_prototype_code(func);
85                Some(
86                    ParseItem::new(ParseSource::Function(source))
87                        .with_comments(docs)
88                        .with_code(code),
89                )
90            }
91            ast::ItemKind::Variable(var) => {
92                let source = self.parse_variable(var);
93                let code = self.extract_code(span);
94                Some(
95                    ParseItem::new(ParseSource::Variable(source))
96                        .with_comments(docs)
97                        .with_code(code),
98                )
99            }
100            ast::ItemKind::Event(event) => {
101                let source = self.parse_event(event);
102                let code = self.extract_code(span);
103                Some(ParseItem::new(ParseSource::Event(source)).with_comments(docs).with_code(code))
104            }
105            ast::ItemKind::Error(err) => {
106                let source = self.parse_error(err);
107                let code = self.extract_code(span);
108                Some(ParseItem::new(ParseSource::Error(source)).with_comments(docs).with_code(code))
109            }
110            ast::ItemKind::Struct(strukt) => {
111                let source = self.parse_struct(strukt);
112                let code = self.extract_code(span);
113                Some(
114                    ParseItem::new(ParseSource::Struct(source)).with_comments(docs).with_code(code),
115                )
116            }
117            ast::ItemKind::Enum(enm) => {
118                let source = self.parse_enum(enm);
119                let code = self.extract_code(span);
120                Some(ParseItem::new(ParseSource::Enum(source)).with_comments(docs).with_code(code))
121            }
122            ast::ItemKind::Udvt(udvt) => {
123                let source = TypeSource { name: udvt.name.to_string() };
124                let code = self.extract_code(span);
125                Some(ParseItem::new(ParseSource::Type(source)).with_comments(docs).with_code(code))
126            }
127            // Skip pragmas, imports, using directives
128            _ => None,
129        }
130    }
131
132    fn parse_contract(&self, contract: &ast::ItemContract<'_>) -> ContractSource {
133        let kind = match contract.kind {
134            ast::ContractKind::Contract => ContractKind::Contract,
135            ast::ContractKind::AbstractContract => ContractKind::Abstract,
136            ast::ContractKind::Interface => ContractKind::Interface,
137            ast::ContractKind::Library => ContractKind::Library,
138        };
139
140        let bases = contract
141            .bases
142            .iter()
143            .map(|base| {
144                let full_name = base.name.to_string();
145                let ident = base.name.last().name.to_string();
146                BaseInfo { name: full_name, ident }
147            })
148            .collect();
149
150        ContractSource { name: contract.name.to_string(), kind, bases }
151    }
152
153    fn parse_function(&self, func: &ast::ItemFunction<'_>) -> FunctionSource {
154        let name = func.header.name.map(|n| n.to_string());
155        let kind = func.kind.to_string();
156        let params = self.parse_var_defs(&func.header.parameters);
157        let returns =
158            func.header.returns.as_deref().map(|r| self.parse_var_defs(r)).unwrap_or_default();
159        FunctionSource { name, kind, params, returns }
160    }
161
162    fn parse_variable(&self, var: &ast::VariableDefinition<'_>) -> VariableSource {
163        let name = var.name.map(|n| n.to_string()).unwrap_or_default();
164
165        let mut attrs = Vec::new();
166        if let Some(m) = var.mutability {
167            match m {
168                ast::VarMut::Constant => attrs.push(VariableAttr::Constant),
169                ast::VarMut::Immutable => attrs.push(VariableAttr::Immutable),
170            }
171        }
172
173        VariableSource { name, attrs }
174    }
175
176    fn parse_event(&self, event: &ast::ItemEvent<'_>) -> EventSource {
177        let fields = self.parse_var_defs(&event.parameters);
178        EventSource { name: event.name.to_string(), fields }
179    }
180
181    fn parse_error(&self, err: &ast::ItemError<'_>) -> ErrorSource {
182        let fields = self.parse_var_defs(&err.parameters);
183        ErrorSource { name: err.name.to_string(), fields }
184    }
185
186    fn parse_struct(&self, strukt: &ast::ItemStruct<'_>) -> StructSource {
187        let fields = self.parse_var_defs(strukt.fields);
188        StructSource { name: strukt.name.to_string(), fields }
189    }
190
191    /// Parse a list of variable definitions into [ParamInfo].
192    fn parse_var_defs(&self, vars: &[ast::VariableDefinition<'_>]) -> Vec<ParamInfo> {
193        vars.iter()
194            .map(|v| ParamInfo { name: v.name.map(|n| n.to_string()), ty: self.type_string(&v.ty) })
195            .collect()
196    }
197
198    /// Extract the type as a string from the source code.
199    fn type_string(&self, ty: &ast::Type<'_>) -> String {
200        let lo = ty.span.lo().to_usize().saturating_sub(self.file_start);
201        let hi = ty.span.hi().to_usize().saturating_sub(self.file_start);
202        if lo < self.source.len() && hi <= self.source.len() && lo < hi {
203            self.source[lo..hi].to_string()
204        } else {
205            String::new()
206        }
207    }
208
209    fn parse_enum(&self, enm: &ast::ItemEnum<'_>) -> EnumSource {
210        let variants = enm.variants.iter().map(|v| v.to_string()).collect();
211        EnumSource { name: enm.name.to_string(), variants }
212    }
213
214    /// Parse doc comments from solar's [ast::DocComments] into our [Comments] type.
215    fn parse_docs(docs: &ast::DocComments<'_>) -> Comments {
216        if docs.is_empty() {
217            return Comments::default();
218        }
219        Comments::from_doc_lines(docs.iter().map(|d| d.symbol.as_str()))
220    }
221
222    /// Extract a code snippet from the source for the given span.
223    fn extract_code(&self, span: ast::Span) -> String {
224        let lo = span.lo().to_usize().saturating_sub(self.file_start);
225        let hi = span.hi().to_usize().saturating_sub(self.file_start);
226        if lo < self.source.len() && hi <= self.source.len() && lo < hi {
227            let code = &self.source[lo..hi];
228            self.dedent(code)
229        } else {
230            String::new()
231        }
232    }
233
234    /// Extract only the function prototype (excluding the body) from the source.
235    fn extract_prototype_code(&self, func: &ast::ItemFunction<'_>) -> String {
236        let lo = func.header.span.lo().to_usize().saturating_sub(self.file_start);
237        let hi = func.header.span.hi().to_usize().saturating_sub(self.file_start);
238        if lo < self.source.len() && hi <= self.source.len() && lo < hi {
239            let mut code = self.source[lo..hi].to_string();
240            code.push(';');
241            self.dedent(&code)
242        } else {
243            String::new()
244        }
245    }
246
247    /// Remove one level of indentation from code.
248    fn dedent(&self, code: &str) -> String {
249        let prefix = &" ".repeat(self.tab_width);
250        code.lines()
251            .map(|line| line.strip_prefix(prefix).unwrap_or(line))
252            .collect::<Vec<_>>()
253            .join("\n")
254    }
255}
256
257#[cfg(test)]
258mod tests {
259    use super::*;
260
261    fn parse_source(src: &str) -> Vec<ParseItem> {
262        use solar::parse::{
263            Parser as SolarParser,
264            ast::{Arena, interface},
265            interface::Session,
266        };
267
268        let sess =
269            Session::builder().with_silent_emitter(Some("test parse failed".to_string())).build();
270
271        sess.enter(|| -> Vec<ParseItem> {
272            let arena = Arena::new();
273            let mut parser = SolarParser::from_source_code(
274                &sess,
275                &arena,
276                interface::source_map::FileName::Custom("test".to_string()),
277                src.to_string(),
278            )
279            .expect("failed to create parser");
280
281            let source_unit = parser.parse_file().map_err(|e| e.emit()).expect("failed to parse");
282
283            // file_start=0: when parsing directly, solar's BytePos values start at 0.
284            let doc = Parser::new(src.to_string(), 0, 4);
285            doc.parse(&source_unit)
286        })
287    }
288
289    macro_rules! test_single_unit {
290        ($test:ident, $src:expr, $variant:ident $identity:expr) => {
291            #[test]
292            fn $test() {
293                let items = parse_source($src);
294                assert_eq!(items.len(), 1);
295                let item = items.first().unwrap();
296                assert!(item.comments.is_empty());
297                assert!(item.children.is_empty());
298                assert_eq!(item.source.ident(), $identity);
299                assert!(matches!(item.source, ParseSource::$variant(_)));
300            }
301        };
302    }
303
304    #[test]
305    fn empty_source() {
306        assert_eq!(parse_source(""), vec![]);
307    }
308
309    test_single_unit!(single_function, "function someFn() { }", Function "someFn");
310    test_single_unit!(single_variable, "uint256 constant VALUE = 0;", Variable "VALUE");
311    test_single_unit!(single_event, "event SomeEvent();", Event "SomeEvent");
312    test_single_unit!(single_error, "error SomeError();", Error "SomeError");
313    test_single_unit!(single_struct, "struct SomeStruct { }", Struct "SomeStruct");
314    test_single_unit!(single_enum, "enum SomeEnum { SOME, OTHER }", Enum "SomeEnum");
315    test_single_unit!(single_contract, "contract Contract { }", Contract "Contract");
316
317    #[test]
318    fn multiple_shallow_contracts() {
319        let items = parse_source(
320            r"
321            contract A { }
322            contract B { }
323            contract C { }
324        ",
325        );
326        assert_eq!(items.len(), 3);
327
328        let first_item = items.first().unwrap();
329        assert!(matches!(first_item.source, ParseSource::Contract(_)));
330        assert_eq!(first_item.source.ident(), "A");
331
332        let first_item = items.get(1).unwrap();
333        assert!(matches!(first_item.source, ParseSource::Contract(_)));
334        assert_eq!(first_item.source.ident(), "B");
335
336        let first_item = items.get(2).unwrap();
337        assert!(matches!(first_item.source, ParseSource::Contract(_)));
338        assert_eq!(first_item.source.ident(), "C");
339    }
340
341    #[test]
342    fn contract_with_children_items() {
343        let items = parse_source(
344            r"
345            event TopLevelEvent();
346
347            contract Contract {
348                event ContractEvent();
349                error ContractError();
350                struct ContractStruct { }
351                enum ContractEnum { }
352
353                uint256 constant CONTRACT_CONSTANT = 0;
354                bool contractVar;
355
356                function contractFunction(uint256) external returns (uint256) {
357                    bool localVar; // must be ignored
358                }
359            }
360        ",
361        );
362
363        assert_eq!(items.len(), 2);
364
365        let event = items.first().unwrap();
366        assert!(event.comments.is_empty());
367        assert!(event.children.is_empty());
368        assert_eq!(event.source.ident(), "TopLevelEvent");
369        assert!(matches!(event.source, ParseSource::Event(_)));
370
371        let contract = items.get(1).unwrap();
372        assert!(contract.comments.is_empty());
373        assert_eq!(contract.children.len(), 7);
374        assert_eq!(contract.source.ident(), "Contract");
375        assert!(matches!(contract.source, ParseSource::Contract(_)));
376        assert!(contract.children.iter().all(|ch| ch.children.is_empty()));
377        assert!(contract.children.iter().all(|ch| ch.comments.is_empty()));
378    }
379
380    #[test]
381    fn contract_with_fallback() {
382        let items = parse_source(
383            r"
384            contract Contract {
385                fallback() external payable {}
386            }
387        ",
388        );
389
390        assert_eq!(items.len(), 1);
391
392        let contract = items.first().unwrap();
393        assert!(contract.comments.is_empty());
394        assert_eq!(contract.children.len(), 1);
395        assert_eq!(contract.source.ident(), "Contract");
396        assert!(matches!(contract.source, ParseSource::Contract(_)));
397
398        let fallback = contract.children.first().unwrap();
399        assert_eq!(fallback.source.ident(), "fallback");
400        assert!(matches!(fallback.source, ParseSource::Function(_)));
401    }
402
403    #[test]
404    fn overloaded_function_signatures() {
405        let items = parse_source(
406            r"
407            interface IFoo {
408                function process(address addr) external;
409                function process(address[] calldata addrs) external;
410                function process(address addr, uint256 value) external;
411            }
412        ",
413        );
414        assert_eq!(items.len(), 1);
415        let contract = items.first().unwrap();
416        assert_eq!(contract.children.len(), 3);
417        let sigs: Vec<String> = contract.children.iter().map(|ch| ch.source.signature()).collect();
418        assert_eq!(sigs[0], "process(address)", "first overload");
419        assert_eq!(sigs[1], "process(address[])", "second overload (array)");
420        assert_eq!(sigs[2], "process(address,uint256)", "third overload");
421    }
422
423    #[test]
424    fn contract_with_doc_comments() {
425        let items = parse_source(
426            r"
427            pragma solidity ^0.8.19;
428            /// @notice    Cool contract
429            /**
430             * @dev line one
431             *    line 2
432             */
433            contract Test {
434                /// my function
435                ///       i like whitespace
436                function test() {}
437            }
438        ",
439        );
440
441        assert_eq!(items.len(), 1);
442
443        let contract = items.first().unwrap();
444        assert_eq!(contract.comments.len(), 2);
445        assert_eq!(
446            *contract.comments.first().unwrap(),
447            Comment::new(CommentTag::Notice, "Cool contract".to_owned())
448        );
449        assert_eq!(
450            *contract.comments.get(1).unwrap(),
451            Comment::new(CommentTag::Dev, "line one\nline 2".to_owned())
452        );
453
454        let function = contract.children.first().unwrap();
455        assert_eq!(
456            *function.comments.first().unwrap(),
457            Comment::new(CommentTag::Notice, "my function\ni like whitespace".to_owned())
458        );
459    }
460}