foundry_common/comments/
mod.rs

1use crate::iter::IterDelimited;
2use solar::parse::{
3    ast::{CommentKind, Span},
4    interface::{BytePos, CharPos, SourceMap, source_map::SourceFile},
5    lexer::token::RawTokenKind as TokenKind,
6};
7use std::fmt;
8
9mod comment;
10pub use comment::{Comment, CommentStyle};
11
12pub mod inline_config;
13
/// Marker text that, when found inside a token, opens a block in which comments are kept verbatim.
pub const DISABLE_START: &str = "forgefmt: disable-start";
/// Marker text that, when found inside a token, closes a block opened by [`DISABLE_START`].
pub const DISABLE_END: &str = "forgefmt: disable-end";
16
17pub struct Comments {
18    comments: std::vec::IntoIter<Comment>,
19}
20
21impl fmt::Debug for Comments {
22    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
23        f.write_str("Comments")?;
24        f.debug_list().entries(self.iter()).finish()
25    }
26}
27
28impl Comments {
29    pub fn new(
30        sf: &SourceFile,
31        sm: &SourceMap,
32        normalize_cmnts: bool,
33        group_cmnts: bool,
34        tab_width: Option<usize>,
35    ) -> Self {
36        let gatherer = CommentGatherer::new(sf, sm, normalize_cmnts, tab_width).gather();
37
38        Self {
39            comments: if group_cmnts {
40                gatherer.group().into_iter()
41            } else {
42                gatherer.comments.into_iter()
43            },
44        }
45    }
46
47    pub fn peek(&self) -> Option<&Comment> {
48        self.comments.as_slice().first()
49    }
50
51    #[allow(clippy::should_implement_trait)]
52    pub fn next(&mut self) -> Option<Comment> {
53        self.comments.next()
54    }
55
56    pub fn iter(&self) -> impl Iterator<Item = &Comment> {
57        self.comments.as_slice().iter()
58    }
59
60    /// Finds the first trailing comment on the same line as `span_pos`, allowing for `Mixed`
61    /// style comments to appear before it.
62    ///
63    /// Returns the comment and its index in the buffer.
64    pub fn peek_trailing(
65        &self,
66        sm: &SourceMap,
67        span_pos: BytePos,
68        next_pos: Option<BytePos>,
69    ) -> Option<(&Comment, usize)> {
70        let span_line = sm.lookup_char_pos(span_pos).line;
71        for (i, cmnt) in self.iter().enumerate() {
72            // If we have moved to the next line, we can stop.
73            let comment_line = sm.lookup_char_pos(cmnt.pos()).line;
74            if comment_line != span_line {
75                break;
76            }
77
78            // The comment must start after the given span position.
79            if cmnt.pos() < span_pos {
80                continue;
81            }
82
83            // The comment must be before the next element.
84            if cmnt.pos() >= next_pos.unwrap_or_else(|| cmnt.pos() + BytePos(1)) {
85                break;
86            }
87
88            // Stop when we find a trailing or a non-mixed comment
89            match cmnt.style {
90                CommentStyle::Mixed => continue,
91                CommentStyle::Trailing => return Some((cmnt, i)),
92                _ => break,
93            }
94        }
95        None
96    }
97}
98
99struct CommentGatherer<'ast> {
100    sf: &'ast SourceFile,
101    sm: &'ast SourceMap,
102    text: &'ast str,
103    start_bpos: BytePos,
104    pos: usize,
105    comments: Vec<Comment>,
106    code_to_the_left: bool,
107    disabled_block_depth: usize,
108    tab_width: Option<usize>,
109}
110
111impl<'ast> CommentGatherer<'ast> {
112    fn new(
113        sf: &'ast SourceFile,
114        sm: &'ast SourceMap,
115        normalize_cmnts: bool,
116        tab_width: Option<usize>,
117    ) -> Self {
118        Self {
119            sf,
120            sm,
121            text: sf.src.as_str(),
122            start_bpos: sf.start_pos,
123            pos: 0,
124            comments: Vec::new(),
125            code_to_the_left: false,
126            disabled_block_depth: if normalize_cmnts { 0 } else { 1 },
127            tab_width,
128        }
129    }
130
131    /// Consumes the gatherer and returns the collected comments.
132    fn gather(mut self) -> Self {
133        for token in solar::parse::Cursor::new(&self.text[self.pos..]) {
134            self.process_token(token);
135        }
136        self
137    }
138
139    /// Post-processes a list of comments to group consecutive comments.
140    ///
141    /// Necessary for properly indenting multi-line trailing comments, which would
142    /// otherwise be parsed as a `Trailing` followed by several `Isolated`.
143    fn group(self) -> Vec<Comment> {
144        let mut processed = Vec::new();
145        let mut cursor = self.comments.into_iter().peekable();
146
147        while let Some(mut current) = cursor.next() {
148            if current.kind == CommentKind::Line
149                && (current.style.is_trailing() || current.style.is_isolated())
150            {
151                let mut ref_line = self.sm.lookup_char_pos(current.span.hi()).line;
152                while let Some(next_comment) = cursor.peek() {
153                    if !next_comment.style.is_isolated()
154                        || next_comment.kind != CommentKind::Line
155                        || ref_line + 1 != self.sm.lookup_char_pos(next_comment.span.lo()).line
156                    {
157                        break;
158                    }
159
160                    let next_to_merge = cursor.next().unwrap();
161                    current.lines.extend(next_to_merge.lines);
162                    current.span = current.span.to(next_to_merge.span);
163                    ref_line += 1;
164                }
165            }
166
167            processed.push(current);
168        }
169
170        processed
171    }
172
173    /// Creates a `Span` relative to the source file's start position.
174    fn make_span(&self, range: std::ops::Range<usize>) -> Span {
175        Span::new(self.start_bpos + range.start as u32, self.start_bpos + range.end as u32)
176    }
177
178    /// Processes a single token from the source.
179    fn process_token(&mut self, token: solar::parse::lexer::token::RawToken) {
180        let token_range = self.pos..self.pos + token.len as usize;
181        let span = self.make_span(token_range.clone());
182        let token_text = &self.text[token_range];
183
184        // Keep track of disabled blocks
185        if token_text.trim_start().contains(DISABLE_START) {
186            self.disabled_block_depth += 1;
187        } else if token_text.trim_start().contains(DISABLE_END) {
188            self.disabled_block_depth -= 1;
189        }
190
191        match token.kind {
192            TokenKind::Whitespace => {
193                if let Some(mut idx) = token_text.find('\n') {
194                    self.code_to_the_left = false;
195
196                    while let Some(next_newline) = token_text[idx + 1..].find('\n') {
197                        idx += 1 + next_newline;
198                        let pos = self.pos + idx;
199                        self.comments.push(Comment {
200                            is_doc: false,
201                            kind: CommentKind::Line,
202                            style: CommentStyle::BlankLine,
203                            lines: vec![],
204                            span: self.make_span(pos..pos),
205                        });
206                        // If not disabled, early-exit as we want only a single blank line.
207                        if self.disabled_block_depth == 0 {
208                            break;
209                        }
210                    }
211                }
212            }
213            TokenKind::BlockComment { is_doc, .. } => {
214                let code_to_the_right = !matches!(
215                    self.text[self.pos + token.len as usize..].chars().next(),
216                    Some('\r' | '\n')
217                );
218                let style = match (self.code_to_the_left, code_to_the_right) {
219                    (_, true) => CommentStyle::Mixed,
220                    (false, false) => CommentStyle::Isolated,
221                    (true, false) => CommentStyle::Trailing,
222                };
223                let kind = CommentKind::Block;
224
225                // Count the number of chars since the start of the line by rescanning.
226                let pos_in_file = self.start_bpos + BytePos(self.pos as u32);
227                let line_begin_in_file = line_begin_pos(self.sf, pos_in_file);
228                let line_begin_pos = (line_begin_in_file - self.start_bpos).to_usize();
229                let col = CharPos(self.text[line_begin_pos..self.pos].chars().count());
230
231                let lines = self.split_block_comment_into_lines(token_text, is_doc, col);
232                self.comments.push(Comment { is_doc, kind, style, lines, span })
233            }
234            TokenKind::LineComment { is_doc } => {
235                let line =
236                    if self.disabled_block_depth != 0 { token_text } else { token_text.trim_end() };
237                self.comments.push(Comment {
238                    is_doc,
239                    kind: CommentKind::Line,
240                    style: if self.code_to_the_left {
241                        CommentStyle::Trailing
242                    } else {
243                        CommentStyle::Isolated
244                    },
245                    lines: vec![line.into()],
246                    span,
247                });
248            }
249            _ => {
250                self.code_to_the_left = true;
251            }
252        }
253        self.pos += token.len as usize;
254    }
255
256    /// Splits a block comment into lines, ensuring that each line is properly formatted.
257    fn split_block_comment_into_lines(
258        &self,
259        text: &str,
260        is_doc: bool,
261        col: CharPos,
262    ) -> Vec<String> {
263        // if formatting is disabled, return as is
264        if self.disabled_block_depth != 0 {
265            return vec![text.into()];
266        }
267
268        let mut res: Vec<String> = vec![];
269        let mut lines = text.lines();
270        if let Some(line) = lines.next() {
271            let line = line.trim_end();
272            // Ensure first line of a doc comment only has the `/**` decorator
273            if let Some((_, second)) = line.split_once("/**") {
274                res.push("/**".to_string());
275                if !second.trim().is_empty() {
276                    let line = normalize_block_comment_ws(second, col).trim_end();
277                    // Ensure last line of a doc comment only has the `*/` decorator
278                    if let Some((first, _)) = line.split_once("*/") {
279                        if !first.trim().is_empty() {
280                            res.push(format_doc_block_comment(first.trim_end(), self.tab_width));
281                        }
282                        res.push(" */".to_string());
283                    } else {
284                        res.push(format_doc_block_comment(line.trim_end(), self.tab_width));
285                    }
286                }
287            } else {
288                res.push(line.to_string());
289            }
290        }
291
292        for (pos, line) in lines.delimited() {
293            let line = normalize_block_comment_ws(line, col).trim_end().to_string();
294            if !is_doc {
295                res.push(line);
296                continue;
297            }
298            if !pos.is_last {
299                res.push(format_doc_block_comment(&line, self.tab_width));
300            } else {
301                if let Some((first, _)) = line.split_once("*/")
302                    && !first.trim().is_empty()
303                {
304                    res.push(format_doc_block_comment(first, self.tab_width));
305                }
306                res.push(" */".to_string());
307            }
308        }
309        res
310    }
311}
312
313/// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
314/// Otherwise returns `Some(k)` where `k` is first char offset after that leading
315/// whitespace. Note that `k` may be outside bounds of `s`.
316fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
317    let mut idx = 0;
318    for (i, ch) in s.char_indices().take(col.to_usize()) {
319        if !ch.is_whitespace() {
320            return None;
321        }
322        idx = i + ch.len_utf8();
323    }
324    Some(idx)
325}
326
/// Returns `Some(k)` where `k` is the byte offset of the first non-whitespace char. Returns `k = 0`
/// if `s` starts with a non-whitespace char. If `s` only contains whitespaces (or is empty),
/// returns `None`.
///
/// The offset always lies on a char boundary, including when the leading whitespace contains
/// multi-byte chars, so it can safely be used to slice `s`.
fn first_non_whitespace(s: &str) -> Option<usize> {
    s.find(|ch: char| !ch.is_whitespace())
}
340
341/// Returns a slice of `s` with a whitespace prefix removed based on `col`. If the first `col` chars
342/// of `s` are all whitespace, returns a slice starting after that prefix.
343fn normalize_block_comment_ws(s: &str, col: CharPos) -> &str {
344    let len = s.len();
345    if let Some(col) = all_whitespace(s, col) {
346        return if col < len { &s[col..] } else { "" };
347    }
348    if let Some(col) = first_non_whitespace(s) {
349        return &s[col..];
350    }
351    s
352}
353
354/// Formats a doc block comment line so that they have the ` *` decorator.
355fn format_doc_block_comment(line: &str, tab_width: Option<usize>) -> String {
356    if line.is_empty() {
357        return (" *").to_string();
358    }
359
360    if let Some((_, rest_of_line)) = line.split_once("*") {
361        if rest_of_line.is_empty() {
362            (" *").to_string()
363        } else if let Some(tab_width) = tab_width {
364            let mut normalized = String::from(" *");
365            line_with_tabs(
366                &mut normalized,
367                rest_of_line,
368                tab_width,
369                Some(Consolidation::MinOneTab),
370            );
371            normalized
372        } else {
373            format!(" *{rest_of_line}",)
374        }
375    } else if let Some(tab_width) = tab_width {
376        let mut normalized = String::from(" *\t");
377        line_with_tabs(&mut normalized, line, tab_width, Some(Consolidation::WithoutSpaces));
378        normalized
379    } else {
380        format!(" * {line}")
381    }
382}
383
/// Strategy used by [`line_with_tabs`] when the leading whitespace does not fill a whole number
/// of tabs.
///
/// Both variants currently round the leftover spaces up to one additional tab; passing `None`
/// instead keeps the leftover spaces.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Consolidation {
    /// Replace any leftover spaces with one more tab.
    MinOneTab,
    /// Never emit spaces: leftover spaces become one more tab.
    WithoutSpaces,
}

/// Normalizes the leading whitespace of a string slice according to a given tab width.
///
/// It aggregates and converts leading whitespace (spaces and tabs) into a representation that
/// maximizes the amount of tabs. Each space counts as one column and each tab as `tab_width`
/// columns.
///
/// The normalized line is appended to `output`; existing contents of `output` are preserved.
/// A `tab_width` of `0` cannot express any indentation in tabs, so in that case `line` is
/// appended unchanged.
pub fn line_with_tabs(
    output: &mut String,
    line: &str,
    tab_width: usize,
    strategy: Option<Consolidation>,
) {
    // Guard against a division by zero further down.
    if tab_width == 0 {
        output.push_str(line);
        return;
    }

    // Find the end of the leading whitespace (any sequence of spaces and tabs)
    let first_non_ws = line.find(|c: char| c != ' ' && c != '\t').unwrap_or(line.len());
    let (leading_ws, rest_of_line) = line.split_at(first_non_ws);

    // Compute its equivalent length and derive the required amount of tabs and spaces
    let total_width: usize =
        leading_ws.chars().map(|c| if c == ' ' { 1 } else { tab_width }).sum();
    let (mut num_tabs, mut num_spaces) = (total_width / tab_width, total_width % tab_width);

    // Adjust based on the desired config: with a strategy, leftover spaces become one more tab.
    if num_spaces != 0 {
        match strategy {
            Some(Consolidation::MinOneTab) | Some(Consolidation::WithoutSpaces) => {
                num_tabs += 1;
                num_spaces = 0;
            }
            None => (),
        }
    }

    // Append the normalized indentation and the rest of the line to the output
    output.extend(std::iter::repeat_n('\t', num_tabs));
    output.extend(std::iter::repeat_n(' ', num_spaces));
    output.push_str(rest_of_line);
}
430
431/// Returns the `BytePos` of the beginning of the current line.
432fn line_begin_pos(sf: &SourceFile, pos: BytePos) -> BytePos {
433    let pos = sf.relative_position(pos);
434    let line_index = sf.lookup_line(pos).unwrap();
435    let line_start_pos = sf.lines()[line_index];
436    sf.absolute_position(line_start_pos)
437}