Skip to main content

foundry_common/comments/
mod.rs

1use crate::iter::IterDelimited;
2use solar::parse::{
3    ast::{CommentKind, Span},
4    interface::{BytePos, CharPos, SourceMap, source_map::SourceFile},
5    lexer::token::RawTokenKind as TokenKind,
6};
7use std::fmt;
8
9mod comment;
10pub use comment::{Comment, CommentStyle};
11
12pub mod inline_config;
13
/// Marker text that opens a region in which comment normalization is disabled.
///
/// While comments are gathered, a token whose text contains this marker increments the
/// gatherer's disabled-block depth.
pub const DISABLE_START: &str = "forgefmt: disable-start";
/// Marker text that closes a region opened by [`DISABLE_START`].
///
/// While comments are gathered, a token whose text contains this marker (and not
/// [`DISABLE_START`]) decrements the gatherer's disabled-block depth.
pub const DISABLE_END: &str = "forgefmt: disable-end";
16
/// A buffer of source comments that is consumed from the front.
///
/// The buffer is expected to be ordered by ascending span position; see the warning on
/// `push_front`.
pub struct Comments {
    /// Pending comments; `peek`/`next` read from the front of this queue.
    comments: std::collections::VecDeque<Comment>,
}
20
21impl fmt::Debug for Comments {
22    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
23        f.write_str("Comments")?;
24        f.debug_list().entries(self.iter()).finish()
25    }
26}
27
28impl Comments {
29    pub fn new(
30        sf: &SourceFile,
31        sm: &SourceMap,
32        normalize_cmnts: bool,
33        group_cmnts: bool,
34        tab_width: Option<usize>,
35    ) -> Self {
36        let gatherer = CommentGatherer::new(sf, sm, normalize_cmnts, tab_width).gather();
37
38        Self {
39            comments: if group_cmnts { gatherer.group().into() } else { gatherer.comments.into() },
40        }
41    }
42
43    pub fn peek(&self) -> Option<&Comment> {
44        self.comments.front()
45    }
46
47    #[allow(clippy::should_implement_trait)]
48    pub fn next(&mut self) -> Option<Comment> {
49        self.comments.pop_front()
50    }
51
52    pub fn iter(&self) -> impl Iterator<Item = &Comment> {
53        self.comments.iter()
54    }
55
56    /// Adds a new comment at the beginning of the list.
57    ///
58    /// Should only be used when comments are gathered scattered, and must be manually sorted.
59    ///
60    /// **WARNING:** This struct works under the assumption that comments are always sorted by
61    /// ascending span position. It is the caller's responsibility to ensure that this premise
62    /// always holds true.
63    pub fn push_front(&mut self, cmnt: Comment) {
64        self.comments.push_front(cmnt)
65    }
66
67    /// Finds the first trailing comment on the same line as `span_pos`, allowing for `Mixed`
68    /// style comments to appear before it.
69    ///
70    /// Returns the comment and its index in the buffer.
71    pub fn peek_trailing(
72        &self,
73        sm: &SourceMap,
74        span_pos: BytePos,
75        next_pos: Option<BytePos>,
76    ) -> Option<(&Comment, usize)> {
77        let span_line = sm.lookup_char_pos(span_pos).line;
78        for (i, cmnt) in self.iter().enumerate() {
79            // If we have moved to the next line, we can stop.
80            let comment_line = sm.lookup_char_pos(cmnt.pos()).line;
81            if comment_line != span_line {
82                break;
83            }
84
85            // The comment must start after the given span position.
86            if cmnt.pos() < span_pos {
87                continue;
88            }
89
90            // The comment must be before the next element.
91            if cmnt.pos() >= next_pos.unwrap_or_else(|| cmnt.pos() + BytePos(1)) {
92                break;
93            }
94
95            // Stop when we find a trailing or a non-mixed comment
96            match cmnt.style {
97                CommentStyle::Mixed => continue,
98                CommentStyle::Trailing => return Some((cmnt, i)),
99                _ => break,
100            }
101        }
102        None
103    }
104}
105
/// Walks the raw tokens of a source file and collects its comments (and blank lines) as
/// `Comment` values.
struct CommentGatherer<'ast> {
    /// Source file being scanned; used to find the start of the line a block comment is on.
    sf: &'ast SourceFile,
    /// Source map used to resolve line numbers when grouping comments.
    sm: &'ast SourceMap,
    /// Full text of `sf` (`sf.src`).
    text: &'ast str,
    /// Start position of `sf`; offsets into `text` are added to it to build spans.
    start_bpos: BytePos,
    /// Byte offset into `text` of the token currently being processed.
    pos: usize,
    /// Comments collected so far, in source order.
    comments: Vec<Comment>,
    /// Whether a token other than whitespace or a comment has been seen since the last
    /// whitespace token containing a newline.
    code_to_the_left: bool,
    /// Nesting depth of disabled regions; while non-zero, comment text is kept verbatim.
    /// Starts at 1 when normalization is turned off for the whole file.
    disabled_block_depth: usize,
    /// Tab width forwarded to doc block comment formatting, if any.
    tab_width: Option<usize>,
}
117
118impl<'ast> CommentGatherer<'ast> {
119    fn new(
120        sf: &'ast SourceFile,
121        sm: &'ast SourceMap,
122        normalize_cmnts: bool,
123        tab_width: Option<usize>,
124    ) -> Self {
125        Self {
126            sf,
127            sm,
128            text: sf.src.as_str(),
129            start_bpos: sf.start_pos,
130            pos: 0,
131            comments: Vec::new(),
132            code_to_the_left: false,
133            disabled_block_depth: if normalize_cmnts { 0 } else { 1 },
134            tab_width,
135        }
136    }
137
138    /// Consumes the gatherer and returns the collected comments.
139    fn gather(mut self) -> Self {
140        for token in solar::parse::Cursor::new(&self.text[self.pos..]) {
141            self.process_token(token);
142        }
143        self
144    }
145
146    /// Post-processes a list of comments to group consecutive comments.
147    ///
148    /// Necessary for properly indenting multi-line trailing comments, which would
149    /// otherwise be parsed as a `Trailing` followed by several `Isolated`.
150    fn group(self) -> Vec<Comment> {
151        let mut processed = Vec::new();
152        let mut cursor = self.comments.into_iter().peekable();
153
154        while let Some(mut current) = cursor.next() {
155            if current.kind == CommentKind::Line
156                && (current.style.is_trailing() || current.style.is_isolated())
157            {
158                let mut ref_line = self.sm.lookup_char_pos(current.span.hi()).line;
159                while let Some(next_comment) = cursor.peek() {
160                    if !next_comment.style.is_isolated()
161                        || next_comment.kind != CommentKind::Line
162                        || ref_line + 1 != self.sm.lookup_char_pos(next_comment.span.lo()).line
163                    {
164                        break;
165                    }
166
167                    let next_to_merge = cursor.next().unwrap();
168                    current.lines.extend(next_to_merge.lines);
169                    current.span = current.span.to(next_to_merge.span);
170                    ref_line += 1;
171                }
172            }
173
174            processed.push(current);
175        }
176
177        processed
178    }
179
180    /// Creates a `Span` relative to the source file's start position.
181    fn make_span(&self, range: std::ops::Range<usize>) -> Span {
182        Span::new(self.start_bpos + range.start as u32, self.start_bpos + range.end as u32)
183    }
184
185    /// Processes a single token from the source.
186    fn process_token(&mut self, token: solar::parse::lexer::token::RawToken) {
187        let token_range = self.pos..self.pos + token.len as usize;
188        let span = self.make_span(token_range.clone());
189        let token_text = &self.text[token_range];
190
191        // Keep track of disabled blocks
192        if token_text.trim_start().contains(DISABLE_START) {
193            self.disabled_block_depth += 1;
194        } else if token_text.trim_start().contains(DISABLE_END) {
195            self.disabled_block_depth -= 1;
196        }
197
198        #[allow(clippy::collapsible_match)]
199        match token.kind {
200            TokenKind::Whitespace => {
201                if let Some(mut idx) = token_text.find('\n') {
202                    self.code_to_the_left = false;
203
204                    while let Some(next_newline) = token_text[idx + 1..].find('\n') {
205                        idx += 1 + next_newline;
206                        let pos = self.pos + idx;
207                        self.comments.push(Comment {
208                            is_doc: false,
209                            kind: CommentKind::Line,
210                            style: CommentStyle::BlankLine,
211                            lines: vec![],
212                            span: self.make_span(pos..pos),
213                        });
214                        // If not disabled, early-exit as we want only a single blank line.
215                        if self.disabled_block_depth == 0 {
216                            break;
217                        }
218                    }
219                }
220            }
221            TokenKind::BlockComment { is_doc, .. } => {
222                let code_to_the_right = !matches!(
223                    self.text[self.pos + token.len as usize..].chars().next(),
224                    Some('\r' | '\n')
225                );
226                let style = match (self.code_to_the_left, code_to_the_right) {
227                    (_, true) => CommentStyle::Mixed,
228                    (false, false) => CommentStyle::Isolated,
229                    (true, false) => CommentStyle::Trailing,
230                };
231                let kind = CommentKind::Block;
232
233                // Count the number of chars since the start of the line by rescanning.
234                let pos_in_file = self.start_bpos + BytePos(self.pos as u32);
235                let line_begin_in_file = line_begin_pos(self.sf, pos_in_file);
236                let line_begin_pos = (line_begin_in_file - self.start_bpos).to_usize();
237                let mut col = CharPos(self.text[line_begin_pos..self.pos].chars().count());
238
239                // To preserve alignment in multi-line non-doc comments, normalize the block based
240                // on its least-indented line.
241                if !is_doc && token_text.contains('\n') {
242                    col = token_text.lines().skip(1).fold(col, |min, line| {
243                        if line.is_empty() {
244                            return min;
245                        }
246                        std::cmp::min(
247                            CharPos(line.chars().count() - line.trim_start().chars().count()),
248                            min,
249                        )
250                    })
251                };
252
253                let lines = self.split_block_comment_into_lines(token_text, is_doc, col);
254                self.comments.push(Comment { is_doc, kind, style, lines, span })
255            }
256            TokenKind::LineComment { is_doc } => {
257                let line =
258                    if self.disabled_block_depth != 0 { token_text } else { token_text.trim_end() };
259                self.comments.push(Comment {
260                    is_doc,
261                    kind: CommentKind::Line,
262                    style: if self.code_to_the_left {
263                        CommentStyle::Trailing
264                    } else {
265                        CommentStyle::Isolated
266                    },
267                    lines: vec![line.into()],
268                    span,
269                });
270            }
271            _ => {
272                self.code_to_the_left = true;
273            }
274        }
275        self.pos += token.len as usize;
276    }
277
278    /// Splits a block comment into lines, ensuring that each line is properly formatted.
279    fn split_block_comment_into_lines(
280        &self,
281        text: &str,
282        is_doc: bool,
283        col: CharPos,
284    ) -> Vec<String> {
285        // if formatting is disabled, return as is
286        if self.disabled_block_depth != 0 {
287            return vec![text.into()];
288        }
289
290        let mut res: Vec<String> = vec![];
291        let mut lines = text.lines();
292        if let Some(line) = lines.next() {
293            let line = line.trim_end();
294            // Ensure first line of a doc comment only has the `/**` decorator
295            if is_doc && let Some((_, second)) = line.split_once("/**") {
296                res.push("/**".to_string());
297                if !second.trim().is_empty() {
298                    let line = normalize_block_comment_ws(second, col).trim_end();
299                    // Ensure last line of a doc comment only has the `*/` decorator
300                    if let Some((first, _)) = line.split_once("*/") {
301                        if !first.trim().is_empty() {
302                            res.push(format_doc_block_comment(first.trim_end(), self.tab_width));
303                        }
304                        res.push(" */".to_string());
305                    } else {
306                        res.push(format_doc_block_comment(line.trim_end(), self.tab_width));
307                    }
308                }
309            } else {
310                res.push(line.to_string());
311            }
312        }
313
314        for (pos, line) in lines.delimited() {
315            let line = normalize_block_comment_ws(line, col).trim_end().to_string();
316            if !is_doc {
317                res.push(line);
318                continue;
319            }
320            if !pos.is_last {
321                res.push(format_doc_block_comment(&line, self.tab_width));
322            } else {
323                // Ensure last line of a doc comment only has the `*/` decorator
324                if let Some((first, _)) = line.split_once("*/")
325                    && !first.trim().is_empty()
326                {
327                    res.push(format_doc_block_comment(first.trim_end(), self.tab_width));
328                }
329                res.push(" */".to_string());
330            }
331        }
332        res
333    }
334}
335
336/// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
337/// Otherwise returns `Some(k)` where `k` is first char offset after that leading
338/// whitespace. Note that `k` may be outside bounds of `s`.
339fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
340    let mut idx = 0;
341    for (i, ch) in s.char_indices().take(col.to_usize()) {
342        if !ch.is_whitespace() {
343            return None;
344        }
345        idx = i + ch.len_utf8();
346    }
347    Some(idx)
348}
349
350/// Returns `Some(k)` where `k` is the byte offset of the first non-whitespace char. Returns `k = 0`
351/// if `s` starts with a non-whitespace char. If `s` only contains whitespaces, returns `None`.
/// Returns `Some(k)` where `k` is the byte offset of the first non-whitespace char. Returns
/// `k = 0` if `s` starts with a non-whitespace char. If `s` is empty or only contains
/// whitespace, returns `None`.
///
/// The offset always lies on a char boundary, including when the preceding whitespace is
/// multi-byte (e.g. U+00A0 or U+3000), so it is safe to slice `s` with it.
fn first_non_whitespace(s: &str) -> Option<usize> {
    s.find(|ch: char| !ch.is_whitespace())
}
363
364/// Returns a slice of `s` with a whitespace prefix removed based on `col`. If the first `col` chars
365/// of `s` are all whitespace, returns a slice starting after that prefix.
366fn normalize_block_comment_ws(s: &str, col: CharPos) -> &str {
367    let len = s.len();
368    if let Some(col) = all_whitespace(s, col) {
369        return if col < len { &s[col..] } else { "" };
370    }
371    if let Some(col) = first_non_whitespace(s) {
372        return &s[col..];
373    }
374    s
375}
376
377/// Formats a doc block comment line so that they have the ` *` decorator.
378fn format_doc_block_comment(line: &str, tab_width: Option<usize>) -> String {
379    if line.is_empty() {
380        return (" *").to_string();
381    }
382
383    if let Some((_, rest_of_line)) = line.split_once("*") {
384        if rest_of_line.is_empty() {
385            (" *").to_string()
386        } else if let Some(tab_width) = tab_width {
387            let mut normalized = String::from(" *");
388            line_with_tabs(
389                &mut normalized,
390                rest_of_line,
391                tab_width,
392                Some(Consolidation::MinOneTab),
393            );
394            normalized
395        } else {
396            format!(" *{rest_of_line}",)
397        }
398    } else if let Some(tab_width) = tab_width {
399        let mut normalized = String::from(" *\t");
400        line_with_tabs(&mut normalized, line, tab_width, Some(Consolidation::WithoutSpaces));
401        normalized
402    } else {
403        format!(" * {line}")
404    }
405}
406
/// Strategy used by [`line_with_tabs`] to deal with leading spaces that do not add up to a
/// whole tab.
///
/// Both variants currently behave the same inside [`line_with_tabs`]: leftover spaces are
/// replaced by one additional tab, and nothing is added when there are no leftover spaces.
pub enum Consolidation {
    /// Round leftover spaces up to one more tab.
    MinOneTab,
    /// Never emit leftover spaces; round them up to one more tab.
    WithoutSpaces,
}

/// Normalizes the leading whitespace of a string slice according to a given tab width, and
/// appends the result to `output`.
///
/// It aggregates and converts leading whitespace (spaces and tabs) into a representation that
/// maximizes the amount of tabs: a space counts for one column, a tab for `tab_width` columns,
/// and the total is re-emitted as tabs followed by the remaining spaces. With a `strategy`,
/// remaining spaces are replaced by one extra tab instead.
///
/// A `tab_width` of zero makes the conversion undefined, so in that case `line` is appended
/// unchanged.
pub fn line_with_tabs(
    output: &mut String,
    line: &str,
    tab_width: usize,
    strategy: Option<Consolidation>,
) {
    // Guard against the division by zero below.
    if tab_width == 0 {
        output.push_str(line);
        return;
    }

    // Find the end of the leading whitespace (any sequence of spaces and tabs)
    let first_non_ws = line.find(|c: char| c != ' ' && c != '\t').unwrap_or(line.len());
    let (leading_ws, rest_of_line) = line.split_at(first_non_ws);

    // Compute its equivalent length and derive the required amount of tabs and spaces
    let total_width: usize =
        leading_ws.chars().map(|c| if c == ' ' { 1 } else { tab_width }).sum();
    let (mut num_tabs, mut num_spaces) = (total_width / tab_width, total_width % tab_width);

    // Adjust based on the desired config: a partial tab is rounded up to a full one.
    match strategy {
        Some(Consolidation::MinOneTab | Consolidation::WithoutSpaces) if num_spaces != 0 => {
            (num_tabs, num_spaces) = (num_tabs + 1, 0);
        }
        _ => (),
    };

    // Append the normalized indentation and the rest of the line to the output
    output.extend(std::iter::repeat_n('\t', num_tabs));
    output.extend(std::iter::repeat_n(' ', num_spaces));
    output.push_str(rest_of_line);
}
451
452/// Estimates the display width of a string, accounting for tabs.
/// Estimates how many columns `line` occupies when displayed: every tab counts for
/// `tab_width` columns and every other char for one.
pub fn estimate_line_width(line: &str, tab_width: usize) -> usize {
    line.chars()
        .map(|ch| match ch {
            '\t' => tab_width,
            _ => 1,
        })
        .sum()
}
456
457/// Returns the `BytePos` of the beginning of the current line.
458fn line_begin_pos(sf: &SourceFile, pos: BytePos) -> BytePos {
459    let pos = sf.relative_position(pos);
460    let line_index = sf.lookup_line(pos).unwrap();
461    let line_start_pos = sf.lines()[line_index];
462    sf.absolute_position(line_start_pos)
463}