foundry_common/comments/
mod.rs

1use crate::iter::IterDelimited;
2use solar::parse::{
3    ast::{CommentKind, Span},
4    interface::{BytePos, CharPos, SourceMap, source_map::SourceFile},
5    lexer::token::RawTokenKind as TokenKind,
6};
7use std::fmt;
8
9mod comment;
10pub use comment::{Comment, CommentStyle};
11
12pub mod inline_config;
13
/// Marker text that opens a disabled region: a token containing it raises the gatherer's
/// disabled-block depth.
pub const DISABLE_START: &str = "forgefmt: disable-start";
/// Marker text that closes a disabled region: a token containing it lowers the gatherer's
/// disabled-block depth.
pub const DISABLE_END: &str = "forgefmt: disable-end";
16
/// A front-consumed buffer of the [`Comment`]s gathered from a source file.
///
/// The buffer is expected to stay sorted by ascending span position.
pub struct Comments {
    /// Pending comments; the front element is the next one handed out.
    comments: std::collections::VecDeque<Comment>,
}
20
21impl fmt::Debug for Comments {
22    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
23        f.write_str("Comments")?;
24        f.debug_list().entries(self.iter()).finish()
25    }
26}
27
28impl Comments {
29    pub fn new(
30        sf: &SourceFile,
31        sm: &SourceMap,
32        normalize_cmnts: bool,
33        group_cmnts: bool,
34        tab_width: Option<usize>,
35    ) -> Self {
36        let gatherer = CommentGatherer::new(sf, sm, normalize_cmnts, tab_width).gather();
37
38        Self {
39            comments: if group_cmnts { gatherer.group().into() } else { gatherer.comments.into() },
40        }
41    }
42
43    pub fn peek(&self) -> Option<&Comment> {
44        self.comments.front()
45    }
46
47    #[allow(clippy::should_implement_trait)]
48    pub fn next(&mut self) -> Option<Comment> {
49        self.comments.pop_front()
50    }
51
52    pub fn iter(&self) -> impl Iterator<Item = &Comment> {
53        self.comments.iter()
54    }
55
56    /// Adds a new comment at the beginning of the list.
57    ///
58    /// Should only be used when comments are gathered scattered, and must be manually sorted.
59    ///
60    /// **WARNING:** This struct works under the assumption that comments are always sorted by
61    /// ascending span position. It is the caller's responsibility to ensure that this premise
62    /// always holds true.
63    pub fn push_front(&mut self, cmnt: Comment) {
64        self.comments.push_front(cmnt)
65    }
66
67    /// Finds the first trailing comment on the same line as `span_pos`, allowing for `Mixed`
68    /// style comments to appear before it.
69    ///
70    /// Returns the comment and its index in the buffer.
71    pub fn peek_trailing(
72        &self,
73        sm: &SourceMap,
74        span_pos: BytePos,
75        next_pos: Option<BytePos>,
76    ) -> Option<(&Comment, usize)> {
77        let span_line = sm.lookup_char_pos(span_pos).line;
78        for (i, cmnt) in self.iter().enumerate() {
79            // If we have moved to the next line, we can stop.
80            let comment_line = sm.lookup_char_pos(cmnt.pos()).line;
81            if comment_line != span_line {
82                break;
83            }
84
85            // The comment must start after the given span position.
86            if cmnt.pos() < span_pos {
87                continue;
88            }
89
90            // The comment must be before the next element.
91            if cmnt.pos() >= next_pos.unwrap_or_else(|| cmnt.pos() + BytePos(1)) {
92                break;
93            }
94
95            // Stop when we find a trailing or a non-mixed comment
96            match cmnt.style {
97                CommentStyle::Mixed => continue,
98                CommentStyle::Trailing => return Some((cmnt, i)),
99                _ => break,
100            }
101        }
102        None
103    }
104}
105
/// Walks the raw tokens of a source file and records its comments (and blank lines).
struct CommentGatherer<'ast> {
    /// Source file being scanned; used to find the start of the current line.
    sf: &'ast SourceFile,
    /// Source map used to resolve positions to line numbers when grouping comments.
    sm: &'ast SourceMap,
    /// Text of `sf` (`sf.src`).
    text: &'ast str,
    /// Start position of `sf` (`sf.start_pos`); added to local offsets when building spans.
    start_bpos: BytePos,
    /// Byte offset into `text` of the token currently being processed.
    pos: usize,
    /// Comments collected so far, in source order.
    comments: Vec<Comment>,
    /// Whether a non-whitespace, non-comment token was seen since the last newline.
    code_to_the_left: bool,
    /// Nesting depth of disabled regions; while non-zero, comment text is kept verbatim.
    /// Starts at `1` instead of `0` when comment normalization is turned off.
    disabled_block_depth: usize,
    /// Tab width forwarded to doc block comment formatting; `None` leaves spaces untouched.
    tab_width: Option<usize>,
}
117
118impl<'ast> CommentGatherer<'ast> {
119    fn new(
120        sf: &'ast SourceFile,
121        sm: &'ast SourceMap,
122        normalize_cmnts: bool,
123        tab_width: Option<usize>,
124    ) -> Self {
125        Self {
126            sf,
127            sm,
128            text: sf.src.as_str(),
129            start_bpos: sf.start_pos,
130            pos: 0,
131            comments: Vec::new(),
132            code_to_the_left: false,
133            disabled_block_depth: if normalize_cmnts { 0 } else { 1 },
134            tab_width,
135        }
136    }
137
138    /// Consumes the gatherer and returns the collected comments.
139    fn gather(mut self) -> Self {
140        for token in solar::parse::Cursor::new(&self.text[self.pos..]) {
141            self.process_token(token);
142        }
143        self
144    }
145
146    /// Post-processes a list of comments to group consecutive comments.
147    ///
148    /// Necessary for properly indenting multi-line trailing comments, which would
149    /// otherwise be parsed as a `Trailing` followed by several `Isolated`.
150    fn group(self) -> Vec<Comment> {
151        let mut processed = Vec::new();
152        let mut cursor = self.comments.into_iter().peekable();
153
154        while let Some(mut current) = cursor.next() {
155            if current.kind == CommentKind::Line
156                && (current.style.is_trailing() || current.style.is_isolated())
157            {
158                let mut ref_line = self.sm.lookup_char_pos(current.span.hi()).line;
159                while let Some(next_comment) = cursor.peek() {
160                    if !next_comment.style.is_isolated()
161                        || next_comment.kind != CommentKind::Line
162                        || ref_line + 1 != self.sm.lookup_char_pos(next_comment.span.lo()).line
163                    {
164                        break;
165                    }
166
167                    let next_to_merge = cursor.next().unwrap();
168                    current.lines.extend(next_to_merge.lines);
169                    current.span = current.span.to(next_to_merge.span);
170                    ref_line += 1;
171                }
172            }
173
174            processed.push(current);
175        }
176
177        processed
178    }
179
180    /// Creates a `Span` relative to the source file's start position.
181    fn make_span(&self, range: std::ops::Range<usize>) -> Span {
182        Span::new(self.start_bpos + range.start as u32, self.start_bpos + range.end as u32)
183    }
184
185    /// Processes a single token from the source.
186    fn process_token(&mut self, token: solar::parse::lexer::token::RawToken) {
187        let token_range = self.pos..self.pos + token.len as usize;
188        let span = self.make_span(token_range.clone());
189        let token_text = &self.text[token_range];
190
191        // Keep track of disabled blocks
192        if token_text.trim_start().contains(DISABLE_START) {
193            self.disabled_block_depth += 1;
194        } else if token_text.trim_start().contains(DISABLE_END) {
195            self.disabled_block_depth -= 1;
196        }
197
198        match token.kind {
199            TokenKind::Whitespace => {
200                if let Some(mut idx) = token_text.find('\n') {
201                    self.code_to_the_left = false;
202
203                    while let Some(next_newline) = token_text[idx + 1..].find('\n') {
204                        idx += 1 + next_newline;
205                        let pos = self.pos + idx;
206                        self.comments.push(Comment {
207                            is_doc: false,
208                            kind: CommentKind::Line,
209                            style: CommentStyle::BlankLine,
210                            lines: vec![],
211                            span: self.make_span(pos..pos),
212                        });
213                        // If not disabled, early-exit as we want only a single blank line.
214                        if self.disabled_block_depth == 0 {
215                            break;
216                        }
217                    }
218                }
219            }
220            TokenKind::BlockComment { is_doc, .. } => {
221                let code_to_the_right = !matches!(
222                    self.text[self.pos + token.len as usize..].chars().next(),
223                    Some('\r' | '\n')
224                );
225                let style = match (self.code_to_the_left, code_to_the_right) {
226                    (_, true) => CommentStyle::Mixed,
227                    (false, false) => CommentStyle::Isolated,
228                    (true, false) => CommentStyle::Trailing,
229                };
230                let kind = CommentKind::Block;
231
232                // Count the number of chars since the start of the line by rescanning.
233                let pos_in_file = self.start_bpos + BytePos(self.pos as u32);
234                let line_begin_in_file = line_begin_pos(self.sf, pos_in_file);
235                let line_begin_pos = (line_begin_in_file - self.start_bpos).to_usize();
236                let mut col = CharPos(self.text[line_begin_pos..self.pos].chars().count());
237
238                // To preserve alignment in multi-line non-doc comments, normalize the block based
239                // on its least-indented line.
240                if !is_doc && token_text.contains('\n') {
241                    col = token_text.lines().skip(1).fold(col, |min, line| {
242                        if line.is_empty() {
243                            return min;
244                        }
245                        std::cmp::min(
246                            CharPos(line.chars().count() - line.trim_start().chars().count()),
247                            min,
248                        )
249                    })
250                };
251
252                let lines = self.split_block_comment_into_lines(token_text, is_doc, col);
253                self.comments.push(Comment { is_doc, kind, style, lines, span })
254            }
255            TokenKind::LineComment { is_doc } => {
256                let line =
257                    if self.disabled_block_depth != 0 { token_text } else { token_text.trim_end() };
258                self.comments.push(Comment {
259                    is_doc,
260                    kind: CommentKind::Line,
261                    style: if self.code_to_the_left {
262                        CommentStyle::Trailing
263                    } else {
264                        CommentStyle::Isolated
265                    },
266                    lines: vec![line.into()],
267                    span,
268                });
269            }
270            _ => {
271                self.code_to_the_left = true;
272            }
273        }
274        self.pos += token.len as usize;
275    }
276
277    /// Splits a block comment into lines, ensuring that each line is properly formatted.
278    fn split_block_comment_into_lines(
279        &self,
280        text: &str,
281        is_doc: bool,
282        col: CharPos,
283    ) -> Vec<String> {
284        // if formatting is disabled, return as is
285        if self.disabled_block_depth != 0 {
286            return vec![text.into()];
287        }
288
289        let mut res: Vec<String> = vec![];
290        let mut lines = text.lines();
291        if let Some(line) = lines.next() {
292            let line = line.trim_end();
293            // Ensure first line of a doc comment only has the `/**` decorator
294            if is_doc && let Some((_, second)) = line.split_once("/**") {
295                res.push("/**".to_string());
296                if !second.trim().is_empty() {
297                    let line = normalize_block_comment_ws(second, col).trim_end();
298                    // Ensure last line of a doc comment only has the `*/` decorator
299                    if let Some((first, _)) = line.split_once("*/") {
300                        if !first.trim().is_empty() {
301                            res.push(format_doc_block_comment(first.trim_end(), self.tab_width));
302                        }
303                        res.push(" */".to_string());
304                    } else {
305                        res.push(format_doc_block_comment(line.trim_end(), self.tab_width));
306                    }
307                }
308            } else {
309                res.push(line.to_string());
310            }
311        }
312
313        for (pos, line) in lines.delimited() {
314            let line = normalize_block_comment_ws(line, col).trim_end().to_string();
315            if !is_doc {
316                res.push(line);
317                continue;
318            }
319            if !pos.is_last {
320                res.push(format_doc_block_comment(&line, self.tab_width));
321            } else {
322                // Ensure last line of a doc comment only has the `*/` decorator
323                if let Some((first, _)) = line.split_once("*/")
324                    && !first.trim().is_empty()
325                {
326                    res.push(format_doc_block_comment(first.trim_end(), self.tab_width));
327                }
328                res.push(" */".to_string());
329            }
330        }
331        res
332    }
333}
334
335/// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
336/// Otherwise returns `Some(k)` where `k` is first char offset after that leading
337/// whitespace. Note that `k` may be outside bounds of `s`.
338fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
339    let mut idx = 0;
340    for (i, ch) in s.char_indices().take(col.to_usize()) {
341        if !ch.is_whitespace() {
342            return None;
343        }
344        idx = i + ch.len_utf8();
345    }
346    Some(idx)
347}
348
/// Returns `Some(k)` where `k` is the byte offset of the first non-whitespace char, so `k = 0`
/// when `s` starts with a non-whitespace char. Returns `None` if `s` is empty or contains only
/// whitespace.
///
/// The offset always lies on a char boundary, also when the leading whitespace is made of
/// multi-byte chars (e.g. U+00A0 or U+3000), so it is safe to slice `s` with it.
fn first_non_whitespace(s: &str) -> Option<usize> {
    s.find(|ch: char| !ch.is_whitespace())
}
362
363/// Returns a slice of `s` with a whitespace prefix removed based on `col`. If the first `col` chars
364/// of `s` are all whitespace, returns a slice starting after that prefix.
365fn normalize_block_comment_ws(s: &str, col: CharPos) -> &str {
366    let len = s.len();
367    if let Some(col) = all_whitespace(s, col) {
368        return if col < len { &s[col..] } else { "" };
369    }
370    if let Some(col) = first_non_whitespace(s) {
371        return &s[col..];
372    }
373    s
374}
375
376/// Formats a doc block comment line so that they have the ` *` decorator.
377fn format_doc_block_comment(line: &str, tab_width: Option<usize>) -> String {
378    if line.is_empty() {
379        return (" *").to_string();
380    }
381
382    if let Some((_, rest_of_line)) = line.split_once("*") {
383        if rest_of_line.is_empty() {
384            (" *").to_string()
385        } else if let Some(tab_width) = tab_width {
386            let mut normalized = String::from(" *");
387            line_with_tabs(
388                &mut normalized,
389                rest_of_line,
390                tab_width,
391                Some(Consolidation::MinOneTab),
392            );
393            normalized
394        } else {
395            format!(" *{rest_of_line}",)
396        }
397    } else if let Some(tab_width) = tab_width {
398        let mut normalized = String::from(" *\t");
399        line_with_tabs(&mut normalized, line, tab_width, Some(Consolidation::WithoutSpaces));
400        normalized
401    } else {
402        format!(" * {line}")
403    }
404}
405
/// How `line_with_tabs` treats leading whitespace that does not fill a whole tab.
pub enum Consolidation {
    /// Leftover spaces are rounded up to one more tab, so any non-empty indentation narrower
    /// than a tab becomes a single tab.
    MinOneTab,
    /// Leftover spaces are rounded up to one more tab, leaving no spaces in the indentation.
    WithoutSpaces,
}
410
411/// Normalizes the leading whitespace of a string slice according to a given tab width.
412///
413/// It aggregates and converts leading whitespace (spaces and tabs) into a representation that
414/// maximizes the amount of tabs.
415pub fn line_with_tabs(
416    output: &mut String,
417    line: &str,
418    tab_width: usize,
419    strategy: Option<Consolidation>,
420) {
421    // Find the end of the leading whitespace (any sequence of spaces and tabs)
422    let first_non_ws = line.find(|c| c != ' ' && c != '\t').unwrap_or(line.len());
423    let (leading_ws, rest_of_line) = line.split_at(first_non_ws);
424
425    // Compute its equivalent length and derive the required amount of tabs and spaces
426    let total_width =
427        leading_ws.chars().fold(0, |width, c| width + if c == ' ' { 1 } else { tab_width });
428    let (mut num_tabs, mut num_spaces) = (total_width / tab_width, total_width % tab_width);
429
430    // Adjust based on the desired config
431    match strategy {
432        Some(Consolidation::MinOneTab) => {
433            if num_tabs == 0 && num_spaces != 0 {
434                (num_tabs, num_spaces) = (1, 0);
435            } else if num_spaces != 0 {
436                (num_tabs, num_spaces) = (num_tabs + 1, 0);
437            }
438        }
439        Some(Consolidation::WithoutSpaces) => {
440            if num_spaces != 0 {
441                (num_tabs, num_spaces) = (num_tabs + 1, 0);
442            }
443        }
444        None => (),
445    };
446
447    // Append the normalized indentation and the rest of the line to the output
448    output.extend(std::iter::repeat_n('\t', num_tabs));
449    output.extend(std::iter::repeat_n(' ', num_spaces));
450    output.push_str(rest_of_line);
451}
452
/// Estimates how wide `line` is when displayed: each tab counts as `tab_width` columns and every
/// other char as one column.
pub fn estimate_line_width(line: &str, tab_width: usize) -> usize {
    line.chars()
        .map(|c| match c {
            '\t' => tab_width,
            _ => 1,
        })
        .sum()
}
457
458/// Returns the `BytePos` of the beginning of the current line.
459fn line_begin_pos(sf: &SourceFile, pos: BytePos) -> BytePos {
460    let pos = sf.relative_position(pos);
461    let line_index = sf.lookup_line(pos).unwrap();
462    let line_start_pos = sf.lines()[line_index];
463    sf.absolute_position(line_start_pos)
464}
foundry_common/comments/mod.rs

foundry_common/comments/
mod.rs