forge_fmt/
comments.rs

1use crate::inline_config::{InlineConfigItem, InvalidInlineConfigItem};
2use itertools::Itertools;
3use solang_parser::pt::*;
4use std::collections::VecDeque;
5
/// Syntactic flavour of a source comment.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CommentType {
    /// Single-line comment introduced by `//`.
    Line,
    /// Delimited comment of the form `/* ... */`.
    Block,
    /// Single-line documentation comment introduced by `///`.
    DocLine,
    /// Delimited documentation comment of the form `/** ... */`.
    DocBlock,
}
18
/// Where a comment sits relative to the code it belongs to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CommentPosition {
    /// The comment precedes the code it describes.
    Prefix,
    /// The comment follows the code it describes.
    Postfix,
}
27
28/// Comment with additional metadata
29#[derive(Clone, Debug, PartialEq, Eq)]
30pub struct CommentWithMetadata {
31    pub ty: CommentType,
32    pub loc: Loc,
33    pub has_newline_before: bool,
34    pub indent_len: usize,
35    pub comment: String,
36    pub position: CommentPosition,
37}
38
39impl PartialOrd for CommentWithMetadata {
40    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
41        Some(self.cmp(other))
42    }
43}
44
45impl Ord for CommentWithMetadata {
46    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
47        self.loc.cmp(&other.loc)
48    }
49}
50
51impl CommentWithMetadata {
52    fn new(
53        comment: Comment,
54        position: CommentPosition,
55        has_newline_before: bool,
56        indent_len: usize,
57    ) -> Self {
58        let (ty, loc, comment) = match comment {
59            Comment::Line(loc, comment) => (CommentType::Line, loc, comment),
60            Comment::Block(loc, comment) => (CommentType::Block, loc, comment),
61            Comment::DocLine(loc, comment) => (CommentType::DocLine, loc, comment),
62            Comment::DocBlock(loc, comment) => (CommentType::DocBlock, loc, comment),
63        };
64        Self {
65            comment: comment.trim_end().to_string(),
66            ty,
67            loc,
68            position,
69            has_newline_before,
70            indent_len,
71        }
72    }
73
74    /// Construct a comment with metadata by analyzing its surrounding source code
75    fn from_comment_and_src(comment: Comment, src: &str, last_comment: Option<&Self>) -> Self {
76        let src_before = &src[..comment.loc().start()];
77        if src_before.is_empty() {
78            return Self::new(comment, CommentPosition::Prefix, false, 0)
79        }
80
81        let mut lines_before = src_before.lines().rev();
82        let this_line =
83            if src_before.ends_with('\n') { "" } else { lines_before.next().unwrap_or_default() };
84        let indent_len = this_line.chars().take_while(|c| c.is_whitespace()).count();
85        let last_line = lines_before.next().map(str::trim_start);
86
87        if matches!(comment, Comment::DocLine(..) | Comment::DocBlock(..)) {
88            return Self::new(
89                comment,
90                CommentPosition::Prefix,
91                last_line.is_none_or(str::is_empty),
92                indent_len,
93            )
94        }
95
96        // TODO: this loop takes almost the entirety of the time spent in parsing, which is up to
97        // 80% of `crate::fmt`
98        let mut code_end = 0;
99        for (state, idx, ch) in src_before.comment_state_char_indices() {
100            if matches!(state, CommentState::None) && !ch.is_whitespace() {
101                code_end = idx;
102            }
103        }
104
105        let (position, has_newline_before) = if src_before[code_end..].contains('\n') {
106            // comment sits on a line without code
107            if let Some(last_line) = last_line {
108                if last_line.is_empty() {
109                    // line before is empty
110                    (CommentPosition::Prefix, true)
111                } else {
112                    // line has something
113                    // check if the last comment after code was a postfix comment
114                    if last_comment
115                        .is_some_and(|last| last.loc.end() > code_end && !last.is_prefix())
116                    {
117                        // get the indent size of the next item of code
118                        let next_indent_len = src[comment.loc().end()..]
119                            .non_comment_chars()
120                            .take_while(|ch| ch.is_whitespace())
121                            .fold(indent_len, |indent, ch| if ch == '\n' { 0 } else { indent + 1 });
122                        if indent_len > next_indent_len {
123                            // the comment indent is bigger than the next code indent
124                            (CommentPosition::Postfix, false)
125                        } else {
126                            // the comment indent is equal to or less than the next code
127                            // indent
128                            (CommentPosition::Prefix, false)
129                        }
130                    } else {
131                        // if there is no postfix comment after the piece of code
132                        (CommentPosition::Prefix, false)
133                    }
134                }
135            } else {
136                // beginning of file
137                (CommentPosition::Prefix, false)
138            }
139        } else {
140            // comment is after some code
141            (CommentPosition::Postfix, false)
142        };
143
144        Self::new(comment, position, has_newline_before, indent_len)
145    }
146
147    pub fn is_line(&self) -> bool {
148        matches!(self.ty, CommentType::Line | CommentType::DocLine)
149    }
150
151    pub fn is_doc_block(&self) -> bool {
152        matches!(self.ty, CommentType::DocBlock)
153    }
154
155    pub fn is_prefix(&self) -> bool {
156        matches!(self.position, CommentPosition::Prefix)
157    }
158
159    pub fn is_before(&self, byte: usize) -> bool {
160        self.loc.start() < byte
161    }
162
163    /// Returns the contents of the comment without the start and end tokens
164    pub fn contents(&self) -> &str {
165        let mut s = self.comment.as_str();
166        if let Some(stripped) = s.strip_prefix(self.start_token()) {
167            s = stripped;
168        }
169        if let Some(end_token) = self.end_token() {
170            if let Some(stripped) = s.strip_suffix(end_token) {
171                s = stripped;
172            }
173        }
174        s
175    }
176
177    /// The start token of the comment
178    #[inline]
179    pub const fn start_token(&self) -> &'static str {
180        match self.ty {
181            CommentType::Line => "//",
182            CommentType::Block => "/*",
183            CommentType::DocLine => "///",
184            CommentType::DocBlock => "/**",
185        }
186    }
187
188    /// The token that gets written on the newline when the
189    /// comment is wrapped
190    #[inline]
191    pub const fn wrap_token(&self) -> &'static str {
192        match self.ty {
193            CommentType::Line => "// ",
194            CommentType::DocLine => "/// ",
195            CommentType::Block => "",
196            CommentType::DocBlock => " * ",
197        }
198    }
199
200    /// The end token of the comment
201    #[inline]
202    pub const fn end_token(&self) -> Option<&'static str> {
203        match self.ty {
204            CommentType::Line | CommentType::DocLine => None,
205            CommentType::Block | CommentType::DocBlock => Some("*/"),
206        }
207    }
208}
209
210/// A list of comments
211#[derive(Clone, Debug, Default)]
212pub struct Comments {
213    prefixes: VecDeque<CommentWithMetadata>,
214    postfixes: VecDeque<CommentWithMetadata>,
215}
216
217impl Comments {
218    pub fn new(mut comments: Vec<Comment>, src: &str) -> Self {
219        let mut prefixes = VecDeque::with_capacity(comments.len());
220        let mut postfixes = VecDeque::with_capacity(comments.len());
221        let mut last_comment = None;
222
223        comments.sort_by_key(|comment| comment.loc());
224        for comment in comments {
225            let comment = CommentWithMetadata::from_comment_and_src(comment, src, last_comment);
226            let vec = if comment.is_prefix() { &mut prefixes } else { &mut postfixes };
227            vec.push_back(comment);
228            last_comment = Some(vec.back().unwrap());
229        }
230        Self { prefixes, postfixes }
231    }
232
233    /// Helper for removing comments before a byte offset
234    fn remove_comments_before(
235        comments: &mut VecDeque<CommentWithMetadata>,
236        byte: usize,
237    ) -> Vec<CommentWithMetadata> {
238        let pos = comments
239            .iter()
240            .find_position(|comment| !comment.is_before(byte))
241            .map(|(idx, _)| idx)
242            .unwrap_or_else(|| comments.len());
243        if pos == 0 {
244            return Vec::new()
245        }
246        comments.rotate_left(pos);
247        comments.split_off(comments.len() - pos).into()
248    }
249
250    /// Remove any prefix comments that occur before the byte offset in the src
251    pub(crate) fn remove_prefixes_before(&mut self, byte: usize) -> Vec<CommentWithMetadata> {
252        Self::remove_comments_before(&mut self.prefixes, byte)
253    }
254
255    /// Remove any postfix comments that occur before the byte offset in the src
256    pub(crate) fn remove_postfixes_before(&mut self, byte: usize) -> Vec<CommentWithMetadata> {
257        Self::remove_comments_before(&mut self.postfixes, byte)
258    }
259
260    /// Remove any comments that occur before the byte offset in the src
261    pub(crate) fn remove_all_comments_before(&mut self, byte: usize) -> Vec<CommentWithMetadata> {
262        self.remove_prefixes_before(byte)
263            .into_iter()
264            .merge(self.remove_postfixes_before(byte))
265            .collect()
266    }
267
268    pub(crate) fn pop(&mut self) -> Option<CommentWithMetadata> {
269        if self.iter().next()?.is_prefix() {
270            self.prefixes.pop_front()
271        } else {
272            self.postfixes.pop_front()
273        }
274    }
275
276    pub(crate) fn iter(&self) -> impl Iterator<Item = &CommentWithMetadata> {
277        self.prefixes.iter().merge(self.postfixes.iter())
278    }
279
280    /// Parse all comments to return a list of inline config items. This will return an iterator of
281    /// results of parsing comments which start with `forgefmt:`
282    pub fn parse_inline_config_items(
283        &self,
284    ) -> impl Iterator<Item = Result<(Loc, InlineConfigItem), (Loc, InvalidInlineConfigItem)>> + '_
285    {
286        self.iter()
287            .filter_map(|comment| {
288                Some((comment, comment.contents().trim_start().strip_prefix("forgefmt:")?.trim()))
289            })
290            .map(|(comment, item)| {
291                let loc = comment.loc;
292                item.parse().map(|out| (loc, out)).map_err(|out| (loc, out))
293            })
294    }
295}
296
/// The state of a character in a string with possible comments
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum CommentState {
    /// character not in a comment
    #[default]
    None,
    /// First `/` in line comment start `"//"`
    LineStart1,
    /// Second `/` in  line comment start `"//"`
    LineStart2,
    /// Character in a line comment
    Line,
    /// `/` in block comment start `"/*"`
    BlockStart1,
    /// `*` in block comment start `"/*"`
    BlockStart2,
    /// Character in a block comment
    Block,
    /// `*` in block comment end `"*/"`
    BlockEnd1,
    /// `/` in block comment end `"*/"`
    BlockEnd2,
}

/// An Iterator over characters and indices in a string slice with information about the state of
/// comments
///
/// Each item is `(state, byte_index, char)`, where `state` classifies that very character.
///
/// NOTE: the scanner has no notion of string literals, so a `//` or `/*` inside a string
/// literal is classified as a comment start.
pub struct CommentStateCharIndices<'a> {
    iter: std::str::CharIndices<'a>,
    state: CommentState,
}

impl<'a> CommentStateCharIndices<'a> {
    /// Creates a scanner over `string` that starts outside of any comment.
    #[inline]
    fn new(string: &'a str) -> Self {
        Self { iter: string.char_indices(), state: CommentState::None }
    }

    /// Overrides the state the scanner assumes for the character preceding the next one.
    #[inline]
    pub fn with_state(mut self, state: CommentState) -> Self {
        self.state = state;
        self
    }

    /// Returns the next `(byte_index, char)` pair without advancing the iterator.
    #[inline]
    pub fn peek(&mut self) -> Option<(usize, char)> {
        self.iter.clone().next()
    }
}

impl Iterator for CommentStateCharIndices<'_> {
    type Item = (CommentState, usize, char);

    /// Advances by one character and returns it together with its comment state.
    ///
    /// `self.state` holds the state of the previously returned character; the new state is
    /// derived from it, the current character and, where needed, one character of lookahead.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let (idx, ch) = self.iter.next()?;
        let previous = self.state;
        self.state = match previous {
            // Outside of a comment, or directly behind a closing `*/`: in both cases the
            // current character may open a new comment (e.g. `/**///` or `/* a *//* b */`).
            CommentState::None | CommentState::BlockEnd2 => {
                if ch == '/' {
                    match self.peek() {
                        Some((_, '/')) => CommentState::LineStart1,
                        Some((_, '*')) => CommentState::BlockStart1,
                        _ => CommentState::None,
                    }
                } else {
                    CommentState::None
                }
            }
            CommentState::LineStart1 => CommentState::LineStart2,
            // The first character after `//` is treated like any other line-comment
            // character, so that an empty comment (`//` directly followed by a newline)
            // ends at that newline instead of swallowing the following line.
            CommentState::LineStart2 | CommentState::Line => {
                if ch == '\n' {
                    CommentState::None
                } else {
                    CommentState::Line
                }
            }
            CommentState::BlockStart1 => CommentState::BlockStart2,
            // The first character after `/*` is treated like any other block-comment
            // character, so that an empty comment (`/**/`) is closed properly.
            CommentState::BlockStart2 | CommentState::Block => {
                if ch == '*' && matches!(self.peek(), Some((_, '/'))) {
                    CommentState::BlockEnd1
                } else {
                    CommentState::Block
                }
            }
            CommentState::BlockEnd1 => CommentState::BlockEnd2,
        };
        Some((self.state, idx, ch))
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }

    /// Counting does not need the comment state, so it is delegated to the inner iterator.
    #[inline]
    fn count(self) -> usize {
        self.iter.count()
    }
}

impl std::iter::FusedIterator for CommentStateCharIndices<'_> {}
408
409/// An Iterator over characters in a string slice which are not a apart of comments
410pub struct NonCommentChars<'a>(CommentStateCharIndices<'a>);
411
412impl Iterator for NonCommentChars<'_> {
413    type Item = char;
414
415    #[inline]
416    fn next(&mut self) -> Option<Self::Item> {
417        for (state, _, ch) in self.0.by_ref() {
418            if state == CommentState::None {
419                return Some(ch)
420            }
421        }
422        None
423    }
424}
425
426/// Helpers for iterating over comment containing strings
427pub trait CommentStringExt {
428    fn comment_state_char_indices(&self) -> CommentStateCharIndices<'_>;
429
430    #[inline]
431    fn non_comment_chars(&self) -> NonCommentChars<'_> {
432        NonCommentChars(self.comment_state_char_indices())
433    }
434
435    #[inline]
436    fn trim_comments(&self) -> String {
437        self.non_comment_chars().collect()
438    }
439}
440
441impl<T> CommentStringExt for T
442where
443    T: AsRef<str>,
444{
445    #[inline]
446    fn comment_state_char_indices(&self) -> CommentStateCharIndices<'_> {
447        CommentStateCharIndices::new(self.as_ref())
448    }
449}
450
451impl CommentStringExt for str {
452    #[inline]
453    fn comment_state_char_indices(&self) -> CommentStateCharIndices<'_> {
454        CommentStateCharIndices::new(self)
455    }
456}