forge_fmt/pp/
mod.rs

1//! Adapted from [`rustc_ast_pretty`](https://github.com/rust-lang/rust/blob/07d3fd1d9b9c1f07475b96a9d168564bf528db68/compiler/rustc_ast_pretty/src/pp.rs)
2//! and [`prettyplease`](https://github.com/dtolnay/prettyplease/blob/8eb8c14649aea32e810732bd4d64fe519e6b752a/src/algorithm.rs).
3
4use crate::{DEBUG, DEBUG_INDENT};
5use ring::RingBuffer;
6use std::{borrow::Cow, cmp, collections::VecDeque, iter};
7
8mod convenience;
9mod helpers;
10mod ring;
11
12// Every line is allowed at least this much space, even if highly indented.
13const MIN_SPACE: isize = 40;
14
15/// How to break. Described in more detail in the module docs.
16#[derive(Clone, Copy, Debug, PartialEq, Eq)]
17pub enum Breaks {
18    Consistent,
19    Inconsistent,
20}
21
22#[derive(Clone, Copy, Debug, PartialEq, Eq)]
23enum IndentStyle {
24    /// Vertically aligned under whatever column this block begins at.
25    /// ```ignore
26    /// fn demo(arg1: usize,
27    ///         arg2: usize) {}
28    /// ```
29    Visual,
30    /// Indented relative to the indentation level of the previous line.
31    /// ```ignore
32    /// fn demo(
33    ///     arg1: usize,
34    ///     arg2: usize,
35    /// ) {}
36    /// ```
37    Block { offset: isize },
38}
39
40#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
41pub(crate) struct BreakToken {
42    pub(crate) offset: isize,
43    pub(crate) blank_space: usize,
44    pub(crate) pre_break: Option<&'static str>,
45    pub(crate) post_break: Option<&'static str>,
46    pub(crate) if_nonempty: bool,
47    pub(crate) never_break: bool,
48}
49
50#[derive(Clone, Copy, Debug, PartialEq, Eq)]
51pub(crate) struct BeginToken {
52    indent: IndentStyle,
53    breaks: Breaks,
54}
55
56#[derive(Debug, PartialEq, Eq)]
57pub(crate) enum Token {
58    // In practice a string token contains either a `&'static str` or a
59    // `String`. `Cow` is overkill for this because we never modify the data,
60    // but it's more convenient than rolling our own more specialized type.
61    String(Cow<'static, str>),
62    Break(BreakToken),
63    Begin(BeginToken),
64    End,
65}
66
67#[derive(Copy, Clone, Debug)]
68enum PrintFrame {
69    Fits(Breaks),
70    Broken(usize, Breaks),
71}
72
73pub(crate) const SIZE_INFINITY: isize = 0xffff;
74
75#[derive(Debug)]
76pub struct Printer {
77    out: String,
78    /// Number of spaces left on line.
79    space: isize,
80    /// Ring-buffer of tokens and calculated sizes.
81    buf: RingBuffer<BufEntry>,
82    /// Running size of stream "...left".
83    left_total: isize,
84    /// Running size of stream "...right".
85    right_total: isize,
86    /// Pseudo-stack, really a ring too. Holds the
87    /// primary-ring-buffers index of the Begin that started the
88    /// current block, possibly with the most recent Break after that
89    /// Begin (if there is any) on top of it. Stuff is flushed off the
90    /// bottom as it becomes irrelevant due to the primary ring-buffer
91    /// advancing.
92    scan_stack: VecDeque<usize>,
93    /// Stack of blocks-in-progress being flushed by print.
94    print_stack: Vec<PrintFrame>,
95    /// Level of indentation of current line.
96    indent: usize,
97    /// Buffered indentation to avoid writing trailing whitespace.
98    pending_indentation: usize,
99    /// The token most recently popped from the left boundary of the
100    /// ring-buffer for printing.
101    last_printed: Option<Token>,
102
103    /// Target line width.
104    margin: isize,
105    /// If `Some(tab_width)` the printer will use tabs for indentation.
106    indent_config: Option<usize>,
107}
108
109#[derive(Debug)]
110pub struct BufEntry {
111    token: Token,
112    size: isize,
113}
114
115impl Printer {
116    pub fn new(margin: usize, use_tab_with_size: Option<usize>) -> Self {
117        let margin = (margin as isize).clamp(MIN_SPACE, SIZE_INFINITY - 1);
118        Self {
119            out: String::new(),
120            space: margin,
121            buf: RingBuffer::new(),
122            left_total: 0,
123            right_total: 0,
124            scan_stack: VecDeque::new(),
125            print_stack: Vec::new(),
126            indent: 0,
127            pending_indentation: 0,
128            last_printed: None,
129
130            margin,
131            indent_config: use_tab_with_size,
132        }
133    }
134
135    /// Predicts available space on the current or next line based on pending breaks.
136    ///
137    /// This function provides a heuristic for estimating available space by checking if
138    /// an unconditional hard break is pending in the buffer. The printer's internal
139    /// `self.space` value may not accurately reflect pending formatting decisions.
140    ///
141    /// # Returns
142    ///
143    /// - The full `margin` if an unconditional hard break is pending, signaling that a new line
144    ///   will be created. Callers should apply their own indentation logic as they have more
145    ///   semantic context about the code structure.
146    /// - The current space left (`self.space`) if no hard break is found, which can be trusted when
147    ///   no line breaks are imminent.
148    ///
149    /// # Trade-offs
150    ///
151    /// This heuristic may overestimate available space,
152    /// but provides a reliable signal for hard breaks while keeping the implementation
153    /// simple.
154    pub(crate) fn space_left(&self) -> usize {
155        // Scan backwards through the buffer for the last unconditional hard break.
156        for i in self.buf.index_range().rev() {
157            let token = &self.buf[i].token;
158
159            if let Token::Break(break_token) = token
160                && break_token.blank_space as isize >= SIZE_INFINITY
161                && !break_token.never_break
162            {
163                return self.margin as usize;
164            }
165
166            // Stop at first non-end token.
167            if !matches!(token, Token::End) {
168                break;
169            }
170        }
171
172        // If no hard break pending, return actual space on current line or the full margin if space
173        // is negative.
174        (if self.space < 0 { self.margin } else { self.space }) as usize
175    }
176
177    pub(crate) fn last_token(&self) -> Option<&Token> {
178        self.last_token_still_buffered().or(self.last_printed.as_ref())
179    }
180
181    pub(crate) fn last_token_still_buffered(&self) -> Option<&Token> {
182        if self.buf.is_empty() {
183            return None;
184        }
185        Some(&self.buf.last().token)
186    }
187
188    /// Be very careful with this!
189    pub(crate) fn replace_last_token_still_buffered(&mut self, token: Token) {
190        self.buf.last_mut().token = token;
191    }
192
193    /// WARNING: Be very careful with this!
194    ///
195    /// Searches backwards through the buffer to find and replace the last token
196    /// that satisfies a predicate. This is a specialized and sensitive operation.
197    ///
198    /// This function's traversal logic is specifically designed to handle cases
199    /// where formatting boxes have been closed (e.g., after a multi-line
200    /// comment). It will automatically skip over any trailing `Token::End`
201    /// tokens to find the substantive token before them.
202    ///
203    /// The search stops as soon as it encounters any token other than `End`
204    /// (i.e., a `String`, `Break`, or `Begin`). The provided predicate is then
205    /// called on that token. If the predicate returns `true`, the token is
206    /// replaced.
207    ///
208    /// This function will only ever evaluate the predicate on **one** token.
209    pub(crate) fn find_and_replace_last_token_still_buffered<F>(
210        &mut self,
211        new_token: Token,
212        predicate: F,
213    ) where
214        F: FnOnce(&Token) -> bool,
215    {
216        for i in self.buf.index_range().rev() {
217            let token = &self.buf[i].token;
218            if let Token::End = token {
219                // It's safe to skip the end of a box.
220                continue;
221            }
222
223            // Apply the predicate and return after the first non-end token.
224            if predicate(token) {
225                self.buf[i].token = new_token;
226            }
227            break;
228        }
229    }
230
231    fn scan_eof(&mut self) {
232        if !self.scan_stack.is_empty() {
233            self.check_stack(0);
234            self.advance_left();
235        }
236    }
237
238    fn scan_begin(&mut self, token: BeginToken) {
239        if self.scan_stack.is_empty() {
240            self.left_total = 1;
241            self.right_total = 1;
242            self.buf.clear();
243        }
244        let right = self.buf.push(BufEntry { token: Token::Begin(token), size: -self.right_total });
245        self.scan_stack.push_back(right);
246    }
247
248    fn scan_end(&mut self) {
249        if self.scan_stack.is_empty() {
250            self.print_end();
251        } else {
252            if !self.buf.is_empty()
253                && let Token::Break(break_token) = self.buf.last().token
254            {
255                if self.buf.len() >= 2
256                    && let Token::Begin(_) = self.buf.second_last().token
257                {
258                    self.buf.pop_last();
259                    self.buf.pop_last();
260                    self.scan_stack.pop_back();
261                    self.scan_stack.pop_back();
262                    self.right_total -= break_token.blank_space as isize;
263                    return;
264                }
265                if break_token.if_nonempty {
266                    self.buf.pop_last();
267                    self.scan_stack.pop_back();
268                    self.right_total -= break_token.blank_space as isize;
269                }
270            }
271            let right = self.buf.push(BufEntry { token: Token::End, size: -1 });
272            self.scan_stack.push_back(right);
273        }
274    }
275
276    pub(crate) fn scan_break(&mut self, token: BreakToken) {
277        if self.scan_stack.is_empty() {
278            self.left_total = 1;
279            self.right_total = 1;
280            self.buf.clear();
281        } else {
282            self.check_stack(0);
283        }
284        let right = self.buf.push(BufEntry { token: Token::Break(token), size: -self.right_total });
285        self.scan_stack.push_back(right);
286        self.right_total += token.blank_space as isize;
287    }
288
289    fn scan_string(&mut self, string: Cow<'static, str>) {
290        if self.scan_stack.is_empty() {
291            self.print_string(&string);
292        } else {
293            let len = string.len() as isize;
294            self.buf.push(BufEntry { token: Token::String(string), size: len });
295            self.right_total += len;
296            self.check_stream();
297        }
298    }
299
300    #[track_caller]
301    pub(crate) fn offset(&mut self, offset: isize) {
302        match &mut self.buf.last_mut().token {
303            Token::Break(token) => token.offset += offset,
304            Token::Begin(_) => {}
305            Token::String(_) | Token::End => unreachable!(),
306        }
307    }
308
309    pub(crate) fn ends_with(&self, ch: char) -> bool {
310        for i in self.buf.index_range().rev() {
311            if let Token::String(token) = &self.buf[i].token {
312                return token.ends_with(ch);
313            }
314        }
315        self.out.ends_with(ch)
316    }
317
318    fn check_stream(&mut self) {
319        while self.right_total - self.left_total > self.space {
320            if *self.scan_stack.front().unwrap() == self.buf.index_range().start {
321                self.scan_stack.pop_front().unwrap();
322                self.buf.first_mut().size = SIZE_INFINITY;
323            }
324
325            self.advance_left();
326
327            if self.buf.is_empty() {
328                break;
329            }
330        }
331    }
332
333    fn advance_left(&mut self) {
334        while self.buf.first().size >= 0 {
335            let left = self.buf.pop_first();
336
337            match &left.token {
338                Token::String(string) => {
339                    self.left_total += left.size;
340                    self.print_string(string);
341                }
342                Token::Break(token) => {
343                    self.left_total += token.blank_space as isize;
344                    self.print_break(*token, left.size);
345                }
346                Token::Begin(token) => self.print_begin(*token, left.size),
347                Token::End => self.print_end(),
348            }
349
350            self.last_printed = Some(left.token);
351
352            if self.buf.is_empty() {
353                break;
354            }
355        }
356    }
357
358    fn check_stack(&mut self, mut depth: usize) {
359        while let Some(&index) = self.scan_stack.back() {
360            let entry = &mut self.buf[index];
361            match entry.token {
362                Token::Begin(_) => {
363                    if depth == 0 {
364                        break;
365                    }
366                    self.scan_stack.pop_back().unwrap();
367                    entry.size += self.right_total;
368                    depth -= 1;
369                }
370                Token::End => {
371                    // paper says + not =, but that makes no sense.
372                    self.scan_stack.pop_back().unwrap();
373                    entry.size = 1;
374                    depth += 1;
375                }
376                _ => {
377                    self.scan_stack.pop_back().unwrap();
378                    entry.size += self.right_total;
379                    if depth == 0 {
380                        break;
381                    }
382                }
383            }
384        }
385    }
386
387    fn get_top(&self) -> PrintFrame {
388        self.print_stack.last().copied().unwrap_or(PrintFrame::Broken(0, Breaks::Inconsistent))
389    }
390
391    fn print_begin(&mut self, token: BeginToken, size: isize) {
392        if DEBUG {
393            self.out.push(match token.breaks {
394                Breaks::Consistent => '«',
395                Breaks::Inconsistent => '‹',
396            });
397            if DEBUG_INDENT && let IndentStyle::Block { offset } = token.indent {
398                self.out.extend(offset.to_string().chars().map(|ch| match ch {
399                    '0'..='9' => ['₀', '₁', '₂', '₃', '₄', '₅', '₆', '₇', '₈', '₉']
400                        [(ch as u8 - b'0') as usize],
401                    '-' => '₋',
402                    _ => unreachable!(),
403                }));
404            }
405        }
406
407        if size > self.space {
408            self.print_stack.push(PrintFrame::Broken(self.indent, token.breaks));
409            self.indent = match token.indent {
410                IndentStyle::Block { offset } => {
411                    usize::try_from(self.indent as isize + offset).unwrap()
412                }
413                IndentStyle::Visual => (self.margin - self.space) as usize,
414            };
415        } else {
416            self.print_stack.push(PrintFrame::Fits(token.breaks));
417        }
418    }
419
420    fn print_end(&mut self) {
421        let breaks = match self.print_stack.pop().unwrap() {
422            PrintFrame::Broken(indent, breaks) => {
423                self.indent = indent;
424                breaks
425            }
426            PrintFrame::Fits(breaks) => breaks,
427        };
428        if DEBUG {
429            self.out.push(match breaks {
430                Breaks::Consistent => '»',
431                Breaks::Inconsistent => '›',
432            });
433        }
434    }
435
436    fn print_break(&mut self, token: BreakToken, size: isize) {
437        let fits = token.never_break
438            || match self.get_top() {
439                PrintFrame::Fits(..) => true,
440                PrintFrame::Broken(.., Breaks::Consistent) => false,
441                PrintFrame::Broken(.., Breaks::Inconsistent) => size <= self.space,
442            };
443        if fits {
444            self.pending_indentation += token.blank_space;
445            self.space -= token.blank_space as isize;
446            if DEBUG {
447                self.out.push('·');
448            }
449        } else {
450            if let Some(pre_break) = token.pre_break {
451                self.print_indent();
452                self.out.push_str(pre_break);
453            }
454            if DEBUG {
455                self.out.push('·');
456            }
457            self.out.push('\n');
458            let indent = self.indent as isize + token.offset;
459            self.pending_indentation = usize::try_from(indent).expect("negative indentation");
460            self.space = cmp::max(self.margin - indent, MIN_SPACE);
461            if let Some(post_break) = token.post_break {
462                self.print_indent();
463                self.out.push_str(post_break);
464                self.space -= post_break.len() as isize;
465            }
466        }
467    }
468
469    fn print_string(&mut self, string: &str) {
470        self.print_indent();
471        self.out.push_str(string);
472        self.space -= string.len() as isize;
473    }
474
475    fn print_indent(&mut self) {
476        self.out.reserve(self.pending_indentation);
477        if let Some(tab_width) = self.indent_config {
478            let num_tabs = self.pending_indentation / tab_width;
479            self.out.extend(iter::repeat_n('\t', num_tabs));
480
481            let remainder = self.pending_indentation % tab_width;
482            self.out.extend(iter::repeat_n(' ', remainder));
483        } else {
484            self.out.extend(iter::repeat_n(' ', self.pending_indentation));
485        }
486        self.pending_indentation = 0;
487    }
488}