From 3c1e75a0e21a54ccc4e08800753a1a28d99a6f86 Mon Sep 17 00:00:00 2001 From: Nick Dower Date: Fri, 12 Jun 2026 09:32:16 +0200 Subject: [PATCH 1/2] [fix] indentation & trailing whitespace in Heredoc An attempt at fixing a few Heredoc issues: * 1 tab equals 8 spaces * 1 space + 1 tab equals 8 spaces * Trailing whitespace in squiggly Heredoc should not be stripped. * A single whitespace-only line should be preserved * Multiple whitespace-only lines should be preserved See: https://docs.ruby-lang.org/en/master/syntax/literals_rdoc.html#here-document-literals >The indentation of the least-indented line will be removed from each >line of the content. Note that empty lines and lines consisting solely >of literal tabs and spaces will be ignored for the purposes of >determining indentation, but escaped tabs and spaces are considered >non-indentation characters. > >For the purpose of measuring an indentation, a horizontal tab is >regarded as a sequence of one to eight spaces such that the column >position corresponding to its end is a multiple of eight. The amount to >be removed is counted in terms of the number of spaces. If the boundary >appears in the middle of a tab, that tab is not removed. --- ci/string_literals_stress_test.rb | 72 +++++++++++++ .../heredoc_indented_whitespace_actual.rb | 72 +++++++++++++ .../heredoc_indented_whitespace_expected.rb | 102 +++++++++++++++++- librubyfmt/src/format_prism.rs | 53 +++++++-- librubyfmt/src/heredoc_string.rs | 25 +++-- 5 files changed, 305 insertions(+), 19 deletions(-) diff --git a/ci/string_literals_stress_test.rb b/ci/string_literals_stress_test.rb index 0a80d9f58..aa03063ad 100644 --- a/ci/string_literals_stress_test.rb +++ b/ci/string_literals_stress_test.rb @@ -118,3 +118,75 @@ def foo puts <( .count() - escaped_ws_count; + let raw_leading_cols = count_indent_cols(&raw[..raw_leading]); + let unescaped_leading_cols = count_indent_cols(&unescaped[..unescaped_leading]); + // The difference is the common indent (if raw has more leading whitespace) - if raw_leading > unescaped_leading { - return Some(raw_leading - unescaped_leading); + if raw_leading_cols > unescaped_leading_cols { + return Some(raw_leading_cols - unescaped_leading_cols); } } None @@ -960,11 +963,10 @@ fn format_inner_string<'src>( .enumerate() .map(|(line_idx, line)| { // Strip from lines at line boundaries - let should_strip = (line_idx > 0 || prev_ended_with_newline) - && !line.is_empty() - && line.len() >= common_indent; + let should_strip = + (line_idx > 0 || prev_ended_with_newline) && !line.is_empty(); if should_strip { - &line[common_indent..] + strip_indent_cols(line, common_indent) } else { line } @@ -5167,3 +5169,42 @@ fn format_write_node<'src>( ps.emit_space(); ps.with_start_of_line(false, |ps| format_node(ps, value)); } + +/// Count leading whitespace columns. Tabs advance to the next multiple of 8. +fn count_indent_cols(line: &[u8]) -> usize { + let mut col = 0; + for &b in line { + match b { + b' ' => col += 1, + b'\t' => col = (col / 8 + 1) * 8, + _ => break, + } + } + col +} + +/// Strip up to `cols` leading whitespace columns from `line`. +/// Tabs advance to the next multiple of 8. +/// If a tab would advance past `cols`, it is not stripped. +fn strip_indent_cols(line: &[u8], cols: usize) -> &[u8] { + let mut col = 0; + let mut i = 0; + while i < line.len() && col < cols { + match line[i] { + b' ' => { + col += 1; + i += 1; + } + b'\t' => { + col = (col / 8 + 1) * 8; + if col > cols { + // We are in the middle of a tab. Do not strip it. + break; + } + i += 1; + } + _ => break, + } + } + &line[i..] +} diff --git a/librubyfmt/src/heredoc_string.rs b/librubyfmt/src/heredoc_string.rs index a491ce1e6..113cc5d58 100644 --- a/librubyfmt/src/heredoc_string.rs +++ b/librubyfmt/src/heredoc_string.rs @@ -68,8 +68,16 @@ impl<'src> HeredocString<'src> { let indent = self.indent; if self.kind.is_squiggly() { - // For squiggly heredocs, we need to apply indentation to Normal segments - // but not to Raw segments (which come from nested non-squiggly heredocs). + // Do not indent raw segments (nested non-squiggly heredocs). + // Do not indent when all normal segments contain only whitespace. + let should_indent = self.segments.iter().any(|seg| { + if let HeredocSegment::Normal(c) = seg { + c.iter().any(|&b| b != b' ' && b != b'\t' && b != b'\n') + } else { + false + } + }); + let mut result = Vec::new(); for segment in self.segments { match segment { @@ -79,9 +87,10 @@ impl<'src> HeredocString<'src> { if i > 0 { result.push(b'\n'); } - let mut indented = get_indent(indent as usize + 2).into_owned(); - indented.extend_from_slice(line); - result.extend_from_slice(indented.trim_ascii_end()); + if !line.is_empty() && should_indent { + result.extend_from_slice(get_indent(indent as usize + 2).as_ref()); + } + result.extend_from_slice(line); } } HeredocSegment::Raw(content) => { @@ -90,14 +99,14 @@ impl<'src> HeredocString<'src> { if i > 0 { result.push(b'\n'); } - result.extend_from_slice(line.trim_ascii_end()); + result.extend_from_slice(line); } } } } result } else { - // For non-squiggly heredocs, just join segments and trim line endings + // For non-squiggly heredocs, just join segments let mut result = Vec::new(); for segment in self.segments { let content = match segment { @@ -107,7 +116,7 @@ impl<'src> HeredocString<'src> { if i > 0 { result.push(b'\n'); } - result.extend_from_slice(line.trim_ascii_end()); + result.extend_from_slice(line); } } result From 8ce11e3e39ee20840b7005a6e29d2f828888e403 Mon Sep 17 00:00:00 2001 From: Nick Dower Date: Sun, 14 Jun 2026 08:23:31 +0200 Subject: [PATCH 2/2] Add string_test. Bump CI Ruby version to 3.4. --- .github/workflows/ci.yml | 3 + ci/string_literals_stress_test.rb | 72 ----- .../heredoc_indented_whitespace_actual.rb | 72 ----- .../heredoc_indented_whitespace_expected.rb | 92 ------ tests/string_test.rs | 278 ++++++++++++++++++ 5 files changed, 281 insertions(+), 236 deletions(-) create mode 100644 tests/string_test.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 55aa076ad..4d1e8bd36 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -51,6 +51,9 @@ jobs: - uses: actions/checkout@v6 with: submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: '3.4' - if: runner.os == 'macOS' uses: actions-rust-lang/setup-rust-toolchain@v1 with: diff --git a/ci/string_literals_stress_test.rb b/ci/string_literals_stress_test.rb index aa03063ad..0a80d9f58 100644 --- a/ci/string_literals_stress_test.rb +++ b/ci/string_literals_stress_test.rb @@ -118,75 +118,3 @@ def foo puts < std::process::Output { + cmd.stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + let mut child = cmd.spawn().expect("failed to spawn process"); + child + .stdin + .take() + .unwrap() + .write_all(input) + .expect("failed to write to stdin"); + child + .wait_with_output() + .expect("failed to wait for process") + } + + // Verify original behaviour with ruby. + let original_ruby = pipe_to(Command::new("ruby"), input.as_bytes()); + assert!( + original_ruby.status.success(), + "ruby failed on input:\n{input}\nStderr: {}", + String::from_utf8_lossy(&original_ruby.stderr) + ); + assert_eq!( + String::from_utf8_lossy(&original_ruby.stdout), + ruby_output, + "ruby output before formatting does not match expected output" + ); + + // Verify rubyfmt output. + let rubyfmt_cmd = Command::cargo_bin("rubyfmt-main").unwrap(); + let rubyfmt_result = pipe_to(rubyfmt_cmd, input.as_bytes()); + assert!( + rubyfmt_result.status.success(), + "rubyfmt failed on input:\n{input}\nStderr: {}", + String::from_utf8_lossy(&rubyfmt_result.stderr) + ); + assert_eq!( + String::from_utf8_lossy(&rubyfmt_result.stdout), + rubyfmt_output, + "rubyfmt output does not match expected formatted output" + ); + + // Verify `ruby` after formatting. + let formatted_ruby = pipe_to(Command::new("ruby"), &rubyfmt_result.stdout); + assert!( + formatted_ruby.status.success(), + "ruby failed on formatted output:\n{}\nStderr: {}", + rubyfmt_output, + String::from_utf8_lossy(&formatted_ruby.stderr) + ); + assert_eq!( + String::from_utf8_lossy(&formatted_ruby.stdout), + ruby_output, + "ruby output after formatting does not match expected output" + ); +}