diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 55aa076a..4d1e8bd3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -51,6 +51,9 @@ jobs: - uses: actions/checkout@v6 with: submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: '3.4' - if: runner.os == 'macOS' uses: actions-rust-lang/setup-rust-toolchain@v1 with: diff --git a/fixtures/small/heredoc_indented_whitespace_expected.rb b/fixtures/small/heredoc_indented_whitespace_expected.rb index 0f7a7e5f..29e03882 100644 --- a/fixtures/small/heredoc_indented_whitespace_expected.rb +++ b/fixtures/small/heredoc_indented_whitespace_expected.rb @@ -1,15 +1,15 @@ def foo <<~THING - awfweaf + awfweaf awefawef - - - + + + hi there THING <<-THING - + hi there THING end diff --git a/librubyfmt/src/format_prism.rs b/librubyfmt/src/format_prism.rs index e11afbf2..a193c2d5 100644 --- a/librubyfmt/src/format_prism.rs +++ b/librubyfmt/src/format_prism.rs @@ -931,9 +931,12 @@ fn format_inner_string<'src>( .count() - escaped_ws_count; + let raw_leading_cols = count_indent_cols(&raw[..raw_leading]); + let unescaped_leading_cols = count_indent_cols(&unescaped[..unescaped_leading]); + // The difference is the common indent (if raw has more leading whitespace) - if raw_leading > unescaped_leading { - return Some(raw_leading - unescaped_leading); + if raw_leading_cols > unescaped_leading_cols { + return Some(raw_leading_cols - unescaped_leading_cols); } } None @@ -960,11 +963,10 @@ fn format_inner_string<'src>( .enumerate() .map(|(line_idx, line)| { // Strip from lines at line boundaries - let should_strip = (line_idx > 0 || prev_ended_with_newline) - && !line.is_empty() - && line.len() >= common_indent; + let should_strip = + (line_idx > 0 || prev_ended_with_newline) && !line.is_empty(); if should_strip { - &line[common_indent..] + strip_indent_cols(line, common_indent) } else { line } @@ -5167,3 +5169,42 @@ fn format_write_node<'src>( ps.emit_space(); ps.with_start_of_line(false, |ps| format_node(ps, value)); } + +/// Count leading whitespace columns. Tabs advance to the next multiple of 8. +fn count_indent_cols(line: &[u8]) -> usize { + let mut col = 0; + for &b in line { + match b { + b' ' => col += 1, + b'\t' => col = (col / 8 + 1) * 8, + _ => break, + } + } + col +} + +/// Strip up to `cols` leading whitespace columns from `line`. +/// Tabs advance to the next multiple of 8. +/// If a tab would advance past `cols`, it is not stripped. +fn strip_indent_cols(line: &[u8], cols: usize) -> &[u8] { + let mut col = 0; + let mut i = 0; + while i < line.len() && col < cols { + match line[i] { + b' ' => { + col += 1; + i += 1; + } + b'\t' => { + col = (col / 8 + 1) * 8; + if col > cols { + // We are in the middle of a tab. Do not strip it. + break; + } + i += 1; + } + _ => break, + } + } + &line[i..] +} diff --git a/librubyfmt/src/heredoc_string.rs b/librubyfmt/src/heredoc_string.rs index a491ce1e..113cc5d5 100644 --- a/librubyfmt/src/heredoc_string.rs +++ b/librubyfmt/src/heredoc_string.rs @@ -68,8 +68,16 @@ impl<'src> HeredocString<'src> { let indent = self.indent; if self.kind.is_squiggly() { - // For squiggly heredocs, we need to apply indentation to Normal segments - // but not to Raw segments (which come from nested non-squiggly heredocs). + // Do not indent raw segments (nested non-squiggly heredocs). + // Do not indent when all normal segments contain only whitespace. + let should_indent = self.segments.iter().any(|seg| { + if let HeredocSegment::Normal(c) = seg { + c.iter().any(|&b| b != b' ' && b != b'\t' && b != b'\n') + } else { + false + } + }); + let mut result = Vec::new(); for segment in self.segments { match segment { @@ -79,9 +87,10 @@ impl<'src> HeredocString<'src> { if i > 0 { result.push(b'\n'); } - let mut indented = get_indent(indent as usize + 2).into_owned(); - indented.extend_from_slice(line); - result.extend_from_slice(indented.trim_ascii_end()); + if !line.is_empty() && should_indent { + result.extend_from_slice(get_indent(indent as usize + 2).as_ref()); + } + result.extend_from_slice(line); } } HeredocSegment::Raw(content) => { @@ -90,14 +99,14 @@ impl<'src> HeredocString<'src> { if i > 0 { result.push(b'\n'); } - result.extend_from_slice(line.trim_ascii_end()); + result.extend_from_slice(line); } } } } result } else { - // For non-squiggly heredocs, just join segments and trim line endings + // For non-squiggly heredocs, just join segments let mut result = Vec::new(); for segment in self.segments { let content = match segment { @@ -107,7 +116,7 @@ impl<'src> HeredocString<'src> { if i > 0 { result.push(b'\n'); } - result.extend_from_slice(line.trim_ascii_end()); + result.extend_from_slice(line); } } result diff --git a/tests/string_test.rs b/tests/string_test.rs new file mode 100644 index 00000000..717316b1 --- /dev/null +++ b/tests/string_test.rs @@ -0,0 +1,278 @@ +use assert_cmd::cargo::CommandCargoExt; +use std::io::Write; +use std::process::{Command, Stdio}; + +#[test] +fn squiggly_heredoc_whitespace_only_lines_do_not_impact_indentation() { + string_test( + concat!( + "puts(\n", + "<<~FOOBARBAZ\n", + " 2 spaces\n", + " \n", + "FOOBARBAZ\n", + ")\n", + ), + "2 spaces\n\n", + concat!( + "puts(\n", + " <<~FOOBARBAZ\n", + " 2 spaces\n", + "\n", + " FOOBARBAZ\n", + ")\n", + ), + ); +} + +#[test] +fn squiggly_heredoc_one_tab_equals_eight_spaces() { + string_test( + concat!( + "puts(\n", + "<<~FOOBARBAZ\n", + " 8 spaces\n", + "\t1 tab\n", + "FOOBARBAZ\n", + ")\n", + ), + "8 spaces\n1 tab\n", + concat!( + "puts(\n", + " <<~FOOBARBAZ\n", + " 8 spaces\n", + " 1 tab\n", + " FOOBARBAZ\n", + ")\n", + ), + ); +} + +#[test] +fn squiggly_heredoc_one_space_plus_one_tab_equals_eight_spaces() { + string_test( + concat!( + "puts(\n", + "<<~FOOBARBAZ\n", + " 9 spaces\n", + " \t1 space plus 1 tab\n", + "FOOBARBAZ\n", + ")\n", + ), + " 9 spaces\n1 space plus 1 tab\n", + concat!( + "puts(\n", + " <<~FOOBARBAZ\n", + " 9 spaces\n", + " 1 space plus 1 tab\n", + " FOOBARBAZ\n", + ")\n", + ), + ); +} + +#[test] +fn squiggly_heredoc_one_tab_plus_one_space_equals_nine_spaces() { + string_test( + concat!( + "puts(\n", + "<<~FOOBARBAZ\n", + " 9 spaces\n", + "\t 1 tab plus 1 space\n", + "FOOBARBAZ\n", + ")\n", + ), + "9 spaces\n1 tab plus 1 space\n", + concat!( + "puts(\n", + " <<~FOOBARBAZ\n", + " 9 spaces\n", + " 1 tab plus 1 space\n", + " FOOBARBAZ\n", + ")\n", + ), + ); +} + +#[test] +fn squiggly_heredoc_one_tab_is_greater_than_seven_spaces() { + string_test( + concat!( + "puts(\n", + "<<~FOOBARBAZ\n", + " 7 spaces\n", + "\t1 tab\n", + "FOOBARBAZ\n", + ")\n", + ), + "7 spaces\n\t1 tab\n", + concat!( + "puts(\n", + " <<~FOOBARBAZ\n", + " 7 spaces\n", + " \t1 tab\n", + " FOOBARBAZ\n", + ")\n", + ), + ); +} + +#[test] +fn squiggly_heredoc_a_single_whitespace_line_is_preserved() { + string_test( + concat!( + "puts(\n", + "<<~FOOBARBAZFOOBARBAZ\n", + " \n", + "FOOBARBAZFOOBARBAZ\n", + ")\n", + ), + " \n", + concat!( + "puts(\n", + " <<~FOOBARBAZFOOBARBAZ\n", + " \n", + " FOOBARBAZFOOBARBAZ\n", + ")\n", + ), + ); +} + +#[test] +fn normal_heredoc_a_single_whitespace_line_is_preserved() { + string_test( + concat!( + "puts(\n", + "<<-FOOBARBAZFOOBARBAZ\n", + " \n", + "FOOBARBAZFOOBARBAZ\n", + ")\n", + ), + " \n", + concat!( + "puts(\n", + " <<-FOOBARBAZFOOBARBAZ\n", + " \n", + " FOOBARBAZFOOBARBAZ\n", + ")\n", + ), + ); +} + +#[test] +fn squiggly_heredoc_trailing_whitespace_is_preserved() { + string_test( + concat!( + "puts(\n", + "<<~FOOBARBAZ\n", + " foo \n", + " bar\t\n", + " \n", + "\t\n", + "FOOBARBAZ\n", + ")\n", + ), + "foo \nbar\t\n \n\t\n", + concat!( + "puts(\n", + " <<~FOOBARBAZ\n", + " foo \n", + " bar\t\n", + " \n", + " \t\n", + " FOOBARBAZ\n", + ")\n", + ), + ); +} + +#[test] +fn normal_heredoc_trailing_whitespace_is_preserved() { + string_test( + concat!( + "puts(\n", + "<<-FOOBARBAZ\n", + "foo \n", + "bar\t\n", + " \n", + "\t\n", + "FOOBARBAZ\n", + ")\n", + ), + "foo \nbar\t\n \n\t\n", + concat!( + "puts(\n", + " <<-FOOBARBAZ\n", + "foo \n", + "bar\t\n", + " \n", + "\t\n", + " FOOBARBAZ\n", + ")\n", + ), + ); +} + +/// Runs a single string test: +/// +/// 1. Runs `ruby` on `input` and verifies it produces `ruby_output`. +/// 2. Runs `rubyfmt` on `input` and verifies it produces `rubyfmt_output`. +/// 3. Runs `ruby` on `rubyfmt_output` and verifies it produces `ruby_output`. +fn string_test(input: &str, ruby_output: &str, rubyfmt_output: &str) { + fn pipe_to(mut cmd: Command, input: &[u8]) -> std::process::Output { + cmd.stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + let mut child = cmd.spawn().expect("failed to spawn process"); + child + .stdin + .take() + .unwrap() + .write_all(input) + .expect("failed to write to stdin"); + child + .wait_with_output() + .expect("failed to wait for process") + } + + // Verify original behaviour with ruby. + let original_ruby = pipe_to(Command::new("ruby"), input.as_bytes()); + assert!( + original_ruby.status.success(), + "ruby failed on input:\n{input}\nStderr: {}", + String::from_utf8_lossy(&original_ruby.stderr) + ); + assert_eq!( + String::from_utf8_lossy(&original_ruby.stdout), + ruby_output, + "ruby output before formatting does not match expected output" + ); + + // Verify rubyfmt output. + let rubyfmt_cmd = Command::cargo_bin("rubyfmt-main").unwrap(); + let rubyfmt_result = pipe_to(rubyfmt_cmd, input.as_bytes()); + assert!( + rubyfmt_result.status.success(), + "rubyfmt failed on input:\n{input}\nStderr: {}", + String::from_utf8_lossy(&rubyfmt_result.stderr) + ); + assert_eq!( + String::from_utf8_lossy(&rubyfmt_result.stdout), + rubyfmt_output, + "rubyfmt output does not match expected formatted output" + ); + + // Verify `ruby` after formatting. + let formatted_ruby = pipe_to(Command::new("ruby"), &rubyfmt_result.stdout); + assert!( + formatted_ruby.status.success(), + "ruby failed on formatted output:\n{}\nStderr: {}", + rubyfmt_output, + String::from_utf8_lossy(&formatted_ruby.stderr) + ); + assert_eq!( + String::from_utf8_lossy(&formatted_ruby.stdout), + ruby_output, + "ruby output after formatting does not match expected output" + ); +}