From 0faa62be9fc7a0cfb78afabd44808c32ff682dfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonatan=20M=C3=A4nnchen?= Date: Mon, 25 May 2026 23:44:16 +0200 Subject: [PATCH] Fix inline HTML comments being mishandled when inside backtick spans MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The line scanner's `process_line` function used a regex to split any line containing `<!-- ... -->` into a block-level comment and trailing text, even when the comment appeared inside a backtick code span. This caused the backticks to be left unclosed and the comment to leak into the output. Fix by removing `process_line` and the `comment_rest` regex entirely — block-level comments at the start of a line are already handled correctly by `html_comment_complete`/`html_comment_start` in `_type_of`. Add a new `converter_for_inline_comment` to the inline parser so that `<!-- ... -->` appearing inline in text is emitted as a proper comment node rather than being HTML-escaped. Closes #518 --- lib/earmark_parser/ast/inline.ex | 12 ++++++++++++ lib/earmark_parser/line_scanner.ex | 14 +------------- .../i518_html_comment_in_backticks_test.exs | 16 ++++++++++++++++ 3 files changed, 29 insertions(+), 13 deletions(-) create mode 100644 test/regressions/i518_html_comment_in_backticks_test.exs diff --git a/lib/earmark_parser/ast/inline.ex b/lib/earmark_parser/ast/inline.ex index a45287ea..750253a6 100644 --- a/lib/earmark_parser/ast/inline.ex +++ b/lib/earmark_parser/ast/inline.ex @@ -55,6 +55,7 @@ defmodule Earmark.Parser.Ast.Inline do converter_for_sup: &converter_for_sup/1, # converter_for_code: &converter_for_code/1, + converter_for_inline_comment: &converter_for_inline_comment/1, converter_for_br: &converter_for_br/1, converter_for_inline_ial: &converter_for_inline_ial/1, converter_for_pure_link: &converter_for_pure_link/1, @@ -260,6 +261,17 @@ defmodule Earmark.Parser.Ast.Inline do end end + def converter_for_inline_comment({src, lnb, context, use_linky?}) do + comment_rgx = 
~r/\A(<!--.*?-->)/s + + if match = Regex.run(comment_rgx, src) do + [match, content] = match + inner = content |> String.replace_prefix("<!--", "") |> String.replace_suffix("-->", "") + out = {:comment, [], [inner], %{comment: true}} + {behead(src, match), lnb, prepend(context, out), use_linky?} + end + end + def converter_for_inline_ial({src, lnb, context, use_linky?}) do inline_ial = ~r<^\s*\{:\s*(.*?)\s*}> diff --git a/lib/earmark_parser/line_scanner.ex b/lib/earmark_parser/line_scanner.ex index addb5ad8..990f1c23 100644 --- a/lib/earmark_parser/line_scanner.ex +++ b/lib/earmark_parser/line_scanner.ex @@ -42,7 +42,6 @@ defmodule Earmark.Parser.LineScanner do # Converted rgx_map to a function defp rgx_map(:block_quote), do: ~r/\A>\s?(.*)/ defp rgx_map(:column_rgx), do: ~r{\A[\s|:-]+\z} - defp rgx_map(:comment_rest), do: ~r/(<!--.*?-->)(.*)/ defp rgx_map(:fence), do: ~r/\A(\s*)(`{3,}|~{3,})\s*([^`\s]*)\s*\z/u defp rgx_map(:footnote_definition), do: ~r/\A\[\^([^\s\]]+)\]:\s+(.*)/ defp rgx_map(:heading), do: ~r/^(\#{1,6})\s+(?|(.*?)\s*#*\s*$|(.*))/u @@ -296,23 +295,12 @@ defmodule Earmark.Parser.LineScanner do end defp _with_lookahead([line_lnb | lines], options, recursive) do - process_line(line_lnb, options, recursive) ++ + [type_of(line_lnb, options, recursive)] ++ _with_lookahead(lines, options, recursive) end defp _with_lookahead([], _options, _recursive), do: [] - defp process_line({line, lnb}, options, recursive) do - case regex_run(:comment_rest, line, capture: :all_but_first) do - [comment, rest] -> - [type_of({comment, lnb}, options, recursive)] ++ - [type_of({rest, lnb}, options, recursive)] - - nil -> - [type_of({line, lnb}, options, recursive)] - end - end - defp _determine_if_header(columns) do columns |> Enum.all?(fn col -> regex_run(:column_rgx, col) end) diff --git a/test/regressions/i518_html_comment_in_backticks_test.exs b/test/regressions/i518_html_comment_in_backticks_test.exs new file mode 100644 index 00000000..83c35683 --- /dev/null +++ b/test/regressions/i518_html_comment_in_backticks_test.exs @@ 
-0,0 +1,16 @@ +defmodule Test.Regressions.I518HtmlCommentInBackticksTest do + use ExUnit.Case + + test "HTML comment inside backticks is rendered as inline code" do + md = "some source `<!-- 2 -->`" + html = "<p>\nsome source <code class=\"inline\">&lt;!-- 2 --&gt;</code></p>\n" + assert Earmark.as_html(md) == {:ok, html, []} + end + + test "HTML comment mid-line is rendered as a comment" do + md = "text <!-- comment --> more text" + html = "<p>\ntext <!-- comment -->\n more text</p>\n" + assert Earmark.as_html(md) == {:ok, html, []} + end +end +# SPDX-License-Identifier: Apache-2.0