From cd0ce9aa1aa14110197d62aacdfb018ad7f42b47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonatan=20M=C3=A4nnchen?= Date: Wed, 27 May 2026 21:27:36 +0200 Subject: [PATCH] Fix footnote definitions absorbing subsequent document content Footnote definitions were terminating the main parse loop and feeding all remaining lines into the footnote body collector, causing content after a footnote definition to be nested inside its `li` node. The `div.footnotes` block was also placed at the position of the first footnote rather than at the end of the document. Rewrite `FootnoteParser` to partition lines into per-footnote bodies (blank/indented continuations) and regular document lines, parse each separately, and append `FnList` at the end of the document. --- lib/earmark_parser/parser/footnote_parser.ex | 77 ++++++++++--------- ...otnote_absorbs_subsequent_content_test.exs | 46 +++++++++++ 2 files changed, 86 insertions(+), 37 deletions(-) create mode 100644 test/regressions/footnote_absorbs_subsequent_content_test.exs diff --git a/lib/earmark_parser/parser/footnote_parser.ex b/lib/earmark_parser/parser/footnote_parser.ex index 9b085a00..f50cd029 100644 --- a/lib/earmark_parser/parser/footnote_parser.ex +++ b/lib/earmark_parser/parser/footnote_parser.ex @@ -1,58 +1,61 @@ defmodule Earmark.Parser.Parser.FootnoteParser do - alias Earmark.Parser.{Block, Enum.Ext, Line} + alias Earmark.Parser.{Block, Line} @moduledoc false - def parse_fn_defs([fn_def | rest], result, options) do - acc = - {[fn_def.content], [%Block.FnList{blocks: [_block_fn_def(fn_def)]} | result], %{}, options} - rest - |> Ext.reduce_with_end(acc, &_parse_fn_def_reduce/2) - end + def parse_fn_defs(input, result, options) do + {fn_defs, doc_lines, footnotes, options1} = _collect_fn_defs(input, [], [], %{}, options) + fn_list = %Block.FnList{blocks: Enum.reverse(fn_defs)} - defp _parse_fn_def_reduce(ele_or_end, acc) + {doc_blocks, _doc_links, _inner_footnotes, options2} = + Earmark.Parser.Parser.parse(doc_lines, options1, false) - defp _parse_fn_def_reduce({:element, %Line.FnDef{content: content}=fn_def}, acc) do - {result1, footnotes, options1} = _complete_fn_def_block(acc, fn_def) - {[content], result1, footnotes, options1} + reversed_doc = Enum.reverse(doc_blocks) + {[fn_list | reversed_doc] ++ result, footnotes, options2} end - defp _parse_fn_def_reduce({:element, %{line: line}}, acc) do - _prepend_to_first_in4(line, acc) + defp _collect_fn_defs([], fn_defs, doc_lines, footnotes, options) do + {fn_defs, doc_lines, footnotes, options} end - defp _parse_fn_def_reduce(:end, acc) do - {[fn_list | rest], footnotes, options} = _complete_fn_def_block(acc) - {[%{fn_list | blocks: Enum.reverse(fn_list.blocks)} | rest], footnotes, options} - end + defp _collect_fn_defs([%Line.FnDef{} = fn_def | rest], fn_defs, doc_lines, footnotes, options) do + {body_lines, remaining} = _split_fn_body(rest) + + {inner_blocks, _links, _inner_fns, options1} = + Earmark.Parser.Parser.parse([fn_def.content | body_lines], options, true) - defp _prepend_to_first_in4(element, {a, b, c, d}) do - {[element | a], b, c, d} + closed_fn = %Block.FnDef{id: fn_def.id, lnb: fn_def.lnb, blocks: inner_blocks} + footnotes1 = Map.put(footnotes, closed_fn.id, closed_fn) + _collect_fn_defs(remaining, [closed_fn | fn_defs], doc_lines, footnotes1, options1) end - defp _block_fn_def(%Line.FnDef{} = fn_def) do - %Block.FnDef{id: fn_def.id, lnb: fn_def.lnb} + defp _collect_fn_defs([line | rest], fn_defs, doc_lines, footnotes, options) do + _collect_fn_defs(rest, fn_defs, doc_lines ++ [line.line], footnotes, options) end - defp _complete_fn_def_block( - {input, [%Block.FnList{blocks: [open_fn | closed_fns]} | rest], footnotes, options}, - new_fn_def \\ nil - ) do - # `_footnotes1` should be empty but let us not change the shape of parse depending - # on options or the value of recursive? - {inner_blocks, _links, _footnotes1, options1} = Earmark.Parser.Parser.parse(Enum.reverse(input), options, true) - closed_fn = %{open_fn | blocks: inner_blocks} - footnotes1 = Map.put(footnotes, closed_fn.id, closed_fn) + defp _split_fn_body(lines), do: _split_fn_body(lines, [], false) - fn_blocks = - if new_fn_def do - [_block_fn_def(new_fn_def), closed_fn | closed_fns] - else - [closed_fn | closed_fns] - end + defp _split_fn_body([], body, _after_blank), do: {Enum.reverse(body), []} - {[%Block.FnList{blocks: fn_blocks} | rest], footnotes1, options1} + defp _split_fn_body([%Line.FnDef{} | _] = rest, body, _after_blank) do + {Enum.reverse(body), rest} end + defp _split_fn_body([%Line.Blank{} | rest], body, _after_blank) do + _split_fn_body(rest, ["" | body], true) + end + + defp _split_fn_body([line | rest], body, true) do + if line.indent >= 4 do + _split_fn_body(rest, [line.line | body], false) + else + {Enum.reverse(body), [line | rest]} + end + end + + defp _split_fn_body([line | rest], body, false) do + _split_fn_body(rest, [line.line | body], false) + end end + # SPDX-License-Identifier: Apache-2.0 diff --git a/test/regressions/footnote_absorbs_subsequent_content_test.exs b/test/regressions/footnote_absorbs_subsequent_content_test.exs new file mode 100644 index 00000000..44da5894 --- /dev/null +++ b/test/regressions/footnote_absorbs_subsequent_content_test.exs @@ -0,0 +1,46 @@ +defmodule Regressions.FootnoteAbsorbsSubsequentContentTest do + use ExUnit.Case, async: true + + import Support.Helpers, only: [as_ast: 2] + + test "content after a footnote definition stays at the top level" do + markdown = """ + Before first[^1]. + + [^1]: First footnote. + + Between footnotes[^2]. + + [^2]: Second footnote. + + After all. + """ + + {:ok, ast, _} = as_ast(markdown, gfm: true, footnotes: true) + + assert [ + {"p", [], ["Before first", {"a", _, ["1"], %{}}, "."], %{}}, + {"p", [], ["Between footnotes", {"a", _, ["2"], %{}}, "."], %{}}, + {"p", [], ["After all."], %{}}, + {"div", [{"class", "footnotes"}], + [ + {"hr", [], [], %{}}, + {"ol", [], + [ + {"li", [{"id", "fn:1"}], + [ + {"a", _, ["↩"], %{}}, + {"p", [], ["First footnote."], %{}} + ], %{}}, + {"li", [{"id", "fn:2"}], + [ + {"a", _, ["↩"], %{}}, + {"p", [], ["Second footnote."], %{}} + ], %{}} + ], %{}} + ], %{}} + ] = ast + end +end + +# SPDX-License-Identifier: Apache-2.0