Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions compiler/driver.ml
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,60 @@ module Commands = struct
$ Cli.Flags.print_only_law
$ Cli.Flags.wrap_weaved_output)

let common_markdown options output print_only_law wrap_weaved_output =
let prg = Passes.surface options in
Message.debug "Weaving literate program into HTML";
get_output_format options ~ext:"html" output
@@ fun output_file fmt ->
let language =
Cli.file_lang (Global.input_src_file options.Global.input_src)
in
let weave_output = Literate.Md.ast_to_markdown language ~print_only_law in
Message.debug "Writing to %s" (Option.value ~default:"stdout" output_file);
if wrap_weaved_output then
Literate.Md.wrap_markdown prg.Surface.Ast.program_source_files language
fmt (fun fmt -> weave_output fmt prg)
else weave_output fmt prg

let markdown options output print_only_law wrap_weaved_output =
File.with_temp_file "catala_markdown" "md"
@@ fun temp_file_in ->
let raw_file = Global.raw_file temp_file_in in
let () =
common_markdown options (Some raw_file) print_only_law wrap_weaved_output
in
let language =
Cli.file_lang (Global.input_src_file options.Global.input_src)
in
get_output_format options ~ext:"pdf" output
@@ fun output_file _fmt ->
let output_file = Option.value ~default:"-" output_file in
Literate.Literate_common.run_pandoc_on_file temp_file_in output_file
language `Markdown;
(* For unknown reason, pandoc append new line on summary when choosing the markdown target.
So we just remove it to keep the markdown clean *)
let sed_command =
Format.sprintf {|sed -i -z "s/<summary>\n/<summary>/g" %s|} output_file
in
let return_code = Sys.command sed_command in
if return_code <> 0 then
Message.error
"Weaving failed: sed command \"%s\" returned with error code %d"
sed_command return_code

let md_cmd =
Cmd.v
(Cmd.info "markdown" ~man:Cli.man_base
~doc:
"Weaves an Pandoc-Markdown literate programming output of the \
Catala program.")
Term.(
const markdown
$ Cli.Flags.Global.options
$ Cli.Flags.output
$ Cli.Flags.print_only_law
$ Cli.Flags.wrap_weaved_output)

let latex options output print_only_law wrap_weaved_output extra_files =
let prg = Passes.surface options in
let prg_annex =
Expand Down Expand Up @@ -502,6 +556,33 @@ module Commands = struct
$ Cli.Flags.wrap_weaved_output
$ Cli.Flags.extra_files)

let pdf options output print_only_law wrap_weaved_output =

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is very nice but the PDF generated by Pandoc is very ugly no? Right now our pipeline of choice to generate PDF is through LaTeX with the preamble set out in

https://github.com/CatalaLang/catala/blob/master/compiler/literate/latex.ml#L52-L165

I would prefer not to roll out an out-of-the-box PDF feature that provides an uglier PDF than the one we're already providing with our current preferred pipeline.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes you're right as of right now the feature is far from being completed. That command was just to give an example of what was possible and also to test the pdf result with a single command instead of two (generating the pandoc markdown and then call pandoc targetting pdf)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mmmh after checking I may have introduced something wrong in the last version because indeed the pdf is not working as expected, it was working way better before now there are a lot of {...} that should not appear

File.with_temp_file "catala_markdown" "md"
@@ fun temp_file_in ->
let raw_file = Global.raw_file temp_file_in in
let () =
common_markdown options (Some raw_file) print_only_law wrap_weaved_output
in
let language =
Cli.file_lang (Global.input_src_file options.Global.input_src)
in
get_output_format options ~ext:"pdf" output
@@ fun output_file _fmt ->
let output_file = Option.value ~default:"-" output_file in
Literate.Literate_common.run_pandoc_on_file temp_file_in output_file
language `Pdf

let pdf_cmd =
Cmd.v
(Cmd.info "pdf" ~man:Cli.man_base
~doc:"Weaves a PDF output of the Catala program.")
Term.(
const pdf
$ Cli.Flags.Global.options
$ Cli.Flags.output
$ Cli.Flags.print_only_law
$ Cli.Flags.wrap_weaved_output)

let exceptions options includes stdlib ex_scope ex_variable output_format =
let prg, ctxt = Passes.desugared options ~includes ~stdlib in
Passes.debug_pass_name "scopelang";
Expand Down Expand Up @@ -1375,6 +1456,8 @@ module Commands = struct
c_cmd;
latex_cmd;
html_cmd;
md_cmd;
pdf_cmd;
makefile_cmd;
scopelang_cmd;
dcalc_cmd;
Expand Down
8 changes: 8 additions & 0 deletions compiler/literate/dune
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@
lexer_pl.py)
(run ocaml-crunch -e py -m plain -o %{target} .))))

(rule
(target pandoc_highlight.ml)
(action
(progn
(copy ../../syntax_highlighting/en/pandoc/catala_en.xml catala_en.xml)
(copy ../../syntax_highlighting/fr/pandoc/catala_fr.xml catala_fr.xml)
(run ocaml-crunch -e xml -m plain -o %{target} .))))

(documentation
(package catala)
(mld_files literate))
18 changes: 18 additions & 0 deletions compiler/literate/latex.ml
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,18 @@ let rec law_structure_to_latex
let filename = Pos.get_file (Mark.get c) in
let block_content = Mark.remove c in
check_exceeding_lines start_line filename block_content;
let c =
let block_content, pos = c in
(* Problem here, position may have been damaged due to the creation of new lines *)
let block_content = remove_exceeding_lines block_content in
block_content, pos
in
let c =
let block_content, pos = c in
(* Problem here, position may have been damaged due to the creation of new lines *)
let block_content = remove_exceeding_lines block_content in
block_content, pos
in
update_lines_of_code c;
code_block ~meta:false language fmt c
| A.CodeBlock (_, c, true) when not print_only_law ->
Expand All @@ -315,6 +327,12 @@ let rec law_structure_to_latex
let filename = Pos.get_file (Mark.get c) in
let block_content = Mark.remove c in
check_exceeding_lines start_line filename block_content;
let c =
let block_content, pos = c in
(* Problem here, position may have been damaged due to the creation of new lines *)
let block_content = remove_exceeding_lines block_content in
block_content, pos
in
update_lines_of_code c;
Format.fprintf fmt
"\\begin{tcolorbox}[colframe=OliveGreen, breakable, \
Expand Down
84 changes: 84 additions & 0 deletions compiler/literate/literate_common.ml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,51 @@ let run_pandoc (s : string) (backend : [ `Html | `Latex ]) : string =
Sys.remove tmp_file_out;
tmp_file_as_string

let run_pandoc_on_file
(file_in : Global.file)
(file_out : Global.file)
language
(backend : [ `Html | `Latex | `Pdf | `Markdown ]) : unit =
let pandoc = "pandoc" in
let temp_syntax_file = Filename.temp_file "catala_syntax" "xml" in
let syntax_definition =
let syntaxes = "catala_" ^ Cli.language_code language ^ ".xml" in
match Pandoc_highlight.read syntaxes with
| None -> []
| Some content ->
let oc = open_out temp_syntax_file in
output_string oc content;
close_out oc;
["--syntax-definition"; temp_syntax_file]
in
let pandoc_args =
syntax_definition
@ [
"-s";
"-f";
"markdown+multiline_tables+tex_math_dollars+markdown_in_html_blocks+fenced_code_blocks";
"--mathjax";
"-t";
(match backend with
| `Html -> "html"
| `Latex -> "latex"
| `Pdf -> "pdf"
| `Markdown ->
(* gfm stands for Github Flavored Markdown,
the -raw_html is to forbid raw html in
the produced markdown *)
"gfm-raw_html");
"-o";
file_out;
]
in
let cmd =
Format.sprintf "%s %s %s" pandoc (String.concat " " pandoc_args) file_in
in
let return_code = Sys.command cmd in
if return_code <> 0 then raise_failed_pandoc cmd return_code;
Sys.remove temp_syntax_file

let check_exceeding_lines
?(max_len = 80)
(start_line : int)
Expand All @@ -114,6 +159,45 @@ let check_exceeding_lines
(start_line + i) (len_s + 1))
"This line is exceeding @{<bold;red>%d@} characters" max_len)

let split_a_line max_len exceeding_line =
let full_line_splitted = String.split_on_char ' ' exceeding_line in
let base_line, line =
let rec retrieve_spaces_starts base_line l =
match l with
| [] -> base_line, []
| "#" :: rem -> retrieve_spaces_starts ("#" :: base_line) rem
| "" :: rem -> retrieve_spaces_starts ("" :: base_line) rem
| remaining -> base_line, remaining
in
retrieve_spaces_starts [] full_line_splitted
in
let with_size = List.map (fun s -> String.length s, s) line in
let rec aux remaining current_line cpt acc =
match remaining with
| [] ->
let current_line = String.concat " " (List.rev current_line) in
current_line :: acc
| (len, word) :: rem ->
if cpt + len + (List.length current_line - 1) > max_len then
if current_line = [] then aux rem base_line 0 (word :: acc)
else
let current_line = String.concat " " (List.rev current_line) in
aux remaining base_line 0 (current_line :: acc)
else aux rem (word :: current_line) (cpt + len) acc
in
aux with_size base_line 0 []

let remove_exceeding_lines ?(max_len = 80) content =
let contents = String.split_on_char '\n' content in
let res =
List.fold_left
(fun acc content ->
let line_splitted = split_a_line max_len content in
line_splitted @ acc)
[] contents
in
String.concat "\n" (List.rev res)

let with_pygmentize_lexer lang f =
let lexer_py =
let lexer_fname = "lexer_" ^ Cli.language_code lang ^ ".py" in
Expand Down
19 changes: 19 additions & 0 deletions compiler/literate/literate_common.mli
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ val run_pandoc : string -> [ `Html | `Latex ] -> string
(** Runs the [pandoc] on a string to pretty-print markdown features into the
desired format. *)

val run_pandoc_on_file :
Global.file ->
Global.file ->
Global.backend_lang ->
[ `Html | `Latex | `Pdf | `Markdown ] ->
unit
(** Runs the [pandoc] on a file to pretty-print markdown features into the
desired format. *)

val check_exceeding_lines : ?max_len:int -> int -> string -> string -> unit
(** [check_exceeding_lines ~max_len start_line filename content] prints a
warning message for each lines of [content] exceeding [max_len] characters.
Expand All @@ -58,3 +67,13 @@ val call_pygmentize : ?lang:Global.backend_lang -> string list -> string
val with_pygmentize_lexer : Global.backend_lang -> (string list -> 'a) -> 'a
(** Creates the required lexer file and returns the corresponding [pygmentize]
command-line arguments *)

val split_a_line : int -> string -> string list
(** [split_a_line max_len line] split the string [line] in a list where each
element should not exceed [max_len]. The only exception is that the function
doesn't cut a word so if the [max_len] authorized is 10 and that we have a
word greater than 10 it will take a single line without being splitted *)

val remove_exceeding_lines : ?max_len:int -> string -> string
(** [remove_exceeding_lines ~max_len content] return a string that represent the
content splitted into multiple lines if content exceed max_len *)
Loading
Loading