diff --git a/compiler/driver.ml b/compiler/driver.ml index 76688ec35..a79f1090f 100644 --- a/compiler/driver.ml +++ b/compiler/driver.ml @@ -460,6 +460,60 @@ module Commands = struct $ Cli.Flags.print_only_law $ Cli.Flags.wrap_weaved_output) + let common_markdown options output print_only_law wrap_weaved_output = + let prg = Passes.surface options in + Message.debug "Weaving literate program into HTML"; + get_output_format options ~ext:"html" output + @@ fun output_file fmt -> + let language = + Cli.file_lang (Global.input_src_file options.Global.input_src) + in + let weave_output = Literate.Md.ast_to_markdown language ~print_only_law in + Message.debug "Writing to %s" (Option.value ~default:"stdout" output_file); + if wrap_weaved_output then + Literate.Md.wrap_markdown prg.Surface.Ast.program_source_files language + fmt (fun fmt -> weave_output fmt prg) + else weave_output fmt prg + + let markdown options output print_only_law wrap_weaved_output = + File.with_temp_file "catala_markdown" "md" + @@ fun temp_file_in -> + let raw_file = Global.raw_file temp_file_in in + let () = + common_markdown options (Some raw_file) print_only_law wrap_weaved_output + in + let language = + Cli.file_lang (Global.input_src_file options.Global.input_src) + in + get_output_format options ~ext:"pdf" output + @@ fun output_file _fmt -> + let output_file = Option.value ~default:"-" output_file in + Literate.Literate_common.run_pandoc_on_file temp_file_in output_file + language `Markdown; + (* For unknown reason, pandoc append new line on summary when choosing the markdown target. + So we just remove it to keep the markdown clean *) + let sed_command = + Format.sprintf {|sed -i -z "s/\n//g" %s|} output_file + in + let return_code = Sys.command sed_command in + if return_code <> 0 then + Message.error + "Weaving failed: sed command \"%s\" returned with error code %d" + sed_command return_code + + let md_cmd = + Cmd.v + (Cmd.info "markdown" ~man:Cli.man_base + ~doc: + "Weaves an Pandoc-Markdown literate programming output of the \ + Catala program.") + Term.( + const markdown + $ Cli.Flags.Global.options + $ Cli.Flags.output + $ Cli.Flags.print_only_law + $ Cli.Flags.wrap_weaved_output) + let latex options output print_only_law wrap_weaved_output extra_files = let prg = Passes.surface options in let prg_annex = @@ -502,6 +556,33 @@ module Commands = struct $ Cli.Flags.wrap_weaved_output $ Cli.Flags.extra_files) + let pdf options output print_only_law wrap_weaved_output = + File.with_temp_file "catala_markdown" "md" + @@ fun temp_file_in -> + let raw_file = Global.raw_file temp_file_in in + let () = + common_markdown options (Some raw_file) print_only_law wrap_weaved_output + in + let language = + Cli.file_lang (Global.input_src_file options.Global.input_src) + in + get_output_format options ~ext:"pdf" output + @@ fun output_file _fmt -> + let output_file = Option.value ~default:"-" output_file in + Literate.Literate_common.run_pandoc_on_file temp_file_in output_file + language `Pdf + + let pdf_cmd = + Cmd.v + (Cmd.info "pdf" ~man:Cli.man_base + ~doc:"Weaves a PDF output of the Catala program.") + Term.( + const pdf + $ Cli.Flags.Global.options + $ Cli.Flags.output + $ Cli.Flags.print_only_law + $ Cli.Flags.wrap_weaved_output) + let exceptions options includes stdlib ex_scope ex_variable output_format = let prg, ctxt = Passes.desugared options ~includes ~stdlib in Passes.debug_pass_name "scopelang"; @@ -1375,6 +1456,8 @@ module Commands = struct c_cmd; latex_cmd; html_cmd; + md_cmd; + pdf_cmd; makefile_cmd; scopelang_cmd; dcalc_cmd; diff --git a/compiler/literate/dune b/compiler/literate/dune index 81c539bf0..661e4f22e 100644 --- a/compiler/literate/dune +++ b/compiler/literate/dune @@ -18,6 +18,14 @@ lexer_pl.py) (run ocaml-crunch -e py -m plain -o %{target} .)))) +(rule + (target pandoc_highlight.ml) + (action + (progn + (copy ../../syntax_highlighting/en/pandoc/catala_en.xml catala_en.xml) + (copy ../../syntax_highlighting/fr/pandoc/catala_fr.xml catala_fr.xml) + (run ocaml-crunch -e xml -m plain -o %{target} .)))) + (documentation (package catala) (mld_files literate)) diff --git a/compiler/literate/latex.ml b/compiler/literate/latex.ml index 082638680..48abe58db 100644 --- a/compiler/literate/latex.ml +++ b/compiler/literate/latex.ml @@ -302,6 +302,18 @@ let rec law_structure_to_latex let filename = Pos.get_file (Mark.get c) in let block_content = Mark.remove c in check_exceeding_lines start_line filename block_content; + let c = + let block_content, pos = c in + (* Problem here, position may have been damaged due to the creation of new lines *) + let block_content = remove_exceeding_lines block_content in + block_content, pos + in + let c = + let block_content, pos = c in + (* Problem here, position may have been damaged due to the creation of new lines *) + let block_content = remove_exceeding_lines block_content in + block_content, pos + in update_lines_of_code c; code_block ~meta:false language fmt c | A.CodeBlock (_, c, true) when not print_only_law -> @@ -315,6 +327,12 @@ let rec law_structure_to_latex let filename = Pos.get_file (Mark.get c) in let block_content = Mark.remove c in check_exceeding_lines start_line filename block_content; + let c = + let block_content, pos = c in + (* Problem here, position may have been damaged due to the creation of new lines *) + let block_content = remove_exceeding_lines block_content in + block_content, pos + in update_lines_of_code c; Format.fprintf fmt "\\begin{tcolorbox}[colframe=OliveGreen, breakable, \ diff --git a/compiler/literate/literate_common.ml b/compiler/literate/literate_common.ml index 4a8d79519..2d9e16228 100644 --- a/compiler/literate/literate_common.ml +++ b/compiler/literate/literate_common.ml @@ -98,6 +98,51 @@ let run_pandoc (s : string) (backend : [ `Html | `Latex ]) : string = Sys.remove tmp_file_out; tmp_file_as_string +let run_pandoc_on_file + (file_in : Global.file) + (file_out : Global.file) + language + (backend : [ `Html | `Latex | `Pdf | `Markdown ]) : unit = + let pandoc = "pandoc" in + let temp_syntax_file = Filename.temp_file "catala_syntax" "xml" in + let syntax_definition = + let syntaxes = "catala_" ^ Cli.language_code language ^ ".xml" in + match Pandoc_highlight.read syntaxes with + | None -> [] + | Some content -> + let oc = open_out temp_syntax_file in + output_string oc content; + close_out oc; + ["--syntax-definition"; temp_syntax_file] + in + let pandoc_args = + syntax_definition + @ [ + "-s"; + "-f"; + "markdown+multiline_tables+tex_math_dollars+markdown_in_html_blocks+fenced_code_blocks"; + "--mathjax"; + "-t"; + (match backend with + | `Html -> "html" + | `Latex -> "latex" + | `Pdf -> "pdf" + | `Markdown -> + (* gfm stands for Github Flavored Markdown, + the -raw_html is to forbid raw html in + the produced markdown *) + "gfm-raw_html"); + "-o"; + file_out; + ] + in + let cmd = + Format.sprintf "%s %s %s" pandoc (String.concat " " pandoc_args) file_in + in + let return_code = Sys.command cmd in + if return_code <> 0 then raise_failed_pandoc cmd return_code; + Sys.remove temp_syntax_file + let check_exceeding_lines ?(max_len = 80) (start_line : int) @@ -114,6 +159,45 @@ let check_exceeding_lines (start_line + i) (len_s + 1)) "This line is exceeding @{%d@} characters" max_len) +let split_a_line max_len exceeding_line = + let full_line_splitted = String.split_on_char ' ' exceeding_line in + let base_line, line = + let rec retrieve_spaces_starts base_line l = + match l with + | [] -> base_line, [] + | "#" :: rem -> retrieve_spaces_starts ("#" :: base_line) rem + | "" :: rem -> retrieve_spaces_starts ("" :: base_line) rem + | remaining -> base_line, remaining + in + retrieve_spaces_starts [] full_line_splitted + in + let with_size = List.map (fun s -> String.length s, s) line in + let rec aux remaining current_line cpt acc = + match remaining with + | [] -> + let current_line = String.concat " " (List.rev current_line) in + current_line :: acc + | (len, word) :: rem -> + if cpt + len + (List.length current_line - 1) > max_len then + if current_line = [] then aux rem base_line 0 (word :: acc) + else + let current_line = String.concat " " (List.rev current_line) in + aux remaining base_line 0 (current_line :: acc) + else aux rem (word :: current_line) (cpt + len) acc + in + aux with_size base_line 0 [] + +let remove_exceeding_lines ?(max_len = 80) content = + let contents = String.split_on_char '\n' content in + let res = + List.fold_left + (fun acc content -> + let line_splitted = split_a_line max_len content in + line_splitted @ acc) + [] contents + in + String.concat "\n" (List.rev res) + let with_pygmentize_lexer lang f = let lexer_py = let lexer_fname = "lexer_" ^ Cli.language_code lang ^ ".py" in diff --git a/compiler/literate/literate_common.mli b/compiler/literate/literate_common.mli index d62705c6b..41f15130b 100644 --- a/compiler/literate/literate_common.mli +++ b/compiler/literate/literate_common.mli @@ -45,6 +45,15 @@ val run_pandoc : string -> [ `Html | `Latex ] -> string (** Runs the [pandoc] on a string to pretty-print markdown features into the desired format. *) +val run_pandoc_on_file : + Global.file -> + Global.file -> + Global.backend_lang -> + [ `Html | `Latex | `Pdf | `Markdown ] -> + unit +(** Runs the [pandoc] on a file to pretty-print markdown features into the + desired format. *) + val check_exceeding_lines : ?max_len:int -> int -> string -> string -> unit (** [check_exceeding_lines ~max_len start_line filename content] prints a warning message for each lines of [content] exceeding [max_len] characters. @@ -58,3 +67,13 @@ val call_pygmentize : ?lang:Global.backend_lang -> string list -> string val with_pygmentize_lexer : Global.backend_lang -> (string list -> 'a) -> 'a (** Creates the required lexer file and returns the corresponding [pygmentize] command-line arguments *) + +val split_a_line : int -> string -> string list +(** [split_a_line max_len line] split the string [line] in a list where each + element should not exceed [max_len]. The only exception is that the function + doesn't cut a word so if the [max_len] authorized is 10 and that we have a + word greater than 10 it will take a single line without being splitted *) + +val remove_exceeding_lines : ?max_len:int -> string -> string +(** [remove_exceeding_lines ~max_len content] return a string that represent the + content splitted into multiple lines if content exceed max_len *) diff --git a/compiler/literate/md.ml b/compiler/literate/md.ml new file mode 100644 index 000000000..27439f669 --- /dev/null +++ b/compiler/literate/md.ml @@ -0,0 +1,176 @@ +(* This file is part of the Catala compiler, a specification language for tax + and social benefits computation rules. Copyright (C) 2020 Inria, + contributors: Denis Merigoux , Emile Rolley + + + Licensed under the Apache License, Version 2.0 (the "License"); you may not + use this file except in compliance with the License. You may obtain a copy of + the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + License for the specific language governing permissions and limitations under + the License. *) + +(** This modules weaves the source code and the legislative text together into a + document that law professionals can understand. *) + +open Catala_utils +open Literate_common +module A = Surface.Ast +module P = Printf +module R = Re.Pcre +module C = Global + +(** {1 Weaving} *) + +let wrap_markdown + (source_files : string list) + (language : Global.backend_lang) + (fmt : Format.formatter) + (wrapped : Format.formatter -> unit) : unit = + Format.fprintf fmt "%s@.@.%s@.@.%s:@.@.%a@.@." (literal_title language) + (literal_generated_by language) + (literal_source_files language) + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf "@.@.") + (fun ppf filename -> + let mtime = (Unix.stat filename).Unix.st_mtime in + let ltime = Unix.localtime mtime in + let ftime = + Printf.sprintf "%d-%02d-%02d, %d:%02d" + (1900 + ltime.Unix.tm_year) + (ltime.Unix.tm_mon + 1) ltime.Unix.tm_mday ltime.Unix.tm_hour + ltime.Unix.tm_min + in + Format.fprintf ppf "* %s, %s %s" + (Filename.basename filename) + (literal_last_modification language) + ftime)) + source_files; + wrapped fmt + +let sanitize_md_href str = + str + |> String.to_ascii + |> R.substitute + ~rex:(R.regexp "[' '°\"!%(),/]") + ~subst: + (String.fold_left + (fun acc c -> Format.sprintf "%s%d" acc (Char.code c)) + "") + +let rec separate_law_and_code + (language : C.backend_lang) + (print_only_law : bool) + (parents_headings : string list) + (fmt : Format.formatter) + (i : A.law_structure) : unit = + match i with + | A.LawText t -> if t = "" then () else Format.fprintf fmt "%s\n" t + | A.CodeBlock (_, c, _metadata) when not print_only_law -> + let start_line = Pos.get_start_line (Mark.get c) + 1 in + let filename = Pos.get_file (Mark.get c) in + let block_content = Mark.remove c in + check_exceeding_lines start_line filename block_content; + let block_content = remove_exceeding_lines block_content in + Format.fprintf fmt "[%s]{.filename}\n\n%s@\n" + (Pos.get_file (Mark.get c)) + ("~~~~~~~catala\n" ^ block_content ^ "~~~~~~~\n") + | A.CodeBlock _ -> () + | A.LawHeading (heading, children) -> + let h_number = heading.law_heading_precedence + 1 in + let is_a_section_to_collapse = + (* Only 2 depth sections are collasped in a
tag. Indeed, this + allow to significantly reduce rendering time (~= 100x for the + [aides_logement] example in the catala-website), while remaining + practicable. *) + h_number = 2 + in + let h_name = Mark.remove heading.law_heading_name in + let complete_headings = parents_headings @ [h_name] in + let id = complete_headings |> String.concat "-" |> sanitize_md_href in + let fmt_details_open fmt () = + if is_a_section_to_collapse then + Format.fprintf fmt "
%s" h_name + else Format.fprintf fmt "%s" h_name + in + let fmt_details_close fmt () = + if is_a_section_to_collapse then Format.fprintf fmt "
" + in + Format.fprintf fmt "%s [%s](#%s){#%s .law-heading}@.%a%s@\n%a\n%a@." + (String.make h_number '#') h_name id id fmt_details_open () + (match heading.law_heading_id, language with + | Some id, `Fr -> ( + try + P.sprintf + "[Voir le texte sur \ + Légifrance.gouv.fr](https://legifrance.gouv.fr/%s/id/%s){.link-article}" + (if String.starts_with ~prefix:"LEGIARTI" id then "codes" + else if String.starts_with ~prefix:"JORFARTI" id then "jorf" + else if String.starts_with ~prefix:"CETATEXT" id then "ceta" + else raise Not_found) + id + with Not_found -> "") + | _ -> "") + (Format.pp_print_list + ~pp_sep:(fun fmt () -> Format.fprintf fmt "\n") + (separate_law_and_code language print_only_law complete_headings)) + children fmt_details_close () + | A.LawInclude _ -> () + | A.ModuleDef _ | A.ModuleUse _ -> () (* TODO: show somehow ?*) + +let rec fmt_toc + (parents_headings : string list) + fmt + (items : A.law_structure list) = + let items = + items + |> List.filter (function A.LawHeading (_, _) -> true | _ -> false) + |> List.mapi (fun i item -> i + 1, item) + in + match items with + | [] -> () + | items -> + Format.fprintf fmt "@,%a" + (Format.pp_print_list (fun fmt (i, item) -> + match item with + | A.LawHeading (heading, childs) -> + let h_name = Mark.remove heading.law_heading_name in + let complete_headings = parents_headings @ [h_name] in + let id = + complete_headings |> String.concat "-" |> sanitize_md_href + in + Format.fprintf fmt "@[%d. [%s](#%s){.toc-item}%a@]" i h_name + id + (fmt_toc complete_headings) + childs + | _ -> ())) + items + +(** {1 API} *) + +let ast_to_markdown + (language : C.backend_lang) + ~(print_only_law : bool) + (fmt : Format.formatter) + (program : A.program) : unit = + let toc = + match language with + | `Fr -> "Sommaire" + | `En -> "Table of contents" + | `Pl -> "Spis treści." + in + + Format.fprintf fmt + "
%s@.@[%a@]@,
@]@.@.%a" toc + (fmt_toc []) program.program_items + (Format.pp_print_list + ~pp_sep:(fun fmt () -> Format.fprintf fmt "\n\n") + (fun fmt -> + Format.fprintf fmt "%a" + (separate_law_and_code language print_only_law []))) + program.program_items diff --git a/compiler/literate/md.mli b/compiler/literate/md.mli new file mode 100644 index 000000000..fc2ec4b12 --- /dev/null +++ b/compiler/literate/md.mli @@ -0,0 +1,41 @@ +(* This file is part of the Catala compiler, a specification language for tax + and social benefits computation rules. Copyright (C) 2020 Inria, contributor: + Denis Merigoux + + Licensed under the Apache License, Version 2.0 (the "License"); you may not + use this file except in compliance with the License. You may obtain a copy of + the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + License for the specific language governing permissions and limitations under + the License. *) + +(** This modules weaves the source code and the legislative text together into a + document that law professionals can understand. *) + +open Catala_utils + +(** {1 Helpers} *) + +val wrap_markdown : + string list -> + Global.backend_lang -> + Format.formatter -> + (Format.formatter -> unit) -> + unit +(** Usage: [wrap_html source_files language fmt wrapped] + + Prints an HTML complete page structure around the [wrapped] content. *) + +(** {1 API} *) + +val ast_to_markdown : + Global.backend_lang -> + print_only_law:bool -> + Format.formatter -> + Surface.Ast.program -> + unit diff --git a/compiler/manpages.sexp b/compiler/manpages.sexp index 8ef8c1287..ebfdc57b5 100644 --- a/compiler/manpages.sexp +++ b/compiler/manpages.sexp @@ -11,7 +11,9 @@ (rule (alias man) (action (with-stdout-to catala-latex.1 (run %{bin:catala} latex --help=groff)))) (rule (alias man) (action (with-stdout-to catala-lcalc.1 (run %{bin:catala} lcalc --help=groff)))) (rule (alias man) (action (with-stdout-to catala-makefile.1 (run %{bin:catala} makefile --help=groff)))) +(rule (alias man) (action (with-stdout-to catala-markdown.1 (run %{bin:catala} markdown --help=groff)))) (rule (alias man) (action (with-stdout-to catala-ocaml.1 (run %{bin:catala} ocaml --help=groff)))) +(rule (alias man) (action (with-stdout-to catala-pdf.1 (run %{bin:catala} pdf --help=groff)))) (rule (alias man) (action (with-stdout-to catala-pygmentize.1 (run %{bin:catala} pygmentize --help=groff)))) (rule (alias man) (action (with-stdout-to catala-python.1 (run %{bin:catala} python --help=groff)))) (rule (alias man) (action (with-stdout-to catala-scalc.1 (run %{bin:catala} scalc --help=groff)))) diff --git a/syntax_highlighting/dune b/syntax_highlighting/dune index 5e969e374..d74d24040 100644 --- a/syntax_highlighting/dune +++ b/syntax_highlighting/dune @@ -5,3 +5,4 @@ (emacs/catala-mode.el as emacs/site-lisp/catala-mode.el)) (section share_root) (package catala)) + diff --git a/syntax_highlighting/en/pandoc/catala_en.xml b/syntax_highlighting/en/pandoc/catala_en.xml new file mode 100644 index 000000000..1f89e682f --- /dev/null +++ b/syntax_highlighting/en/pandoc/catala_en.xml @@ -0,0 +1,185 @@ + + + + + + + + + +]> + + + + + + + match + fixed + by + down + up + varies + with + let + in + scope + declaration + includes + content + type + rule + condition + data + consequence + fulfilled + equals + assertion + definition + state + label + exception + anything + context + input + output + internal + contains + number + sum + exists + for + all + of + if + then + else + is + among + maximum + minimum + round + combine + to + initially + impossible + + + + true + false + + + + structure + enumeration + integer + boolean + date + duration + money + code_location + decimal + + + + + not + or + xor + and + year + month + day + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/syntax_highlighting/fr/pandoc/catala_fr.xml b/syntax_highlighting/fr/pandoc/catala_fr.xml new file mode 100644 index 000000000..8f541eb62 --- /dev/null +++ b/syntax_highlighting/fr/pandoc/catala_fr.xml @@ -0,0 +1,190 @@ + + + + + + + + + +]> + + + + + + + + selon + fixé + par + inférieur + supérieur + varie + avec + soit + dans + déclaration + inclusion + contenu + type + règle + condition + donnée + conséquence + rempli + assertion + définition + état + étiquette + exception + + contexte + entrée + résultat + interne + contient + nombre + somme + existe + pour + tout + de + si + alors + sinon + est + parmi + maximum + minimum + arrondi + combine + en + initialement + impossible + + + + vrai + faux + + + + structure + énumération + entier + booléen + date + durée + argent + position_source + décimal + décret + loi + + + + + non + ou + et + an + mois + jour + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +