From e1c6aeeeed58dcb23d0d2953c7126012d5b46dce Mon Sep 17 00:00:00 2001 From: pena Date: Mon, 17 Jan 2022 18:10:12 +0100 Subject: [PATCH] first commit --- .gitignore | 1 + .ocamlformat | 43 +++++++ CHANGES.md | 30 +++++ LICENSE.md | 8 ++ README.md | 74 ++++++++++++ doc/dune | 3 + doc/index.mld | 23 ++++ dune-project | 48 ++++++++ example/dune | 4 + example/main.ml | 34 ++++++ example/main.scfg | 7 ++ scfg.opam | 42 +++++++ shell.nix | 32 +++++ src/dune | 22 ++++ src/lexer.ml | 111 ++++++++++++++++++ src/menhir_parser.mly | 26 ++++ src/parse.ml | 59 ++++++++++ src/pp.ml | 52 ++++++++ src/query.ml | 127 ++++++++++++++++++++ src/scfg.ml | 28 +++++ src/schema.ml | 150 ++++++++++++++++++++++++ src/schema.mli | 73 ++++++++++++ src/types.ml | 12 ++ test/cram/bug1.scfg | 10 ++ test/cram/bug2.scfg | 1 + test/cram/bug3.scfg | 1 + test/cram/dune | 10 ++ test/cram/lex_error.scfg | 2 + test/cram/parse_error1.scfg | 1 + test/cram/parse_error2.scfg | 1 + test/cram/parse_error3.scfg | 1 + test/cram/parse_error4.scfg | 1 + test/cram/parse_start_with_newline.scfg | 4 + test/cram/test.t | 65 ++++++++++ test/cram/test1.scfg | 22 ++++ test/fuzz/dune | 6 + test/fuzz/fuzz.ml | 18 +++ test/fuzz/gen.ml | 21 ++++ test/unit/dune | 5 + test/unit/main.ml | 115 ++++++++++++++++++ test/unit/query.scfg | 11 ++ test/unit/test_chan.scfg | 1 + 42 files changed, 1305 insertions(+) create mode 100644 .gitignore create mode 100644 .ocamlformat create mode 100644 CHANGES.md create mode 100644 LICENSE.md create mode 100644 README.md create mode 100644 doc/dune create mode 100644 doc/index.mld create mode 100644 dune-project create mode 100644 example/dune create mode 100644 example/main.ml create mode 100644 example/main.scfg create mode 100644 scfg.opam create mode 100644 shell.nix create mode 100644 src/dune create mode 100644 src/lexer.ml create mode 100644 src/menhir_parser.mly create mode 100644 src/parse.ml create mode 100644 src/pp.ml create mode 100644 src/query.ml create mode 100644 src/scfg.ml create mode 100644 src/schema.ml create mode 100644 src/schema.mli create mode 100644 src/types.ml create mode 100644 test/cram/bug1.scfg create mode 100644 test/cram/bug2.scfg create mode 100644 test/cram/bug3.scfg create mode 100644 test/cram/dune create mode 100644 test/cram/lex_error.scfg create mode 100644 test/cram/parse_error1.scfg create mode 100644 test/cram/parse_error2.scfg create mode 100644 test/cram/parse_error3.scfg create mode 100644 test/cram/parse_error4.scfg create mode 100644 test/cram/parse_start_with_newline.scfg create mode 100644 test/cram/test.t create mode 100644 test/cram/test1.scfg create mode 100644 test/fuzz/dune create mode 100644 test/fuzz/fuzz.ml create mode 100644 test/fuzz/gen.ml create mode 100644 test/unit/dune create mode 100644 test/unit/main.ml create mode 100644 test/unit/query.scfg create mode 100644 test/unit/test_chan.scfg diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e35d885 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +_build diff --git a/.ocamlformat b/.ocamlformat new file mode 100644 index 0000000..1701629 --- /dev/null +++ b/.ocamlformat @@ -0,0 +1,43 @@ +version=0.28.1 +assignment-operator=end-line +break-cases=fit +break-fun-decl=wrap +break-fun-sig=wrap +break-infix=wrap +break-infix-before-func=false +break-separators=before +break-sequences=true +cases-exp-indent=2 +cases-matching-exp-indent=normal +doc-comments=before +doc-comments-padding=2 +doc-comments-tag-only=default +dock-collection-brackets=false +exp-grouping=preserve +field-space=loose +if-then-else=compact +indicate-multiline-delimiters=space +indicate-nested-or-patterns=unsafe-no +infix-precedence=indent +leading-nested-match-parens=false +let-and=sparse +let-binding-spacing=compact +let-module=compact +margin=80 +max-indent=2 +module-item-spacing=sparse +ocaml-version=4.14.0 +ocp-indent-compat=false +parens-ite=false +parens-tuple=always +parse-docstrings=true +sequence-blank-line=preserve-one +sequence-style=terminator +single-case=compact +space-around-arrays=true +space-around-lists=true +space-around-records=true +space-around-variants=true +type-decl=sparse +wrap-comments=false +wrap-fun-args=true diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 0000000..3afb6b8 --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,30 @@ +## unreleased + +- add lower bound on cmdliner + +## 0.5 - 2025-02-04 + +- update prelude version used +- add missing dependencies on fmt, bos and fpath + +## 0.4 - 2025-01-28 + +- use prelude +- use cmdliner + +## 0.3 - 2024-11-18 + +- add some float functions to `Query` +- add some `_exn` versions of `Query.get` functions +- better parsing +- better tests + +## 0.2 - 2023-01-23 + +- add `get_param_bool`, `get_param_int`, `get_param_pos_int` to the `Query` module +- fix bug with files starting with newlines +- use a proper Format box instead of an int to indent + +## 0.1 - 2022-01-30 + +- first release diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..2e7dce4 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,8 @@ +The ISC License (ISC) +===================== + +Copyright © 2022, Léo Andrès + +Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..fc1c2dc --- /dev/null +++ b/README.md @@ -0,0 +1,74 @@ +# scfg + +scfg is an [OCaml] executable and library to work with the [scfg configuration file format]. + +## Installation + +`scfg` can be installed with [opam]: + +```sh +opam install scfg +``` + +If you don't have `opam`, you can install it following the [how to install opam] guide. + +If you can't or don't want to use `opam`, consult the [opam file] for build instructions. + +## Quickstart + +Using the library to parse a `scfg` file and reprint nicely: + +```ocaml +open Scfg + +let config = + match Parse.from_file "config.scfg" with + | Ok config -> config + | Error e -> begin + Format.eprintf "error: %s@." e; + exit 1 + end + +let () = + Format.printf "%a@." Pp.config config +``` + +The provided binary does exactly this. If you have the following `config.scfg` file: + +```scfg + name "a" "b b b" 'c' { + + + child1 "" "I'm léo" + + child2 'nono' + } +``` + +Running the binary on it will reprint it trying to make the output pretty: + +```shell-session +$ scfg config.scfg +name a "b b b" c { + child1 "" "I'm léo" + child2 nono +} +``` + +For more, have a look at the [example] folder or at the [test suite]. + +## About + +- [LICENSE] +- [CHANGELOG] + +[CHANGELOG]: ./CHANGES.md +[example]: ./example +[LICENSE]: ./LICENSE.md +[opam file]: ./scfg.opam +[test suite]: ./test + +[how to install opam]: https://opam.ocaml.org/doc/Install.html +[OCaml]: https://ocaml.org +[opam]: https://opam.ocaml.org +[scfg configuration file format]: https://git.sr.ht/~emersion/scfg diff --git a/doc/dune b/doc/dune new file mode 100644 index 0000000..2f275ea --- /dev/null +++ b/doc/dune @@ -0,0 +1,3 @@ +(documentation + (package scfg) + (mld_files index)) diff --git a/doc/index.mld b/doc/index.mld new file mode 100644 index 0000000..7b39257 --- /dev/null +++ b/doc/index.mld @@ -0,0 +1,23 @@ +{0 scfg} + +scfg is an {{:https://ocaml.org} OCaml} library/executable to work with the {{:https://git.sr.ht/~emersion/scfg} scfg configuration file format}. + +{1:api API} + + +{!modules: +Scfg.Parse +Scfg.Pp +Scfg.Query +Scfg.Types +} + + +{1:private_api Private API} + +You shouldn't have to use any of these modules, they're used internally only. + +{!modules: +Scfg.Lexer +Scfg.Menhir_parser +} diff --git a/dune-project b/dune-project new file mode 100644 index 0000000..e561e73 --- /dev/null +++ b/dune-project @@ -0,0 +1,48 @@ +(lang dune 2.9) + +(implicit_transitive_deps false) + +(cram enable) + +(name scfg) + +(license ISC) + +(authors "pena ") + +(maintainers "pena ") + +(source + (uri git+https://forge.kumikode.org/kumikode/scfg.git)) + +(homepage https://forge.kumikode.org/kumikode/scfg) + +(bug_reports https://forge.kumikode.org/kumikode/scfg/issues) + +(generate_opam_files true) + +(package + (name scfg) + (synopsis + "OCaml library and executable to work with the scfg configuration file format") + (description + "scfg is an OCaml library and executable to work with the scfg configuration file format. It provides a parser, a pretty printer and a module to perform queries.") + (tags + (scfg configuration format simple config parser printer)) + (depends + (bos + (>= 0.2.1)) + (cmdliner + (>= 1.3.0)) + (crowbar :with-test) + fmt + fpath + (menhir + (>= 20211230)) + (ocaml + (>= 5.3)) + (prelude + (>= 0.5)) + sedlex)) + +(using menhir 2.1) diff --git a/example/dune b/example/dune new file mode 100644 index 0000000..8efe969 --- /dev/null +++ b/example/dune @@ -0,0 +1,4 @@ +(executable + (name main) + (modules main) + (libraries fpath scfg)) diff --git a/example/main.ml b/example/main.ml new file mode 100644 index 0000000..55a21f2 --- /dev/null +++ b/example/main.ml @@ -0,0 +1,34 @@ +(* run on the `main.scfg` file in this directory *) +let () = + if Array.length Sys.argv <> 2 then begin + Format.eprintf "usage: %s @\n" Sys.argv.(0); + exit 1 + end + +(* parsing file path *) +let filepath = + match Fpath.of_string Sys.argv.(1) with + | Error (`Msg e) -> + Format.eprintf "error: %s@\n" e; + exit 1 + | Ok path -> path + +(* parsing the file *) +let config = + match Scfg.Parse.from_file filepath with + | Error (`Msg e) -> + Format.eprintf "error: %s@\n" e; + exit 1 + | Ok config -> config + +(* printing the file *) +let () = Format.printf "```scfg@\n%a@\n```@\n" Scfg.Pp.config config + +(* querying the file *) +let () = + match Scfg.Query.get_dir "train" config with + | None -> Format.printf "No train found.@\n" + | Some train -> ( + match Scfg.Query.get_param 0 train with + | Error _e -> Format.printf "Train has no name.@\n" + | Ok name -> Format.printf "The first train is `%s`.@\n" name ) diff --git a/example/main.scfg b/example/main.scfg new file mode 100644 index 0000000..d41ab2f --- /dev/null +++ b/example/main.scfg @@ -0,0 +1,7 @@ +train A-Train { + bla bla bla +} + +train "John Col Train" { + tut tut tut +} diff --git a/scfg.opam b/scfg.opam new file mode 100644 index 0000000..580a2f6 --- /dev/null +++ b/scfg.opam @@ -0,0 +1,42 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: + "OCaml library and executable to work with the scfg configuration file format" +description: + "scfg is an OCaml library and executable to work with the scfg configuration file format. It provides a parser, a pretty printer and a module to perform queries." +maintainer: ["pena "] +authors: ["pena "] +license: "ISC" +tags: ["scfg" "configuration" "format" "simple" "config" "parser" "printer"] +homepage: "https://forge.kumikode.org/kumikode/scfg" +bug-reports: "https://forge.kumikode.org/kumikode/scfg/issues" +depends: [ + "dune" {>= "2.9"} + "bos" {>= "0.2.1"} + "cmdliner" {>= "1.3.0"} + "crowbar" {with-test} + "fmt" + "fpath" + "menhir" {>= "20211230"} + "ocaml" {>= "5.3"} + "prelude" {>= "0.5"} + "sedlex" + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "--promote-install-files=false" + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] + ["dune" "install" "-p" name "--create-install-files" name] +] +dev-repo: "git+https://forge.kumikode.org/kumikode/scfg.git" diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..ea86ef1 --- /dev/null +++ b/shell.nix @@ -0,0 +1,32 @@ +{ pkgs ? import { } }: + +let + ocamlPackages = pkgs.ocaml-ng.ocamlPackages_5_3; +in +pkgs.mkShell { + name = "frost"; + dontDetectOcamlConflicts = false; + nativeBuildInputs = with ocamlPackages; [ + dune_3 + findlib + merlin + menhir + ocaml + ocamlformat + odoc + crowbar + ]; + buildInputs = with ocamlPackages; [ + bos + cmdliner + fmt + fpath + menhirLib + prelude + sedlex + ]; + shellHook = '' + export PATH=$PATH:${pkgs.lib.makeBinPath [ + ]} + ''; +} diff --git a/src/dune b/src/dune new file mode 100644 index 0000000..27e569c --- /dev/null +++ b/src/dune @@ -0,0 +1,22 @@ +(library + (public_name scfg) + (modules lexer menhir_parser parse pp query schema types) + (private_modules lexer menhir_parser) + (preprocess + (pps sedlex.ppx)) + (libraries bos fmt fpath menhirLib prelude sedlex) + (flags + (:standard -open Prelude)) + (instrumentation + (backend bisect_ppx))) + +(executable + (public_name scfg) + (modules scfg) + (flags + (:standard -open Prelude)) + (libraries cmdliner fmt fpath prelude scfg)) + +(menhir + (flags --table) + (modules menhir_parser)) diff --git a/src/lexer.ml b/src/lexer.ml new file mode 100644 index 0000000..b113aaa --- /dev/null +++ b/src/lexer.ml @@ -0,0 +1,111 @@ +open Sedlexing +open Menhir_parser + +let ctl = [%sedlex.regexp? '\x00' .. '\x1f' | '\x7F'] + +let wsp = [%sedlex.regexp? ' ' | '\t'] + +let vchar = [%sedlex.regexp? Sub (any, ctl)] + +let sqchar = [%sedlex.regexp? Sub (any, (ctl | '\''))] + +let dqchar = [%sedlex.regexp? Sub (any, (ctl | '"' | '\\'))] + +let achar = + [%sedlex.regexp? Sub (any, (ctl | '"' | '\\' | '\'' | '{' | '}' | wsp))] + +let newline = + [%sedlex.regexp? + Plus (Star wsp, '\n', Star wsp, Opt ('#', Star (Sub (any, '\n'))))] + +let esc_pair = [%sedlex.regexp? '\\', vchar] + +let squote_word = [%sedlex.regexp? '\'', Star sqchar, '\''] + +let dquote_word = [%sedlex.regexp? '"', Star (dqchar | esc_pair), '"'] + +let atom = [%sedlex.regexp? Plus (achar | esc_pair)] + +let string_of_atom s = + let b = Buffer.create (String.length s) in + let i = ref 0 in + while !i < String.length s do + let c = + if not @@ Char.equal '\\' s.[!i] then s.[!i] + else + match + incr i; + s.[!i] + with + | 'n' -> '\n' + | 'r' -> '\r' + | 't' -> '\t' + | '\\' -> '\\' + | '\'' -> '\'' + | '"' -> '"' + | '{' -> '{' + | '}' -> '}' + | _c -> assert false + in + Buffer.add_char b c; + incr i + done; + Buffer.contents b + +let string_of_dqword s = + let b = Buffer.create (String.length s) in + let i = ref 0 in + while !i < String.length s do + let c = + if not @@ Char.equal '\\' s.[!i] then s.[!i] + else + match + incr i; + s.[!i] + with + | 'n' -> '\n' + | 'r' -> '\r' + | 't' -> '\t' + | '\\' -> '\\' + | '"' -> '"' + | _c -> assert false + in + Buffer.add_char b c; + incr i + done; + Buffer.contents b + +exception Error of string + +let error msg = raise @@ Error msg + +let rec token buf = + match%sedlex buf with + (* 1 *) + | "{", newline -> LBRACE + | newline, "}" -> RBRACE + (* other *) + | wsp -> token buf + | Opt newline, eof -> EOF + | newline -> NEWLINE + | atom -> + let word = Utf8.lexeme buf in + let word = string_of_atom word in + WORD word + | dquote_word -> + let word = Utf8.lexeme buf in + let word = String.sub word 1 (String.length word - 2) in + let word = string_of_dqword word in + WORD word + | squote_word -> + let word = Utf8.lexeme buf in + let word = String.sub word 1 (String.length word - 2) in + WORD word + | any -> + let invalid = Utf8.lexeme buf in + let start, _stop = Sedlexing.lexing_positions buf in + Fmt.kstr error "File %s, line %i, character %i: unexpected lexeme `%s`" + start.pos_fname start.pos_lnum + (start.pos_cnum - start.pos_bol) + invalid + | _ -> assert false diff --git a/src/menhir_parser.mly b/src/menhir_parser.mly new file mode 100644 index 0000000..46b3051 --- /dev/null +++ b/src/menhir_parser.mly @@ -0,0 +1,26 @@ +%token WORD +%token LBRACE +%token RBRACE +%token NEWLINE +%token EOF + +%{ open Types %} + +%start config + +%% + +let params == +| ~ = list(WORD); <> + +let children := +| LBRACE; ~ = separated_list(NEWLINE, directive); RBRACE; <> +| { [] } + +let directive := +| name = WORD; ~ = params; ~ = children; { + { name; params; children } +} + +let config := +| option(NEWLINE); ~ = separated_list(NEWLINE, directive); EOF; <> diff --git a/src/parse.ml b/src/parse.ml new file mode 100644 index 0000000..cda15bb --- /dev/null +++ b/src/parse.ml @@ -0,0 +1,59 @@ +(** Module providing functions to parse a config from various kind of inputs. *) + +(** Pretty print a token *) +let pp_token fmt = function + | Menhir_parser.WORD s -> Fmt.pf fmt "WORD %s" s + | LBRACE -> Fmt.string fmt "LBRACE" + | RBRACE -> Fmt.string fmt "RBRACE" + | NEWLINE -> Fmt.string fmt "NEWLINE" + | EOF -> Fmt.string fmt "EOF" + +(** Parse a config from a lexing buffer. *) +let from_lexbuf = + let parser = + MenhirLib.Convert.Simplified.traditional2revised Menhir_parser.config + in + fun buf -> + let last_token = ref None in + let provider () = + let tok = Lexer.token buf in + let start, stop = Sedlexing.lexing_positions buf in + last_token := Some tok; + (tok, start, stop) + in + try Ok (parser provider) with + | Menhir_parser.Error -> + let start, _stop = Sedlexing.lexing_positions buf in + Fmt.error_msg "File %s, line %i, character %i: unexpected token %a" + start.pos_fname start.pos_lnum + (start.pos_cnum - start.pos_bol) + (Fmt.option pp_token) !last_token + | Lexer.Error msg -> Error (`Msg msg) + +(** Parse a config from a string. *) +let from_string s = from_lexbuf (Sedlexing.Utf8.from_string s) +(* + let filename = Filename.temp_file "scfg" "scfg" in + let chan = open_out_bin filename in + output_string chan s; + close_out_noerr chan; + let chan = open_in_bin filename in + from_lexbuf (Sedlexing.Utf8.from_channel chan) + *) + +(** Parse a config from a channel. *) +let from_channel c = from_lexbuf (Sedlexing.Utf8.from_channel c) + +(** Parse a config from a file. *) +let from_file f = + match + Bos.OS.File.with_ic f + (fun chan () -> + let lexbuf = Sedlexing.Utf8.from_channel chan in + Sedlexing.set_filename lexbuf (Fpath.to_string f); + from_lexbuf lexbuf ) + () + with + | Error _ as e -> e + | Ok (Error _ as e) -> e + | Ok (Ok _ as ok) -> ok diff --git a/src/pp.ml b/src/pp.ml new file mode 100644 index 0000000..ad6e398 --- /dev/null +++ b/src/pp.ml @@ -0,0 +1,52 @@ +(** Module providing functions to pretty print a config or parts of a config. *) + +open Types + +(** Print a name or a parameter on a given formatter. The function will try to + print with as low quoting as possible. *) +let param = + let chars_to_quote = Hashtbl.create 512 in + Array.iter + (fun c -> Hashtbl.replace chars_to_quote c ()) + [| ' '; '{'; '}'; '"'; '\\'; '\''; '\n'; '\r'; '\t' |]; + fun fmt param -> + if String.length param = 0 then Fmt.string fmt {|""|} + else if String.exists (Hashtbl.mem chars_to_quote) param then begin + if String.contains param '"' && not (String.contains param '\'') then + Fmt.pf fmt {|'%s'|} param + else + let buf = Buffer.create (String.length param) in + String.iter + (function + | '\n' -> Buffer.add_string buf "\\n" + | '\r' -> Buffer.add_string buf "\\r" + | '\t' -> Buffer.add_string buf "\\t" + | '"' -> Buffer.add_string buf "\\\"" + | '\\' -> Buffer.add_string buf "\\\\" + | c -> Buffer.add_char buf c ) + param; + let param = Buffer.contents buf in + Fmt.pf fmt {|"%s"|} param + end + else Fmt.string fmt param + +(** Print a list of parameters on a given formatter. *) +let params fmt = function + | [] -> () + | params -> + Fmt.pf fmt " %a" + (Fmt.list ~sep:(fun fmt () -> Fmt.string fmt " ") param) + params + +(** Print children of a directive on a given formatter. *) +let rec children fmt = function + | [] -> () + | children -> Fmt.pf fmt " {@\n @[%a@]@\n}" config children + +(** Print a directive on a given formatter. *) +and directive fmt d = + Fmt.pf fmt {|%a%a%a|} param d.name params d.params children d.children + +(** Print a config on a given formatter. *) +and config fmt (config : Types.config) = + Fmt.list ~sep:(fun fmt () -> Fmt.pf fmt "@\n") directive fmt config diff --git a/src/query.ml b/src/query.ml new file mode 100644 index 0000000..7d45656 --- /dev/null +++ b/src/query.ml @@ -0,0 +1,127 @@ +(** Module providing functions to search inside a config. *) + +open Types + +(** Returns a list of directives with the provided name from a list of + directives. *) +let get_dirs name directives = + List.filter (fun directive -> String.equal directive.name name) directives + +(** Returns the first directive with the provided name from a list of directive. +*) +let get_dir name directives = + List.find_opt (fun directive -> String.equal directive.name name) directives + +(** Same as [get_dir] but raises if no directive is found. *) +let get_dir_exn name directives = + match get_dir name directives with + | None -> Fmt.failwith "missing directive %a" Pp.param name + | Some dir -> dir + +(** Extract a given number of parameters from a directive. *) +let get_params n directive = + let len = List.length directive.params in + if len < n then + Fmt.error_msg "directive %a: want %d params, got only %d" Pp.param + directive.name n len + else Ok (List.filteri (fun i _param -> i < n) directive.params) + +(** Extract a parameter at a given index from a directive. *) +let get_param n directive = + let params = directive.params in + match List.nth_opt params n with + | None -> + Fmt.error_msg "directive %a: want param at index %d, got only %d" Pp.param + directive.name n (List.length params) + | Some param -> Ok param + +(** Same as [get_param] but raises if an error occurs. *) +let get_param_exn n directive = + match get_param n directive with + | Ok v -> v + | Error (`Msg msg) -> Fmt.failwith "%s" msg + +(** Extract a bool parameter at a given index from a directive. *) +let get_param_bool n directive = + let params = directive.params in + match List.nth_opt params n with + | None -> + Fmt.error_msg "directive %a: want param at index %d, got only %d" Pp.param + directive.name n (List.length params) + | Some param -> ( + match bool_of_string_opt param with + | None -> + Fmt.error_msg "directive %a: want bool param at index %d, but got `%s`" + Pp.param directive.name n param + | Some b -> Ok b ) + +(** Same as [get_param_bool] but raises if an error occurs. *) +let get_param_bool_exn n directive = + match get_param_bool n directive with + | Ok v -> v + | Error (`Msg msg) -> Fmt.failwith "%s" msg + +(** Extract an int parameter at a given index from a directive. *) +let get_param_int n directive = + let params = directive.params in + match List.nth_opt params n with + | None -> + Fmt.error_msg "directive %a: want param at index %d, got only %d" Pp.param + directive.name n (List.length params) + | Some param -> ( + match int_of_string_opt param with + | None -> + Fmt.error_msg "directive %a: want int param at index %d, but got %s" + Pp.param directive.name n param + | Some v -> Ok v ) + +(** Same as [get_param_int] but raises if an error occurs. *) +let get_param_int_exn n directive = + match get_param_int n directive with + | Ok v -> v + | Error (`Msg msg) -> Fmt.failwith "%s" msg + +(** Extract a positive int parameter at a given index from a directive. *) +let get_param_pos_int n directive = + let params = directive.params in + match List.nth_opt params n with + | None -> + Fmt.error_msg "directive %a: want param at index %d, got only %d" Pp.param + directive.name n (List.length params) + | Some param -> ( + match int_of_string_opt param with + | None -> + Fmt.error_msg "directive %a: want int param at index %d, but got %s" + Pp.param directive.name n param + | Some n -> + if n < 0 then + Fmt.error_msg + "directive %a: want positive int param at index %d, but got %d" + Pp.param directive.name n n + else Ok n ) + +(** Same as [get_param_pos_int] but raises if an error occurs. *) +let get_param_pos_int_exn n directive = + match get_param_pos_int n directive with + | Ok v -> v + | Error (`Msg msg) -> Fmt.failwith "%s" msg + +(** Extract a float parameter at a given index from a directive. *) +let get_param_float n directive = + let params = directive.params in + match List.nth_opt params n with + | None -> + Fmt.error_msg "directive %a: want param at index %d, got only %d" Pp.param + directive.name n (List.length params) + | Some param -> ( + match float_of_string_opt param with + | None -> + Fmt.error_msg "directive %a: want float param at index %d, but got %s" + Pp.param directive.name n param + | Some f -> Ok f ) + +(** Same as [get_param_float] but raises if an error occurs. *) +let get_param_float_exn n directive = + match get_param_float n directive with + | Ok v -> v + | Error (`Msg msg) -> Fmt.failwith "%s" msg diff --git a/src/scfg.ml b/src/scfg.ml new file mode 100644 index 0000000..1174dd9 --- /dev/null +++ b/src/scfg.ml @@ -0,0 +1,28 @@ +open Cmdliner + +(* Helpers *) +let existing_file_conv = Arg.conv (Fpath.of_string, Fpath.pp) + +(* Terms *) +let config = + let doc = "Input file" in + Arg.(required & pos 0 (some existing_file_conv) None (info [] ~doc)) + +(* Command *) +let pp_cmd = + let open Term.Syntax in + let+ config in + match Scfg.Parse.from_file config with + | Error (`Msg e) -> + Fmt.epr "%s" e; + exit 1 + | Ok config -> Fmt.pr "%a@." Scfg.Pp.config config + +let pp_info = + let doc = "Format scfg files." in + let man = [ `S Manpage.s_bugs; `P "pena " ] in + Cmd.info "scfg" ~version:"%%VERSION%%" ~doc ~man + +let cli = Cmd.v pp_info pp_cmd + +let () = exit @@ Cmdliner.Cmd.eval cli diff --git a/src/schema.ml b/src/schema.ml new file mode 100644 index 0000000..0d01c1a --- /dev/null +++ b/src/schema.ml @@ -0,0 +1,150 @@ +type nil = Nil + +type param + +type dir + +type ('a, 'b) directive = + { name : string + ; params : 'a + ; children : 'b + } + +module Field_type = struct + type _ t = + | String : string t + | Bool : bool t + | Int : int t + | Float : float t +end + +type (_, _) t = + | Field : 'a Field_type.t -> ('a, param) t + | Directive : + string * ('a, param) t * ('b, dir) t + -> (('a, 'b) directive, dir) t + | Product : ('i * ('a, 'i, 'k) product) -> ('a, 'k) t + +and (_, _, _) product = + | Proj_end : ('a, 'a, 'k) product + | Proj : (('b, 'k) t * ('a, 'i, 'k) product) -> ('a, 'b -> 'i, 'k) product + +type _ kt = + | P : string list -> param kt + | D : Types.directive list -> dir kt + +exception Schema_exn of string + +let error msg = raise (Schema_exn (Fmt.str "invalid schema: %s" msg)) + +let rec length : type a k. (a, k) t -> int = + fun ty -> + match ty with + | Field _ -> 1 + | Directive _ -> 1 + | Product (_i, prod) -> length_prod prod + +and length_prod : type a b k. (a, b, k) product -> int = + fun p -> + match p with Proj_end -> 0 | Proj (tk, prod) -> length tk + length_prod prod + +let conv_field : type a. a Field_type.t -> string -> a = + fun ty v -> + match ty with + | String -> v + | Bool -> ( + match bool_of_string_opt v with + | None -> error (Fmt.str "%s does not match a bool" v) + | Some b -> b ) + | Int -> ( + match int_of_string_opt v with + | None -> error (Fmt.str "%s does not match a int" v) + | Some b -> b ) + | Float -> ( + match float_of_string_opt v with + | None -> error (Fmt.str "%s does not match a float" v) + | Some b -> b ) + +let singleton l = match l with [ v ] -> v | _ -> error "expected singleton" + +let rec conv : type a k. (a, k) t -> k kt -> a = + fun ty k -> + match (ty, k) with + | Field ty, P l -> conv_field ty (singleton l) + | Directive (name, p_ty, c_ty), D l -> ( + let v = singleton l in + match String.equal v.name name with + | false -> error (Fmt.str "name mismatch: `%s` <> `%s`" name v.name) + | true -> + let params = conv p_ty (P v.params) in + let children = conv c_ty (D v.children) in + { name; params; children } ) + | Product ty, k -> conv_prod ty k + +and conv_prod : type a i k. i * (a, i, k) product -> k kt -> a = + fun (intro, ty) k -> + match (ty, k) with + | Proj_end, P l -> ( + match l with [] -> intro | _ -> error "too many items" ) + | Proj_end, D l -> ( + match l with [] -> intro | _ -> error "too many items" ) + | Proj (ty, prod), P l -> + let i = length ty in + let l1 = List.take i l in + let l2 = List.drop i l in + let b = conv ty (P l1) in + let a = conv_prod (intro b, prod) (P l2) in + a + | Proj (ty, prod), D l -> + let i = length ty in + let l1 = List.take i l in + let l2 = List.drop i l in + let b = conv ty (D l1) in + let a = conv_prod (intro b, prod) (D l2) in + a + +let string = Field Field_type.String + +let bool = Field Field_type.Bool + +let int = Field Field_type.Int + +let float = Field Field_type.Float + +let directive name params children = Directive (name, params, children) + +let nil = Product (Nil, Proj_end) + +let product intro prod = Product (intro, prod) + +let proj t prod = Proj (t, prod) + +let proj_end = Proj_end + +let custom : type a b k. (a -> (b, string) result) -> (a, k) t -> (b, k) t = + fun decode t -> + let intro s = match decode s with Ok x -> x | Error msg -> error msg in + product intro @@ proj t @@ proj_end + +let t1 t1 = + let intro = fun x1 -> x1 in + product intro @@ proj t1 proj_end + +let t2 t1 t2 = + let intro = fun x1 x2 -> (x1, x2) in + product intro @@ proj t1 @@ proj t2 proj_end + +let t3 t1 t2 t3 = + let intro = fun x1 x2 x3 -> (x1, x2, x3) in + product intro @@ proj t1 @@ proj t2 @@ proj t3 proj_end + +let t4 t1 t2 t3 t4 = + let intro = fun x1 x2 x3 x4 -> (x1, x2, x3, x4) in + product intro @@ proj t1 @@ proj t2 @@ proj t3 @@ proj t4 proj_end + +let t5 t1 t2 t3 t4 t5 = + let intro = fun x1 x2 x3 x4 x5 -> (x1, x2, x3, x4, x5) in + product intro @@ proj t1 @@ proj t2 @@ proj t3 @@ proj t4 @@ proj t5 proj_end + +let conv schema (config : Types.config) = + try Ok (conv schema (D config)) with Schema_exn s -> Error (`Msg s) diff --git a/src/schema.mli b/src/schema.mli new file mode 100644 index 0000000..a0dd5ce --- /dev/null +++ b/src/schema.mli @@ -0,0 +1,73 @@ +(* inspired by caqti: + https://github.com/paurkedal/ocaml-caqti/blob/master/caqti/lib-template/row_type.mli *) + +(** Module providing types and functions to define a config schema. *) + +type nil = Nil + +type param + +type dir + +type ('a, 'b) directive = + { name : string + ; params : 'a + ; children : 'b + } + +module Field_type : sig + type _ t = + | String : string t + | Bool : bool t + | Int : int t + | Float : float t +end + +type (_, _) t = + | Field : 'a Field_type.t -> ('a, param) t + | Directive : + string * ('a, param) t * ('b, dir) t + -> (('a, 'b) directive, dir) t + | Product : ('i * ('a, 'i, 'k) product) -> ('a, 'k) t + +and (_, _, _) product = + | Proj_end : ('a, 'a, 'k) product + | Proj : (('b, 'k) t * ('a, 'i, 'k) product) -> ('a, 'b -> 'i, 'k) product + +val string : (string, param) t + +val bool : (bool, param) t + +val int : (int, param) t + +val float : (float, param) t + +val directive : + string -> ('a, param) t -> ('b, dir) t -> (('a, 'b) directive, dir) t + +val custom : ('a -> ('b, string) result) -> ('a, 'k) t -> ('b, 'k) t + +val nil : (nil, 'k) t + +val t1 : ('a, 'k) t -> ('a, 'k) t + +val t2 : ('a, 'k) t -> ('b, 'k) t -> ('a * 'b, 'k) t + +val t3 : ('a, 'k) t -> ('b, 'k) t -> ('c, 'k) t -> ('a * 'b * 'c, 'k) t + +val t4 : + ('a, 'k) t + -> ('b, 'k) t + -> ('c, 'k) t + -> ('d, 'k) t + -> ('a * 'b * 'c * 'd, 'k) t + +val t5 : + ('a, 'k) t + -> ('b, 'k) t + -> ('c, 'k) t + -> ('d, 'k) t + -> ('e, 'k) t + -> ('a * 'b * 'c * 'd * 'e, 'k) t + +val conv : ('a, dir) t -> Types.config -> ('a, [ `Msg of string ]) result diff --git a/src/types.ml b/src/types.ml new file mode 100644 index 0000000..07b2412 --- /dev/null +++ b/src/types.ml @@ -0,0 +1,12 @@ +(** Module defining types used to represent a config. *) + +(** A directive has a name, a list of parameters and children (a list of + directive). *) +type directive = + { name : string + ; params : string list + ; children : directive list + } + +(** A config is a list of directives. *) +type config = directive list diff --git a/test/cram/bug1.scfg b/test/cram/bug1.scfg new file mode 100644 index 0000000..4d5129c --- /dev/null +++ b/test/cram/bug1.scfg @@ -0,0 +1,10 @@ +"" { + "" { + "" + } + "" { + "" + } + "" "" + "" +} diff --git a/test/cram/bug2.scfg b/test/cram/bug2.scfg new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/test/cram/bug2.scfg @@ -0,0 +1 @@ + diff --git a/test/cram/bug3.scfg b/test/cram/bug3.scfg new file mode 100644 index 0000000..1f0b090 --- /dev/null +++ b/test/cram/bug3.scfg @@ -0,0 +1 @@ +edff diff --git a/test/cram/dune b/test/cram/dune new file mode 100644 index 0000000..be9db6c --- /dev/null +++ b/test/cram/dune @@ -0,0 +1,10 @@ +(cram + (deps + %{bin:scfg} + bug1.scfg + bug2.scfg + bug3.scfg + lex_error.scfg + parse_start_with_newline.scfg + (glob_files parse_error*.scfg) + test1.scfg)) diff --git a/test/cram/lex_error.scfg b/test/cram/lex_error.scfg new file mode 100644 index 0000000..e38ad51 --- /dev/null +++ b/test/cram/lex_error.scfg @@ -0,0 +1,2 @@ +a "b +" diff --git a/test/cram/parse_error1.scfg b/test/cram/parse_error1.scfg new file mode 100644 index 0000000..077916c --- /dev/null +++ b/test/cram/parse_error1.scfg @@ -0,0 +1 @@ +a { } } diff --git a/test/cram/parse_error2.scfg b/test/cram/parse_error2.scfg new file mode 100644 index 0000000..af4a964 --- /dev/null +++ b/test/cram/parse_error2.scfg @@ -0,0 +1 @@ +a { diff --git a/test/cram/parse_error3.scfg b/test/cram/parse_error3.scfg new file mode 100644 index 0000000..ae32682 --- /dev/null +++ b/test/cram/parse_error3.scfg @@ -0,0 +1 @@ +a } diff --git a/test/cram/parse_error4.scfg b/test/cram/parse_error4.scfg new file mode 100644 index 0000000..077f6dd --- /dev/null +++ b/test/cram/parse_error4.scfg @@ -0,0 +1 @@ +a {} diff --git a/test/cram/parse_start_with_newline.scfg b/test/cram/parse_start_with_newline.scfg new file mode 100644 index 0000000..1a0c8eb --- /dev/null +++ b/test/cram/parse_start_with_newline.scfg @@ -0,0 +1,4 @@ + +wo lo lo { + a b c +} diff --git a/test/cram/test.t b/test/cram/test.t new file mode 100644 index 0000000..4022cb1 --- /dev/null +++ b/test/cram/test.t @@ -0,0 +1,65 @@ +test 1: + $ dune exec -- scfg test1.scfg + train Shinkansen { + model E5 { + max-speed 320km/h + weight 453.5t + lines-served Tōhoku Hokkaido + } + model E7 { + max-speed 275km/h + weight 540t + lines-served Hokuriku Jōetsu + } + escapetests "A random line" "abracket{line" "anotherbracket}line" 'aquote"line' "anesc\\line" "asq'line" 'adq"line' 'asinglequote"{} \line' 'asinglequote"justewithadq' "dq\"and'sq" + emptytests "" "" + "an escaped name" a b c + namewithoutparam + p "unesc\nnewline" "unesc\rcr" "unesc\ttab" + q "new line \n" "cr \r" "tab \t" + } +test 2: + $ dune exec -- scfg parse_start_with_newline.scfg + wo lo lo { + a b c + } +lex error 1: + $ dune exec -- scfg lex_error.scfg + File lex_error.scfg, line 1, character 2: unexpected lexeme `"` + [1] +parse error 1: + $ dune exec -- scfg parse_error1.scfg + File parse_error1.scfg, line 1, character 2: unexpected lexeme `{` + [1] +parse error 2: + $ dune exec -- scfg parse_error2.scfg + File parse_error2.scfg, line 2, character 0: unexpected token EOF + [1] +parse error 3: + $ dune exec -- scfg parse_error3.scfg + File parse_error3.scfg, line 1, character 2: unexpected lexeme `}` + [1] +parse error 4: + $ dune exec -- scfg parse_error4.scfg + File parse_error4.scfg, line 1, character 2: unexpected lexeme `{` + [1] +bug 1: + $ dune exec -- scfg bug1.scfg + "" { + "" { + "" + } + "" { + "" + } + "" "" + "" + } +bug 2: + $ dune exec -- scfg bug2.scfg + + + +bug 3: + $ dune exec -- scfg bug3.scfg + edff diff --git a/test/cram/test1.scfg b/test/cram/test1.scfg new file mode 100644 index 0000000..0b5b113 --- /dev/null +++ b/test/cram/test1.scfg @@ -0,0 +1,22 @@ +train "Shinkansen" { + model "E5" { + max-speed 320km/h + weight 453.5t + + lines-served "Tōhoku" "Hokkaido" + } + + model "E7" { + max-speed 275km/h + weight 540t + + lines-served "Hokuriku" "Jōetsu" + } + + escapetests "A random line" abracket\{line anotherbracket\}line aquote\"line anesc\\line asq\'line adq\"line 'asinglequote"{} \line' 'asinglequote"justewithadq' "dq\"and'sq" + emptytests "" '' + "an escaped name" a b c + namewithoutparam + p unesc\nnewline unesc\rcr unesc\ttab + q "new line \n" "cr \r" "tab \t" +} diff --git a/test/fuzz/dune b/test/fuzz/dune new file mode 100644 index 0000000..0ce1776 --- /dev/null +++ b/test/fuzz/dune @@ -0,0 +1,6 @@ +(executable + (name fuzz) + (modules fuzz gen) + (flags + (:standard -open Prelude)) + (libraries crowbar fmt prelude scfg)) diff --git a/test/fuzz/fuzz.ml b/test/fuzz/fuzz.ml new file mode 100644 index 0000000..77b154f --- /dev/null +++ b/test/fuzz/fuzz.ml @@ -0,0 +1,18 @@ +open Scfg + +let () = Random.self_init () + +let () = + Crowbar.add_test ~name:"Print and parse fuzzing" [ Gen.config ] (fun config -> + let printed = Fmt.str "%a" Pp.config config in + match Parse.from_string printed with + | Error (`Msg msg) -> + Crowbar.failf "%s on the given input@\n***`%S`@\n***`%s`@\n" msg printed + printed + | Ok config -> ( + let printed = Fmt.str "%a" Pp.config config in + match Parse.from_string printed with + | Error (`Msg msg) -> + Crowbar.failf "%s on the given input@\n***`%S`@\n***`%s`@\n" msg printed + printed + | Ok parsed -> Crowbar.check_eq ~pp:Pp.config config parsed ) ) diff --git a/test/fuzz/gen.ml b/test/fuzz/gen.ml new file mode 100644 index 0000000..a654b39 --- /dev/null +++ b/test/fuzz/gen.ml @@ -0,0 +1,21 @@ +open Crowbar + +let ( let* ) = dynamic_bind + +let ( let+ ) = map + +let unicode_bytes = + let+ values = [ list1 uchar ] in + let buf = Buffer.create (List.length values) in + List.iter (fun u -> Buffer.add_utf_8_uchar buf u) values; + Buffer.contents buf + +let directive = + fix (fun directive -> + let* name = unicode_bytes in + let* params = list unicode_bytes in + let+ children = [ list directive ] in + let open Scfg.Types in + { name; params; children } ) + +let config = list directive diff --git a/test/unit/dune b/test/unit/dune new file mode 100644 index 0000000..2bedb8d --- /dev/null +++ b/test/unit/dune @@ -0,0 +1,5 @@ +(test + (name main) + (modules main) + (libraries fpath prelude scfg) + (deps query.scfg test_chan.scfg)) diff --git a/test/unit/main.ml b/test/unit/main.ml new file mode 100644 index 0000000..ea57265 --- /dev/null +++ b/test/unit/main.ml @@ -0,0 +1,115 @@ +open Scfg + +(** Testing other functions in Parse module. *) +let () = + let chan = open_in "test_chan.scfg" in + match Parse.from_channel chan with + | Error _e -> assert false + | Ok config -> + let expected = "a b c" in + let s = Format.asprintf "%a" Pp.config config in + assert (s = expected) + +let () = + let s = {|a b c|} in + match Parse.from_string s with + | Error (`Msg e) -> + Format.eprintf "ERROR: %s@\n" e; + assert false + | Ok config -> + let expected = {|a b c|} in + let s = Format.asprintf "%a" Pp.config config in + assert (s = expected) + +(** Testing queries. *) +let file = + match Fpath.of_string "query.scfg" with + | Error _e -> assert false + | Ok file -> file + +let () = + let config = + match Parse.from_file file with + | Error _e -> assert false + | Ok config -> config + in + assert (List.length config = 5); + let n1 = Query.get_dirs "n1" config in + assert (List.length n1 = 2); + let n11 = Query.get_dir "n1" n1 in + let n11 = match n11 with None -> assert false | Some n11 -> n11 in + let n12 = + match Query.get_dir "n1.2" n11.children with + | None -> assert false + | Some n12 -> n12 + in + let pn12 = + match Query.get_params 2 n12 with + | Error _e -> assert false + | Ok pn12 -> pn12 + in + assert (pn12 = [ "p1"; "p2" ]); + begin match Query.get_params 3 n12 with + | Error (`Msg "directive n1.2: want 3 params, got only 2") -> () + | Error _ | Ok _ -> assert false + end; + begin match Query.get_param 0 n12 with + | Error _ -> assert false + | Ok p -> assert (p = "p1") + end; + match Query.get_param 5 n12 with + | Error (`Msg "directive n1.2: want param at index 5, got only 2") -> () + | Error _ | Ok _ -> assert false + +(** Testing schema. *) +module Test_schema = struct + open Schema + + let rev_string : (string, param) t -> (string, param) t = + let decode = + fun s -> + let len = String.length s in + let s_rev = String.init len (fun i -> String.get s (len - i - 1)) in + Ok s_rev + in + custom decode + + let len_string : (string, param) t -> (int, param) t = + let decode = fun s -> String.length s |> Result.ok in + custom decode + + let schema = + directive "dir" (t3 bool string string) + (t2 + (directive "dir_1" + (t2 bool (rev_string string)) + (directive "dir_1_1" nil nil) ) + (directive "dir_2" (t2 string (len_string string)) nil) ) + + let txt = + Parse.from_string + {|dir true str1 "str2" { + dir_1 false naquadah { + dir_1_1 + } + dir_2 str3 four + }|} + |> Result.get_ok + + let () = + let v = conv schema txt |> Result.get_ok in + assert ( + v + = { name = "dir" + ; params = (true, "str1", "str2") + ; children = + ( { name = "dir_1" + ; params = (false, "hadauqan") + ; children = { name = "dir_1_1"; params = Nil; children = Nil } + } + , { name = "dir_2"; params = ("str3", 4); children = Nil } ) + } ); + () +end + +let () = Format.printf "all tests OK! 🐱" diff --git a/test/unit/query.scfg b/test/unit/query.scfg new file mode 100644 index 0000000..8701c7b --- /dev/null +++ b/test/unit/query.scfg @@ -0,0 +1,11 @@ +n1 p1 p2 { + n1.2 p1 p2 + n1.3 p1 p2 +} +n1 p3 p4 { + n1.2 p3 p4 + n1.3 p3 p4 +} +a b c +a c d +gggggggggggggggg g g g diff --git a/test/unit/test_chan.scfg b/test/unit/test_chan.scfg new file mode 100644 index 0000000..3774da6 --- /dev/null +++ b/test/unit/test_chan.scfg @@ -0,0 +1 @@ +a b c