From 208d8346c11a7053e1162108725afdfdce82537a Mon Sep 17 00:00:00 2001 From: Simon Grondin Date: Sun, 11 Jun 2023 11:44:26 -0500 Subject: [PATCH] Add angstrom-eio --- .gitignore | 1 + README.md | 4 +- angstrom-eio.opam | 20 ++++++++ benchmarks/dune | 5 ++ benchmarks/eio_benchmark.ml | 17 +++++++ eio/angstrom_eio.ml | 94 +++++++++++++++++++++++++++++++++++++ eio/angstrom_eio.mli | 68 +++++++++++++++++++++++++++ eio/dune | 5 ++ lib/angstrom.mli | 2 +- 9 files changed, 214 insertions(+), 2 deletions(-) create mode 100644 angstrom-eio.opam create mode 100644 benchmarks/eio_benchmark.ml create mode 100644 eio/angstrom_eio.ml create mode 100644 eio/angstrom_eio.mli create mode 100644 eio/dune diff --git a/.gitignore b/.gitignore index 098c481..b062f57 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .*.sw[a-z] *~ _build/ +_opam/ _tests/ lib_test/tests_ setup.log diff --git a/README.md b/README.md index 612fff4..43511bd 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ to achieve backtracking, rather than providing it by default. They also all use something akin to a lazy character stream as the underlying input abstraction. While this suits Haskell quite nicely, it requires blocking read calls when the entire input is not immediately available—an approach that is inherently -incompatible with monadic concurrency libraries such as [Async] and [Lwt], and +incompatible with monadic concurrency libraries such as [Async], [Lwt] and [Eio], and writing high-performance, concurrent applications in general. Another consequence of this approach to modeling and retrieving input is that the parsers cannot iterate over sections of input in a tight loop, which adversely @@ -94,6 +94,7 @@ other parser-combinator libraries. 
[parsec]: https://hackage.haskell.org/package/parsec [async]: https://github.com/janestreet/async [lwt]: https://ocsigen.org/lwt/ +[eio]: https://github.com/ocaml-multicore/eio Feature \ Library | Angstrom | [mparser] | [planck] | [opal] | @@ -107,6 +108,7 @@ Unbuffered (zero-copy) interface | ✅ | ❌ | ❌ | Non-blocking incremental interface | ✅ | ❌ | ❌ | ❌ | Async Support | ✅ | ❌ | ❌ | ❌ | Lwt Support | ✅ | ❌ | ❌ | ❌ | +Eio Support | ✅ | ❌ | ❌ | ❌ | [mparser]: https://github.com/cakeplus/mparser [opal]: https://github.com/pyrocat101/opal diff --git a/angstrom-eio.opam b/angstrom-eio.opam new file mode 100644 index 0000000..454f372 --- /dev/null +++ b/angstrom-eio.opam @@ -0,0 +1,20 @@ +opam-version: "2.0" +maintainer: "Simon Grondin" +authors: [ "Simon Grondin" ] +license: "BSD-3-clause" +homepage: "https://github.com/inhabitedtype/angstrom" +bug-reports: "https://github.com/inhabitedtype/angstrom/issues" +dev-repo: "git+https://github.com/inhabitedtype/angstrom.git" +build: [ + ["dune" "subst"] {pinned} + ["dune" "build" "-p" name "-j" jobs] + ["dune" "runtest" "-p" name "-j" jobs] {with-test} +] +depends: [ + "ocaml" {>= "5.0.0"} + "dune" {>= "1.8"} + "angstrom" + "eio_main" {>= "0.12"} + "base-unix" +] +synopsis: "Eio support for Angstrom" diff --git a/benchmarks/dune b/benchmarks/dune index 9101a97..1d887f6 100644 --- a/benchmarks/dune +++ b/benchmarks/dune @@ -12,3 +12,8 @@ (libraries angstrom-lwt-unix RFC2616 RFC7159) (modules lwt_benchmark) (names lwt_benchmark)) + +(executables + (libraries angstrom-eio RFC2616 RFC7159) + (modules eio_benchmark) + (names eio_benchmark)) diff --git a/benchmarks/eio_benchmark.ml b/benchmarks/eio_benchmark.ml new file mode 100644 index 0000000..55a09aa --- /dev/null +++ b/benchmarks/eio_benchmark.ml @@ -0,0 +1,17 @@ +open! 
Eio.Std
+
+(* Parse stdin with the parser named by the first argument (usage + exit 1 otherwise). *)
+let main env =
+  let toss _ = () in
+  let parser =
+    match Array.to_list Sys.argv with
+    | _ :: "http" :: _ -> Angstrom.(RFC2616.request >>| fun x -> `Http x)
+    | _ :: "json" :: _ -> Angstrom.(RFC7159.json >>| fun x -> `Json x)
+    | _ -> print_endline ("usage: " ^ Sys.executable_name ^ " (http|json)"); exit 1
+  in
+  match Angstrom_eio.parse_many parser toss (Eio.Stdenv.stdin env) with
+  | _, Ok () -> ()
+  | _, Error err -> failwith err
+;;
+
+Eio_main.run main
diff --git a/eio/angstrom_eio.ml b/eio/angstrom_eio.ml
new file mode 100644
index 0000000..cbc397f
--- /dev/null
+++ b/eio/angstrom_eio.ml
@@ -0,0 +1,94 @@
+(*----------------------------------------------------------------------------
+    Copyright (c) 2023 Inhabited Type LLC.
+
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    1. Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+    3. Neither the name of the author nor the names of his contributors
+       may be used to endorse or promote products derived from this software
+       without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
+    OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED.
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
+    ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+    OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+    POSSIBILITY OF SUCH DAMAGE.
+  ----------------------------------------------------------------------------*)
+
+open Angstrom.Buffered
+
+(* Size in bytes of the scratch buffer passed to each [Eio.Flow.single_read]. *)
+let default_buffer_size = 4096
+
+(* No-op hook used when the caller supplies no [?pushback]. *)
+let default_pushback () = ()
+
+(* Pair a state's unconsumed input with its result; asserts that the former exists. *)
+let handle_parse_result state =
+  match state_to_unconsumed state with
+  | None -> assert false
+  | Some us -> us, state_to_result state
+
+(* Tell a parser that is still waiting for input that no more will arrive. *)
+let finalize = function
+  | Partial feed -> feed `Eof
+  | state -> state
+
+(* Feed [parser] with chunks read from [src] until it finishes or input ends,
+   calling [pushback] after each chunk has been fed. *)
+let parse ?(pushback = default_pushback) parser src =
+  let buf = Cstruct.create default_buffer_size in
+  let rec loop = function
+    | (Done _ | Fail _) as state -> handle_parse_result state
+    | Partial feed as state -> (
+      match Eio.Flow.single_read src buf with
+      | 0 | (exception End_of_file) -> finalize state |> handle_parse_result
+      | len ->
+        let next = feed (`Bigstring (Bigstringaf.sub buf.buffer ~off:0 ~len)) in
+        pushback ();
+        loop next )
+  in
+  loop (parse parser)
+
+(* Drive an already-started [state] from [src] until it is no longer [Partial]. *)
+let rec buffered_state_loop pushback state src (buf : Cstruct.t) =
+  match state with
+  | Partial k ->
+    let next =
+      match Eio.Flow.single_read src buf with
+      | 0 | (exception End_of_file) -> k `Eof
+      | len -> k (`Bigstring (Bigstringaf.sub buf.buffer ~off:0 ~len))
+    in
+    pushback ();
+    buffered_state_loop pushback next src buf
+  | state -> state
+
+(* Resume a caller-built [state] from [src] and report how it ended. *)
+let with_buffered_parse_state ?(pushback = default_pushback) state src =
+  let buf = Cstruct.create default_buffer_size in
+  ( match state with
+  | Partial _ -> buffered_state_loop pushback state src buf
+  | _ -> state )
+  |> handle_parse_result
+
+(* Run [e] repeatedly, committing after each success and handing the value to [k]. *)
+let async_many e k = Angstrom.(skip_many (e <* commit >>| k) <?> "async_many")
+
+(* [write] returns [unit] directly, so the default [pushback] suffices. *)
+let parse_many p write src = parse (async_many p write) src
diff --git a/eio/angstrom_eio.mli b/eio/angstrom_eio.mli
new file mode 100644
index 0000000..b7240bb
--- /dev/null
+++ b/eio/angstrom_eio.mli
@@ -0,0 +1,68 @@
+(*----------------------------------------------------------------------------
+    Copyright (c) 2023 Inhabited Type LLC.
+
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    1. Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+    3. Neither the name of the author nor the names of his contributors
+       may be used to endorse or promote products derived from this software
+       without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
+    OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED.
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
+    ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+    OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+    POSSIBILITY OF SUCH DAMAGE.
+  ----------------------------------------------------------------------------*)
+
+open Angstrom
+
+(** [parse ?pushback p src] feeds [p] with chunks read from [src] until [p]
+    finishes or [src] reaches end of input, calling [pushback] after each chunk.
+    Returns the input left unconsumed together with the parse result. *)
+val parse
+  :  ?pushback:(unit -> unit) -> 'a t -> _ Eio.Flow.source
+  -> (Buffered.unconsumed * ('a, string) result)
+
+(** [parse_many p write src] applies [p] repeatedly to the input read from
+    [src], committing after each success and passing the parsed value to
+    [write]. Returns the input left unconsumed together with the outcome. *)
+val parse_many
+  :  'a t -> ('a -> unit) -> _ Eio.Flow.source
+  -> (Buffered.unconsumed * (unit, string) result)
+
+(** Useful for resuming a {!parse} that returns unconsumed data. Construct a
+    [Buffered.state] by using [Buffered.parse] and provide it into this
+    function. This is essentially what {!parse_many} does, so consider using
+    that if you don't require fine-grained control over how many times you want
+    the parser to succeed.
+
+    Usage example:
+
+    {[
+      match parse parser flow with
+      | { buf; off; len }, Ok a ->
+        let state = Buffered.parse parser in
+        let state = Buffered.feed state (`Bigstring (Bigstringaf.sub ~off ~len buf)) in
+        with_buffered_parse_state state flow
+      | _, Error err -> failwith err
+    ]} *)
+val with_buffered_parse_state
+  :  ?pushback:(unit -> unit) -> 'a Buffered.state -> _ Eio.Flow.source
+  -> (Buffered.unconsumed * ('a, string) result)
diff --git a/eio/dune b/eio/dune
new file mode 100644
index 0000000..3a14f60
--- /dev/null
+++ b/eio/dune
@@ -0,0 +1,5 @@
+(library
+ (name angstrom_eio)
+ (public_name angstrom-eio)
+ (flags :standard -safe-string)
+ (libraries angstrom eio_main))
diff --git a/lib/angstrom.mli b/lib/angstrom.mli
index d669595..9fb6c6f 100644
--- a/lib/angstrom.mli
+++ b/lib/angstrom.mli
@@ -639,7 +639,7 @@ end
     The logic that must be implemented in order to make proper use of this
     module is intricate and tied to your OS environment. It's advisable to use
     the {!Buffered} module when initially developing and testing your parsers.
     For production use-cases, consider the Async, Lwt and Eio support that this
     library includes before attempting to use this module directly. *)
 module Unbuffered : sig
   type more =