From f52764052c5073820ef96bd3de41768c93bcb220 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Wed, 2 Feb 2022 20:29:12 +0100 Subject: [PATCH 01/11] add line by line file processing Signed-off-by: Marcello Seri --- bin/doi2bib.ml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/bin/doi2bib.ml b/bin/doi2bib.ml index 8409980..ad196cb 100644 --- a/bin/doi2bib.ml +++ b/bin/doi2bib.ml @@ -30,6 +30,22 @@ let doi2bib id = or 'PMC' as appropriate." id) +let process_file operation fname = + let lines = + let f = open_in_bin fname in + Fun.protect + ~finally:(fun () -> close_in_noerr f) + (fun () -> + Seq.unfold + (fun c -> + try Some (input_line c, c) + with End_of_file | _ -> + close_in c; + None) + f) + in + Seq.iter operation lines + let () = let open Cmdliner in let id = From 43a95bd79d1c41768e09f4b6d71aec6978e4e4a8 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Wed, 2 Feb 2022 20:50:11 +0100 Subject: [PATCH 02/11] Prepare file processing Signed-off-by: Marcello Seri --- bin/doi2bib.ml | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/bin/doi2bib.ml b/bin/doi2bib.ml index ad196cb..1c571fc 100644 --- a/bin/doi2bib.ml +++ b/bin/doi2bib.ml @@ -30,13 +30,13 @@ let doi2bib id = or 'PMC' as appropriate." 
id) -let process_file operation fname = +let process_file outfile infile = let lines = - let f = open_in_bin fname in + let f = open_in_bin infile in Fun.protect ~finally:(fun () -> close_in_noerr f) (fun () -> - Seq.unfold + Lwt_seq.unfold (fun c -> try Some (input_line c, c) with End_of_file | _ -> @@ -44,7 +44,19 @@ let process_file operation fname = None) f) in - Seq.iter operation lines + let open Lwt.Syntax in + let* f = Lwt_io.open_file ~mode:Output outfile in + let process f id = + match Http.get_bib_entry @@ Parser.parse_id id with + | bibtex -> + let* bibtex in + let* () = Lwt_io.write f bibtex in + Lwt_io.write_char f '\n' + | exception e -> Lwt_io.eprintf "Error for %s: %s" id (Printexc.to_string e) + in + Lwt.finalize + (fun () -> Lwt_seq.iter_s (process f) lines) + (fun () -> Lwt_io.close f) let () = let open Cmdliner in From 664466742d5c72845855413ca1719bf3e3b8b3c9 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 3 Feb 2022 09:34:15 +0100 Subject: [PATCH 03/11] Add option to write to file Signed-off-by: Marcello Seri --- bin/doi2bib.ml | 98 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 63 insertions(+), 35 deletions(-) diff --git a/bin/doi2bib.ml b/bin/doi2bib.ml index 1c571fc..920f438 100644 --- a/bin/doi2bib.ml +++ b/bin/doi2bib.ml @@ -2,12 +2,54 @@ open Doi2bib let err s = `Error (false, s) -let doi2bib id = - match id with - | None -> `Help (`Pager, None) - | Some id -> ( - match Lwt_main.run (Http.get_bib_entry @@ Parser.parse_id id) with - | bibtex -> `Ok (Printf.printf "%s" bibtex) +let process_id outfile id = + let open Lwt.Syntax in + let* bibtex = Http.get_bib_entry @@ Parser.parse_id id in + match outfile with + | "stdout" -> Lwt_io.print bibtex + | outfile -> + Lwt_io.with_file ~mode:Output outfile (fun oc -> + let* len = Lwt_io.length oc in + let* () = Lwt_io.set_position oc len in + Lwt_io.write_line oc bibtex) + +let process_file outfile infile = + let open Lwt.Syntax in + let write_out f = + match outfile 
with + | "stdout" -> f Lwt_io.stdout + | outfile -> Lwt_io.with_file ~mode:Output outfile f + in + let lines ic = + Lwt_seq.unfold_lwt + (fun ic -> + let* line = Lwt_io.read_line_opt ic in + Lwt.return @@ Option.map (fun x -> (x, ic)) line) + ic + in + let process oc id = + match Http.get_bib_entry @@ Parser.parse_id id with + | bibtex -> + let* bibtex = bibtex in + Lwt_io.write_line oc bibtex + | exception e -> Lwt_io.eprintf "Error for %s: %s" id (Printexc.to_string e) + in + Lwt_io.with_file ~mode:Input infile (fun ic -> + write_out (fun oc -> + let* len = Lwt_io.length oc in + let* () = Lwt_io.set_position oc len in + Lwt_seq.iter_s (process oc) (lines ic))) + +let doi2bib id file outfile = + match (id, file) with + | None, "" -> `Help (`Pager, None) + | None, infile -> ( + match Lwt_main.run (process_file outfile infile) with + | () -> `Ok () + | exception e -> err @@ Printexc.to_string e) + | Some id, "" -> ( + match Lwt_main.run (process_id outfile id) with + | () -> `Ok () | exception Http.PubMed_DOI_not_found -> err @@ Printf.sprintf "Error: unable to find a DOI entry for %s.\n" id | exception Http.Entry_not_found -> @@ -29,37 +71,23 @@ let doi2bib id = You can force me to consider it by prepending 'doi:', 'arxiv:' \ or 'PMC' as appropriate." 
id) - -let process_file outfile infile = - let lines = - let f = open_in_bin infile in - Fun.protect - ~finally:(fun () -> close_in_noerr f) - (fun () -> - Lwt_seq.unfold - (fun c -> - try Some (input_line c, c) - with End_of_file | _ -> - close_in c; - None) - f) - in - let open Lwt.Syntax in - let* f = Lwt_io.open_file ~mode:Output outfile in - let process f id = - match Http.get_bib_entry @@ Parser.parse_id id with - | bibtex -> - let* bibtex in - let* () = Lwt_io.write f bibtex in - Lwt_io.write_char f '\n' - | exception e -> Lwt_io.eprintf "Error for %s: %s" id (Printexc.to_string e) - in - Lwt.finalize - (fun () -> Lwt_seq.iter_s (process f) lines) - (fun () -> Lwt_io.close f) + | Some _, _ -> `Help (`Pager, None) let () = let open Cmdliner in + let file = + let doc = + "With this flag, the tool reads the file and process its lines \ + sequentially, treating them as DOIs, arXiv IDs or PubMedIDs. Errors \ + will be printed on standard error but will not terminate the operation." + in + Arg.(value & opt string "" & info [ "f"; "file" ] ~docv:"FILE" ~doc) + in + let out = + let doc = "Append the bibtex output to the specified file." in + Arg.( + value & opt string "stdout" & info [ "o"; "output" ] ~docv:"OUTPUT" ~doc) + in let id = let doc = "A DOI, an arXiv ID or a PubMed ID. 
The tool tries to automatically \ @@ -70,7 +98,7 @@ let () = in Arg.(value & pos 0 (some string) None & info ~docv:"ID" ~doc []) in - let doi2bib_t = Term.(ret (const doi2bib $ id)) in + let doi2bib_t = Term.(ret (const doi2bib $ id $ file $ out)) in let info = let doc = "A little CLI tool to get the bibtex entry for a given DOI, arXiv or \ From 08c8cb13f98904924c16baccd81287bdf954cf13 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 3 Feb 2022 09:49:34 +0100 Subject: [PATCH 04/11] Add necessary plumbing to enable the functionality Signed-off-by: Marcello Seri --- bin/doi2bib.ml | 14 ++++++-------- bin/dune | 2 +- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/bin/doi2bib.ml b/bin/doi2bib.ml index 920f438..aa0e049 100644 --- a/bin/doi2bib.ml +++ b/bin/doi2bib.ml @@ -8,9 +8,8 @@ let process_id outfile id = match outfile with | "stdout" -> Lwt_io.print bibtex | outfile -> - Lwt_io.with_file ~mode:Output outfile (fun oc -> - let* len = Lwt_io.length oc in - let* () = Lwt_io.set_position oc len in + let flags = [ Unix.O_WRONLY; O_APPEND; O_CREAT ] in + Lwt_io.with_file ~mode:Output ~flags outfile (fun oc -> Lwt_io.write_line oc bibtex) let process_file outfile infile = @@ -18,7 +17,9 @@ let process_file outfile infile = let write_out f = match outfile with | "stdout" -> f Lwt_io.stdout - | outfile -> Lwt_io.with_file ~mode:Output outfile f + | outfile -> + let flags = [ Unix.O_WRONLY; O_APPEND; O_CREAT ] in + Lwt_io.with_file ~mode:Output ~flags outfile f in let lines ic = Lwt_seq.unfold_lwt @@ -35,10 +36,7 @@ let process_file outfile infile = | exception e -> Lwt_io.eprintf "Error for %s: %s" id (Printexc.to_string e) in Lwt_io.with_file ~mode:Input infile (fun ic -> - write_out (fun oc -> - let* len = Lwt_io.length oc in - let* () = Lwt_io.set_position oc len in - Lwt_seq.iter_s (process oc) (lines ic))) + write_out (fun oc -> Lwt_seq.iter_s (process oc) (lines ic))) let doi2bib id file outfile = match (id, file) with diff --git a/bin/dune 
b/bin/dune index b009ef4..22e23cf 100644 --- a/bin/dune +++ b/bin/dune @@ -1,5 +1,5 @@ (executable (name doi2bib) (public_name doi2bib) - (libraries cmdliner doi2bib) + (libraries cmdliner doi2bib unix) (preprocess future_syntax)) From 697deb51d7096bc1f5d01cc02c3ce80a3b2a8c37 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 3 Feb 2022 09:52:31 +0100 Subject: [PATCH 05/11] README: update Signed-off-by: Marcello Seri --- README.md | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 99b8b59..f42e593 100644 --- a/README.md +++ b/README.md @@ -9,44 +9,43 @@ Usage: arXiv or PubMed ID. SYNOPSIS - doi2bib [OPTION]... [ID] + doi2bib [OPTION]... [ID] ARGUMENTS - ID A DOI, an arXiv ID or a PubMed ID. The tool tries to automatically - infer what kind of ID you are using. You can force the cli to - lookup a DOI by using the form 'doi:ID' or an arXiv ID by using - the form 'arXiv:ID'. PubMed IDs always start with 'PMC'. + ID A DOI, an arXiv ID or a PubMed ID. The tool tries to automatically + infer what kind of ID you are using. You can force the cli to + lookup a DOI by using the form 'doi:ID' or an arXiv ID by using + the form 'arXiv:ID'. PubMed IDs always start with 'PMC'. OPTIONS - --help[=FMT] (default=auto) - Show this help in format FMT. The value FMT must be one of `auto', - `pager', `groff' or `plain'. With `auto', the format is `pager` or - `plain' whenever the TERM env var is `dumb' or undefined. + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of `auto', + `pager', `groff' or `plain'. With `auto', the format is `pager` or + `plain' whenever the TERM env var is `dumb' or undefined. - --version - Show version information. + --version + Show version information. EXIT STATUS - doi2bib exits with the following status: + doi2bib exits with the following status: - 0 on success. + 0 on success. - 124 on command line parsing errors. 
+ 124 on command line parsing errors. - 125 on unexpected internal errors (bugs). + 125 on unexpected internal errors (bugs). BUGS - Report bugs to https://github.com/mseri/doi2bib/issues - + Report bugs to https://github.com/mseri/doi2bib/issues -It will output the bibtex entry, using the published details when possible. +It will retrieve the bibtex entry, using the published details when possible. Examples of use (the bibtex entry is printed on standard output): $ doi2bib 10.1007/s10569-019-9946-9 - $ doi2bib doi:10.4171/JST/226 - $ doi2bib arXiv:1609.01724 + $ doi2bib doi:10.4171/JST/226 -o "bibliography.bib" $ doi2bib 1902.00436 + $ doi2bib arXiv:1609.01724 $ doi2bib PMC2883744 Each release comes with attached binaries for windows, mac and linux. From 6de1ac495b922a5aac0b1372b9d623fef783bcab Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 3 Feb 2022 09:53:47 +0100 Subject: [PATCH 06/11] Update CHANGES Signed-off-by: Marcello Seri --- CHANGES.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index cf339b3..80c56b2 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,8 @@ +# 0.6.0 (2022-02-03) + +- Support batch processing of files of IDs +- Support appending the result to a file + # 0.5.2 (2021-12-17) - Move from cuz to the published clz From 469ddf4dc118060b04fd6dd0672576d7f4730d5f Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 3 Feb 2022 09:59:53 +0100 Subject: [PATCH 07/11] CI: Fix compiler selection Signed-off-by: Marcello Seri --- .github/workflows/workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index ae4c36d..c2e1be1 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -55,7 +55,7 @@ jobs: - name: Use OCaml ${{ matrix.ocaml-version }} uses: ocaml/setup-ocaml@v2 with: - ocaml-version: ${{ matrix.ocaml-version }} + ocaml-compiler: ${{ matrix.ocaml-version }} dune-cache: true - name: Set git user 
From 1060008ae6a3bc5fa942c9856369c5de37517630 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 3 Feb 2022 10:38:21 +0100 Subject: [PATCH 08/11] Prevents let punning, while I still support 4.08 Signed-off-by: Marcello Seri --- .ocamlformat | 1 + 1 file changed, 1 insertion(+) diff --git a/.ocamlformat b/.ocamlformat index e69de29..47c54d4 100644 --- a/.ocamlformat +++ b/.ocamlformat @@ -0,0 +1 @@ +ocaml-version = 4.08 From d9b3e7d1fec4f9df299092b9ced244646823eada Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 3 Feb 2022 10:51:19 +0100 Subject: [PATCH 09/11] Update flag doc Signed-off-by: Marcello Seri --- bin/doi2bib.ml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bin/doi2bib.ml b/bin/doi2bib.ml index aa0e049..ad9b109 100644 --- a/bin/doi2bib.ml +++ b/bin/doi2bib.ml @@ -82,7 +82,10 @@ let () = Arg.(value & opt string "" & info [ "f"; "file" ] ~docv:"FILE" ~doc) in let out = - let doc = "Append the bibtex output to the specified file." in + let doc = + "Append the bibtex output to the specified file. It will create the file \ + if it does not exist." + in Arg.( value & opt string "stdout" & info [ "o"; "output" ] ~docv:"OUTPUT" ~doc) in From 0763a91c6ec9e18a5a656c3cbabfbb8d40cd7b48 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 3 Feb 2022 10:51:38 +0100 Subject: [PATCH 10/11] Update README Signed-off-by: Marcello Seri --- README.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f42e593..a379745 100644 --- a/README.md +++ b/README.md @@ -18,11 +18,21 @@ Usage: the form 'arXiv:ID'. PubMed IDs always start with 'PMC'. OPTIONS + -f FILE, --file=FILE + With this flag, the tool reads the file and process its lines + sequentially, treating them as DOIs, arXiv IDs or PubMedIDs. + Errors will be printed on standard error but will not terminate + the operation. + --help[=FMT] (default=auto) Show this help in format FMT. 
The value FMT must be one of `auto', `pager', `groff' or `plain'. With `auto', the format is `pager` or `plain' whenever the TERM env var is `dumb' or undefined. + -o OUTPUT, --output=OUTPUT (absent=stdout) + Append the bibtex output to the specified file. It will create the + file if it does not exist. + --version Show version information. @@ -36,7 +46,7 @@ Usage: 125 on unexpected internal errors (bugs). BUGS - Report bugs to https://github.com/mseri/doi2bib/issues + Report bugs to https://github.com/mseri/doi2bib/issues It will retrieve the bibtex entry, using the published details when possible. From 4e8ca03a79380beffbbc3c947b13913cb274a3e2 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 3 Feb 2022 10:57:15 +0100 Subject: [PATCH 11/11] CI: streamline for release Signed-off-by: Marcello Seri --- .github/workflows/workflow.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index c2e1be1..1dcf4ca 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -90,6 +90,7 @@ jobs: opam exec -- dune build @fmt - name: Run tests + if: ${{ !startsWith(github.ref, 'refs/tags/') }} run: opam exec -- dune runtest - name: Build