From f1f464e00d7165b6219012b70dd783a467f9a995 Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Thu, 30 Jul 2020 07:10:30 +0200 Subject: [PATCH] support crossref report type --- Gemfile.lock | 2 +- lib/bolognese/readers/crossref_reader.rb | 28 +++- lib/bolognese/utils.rb | 11 +- lib/bolognese/version.rb | 2 +- .../get_crossref_metadata/report_osti.yml | 138 ++++++++++++++++++ spec/readers/crossref_reader_spec.rb | 29 +++- spec/writers/datacite_writer_spec.rb | 6 +- 7 files changed, 194 insertions(+), 22 deletions(-) create mode 100644 spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/report_osti.yml diff --git a/Gemfile.lock b/Gemfile.lock index 68f88ef2..e56e67ca 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . specs: - bolognese (1.8.4) + bolognese (1.8.5) activesupport (>= 4.2.5) benchmark_methods (~> 0.7) bibtex-ruby (>= 5.1.0) diff --git a/lib/bolognese/readers/crossref_reader.rb b/lib/bolognese/readers/crossref_reader.rb index 6803a224..5f3c51f9 100644 --- a/lib/bolognese/readers/crossref_reader.rb +++ b/lib/bolognese/readers/crossref_reader.rb @@ -31,7 +31,7 @@ def read_crossref(string: nil, **options) end # model should be one of book, conference, database, dissertation, journal, peer_review, posted_content, - # report-paper, sa_component, standard + # report_paper, sa_component, standard model = meta.dig("crossref").to_h.keys.last resource_type = nil @@ -82,6 +82,9 @@ def read_crossref(string: nil, **options) when "database" bibliographic_metadata = meta.dig("crossref", "database", "dataset").to_h resource_type = "dataset" + when "report_paper" + bibliographic_metadata = meta.dig("crossref", "report_paper", "report_paper_metadata").to_h + resource_type = "report" end resource_type = (resource_type || model).to_s.underscore.camelcase.presence @@ -156,7 +159,7 @@ def read_crossref(string: nil, **options) end id = normalize_doi(options[:doi] || options[:id] || bibliographic_metadata.dig("doi_data", "doi")) - identifiers = [crossref_alternate_identifiers(bibliographic_metadata)].compact + identifiers = crossref_alternate_identifiers(bibliographic_metadata) { "id" => id, "types" => types, @@ -187,14 +190,23 @@ def read_crossref(string: nil, **options) def crossref_alternate_identifiers(bibliographic_metadata) if bibliographic_metadata.dig("publisher_item", "item_number").present? - { "identifier" => parse_attributes(bibliographic_metadata.dig("publisher_item", "item_number")), - "identifierType" => "Publisher ID" } + Array.wrap(bibliographic_metadata.dig("publisher_item", "item_number")).map do |item| + if item.is_a?(String) + { "identifier" => item, + "identifierType" => "Publisher ID" } + else + { "identifier" => item.fetch("__content__", nil), + "identifierType" => item.fetch("item_number_type", nil) || "Publisher ID" } + end + end elsif parse_attributes(bibliographic_metadata.fetch("item_number", nil)).present? - { "identifier" => parse_attributes(bibliographic_metadata.fetch("item_number", nil)), - "identifierType" => "Publisher ID" } + [{ "identifier" => parse_attributes(bibliographic_metadata.fetch("item_number", nil)), + "identifierType" => "Publisher ID" }] elsif parse_attributes(bibliographic_metadata.fetch("isbn", nil)).present? - { "identifier" => parse_attributes(bibliographic_metadata.fetch("isbn", nil), first: true), - "identifierType" => "ISBN" } + [{ "identifier" => parse_attributes(bibliographic_metadata.fetch("isbn", nil), first: true), + "identifierType" => "ISBN" }] + else + [] end end diff --git a/lib/bolognese/utils.rb b/lib/bolognese/utils.rb index 65249967..412c84e9 100644 --- a/lib/bolognese/utils.rb +++ b/lib/bolognese/utils.rb @@ -117,7 +117,7 @@ module Utils "EditedBook" => "Book", "JournalArticle" => "ScholarlyArticle", "Journal" => nil, - "Report" => nil, + "Report" => "Report", "BookSeries" => nil, "ReportSeries" => nil, "BookTrack" => nil, @@ -146,7 +146,7 @@ module Utils "EditedBook" => "book", "JournalArticle" => "article", "Journal" => nil, - "Report" => nil, + "Report" => "techreport", "BookSeries" => nil, "ReportSeries" => nil, "BookTrack" => nil, @@ -214,6 +214,7 @@ module Utils "ImageObject" => "Image", "Movie" => "Audiovisual", "PublicationIssue" => "Text", + "Report" => "Text", "ScholarlyArticle" => "Text", "Thesis" => "Text", "Service" => "Service", @@ -258,6 +259,7 @@ module Utils "ImageObject" => "graphic", "Movie" => "motion_picture", "PublicationIssue" => nil, + "Report" => "report", "ScholarlyArticle" => "article-journal", "Service" => nil, "Thesis" => "thesis", @@ -278,6 +280,7 @@ module Utils "Event" => nil, "ImageObject" => "FIGURE", "Movie" => "MPCT", + "Report" => "RPRT", "PublicationIssue" => nil, "ScholarlyArticle" => "JOUR", "Service" => nil, @@ -298,11 +301,11 @@ module Utils "EditedBook" => "BOOK", "JournalArticle" => "JOUR", "Journal" => nil, - "Report" => nil, + "Report" => "RPRT", "BookSeries" => nil, "ReportSeries" => nil, "BookTrack" => nil, - "Standard" => nil, + "Standard" => "STAND", "BookSection" => "CHAP", "BookPart" => "CHAP", "Book" => "BOOK", diff --git a/lib/bolognese/version.rb b/lib/bolognese/version.rb index 7926fa45..6e94ca62 100644 --- a/lib/bolognese/version.rb +++ b/lib/bolognese/version.rb @@ -1,3 +1,3 @@ module Bolognese - VERSION = "1.8.4" + VERSION = "1.8.5" end diff --git a/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/report_osti.yml b/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/report_osti.yml new file mode 100644 index 00000000..821f8739 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/report_osti.yml @@ -0,0 +1,138 @@ +--- +http_interactions: +- request: + method: get + uri: https://doi.org/ra/10.2172 + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - Mozilla/5.0 (compatible; Maremma/4.7.1; mailto:info@datacite.org) + Accept: + - text/html,application/json,application/xml;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5 + response: + status: + code: 200 + message: '' + headers: + Date: + - Wed, 29 Jul 2020 21:23:48 GMT + Content-Type: + - application/json;charset=UTF-8 + Connection: + - keep-alive + Set-Cookie: + - __cfduid=d01ef02c612a86c1cf7a059f5c9c803201596057828; expires=Fri, 28-Aug-20 + 21:23:48 GMT; path=/; domain=.doi.org; HttpOnly; SameSite=Lax; Secure + Cf-Cache-Status: + - DYNAMIC + Cf-Request-Id: + - 043e0ed580000096d473a7a200000001 + Expect-Ct: + - max-age=604800, report-uri="https://report-uri.cloudflare.com/cdn-cgi/beacon/expect-ct" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Server: + - cloudflare + Cf-Ray: + - 5ba9e7359f0c96d4-FRA + body: + encoding: ASCII-8BIT + string: |- + [ + { + "DOI": "10.2172", + "RA": "Crossref" + } + ] + http_version: + recorded_at: Wed, 29 Jul 2020 21:23:48 GMT +- request: + method: get + uri: https://api.crossref.org/works/10.2172/972169/transform/application/vnd.crossref.unixsd+xml + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - Mozilla/5.0 (compatible; Maremma/4.7.1; mailto:info@datacite.org) + Accept: + - text/xml + response: + status: + code: 200 + message: OK + headers: + Link: + - ; rel="canonical" + Access-Control-Allow-Origin: + - "*" + Access-Control-Allow-Headers: + - X-Requested-With + Content-Length: + - '4217' + Server: + - http-kit + Date: + - Wed, 29 Jul 2020 21:23:49 GMT + X-Rate-Limit-Limit: + - '50' + X-Rate-Limit-Interval: + - 1s + Connection: + - close + body: + encoding: ASCII-8BIT + string: "\n\r\n \r\n + \ \r\n none\r\n \r\n \r\n + \ \r\n 10.2172/972169\r\n + \ Office of Scientific + and Technical Information (OSTI)\r\n Office of Scientific and Technical Information\r\n + \ 960\r\n 41225045\r\n 584445\r\n 20100218105553\r\n 10.2172\r\n 2010-02-19T03:56:05Z\r\n 2010-02-19T03:56:03Z\r\n 159\r\n \r\n \r\n \r\n \r\n + \ \r\n \r\n P.\r\n + \ Denholm\r\n \r\n + \ \r\n + \ E.\r\n Ela\r\n + \ \r\n \r\n B.\r\n + \ Kirby\r\n \r\n + \ \r\n + \ M.\r\n Milligan\r\n + \ \r\n \r\n \r\n + \ Role of Energy Storage with Renewable Electricity + Generation\r\n \r\n \r\n 01\r\n 01\r\n + \ 2010\r\n \r\n + \ \r\n 02\r\n + \ 18\r\n 2010\r\n + \ \r\n \r\n National + Renewable Energy Laboratory (NREL)\r\n NREL\r\n + \ Golden, CO (United States)\r\n + \ \r\n \r\n USDOE\r\n + \ \r\n \r\n NREL/TP-6A2-47187\r\n 972169\r\n \r\n + \ AC36-99-GO10337\r\n \r\n + \ 10.2172/972169\r\n 20100218105553\r\n + \ http://www.osti.gov/servlets/purl/972169-1QXROM/\r\n + \ \r\n \r\n + \ \r\n \r\n \r\n + \ \r\n \r\n \r\n" + http_version: + recorded_at: Wed, 29 Jul 2020 21:23:49 GMT +recorded_with: VCR 3.0.3 diff --git a/spec/readers/crossref_reader_spec.rb b/spec/readers/crossref_reader_spec.rb index e405d608..085b89ef 100644 --- a/spec/readers/crossref_reader_spec.rb +++ b/spec/readers/crossref_reader_spec.rb @@ -19,7 +19,7 @@ it "DOI with data citation" do expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.7554/elife.01567") - expect(subject.identifiers).to eq([{"identifier"=>"e01567", "identifierType"=>"Publisher ID"}]) + expect(subject.identifiers).to eq([{"identifier"=>"e01567", "identifierType"=>"article_number"}]) expect(subject.types).to eq("bibtex"=>"article", "citeproc"=>"article-journal", "resourceType"=>"JournalArticle", "resourceTypeGeneral"=>"Text", "ris"=>"JOUR", "schemaOrg"=>"ScholarlyArticle") expect(subject.url).to eq("https://elifesciences.org/articles/01567") expect(subject.creators.length).to eq(5) @@ -82,7 +82,7 @@ subject = Bolognese::Metadata.new(input: input) expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.3389/fpls.2019.00816") - expect(subject.identifiers).to eq([{"identifier"=>"816", "identifierType"=>"Publisher ID"}]) + expect(subject.identifiers).to eq([{"identifier"=>"816", "identifierType"=>"article_number"}]) expect(subject.url).to eq("https://www.frontiersin.org/article/10.3389/fpls.2019.00816/full") expect(subject.types).to eq("bibtex"=>"article", "citeproc"=>"article-journal", "resourceType"=>"JournalArticle", "resourceTypeGeneral"=>"Text", "ris"=>"JOUR", "schemaOrg"=>"ScholarlyArticle") expect(subject.creators.length).to eq(4) @@ -229,7 +229,7 @@ subject = Bolognese::Metadata.new(input: input) expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.1016/j.ejphar.2015.03.018") - expect(subject.identifiers).to eq([{"identifier"=>"S0014299915002332", "identifierType"=>"Publisher ID"}]) + expect(subject.identifiers).to eq([{"identifier"=>"S0014299915002332", "identifierType"=>"sequence-number"}]) expect(subject.url).to eq("https://linkinghub.elsevier.com/retrieve/pii/S0014299915002332") expect(subject.types).to eq("bibtex"=>"article", "citeproc"=>"article-journal", "resourceType"=>"JournalArticle", "resourceTypeGeneral"=>"Text", "ris"=>"JOUR", "schemaOrg"=>"ScholarlyArticle") expect(subject.creators.length).to eq(10) @@ -433,7 +433,7 @@ subject = Bolognese::Metadata.new(input: input) expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.3280/ecag2018-001005") - expect(subject.identifiers).to eq([{"identifier"=>"5", "identifierType"=>"Publisher ID"}]) + expect(subject.identifiers).to eq([{"identifier"=>"5", "identifierType"=>"article_number"}]) expect(subject.url).to eq("http://www.francoangeli.it/riviste/Scheda_Riviste.asp?IDArticolo=61645") expect(subject.types).to eq("bibtex"=>"article", "citeproc"=>"article-journal", "resourceType"=>"JournalArticle", "resourceTypeGeneral"=>"Text", "ris"=>"JOUR", "schemaOrg"=>"ScholarlyArticle") expect(subject.creators.length).to eq(2) @@ -547,7 +547,7 @@ subject = Bolognese::Metadata.new(input: input) expect(subject.valid?).to be true expect(subject.id).to eq("https://doi.org/10.1055/s-0039-1690894") - expect(subject.identifiers).to eq([{"identifier"=>"s-0039-1690894", "identifierType"=>"Publisher ID"}]) + expect(subject.identifiers).to eq([{"identifier"=>"s-0039-1690894", "identifierType"=>"sequence-number"}]) expect(subject.url).to eq("http://www.thieme-connect.de/DOI/DOI?10.1055/s-0039-1690894") expect(subject.types).to eq("bibtex"=>"article", "citeproc"=>"article-journal", "resourceType"=>"JournalArticle", "resourceTypeGeneral"=>"Text", "ris"=>"JOUR", "schemaOrg"=>"ScholarlyArticle") expect(subject.creators.length).to eq(4) @@ -643,6 +643,25 @@ expect(subject.agency).to eq("crossref") end + it "report osti" do + input = "10.2172/972169" + subject = Bolognese::Metadata.new(input: input) + expect(subject.valid?).to be true + expect(subject.url).to eq("http://www.osti.gov/servlets/purl/972169-1QXROM/") + expect(subject.types).to eq("bibtex"=>"techreport", "citeproc"=>"report", "resourceType"=>"Report", "resourceTypeGeneral"=>"Text", "ris"=>"RPRT", "schemaOrg"=>"Report") + expect(subject.creators.count).to eq(4) + expect(subject.creators.first).to eq("familyName"=>"Denholm", "givenName"=>"P.", "name"=>"Denholm, P.", "nameType"=>"Personal") + expect(subject.contributors.count).to eq(0) + expect(subject.titles).to eq([{"title"=>"Role of Energy Storage with Renewable Electricity Generation"}]) + expect(subject.id).to eq("https://doi.org/10.2172/972169") + expect(subject.identifiers).to eq( [{"identifier"=>"NREL/TP-6A2-47187", "identifierType"=>"report-number"}, {"identifier"=>"972169", "identifierType"=>"sequence-number"}]) + expect(subject.descriptions).to be_empty + expect(subject.dates).to include({"date"=>"2010-01-01", "dateType"=>"Issued"}) + expect(subject.publication_year).to eq("2010") + expect(subject.publisher).to eq("Office of Scientific and Technical Information (OSTI)") + expect(subject.agency).to eq("crossref") + end + it "journal issue" do input = "https://doi.org/10.6002/ect.2015.0371" subject = Bolognese::Metadata.new(input: input) diff --git a/spec/writers/datacite_writer_spec.rb b/spec/writers/datacite_writer_spec.rb index d4cbdb63..1388733a 100644 --- a/spec/writers/datacite_writer_spec.rb +++ b/spec/writers/datacite_writer_spec.rb @@ -386,11 +386,11 @@ input = "10.7554/eLife.01567" subject = Bolognese::Metadata.new(input: input, from: "crossref") expect(subject.valid?).to be true - expect(subject.identifiers).to eq( [{"identifier"=>"e01567", "identifierType"=>"Publisher ID"}]) - subject.identifiers = [{ "identifierType" => "Publisher ID", "identifier" => "abc" }] + expect(subject.identifiers).to eq([{"identifier"=>"e01567", "identifierType"=>"article_number"}]) + subject.identifiers = [{ "identifierType" => "article_number", "identifier" => "abc" }] datacite = Maremma.from_xml(subject.datacite).fetch("resource", {}) expect(datacite.dig("identifier", "__content__")).to eq("10.7554/elife.01567") - expect(subject.identifiers).to eq([{"identifier"=>"abc", "identifierType"=>"Publisher ID"}]) + expect(subject.identifiers).to eq([{"identifier"=>"abc", "identifierType"=>"article_number"}]) end it "validates against schema" do