From ca90901d4f8caf7e791589832dfd483b1cef950e Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Mon, 15 Jul 2019 19:28:54 +0200 Subject: [PATCH 1/8] remove batchloader from controller --- app/controllers/events_controller.rb | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/app/controllers/events_controller.rb b/app/controllers/events_controller.rb index dd55a4cca..6882f3e86 100644 --- a/app/controllers/events_controller.rb +++ b/app/controllers/events_controller.rb @@ -179,13 +179,7 @@ def index if @include.include? :dois doi_names = (results.map { |event| event.doi}).join(",") - events_serialized[:included] = if params["batchload"] == "true" || params["batchload"].nil? - logger.info "batchload" - DoiSerializer.new(load_doi(doi_names), {is_collection: true}).serializable_hash.dig(:data) - elsif params["batchload"] == "false" - logger.info "find_by_doi" - Doi.find_by_id(doi_names).results - end + events_serialized[:included] = DoiSerializer.new(Doi.find_by_id(doi_names).results, {is_collection: true}).serializable_hash.dig(:data) end render json: events_serialized, status: :ok From 9b89b18652cdd9ae6ac819013b285399042d668a Mon Sep 17 00:00:00 2001 From: Richard Hallett Date: Wed, 17 Jul 2019 17:41:20 +0200 Subject: [PATCH 2/8] Add support for xml detail return in dois endpoint Issue #315 --- app/controllers/dois_controller.rb | 1 + app/serializers/doi_serializer.rb | 2 +- spec/requests/dois_spec.rb | 49 ++++++++++++++++++------------ 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/app/controllers/dois_controller.rb b/app/controllers/dois_controller.rb index c83d17861..f2b657e50 100644 --- a/app/controllers/dois_controller.rb +++ b/app/controllers/dois_controller.rb @@ -189,6 +189,7 @@ def index options[:is_collection] = true options[:params] = { :current_ability => current_ability, + :detail => params[:detail] } bmr = Benchmark.ms { diff --git a/app/serializers/doi_serializer.rb b/app/serializers/doi_serializer.rb index 2346cd068..ba81de691 100644 --- a/app/serializers/doi_serializer.rb +++ b/app/serializers/doi_serializer.rb @@ -12,7 +12,7 @@ class DoiSerializer has_many :media, if: Proc.new { |object, params| params && params[:detail] } attribute :xml, if: Proc.new { |object, params| params && params[:detail] } do |object| - object.xml_encoded + Base64.strict_encode64(object.xml) if object.xml.present? end attribute :doi do |object| diff --git a/spec/requests/dois_spec.rb b/spec/requests/dois_spec.rb index 1d72b5445..101124532 100644 --- a/spec/requests/dois_spec.rb +++ b/spec/requests/dois_spec.rb @@ -29,6 +29,17 @@ expect(json['data'].size).to eq(3) expect(json.dig('meta', 'total')).to eq(3) end + + it 'returns dois with extra detail' do + get '/dois?detail=true', nil, headers + + expect(last_response.status).to eq(200) + expect(json['data'].size).to eq(3) + json['data'].each{ + |doi| + expect(doi.dig('attributes')).to include('xml') + } + end end describe 'GET /dois/:id', elasticsearch: true do @@ -59,7 +70,7 @@ context 'anonymous user' do it 'returns the Doi' do - get "/dois/#{doi.doi}" + get "/dois/#{doi.doi}" expect(last_response.status).to eq(401) expect(json.fetch('errors')).to eq([{"status"=>"401", "title"=>"Bad credentials."}]) @@ -786,7 +797,7 @@ it 'updates the record' do patch "/dois/#{doi.doi}", valid_attributes, headers - + expect(last_response.status).to eq(200) expect(json.dig('data', 'attributes', 'url')).to eq("http://www.bl.uk/pdf/pat.pdf") expect(json.dig('data', 'attributes', 'doi')).to eq(doi.doi.downcase) @@ -959,7 +970,7 @@ "nameIdentifiers" => [{"nameIdentifier"=>"https://orcid.org/0000-0002-1825-0097", "nameIdentifierScheme"=>"ORCID", "schemeUri"=>"https://orcid.org"}], "nameType" => "Personal") expect(json.dig('data', 'attributes', 'creators')[2]).to eq("nameType"=>"Organizational", "name"=>"The Psychoceramics Study Group", "affiliation"=>[{"affiliationIdentifier"=>"https://ror.org/05gq02987", "name"=>"Brown University", "affiliationIdentifierScheme"=>"ROR"}]) - + xml = Maremma.from_xml(Base64.decode64(json.dig('data', 'attributes', 'xml'))).fetch("resource", {}) expect(xml.dig("creators", "creator")[0]).to eq("affiliation" => {"__content__"=>"DataCite", "affiliationIdentifier"=>"https://ror.org/04wxnsj81", "affiliationIdentifierScheme"=>"ROR"}, "creatorName" => {"__content__"=>"Miller, Elizabeth", "nameType"=>"Personal"}, @@ -1003,7 +1014,7 @@ context 'crossref url', vcr: true do let(:provider) { create(:provider, name: "Crossref", symbol: "CROSSREF", role_name: "ROLE_REGISTRATION_AGENCY") } let(:client) { create(:client, provider: provider, name: "Crossref Citations", symbol: "CROSSREF.CITATIONS") } - + let(:xml) { Base64.strict_encode64("https://doi.org/10.7554/elife.01567") } let(:valid_attributes) do { @@ -1035,7 +1046,7 @@ expect(json.dig('data', 'attributes', 'titles')).to eq([{"title"=>"Automated quantitative histology reveals vascular morphodynamics during Arabidopsis hypocotyl secondary growth"}]) # expect(json.dig('data', 'attributes', 'agency')).to eq("Crossref") expect(json.dig('data', 'attributes', 'state')).to eq("findable") - + xml = Maremma.from_xml(Base64.decode64(json.dig('data', 'attributes', 'xml'))).fetch("resource", {}) expect(xml.dig("titles", "title")).to eq("Automated quantitative histology reveals vascular morphodynamics during Arabidopsis hypocotyl secondary growth") end @@ -1044,7 +1055,7 @@ context 'crossref url not found', vcr: true do let(:provider) { create(:provider, name: "Crossref", symbol: "CROSSREF", role_name: "ROLE_REGISTRATION_AGENCY") } let(:client) { create(:client, provider: provider, name: "Crossref Citations", symbol: "CROSSREF.CITATIONS") } - + let(:xml) { Base64.strict_encode64("https://doi.org/10.3389/fmicb.2019.01425") } let(:valid_attributes) do { @@ -1078,7 +1089,7 @@ context 'medra url', vcr: true do let(:provider) { create(:provider, name: "mEDRA", symbol: "MEDRA", role_name: "ROLE_REGISTRATION_AGENCY") } let(:client) { create(:client, provider: provider, name: "mEDRA Citations", symbol: "MEDRA.CITATIONS") } - + let(:xml) { Base64.strict_encode64("https://doi.org/10.3280/ecag2018-001005") } let(:valid_attributes) do { @@ -1110,7 +1121,7 @@ expect(json.dig('data', 'attributes', 'titles')).to eq([{"title"=>"Substitutability between organic and conventional poultry products and organic price premiums"}]) # expect(json.dig('data', 'attributes', 'agency')).to eq("mEDRA") expect(json.dig('data', 'attributes', 'state')).to eq("findable") - + xml = Maremma.from_xml(Base64.decode64(json.dig('data', 'attributes', 'xml'))).fetch("resource", {}) expect(xml.dig("titles", "title")).to eq("Substitutability between organic and conventional poultry products and organic price premiums") end @@ -1119,7 +1130,7 @@ context 'kisti url', vcr: true do let(:provider) { create(:provider, name: "KISTI", symbol: "KISTI", role_name: "ROLE_REGISTRATION_AGENCY") } let(:client) { create(:client, provider: provider, name: "KISTI Citations", symbol: "KISTI.CITATIONS") } - + let(:xml) { Base64.strict_encode64("https://doi.org/10.5012/bkcs.2013.34.10.2889") } let(:valid_attributes) do { @@ -1151,7 +1162,7 @@ expect(json.dig('data', 'attributes', 'titles')).to eq([{"title"=>"Synthesis, Crystal Structure and Theoretical Calculation of a Novel Nickel(II) Complex with Dibromotyrosine and 1,10-Phenanthroline"}]) # expect(json.dig('data', 'attributes', 'agency')).to eq("mEDRA") expect(json.dig('data', 'attributes', 'state')).to eq("findable") - + xml = Maremma.from_xml(Base64.decode64(json.dig('data', 'attributes', 'xml'))).fetch("resource", {}) expect(xml.dig("titles", "title")).to eq("Synthesis, Crystal Structure and Theoretical Calculation of a Novel Nickel(II) Complex with Dibromotyrosine and 1,10-Phenanthroline") end @@ -1160,7 +1171,7 @@ context 'jalc url', vcr: true do let(:provider) { create(:provider, name: "JaLC", symbol: "JALC", role_name: "ROLE_REGISTRATION_AGENCY") } let(:client) { create(:client, provider: provider, name: "JALC Citations", symbol: "JALC.CITATIONS") } - + let(:xml) { Base64.strict_encode64("https://doi.org/10.1241/johokanri.39.979") } let(:valid_attributes) do { @@ -1192,7 +1203,7 @@ expect(json.dig('data', 'attributes', 'titles')).to eq([{"title"=>"Utilizing the Internet. 12 Series. Future of the Internet."}]) # expect(json.dig('data', 'attributes', 'agency')).to eq("mEDRA") expect(json.dig('data', 'attributes', 'state')).to eq("findable") - + xml = Maremma.from_xml(Base64.decode64(json.dig('data', 'attributes', 'xml'))).fetch("resource", {}) expect(xml.dig("titles", "title")).to eq("Utilizing the Internet. 12 Series. Future of the Internet.") end @@ -1201,7 +1212,7 @@ context 'op url', vcr: true do let(:provider) { create(:provider, name: "OP", symbol: "OP", role_name: "ROLE_REGISTRATION_AGENCY") } let(:client) { create(:client, provider: provider, name: "OP Citations", symbol: "OP.CITATIONS") } - + let(:xml) { Base64.strict_encode64("https://doi.org/10.2903/j.efsa.2018.5239") } let(:valid_attributes) do { @@ -1233,7 +1244,7 @@ expect(json.dig('data', 'attributes', 'titles')).to eq([{"title"=>"Scientific opinion on the safety of green tea catechins"}]) # expect(json.dig('data', 'attributes', 'agency')).to eq("mEDRA") expect(json.dig('data', 'attributes', 'state')).to eq("findable") - + xml = Maremma.from_xml(Base64.decode64(json.dig('data', 'attributes', 'xml'))).fetch("resource", {}) expect(xml.dig("titles", "title")).to eq("Scientific opinion on the safety of green tea catechins") end @@ -2284,7 +2295,7 @@ context 'landing page' do let(:url) { "https://blog.datacite.org/re3data-science-europe/" } let(:xml) { Base64.strict_encode64(file_fixture('datacite.xml').read) } - let(:landing_page) do + let(:landing_page) do { "checked" => Time.zone.now.utc.iso8601, "status" => 200, @@ -2330,7 +2341,7 @@ context 'update with landing page info as admin' do let(:url) { "https://blog.datacite.org/re3data-science-europe/" } let(:doi) { create(:doi, doi: "10.14454/10703", url: url, client: client) } - let(:landing_page) do + let(:landing_page) do { "checked" => Time.zone.now.utc.iso8601, "status" => 200, @@ -2372,7 +2383,7 @@ context 'landing page schema-org-id array' do let(:url) { "https://blog.datacite.org/re3data-science-europe/" } let(:xml) { Base64.strict_encode64(file_fixture('datacite.xml').read) } - let(:landing_page) do + let(:landing_page) do { "checked" => Time.zone.now.utc.iso8601, "status" => 200, @@ -2728,7 +2739,7 @@ it 'returns error message' do get "/dois/#{doi.doi}", nil, { "HTTP_ACCEPT" => "application/vnd.jats+xml" } - + expect(last_response.status).to eq(401) expect(json["errors"]).to eq([{"status"=>"401", "title"=>"Bad credentials."}]) end @@ -2984,7 +2995,7 @@ context "style and locale" do it 'returns the Doi' do get "/dois/#{doi.doi}?style=vancouver&locale=de", nil, { "HTTP_ACCEPT" => "text/x-bibliography", 'HTTP_AUTHORIZATION' => 'Bearer ' + bearer } - + expect(last_response.status).to eq(200) expect(last_response.body).to start_with("Ollomo B") end From 4db893177e00ac01f29cf427e040c18a29fd9c44 Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Wed, 17 Jul 2019 19:18:19 +0200 Subject: [PATCH 3/8] keep error handling for base64 encoding --- app/serializers/doi_serializer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/serializers/doi_serializer.rb b/app/serializers/doi_serializer.rb index ba81de691..db6c1b7a4 100644 --- a/app/serializers/doi_serializer.rb +++ b/app/serializers/doi_serializer.rb @@ -12,7 +12,7 @@ class DoiSerializer has_many :media, if: Proc.new { |object, params| params && params[:detail] } attribute :xml, if: Proc.new { |object, params| params && params[:detail] } do |object| - Base64.strict_encode64(object.xml) if object.xml.present? + xml_encoded end attribute :doi do |object| From 3608fc82d368a137950819f6603303e1816b1de5 Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Wed, 17 Jul 2019 22:43:16 +0200 Subject: [PATCH 4/8] handle base-encoding errors. #315 --- app/models/doi.rb | 10 ++++++++-- app/serializers/doi_serializer.rb | 6 +++++- spec/requests/dois_spec.rb | 5 ++--- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/app/models/doi.rb b/app/models/doi.rb index 6ff6072f5..24bca9259 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -459,7 +459,7 @@ def self.import_by_ids(options={}) # get every id between from_id and end_id (from_id..until_id).step(500).each do |id| - DoiImportByIdJob.perform_later(id: id) + DoiImportByIdJob.perform_later(options.merge(id: id)) puts "Queued importing for DOIs with IDs starting with #{id}." end @@ -470,7 +470,13 @@ def self.import_by_id(options={}) return nil unless options[:id].present? id = options[:id].to_i - index = Rails.env.test? ? "dois-test" : self.inactive_index + index = if Rails.env.test? + "dois-test" + elsif options[:index].present? + options[:index] + else + self.inactive_index + end errors = 0 count = 0 diff --git a/app/serializers/doi_serializer.rb b/app/serializers/doi_serializer.rb index db6c1b7a4..cfe6dc254 100644 --- a/app/serializers/doi_serializer.rb +++ b/app/serializers/doi_serializer.rb @@ -12,7 +12,11 @@ class DoiSerializer has_many :media, if: Proc.new { |object, params| params && params[:detail] } attribute :xml, if: Proc.new { |object, params| params && params[:detail] } do |object| - xml_encoded + begin + Base64.strict_encode64(object.xml) if object.xml.present? + rescue ArgumentError + nil + end end attribute :doi do |object| diff --git a/spec/requests/dois_spec.rb b/spec/requests/dois_spec.rb index 101124532..cda0e6b00 100644 --- a/spec/requests/dois_spec.rb +++ b/spec/requests/dois_spec.rb @@ -35,10 +35,9 @@ expect(last_response.status).to eq(200) expect(json['data'].size).to eq(3) - json['data'].each{ - |doi| + json['data'].each do |doi| expect(doi.dig('attributes')).to include('xml') - } + end end end From 6127784e6764d0d53f0cf664c7ae6aac5ee64157 Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Thu, 18 Jul 2019 06:55:22 +0200 Subject: [PATCH 5/8] update affiliation format. datacite/schema#56 --- app/models/doi.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/models/doi.rb b/app/models/doi.rb index 24bca9259..e44acf1b2 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -848,7 +848,7 @@ def set_defaults def self.convert_affiliations logger = Logger.new(STDOUT) - response = Doi.query("creators.affiliation:* -creators.affiliation.name:*", page: { size: 1, cursor: [] }) + response = Doi.query("creators.affiliation:*", page: { size: 1, cursor: [] }) logger.info "#{response.results.total} DOIs found that have the affiliation in the old format." if response.results.total > 0 @@ -856,7 +856,7 @@ def self.convert_affiliations cursor = [] while response.results.results.length > 0 do - response = Doi.query("creators.affiliation:* -creators.affiliation.name:*", page: { size: 1000, cursor: cursor }) + response = Doi.query("creators.affiliation:*", page: { size: 1000, cursor: cursor }) break unless response.results.results.length > 0 logger.info "[Affiliation] Updating #{response.results.results.length} DOIs starting with _id #{response.results.to_a.first[:_id]}." From 730d0c2e21a1958fd302a17ef067bad535d0f372 Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Thu, 18 Jul 2019 07:01:37 +0200 Subject: [PATCH 6/8] updated bolognese gem --- Gemfile.lock | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 7074ebe26..43696566f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -77,7 +77,7 @@ GEM aws-sdk-core (~> 3, >= 3.58.0) aws-sdk-kms (~> 1) aws-sigv4 (~> 1.1) - aws-sdk-sqs (1.18.0) + aws-sdk-sqs (1.18.1) aws-sdk-core (~> 3, >= 3.58.0) aws-sigv4 (~> 1.1) aws-sigv4 (1.1.0) @@ -106,7 +106,7 @@ GEM latex-decode (~> 0.0) binding_of_caller (0.8.0) debug_inspector (>= 0.0.1) - bolognese (1.3.2) + bolognese (1.3.3) activesupport (>= 4.2.5, < 6) benchmark_methods (~> 0.7) bibtex-ruby (~> 4.1) @@ -134,7 +134,7 @@ GEM builder (3.2.3) byebug (11.0.1) cancancan (2.3.0) - capybara (3.25.0) + capybara (3.26.0) addressable mini_mime (>= 0.1.3) nokogiri (~> 1.8) @@ -248,7 +248,7 @@ GEM tilt hamster (3.0.0) concurrent-ruby (~> 1.0) - hashdiff (1.0.0.beta1) + hashdiff (1.0.0) hashie (3.6.0) htmlentities (4.3.4) http-cookie (1.0.3) @@ -415,7 +415,7 @@ GEM rdf (>= 2.2, < 4.0) rdf-xsd (3.0.1) rdf (~> 3.0) - regexp_parser (1.5.1) + regexp_parser (1.6.0) request_store (1.4.1) rack (>= 1.4) rest-client (2.0.2) From 04f4c1b8a301bb0795b1bbcdc0607f874d175eff Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Thu, 18 Jul 2019 07:45:06 +0200 Subject: [PATCH 7/8] fixed variable names --- app/jobs/affiliation_job.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/jobs/affiliation_job.rb b/app/jobs/affiliation_job.rb index b68cf8131..5a4efe802 100644 --- a/app/jobs/affiliation_job.rb +++ b/app/jobs/affiliation_job.rb @@ -6,11 +6,11 @@ def perform(doi_id) doi = Doi.where(doi: doi_id).first if doi.present? - new_creators = Array.wrap(creators).map do |c| + new_creators = Array.wrap(doi.creators).map do |c| c["affiliation"] = { "name" => c["affiliation"] } if c["affiliation"].is_a?(String) c end - new_contributors = Array.wrap(contributors).map do |c| + new_contributors = Array.wrap(doi.contributors).map do |c| c["affiliation"] = { "name" => c["affiliation"] } if c["affiliation"].is_a?(String) c end From 249d12e59add974c814728c88c16c50f2771d400 Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Thu, 18 Jul 2019 09:09:53 +0200 Subject: [PATCH 8/8] updated bolognese gem --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 43696566f..ff2f8c19d 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -106,7 +106,7 @@ GEM latex-decode (~> 0.0) binding_of_caller (0.8.0) debug_inspector (>= 0.0.1) - bolognese (1.3.3) + bolognese (1.3.4) activesupport (>= 4.2.5, < 6) benchmark_methods (~> 0.7) bibtex-ruby (~> 4.1)