From c0ea3568fceb7a47c01b396581c45ca3e930d1e7 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Wed, 18 Mar 2020 09:47:04 +0100 Subject: [PATCH 01/10] change casing of nested objects in injection `ActiveModelSerializers::Deserialization.jsonapi_parse!` changes the cases of nested objects. See: https://github.com/rails-api/active_model_serializers/blob/6b093c965f75f87d5bbf0510e64b5193f4c6f157/lib/active_model_serializers/adapter/json_api/deserialization.rb and https://github.com/rails-api/case_transform. we need to change the keys to camelcase addresses https://github.com/datacite/lupo/issues/367 --- app/models/event.rb | 35 +++++++++++++++++++++++++++++++++++ lib/tasks/event.rake | 10 ++++++++++ spec/requests/events_spec.rb | 28 ++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) diff --git a/app/models/event.rb b/app/models/event.rb index d6a3c9a0a..0f788aa1c 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -504,6 +504,37 @@ def self.subj_id_check(options = {}) end end + def self.modify_nested_objects(options = {}) + size = (options[:size] || 1000).to_i + cursor = [options[:from_id], options[:until_id]] + + response = Event.query(nil, page: { size: 1, cursor: [] }) + Rails.logger.warn "[modify_nested_objects] #{response.results.total} events for source datacite-crossref." + + # walk through results using cursor + if response.results.total.positive? + while response.results.results.length.positive? + response = Event.query(nil, page: { size: size, cursor: cursor }) + break unless response.results.results.length.positive? + + Rails.logger.warn "[modify_nested_objects] modify_nested_objects #{response.results.results.length} events starting with _id #{response.results.to_a.first[:_id]}." 
+ cursor = response.results.to_a.last[:sort] + Rails.logger.warn "[modify_nested_objects] Cursor: #{cursor} " + + ids = response.results.results.map(&:obj_id).uniq + CamelcaseNestedObjectsJob.perform_later(ids, options) + end + end + end + + + def self.camelcace_nested_objects(uuid) + event = Event.find_by(uuid: uuid) + subj = event.subj.transform_keys { |key| key.to_s.underscore.camelcase(:lower) } + obj = event.obj.transform_keys { |key| key.to_s.underscore.camelcase(:lower) } + event.update_attributes(subj: subj, obj: obj) + end + def self.label_state_event(event) subj_prefix = event[:subj_id][/(10\.\d{4,5})/, 1] unless Prefix.where(uid: subj_prefix).exists? @@ -658,6 +689,10 @@ def set_defaults self.subj = subj.to_h.merge("id" => self.subj_id) self.obj = obj.to_h.merge("id" => self.obj_id) + ### makes keys camel case to match JSONAPI + self.subj.transform_keys! { |key| key.to_s.underscore.camelcase(:lower) } + self.obj.transform_keys! { |key| key.to_s.underscore.camelcase(:lower) } + self.total = 1 if total.blank? self.relation_type_id = "references" if relation_type_id.blank? self.occurred_at = Time.zone.now.utc if occurred_at.blank? 
diff --git a/lib/tasks/event.rake b/lib/tasks/event.rake index 63f052a04..f0b647651 100644 --- a/lib/tasks/event.rake +++ b/lib/tasks/event.rake @@ -88,6 +88,16 @@ namespace :subj_id_check do end end +namespace :modify_nested_objects do + desc 'changes casing of nested objects in the database' + task :check => :environment do + from_id = (ENV['FROM_ID'] || Event.minimum(:id)).to_i + until_id = (ENV['UNTIL_ID'] || Event.maximum(:id)).to_i + + Event.modify_nested_objects(from_id: from_id, until_id: until_id) + end +end + namespace :datacite_crossref do desc 'Import crossref dois for all events' task :import_doi => :environment do diff --git a/spec/requests/events_spec.rb b/spec/requests/events_spec.rb index e26189ea2..62472328e 100644 --- a/spec/requests/events_spec.rb +++ b/spec/requests/events_spec.rb @@ -266,6 +266,34 @@ expect(json.dig("meta", "registrants", 0, "id")).to eq("datacite.crossref.citations") end end + + context "with nested attrtibutes" do + let(:uri) { "/events" } + let(:params) do + { "data" => { "type" => "events", + "attributes" => { + "subjId" => "https://doi.org/10.18713/jimis-170117-1-2", + "subj" => { "@id":"https://doi.org/10.18713/jimis-170117-1-2", "@type":"ScholarlyArticle", "datePublished":"2017", "proxyIdentifiers":[], "registrantId":"datacite.inist.umr7300" }, + "obj" => { "@id":"https://doi.org/10.1016/j.jastp.2013.05.001", "@type":"ScholarlyArticle", "datePublished":"2013-09", "proxyIdentifiers":["13646826"], "registrantId":"datacite.crossref.citations" }, + "objId" => "https://doi.org/10.1016/j.jastp.2013.05.001", + "relationTypeId" => "references", + "sourceId" => "datacite-crossref", + "sourceToken" => "sourceToken" } } } + end + + it "are correctly stored" do + post uri, params, headers + + + expect(last_response.status).to eq(201) + puts json.dig("data", "id") + event = Event.where(uuid: json.dig("data", "id")).first + puts event.inspect + expect(event[:obj].has_key?('datePublished')).to be_truthy + 
expect(event[:obj].has_key?('registrantId')).to be_truthy + expect(event[:obj].has_key?('proxyIdentifiers')).to be_truthy + end + end end context "upsert" do From 7d1d5ffaf7ba19754457b1449b0f7a1bd59bafe2 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Wed, 18 Mar 2020 09:47:19 +0100 Subject: [PATCH 02/10] rake task to change indexed values addresses https://github.com/datacite/lupo/issues/367 --- app/jobs/camelcase_nested_objects_by_id_job.rb | 7 +++++++ app/jobs/camelcase_nested_objects_job.rb | 7 +++++++ 2 files changed, 14 insertions(+) create mode 100644 app/jobs/camelcase_nested_objects_by_id_job.rb create mode 100644 app/jobs/camelcase_nested_objects_job.rb diff --git a/app/jobs/camelcase_nested_objects_by_id_job.rb b/app/jobs/camelcase_nested_objects_by_id_job.rb new file mode 100644 index 000000000..0b13971ce --- /dev/null +++ b/app/jobs/camelcase_nested_objects_by_id_job.rb @@ -0,0 +1,7 @@ +class CamelcaseNestedObjectsByIdJob < ActiveJob::Base + queue_as :lupo_background + + def perform(uuid, options = {}) + Event.camelcase_nested_objects(uuid) + end +end diff --git a/app/jobs/camelcase_nested_objects_job.rb b/app/jobs/camelcase_nested_objects_job.rb new file mode 100644 index 000000000..aa29fe4f9 --- /dev/null +++ b/app/jobs/camelcase_nested_objects_job.rb @@ -0,0 +1,7 @@ +class CamelcaseNestedObjectsJob < ActiveJob::Base + queue_as :lupo_background + + def perform(ids, options = {}) + ids.each { |id| CamelcaseNestedObjectsByIdJob.perform_later(id, options) } + end +end From 4b0de48b31851e563a0d065a7be62655635f643b Mon Sep 17 00:00:00 2001 From: kjgarza Date: Wed, 18 Mar 2020 13:03:53 +0100 Subject: [PATCH 03/10] fix aggregation --- app/models/event.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/event.rb b/app/models/event.rb index 0f788aa1c..1d8dbd18b 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -579,7 +579,7 @@ def uuid_format end def registrant_id - [subj["registrant_id"], obj["registrant_id"], 
subj["provider_id"], obj["provider_id"]].compact + [subj["registrantId"], obj["registrantId"], subj["providerId"], obj["providerId"]].compact end def subtype From 7d75ae5cc6de8cd6d5d58be77733760178945747 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Wed, 8 Apr 2020 13:38:12 +0200 Subject: [PATCH 04/10] fix wrong key to make search --- app/models/event.rb | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/app/models/event.rb b/app/models/event.rb index 1d8dbd18b..243a43bd5 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -521,18 +521,20 @@ def self.modify_nested_objects(options = {}) cursor = response.results.to_a.last[:sort] Rails.logger.warn "[modify_nested_objects] Cursor: #{cursor} " - ids = response.results.results.map(&:obj_id).uniq + ids = response.results.results.map(&:uuid).uniq CamelcaseNestedObjectsJob.perform_later(ids, options) end end end - def self.camelcace_nested_objects(uuid) + def self.camelcase_nested_objects(uuid) event = Event.find_by(uuid: uuid) - subj = event.subj.transform_keys { |key| key.to_s.underscore.camelcase(:lower) } - obj = event.obj.transform_keys { |key| key.to_s.underscore.camelcase(:lower) } - event.update_attributes(subj: subj, obj: obj) + if event.present? 
+ subj = event.subj.transform_keys { |key| key.to_s.underscore.camelcase(:lower) } + obj = event.obj.transform_keys { |key| key.to_s.underscore.camelcase(:lower) } + event.update_attributes(subj: subj, obj: obj) + end end def self.label_state_event(event) From 0ecbfe3a019d352a68b28c5599fd39528cf9ca9b Mon Sep 17 00:00:00 2001 From: kjgarza Date: Thu, 9 Apr 2020 10:12:42 +0200 Subject: [PATCH 05/10] dealing with cases in which rake task has not been run yet --- app/jobs/event_registrant_update_by_id_job.rb | 2 +- app/models/event.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/jobs/event_registrant_update_by_id_job.rb b/app/jobs/event_registrant_update_by_id_job.rb index 45cc4bc78..68e2a5fe5 100644 --- a/app/jobs/event_registrant_update_by_id_job.rb +++ b/app/jobs/event_registrant_update_by_id_job.rb @@ -19,7 +19,7 @@ def perform(id, options={}) registrant_id = get_crossref_member_id(item.obj_id) end - obj = item.obj.merge("registrant_id" => registrant_id) unless registrant_id.nil? + obj = item.obj.merge("registrantId" => registrant_id) unless registrant_id.nil? Rails.logger.info obj.inspect item.update_attributes(obj: obj) if obj.present? 
when "crossref" diff --git a/app/models/event.rb b/app/models/event.rb index 243a43bd5..0139a49ae 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -630,8 +630,8 @@ def obj_cache_key def citation_year "" unless (INCLUDED_RELATION_TYPES + RELATIONS_RELATION_TYPES).include?(relation_type_id) - subj_publication = subj["date_published"] || (date_published(subj_id) || year_month) - obj_publication = obj["date_published"] || (date_published(obj_id) || year_month) + subj_publication = subj["datePublished"] || subj["date_published"] || (date_published(subj_id) || year_month) + obj_publication = obj["datePublished"] || obj["date_published"] || (date_published(obj_id) || year_month) [subj_publication[0..3].to_i, obj_publication[0..3].to_i].max end From 9753d2ed6181f50fac43a525018c6498f8123e0e Mon Sep 17 00:00:00 2001 From: kjgarza Date: Thu, 30 Apr 2020 11:42:19 +0200 Subject: [PATCH 06/10] feedback --- app/models/event.rb | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/app/models/event.rb b/app/models/event.rb index 0139a49ae..adaaa6bee 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -509,7 +509,7 @@ def self.modify_nested_objects(options = {}) cursor = [options[:from_id], options[:until_id]] response = Event.query(nil, page: { size: 1, cursor: [] }) - Rails.logger.warn "[modify_nested_objects] #{response.results.total} events for source datacite-crossref." + Rails.logger.info "[modify_nested_objects] #{response.results.total} events for source datacite-crossref." # walk through results using cursor if response.results.total.positive? @@ -517,12 +517,14 @@ def self.modify_nested_objects(options = {}) response = Event.query(nil, page: { size: size, cursor: cursor }) break unless response.results.results.length.positive? - Rails.logger.warn "[modify_nested_objects] modify_nested_objects #{response.results.results.length} events starting with _id #{response.results.to_a.first[:_id]}." 
+ Rails.logger.info "[modify_nested_objects] modify_nested_objects #{response.results.results.length} events starting with _id #{response.results.to_a.first[:_id]}." cursor = response.results.to_a.last[:sort] - Rails.logger.warn "[modify_nested_objects] Cursor: #{cursor} " + Rails.logger.info "[modify_nested_objects] Cursor: #{cursor} " ids = response.results.results.map(&:uuid).uniq - CamelcaseNestedObjectsJob.perform_later(ids, options) + ids.each do |id| + CamelcaseNestedObjectsByIdJob.perform_later(id, options) + end end end end From 74bfaa6da4649116c5191580114760fd0cce5d22 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Thu, 30 Apr 2020 11:48:37 +0200 Subject: [PATCH 07/10] remove job that is not needed anymore --- app/jobs/camelcase_nested_objects_job.rb | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 app/jobs/camelcase_nested_objects_job.rb diff --git a/app/jobs/camelcase_nested_objects_job.rb b/app/jobs/camelcase_nested_objects_job.rb deleted file mode 100644 index aa29fe4f9..000000000 --- a/app/jobs/camelcase_nested_objects_job.rb +++ /dev/null @@ -1,7 +0,0 @@ -class CamelcaseNestedObjectsJob < ActiveJob::Base - queue_as :lupo_background - - def perform(ids, options = {}) - ids.each { |id| CamelcaseNestedObjectsByIdJob.perform_later(id, options) } - end -end From d14ea081f76f7bfaae93bcbb456fcdd38af585b6 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Wed, 6 May 2020 17:07:34 +0200 Subject: [PATCH 08/10] lint --- app/models/event.rb | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/app/models/event.rb b/app/models/event.rb index adaaa6bee..8efa22c8a 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -529,14 +529,13 @@ def self.modify_nested_objects(options = {}) end end - def self.camelcase_nested_objects(uuid) - event = Event.find_by(uuid: uuid) - if event.present? 
- subj = event.subj.transform_keys { |key| key.to_s.underscore.camelcase(:lower) } - obj = event.obj.transform_keys { |key| key.to_s.underscore.camelcase(:lower) } - event.update_attributes(subj: subj, obj: obj) - end + event = Event.find_by(uuid: uuid) + if event.present? + subj = event.subj.transform_keys { |key| key.to_s.underscore.camelcase(:lower) } + obj = event.obj.transform_keys { |key| key.to_s.underscore.camelcase(:lower) } + event.update_attributes(subj: subj, obj: obj) + end end def self.label_state_event(event) From b6679341975c23c4c53c458442040f4996bf5e3f Mon Sep 17 00:00:00 2001 From: kjgarza Date: Wed, 6 May 2020 17:07:44 +0200 Subject: [PATCH 09/10] added extra test --- spec/factories/default.rb | 2 +- spec/models/event_spec.rb | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/spec/factories/default.rb b/spec/factories/default.rb index 0826595ef..e887c6847 100644 --- a/spec/factories/default.rb +++ b/spec/factories/default.rb @@ -337,7 +337,7 @@ source_id { "datacite_related" } source_token { "datacite_related_123" } sequence(:subj_id) { |n| "http://doi.org/10.5061/DRYAD.47SD5e/#{n}" } - subj { { "datePublished" => "2006-06-13T16:14:19Z" } } + subj { { "date_published" => "2006-06-13T16:14:19Z", "registrant_id" => "datacite.datacite" } } obj_id { "http://doi.org/10.5061/DRYAD.47SD5/1" } relation_type_id { "references" } end diff --git a/spec/models/event_spec.rb b/spec/models/event_spec.rb index c3da94b7f..4c4aac7cb 100644 --- a/spec/models/event_spec.rb +++ b/spec/models/event_spec.rb @@ -75,4 +75,13 @@ # end # end end + + describe "camelcase_nested_objects" do + subject { create(:event_for_datacite_related) } + + it "should transform keys" do + Event.camelcase_nested_objects(subject.uuid) + expect(subject.subj).to eq({"datePublished"=>"2006-06-13T16:14:19Z", "id"=>"https://doi.org/10.5061/dryad.47sd5e/1", "registrantId"=>"datacite.datacite"}) + end + end end From 7d6102cdfb7876b9645140c3ec9ee4f1fa67c076 Mon Sep 17 
00:00:00 2001 From: kjgarza Date: Wed, 6 May 2020 17:36:18 +0200 Subject: [PATCH 10/10] fix --- spec/models/event_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/models/event_spec.rb b/spec/models/event_spec.rb index 4c4aac7cb..a1ddf699c 100644 --- a/spec/models/event_spec.rb +++ b/spec/models/event_spec.rb @@ -81,7 +81,7 @@ it "should transform keys" do Event.camelcase_nested_objects(subject.uuid) - expect(subject.subj).to eq({"datePublished"=>"2006-06-13T16:14:19Z", "id"=>"https://doi.org/10.5061/dryad.47sd5e/1", "registrantId"=>"datacite.datacite"}) + expect(subject.subj.keys).to include("datePublished", "registrantId", "id") end end end