From e49a611144926334c78b21918fea3a2aad2badfc Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Sun, 2 Feb 2020 10:18:30 +0100 Subject: [PATCH] define and test associations between dois and events #388 --- app/models/concerns/indexable.rb | 20 ++++++- app/models/doi.rb | 88 ++++++++++++++++++++++------ app/models/event.rb | 14 +++-- app/serializers/doi_serializer.rb | 2 +- spec/factories/default.rb | 22 ++++++- spec/models/doi_spec.rb | 96 ++++++++++++++++++++++++++++--- 6 files changed, 208 insertions(+), 34 deletions(-) diff --git a/app/models/concerns/indexable.rb b/app/models/concerns/indexable.rb index 56d20e58f..56b5f44dc 100644 --- a/app/models/concerns/indexable.rb +++ b/app/models/concerns/indexable.rb @@ -4,12 +4,28 @@ module Indexable require 'aws-sdk-sqs' included do - after_commit on: [:create, :update] do + after_commit on: [:create] do # use index_document instead of update_document to also update virtual attributes IndexJob.perform_later(self) if self.class.name == "Doi" update_column(:indexed, Time.zone.now) - send_import_message(self.to_jsonapi) if aasm_state == "findable" unless (Rails.env.test? || %w(crossref medra kisti jalc op).include?(client.symbol.downcase.split(".").first)) + send_import_message(self.to_jsonapi) if aasm_state == "findable" && !Rails.env.test? && !%w(crossref medra kisti jalc op).include?(client.symbol.downcase.split(".").first) + elsif self.class.name == "Event" + # reindex dois associated with Event + @source_doi = Doi.where(doi: source_doi).first if source_doi + IndexJob.perform_later(@source_doi) if @source_doi + + @target_doi = Doi.where(doi: target_doi).first if target_doi + IndexJob.perform_later(@target_doi) if @target_doi + end + end + + after_commit on: [:update] do + # use index_document instead of update_document to also update virtual attributes + IndexJob.perform_later(self) + if self.class.name == "Doi" + update_column(:indexed, Time.zone.now) + send_import_message(self.to_jsonapi) if aasm_state == "findable" && !Rails.env.test? && !%w(crossref medra kisti jalc op).include?(client.symbol.downcase.split(".").first) end end diff --git a/app/models/doi.rb b/app/models/doi.rb index 1e6c876a8..4b46557f5 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -75,8 +75,12 @@ class Doi < ActiveRecord::Base has_many :views, -> { where target_relation_type_id: "views" }, class_name: "Event", primary_key: :doi, foreign_key: :target_doi, dependent: :destroy has_many :downloads, -> { where target_relation_type_id: "downloads" }, class_name: "Event", primary_key: :doi, foreign_key: :target_doi, dependent: :destroy has_many :references, -> { where source_relation_type_id: "references" }, class_name: "Event", primary_key: :doi, foreign_key: :source_doi, dependent: :destroy - has_many :citations, -> { where target_relation_type_id: "references" }, class_name: "Event", primary_key: :doi, foreign_key: :source_doi, dependent: :destroy - + has_many :citations, -> { where target_relation_type_id: "citations" }, class_name: "Event", primary_key: :doi, foreign_key: :target_doi, dependent: :destroy + has_many :parts, -> { where source_relation_type_id: "parts" }, class_name: "Event", primary_key: :doi, foreign_key: :source_doi, dependent: :destroy + has_many :part_of, -> { where target_relation_type_id: "part_of" }, class_name: "Event", primary_key: :doi, foreign_key: :target_doi, dependent: :destroy + has_many :versions, -> { where source_relation_type_id: "versions" }, class_name: "Event", primary_key: :doi, foreign_key: :source_doi, dependent: :destroy + has_many :version_of, -> { where target_relation_type_id: "version_of" }, class_name: "Event", primary_key: :doi, foreign_key: :target_doi, dependent: :destroy + delegate :provider, to: :client, allow_nil: true delegate :consortium_id, to: :provider, allow_nil: true @@ -366,17 +370,17 @@ class Doi < ActiveRecord::Base technical_contact: { type: :object, properties: { email: { type: :text }, given_name: { type: :text}, - family_name: { type: :text } + family_name: { type: :text }, } }, secondary_technical_contact: { type: :object, properties: { email: { type: :text }, given_name: { type: :text}, - family_name: { type: :text } + family_name: { type: :text }, } }, billing_contact: { type: :object, properties: { email: { type: :text }, given_name: { type: :text}, - family_name: { type: :text } + family_name: { type: :text }, } }, secondary_billing_contact: { type: :object, properties: { email: { type: :text }, @@ -410,8 +414,24 @@ class Doi < ActiveRecord::Base indexes :download_count, type: :integer indexes :reference_count, type: :integer indexes :citation_count, type: :integer + indexes :part_count, type: :integer + indexes :part_of_count, type: :integer + indexes :version_count, type: :integer + indexes :version_of_count, type: :integer indexes :views_over_time, type: :object indexes :downloads_over_time, type: :object + indexes :reference_ids, type: :keyword + indexes :citation_ids, type: :keyword + indexes :part_ids, type: :keyword + indexes :part_of_ids, type: :keyword + indexes :version_ids, type: :keyword + indexes :version_of_ids, type: :keyword + indexes :references, type: :object + indexes :citations, type: :object + indexes :parts, type: :object + indexes :part_of, type: :object + indexes :versions, type: :object + indexes :version_of, type: :object end end @@ -433,16 +453,22 @@ def as_indexed_json(options={}) "consortium_id" => consortium_id, "resource_type_id" => resource_type_id, "media_ids" => media_ids, - "view_ids" => view_ids, "view_count" => view_count, "views_over_time" => views_over_time, - "download_ids" => download_ids, "download_count" => download_count, "downloads_over_time" => downloads_over_time, "reference_ids" => reference_ids, "reference_count" => reference_count, "citation_ids" => citation_ids, "citation_count" => citation_count, + "part_ids" => part_ids, + "part_count" => part_count, + "part_of_ids" => part_of_ids, + "part_of_count" => part_of_count, + "version_ids" => version_ids, + "version_count" => version_count, + "version_of_ids" => version_of_ids, + "version_of_count" => version_of_count, "prefix" => prefix, "suffix" => suffix, "types" => types, @@ -478,8 +504,12 @@ def as_indexed_json(options={}) "provider" => provider.try(:as_indexed_json), "resource_type" => resource_type.try(:as_indexed_json), "media" => media.map { |m| m.try(:as_indexed_json) }, - # "views" => views.map { |m| m.try(:as_indexed_json) }, - # "downloads" => downloads.map { |m| m.try(:as_indexed_json) } + "references" => references.map { |m| m.try(:as_indexed_json) }, + "citations" => citations.map { |m| m.try(:as_indexed_json) }, + "parts" => parts.map { |m| m.try(:as_indexed_json) }, + "part_of" => part_of.map { |m| m.try(:as_indexed_json) }, + "versions" => versions.map { |m| m.try(:as_indexed_json) }, + "version_of" => version_of.map { |m| m.try(:as_indexed_json) }, } end @@ -871,10 +901,6 @@ def media_ids media.pluck(:id).map { |m| Base32::URL.encode(m, split: 4, length: 16) }.compact end - def view_ids - views.pluck(:uuid) - end - def view_count views.pluck(:total).inject(:+).to_i end @@ -883,10 +909,6 @@ def views_over_time views.pluck(:occurred_at, :total).map { |v| { year_month: v[0].present? ? v[0].utc.iso8601[0..6] : nil, total: v[1] } } end - def download_ids - downloads.pluck(:uuid) - end - def download_count downloads.pluck(:total).inject(:+).to_i end @@ -911,6 +933,38 @@ def citation_count citations.count end + def part_ids + parts.pluck(:uuid) + end + + def part_count + parts.count + end + + def part_of_ids + part_of.pluck(:uuid) + end + + def part_of_count + part_of.count + end + + def version_ids + versions.pluck(:uuid) + end + + def version_count + versions.count + end + + def version_of_ids + version_of.pluck(:uuid) + end + + def version_of_count + version_of.count + end + def xml_encoded Base64.strict_encode64(xml) if xml.present? rescue ArgumentError diff --git a/app/models/event.rb b/app/models/event.rb index 1585aa763..a2315308f 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -21,7 +21,7 @@ class Event < ActiveRecord::Base before_validation :set_defaults before_create :set_source_and_target_doi - validates :uuid, format: { with: /\A[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\z/i } + validate :uuid_format # include state machine include AASM @@ -552,6 +552,10 @@ def issn nil end + def uuid_format + errors.add(:uuid, "#{uuid} is not a valid UUID") unless UUID.validate(uuid) + end + def registrant_id [subj["registrant_id"], obj["registrant_id"], subj["provider_id"], obj["provider_id"]].compact end @@ -621,10 +625,10 @@ def set_source_and_target_doi self.source_relation_type_id = "references" self.target_relation_type_id = "citations" when *PASSIVE_RELATION_TYPES - self.source_doi = doi_from_url(subj_id) - self.target_doi = doi_from_url(obj_id) - self.source_relation_type_id = "citations" - self.target_relation_type_id = "references" + self.source_doi = doi_from_url(obj_id) + self.target_doi = doi_from_url(subj_id) + self.source_relation_type_id = "references" + self.target_relation_type_id = "citations" when "unique-dataset-investigations-regular" self.target_doi = doi_from_url(obj_id) self.target_relation_type_id = "views" diff --git a/app/serializers/doi_serializer.rb b/app/serializers/doi_serializer.rb index d1ae3c9bb..a25050883 100644 --- a/app/serializers/doi_serializer.rb +++ b/app/serializers/doi_serializer.rb @@ -7,7 +7,7 @@ class DoiSerializer set_id :uid # don't cache dois, as works are cached using the doi model - attributes :doi, :prefix, :suffix, :identifiers, :creators, :titles, :publisher, :container, :publication_year, :subjects, :contributors, :dates, :language, :types, :related_identifiers, :sizes, :formats, :version, :rights_list, :descriptions, :geo_locations, :funding_references, :xml, :url, :content_url, :metadata_version, :schema_version, :source, :is_active, :state, :reason, :landing_page, :view_count, :download_count, :reference_count, :citation_count, :views_over_time, :downloads_over_time, :created, :registered, :published, :updated, :citations + attributes :doi, :prefix, :suffix, :identifiers, :creators, :titles, :publisher, :container, :publication_year, :subjects, :contributors, :dates, :language, :types, :related_identifiers, :sizes, :formats, :version, :rights_list, :descriptions, :geo_locations, :funding_references, :xml, :url, :content_url, :metadata_version, :schema_version, :source, :is_active, :state, :reason, :landing_page, :view_count, :download_count, :reference_count, :citation_count, :part_count, :part_of_count, :version_count, :version_of_count, :views_over_time, :downloads_over_time, :created, :registered, :published, :updated, :citations attributes :prefix, :suffix, if: Proc.new { |object, params| params && params[:detail] } belongs_to :client, record_type: :clients diff --git a/spec/factories/default.rb b/spec/factories/default.rb index 819cce4bd..fb3f92944 100644 --- a/spec/factories/default.rb +++ b/spec/factories/default.rb @@ -335,12 +335,30 @@ relation_type_id { "references" } end + factory :event_for_datacite_parts do + source_id { "datacite_related" } + source_token { "datacite_related_123" } + subj_id { "http://doi.org/10.5061/DRYAD.47SD5" } + subj { { "datePublished" => "2006-06-13T16:14:19Z" } } + sequence(:obj_id) { |n| "http://doi.org/10.5061/DRYAD.47SD5/#{n}" } + relation_type_id { "has-part" } + end + + factory :event_for_datacite_part_of do + source_id { "datacite_related" } + source_token { "datacite_related_123" } + subj_id { "http://doi.org/10.5061/DRYAD.47SD5/1" } + subj { { "datePublished" => "2006-06-13T16:14:19Z" } } + obj_id { "http://doi.org/10.5061/DRYAD.47SD5" } + relation_type_id { "is-part-of" } + end + factory :event_for_datacite_crossref do source_id { "datacite_crossref" } source_token { "datacite_crossref_123" } - subj_id { "https://doi.org/10.5061/DRYAD.47SD5e" } + sequence(:subj_id) { |n| "https://doi.org/10.5061/DRYAD.47SD5e/#{n}" } subj { { "datePublished" => "2006-06-13T16:14:19Z" } } - sequence(:obj_id) { |n| "https://doi.org/10.1371/journal.pbio.200141#{n}" } + obj_id { "https://doi.org/10.1371/journal.pbio.2001414" } relation_type_id { "is-referenced-by" } end diff --git a/spec/models/doi_spec.rb b/spec/models/doi_spec.rb index 8f9aaa5c1..e87439b77 100644 --- a/spec/models/doi_spec.rb +++ b/spec/models/doi_spec.rb @@ -552,7 +552,6 @@ it "has views" do expect(doi.views.count).to eq(3) - expect(doi.view_ids.count).to eq(3) expect(doi.view_count).to eq(75) expect(doi.views_over_time.first).to eq(:total=>25, :year_month=>"2015-06") @@ -574,7 +573,6 @@ it "has downloads" do expect(doi.downloads.count).to eq(3) - expect(doi.download_ids.count).to eq(3) expect(doi.download_count).to eq(30) expect(doi.downloads_over_time.first).to eq(:total=>10, :year_month=>"2015-06") @@ -608,7 +606,7 @@ describe "citations", elasticsearch: true do let(:client) { create(:client) } let(:doi) { create(:doi, client: client, aasm_state: "findable") } - let!(:citations) { create_list(:event_for_datacite_crossref, 3, subj_id: "https://doi.org/#{doi.doi}", relation_type_id: "is-referenced-by") } + let!(:citations) { create_list(:event_for_datacite_crossref, 1, subj_id: "https://doi.org/#{doi.doi}", relation_type_id: "is-referenced-by") } before do Doi.import @@ -616,16 +614,100 @@ end it "has citations" do - expect(doi.citations.count).to eq(3) - expect(doi.citation_ids.count).to eq(3) - expect(doi.citation_count).to eq(3) + expect(doi.citations.count).to eq(1) + expect(doi.citation_ids.count).to eq(1) + expect(doi.citation_count).to eq(1) citation = doi.citations.first - expect(citation.source_doi).to eq(doi.uid) + expect(citation.target_doi).to eq(doi.uid) expect(citation.total).to eq(1) end end + describe "parts", elasticsearch: true do + let(:client) { create(:client) } + let(:doi) { create(:doi, client: client, aasm_state: "findable") } + let!(:parts) { create_list(:event_for_datacite_parts, 3, subj_id: "https://doi.org/#{doi.doi}", relation_type_id: "has-part") } + + before do + Doi.import + sleep 1 + end + + it "has parts" do + expect(doi.parts.count).to eq(3) + expect(doi.part_ids.count).to eq(3) + expect(doi.part_count).to eq(3) + + part = doi.parts.first + expect(part.source_doi).to eq(doi.uid) + expect(part.total).to eq(1) + end + end + + describe "part of", elasticsearch: true do + let(:client) { create(:client) } + let(:doi) { create(:doi, client: client, aasm_state: "findable") } + let!(:part_of) { create_list(:event_for_datacite_part_of, 1, subj_id: "https://doi.org/#{doi.doi}", relation_type_id: "is-part-of") } + + before do + Doi.import + sleep 1 + end + + it "has part of" do + expect(doi.part_of.count).to eq(1) + expect(doi.part_of_ids.count).to eq(1) + expect(doi.part_of_count).to eq(1) + + part_of = doi.part_of.first + expect(part_of.target_doi).to eq(doi.uid) + expect(part_of.total).to eq(1) + end + end + + describe "versions", elasticsearch: true do + let(:client) { create(:client) } + let(:doi) { create(:doi, client: client, aasm_state: "findable") } + let!(:versions) { create_list(:event_for_datacite_parts, 3, subj_id: "https://doi.org/#{doi.doi}", relation_type_id: "has-version") } + + before do + Doi.import + sleep 1 + end + + it "has versions" do + expect(doi.versions.count).to eq(3) + expect(doi.version_ids.count).to eq(3) + expect(doi.version_count).to eq(3) + + version = doi.versions.first + expect(version.source_doi).to eq(doi.uid) + expect(version.total).to eq(1) + end + end + + describe "version of", elasticsearch: true do + let(:client) { create(:client) } + let(:doi) { create(:doi, client: client, aasm_state: "findable") } + let!(:part_of) { create_list(:event_for_datacite_part_of, 1, subj_id: "https://doi.org/#{doi.doi}", relation_type_id: "is-version-of") } + + before do + Doi.import + sleep 1 + end + + it "has version of" do + expect(doi.version_of.count).to eq(1) + expect(doi.version_of_ids.count).to eq(1) + expect(doi.version_of_count).to eq(1) + + version_of = doi.version_of.first + expect(version_of.target_doi).to eq(doi.uid) + expect(version_of.total).to eq(1) + end + end + describe "convert_affiliations" do let(:doi) { create(:doi)}