Skip to content

Commit

Permalink
Merge pull request #929 from datacite/revert-926-doi-enrichment-redo
Browse files Browse the repository at this point in the history
Revert "Doi-enrichment Redo"
  • Loading branch information
jrhoads authored Mar 14, 2023
2 parents 0409f57 + 3cedaae commit 9d60a4a
Show file tree
Hide file tree
Showing 16 changed files with 33 additions and 614 deletions.
272 changes: 0 additions & 272 deletions app/graphql/schema.graphql

Large diffs are not rendered by default.

34 changes: 3 additions & 31 deletions app/graphql/types/doi_item.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,6 @@ module DoiItem
# Fields of Science attached to the resource itself.
field :fields_of_science,
[FieldOfScienceType],
null: true, description: "OECD Fields of Science of the resource"

# Fields of Science of the resource together with those of its containing repository.
field :fields_of_science_combined,
[FieldOfScienceType],
null: true, description: "OECD Fields of Science of the resource and containing repository"

# Fields of Science declared on the containing repository only.
field :fields_of_science_repository,
[FieldOfScienceType],
null: true, description: "OECD Fields of Science of the containing repository"

# Dates associated with the work; nullable list of DateType.
field :dates,
[DateType],
null: true, description: "Different dates relevant to the work"
Expand Down Expand Up @@ -427,33 +418,14 @@ def registration_agency
{ id: object.agency, name: REGISTRATION_AGENCIES[object.agency] }.compact
end

# Builds facet hashes from a list of Fields of Science names.
#
# @param fos_list [Array<String>, String, nil] names; normalised with Array.wrap
# @return [Array<Hash>] unique hashes with an "id" (the name parameterized with
#   "_" as separator) and the original "name"
def _fos_to_facet(fos_list)
  facets = Array.wrap(fos_list).map do |fos_name|
    facet_id = fos_name.parameterize(separator: "_")
    { "id" => facet_id, "name" => fos_name }
  end
  facets.uniq
end

# Resolver for the repository-level Fields of Science.
#
# @return [Array<Hash>] an empty array when the object has no client, otherwise
#   the facet hashes built by _fos_to_facet from
#   object.fields_of_science_repository
def fields_of_science_repository
  return [] if object.client.blank?

  repository_fos = object.fields_of_science_repository
  _fos_to_facet(repository_fos)
end

# Resolver for the combined Fields of Science: passes
# object.fields_of_science_combined through _fos_to_facet and returns the
# resulting facet hashes.
def fields_of_science_combined
_fos_to_facet(object.fields_of_science_combined)
end

# Resolver for the resource's own Fields of Science.
#
# Keeps only the subjects whose "subjectScheme" is the FOS scheme, strips the
# "FOS: " prefix from each subject and returns one facet hash per distinct name.
#
# @return [Array<Hash>] unique hashes with "id" (name parameterized with "_")
#   and "name"
def fields_of_science
  fos_subjects = Array.wrap(object.subjects).select do |subject|
    subject["subjectScheme"] == "Fields of Science and Technology (FOS)"
  end

  fos_subjects.map do |subject|
    name = subject["subject"].gsub("FOS: ", "")
    { "id" => name.parameterize(separator: "_"), "name" => name }
  end.uniq
end

def creators(**args)
Array.wrap(object.creators)[0...args[:first]].map do |c|
Expand Down
39 changes: 2 additions & 37 deletions app/graphql/types/query_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -277,12 +277,7 @@ def actor(id:)
end

# Resolves the works connection from the DOI search.
#
# @param args [Hash] GraphQL arguments; all of them are forwarded to #response,
#   and :first / :after are also handed to the connection for pagination
# @return [ElasticsearchModelResponseConnection]
def works(**args)
  # #response is declared with **args, so the hash has to be splatted:
  # passing it positionally raises ArgumentError on Ruby 3+.
  ElasticsearchModelResponseConnection.new(
    response(**args),
    context: context,
    first: args[:first],
    after: args[:after],
  )
end

field :work, WorkType, null: false do
Expand Down Expand Up @@ -1258,37 +1253,7 @@ def usage_report(id:)
end

# Runs the DOI search that backs the work queries.
#
# Translates GraphQL argument names into the option names passed to
# Doi.gql_query (:repository_id -> client_id, :member_id -> provider_id,
# :registration_agency -> agency; the rest keep their names) and always passes
# state: "findable". The search is issued exactly once.
#
# @param args [Hash] GraphQL arguments; :after is a URL-safe Base64 cursor and
#   :first the page size
# @return whatever Doi.gql_query returns
def response(**args)
  # No cursor given: pass an empty cursor.
  cursor = args[:after].present? ? Base64.urlsafe_decode64(args[:after]) : []

  Doi.gql_query(
    args[:query],
    ids: args[:ids],
    user_id: args[:user_id],
    client_id: args[:repository_id],
    provider_id: args[:member_id],
    resource_type_id: args[:resource_type_id],
    resource_type: args[:resource_type],
    published: args[:published],
    agency: args[:registration_agency],
    language: args[:language],
    license: args[:license],
    has_person: args[:has_person],
    has_funder: args[:has_funder],
    has_organization: args[:has_organization],
    has_affiliation: args[:has_affiliation],
    has_member: args[:has_member],
    has_citations: args[:has_citations],
    has_parts: args[:has_parts],
    has_versions: args[:has_versions],
    has_views: args[:has_views],
    has_downloads: args[:has_downloads],
    field_of_science: args[:field_of_science],
    facet_count: args[:facet_count],
    pid_entity: args[:pid_entity],
    state: "findable",
    page: { cursor: cursor, size: args[:first] },
  )
end

def set_doi(id)
Expand Down
18 changes: 0 additions & 18 deletions app/graphql/types/work_connection_with_total_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ class WorkConnectionWithTotalType < BaseConnection
# Facet fields exposed on the connection; each is a nullable list of FacetType.
# NOTE(review): `cache: true` presumably enables caching of the resolved value —
# confirm against the project's field/connection base class.
field :affiliations, [FacetType], null: true, cache: true
field :authors, [FacetType], null: true, cache: true
field :fields_of_science, [FacetType], null: true, cache: true
field :fields_of_science_combined, [FacetType], null: true, cache: true
field :fields_of_science_repository, [FacetType], null: true, cache: true
field :licenses, [FacetType], null: true, cache: true
field :languages, [FacetType], null: true, cache: true

Expand Down Expand Up @@ -108,22 +106,6 @@ def fields_of_science
end
end

# Facets for the combined Fields of Science aggregation.
#
# @return [Array] the buckets of the fields_of_science_combined aggregation
#   converted by facet_by_fos, or an empty array when that aggregation is absent
def fields_of_science_combined
  return [] unless object.aggregations.fields_of_science_combined

  facet_by_fos(object.aggregations.fields_of_science_combined.buckets)
end

# Facets for the repository-level Fields of Science aggregation.
#
# @return [Array] the buckets of the fields_of_science_repository aggregation
#   converted by facet_by_fos, or an empty array when that aggregation is absent
def fields_of_science_repository
  return [] unless object.aggregations.fields_of_science_repository

  facet_by_fos(object.aggregations.fields_of_science_repository.buckets)
end

def languages
if object.aggregations.languages
facet_by_language(object.aggregations.languages.buckets)
Expand Down
31 changes: 2 additions & 29 deletions app/models/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ class Client < ApplicationRecord
attr_accessor :password_input, :target_id
attr_reader :from_salesforce

validate :subjects_only_for_disciplinary_repos
validates :subjects, if: :subjects?,
json: {
message: ->(errors) { errors },
Expand Down Expand Up @@ -97,16 +96,12 @@ class Client < ApplicationRecord
# Activities are attached polymorphically (as :auditable) and destroyed with the client.
has_many :activities, as: :auditable, dependent: :destroy

# Validation-time normalisation: defaults first, then subject key conversion.
before_validation :set_defaults
before_validation :convert_subject_hashes_to_camelcase
# Timestamps are stored as UTC ISO 8601 strings.
before_create { self.created = Time.zone.now.utc.iso8601 }
before_save { self.updated = Time.zone.now.utc.iso8601 }
after_create :assign_prefix
# Reference repository hooks, one per commit type (create / update / destroy).
after_create_commit :create_reference_repository
after_update_commit :update_reference_repository
after_destroy_commit :destroy_reference_repository
# After a committed update, call Client.import_dois with this client's symbol.
after_commit on: %i[update] do
::Client.import_dois(self.symbol)
end

# use different index for testing
if Rails.env.test?
Expand Down Expand Up @@ -401,7 +396,6 @@ def as_indexed_json(options = {})
end,
"analytics_dashboard_url" => analytics_dashboard_url,
"analytics_tracking_id" => analytics_tracking_id,
"subjects" => Array.wrap(subjects),
}
end

Expand Down Expand Up @@ -510,7 +504,7 @@ def re3data=(value)
end

# Writes the subjects attribute, always as an array.
#
# Array.wrap turns nil into [] and a single value into a one-element array.
# The attribute is written once; duplicate entries are kept as given.
#
# @param value [Array<Hash>, Hash, nil] subjects to store
def subjects=(value)
  write_attribute(:subjects, Array.wrap(value))
end

def opendoar=(value)
Expand Down Expand Up @@ -916,15 +910,6 @@ def freeze_symbol
errors.add(:symbol, "cannot be changed") if symbol_changed?
end

# Validation: subjects may only be present when the repository type includes
# "disciplinary". Adds an error on :subjects otherwise.
def subjects_only_for_disciplinary_repos
  return unless Array.wrap(subjects).any?
  return unless Array.wrap(repository_type).exclude?("disciplinary")

  message = "Subjects are only allowed for disciplinary repositories. This repository_type is: #{repository_type}"
  errors.add(:subjects, message)
end

def check_id
if symbol && symbol.split(".").first != provider.symbol
errors.add(
Expand Down Expand Up @@ -970,7 +955,7 @@ def assign_prefix
ClientPrefix.create(
client_id: symbol,
provider_prefix_id: provider_prefix.uid,
prefix_id: provider_prefix.prefix.uid
prefix_id: provider_prefix.prefix.uid,
)
end
end
Expand All @@ -995,18 +980,6 @@ def set_defaults
self.doi_quota_allowed = -1 unless doi_quota_allowed.to_i > 0
end

# Rewrites the keys of every subject hash to lower camelCase strings
# (e.g. subject_scheme -> "subjectScheme") and assigns the result back to
# subjects.
#
# Builds new hashes instead of mutating the existing ones, so frozen or shared
# subject hashes are left untouched.
#
# @return [Array<Hash>] the converted subjects, or [] when subjects? is false
def convert_subject_hashes_to_camelcase
  return [] unless subjects?

  self.subjects = Array.wrap(subjects).map do |subject|
    subject.transform_keys { |key| key.to_s.camelcase(:lower) }
  end
end

# Callback target: hands this client to ReferenceRepository.create_from_client.
def create_reference_repository
ReferenceRepository.create_from_client(self)
end
Expand Down
3 changes: 1 addition & 2 deletions app/models/datacite_doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,12 @@ def self.import_by_ids(options = {})
DataciteDoi.where(type: "DataciteDoi").maximum(:id)
).
to_i
batch_size = options[:batch_size] || 50
count = 0

# TODO remove query for type once STI is enabled
# SQS message size limit is 256 kB, up to 2 GB with S3
DataciteDoi.where(type: "DataciteDoi").where(id: from_id..until_id).
find_in_batches(batch_size: batch_size) do |dois|
find_in_batches(batch_size: 50) do |dois|
ids = dois.pluck(:id)
DataciteDoiImportInBulkJob.perform_later(ids, index: index)
count += ids.length
Expand Down
36 changes: 0 additions & 36 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,6 @@ class Doi < ApplicationRecord
before_save :set_defaults, :save_metadata
before_create { self.created = Time.zone.now.utc.iso8601 }

FIELD_OF_SCIENCE_SCHEME = "Fields of Science and Technology (FOS)"

scope :q, ->(query) { where("dataset.doi = ?", query) }

# use different index for testing
Expand Down Expand Up @@ -410,14 +408,6 @@ class Doi < ApplicationRecord
updated: { type: :date },
deleted_at: { type: :date },
cumulative_years: { type: :integer, index: "false" },
subjects: { type: :object, properties: {
subjectScheme: { type: :keyword },
subject: { type: :keyword },
schemeUri: { type: :keyword },
valueUri: { type: :keyword },
lang: { type: :keyword },
classificationCode: { type: :keyword },
} }
}
indexes :provider, type: :object, properties: {
id: { type: :keyword },
Expand Down Expand Up @@ -522,9 +512,6 @@ class Doi < ApplicationRecord
titleType: { type: :keyword },
lang: { type: :keyword },
}
indexes :fields_of_science, type: :keyword
indexes :fields_of_science_combined, type: :keyword
indexes :fields_of_science_repository, type: :keyword
end
end

Expand Down Expand Up @@ -580,9 +567,6 @@ def as_indexed_json(_options = {})
"sizes" => Array.wrap(sizes),
"language" => language,
"subjects" => Array.wrap(subjects),
"fields_of_science" => fields_of_science,
"fields_of_science_repository" => fields_of_science_repository,
"fields_of_science_combined" => fields_of_science_combined,
"xml" => xml,
"is_active" => is_active,
"landing_page" => landing_page,
Expand Down Expand Up @@ -1728,26 +1712,6 @@ def client_id
client.symbol.downcase if client.present?
end

# Extracts Fields of Science names from a list of subject hashes.
#
# Keeps the entries whose "subjectScheme" equals FIELD_OF_SCIENCE_SCHEME and
# returns their "subject" values with every "FOS: " occurrence removed.
# Duplicates are not removed here.
#
# @param subject_array [Array<Hash>, Hash, nil] subjects; normalised with Array.wrap
# @return [Array<String>]
def _fos_filter(subject_array)
  fos_entries = Array.wrap(subject_array).select do |entry|
    entry.dig("subjectScheme") == FIELD_OF_SCIENCE_SCHEME
  end
  fos_entries.map { |entry| entry["subject"].gsub("FOS: ", "") }
end

# Distinct Fields of Science names found in this record's own subjects
# (filtered and cleaned by _fos_filter).
def fields_of_science
_fos_filter(subjects).uniq
end

# Distinct Fields of Science names found in the client's subjects.
# `client&.subjects` is nil when there is no client; _fos_filter wraps its
# argument with Array.wrap, so the result is then an empty array.
def fields_of_science_repository
_fos_filter(client&.subjects).uniq
end

# Set union (Array#|) of the record's own Fields of Science and the
# repository's: own names first, with no duplicates.
def fields_of_science_combined
fields_of_science | fields_of_science_repository
end

# Label combining the client id and the client name, separated by a colon.
#
# @return [String, nil] "<client_id>:<client name>", or nil when no client is present
def client_id_and_name
  return unless client.present?

  "#{client_id}:#{client.name}"
end
Expand Down
26 changes: 21 additions & 5 deletions app/models/schemas/client/subjects.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,34 @@
"items": {
"type": "object",
"properties": {
"classification_code": { "type": "string" },
"classificationCode": { "type": "string" },
"scheme_uri": { "type": "string" },
"schemeUri": { "type": "string" },
"value_uri": { "type": "string" },
"valueUri": { "type": "string" },
"lang": { "type": "string" },
"subject": { "type": "string" },
"subject_scheme": { "type": "string" },
"subjectScheme": { "type": "string" }
},
"required": [
"classificationCode",
"schemeUri",
"subject",
"subjectScheme"
"oneOf": [
{
"required": [
"classification_code",
"scheme_uri",
"subject",
"subject_scheme"
]
},
{
"required": [
"classificationCode",
"schemeUri",
"subject",
"subjectScheme"
]
}
],
"additionalProperties": false
}
Expand Down
10 changes: 1 addition & 9 deletions db/seeds/development/base.seeds.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,11 @@
client =
Client.where(symbol: "DATACITE.TEST").first ||
FactoryBot.create(
:client_with_fos,
:client,
provider: provider,
symbol: ENV["MDS_USERNAME"],
password_input: ENV["MDS_PASSWORD"],
)
if Prefix.where(uid: "10.14454").blank?
prefix = FactoryBot.create(:prefix, uid: "10.14454")
### This creates both the client_prefix and the provider association
FactoryBot.create(
:client_prefix,
client_id: client.symbol, prefix_id: prefix.uid,
)
end
dois = FactoryBot.create_list(:doi, 10, client: client, state: "findable")
FactoryBot.create_list(:event_for_datacite_related, 3, obj_id: dois.first.doi)
FactoryBot.create_list(:event_for_datacite_usage, 2, obj_id: dois.first.doi)
10 changes: 2 additions & 8 deletions lib/tasks/datacite_doi.rake
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,8 @@ namespace :datacite_doi do
# Imports DataCite DOIs by id range, exactly once per invocation.
#
# Environment variables:
#   FROM_ID    - first id (defaults to DataciteDoi.minimum(:id))
#   UNTIL_ID   - last id (defaults to DataciteDoi.maximum(:id))
#   BATCH_SIZE - batch size passed to import_by_ids (defaults to 50)
#   INDEX      - target index (defaults to DataciteDoi.inactive_index)
task import: :environment do
  from_id = (ENV["FROM_ID"] || DataciteDoi.minimum(:id)).to_i
  until_id = (ENV["UNTIL_ID"] || DataciteDoi.maximum(:id)).to_i
  batch_size = ENV.fetch("BATCH_SIZE", 50).to_i

  # One call only: a second call would run the import for the same range again.
  DataciteDoi.import_by_ids(
    from_id: from_id,
    until_id: until_id,
    batch_size: batch_size,
    index: ENV["INDEX"] || DataciteDoi.inactive_index,
  )
end

desc "Import one datacite DOI"
Expand Down
Loading

0 comments on commit 9d60a4a

Please sign in to comment.