Skip to content

Commit

Permalink
Revert "Revert "Doi-enrichment Redo""
Browse files Browse the repository at this point in the history
This reverts commit 3cedaae.
  • Loading branch information
jrhoads committed Mar 31, 2023
1 parent e7aa3a9 commit 2dd72e2
Show file tree
Hide file tree
Showing 16 changed files with 611 additions and 33 deletions.
272 changes: 272 additions & 0 deletions app/graphql/schema.graphql

Large diffs are not rendered by default.

34 changes: 31 additions & 3 deletions app/graphql/types/doi_item.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,15 @@ module DoiItem
# Fields of Science exposed for the resource itself.
field :fields_of_science,
[FieldOfScienceType],
null: true, description: "OECD Fields of Science of the resource"

# Fields of Science of the resource together with those of its containing repository.
field :fields_of_science_combined,
[FieldOfScienceType],
null: true, description: "OECD Fields of Science of the resource and containing repository"

# Fields of Science of the containing repository only.
field :fields_of_science_repository,
[FieldOfScienceType],
null: true, description: "OECD Fields of Science of the containing repository"

# Dates attached to the work.
field :dates,
[DateType],
null: true, description: "Different dates relevant to the work"
Expand Down Expand Up @@ -418,14 +427,33 @@ def registration_agency
{ id: object.agency, name: REGISTRATION_AGENCIES[object.agency] }.compact
end

def fields_of_science
# Turns a list of Field of Science names into facet-style hashes.
#
# @param fos_list [Array<String>, String, nil] names; passed through Array.wrap
# @return [Array<Hash>] de-duplicated hashes with an "id" (the name
#   parameterized with "_" as separator) and the original "name"
def _fos_to_facet(fos_list)
  facets = Array.wrap(fos_list).map do |label|
    facet_id = label.parameterize(separator: "_")
    { "id" => facet_id, "name" => label }
  end
  facets.uniq
end

# Facets for the Fields of Science of the containing repository.
#
# @return [Array<Hash>] empty when the object has no client, otherwise the
#   facets built from object.fields_of_science_repository
def fields_of_science_repository
  return [] if object.client.blank?

  _fos_to_facet(object.fields_of_science_repository)
end

# Facets for the combined Fields of Science reported by the underlying object.
#
# @return [Array<Hash>] facets built from object.fields_of_science_combined
def fields_of_science_combined
  combined_names = object.fields_of_science_combined
  _fos_to_facet(combined_names)
end

# Collects the Field of Science names found in the object's subjects.
#
# Only subjects whose "subjectScheme" is the FOS scheme are kept; the
# "FOS: " marker is stripped from each "subject" value.
#
# @return [Array<String>] unique Field of Science names
def _fos_temp
  Array.wrap(object.subjects).select do |s|
    s["subjectScheme"] == "Fields of Science and Technology (FOS)"
  end.map do |s|
    # The block must evaluate to the bare name; facet hashes are built
    # separately by the caller.
    s["subject"].gsub("FOS: ", "")
  end.uniq
end
# Facets for the Fields of Science found in the resource's own subjects.
#
# @return [Array<Hash>] facets built from the names returned by _fos_temp
def fields_of_science
  resource_names = _fos_temp
  _fos_to_facet(resource_names)
end

def creators(**args)
Array.wrap(object.creators)[0...args[:first]].map do |c|
Expand Down
39 changes: 37 additions & 2 deletions app/graphql/types/query_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,12 @@ def actor(id:)
end

# Resolver for the works connection.
#
# Runs the shared search once and wraps the result in a connection object.
# The arguments are forwarded with `**args` because `response` only accepts
# keywords, and a positional hash is rejected there on Ruby 3. The
# connection options are passed without braces so they bind correctly
# whether the initializer declares keywords or a trailing options hash.
#
# @param args [Hash] GraphQL arguments; :first and :after drive pagination
# @return [ElasticsearchModelResponseConnection]
def works(**args)
  ElasticsearchModelResponseConnection.new(
    response(**args),
    context: context,
    first: args[:first],
    after: args[:after]
  )
end

field :work, WorkType, null: false do
Expand Down Expand Up @@ -1253,7 +1258,37 @@ def usage_report(id:)
end

# Runs the DOI search behind the GraphQL list fields.
#
# GraphQL argument names are mapped onto the option names of
# Doi.gql_query (e.g. :repository_id -> client_id, :member_id ->
# provider_id, :registration_agency -> agency). The search is always
# restricted to state "findable" and is issued exactly once.
#
# @param args [Hash] GraphQL arguments; :after is a URL-safe Base64 cursor,
#   :first is the page size
# @return whatever Doi.gql_query returns
def response(**args)
  # No cursor given: start from the beginning with an empty cursor.
  cursor = args[:after].present? ? Base64.urlsafe_decode64(args[:after]) : []

  Doi.gql_query(
    args[:query],
    ids: args[:ids],
    user_id: args[:user_id],
    client_id: args[:repository_id],
    provider_id: args[:member_id],
    resource_type_id: args[:resource_type_id],
    resource_type: args[:resource_type],
    published: args[:published],
    agency: args[:registration_agency],
    language: args[:language],
    license: args[:license],
    has_person: args[:has_person],
    has_funder: args[:has_funder],
    has_organization: args[:has_organization],
    has_affiliation: args[:has_affiliation],
    has_member: args[:has_member],
    has_citations: args[:has_citations],
    has_parts: args[:has_parts],
    has_versions: args[:has_versions],
    has_views: args[:has_views],
    has_downloads: args[:has_downloads],
    field_of_science: args[:field_of_science],
    facet_count: args[:facet_count],
    pid_entity: args[:pid_entity],
    state: "findable",
    page: {
      cursor: cursor,
      size: args[:first]
    }
  )
end

def set_doi(id)
Expand Down
18 changes: 18 additions & 0 deletions app/graphql/types/work_connection_with_total_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ class WorkConnectionWithTotalType < BaseConnection
# Facet fields of the connection; each is a nullable, cached list of FacetType.
field :affiliations, [FacetType], null: true, cache: true
field :authors, [FacetType], null: true, cache: true
field :fields_of_science, [FacetType], null: true, cache: true
# Field of Science facets for the combined and the repository-level aggregations.
field :fields_of_science_combined, [FacetType], null: true, cache: true
field :fields_of_science_repository, [FacetType], null: true, cache: true
field :licenses, [FacetType], null: true, cache: true
field :languages, [FacetType], null: true, cache: true

Expand Down Expand Up @@ -106,6 +108,22 @@ def fields_of_science
end
end

# Facets from the fields_of_science_combined aggregation.
#
# @return [Array] facet_by_fos applied to the aggregation's buckets, or an
#   empty list when the aggregation is absent
def fields_of_science_combined
  aggregation = object.aggregations.fields_of_science_combined
  return [] unless aggregation

  facet_by_fos(aggregation.buckets)
end

# Facets from the fields_of_science_repository aggregation.
#
# @return [Array] facet_by_fos applied to the aggregation's buckets, or an
#   empty list when the aggregation is absent
def fields_of_science_repository
  aggregation = object.aggregations.fields_of_science_repository
  return [] unless aggregation

  facet_by_fos(aggregation.buckets)
end

def languages
if object.aggregations.languages
facet_by_language(object.aggregations.languages.buckets)
Expand Down
28 changes: 26 additions & 2 deletions app/models/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class Client < ApplicationRecord
attr_accessor :password_input, :target_id
attr_reader :from_salesforce

validate :subjects_only_for_disciplinary_repos
validates :subjects, if: :subjects?,
json: {
message: ->(errors) { errors },
Expand Down Expand Up @@ -96,6 +97,7 @@ class Client < ApplicationRecord
has_many :activities, as: :auditable, dependent: :destroy

before_validation :set_defaults
before_validation :convert_subject_hashes_to_camelcase
before_create { self.created = Time.zone.now.utc.iso8601 }
before_save { self.updated = Time.zone.now.utc.iso8601 }
after_create_commit :assign_prefix
Expand Down Expand Up @@ -399,6 +401,7 @@ def as_indexed_json(options = {})
end,
"analytics_dashboard_url" => analytics_dashboard_url,
"analytics_tracking_id" => analytics_tracking_id,
"subjects" => Array.wrap(subjects),
}
end

Expand Down Expand Up @@ -507,7 +510,7 @@ def re3data=(value)
end

# Stores the subjects as an array with duplicate entries removed.
#
# @param value [Array<Hash>, Hash, nil] subjects; passed through Array.wrap
def subjects=(value)
  # A single write: the de-duplicated list is the only value to persist.
  write_attribute(:subjects, Array.wrap(value).uniq)
end

def opendoar=(value)
Expand Down Expand Up @@ -913,6 +916,15 @@ def freeze_symbol
errors.add(:symbol, "cannot be changed") if symbol_changed?
end

# Validation: subjects may only be set when the repository type includes
# "disciplinary". Adds an error on :subjects otherwise.
def subjects_only_for_disciplinary_repos
  return if Array.wrap(subjects).none?
  return if Array.wrap(repository_type).include?("disciplinary")

  errors.add(
    :subjects,
    "Subjects are only allowed for disciplinary repositories. This repository_type is: #{repository_type}",
  )
end

def check_id
if symbol && symbol.split(".").first != provider.symbol
errors.add(
Expand Down Expand Up @@ -958,7 +970,7 @@ def assign_prefix
ClientPrefix.create(
client_id: symbol,
provider_prefix_id: provider_prefix.uid,
prefix_id: provider_prefix.prefix.uid,
prefix_id: provider_prefix.prefix.uid
)
end
end
Expand All @@ -983,6 +995,18 @@ def set_defaults
self.doi_quota_allowed = -1 unless doi_quota_allowed.to_i > 0
end

# before_validation hook: rewrites the keys of every subject hash to
# lower camelCase (e.g. "subject_scheme" -> "subjectScheme").
#
# Entries that cannot have their keys transformed (anything that is not
# hash-like) are passed through unchanged instead of raising here, so the
# subsequent validation of :subjects can report them.
# Does nothing when no subjects are set.
def convert_subject_hashes_to_camelcase
  return unless subjects?

  self.subjects = Array.wrap(subjects).map do |subject|
    next subject unless subject.respond_to?(:transform_keys)

    subject.transform_keys { |key| key.to_s.camelcase(:lower) }
  end
end

# Delegates to ReferenceRepository.create_from_client, passing this client.
def create_reference_repository
ReferenceRepository.create_from_client(self)
end
Expand Down
3 changes: 2 additions & 1 deletion app/models/datacite_doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,13 @@ def self.import_by_ids(options = {})
DataciteDoi.where(type: "DataciteDoi").maximum(:id)
).
to_i
batch_size = options[:batch_size] || 50
count = 0

# TODO remove query for type once STI is enabled
# SQS message size limit is 256 kB, up to 2 GB with S3
DataciteDoi.where(type: "DataciteDoi").where(id: from_id..until_id).
find_in_batches(batch_size: 50) do |dois|
find_in_batches(batch_size: batch_size) do |dois|
ids = dois.pluck(:id)
DataciteDoiImportInBulkJob.perform_later(ids, index: index)
count += ids.length
Expand Down
36 changes: 36 additions & 0 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ class Doi < ApplicationRecord
before_save :set_defaults, :save_metadata
before_create { self.created = Time.zone.now.utc.iso8601 }

FIELD_OF_SCIENCE_SCHEME = "Fields of Science and Technology (FOS)"

scope :q, ->(query) { where("dataset.doi = ?", query) }

# use different index for testing
Expand Down Expand Up @@ -408,6 +410,14 @@ class Doi < ApplicationRecord
updated: { type: :date },
deleted_at: { type: :date },
cumulative_years: { type: :integer, index: "false" },
subjects: { type: :object, properties: {
subjectScheme: { type: :keyword },
subject: { type: :keyword },
schemeUri: { type: :keyword },
valueUri: { type: :keyword },
lang: { type: :keyword },
classificationCode: { type: :keyword },
} }
}
indexes :provider, type: :object, properties: {
id: { type: :keyword },
Expand Down Expand Up @@ -512,6 +522,9 @@ class Doi < ApplicationRecord
titleType: { type: :keyword },
lang: { type: :keyword },
}
indexes :fields_of_science, type: :keyword
indexes :fields_of_science_combined, type: :keyword
indexes :fields_of_science_repository, type: :keyword
end
end

Expand Down Expand Up @@ -567,6 +580,9 @@ def as_indexed_json(_options = {})
"sizes" => Array.wrap(sizes),
"language" => language,
"subjects" => Array.wrap(subjects),
"fields_of_science" => fields_of_science,
"fields_of_science_repository" => fields_of_science_repository,
"fields_of_science_combined" => fields_of_science_combined,
"xml" => xml,
"is_active" => is_active,
"landing_page" => landing_page,
Expand Down Expand Up @@ -1712,6 +1728,26 @@ def client_id
client.symbol.downcase if client.present?
end

# Picks the Field of Science names out of a list of subject hashes.
#
# Keeps entries whose "subjectScheme" equals FIELD_OF_SCIENCE_SCHEME and
# returns their "subject" with the "FOS: " marker removed. Duplicates are
# kept; callers de-duplicate.
#
# @param subject_array [Array<Hash>, Hash, nil] subjects; passed through Array.wrap
# @return [Array<String>]
def _fos_filter(subject_array)
  Array.wrap(subject_array).each_with_object([]) do |entry, names|
    next unless entry.dig("subjectScheme") == FIELD_OF_SCIENCE_SCHEME

    names << entry["subject"].gsub("FOS: ", "")
  end
end

# Unique Field of Science names taken from this DOI's own subjects.
#
# @return [Array<String>]
def fields_of_science
  resource_fos = _fos_filter(subjects)
  resource_fos.uniq
end

# Unique Field of Science names taken from the client's subjects.
# A missing client yields whatever _fos_filter returns for nil.
#
# @return [Array<String>]
def fields_of_science_repository
  repository_subjects = client&.subjects
  _fos_filter(repository_subjects).uniq
end

# Union of the DOI's own Fields of Science and those of its repository,
# own values first, without duplicates.
#
# @return [Array<String>]
def fields_of_science_combined
  (fields_of_science + fields_of_science_repository).uniq
end

# "<client_id>:<client name>" for this DOI's client.
#
# @return [String, nil] nil when no client is present
def client_id_and_name
  return unless client.present?

  "#{client_id}:#{client.name}"
end
Expand Down
26 changes: 5 additions & 21 deletions app/models/schemas/client/subjects.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,34 +5,18 @@
"items": {
"type": "object",
"properties": {
"classification_code": { "type": "string" },
"classificationCode": { "type": "string" },
"scheme_uri": { "type": "string" },
"schemeUri": { "type": "string" },
"value_uri": { "type": "string" },
"valueUri": { "type": "string" },
"lang": { "type": "string" },
"subject": { "type": "string" },
"subject_scheme": { "type": "string" },
"subjectScheme": { "type": "string" }
},
"oneOf": [
{
"required": [
"classification_code",
"scheme_uri",
"subject",
"subject_scheme"
]
},
{
"required": [
"classificationCode",
"schemeUri",
"subject",
"subjectScheme"
]
}
"required": [
"classificationCode",
"schemeUri",
"subject",
"subjectScheme"
],
"additionalProperties": false
}
Expand Down
10 changes: 9 additions & 1 deletion db/seeds/development/base.seeds.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,19 @@
client =
Client.where(symbol: "DATACITE.TEST").first ||
FactoryBot.create(
:client,
:client_with_fos,
provider: provider,
symbol: ENV["MDS_USERNAME"],
password_input: ENV["MDS_PASSWORD"],
)
if Prefix.where(uid: "10.14454").blank?
prefix = FactoryBot.create(:prefix, uid: "10.14454")
### This creates both the client_prefix and the provider association
FactoryBot.create(
:client_prefix,
client_id: client.symbol, prefix_id: prefix.uid,
)
end
dois = FactoryBot.create_list(:doi, 10, client: client, state: "findable")
FactoryBot.create_list(:event_for_datacite_related, 3, obj_id: dois.first.doi)
FactoryBot.create_list(:event_for_datacite_usage, 2, obj_id: dois.first.doi)
10 changes: 8 additions & 2 deletions lib/tasks/datacite_doi.rake
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,14 @@ namespace :datacite_doi do
# Calls DataciteDoi.import_by_ids once for a range of DOI ids.
#
# Environment variables:
#   FROM_ID    - first id (defaults to DataciteDoi.minimum(:id))
#   UNTIL_ID   - last id (defaults to DataciteDoi.maximum(:id))
#   BATCH_SIZE - batch size; missing, non-numeric or non-positive values
#                fall back to 50
#   INDEX      - target index (defaults to DataciteDoi.inactive_index)
task import: :environment do
  from_id = (ENV["FROM_ID"] || DataciteDoi.minimum(:id)).to_i
  until_id = (ENV["UNTIL_ID"] || DataciteDoi.maximum(:id)).to_i

  # String#to_i yields 0 for empty or non-numeric input; never pass a
  # batch size below 1 on to the importer.
  batch_size = ENV["BATCH_SIZE"].to_i
  batch_size = 50 unless batch_size.positive?

  DataciteDoi.import_by_ids(
    from_id: from_id,
    until_id: until_id,
    batch_size: batch_size,
    index: ENV["INDEX"] || DataciteDoi.inactive_index
  )
end

desc "Import one datacite DOI"
Expand Down
Loading

0 comments on commit 2dd72e2

Please sign in to comment.