Skip to content

Commit

Permalink
Merge pull request #1042 from datacite/schema-4.5-r2
Browse files Browse the repository at this point in the history
Schema 4.5 - Initial release
  • Loading branch information
svogt0511 authored Dec 4, 2023
2 parents 78f867e + 580e305 commit 5c802b8
Show file tree
Hide file tree
Showing 21 changed files with 1,139 additions and 39 deletions.
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ gem "aws-sdk-sqs", "~> 1.3"
gem "base32-url", "~> 0.3"
gem "batch-loader", "~> 1.4", ">= 1.4.1"
gem "bcrypt", "~> 3.1.7"
gem "bolognese", "~> 1.11.5"
gem "bolognese", "~> 2.0.0"
gem "bootsnap", "~> 1.4", ">= 1.4.4", require: false
gem "cancancan", "~> 3.0"
gem "commonmarker", "~> 0.23.4"
Expand Down
6 changes: 3 additions & 3 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ GEM
latex-decode (~> 0.0)
binding_of_caller (1.0.0)
debug_inspector (>= 0.0.1)
bolognese (1.11.5)
bolognese (2.0.0)
activesupport (>= 4.2.5)
benchmark_methods (~> 0.7)
bibtex-ruby (>= 5.1.0)
Expand Down Expand Up @@ -656,7 +656,7 @@ DEPENDENCIES
bcrypt (~> 3.1.7)
better_errors
binding_of_caller
bolognese (~> 1.11.5)
bolognese (~> 2.0.0)
bootsnap (~> 1.4, >= 1.4.4)
bullet (~> 6.1)
byebug
Expand Down Expand Up @@ -749,4 +749,4 @@ DEPENDENCIES
webmock (~> 3.1)

BUNDLED WITH
2.4.10
2.4.20
9 changes: 9 additions & 0 deletions app/controllers/activities_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ def index
end,
}.compact
options[:is_collection] = true
options[:params] = {
publisher: params[:publisher],
}

render json: ActivitySerializer.new(results, options).serialized_json,
status: :ok
Expand Down Expand Up @@ -105,6 +108,9 @@ def index
}.compact
options[:include] = @include
options[:is_collection] = true
options[:params] = {
publisher: params[:publisher],
}

render json: ActivitySerializer.new(results, options).serialized_json,
status: :ok
Expand All @@ -129,6 +135,9 @@ def show
options = {}
options[:include] = @include
options[:is_collection] = false
options[:params] = {
publisher: params[:publisher],
}

render json: ActivitySerializer.new(@activity, options).serialized_json,
status: :ok
Expand Down
12 changes: 11 additions & 1 deletion app/controllers/datacite_dois_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ def index
current_ability: current_ability,
detail: params[:detail],
affiliation: params[:affiliation],
publisher: params[:publisher],
is_collection: options[:is_collection],
}

Expand Down Expand Up @@ -339,6 +340,7 @@ def index
detail: params[:detail],
composite: params[:composite],
affiliation: params[:affiliation],
publisher: params[:publisher],
# The cursor link should be an array of values, but we want to encode it into a single string for the URL
"page[cursor]" =>
page[:cursor] ? make_cursor(results) : nil,
Expand All @@ -359,6 +361,7 @@ def index
detail: params[:detail],
composite: params[:composite],
affiliation: params[:affiliation],
publisher: params[:publisher],
is_collection: options[:is_collection],
}

Expand Down Expand Up @@ -459,6 +462,7 @@ def show
detail: true,
composite: nil,
affiliation: params[:affiliation],
publisher: params[:publisher],
}

render json: DataciteDoiSerializer.new(doi, options).serialized_json,
Expand Down Expand Up @@ -511,7 +515,11 @@ def validate
options = {}
options[:include] = @include
options[:is_collection] = false
options[:params] = { current_ability: current_ability }
options[:params] = {
current_ability: current_ability,
affiliation: params[:affiliation],
publisher: params[:publisher]
}

render json: DataciteDoiSerializer.new(@doi, options).serialized_json,
status: :ok
Expand Down Expand Up @@ -539,6 +547,7 @@ def create
current_ability: current_ability,
detail: true,
affiliation: params[:affiliation],
publisher: params[:publisher],
}

render json: DataciteDoiSerializer.new(@doi, options).serialized_json,
Expand Down Expand Up @@ -597,6 +606,7 @@ def update
current_ability: current_ability,
detail: true,
affiliation: params[:affiliation],
publisher: params[:publisher],
}

render json: DataciteDoiSerializer.new(@doi, options).serialized_json,
Expand Down
13 changes: 12 additions & 1 deletion app/graphql/types/doi_item.rb
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,17 @@ def identifiers
end
end

# Resolves the GraphQL publisher field to a single value.
#
# When the underlying publisher is a Hash its "name" entry is returned;
# any other value (String, nil, ...) is returned unchanged.
def publisher
  value = object.publisher
  return value["name"] if value.is_a?(Hash)

  value
end

def bibtex
pages =
if object.container.to_h["firstPage"].present?
Expand Down Expand Up @@ -909,7 +920,7 @@ def citeproc_hsh
"volume" => object.container.to_h["volume"],
"issue" => object.container.to_h["issue"],
"page" => page,
"publisher" => object.publisher,
"publisher" => (object.publisher.is_a?(Hash) ? object.publisher&.fetch("name", nil) : object.publisher),
"title" => parse_attributes(object.titles, content: "title", first: true),
"URL" => object.url,
"version" => object.version_info,
Expand Down
14 changes: 14 additions & 0 deletions app/jobs/doi_convert_publisher_by_id_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# frozen_string_literal: true

# Background job on the :lupo_background queue that hands its options to
# Doi.convert_publisher_by_id.
class DoiConvertPublisherByIdJob < ApplicationJob
  queue_as :lupo_background

  # Deserialization failures and Elasticsearch bad-request errors are logged
  # instead of being re-raised.
  rescue_from(
    ActiveJob::DeserializationError,
    Elasticsearch::Transport::Transport::Errors::BadRequest
  ) { |error| Rails.logger.error error.message }

  # @param options [Hash] forwarded unchanged to Doi.convert_publisher_by_id
  def perform(options = {})
    Doi.convert_publisher_by_id(options)
  end
end
9 changes: 9 additions & 0 deletions app/lib/params_sanitizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@ class ParamsSanitizer
:titles,
{ titles: %i[title titleType lang] },
:publisher,
{
publisher: %i[
name
publisherIdentifier
publisherIdentifierScheme
schemeUri
lang
],
},
:publicationYear,
:created,
:prefix,
Expand Down
112 changes: 109 additions & 3 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
require "benchmark"

class Doi < ApplicationRecord
audited only: %i[doi url creators contributors titles publisher publication_year types descriptions container sizes formats version_info language dates identifiers related_identifiers related_items funding_references geo_locations rights_list subjects schema_version content_url landing_page aasm_state source reason]
PUBLISHER_JSON_SCHEMA = "#{Rails.root}/app/models/schemas/doi/publisher.json"
audited only: %i[doi url creators contributors titles publisher publisher_obj publication_year types descriptions container sizes formats version_info language dates identifiers related_identifiers related_items funding_references geo_locations rights_list subjects schema_version content_url landing_page aasm_state source reason]

# disable STI
self.inheritance_column = :_type_disabled
Expand Down Expand Up @@ -103,6 +104,17 @@ class Doi < ApplicationRecord
validates_presence_of :doi
validates_presence_of :url, if: Proc.new { |doi| doi.is_registered_or_findable? }

# Options for the `json:` validator used below: the validator's errors are
# passed through unchanged as the message, and the schema is taken from
# PUBLISHER_JSON_SCHEMA.
json_schema_validation = {
message: ->(errors) { errors },
schema: PUBLISHER_JSON_SCHEMA
}

# Guard for the publisher_obj schema validation. Truthy only when the DOI is
# validatable, doi.publisher_obj? is truthy, and doi.publisher is neither
# blank nor matched by `all?(nil)`.
# NOTE(review): if doi.publisher is a Hash, `all?(nil)` compares each
# [key, value] pair (not each value) against nil, so it can only be true for
# an empty Hash, which `blank?` already covers. Presumably
# `values.all?(nil)` was intended; confirm before changing.
def validate_publisher_obj?(doi)
doi.validatable? && doi.publisher_obj? && !(doi.publisher.blank? || doi.publisher.all?(nil))
end

# Validate publisher_obj against the JSON schema only when the guard method holds.
validates :publisher_obj, if: ->(doi) { validate_publisher_obj?(doi) }, json: json_schema_validation

# from https://www.crossref.org/blog/dois-and-matching-regular-expressions/ but using uppercase
validates_format_of :doi, with: /\A10\.\d{4,5}\/[-._;()\/:a-zA-Z0-9*~$=]+\z/, on: :create
validates_format_of :url, with: /\A(ftp|http|https):\/\/\S+/, if: :url?, message: "URL is not valid"
Expand All @@ -117,6 +129,7 @@ class Doi < ApplicationRecord
validate :check_descriptions, if: :descriptions?
validate :check_types, if: :types?
validate :check_container, if: :container?
validate :check_publisher, if: :publisher?
validate :check_subjects, if: :subjects?
validate :check_creators, if: :creators?
validate :check_contributors, if: :contributors?
Expand All @@ -131,6 +144,7 @@ class Doi < ApplicationRecord
after_commit :update_url, on: %i[create update]
after_commit :update_media, on: %i[create update]

before_validation :update_publisher, if: [ :will_save_change_to_publisher?, :publisher? ]
before_validation :update_xml, if: :regenerate
before_validation :update_agency
before_validation :update_field_of_science
Expand Down Expand Up @@ -565,7 +579,7 @@ def as_indexed_json(_options = {})
"creator_names" => creator_names,
"titles" => Array.wrap(titles),
"descriptions" => Array.wrap(descriptions),
"publisher" => publisher,
"publisher" => publisher && publisher["name"],
"client_id" => client_id,
"provider_id" => provider_id,
"consortium_id" => consortium_id,
Expand Down Expand Up @@ -1452,6 +1466,45 @@ def self.convert_affiliation_by_id(options = {})
count
end

# Queues one DoiConvertPublisherByIdJob for every 500th id in the requested
# id range.
#
# @param options [Hash] optional :from_id / :until_id bounds (default to the
#   minimum / maximum Doi id); all options are forwarded to each job together
#   with the batch's starting :id
# @return [String] summary message with the size of the id range
def self.convert_publishers(options = {})
  first_id = (options[:from_id] || Doi.minimum(:id)).to_i
  last_id = (options[:until_id] || Doi.maximum(:id)).to_i
  id_range = first_id..last_id

  # each yielded value is the first id of a batch
  id_range.step(500) do |batch_start|
    DoiConvertPublisherByIdJob.perform_later(options.merge(id: batch_start))
    next if Rails.env.test?

    Rails.logger.info "Queued converting publisher to publisher_obj for DOIs with IDs starting with #{batch_start}."
  end

  "Queued converting #{id_range.size} publishers."
end

# Copies the value returned by each DOI's `publisher` reader into the
# publisher_obj column for the DOIs with ids in id..(id + 499).
#
# @param options [Hash] :id is the first id of the batch
# @return [Integer, nil] number of DOIs updated (the count reached so far
#   when one of the rescued errors interrupts the batch), or nil when :id is
#   blank
def self.convert_publisher_by_id(options = {})
  return nil if options[:id].blank?

  id = options[:id].to_i
  count = 0

  Doi.where(id: id..(id + 499)).find_each do |doi|
    begin
      Doi.auditing_enabled = false
      doi.update_columns(publisher_obj: doi.publisher)
    ensure
      # Switch auditing back on even when the update raises; otherwise the
      # rescue below would return with Doi.auditing_enabled still false.
      Doi.auditing_enabled = true
    end

    count += 1
  end

  Rails.logger.info "[MySQL] Converted publishers for #{count} DOIs with IDs #{id} - #{id + 499}." if count > 0

  count
rescue TypeError, ActiveRecord::ActiveRecordError, ActiveRecord::LockWaitTimeout => e
  Rails.logger.error "[MySQL] Error converting publishers for DOIs with IDs #{id} - #{id + 499}: #{e.message}."
  count
end

def self.convert_containers(options = {})
from_id = (options[:from_id] || Doi.minimum(:id)).to_i
until_id = (options[:until_id] || Doi.maximum(:id)).to_i
Expand Down Expand Up @@ -1901,6 +1954,10 @@ def check_container
errors.add(:container, "Container '#{container}' should be an object instead of a string.") unless container.is_a?(Hash)
end

# Validation: records an error on :publisher unless the publisher value is a Hash.
def check_publisher
  return if publisher.is_a?(Hash)

  errors.add(:publisher, "Publisher '#{publisher}' should be an object instead of a string.")
end

def check_language
errors.add(:language, "Language #{language} is in an invalid format.") if !language.match?(/^[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*$/)
end
Expand Down Expand Up @@ -2230,6 +2287,30 @@ def update_types
).compact
end

# Dispatches on the raw (before type cast) publisher value: a Hash and a
# String each have their own update helper; anything else resets both
# publisher attributes.
def update_publisher
  raw = publisher_before_type_cast

  if raw.is_a?(Hash)
    update_publisher_from_hash
  elsif raw.is_a?(String)
    update_publisher_from_string
  else
    reset_publishers
  end
end

# Reader that presents the publisher as a Hash.
#
# Returns nil when both the "publisher" and "publisher_obj" attributes are
# nil, the "publisher_obj" value when it is non-empty, and otherwise a Hash
# whose "name" is the "publisher" attribute (or "" when that is nil).
def publisher
  legacy_name = read_attribute("publisher")
  structured = read_attribute("publisher_obj")

  return nil if legacy_name.nil? && structured.nil?
  return structured unless structured.nil? || structured.empty?

  { "name" => legacy_name || "" }
end

def self.repair_landing_page(id: nil)
if id.blank?
Rails.logger.error "[Error] No id provided."
Expand Down Expand Up @@ -2368,7 +2449,6 @@ def self.add_index_type(options = {})
"Finished updating dois, total #{count}"
end


# QUICK FIX UNTIL PROJECT IS A RESOURCE_TYPE_GENERAL IN THE SCHEMA
def handle_resource_type(types)
if types.present? && types["resourceType"] == "Project" && (types["resourceTypeGeneral"] == "Text" || types["resourceTypeGeneral"] == "Other")
Expand All @@ -2377,4 +2457,30 @@ def handle_resource_type(types)
types.to_h["resourceTypeGeneral"]
end
end

private
# Fills publisher_obj and publisher from a raw Hash publisher value.
#
# Keys are normalized to symbols first, so string-keyed and symbol-keyed
# hashes are handled alike. Only the five known publisher keys are copied
# into publisher_obj, nil entries are dropped, and publisher is set to the
# name. When every value in the raw hash is nil both attributes are reset.
def update_publisher_from_hash
  attrs = publisher_before_type_cast.transform_keys do |key|
    key.respond_to?(:to_sym) ? key.to_sym : key
  end
  return reset_publishers if attrs.values.all?(nil)

  self.publisher_obj = {
    name: attrs.fetch(:name, nil),
    lang: attrs.fetch(:lang, nil),
    schemeUri: attrs.fetch(:schemeUri, nil),
    publisherIdentifier: attrs.fetch(:publisherIdentifier, nil),
    publisherIdentifierScheme: attrs.fetch(:publisherIdentifierScheme, nil)
  }.compact
  self.publisher = attrs.fetch(:name, nil)
end

# Uses a raw String publisher value as the name: publisher_obj becomes a
# Hash holding only that name, and publisher becomes the string itself.
def update_publisher_from_string
  name = publisher_before_type_cast

  self.publisher_obj = { name: name }
  self.publisher = name
end

# Clears both publisher attributes: publisher_obj first, then publisher.
def reset_publishers
  self.publisher = (self.publisher_obj = nil)
end
end
12 changes: 12 additions & 0 deletions app/models/schemas/doi/publisher.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"type": "object",
"$schema": "http://json-schema.org/draft-04/schema#",
"properties": {
"name": { "type": "string" },
"publisherIdentifier": { "type": "string" },
"publisherIdentifierScheme": { "type": "string" },
"schemeUri": { "type": "string" },
"lang": { "type": "string" }
},
"additionalProperties": false
}
Loading

0 comments on commit 5c802b8

Please sign in to comment.