diff --git a/app/controllers/datacite_dois_controller.rb b/app/controllers/datacite_dois_controller.rb index 9f5ae49c6..ffd37da12 100644 --- a/app/controllers/datacite_dois_controller.rb +++ b/app/controllers/datacite_dois_controller.rb @@ -666,10 +666,12 @@ def safe_params xml = xml.strip end + fail(ActionController::UnpermittedParameters, ["creators must be an Array"]) if p[:creators]&.respond_to?(:keys) p[:creators]&.each do |c| fail(ActionController::UnpermittedParameters, ["nameIdentifiers must be an Array"]) if c[:nameIdentifiers]&.respond_to?(:keys) end + fail(ActionController::UnpermittedParameters, ["contributors must be an Array"]) if p[:contributors]&.respond_to?(:keys) p[:contributors]&.each do |c| fail(ActionController::UnpermittedParameters, ["nameIdentifiers must be an Array"]) if c[:nameIdentifiers]&.respond_to?(:keys) end diff --git a/app/models/doi.rb b/app/models/doi.rb index c6d671c2c..79829a879 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -868,7 +868,7 @@ def self.gql_query(query, options={}) filter << { terms: { "client.certificate" => options[:certificate].split(",") }} if options[:certificate].present? filter << { term: { "creators.nameIdentifiers.nameIdentifier" => "https://orcid.org/#{orcid_from_url(options[:user_id])}" }} if options[:user_id].present? filter << { term: { "creators.nameIdentifiers.nameIdentifierScheme" => "ORCID" }} if options[:has_person].present? - + # match either one of has_affiliation, has_organization, has_funder or has_member if options[:has_organization].present? should << { term: { "creators.nameIdentifiers.nameIdentifierScheme" => "ROR" }} @@ -1067,7 +1067,7 @@ def self.query(query, options={}) filter << { terms: { "client.certificate" => options[:certificate].split(",") }} if options[:certificate].present? filter << { term: { "creators.nameIdentifiers.nameIdentifier" => "https://orcid.org/#{orcid_from_url(options[:user_id])}" }} if options[:user_id].present? filter << { term: { "creators.nameIdentifiers.nameIdentifierScheme" => "ORCID" }} if options[:has_person].present? - + # match either one of has_affiliation, has_organization, or has_funder if options[:has_organization].present? should << { term: { "creators.nameIdentifiers.nameIdentifierScheme" => "ROR" }} @@ -2133,7 +2133,7 @@ def set_defaults end def update_agency - if agency.blank? || agency.casecmp?("datacite") + if agency.blank? || agency.casecmp?("datacite") self.agency = "datacite" self.type = "DataciteDoi" elsif agency.casecmp?("crossref") @@ -2310,4 +2310,41 @@ def self.migrate_landing_page(options={}) "Finished migrating landing pages." end -end + + def self.add_index_type(options={}) + return nil unless options[:from_id].present? + + + from_id = options[:from_id].to_i + until_id = (options[:until_id] || (from_id + 499)).to_i + + # get every id between from_id and end_id + count = 0 + + Rails.logger.info "[migration_index_types] adding type information for DOIs with IDs #{from_id} - #{until_id}." + + Doi.where(id: from_id..until_id).where('type' => nil).find_each(batch_size: 500) do |doi| + begin + if doi.agency.casecmp?("datacite") + type = "DataciteDoi" + elsif doi.agency.casecmp?("crossref") + type = "OtherDoi" + else + type = "DataciteDoi" + end + + doi.update_columns("type" => type) + + count += 1 + Rails.logger.info "Updated #{doi.doi} (#{doi.id})" + + rescue => error + Rails.logger.error "Error updating #{doi.doi} (#{doi.id}), #{error.message}" + end + end + + "Finished updating dois, total #{count}" + end + + +end \ No newline at end of file diff --git a/lib/tasks/doi.rake b/lib/tasks/doi.rake index e38f45348..4c24e5bb2 100644 --- a/lib/tasks/doi.rake +++ b/lib/tasks/doi.rake @@ -243,4 +243,13 @@ namespace :doi do count = Doi.delete_dois_by_prefix(ENV['PREFIX']) puts "#{count} DOIs with prefix #{ENV['PREFIX']} deleted." end + + desc 'Add type information to dois based on id range' + task :add_index_type => :environment do + options = { + from_id: ENV['FROM_ID'], + until_id: ENV['UNTIL_ID'] + } + puts Doi.add_index_type(options) + end end diff --git a/spec/fixtures/files/nasa_error.json b/spec/fixtures/files/nasa_error.json new file mode 100644 index 000000000..82023f91c --- /dev/null +++ b/spec/fixtures/files/nasa_error.json @@ -0,0 +1,66 @@ +{ + "data": { + "type":"dois", + "id":"10.14454/rdnt.1x18sn", + "attributes":{ + "doi": "10.14454/rdnt.1x18sn", + "prefix":"10.14454", + "creators":[ + { + "name":"Maskey, Manil", + "nameType":"Personal", + "givenName":"Manil", + "familyName":"Maskey" + }, + { + "name":"Ramachandran, Rahul", + "nameType":"Personal", + "givenName":"Rahul", + "familyName":"Ramachandran" + }, + { + "name":"Gurung, Iksha", + "nameType":"Personal", + "givenName":"Iksha", + "familyName":"Gurung" + }, + { + "name":"Ramasubramanian, Muthukumaran", + "nameType":"Personal", + "givenName":"Muthukumaran", + "familyName":"Ramasubramanian" + } + ], + "titles":{ + "title":"Tropical Cyclone Satellite Imagery and Wind Speed Dataset" + }, + "publisher": "Radiant MLHub", + "publicationYear": 2020, + "types":{ + "resourceTypeGeneral":"Dataset" + }, + "version":1.0, + "contributors":{ + "nameType":"Organizational", + "name":"NASA Interagency Implementation and Advanced Concepts Team (IMPACT)", + "contributorType":"DataCurator" + }, + "dates":{ + "dateType":"Valid", + "date": "2000/2019" + }, + "fundingReferences":{ + "funderName":"NASA Earth Science Data Systems Program" + }, + "rightsList":{ + "rights":"CC BY 4.0", + "rightsUri":"https://creativecommons.org/licenses/by/4.0/" + }, + "url":"http://registry.mlhub.earth/10.34911/rdnt.1x18sn", + "landingPage":{ + "url":"http://registry.mlhub.earth/10.34911/rdnt.1x18sn" + }, + "event":"publish" + } + } +} \ No newline at end of file diff --git a/spec/fixtures/vcr_cassettes/DataciteDoisController/downloads/has_downloads_meta.yml b/spec/fixtures/vcr_cassettes/DataciteDoisController/downloads/has_downloads_meta.yml new file mode 100644 index 000000000..61e755128 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/DataciteDoisController/downloads/has_downloads_meta.yml @@ -0,0 +1,51 @@ +--- +http_interactions: +- request: + method: get + uri: https://doi.org/ra/10.14454 + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - Mozilla/5.0 (compatible; Maremma/4.7.2; mailto:info@datacite.org) + Accept: + - text/html,application/json,application/xml;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5 + response: + status: + code: 200 + message: '' + headers: + Date: + - Mon, 28 Sep 2020 10:11:53 GMT + Content-Type: + - application/json;charset=UTF-8 + Connection: + - keep-alive + Set-Cookie: + - __cfduid=d58afafd88534ac7ebc3f22717efd88aa1601287913; expires=Wed, 28-Oct-20 + 10:11:53 GMT; path=/; domain=.doi.org; HttpOnly; SameSite=Lax; Secure + Cf-Cache-Status: + - DYNAMIC + Cf-Request-Id: + - 0575cb979600001f154824e200000001 + Expect-Ct: + - max-age=604800, report-uri="https://report-uri.cloudflare.com/cdn-cgi/beacon/expect-ct" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Server: + - cloudflare + Cf-Ray: + - 5d9caed288151f15-FRA + body: + encoding: ASCII-8BIT + string: |- + [ + { + "DOI": "10.14454", + "RA": "DataCite" + } + ] + http_version: null + recorded_at: Mon, 28 Sep 2020 10:11:53 GMT +recorded_with: VCR 5.1.0 diff --git a/spec/requests/datacite_dois_spec.rb b/spec/requests/datacite_dois_spec.rb index a3b4c5fdb..1e07e177a 100644 --- a/spec/requests/datacite_dois_spec.rb +++ b/spec/requests/datacite_dois_spec.rb @@ -2184,6 +2184,16 @@ end end + context 'when the request has wrong object in nameIDentifiers' do + let(:valid_attributes) { JSON.parse(file_fixture('nasa_error.json').read) } + + + it 'fails to create a Doi' do + post '/dois', valid_attributes, headers + + expect(last_response.status).to eq(422) + end + end # context 'when the request is a large xml file' do