diff --git a/Dockerfile b/Dockerfile index a166aa11b..dc48bb109 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,6 +42,7 @@ WORKDIR /home/app/webapp RUN mkdir -p vendor/bundle && \ chown -R app:app . && \ chmod -R 755 . && \ + gem update --system && \ gem install bundler && \ /sbin/setuser app bundle install --path vendor/bundle diff --git a/Gemfile b/Gemfile index 0f529fa50..ba41f35c7 100644 --- a/Gemfile +++ b/Gemfile @@ -14,7 +14,6 @@ gem 'nokogiri', '~> 1.8.1' gem 'diffy', '~> 3.2', '>= 3.2.1' gem 'commonmarker', '~> 0.17.9' gem 'iso8601', '~> 0.9.0' -gem 'patron', '~> 0.13.1', require: false gem 'maremma', '>= 4.1' gem 'bolognese', '~> 1.0' gem 'dalli', '~> 2.7', '>= 2.7.6' diff --git a/Gemfile.lock b/Gemfile.lock index d8374312a..aaab7a8f9 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -55,7 +55,7 @@ GEM api-pagination (4.8.2) arel (9.0.0) aws-eventstream (1.0.1) - aws-partitions (1.127.0) + aws-partitions (1.128.0) aws-sdk-core (3.44.1) aws-eventstream (~> 1.0) aws-partitions (~> 1.0) @@ -166,7 +166,7 @@ GEM database_cleaner (1.7.0) debug_inspector (0.0.3) diff-lcs (1.3) - diffy (3.2.1) + diffy (3.3.0) docile (1.1.5) docopt (0.6.1) domain_name (0.5.20180417) @@ -230,12 +230,12 @@ GEM tilt hamster (3.0.0) concurrent-ruby (~> 1.0) - hashdiff (0.3.7) + hashdiff (0.3.8) hashie (3.6.0) htmlentities (4.3.4) http-cookie (1.0.3) domain_name (~> 0.5) - i18n (1.3.0) + i18n (1.4.0) concurrent-ruby (~> 1.0) i18n_data (0.8.0) iso8601 (0.9.1) @@ -318,7 +318,6 @@ GEM oj (2.18.5) oj_mimic_json (1.0.1) pandoc-ruby (2.0.2) - patron (0.13.1) postrank-uri (1.0.23) addressable (>= 2.3.0, < 2.6) nokogiri (>= 1.6.1, < 1.9) @@ -367,7 +366,7 @@ GEM rb-fsevent (0.10.3) rb-inotify (0.10.0) ffi (~> 1.0) - rdf (3.0.7) + rdf (3.0.9) hamster (~> 3.0) link_header (~> 0.0, >= 0.0.8) rdf-aggregate-repo (2.2.1) @@ -535,7 +534,6 @@ DEPENDENCIES nokogiri (~> 1.8.1) oj (>= 2.8.3) oj_mimic_json (~> 1.0, >= 1.0.1) - patron (~> 0.13.1) premailer (~> 1.11, >= 1.11.1) pwqgen.rb (~> 0.1.0) rack-cors (~> 1.0, >= 1.0.2) diff --git a/app/controllers/clients_controller.rb b/app/controllers/clients_controller.rb index c1f8373cb..c500b220c 100644 --- a/app/controllers/clients_controller.rb +++ b/app/controllers/clients_controller.rb @@ -31,43 +31,51 @@ def index elsif params[:ids].present? response = Client.find_by_ids(params[:ids], page: page, sort: sort) else - response = Client.query(params[:query], year: params[:year], provider_id: params[:provider_id], software: params[:software], query_fields: params[:query_fields], page: page, sort: sort) + response = Client.query(params[:query], year: params[:year], provider_id: params[:provider_id], software: params[:software], page: page, sort: sort) end - total = response.results.total - total_pages = page[:size] > 0 ? (total.to_f / page[:size]).ceil : 0 - years = total > 0 ? facet_by_year(response.response.aggregations.years.buckets) : nil - providers = total > 0 ? facet_by_provider(response.response.aggregations.providers.buckets) : nil - software = total > 0 ? facet_by_software(response.response.aggregations.software.buckets) : nil + begin + total = response.results.total + total_pages = page[:size] > 0 ? (total.to_f / page[:size]).ceil : 0 + years = total > 0 ? facet_by_year(response.response.aggregations.years.buckets) : nil + providers = total > 0 ? facet_by_provider(response.response.aggregations.providers.buckets) : nil + software = total > 0 ? facet_by_software(response.response.aggregations.software.buckets) : nil - @clients = response.results.results + @clients = response.results.results - options = {} - options[:meta] = { - total: total, - "totalPages" => total_pages, - page: page[:number], - years: years, - providers: providers, - software: software - }.compact - - options[:links] = { - self: request.original_url, - next: @clients.blank? ? nil : request.base_url + "/clients?" + { - query: params[:query], - "provider-id" => params[:provider_id], - software: params[:software], - year: params[:year], - fields: params[:fields], - "page[number]" => page[:number] + 1, - "page[size]" => page[:size], - sort: params[:sort] }.compact.to_query + options = {} + options[:meta] = { + total: total, + "totalPages" => total_pages, + page: page[:number], + years: years, + providers: providers, + software: software }.compact - options[:include] = @include - options[:is_collection] = true - render json: ClientSerializer.new(@clients, options).serialized_json, status: :ok + options[:links] = { + self: request.original_url, + next: @clients.blank? ? nil : request.base_url + "/clients?" + { + query: params[:query], + "provider-id" => params[:provider_id], + software: params[:software], + year: params[:year], + fields: params[:fields], + "page[number]" => page[:number] + 1, + "page[size]" => page[:size], + sort: params[:sort] }.compact.to_query + }.compact + options[:include] = @include + options[:is_collection] = true + + render json: ClientSerializer.new(@clients, options).serialized_json, status: :ok + rescue Elasticsearch::Transport::Transport::Errors::BadRequest => exception + Bugsnag.notify(exception) + + message = JSON.parse(exception.message[6..-1]).to_h.dig("error", "root_cause", 0, "reason") + + render json: { "errors" => { "title" => message }}.to_json, status: :bad_request + end end def show diff --git a/app/controllers/data_centers_controller.rb b/app/controllers/data_centers_controller.rb index b55e64a19..ccf7b556f 100644 --- a/app/controllers/data_centers_controller.rb +++ b/app/controllers/data_centers_controller.rb @@ -30,38 +30,46 @@ def index response = Client.query(params[:query], year: params[:year], provider_id: params[:member_id], fields: params[:fields], page: page, sort: sort) end - total = response.results.total - total_pages = page[:size] > 0 ? (total.to_f / page[:size]).ceil : 0 - years = total > 0 ? facet_by_year(response.response.aggregations.years.buckets) : nil - providers = total > 0 ? facet_by_provider(response.response.aggregations.providers.buckets) : nil + begin + total = response.results.total + total_pages = page[:size] > 0 ? (total.to_f / page[:size]).ceil : 0 + years = total > 0 ? facet_by_year(response.response.aggregations.years.buckets) : nil + providers = total > 0 ? facet_by_provider(response.response.aggregations.providers.buckets) : nil - @clients = response.results.results + @clients = response.results.results - options = {} - options[:meta] = { - total: total, - "total-pages" => total_pages, - page: page[:number], - years: years, - members: providers - }.compact + options = {} + options[:meta] = { + total: total, + "total-pages" => total_pages, + page: page[:number], + years: years, + members: providers + }.compact - options[:links] = { - self: request.original_url, - next: @clients.blank? ? nil : request.base_url + "/data-centers?" + { - query: params[:query], - "member-id" => params[:member_id], - year: params[:year], - fields: params[:fields], - "page[number]" => page[:number] + 1, - "page[size]" => page[:size], - sort: params[:sort] }.compact.to_query - }.compact - options[:include] = @include - options[:is_collection] = true - options[:links] = nil + options[:links] = { + self: request.original_url, + next: @clients.blank? ? nil : request.base_url + "/data-centers?" + { + query: params[:query], + "member-id" => params[:member_id], + year: params[:year], + fields: params[:fields], + "page[number]" => page[:number] + 1, + "page[size]" => page[:size], + sort: params[:sort] }.compact.to_query + }.compact + options[:include] = @include + options[:is_collection] = true + options[:links] = nil + + render json: DataCenterSerializer.new(@clients, options).serialized_json, status: :ok + rescue Elasticsearch::Transport::Transport::Errors::BadRequest => exception + Bugsnag.notify(exception) - render json: DataCenterSerializer.new(@clients, options).serialized_json, status: :ok + message = JSON.parse(exception.message[6..-1]).to_h.dig("error", "root_cause", 0, "reason") + + render json: { "errors" => { "title" => message }}.to_json, status: :bad_request + end end def show diff --git a/app/controllers/dois_controller.rb b/app/controllers/dois_controller.rb index d1c8c6946..36b6e9f5d 100644 --- a/app/controllers/dois_controller.rb +++ b/app/controllers/dois_controller.rb @@ -13,130 +13,162 @@ class DoisController < ApplicationController def index authorize! :read, Doi - if Rails.env.production? && !current_user.try(:is_admin_or_staff?) - # don't use elasticsearch - - # support nested routes - if params[:client_id].present? - client = Client.where('datacentre.symbol = ?', params[:client_id]).first - collection = client.present? ? client.dois : Doi.none - total = client.cached_doi_count.reduce(0) { |sum, d| sum + d[:count].to_i } - elsif params[:provider_id].present? && params[:provider_id] != "admin" - provider = Provider.where('allocator.symbol = ?', params[:provider_id]).first - collection = provider.present? ? Doi.joins(:client).where("datacentre.allocator = ?", provider.id) : Doi.none - total = provider.cached_doi_count.reduce(0) { |sum, d| sum + d[:count].to_i } - elsif params[:id].present? - collection = Doi.where(doi: params[:id]) - total = collection.all.size - else - provider = Provider.unscoped.where('allocator.symbol = ?', "ADMIN").first - total = provider.present? ? provider.cached_doi_count.reduce(0) { |sum, d| sum + d[:count].to_i } : 0 - collection = Doi - end - - if params[:query].present? - collection = Doi.q(params[:query]) - total = collection.all.size - end - - page = params[:page] || {} - if page[:size].present? - page[:size] = [page[:size].to_i, 1000].min - max_number = page[:size] > 0 ? 10000/page[:size] : 1 - else - page[:size] = 25 - max_number = 10000/page[:size] - end - page[:number] = page[:number].to_i > 0 ? [page[:number].to_i, max_number].min : 1 - total_pages = (total.to_f / page[:size]).ceil - - order = case params[:sort] - when "name" then "dataset.doi" - when "-name" then "dataset.doi DESC" - when "created" then "dataset.created" - else "dataset.created DESC" - end + # if Rails.env.production? && !current_user.try(:is_admin_or_staff?) + # # don't use elasticsearch + + # # support nested routes + # if params[:client_id].present? + # client = Client.where('datacentre.symbol = ?', params[:client_id]).first + # collection = client.present? ? client.dois : Doi.none + # total = client.cached_doi_count.reduce(0) { |sum, d| sum + d[:count].to_i } + # elsif params[:provider_id].present? && params[:provider_id] != "admin" + # provider = Provider.where('allocator.symbol = ?', params[:provider_id]).first + # collection = provider.present? ? Doi.joins(:client).where("datacentre.allocator = ?", provider.id) : Doi.none + # total = provider.cached_doi_count.reduce(0) { |sum, d| sum + d[:count].to_i } + # elsif params[:id].present? + # collection = Doi.where(doi: params[:id]) + # total = collection.all.size + # else + # provider = Provider.unscoped.where('allocator.symbol = ?', "ADMIN").first + # total = provider.present? ? provider.cached_doi_count.reduce(0) { |sum, d| sum + d[:count].to_i } : 0 + # collection = Doi + # end + + # if params[:query].present? + # collection = Doi.q(params[:query]) + # total = collection.all.size + # end + + # page = params[:page] || {} + # if page[:size].present? + # page[:size] = [page[:size].to_i, 1000].min + # max_number = page[:size] > 0 ? 10000/page[:size] : 1 + # else + # page[:size] = 25 + # max_number = 10000/page[:size] + # end + # page[:number] = page[:number].to_i > 0 ? [page[:number].to_i, max_number].min : 1 + # total_pages = (total.to_f / page[:size]).ceil + + # order = case params[:sort] + # when "name" then "dataset.doi" + # when "-name" then "dataset.doi DESC" + # when "created" then "dataset.created" + # else "dataset.created DESC" + # end + + # @dois = collection.order(order).page(page[:number]).per(page[:size]).without_count + + # options = {} + # options[:meta] = { + # total: total, + # "totalPages" => total_pages, + # page: page[:number].to_i + # }.compact + + # options[:links] = { + # self: request.original_url, + # next: @dois.blank? ? nil : request.base_url + "/dois?" + { + # query: params[:query], + # "provider-id" => params[:provider_id], + # "client-id" => params[:client_id], + # "page[number]" => page[:number] + 1, + # "page[size]" => page[:size], + # sort: params[:sort] }.compact.to_query + # }.compact + # options[:include] = @include + # options[:is_collection] = true + # options[:params] = { + # :current_ability => current_ability, + # } + + # render json: DoiSerializer.new(@dois, options).serialized_json, status: :ok + # else + sort = case params[:sort] + when "name" then { "doi" => { order: 'asc' }} + when "-name" then { "doi" => { order: 'desc' }} + when "created" then { created: { order: 'asc' }} + when "-created" then { created: { order: 'desc' }} + when "updated" then { updated: { order: 'asc' }} + when "-updated" then { updated: { order: 'desc' }} + when "relevance" then { "_score": { "order": "desc" }} + else { updated: { order: 'desc' }} + end + + page = params[:page] || {} + + if page[:size].present? + page[:size] = [page[:size].to_i, 1000].min + max_number = page[:size] > 0 ? 10000/page[:size] : 1 + else + page[:size] = 25 + max_number = 10000/page[:size] + end + page[:number] = page[:number].to_i > 0 ? [page[:number].to_i, max_number].min : 1 + + sample_group_field = case params[:sample_group] + when "client" then "client_id" + when "data-center" then "client_id" + when "provider" then "provider_id" + when "resource-type" then "types.resourceTypeGeneral" + else nil + end - @dois = collection.order(order).page(page[:number]).per(page[:size]).without_count + if params[:id].present? + response = Doi.find_by_id(params[:id]) + elsif params[:ids].present? + response = Doi.find_by_ids(params[:ids], page: page, sort: sort) + else + response = Doi.query(params[:query], + state: params[:state], + created: params[:created], + registered: params[:registered], + provider_id: params[:provider_id], + client_id: params[:client_id], + prefix: params[:prefix], + person_id: params[:person_id], + resource_type_id: params[:resource_type_id], + schema_version: params[:schema_version], + subject: params[:subject], + link_check_status: params[:link_check_status], + link_check_has_schema_org: params[:link_check_has_schema_org], + link_check_body_has_pid: params[:link_check_body_has_pid], + link_check_found_schema_org_id: params[:link_check_found_schema_org_id], + link_check_found_dc_identifier: params[:link_check_found_dc_identifier], + link_check_found_citation_doi: params[:link_check_found_citation_doi], + link_check_redirect_count_gte: params[:link_check_redirect_count_gte], + sample_group: sample_group_field, + sample_size: params[:sample], + source: params[:source], + page: page, + sort: sort, + random: params[:random]) + end - options = {} - options[:meta] = { - total: total, - "totalPages" => total_pages, - page: page[:number].to_i - }.compact - - options[:links] = { - self: request.original_url, - next: @dois.blank? ? nil : request.base_url + "/dois?" + { - query: params[:query], - "provider-id" => params[:provider_id], - "client-id" => params[:client_id], - "page[number]" => page[:number] + 1, - "page[size]" => page[:size], - sort: params[:sort] }.compact.to_query - }.compact - options[:include] = @include - options[:is_collection] = true - options[:params] = { - :current_ability => current_ability, - } + begin - render json: DoiSerializer.new(@dois, options).serialized_json, status: :ok - else - sort = case params[:sort] - when "name" then { "doi" => { order: 'asc' }} - when "-name" then { "doi" => { order: 'desc' }} - when "created" then { created: { order: 'asc' }} - when "-created" then { created: { order: 'desc' }} - when "updated" then { updated: { order: 'asc' }} - when "-updated" then { updated: { order: 'desc' }} - when "relevance" then { "_score": { "order": "desc" }} - else { updated: { order: 'desc' }} - end - - page = params[:page] || {} - - if page[:size].present? - page[:size] = [page[:size].to_i, 1000].min - max_number = page[:size] > 0 ? 10000/page[:size] : 1 - else - page[:size] = 25 - max_number = 10000/page[:size] + # If we're using sample groups we need to unpack the results from the aggregation bucket hits. + if sample_group_field.present? + sample_dois = [] + response.response.aggregations.samples.buckets.each do |bucket| + bucket.samples_hits.hits.hits.each do |hit| + sample_dois << hit._source + end + end end - page[:number] = page[:number].to_i > 0 ? [page[:number].to_i, max_number].min : 1 - if params[:id].present? - response = Doi.find_by_id(params[:id]) - elsif params[:ids].present? - response = Doi.find_by_ids(params[:ids], page: page, sort: sort) + # Results to return are either our sample group dois or the regular hit results + if sample_dois + results = sample_dois + # The total is just the length because for sample grouping we get everything back in one shot no pagination. + total = sample_dois.length + total_pages = 1 else - response = Doi.query(params[:query], - state: params[:state], - created: params[:created], - registered: params[:registered], - provider_id: params[:provider_id], - client_id: params[:client_id], - prefix: params[:prefix], - person_id: params[:person_id], - resource_type_id: params[:resource_type_id], - query_fields: params[:query_fields], - schema_version: params[:schema_version], - link_check_status: params[:link_check_status], - link_check_has_schema_org: params[:link_check_has_schema_org], - link_check_body_has_pid: params[:link_check_body_has_pid], - link_check_found_schema_org_id: params[:link_check_found_schema_org_id], - link_check_found_dc_identifier: params[:link_check_found_dc_identifier], - link_check_found_citation_doi: params[:link_check_found_citation_doi], - link_check_redirect_count_gte: params[:link_check_redirect_count_gte], - source: params[:source], - page: page, - sort: sort) + results = response.results.results + total = response.results.total + total_pages = page[:size] > 0 ? ([total.to_f, 10000].min / page[:size]).ceil : 0 end - total = response.results.total - total_pages = page[:size] > 0 ? ([total.to_f, 10000].min / page[:size]).ceil : 0 - states = total > 0 ? facet_by_key(response.response.aggregations.states.buckets) : nil resource_types = total > 0 ? facet_by_resource_type(response.response.aggregations.resource_types.buckets) : nil created = total > 0 ? facet_by_year(response.response.aggregations.created.buckets) : nil @@ -155,7 +187,7 @@ def index respond_to do |format| format.json do - @dois = response.results.results + @dois = results options = {} options[:meta] = { total: total, @@ -177,7 +209,7 @@ def index "linkChecksDcIdentifier" => link_checks_dc_identifier, "linkChecksCitationDoi" => link_checks_citation_doi }.compact - + options[:links] = { self: request.original_url, next: @dois.blank? ? nil : request.base_url + "/dois?" + { @@ -193,7 +225,7 @@ def index options[:params] = { :current_ability => current_ability, } - + render json: DoiSerializer.new(@dois, options).serialized_json, status: :ok end format.citation do @@ -202,6 +234,12 @@ def index end format.any(:bibtex, :citeproc, :codemeta, :crosscite, :datacite, :datacite_json, :jats, :ris, :schema_org) { render request.format.to_sym => response.records.to_a } end + rescue Elasticsearch::Transport::Transport::Errors::BadRequest => exception + Bugsnag.notify(exception) + + message = JSON.parse(exception.message[6..-1]).to_h.dig("error", "root_cause", 0, "reason") + + render json: { "errors" => { "title" => message }}.to_json, status: :bad_request end end @@ -217,7 +255,7 @@ def show current_ability: current_ability, detail: true } - + render json: DoiSerializer.new(@doi, options).serialized_json, status: :ok end format.citation do @@ -290,7 +328,7 @@ def update if params.dig(:data, :attributes, :mode) == "transfer" # only update client_id - + authorize! :transfer, @doi @doi.assign_attributes(safe_params.slice(:client_id)) else @@ -411,6 +449,11 @@ def delete_test_dois render json: { message: "Test DOIs deleted." }.to_json, status: :ok end + # legacy method + def status + render json: { message: "Not Implemented." }.to_json, status: :not_implemented + end + protected def set_doi diff --git a/app/controllers/members_controller.rb b/app/controllers/members_controller.rb index 6b26f4abe..57d17cd63 100644 --- a/app/controllers/members_controller.rb +++ b/app/controllers/members_controller.rb @@ -29,44 +29,52 @@ def index response = Provider.query(params[:query], all_members: true, year: params[:year], region: params[:region], organization_type: params[:organization_type], focus_area: params[:focus_area], fields: params[:fields], page: page, sort: sort) end - total = response.results.total - total_pages = page[:size] > 0 ? (total.to_f / page[:size]).ceil : 0 - years = total > 0 ? facet_by_year(response.response.aggregations.years.buckets) : nil - regions = total > 0 ? facet_by_region(response.response.aggregations.regions.buckets) : nil - organization_types = total > 0 ? facet_by_key(response.response.aggregations.organization_types.buckets) : nil - focus_areas = total > 0 ? facet_by_key(response.response.aggregations.focus_areas.buckets) : nil + begin + total = response.results.total + total_pages = page[:size] > 0 ? (total.to_f / page[:size]).ceil : 0 + years = total > 0 ? facet_by_year(response.response.aggregations.years.buckets) : nil + regions = total > 0 ? facet_by_region(response.response.aggregations.regions.buckets) : nil + organization_types = total > 0 ? facet_by_key(response.response.aggregations.organization_types.buckets) : nil + focus_areas = total > 0 ? facet_by_key(response.response.aggregations.focus_areas.buckets) : nil - @members = response.results.results + @members = response.results.results - options = {} - options[:meta] = { - total: total, - "total-pages" => total_pages, - page: page[:number], - years: years, - regions: regions, - "organization-types" => organization_types, - "focus-areas" => focus_areas - }.compact - - options[:links] = { - self: request.original_url, - next: @members.blank? ? nil : request.base_url + "/members?" + { - query: params[:query], - year: params[:year], - region: params[:region], - "organization-type" => params[:organization_type], - "focus-area" => params[:focus_area], - fields: params[:fields], - "page[number]" => params.dig(:page, :number), - "page[size]" => params.dig(:page, :size), - sort: sort }.compact.to_query + options = {} + options[:meta] = { + total: total, + "total-pages" => total_pages, + page: page[:number], + years: years, + regions: regions, + "organization-types" => organization_types, + "focus-areas" => focus_areas }.compact - options[:include] = @include - options[:is_collection] = true - options[:links] = nil - render json: MemberSerializer.new(@members, options).serialized_json, status: :ok + options[:links] = { + self: request.original_url, + next: @members.blank? ? nil : request.base_url + "/members?" + { + query: params[:query], + year: params[:year], + region: params[:region], + "organization-type" => params[:organization_type], + "focus-area" => params[:focus_area], + fields: params[:fields], + "page[number]" => params.dig(:page, :number), + "page[size]" => params.dig(:page, :size), + sort: sort }.compact.to_query + }.compact + options[:include] = @include + options[:is_collection] = true + options[:links] = nil + + render json: MemberSerializer.new(@members, options).serialized_json, status: :ok + rescue Elasticsearch::Transport::Transport::Errors::BadRequest => exception + Bugsnag.notify(exception) + + message = JSON.parse(exception.message[6..-1]).to_h.dig("error", "root_cause", 0, "reason") + + render json: { "errors" => { "title" => message }}.to_json, status: :bad_request + end end def show diff --git a/app/controllers/providers_controller.rb b/app/controllers/providers_controller.rb index 85b5bd950..df31a2080 100644 --- a/app/controllers/providers_controller.rb +++ b/app/controllers/providers_controller.rb @@ -30,46 +30,54 @@ def index elsif params[:ids].present? response = Provider.find_by_ids(params[:ids], page: page, sort: sort) else - response = Provider.query(params[:query], year: params[:year], region: params[:region], organization_type: params[:organization_type], focus_area: params[:focus_area], query_fields: params[:query_fields], page: page, sort: sort) + response = Provider.query(params[:query], year: params[:year], region: params[:region], organization_type: params[:organization_type], focus_area: params[:focus_area], page: page, sort: sort) end - total = response.results.total - total_pages = page[:size] > 0 ? (total.to_f / page[:size]).ceil : 0 - years = total > 0 ? facet_by_year(response.response.aggregations.years.buckets) : nil - regions = total > 0 ? facet_by_region(response.response.aggregations.regions.buckets) : nil - organization_types = total > 0 ? facet_by_key(response.response.aggregations.organization_types.buckets) : nil - focus_areas = total > 0 ? facet_by_key(response.response.aggregations.focus_areas.buckets) : nil + begin + total = response.results.total + total_pages = page[:size] > 0 ? (total.to_f / page[:size]).ceil : 0 + years = total > 0 ? facet_by_year(response.response.aggregations.years.buckets) : nil + regions = total > 0 ? facet_by_region(response.response.aggregations.regions.buckets) : nil + organization_types = total > 0 ? facet_by_key(response.response.aggregations.organization_types.buckets) : nil + focus_areas = total > 0 ? facet_by_key(response.response.aggregations.focus_areas.buckets) : nil - @providers = response.results.results + @providers = response.results.results - options = {} - options[:meta] = { - total: total, - "totalPages" => total_pages, - page: page[:number], - years: years, - regions: regions, - "organizationTypes" => organization_types, - "focusAreas" => focus_areas - }.compact - - options[:links] = { - self: request.original_url, - next: @providers.blank? ? nil : request.base_url + "/providers?" + { - query: params[:query], - year: params[:year], - region: params[:region], - "organization_type" => params[:organization_type], - "focus-area" => params[:focus_area], - fields: params[:fields], - "page[number]" => params.dig(:page, :number), - "page[size]" => params.dig(:page, :size), - sort: sort }.compact.to_query + options = {} + options[:meta] = { + total: total, + "totalPages" => total_pages, + page: page[:number], + years: years, + regions: regions, + "organizationTypes" => organization_types, + "focusAreas" => focus_areas }.compact - options[:include] = @include - options[:is_collection] = true - render json: ProviderSerializer.new(@providers, options).serialized_json, status: :ok + options[:links] = { + self: request.original_url, + next: @providers.blank? ? nil : request.base_url + "/providers?" + { + query: params[:query], + year: params[:year], + region: params[:region], + "organization_type" => params[:organization_type], + "focus-area" => params[:focus_area], + fields: params[:fields], + "page[number]" => params.dig(:page, :number), + "page[size]" => params.dig(:page, :size), + sort: sort }.compact.to_query + }.compact + options[:include] = @include + options[:is_collection] = true + + render json: ProviderSerializer.new(@providers, options).serialized_json, status: :ok + rescue Elasticsearch::Transport::Transport::Errors::BadRequest => exception + Bugsnag.notify(exception) + + message = JSON.parse(exception.message[6..-1]).to_h.dig("error", "root_cause", 0, "reason") + + render json: { "errors" => { "title" => message }}.to_json, status: :bad_request + end end def show diff --git a/app/controllers/works_controller.rb b/app/controllers/works_controller.rb index 568894575..1cb92b18f 100644 --- a/app/controllers/works_controller.rb +++ b/app/controllers/works_controller.rb @@ -28,6 +28,14 @@ def index end page[:number] = page[:number].to_i > 0 ? [page[:number].to_i, max_number].min : 1 + sample_group_field = case params[:sample_group] + when "client" then "client_id" + when "data-center" then "client_id" + when "provider" then "provider_id" + when "resource-type" then "types.resourceTypeGeneral" + else nil + end + if params[:id].present? response = Doi.find_by_id(params[:id]) elsif params[:ids].present? @@ -43,46 +51,73 @@ def index person_id: params[:person_id], resource_type_id: params[:resource_type_id], schema_version: params[:schema_version], + sample_group: sample_group_field, + sample_size: params[:sample], page: page, - sort: sort) + sort: sort, + random: params[:sample].present? ? true : false) end - total = response.results.total - total_pages = page[:size] > 0 ? ([total.to_f, 10000].min / page[:size]).ceil : 0 + begin + total = response.results.total + total_pages = page[:size] > 0 ? ([total.to_f, 10000].min / page[:size]).ceil : 0 + + resource_types = total > 0 ? facet_by_resource_type(response.response.aggregations.resource_types.buckets) : nil + registered = total > 0 ? facet_by_year(response.response.aggregations.registered.buckets) : nil + providers = total > 0 ? facet_by_provider(response.response.aggregations.providers.buckets) : nil + clients = total > 0 ? facet_by_client(response.response.aggregations.clients.buckets) : nil + + @dois = response.results.results + + options = {} + options[:meta] = { + "resource-types" => resource_types, + registered: registered, + "data-centers" => clients, + total: total, + "total-pages" => total_pages, + page: page[:number] + }.compact - resource_types = total > 0 ? facet_by_resource_type(response.response.aggregations.resource_types.buckets) : nil - registered = total > 0 ? facet_by_year(response.response.aggregations.registered.buckets) : nil - providers = total > 0 ? facet_by_provider(response.response.aggregations.providers.buckets) : nil - clients = total > 0 ? facet_by_client(response.response.aggregations.clients.buckets) : nil + options[:links] = { + self: request.original_url, + next: @dois.blank? ? nil : request.base_url + "/dois?" + { + query: params[:query], + "member-id" => params[:provider_id], + "data-center-id" => params[:client_id], + "page[size]" => params.dig(:page, :size) }.compact.to_query + }.compact + options[:include] = @include + options[:is_collection] = true + options[:links] = nil + options[:params] = { + :current_ability => current_ability, + } - @dois = response.results.results + # If we're using sample groups we need to unpack the results from the aggregation bucket hits. + if sample_group_field.present? + sample_dois = [] + response.response.aggregations.samples.buckets.each do |bucket| + bucket.samples_hits.hits.hits.each do |hit| + sample_dois << hit._source + end + end + end - options = {} - options[:meta] = { - "resource-types" => resource_types, - registered: registered, - "data-centers" => clients, - total: total, - "total-pages" => total_pages, - page: page[:number] - }.compact - - options[:links] = { - self: request.original_url, - next: @dois.blank? ? nil : request.base_url + "/dois?" + { - query: params[:query], - "member-id" => params[:provider_id], - "data-center-id" => params[:client_id], - "page[size]" => params.dig(:page, :size) }.compact.to_query - }.compact - options[:include] = @include - options[:is_collection] = true - options[:links] = nil - options[:params] = { - :current_ability => current_ability, - } + # Results to return are either our sample group dois or the regular hit results + if sample_dois + @dois = sample_dois + else + @dois = response.results.results + end + render json: WorkSerializer.new(@dois, options).serialized_json, status: :ok + rescue Elasticsearch::Transport::Transport::Errors::BadRequest => exception + Bugsnag.notify(exception) - render json: WorkSerializer.new(@dois, options).serialized_json, status: :ok + message = JSON.parse(exception.message[6..-1]).to_h.dig("error", "root_cause", 0, "reason") + + render json: { "errors" => { "title" => message }}.to_json, status: :bad_request + end end def show @@ -91,9 +126,9 @@ def show options = {} options[:include] = @include options[:is_collection] = false - options[:params] = { + options[:params] = { current_ability: current_ability, - detail: true + detail: true } render json: WorkSerializer.new(@doi, options).serialized_json, status: :ok diff --git a/app/jobs/doi_index_by_id_job.rb b/app/jobs/doi_index_by_id_job.rb new file mode 100644 index 000000000..7689e4a7c --- /dev/null +++ b/app/jobs/doi_index_by_id_job.rb @@ -0,0 +1,7 @@ +class DoiIndexByIdJob < ActiveJob::Base + queue_as :lupo_background + + def perform(options={}) + Doi.index_by_id(options) + end +end \ No newline at end of file diff --git a/app/models/client.rb b/app/models/client.rb index 5f16f2118..7a3380efe 100644 --- a/app/models/client.rb +++ b/app/models/client.rb @@ -127,9 +127,9 @@ def as_indexed_json(options={}) } end - def self.query_fields - ['symbol^10', 'name^10', 'description^10', 'contact_name^10', 'contact_email^10', 'domains', 'url', 'software^3', 'repository.subjects.text^3', 'repository.certificates.text^3', '_all'] - end + # def self.query_fields + # ['symbol^10', 'name^10', 'description^10', 'contact_name^10', 'contact_email^10', 'domains', 'url', 'software^3', 'repository.subjects.text^3', 'repository.certificates.text^3', '_all'] + # end def self.query_aggregations { diff --git a/app/models/concerns/indexable.rb b/app/models/concerns/indexable.rb index 3dc06de11..fb1b839ff 100644 --- a/app/models/concerns/indexable.rb +++ b/app/models/concerns/indexable.rb @@ -100,6 +100,8 @@ def find_by_ids(ids, options={}) end def query(query, options={}) + aggregations = query_aggregations + # enable cursor-based pagination for DOIs if self.name == "Doi" && options.dig(:page, :cursor).present? from = 0 @@ -111,10 +113,24 @@ def query(query, options={}) sort = options[:sort] end - fields = options[:query_fields].presence || query_fields + # currently not used + # fields = options[:query_fields].presence || query_fields + + # make sure field name uses underscore + # escape forward slashes in query + if query.present? + query = query.gsub(/publicationYear/, "publication_year") + query = query.gsub(/relatedIdentifiers/, "related_identifiers") + query = query.gsub(/rightsList/, "rights_list") + query = query.gsub(/fundingReferences/, "funding_references") + query = query.gsub(/geoLocations/, "geo_locations") + query = query.gsub(/landingPage/, "landing_page") + query = query.gsub(/contentUrl/, "content_url") + query = query.gsub("/", '\/') + end must = [] - must << { multi_match: { query: query, fields: fields, type: "phrase_prefix", slop: 3, max_expansions: 10 }} if query.present? + must << { query_string: { query: query }} if query.present? must << { term: { aasm_state: options[:state] }} if options[:state].present? must << { term: { "types.resourceTypeGeneral": options[:resource_type_id].underscore.camelize }} if options[:resource_type_id].present? must << { terms: { provider_id: options[:provider_id].split(",") }} if options[:provider_id].present? @@ -123,6 +139,7 @@ def query(query, options={}) must << { term: { "author.id" => "https://orcid.org/#{options[:person_id]}" }} if options[:person_id].present? must << { range: { created: { gte: "#{options[:created].split(",").min}||/y", lte: "#{options[:created].split(",").max}||/y", format: "yyyy" }}} if options[:created].present? must << { term: { schema_version: "http://datacite.org/schema/kernel-#{options[:schema_version]}" }} if options[:schema_version].present? + must << { terms: { "subjects.subject": options[:subject].split(",") }} if options[:subject].present? must << { term: { source: options[:source] }} if options[:source].present? must << { term: { "landing_page.status": options[:link_check_status] }} if options[:link_check_status].present? must << { exists: { field: "landing_page.checked" }} if options[:link_checked].present? @@ -141,7 +158,7 @@ def query(query, options={}) must << { term: { region: options[:region].upcase }} if options[:region].present? must << { term: { organization_type: options[:organization_type] }} if options[:organization_type].present? must << { term: { focus_area: options[:focus_area] }} if options[:focus_area].present? - + if options[:all_members] must << { terms: { role_name: %w(ROLE_ALLOCATOR ROLE_MEMBER) }} else @@ -157,18 +174,62 @@ def query(query, options={}) must << { range: { registered: { gte: "#{options[:registered].split(",").min}||/y", lte: "#{options[:registered].split(",").max}||/y", format: "yyyy" }}} if options[:registered].present? end + # ES query can be optionally defined in different ways + # So here we build it differently based upon options + # This is mostly useful when trying to wrap it in a function_score query + es_query = {} + + # The main bool query with filters + bool_query = { + must: must, + must_not: must_not + } + + # Function score is used to provide varying score to return different values + # We use the bool query above as our principle query + # Then apply additional function scoring as appropriate + # Note this can be performance intensive. + function_score = { + query: { + bool: bool_query + }, + random_score: { + "seed": Rails.env.test? ? "random_1234" : "random_#{rand(1...100000)}" + } + } + + if options[:random].present? + es_query['function_score'] = function_score + # Don't do any sorting for random results + sort = nil + else + es_query['bool'] = bool_query + end + + # Sample grouping is optional included aggregation + if options[:sample_group].present? + aggregations[:samples] = { + terms: { + field: options[:sample_group], + size: 10000 + }, + aggs: { + "samples_hits": { + top_hits: { + size: options[:sample_size].present? ? options[:sample_size] : 1 + } + } + } + } + end + __elasticsearch__.search({ size: options.dig(:page, :size), from: from, search_after: search_after, sort: sort, - query: { - bool: { - must: must, - must_not: must_not - } - }, - aggregations: query_aggregations + query: es_query, + aggregations: aggregations }.compact) end diff --git a/app/models/doi.rb b/app/models/doi.rb index 6cee7b878..2f0a23a34 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -79,7 +79,7 @@ class Doi < ActiveRecord::Base validates_format_of :url, :with => /\A(ftp|http|https):\/\/[\S]+/ , if: :url?, message: "URL is not valid" validates_uniqueness_of :doi, message: "This DOI has already been taken", unless: :only_validate validates :last_landing_page_status, numericality: { only_integer: true }, if: :last_landing_page_status? - validates :xml, presence: true, xml_schema: true, :if => Proc.new { |doi| doi.validatable? } + validates :xml, xml_schema: true, :if => Proc.new { |doi| doi.validatable? } after_commit :update_url, on: [:create, :update] after_commit :update_media, on: [:create, :update] @@ -326,9 +326,9 @@ def self.query_aggregations } end - def self.query_fields - ['doi^10', 'titles.title^10', 'creator_names^10', 'creators.name^10', 'creators.id^10', 'publisher^10', 'descriptions.description^10', 'types.resourceTypeGeneral^10', 'subjects.subject^10', 'identifiers.identifier^10', 'related_identifiers.relatedIdentifier^10', '_all'] - end + # def self.query_fields + # ['doi^10', 'titles.title^10', 'creator_names^10', 'creators.name^10', 'creators.id^10', 'publisher^10', 'descriptions.description^10', 'types.resourceTypeGeneral^10', 'subjects.subject^10', 'identifiers.identifier^10', 'related_identifiers.relatedIdentifier^10', '_all'] + # end def self.find_by_id(id, options={}) return nil unless id.present? @@ -357,7 +357,7 @@ def self.import_one(doi_id: nil) logger.error "[MySQL] No metadata for DOI " + doi.doi + " found: " + doi.current_metadata.inspect return nil end - + meta = doi.read_datacite(string: string, sandbox: doi.sandbox) attrs = %w(creators contributors titles publisher publication_year types descriptions container sizes formats language dates identifiers related_identifiers funding_references geo_locations rights_list subjects content_url).map do |a| [a.to_sym, meta[a]] @@ -409,7 +409,7 @@ def self.import_by_day(options={}) logger.error "[MySQL] No metadata for DOI " + doi.doi + " found." return nil end - + meta = doi.read_datacite(string: string, sandbox: doi.sandbox) attrs = %w(creators contributors titles publisher publication_year types descriptions container sizes formats language dates identifiers related_identifiers funding_references geo_locations rights_list subjects content_url).map do |a| [a.to_sym, meta[a]] @@ -445,7 +445,7 @@ def self.import_by_day_missing(options={}) logger.error "[MySQL] No metadata for DOI " + doi.doi + " found." return nil end - + meta = doi.read_datacite(string: string, sandbox: doi.sandbox) attrs = %w(creators contributors titles publisher publication_year types descriptions container sizes formats language dates identifiers related_identifiers funding_references geo_locations rights_list subjects content_url).map do |a| [a.to_sym, meta[a]] @@ -528,6 +528,59 @@ def self.index_by_day(options={}) logger.info "[Elasticsearch] Indexed #{count} DOIs created on #{options[:from_date]}." end + def self.index_by_ids(options={}) + from_id = (options[:from_id] || 1).to_i + until_id = (options[:until_id] || from_id + 499).to_i + + # get every id between from_id and end_id + (from_id..until_id).step(500).each do |id| + DoiIndexByIdJob.perform_later(id: id) + puts "Queued indexing for DOIs with IDs starting with #{id}." + end + end + + def self.index_by_id(options={}) + return nil unless options[:id].present? + id = options[:id].to_i + + errors = 0 + count = 0 + + logger = Logger.new(STDOUT) + + Doi.where(id: id..(id + 499)).find_in_batches(batch_size: 500) do |dois| + response = Doi.__elasticsearch__.client.bulk \ + index: Doi.index_name, + type: Doi.document_type, + body: dois.map { |doi| { index: { _id: doi.id, data: doi.as_indexed_json } } } + + # log errors + errors += response['items'].map { |k, v| k.values.first['error'] }.compact.length + response['items'].select { |k, v| k.values.first['error'].present? }.each do |err| + logger.error "[Elasticsearch] " + err.inspect + end + + count += dois.length + end + + if errors > 1 + logger.error "[Elasticsearch] #{errors} errors indexing #{count} DOIs with IDs #{id} - #{(id + 499)}." + elsif count > 1 + logger.info "[Elasticsearch] Indexed #{count} DOIs with IDs #{id} - #{(id + 499)}." + end + rescue Elasticsearch::Transport::Transport::Errors::RequestEntityTooLarge, Faraday::ConnectionFailed, ActiveRecord::LockWaitTimeout => error + logger.info "[Elasticsearch] Error #{error.message} indexing DOIs with IDs #{id} - #{(id + 499)}." + + count = 0 + + Doi.where(id: id..(id + 499)).find_each do |doi| + IndexJob.perform_later(doi) + count += 1 + end + + logger.info "[Elasticsearch] Indexed #{count} DOIs with IDs #{id} - #{(id + 499)}." + end + def uid doi.downcase end @@ -615,7 +668,7 @@ def validatable? # providers europ and ethz do their own handle registration, so fetch url from handle system instead def update_url return nil if current_user.nil? || !is_registered_or_findable? - + if %w(europ ethz).include?(provider_id) || %w(Crossref).include?(agency) UrlJob.perform_later(self) else diff --git a/app/models/provider.rb b/app/models/provider.rb index 259c85606..bcdee6860 100644 --- a/app/models/provider.rb +++ b/app/models/provider.rb @@ -134,9 +134,9 @@ def as_indexed_json(options={}) } end - def self.query_fields - ['symbol^10', 'name^10', 'contact_name^10', 'contact_email^10', '_all'] - end + # def self.query_fields + # ['symbol^10', 'name^10', 'contact_name^10', 'contact_email^10', '_all'] + # end def self.query_aggregations { diff --git a/app/serializers/client_serializer.rb b/app/serializers/client_serializer.rb index c77bf4976..47e32759e 100644 --- a/app/serializers/client_serializer.rb +++ b/app/serializers/client_serializer.rb @@ -3,6 +3,7 @@ class ClientSerializer set_key_transform :camel_lower set_type :clients set_id :uid + cache_options enabled: true, cache_length: 24.hours attributes :name, :symbol, :year, :contact_name, :contact_email, :description, :domains, :url, :created, :updated diff --git a/app/serializers/doi_serializer.rb b/app/serializers/doi_serializer.rb index b7089d63b..4f1762077 100644 --- a/app/serializers/doi_serializer.rb +++ b/app/serializers/doi_serializer.rb @@ -3,6 +3,7 @@ class DoiSerializer set_key_transform :camel_lower set_type :dois set_id :uid + # don't cache dois, as works are cached using the doi model attributes :doi, :prefix, :suffix, :identifiers, :creators, :titles, :publisher, :container, :publication_year, :subjects, :contributors, :dates, :language, :types, :related_identifiers, :sizes, :formats, :version, :rights_list, :descriptions, :geo_locations, :funding_references, :xml, :url, :content_url, :metadata_version, :schema_version, :source, :is_active, :state, :reason, :landing_page, :created, :registered, :updated attributes :prefix, :suffix, if: Proc.new { |object, params| params && params[:detail] } diff --git a/app/serializers/provider_prefix_serializer.rb b/app/serializers/provider_prefix_serializer.rb index b89aaf335..470ab51d2 100644 --- a/app/serializers/provider_prefix_serializer.rb +++ b/app/serializers/provider_prefix_serializer.rb @@ -4,6 +4,7 @@ class ProviderPrefixSerializer set_type "provider-prefixes" set_id :uid attributes :created, :updated + cache_options enabled: true, cache_length: 24.hours belongs_to :provider, record_type: :providers belongs_to :prefix, record_type: :prefixes diff --git a/app/serializers/provider_serializer.rb b/app/serializers/provider_serializer.rb index 65aad32aa..fc46d31c1 100644 --- a/app/serializers/provider_serializer.rb +++ b/app/serializers/provider_serializer.rb @@ -3,6 +3,7 @@ class ProviderSerializer set_key_transform :camel_lower set_type :providers set_id :uid + cache_options enabled: true, cache_length: 24.hours attributes :name, :symbol, :website, :contact_name, :contact_email, :phone, :description, :region, :country, :logo_url, :organization_type, :focus_area, :is_active, :has_password, :joined, :created, :updated diff --git a/app/serializers/work_serializer.rb b/app/serializers/work_serializer.rb index f5a216793..644817b94 100644 --- a/app/serializers/work_serializer.rb +++ b/app/serializers/work_serializer.rb @@ -3,6 +3,7 @@ class WorkSerializer set_key_transform :dash set_type :works set_id :identifier + cache_options enabled: true, cache_length: 24.hours attributes :doi, :identifier, :url, :author, :title, :container_title, :description, :resource_type_subtype, :data_center_id, :member_id, :resource_type_id, :version, :license, :schema_version, :results, :related_identifiers, :published, :registered, :checked, :updated, :media, :xml diff --git a/config/initializers/_version.rb b/config/initializers/_version.rb index c03df7440..0649c1ca5 100644 --- a/config/initializers/_version.rb +++ b/config/initializers/_version.rb @@ -1,5 +1,5 @@ module Lupo class Application - VERSION = "2.1.8" + VERSION = "2.3.2" end end \ No newline at end of file diff --git a/config/initializers/elasticsearch.rb b/config/initializers/elasticsearch.rb index 0dbbcb378..a2ee09c2d 100644 --- a/config/initializers/elasticsearch.rb +++ b/config/initializers/elasticsearch.rb @@ -9,7 +9,7 @@ } } Elasticsearch::Model.client = Elasticsearch::Client.new(host: ENV['ES_HOST'], user: "elastic", password: ENV['ELASTIC_PASSWORD']) do |f| - f.adapter Faraday.default_adapter + f.adapter :excon end else Elasticsearch::Model.client = Elasticsearch::Client.new(host: ENV['ES_HOST'], port: '80', scheme: 'http') do |f| @@ -18,6 +18,6 @@ service: 'es', region: ENV['AWS_REGION'] - f.adapter Faraday.default_adapter + f.adapter :excon end end \ No newline at end of file diff --git a/config/routes.rb b/config/routes.rb index 2bc26fb08..ac8e2c787 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -19,6 +19,7 @@ get '/dois/application/x-research-info-systems/:id', :to => 'dois#show', constraints: { :id => /.+/ }, defaults: { format: :ris } get '/dois/text/x-bibliography/:id', :to => 'dois#show', constraints: { :id => /.+/ }, defaults: { format: :citation } + # content negotiation for collections get '/dois/application/vnd.datacite.datacite+xml', :to => 'dois#index', defaults: { format: :datacite } get '/dois/application/vnd.datacite.datacite+json', :to => 'dois#index', defaults: { format: :datacite_json } get '/dois/application/vnd.crosscite.crosscite+json', :to => 'dois#index', defaults: { format: :crosscite } @@ -42,11 +43,6 @@ get 'providers/totals', :to => 'providers#totals' get 'clients/totals', :to => 'clients#totals' - # manage prefixes, keep database in sync for changes via MDS - post 'client-prefixes/set-created', :to => 'client_prefixes#set_created' - post 'client-prefixes/set-provider', :to => 'client_prefixes#set_provider' - post 'provider-prefixes/set-created', :to => 'provider_prefixes#set_created' - resources :heartbeat, only: [:index] resources :index, only: [:index] diff --git a/lib/tasks/doi.rake b/lib/tasks/doi.rake index 0982b7525..b9c9e6f9d 100644 --- a/lib/tasks/doi.rake +++ b/lib/tasks/doi.rake @@ -74,6 +74,14 @@ namespace :doi do puts "DOIs created on #{from_date} indexed." end + desc 'Index DOIs by ID' + task :index_by_ids => :environment do + from_id = (ENV['FROM_ID'] || 1).to_i + until_id = (ENV['UNTIL_ID'] || from_id + 499).to_i + + Doi.index_by_ids(from_id: from_id, until_id: until_id) + end + desc 'Set minted' task :set_minted => :environment do from_date = ENV['FROM_DATE'] || Time.zone.now - 1.day diff --git a/lib/xml_schema_validator.rb b/lib/xml_schema_validator.rb index 37272c68f..243ee9164 100644 --- a/lib/xml_schema_validator.rb +++ b/lib/xml_schema_validator.rb @@ -32,18 +32,26 @@ def get_valid_kernel(sv) end def validate_each(record, attribute, value) + unless value.present? + record.errors[:xml] << "xml should be present" + return false + end + kernel = get_valid_kernel(record.schema_version) - return false unless kernel.present? - + unless kernel.present? + record.errors[:xml] << "schema should be present" + return false + end + filepath = Bundler.rubygems.find_name('bolognese').first.full_gem_path + "/resources/#{kernel}/metadata.xsd" schema = Nokogiri::XML::Schema(open(filepath)) - + schema.validate(Nokogiri::XML(value, nil, 'UTF-8')).reduce({}) do |sum, error| location, level, source, text = error.message.split(": ", 4) line, column = location.split(":", 2) title = text.to_s.strip + " at line #{line}, column #{column}" if line.present? source = source.split("}").last[0..-2] if line.present? - source = schema_attributes(source) if source.present? + source = schema_attributes(source) if source.present? record.errors[source.to_sym] << title end rescue Nokogiri::XML::SyntaxError => e diff --git a/spec/requests/dois_spec.rb b/spec/requests/dois_spec.rb index ac72c056b..a86457aa4 100644 --- a/spec/requests/dois_spec.rb +++ b/spec/requests/dois_spec.rb @@ -718,6 +718,44 @@ expect(json.dig('data', 'attributes', 'state')).to eq("findable") end end + + context 'when updating landing page attributes' do + let(:doi) { create(:doi, doi: "10.14454/10705", url: "http://www.bl.uk/pdf/pat.pdf", client: client, aasm_state: "findable") } + + let(:landingPage) { { + "checked" => Time.zone.now.utc.iso8601, + "status" => 200, + "url" => doi.url, + "contentType" => "text/html", + "error" => nil, + "redirectCount" => 0, + "redirectUrls" => [], + "downloadLatency" => 200, + "hasSchemaOrg" => true, + "schemaOrgId" => doi.doi, + "dcIdentifier" => nil, + "citationDoi" => nil, + "bodyHasPid" => true + } } + let(:valid_attributes) do + { + "data" => { + "type" => "dois", + "attributes" => { + "landingPage" => landingPage, + } + } + } + end + + before { patch "/dois/#{doi.doi}", params: valid_attributes.to_json, headers: headers } + + it 'updating landingPage data' do + expect(response).to have_http_status(200) + expect(json.dig('data', 'attributes', 'landingPage')).to eq(landingPage) + end + end + end describe 'POST /dois' do @@ -754,7 +792,7 @@ expect(json.dig('data', 'attributes', 'schemaVersion')).to eq("http://datacite.org/schema/kernel-4") expect(json.dig('data', 'attributes', 'source')).to eq("test") expect(json.dig('data', 'attributes', 'types')).to eq("bibtex"=>"article", "citeproc"=>"article-journal", "resourceType"=>"BlogPosting", "resourceTypeGeneral"=>"Text", "ris"=>"RPRT", "schemaOrg"=>"ScholarlyArticle") - + doc = Nokogiri::XML(Base64.decode64(json.dig('data', 'attributes', 'xml')), nil, 'UTF-8', &:noblanks) expect(doc.at_css("identifier").content).to eq("10.14454/10703") end @@ -2240,364 +2278,364 @@ let(:client) { create(:client, provider: provider, symbol: ENV['MDS_USERNAME'], password: ENV['MDS_PASSWORD']) } let(:bearer) { Client.generate_token(role_id: "client_admin", uid: client.symbol, provider_id: provider.symbol.downcase, client_id: client.symbol.downcase, password: client.password) } let(:doi) { create(:doi, client: client, aasm_state: "findable") } - + context "no permission" do let(:doi) { create(:doi) } - + before { get "/dois/#{doi.doi}", headers: { "HTTP_ACCEPT" => "application/vnd.jats+xml", 'Authorization' => 'Bearer ' + bearer } } - + it 'returns error message' do expect(json["errors"]).to eq([{"status"=>"403", "title"=>"You are not authorized to access this resource."}]) end - + it 'returns status code 403' do expect(response).to have_http_status(403) end end - + context "no authentication" do - let(:doi) { create(:doi) } + let(:doi) { create(:doi) } before { get "/dois/#{doi.doi}", headers: { "HTTP_ACCEPT" => "application/vnd.jats+xml" } } - + it 'returns error message' do expect(json["errors"]).to eq([{"status"=>"401", "title"=>"Bad credentials."}]) end - + it 'returns status code 401' do expect(response).to have_http_status(401) end end - + context "application/vnd.jats+xml" do before { get "/dois/#{doi.doi}", headers: { "HTTP_ACCEPT" => "application/vnd.jats+xml", 'Authorization' => 'Bearer ' + bearer } } - + it 'returns the Doi' do jats = Maremma.from_xml(response.body).fetch("element_citation", {}) expect(jats.dig("publication_type")).to eq("data") expect(jats.dig("data_title")).to eq("Data from: A new malaria agent in African hominids.") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "application/vnd.jats+xml link" do before { get "/dois/application/vnd.jats+xml/#{doi.doi}" } - + it 'returns the Doi' do jats = Maremma.from_xml(response.body).fetch("element_citation", {}) expect(jats.dig("publication_type")).to eq("data") expect(jats.dig("data_title")).to eq("Data from: A new malaria agent in African hominids.") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "application/vnd.datacite.datacite+xml" do before { get "/dois/#{doi.doi}", headers: { "HTTP_ACCEPT" => "application/vnd.datacite.datacite+xml", 'Authorization' => 'Bearer ' + bearer } } - + it 'returns the Doi' do data = Maremma.from_xml(response.body).to_h.fetch("resource", {}) expect(data.dig("xmlns")).to eq("http://datacite.org/schema/kernel-4") expect(data.dig("publisher")).to eq("Dryad Digital Repository") expect(data.dig("titles", "title")).to eq("Data from: A new malaria agent in African hominids.") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "application/vnd.datacite.datacite+xml link" do before { get "/dois/application/vnd.datacite.datacite+xml/#{doi.doi}" } - + it 'returns the Doi' do data = Maremma.from_xml(response.body).to_h.fetch("resource", {}) expect(data.dig("xmlns")).to eq("http://datacite.org/schema/kernel-4") expect(data.dig("publisher")).to eq("Dryad Digital Repository") expect(data.dig("titles", "title")).to eq("Data from: A new malaria agent in African hominids.") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "application/vnd.datacite.datacite+xml schema 3" do let(:xml) { file_fixture('datacite_schema_3.xml').read } let(:doi) { create(:doi, xml: xml, client: client, regenerate: false) } - + before { get "/dois/#{doi.doi}", headers: { "HTTP_ACCEPT" => "application/vnd.datacite.datacite+xml", 'Authorization' => 'Bearer ' + bearer } } - + it 'returns the Doi' do data = Maremma.from_xml(response.body).to_h.fetch("resource", {}) expect(data.dig("xmlns")).to eq("http://datacite.org/schema/kernel-3") expect(data.dig("publisher")).to eq("Dryad Digital Repository") expect(data.dig("titles", "title")).to eq("Data from: A new malaria agent in African hominids.") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + # context "no metadata" do # let(:doi) { create(:doi, xml: nil, client: client) } - + # before { get "/#{doi.doi}", headers: { "HTTP_ACCEPT" => "application/vnd.datacite.datacite+xml", 'Authorization' => 'Bearer ' + bearer } } - + # it 'returns the Doi' do # expect(response.body).to eq('') # end - + # it 'returns status code 200' do # expect(response).to have_http_status(200) # end # end - + context "application/vnd.datacite.datacite+xml not found" do before { get "/dois/xxx", headers: { "HTTP_ACCEPT" => "application/vnd.datacite.datacite+xml", 'Authorization' => 'Bearer ' + bearer } } - + it 'returns error message' do expect(json["errors"]).to eq([{"status"=>"404", "title"=>"The resource you are looking for doesn't exist."}]) end - + it 'returns status code 404' do expect(response).to have_http_status(404) end end - + context "application/vnd.datacite.datacite+json" do before { get "/dois/#{doi.doi}", headers: { "HTTP_ACCEPT" => "application/vnd.datacite.datacite+json", 'Authorization' => 'Bearer ' + bearer } } - + it 'returns the Doi' do expect(json["doi"]).to eq(doi.doi) end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "application/vnd.datacite.datacite+json link" do before { get "/dois/application/vnd.datacite.datacite+json/#{doi.doi}" } - + it 'returns the Doi' do expect(json["doi"]).to eq(doi.doi) end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "application/vnd.crosscite.crosscite+json" do before { get "/dois/#{doi.doi}", headers: { "HTTP_ACCEPT" => "application/vnd.crosscite.crosscite+json", 'Authorization' => 'Bearer ' + bearer } } - + it 'returns the Doi' do expect(json["doi"]).to eq(doi.doi) end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "application/vnd.crosscite.crosscite+json link" do before { get "/dois/application/vnd.crosscite.crosscite+json/#{doi.doi}" } - + it 'returns the Doi' do expect(json["doi"]).to eq(doi.doi) end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "application/vnd.schemaorg.ld+json" do before { get "/dois/#{doi.doi}", headers: { "HTTP_ACCEPT" => "application/vnd.schemaorg.ld+json", 'Authorization' => 'Bearer ' + bearer } } - + it 'returns the Doi' do expect(json["@type"]).to eq("Dataset") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "application/vnd.schemaorg.ld+json link" do before { get "/dois/application/vnd.schemaorg.ld+json/#{doi.doi}" } - + it 'returns the Doi' do expect(json["@type"]).to eq("Dataset") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "application/vnd.citationstyles.csl+json" do before { get "/dois/#{doi.doi}", headers: { "HTTP_ACCEPT" => "application/vnd.citationstyles.csl+json", 'Authorization' => 'Bearer ' + bearer } } - + it 'returns the Doi' do expect(json["type"]).to eq("dataset") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "application/vnd.citationstyles.csl+json link" do before { get "/dois/application/vnd.citationstyles.csl+json/#{doi.doi}" } - + it 'returns the Doi' do expect(json["type"]).to eq("dataset") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "application/x-research-info-systems" do before { get "/dois/#{doi.doi}", headers: { "HTTP_ACCEPT" => "application/x-research-info-systems", 'Authorization' => 'Bearer ' + bearer } } - + it 'returns the Doi' do expect(response.body).to start_with("TY - DATA") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "application/x-research-info-systems link" do before { get "/dois/application/x-research-info-systems/#{doi.doi}" } - + it 'returns the Doi' do expect(response.body).to start_with("TY - DATA") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "application/x-bibtex" do before { get "/dois/#{doi.doi}", headers: { "HTTP_ACCEPT" => "application/x-bibtex", 'Authorization' => 'Bearer ' + bearer } } - + it 'returns the Doi' do expect(response.body).to start_with("@misc{https://doi.org/#{doi.doi.downcase}") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "application/x-bibtex link" do before { get "/dois/application/x-bibtex/#{doi.doi}" } - + it 'returns the Doi' do expect(response.body).to start_with("@misc{https://doi.org/#{doi.doi.downcase}") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "text/x-bibliography", vcr: true do context "default style" do before { get "/dois/#{doi.doi}", headers: { "HTTP_ACCEPT" => "text/x-bibliography", 'Authorization' => 'Bearer ' + bearer } } - + it 'returns the Doi' do expect(response.body).to start_with("Ollomo, B.") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "default style link" do before { get "/dois/text/x-bibliography/#{doi.doi}" } - + it 'returns the Doi' do expect(response.body).to start_with("Ollomo, B.") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "ieee style" do before { get "/dois/#{doi.doi}?style=ieee", headers: { "HTTP_ACCEPT" => "text/x-bibliography", 'Authorization' => 'Bearer ' + bearer } } - + it 'returns the Doi' do expect(response.body).to start_with("B. Ollomo") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - - context "ieee style link" do + + context "ieee style link" do before { get "/dois/text/x-bibliography/#{doi.doi}?style=ieee" } - + it 'returns the Doi' do expect(response.body).to start_with("B. Ollomo") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - + context "style and locale" do before { get "/dois/#{doi.doi}?style=vancouver&locale=de", headers: { "HTTP_ACCEPT" => "text/x-bibliography", 'Authorization' => 'Bearer ' + bearer } } - + it 'returns the Doi' do expect(response.body).to start_with("Ollomo B") end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end end - + context "unknown content type" do before { get "/dois/#{doi.doi}", headers: { "HTTP_ACCEPT" => "text/csv", 'Authorization' => 'Bearer ' + bearer } } - + it 'returns the Doi' do expect(json["errors"]).to eq([{"status"=>"406", "title"=>"The content type is not recognized."}]) end - + it 'returns status code 406' do expect(response).to have_http_status(406) end end - + context "missing content type" do before { get "/dois/#{doi.doi}", headers: { 'Authorization' => 'Bearer ' + bearer } } - + it 'returns the Doi' do expect(json.dig('data', 'attributes', 'doi')).to eq(doi.doi.downcase) end - + it 'returns status code 200' do expect(response).to have_http_status(200) end end - end + end end