From 5f2e78a9a73b7fe2b4a670b138cd2b0c1f465c5f Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Fri, 4 Jan 2019 10:22:45 +0100 Subject: [PATCH] use elasticsearch in production --- app/controllers/dois_controller.rb | 379 ++++++++++++++--------------- app/models/concerns/indexable.rb | 3 + config/initializers/_version.rb | 2 +- lib/tasks/doi.rake | 6 +- 4 files changed, 196 insertions(+), 194 deletions(-) diff --git a/app/controllers/dois_controller.rb b/app/controllers/dois_controller.rb index 0f6442aea..db8f0faf9 100644 --- a/app/controllers/dois_controller.rb +++ b/app/controllers/dois_controller.rb @@ -13,203 +13,202 @@ class DoisController < ApplicationController def index authorize! :read, Doi - if Rails.env.production? && !current_user.try(:is_admin_or_staff?) - # don't use elasticsearch - - # support nested routes - if params[:client_id].present? - client = Client.where('datacentre.symbol = ?', params[:client_id]).first - collection = client.present? ? client.dois : Doi.none - total = client.cached_doi_count.reduce(0) { |sum, d| sum + d[:count].to_i } - elsif params[:provider_id].present? && params[:provider_id] != "admin" - provider = Provider.where('allocator.symbol = ?', params[:provider_id]).first - collection = provider.present? ? Doi.joins(:client).where("datacentre.allocator = ?", provider.id) : Doi.none - total = provider.cached_doi_count.reduce(0) { |sum, d| sum + d[:count].to_i } - elsif params[:id].present? - collection = Doi.where(doi: params[:id]) - total = collection.all.size - else - provider = Provider.unscoped.where('allocator.symbol = ?', "ADMIN").first - total = provider.present? ? provider.cached_doi_count.reduce(0) { |sum, d| sum + d[:count].to_i } : 0 - collection = Doi - end - - if params[:query].present? - collection = Doi.q(params[:query]) - total = collection.all.size - end - - page = params[:page] || {} - if page[:size].present? - page[:size] = [page[:size].to_i, 1000].min - max_number = page[:size] > 0 ? 10000/page[:size] : 1 - else - page[:size] = 25 - max_number = 10000/page[:size] - end - page[:number] = page[:number].to_i > 0 ? [page[:number].to_i, max_number].min : 1 - total_pages = (total.to_f / page[:size]).ceil - - order = case params[:sort] - when "name" then "dataset.doi" - when "-name" then "dataset.doi DESC" - when "created" then "dataset.created" - else "dataset.created DESC" - end - - @dois = collection.order(order).page(page[:number]).per(page[:size]).without_count - - options = {} - options[:meta] = { - total: total, - "totalPages" => total_pages, - page: page[:number].to_i - }.compact - - options[:links] = { - self: request.original_url, - next: @dois.blank? ? nil : request.base_url + "/dois?" + { - query: params[:query], - "provider-id" => params[:provider_id], - "client-id" => params[:client_id], - "page[number]" => page[:number] + 1, - "page[size]" => page[:size], - sort: params[:sort] }.compact.to_query - }.compact - options[:include] = @include - options[:is_collection] = true - options[:params] = { - :current_ability => current_ability, - } + # if Rails.env.production? && !current_user.try(:is_admin_or_staff?) + # # don't use elasticsearch + + # # support nested routes + # if params[:client_id].present? + # client = Client.where('datacentre.symbol = ?', params[:client_id]).first + # collection = client.present? ? client.dois : Doi.none + # total = client.cached_doi_count.reduce(0) { |sum, d| sum + d[:count].to_i } + # elsif params[:provider_id].present? && params[:provider_id] != "admin" + # provider = Provider.where('allocator.symbol = ?', params[:provider_id]).first + # collection = provider.present? ? Doi.joins(:client).where("datacentre.allocator = ?", provider.id) : Doi.none + # total = provider.cached_doi_count.reduce(0) { |sum, d| sum + d[:count].to_i } + # elsif params[:id].present? + # collection = Doi.where(doi: params[:id]) + # total = collection.all.size + # else + # provider = Provider.unscoped.where('allocator.symbol = ?', "ADMIN").first + # total = provider.present? ? provider.cached_doi_count.reduce(0) { |sum, d| sum + d[:count].to_i } : 0 + # collection = Doi + # end + + # if params[:query].present? + # collection = Doi.q(params[:query]) + # total = collection.all.size + # end + + # page = params[:page] || {} + # if page[:size].present? + # page[:size] = [page[:size].to_i, 1000].min + # max_number = page[:size] > 0 ? 10000/page[:size] : 1 + # else + # page[:size] = 25 + # max_number = 10000/page[:size] + # end + # page[:number] = page[:number].to_i > 0 ? [page[:number].to_i, max_number].min : 1 + # total_pages = (total.to_f / page[:size]).ceil + + # order = case params[:sort] + # when "name" then "dataset.doi" + # when "-name" then "dataset.doi DESC" + # when "created" then "dataset.created" + # else "dataset.created DESC" + # end + + # @dois = collection.order(order).page(page[:number]).per(page[:size]).without_count + + # options = {} + # options[:meta] = { + # total: total, + # "totalPages" => total_pages, + # page: page[:number].to_i + # }.compact + + # options[:links] = { + # self: request.original_url, + # next: @dois.blank? ? nil : request.base_url + "/dois?" + { + # query: params[:query], + # "provider-id" => params[:provider_id], + # "client-id" => params[:client_id], + # "page[number]" => page[:number] + 1, + # "page[size]" => page[:size], + # sort: params[:sort] }.compact.to_query + # }.compact + # options[:include] = @include + # options[:is_collection] = true + # options[:params] = { + # :current_ability => current_ability, + # } + + # render json: DoiSerializer.new(@dois, options).serialized_json, status: :ok + # else + sort = case params[:sort] + when "name" then { "doi" => { order: 'asc' }} + when "-name" then { "doi" => { order: 'desc' }} + when "created" then { created: { order: 'asc' }} + when "-created" then { created: { order: 'desc' }} + when "updated" then { updated: { order: 'asc' }} + when "-updated" then { updated: { order: 'desc' }} + when "relevance" then { "_score": { "order": "desc" }} + else { updated: { order: 'desc' }} + end - render json: DoiSerializer.new(@dois, options).serialized_json, status: :ok + page = params[:page] || {} + + if page[:size].present? + page[:size] = [page[:size].to_i, 1000].min + max_number = page[:size] > 0 ? 10000/page[:size] : 1 else - sort = case params[:sort] - when "name" then { "doi" => { order: 'asc' }} - when "-name" then { "doi" => { order: 'desc' }} - when "created" then { created: { order: 'asc' }} - when "-created" then { created: { order: 'desc' }} - when "updated" then { updated: { order: 'asc' }} - when "-updated" then { updated: { order: 'desc' }} - when "relevance" then { "_score": { "order": "desc" }} - else { updated: { order: 'desc' }} - end - - page = params[:page] || {} - - if page[:size].present? - page[:size] = [page[:size].to_i, 1000].min - max_number = page[:size] > 0 ? 10000/page[:size] : 1 - else - page[:size] = 25 - max_number = 10000/page[:size] - end - page[:number] = page[:number].to_i > 0 ? [page[:number].to_i, max_number].min : 1 + page[:size] = 25 + max_number = 10000/page[:size] + end + page[:number] = page[:number].to_i > 0 ? [page[:number].to_i, max_number].min : 1 - if params[:id].present? - response = Doi.find_by_id(params[:id]) - elsif params[:ids].present? - response = Doi.find_by_ids(params[:ids], page: page, sort: sort) - else - response = Doi.query(params[:query], - state: params[:state], - created: params[:created], - registered: params[:registered], - provider_id: params[:provider_id], - client_id: params[:client_id], - prefix: params[:prefix], - person_id: params[:person_id], - resource_type_id: params[:resource_type_id], - schema_version: params[:schema_version], - subject: params[:subject], - link_check_status: params[:link_check_status], - link_check_has_schema_org: params[:link_check_has_schema_org], - link_check_body_has_pid: params[:link_check_body_has_pid], - link_check_found_schema_org_id: params[:link_check_found_schema_org_id], - link_check_found_dc_identifier: params[:link_check_found_dc_identifier], - link_check_found_citation_doi: params[:link_check_found_citation_doi], - link_check_redirect_count_gte: params[:link_check_redirect_count_gte], - source: params[:source], - page: page, - sort: sort) - end + if params[:id].present? + response = Doi.find_by_id(params[:id]) + elsif params[:ids].present? + response = Doi.find_by_ids(params[:ids], page: page, sort: sort) + else + response = Doi.query(params[:query], + state: params[:state], + created: params[:created], + registered: params[:registered], + provider_id: params[:provider_id], + client_id: params[:client_id], + prefix: params[:prefix], + person_id: params[:person_id], + resource_type_id: params[:resource_type_id], + schema_version: params[:schema_version], + subject: params[:subject], + link_check_status: params[:link_check_status], + link_check_has_schema_org: params[:link_check_has_schema_org], + link_check_body_has_pid: params[:link_check_body_has_pid], + link_check_found_schema_org_id: params[:link_check_found_schema_org_id], + link_check_found_dc_identifier: params[:link_check_found_dc_identifier], + link_check_found_citation_doi: params[:link_check_found_citation_doi], + link_check_redirect_count_gte: params[:link_check_redirect_count_gte], + source: params[:source], + page: page, + sort: sort) + end - begin - total = response.results.total - total_pages = page[:size] > 0 ? ([total.to_f, 10000].min / page[:size]).ceil : 0 - - states = total > 0 ? facet_by_key(response.response.aggregations.states.buckets) : nil - resource_types = total > 0 ? facet_by_resource_type(response.response.aggregations.resource_types.buckets) : nil - created = total > 0 ? facet_by_year(response.response.aggregations.created.buckets) : nil - registered = total > 0 ? facet_by_year(response.response.aggregations.registered.buckets) : nil - providers = total > 0 ? facet_by_provider(response.response.aggregations.providers.buckets) : nil - clients = total > 0 ? facet_by_client(response.response.aggregations.clients.buckets) : nil - prefixes = total > 0 ? facet_by_key(response.response.aggregations.prefixes.buckets) : nil - schema_versions = total > 0 ? facet_by_schema(response.response.aggregations.schema_versions.buckets) : nil - sources = total > 0 ? facet_by_key(response.response.aggregations.sources.buckets) : nil - link_checks_status = total > 0 ? facet_by_cumulative_year(response.response.aggregations.link_checks_status.buckets) : nil - links_with_schema_org = total > 0 ? facet_by_cumulative_year(response.response.aggregations.link_checks_has_schema_org.buckets) : nil - link_checks_schema_org_id = total > 0 ? response.response.aggregations.link_checks_schema_org_id.value : nil - link_checks_dc_identifier = total > 0 ? response.response.aggregations.link_checks_dc_identifier.value : nil - link_checks_citation_doi = total > 0 ? response.response.aggregations.link_checks_citation_doi.value : nil - links_checked = total > 0 ? response.response.aggregations.links_checked.value : nil - - respond_to do |format| - format.json do - @dois = response.results.results - options = {} - options[:meta] = { - total: total, - "totalPages" => total_pages, - page: page[:number], - states: states, - "resourceTypes" => resource_types, - created: created, - registered: registered, - providers: providers, - clients: clients, - prefixes: prefixes, - "schemaVersions" => schema_versions, - sources: sources, - "linkChecksStatus" => link_checks_status, - "linksChecked" => links_checked, - "linksWithSchemaOrg" => links_with_schema_org, - "linkChecksSchemaOrgId" => link_checks_schema_org_id, - "linkChecksDcIdentifier" => link_checks_dc_identifier, - "linkChecksCitationDoi" => link_checks_citation_doi + begin + total = response.results.total + total_pages = page[:size] > 0 ? ([total.to_f, 10000].min / page[:size]).ceil : 0 + + states = total > 0 ? facet_by_key(response.response.aggregations.states.buckets) : nil + resource_types = total > 0 ? facet_by_resource_type(response.response.aggregations.resource_types.buckets) : nil + created = total > 0 ? facet_by_year(response.response.aggregations.created.buckets) : nil + registered = total > 0 ? facet_by_year(response.response.aggregations.registered.buckets) : nil + providers = total > 0 ? facet_by_provider(response.response.aggregations.providers.buckets) : nil + clients = total > 0 ? facet_by_client(response.response.aggregations.clients.buckets) : nil + prefixes = total > 0 ? facet_by_key(response.response.aggregations.prefixes.buckets) : nil + schema_versions = total > 0 ? facet_by_schema(response.response.aggregations.schema_versions.buckets) : nil + sources = total > 0 ? facet_by_key(response.response.aggregations.sources.buckets) : nil + link_checks_status = total > 0 ? facet_by_cumulative_year(response.response.aggregations.link_checks_status.buckets) : nil + links_with_schema_org = total > 0 ? facet_by_cumulative_year(response.response.aggregations.link_checks_has_schema_org.buckets) : nil + link_checks_schema_org_id = total > 0 ? response.response.aggregations.link_checks_schema_org_id.value : nil + link_checks_dc_identifier = total > 0 ? response.response.aggregations.link_checks_dc_identifier.value : nil + link_checks_citation_doi = total > 0 ? response.response.aggregations.link_checks_citation_doi.value : nil + links_checked = total > 0 ? response.response.aggregations.links_checked.value : nil + + respond_to do |format| + format.json do + @dois = response.results.results + options = {} + options[:meta] = { + total: total, + "totalPages" => total_pages, + page: page[:number], + states: states, + "resourceTypes" => resource_types, + created: created, + registered: registered, + providers: providers, + clients: clients, + prefixes: prefixes, + "schemaVersions" => schema_versions, + sources: sources, + "linkChecksStatus" => link_checks_status, + "linksChecked" => links_checked, + "linksWithSchemaOrg" => links_with_schema_org, + "linkChecksSchemaOrgId" => link_checks_schema_org_id, + "linkChecksDcIdentifier" => link_checks_dc_identifier, + "linkChecksCitationDoi" => link_checks_citation_doi + }.compact + + options[:links] = { + self: request.original_url, + next: @dois.blank? ? nil : request.base_url + "/dois?" + { + query: params[:query], + "provider-id" => params[:provider_id], + "client-id" => params[:client_id], + fields: params[:fields], + "page[cursor]" => Array.wrap(@dois.last[:sort]).first, + "page[size]" => params.dig(:page, :size) }.compact.to_query }.compact - - options[:links] = { - self: request.original_url, - next: @dois.blank? ? nil : request.base_url + "/dois?" + { - query: params[:query], - "provider-id" => params[:provider_id], - "client-id" => params[:client_id], - fields: params[:fields], - "page[cursor]" => Array.wrap(@dois.last[:sort]).first, - "page[size]" => params.dig(:page, :size) }.compact.to_query - }.compact - options[:include] = @include - options[:is_collection] = true - options[:params] = { - :current_ability => current_ability, - } - - render json: DoiSerializer.new(@dois, options).serialized_json, status: :ok - end - format.citation do - # fetch formatted citations - render citation: response.records.to_a, style: params[:style] || "apa", locale: params[:locale] || "en-US" - end - format.any(:bibtex, :citeproc, :codemeta, :crosscite, :datacite, :datacite_json, :jats, :ris, :schema_org) { render request.format.to_sym => response.records.to_a } + options[:include] = @include + options[:is_collection] = true + options[:params] = { + :current_ability => current_ability, + } + + render json: DoiSerializer.new(@dois, options).serialized_json, status: :ok end - rescue Elasticsearch::Transport::Transport::Errors::BadRequest => exception - Bugsnag.notify(exception) - - message = JSON.parse(exception.message[6..-1]).to_h.dig("error", "root_cause", 0, "reason") - - render json: { "errors" => { "title" => message }}.to_json, status: :bad_request + format.citation do + # fetch formatted citations + render citation: response.records.to_a, style: params[:style] || "apa", locale: params[:locale] || "en-US" + end + format.any(:bibtex, :citeproc, :codemeta, :crosscite, :datacite, :datacite_json, :jats, :ris, :schema_org) { render request.format.to_sym => response.records.to_a } end + rescue Elasticsearch::Transport::Transport::Errors::BadRequest => exception + Bugsnag.notify(exception) + + message = JSON.parse(exception.message[6..-1]).to_h.dig("error", "root_cause", 0, "reason") + + render json: { "errors" => { "title" => message }}.to_json, status: :bad_request end end diff --git a/app/models/concerns/indexable.rb b/app/models/concerns/indexable.rb index 93744832e..4d1b26708 100644 --- a/app/models/concerns/indexable.rb +++ b/app/models/concerns/indexable.rb @@ -114,6 +114,9 @@ def query(query, options={}) # currently not used # fields = options[:query_fields].presence || query_fields + # escape forward slashes in query + query = query.gsub("/", '\/') if query.present? + must = [] must << { query_string: { query: query }} if query.present? must << { term: { aasm_state: options[:state] }} if options[:state].present? diff --git a/config/initializers/_version.rb b/config/initializers/_version.rb index 178e3cb93..f3025678e 100644 --- a/config/initializers/_version.rb +++ b/config/initializers/_version.rb @@ -1,5 +1,5 @@ module Lupo class Application - VERSION = "2.2.6" + VERSION = "2.3" end end \ No newline at end of file diff --git a/lib/tasks/doi.rake b/lib/tasks/doi.rake index d605a2e04..b9c9e6f9d 100644 --- a/lib/tasks/doi.rake +++ b/lib/tasks/doi.rake @@ -60,10 +60,10 @@ namespace :doi do until_date = ENV['UNTIL_DATE'] || Date.current.strftime("%F") end - from_id = Doi.where(created: from_date).first - until_id = Doi.where(created: until_date).last + index_time = ENV['INDEX_TIME'] || Time.zone.now.utc.iso8601 + client_id = ENV['CLIENT_ID'] - Doi.index_by_ids(from_id: from_id, until_id: until_id) + Doi.index(from_date: from_date, until_date: until_date, index_time: index_time, client_id: client_id) end desc 'Index DOIs per day'