From 5d767deb4a9bcba08f649dc7bcc65f6d86039808 Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Sun, 16 Jun 2019 18:00:15 +0200 Subject: [PATCH] support query by repository_id for clients. #292 --- app/controllers/clients_controller.rb | 8 +- app/models/client.rb | 1 - app/models/concerns/indexable.rb | 3 +- app/models/doi.rb | 351 ++++++++++++++------------ 4 files changed, 202 insertions(+), 161 deletions(-) diff --git a/app/controllers/clients_controller.rb b/app/controllers/clients_controller.rb index 4e0cb7c30..0ed458beb 100644 --- a/app/controllers/clients_controller.rb +++ b/app/controllers/clients_controller.rb @@ -25,7 +25,13 @@ def index elsif params[:ids].present? response = Client.find_by_id(params[:ids], page: page, sort: sort) else - response = Client.query(params[:query], year: params[:year], provider_id: params[:provider_id], software: params[:software], page: page, sort: sort) + response = Client.query(params[:query], + year: params[:year], + provider_id: params[:provider_id], + repository_id: params[:repository_id], + software: params[:software], + page: page, + sort: sort) end begin diff --git a/app/models/client.rb b/app/models/client.rb index bf091dabb..a2d2e7f23 100644 --- a/app/models/client.rb +++ b/app/models/client.rb @@ -80,7 +80,6 @@ class Client < ActiveRecord::Base indexes :description, type: :text indexes :contact_name, type: :text indexes :contact_email, type: :text, fields: { keyword: { type: "keyword" }} - indexes :re3data, type: :keyword indexes :version, type: :integer indexes :is_active, type: :keyword indexes :domains, type: :text diff --git a/app/models/concerns/indexable.rb b/app/models/concerns/indexable.rb index e47703bb9..87734a40c 100644 --- a/app/models/concerns/indexable.rb +++ b/app/models/concerns/indexable.rb @@ -173,12 +173,13 @@ def query(query, options={}) elsif self.name == "Client" must << { range: { created: { gte: "#{options[:year].split(",").min}||/y", lte: "#{options[:year].split(",").max}||/y", format: "yyyy" }}} if options[:year].present? must << { terms: { "software.raw" => options[:software].split(",") }} if options[:software].present? + must << { term: { repository_id: options[:repository_id].gsub("/", '\/') }} if options[:repository_id].present? must_not << { exists: { field: "deleted_at" }} unless options[:include_deleted] must_not << { terms: { provider_id: ["crossref"] }} if options[:exclude_registration_agencies] elsif self.name == "Doi" must << { terms: { aasm_state: options[:state].to_s.split(",") }} if options[:state].present? must << { range: { registered: { gte: "#{options[:registered].split(",").min}||/y", lte: "#{options[:registered].split(",").max}||/y", format: "yyyy" }}} if options[:registered].present? - must << { term: { "client.repository_id": options[:repository_id].upcase }} if options[:repository_id].present? + must << { term: { "client.repository_id" => options[:repository_id].upcase.gsub("/", '\/') }} if options[:repository_id].present? must_not << { terms: { provider_id: ["crossref"] }} if options[:exclude_registration_agencies] elsif self.name == "Event" must << { term: { subj_id: options[:subj_id] }} if options[:subj_id].present? diff --git a/app/models/doi.rb b/app/models/doi.rb index f6aadf69d..5316ee83d 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -99,165 +99,200 @@ class Doi < ActiveRecord::Base # use different index for testing index_name Rails.env.test? ? "dois-test" : "dois" - mapping dynamic: 'false' do - indexes :id, type: :keyword - indexes :uid, type: :keyword - indexes :doi, type: :keyword - indexes :identifier, type: :keyword - indexes :url, type: :text, fields: { keyword: { type: "keyword" }} - indexes :creators, type: :object, properties: { - nameType: { type: :keyword }, - nameIdentifiers: { type: :object, properties: { - nameIdentifier: { type: :keyword }, - nameIdentifierScheme: { type: :keyword }, - schemeUri: { type: :keyword } - }}, - name: { type: :text }, - givenName: { type: :text }, - familyName: { type: :text }, - affiliation: { type: :text } - } - indexes :contributors, type: :object, properties: { - nameType: { type: :keyword }, - nameIdentifiers: { type: :object, properties: { - nameIdentifier: { type: :keyword }, - nameIdentifierScheme: { type: :keyword }, - schemeUri: { type: :keyword } - }}, - name: { type: :text }, - givenName: { type: :text }, - familyName: { type: :text }, - affiliation: { type: :text }, - contributorType: { type: :keyword } - } - indexes :creator_names, type: :text - indexes :titles, type: :object, properties: { - title: { type: :text, fields: { keyword: { type: "keyword" }}}, - titleType: { type: :keyword }, - lang: { type: :keyword } - } - indexes :descriptions, type: :object, properties: { - description: { type: :text }, - descriptionType: { type: :keyword }, - lang: { type: :keyword } - } - indexes :publisher, type: :text, fields: { keyword: { type: "keyword" }} - indexes :publication_year, type: :date, format: "yyyy", ignore_malformed: true - indexes :client_id, type: :keyword - indexes :provider_id, type: :keyword - indexes :resource_type_id, type: :keyword - indexes :media_ids, type: :keyword - indexes :media, type: :object, properties: { - type: { type: :keyword }, - id: { type: :keyword }, - uid: { type: :keyword }, - url: { type: :text }, - media_type: { type: :keyword }, - version: { type: :keyword }, - created: { type: :date, ignore_malformed: true }, - updated: { type: :date, ignore_malformed: true } - } - indexes :identifiers, type: :object, properties: { - identifierType: { type: :keyword }, - identifier: { type: :keyword } - } - indexes :related_identifiers, type: :object, properties: { - relatedIdentifierType: { type: :keyword }, - relatedIdentifier: { type: :keyword }, - relationType: { type: :keyword }, - relatedMetadataScheme: { type: :keyword }, - schemeUri: { type: :keyword }, - schemeType: { type: :keyword }, - resourceTypeGeneral: { type: :keyword } - } - indexes :types, type: :object, properties: { - resourceTypeGeneral: { type: :keyword }, - resourceType: { type: :keyword }, - schemaOrg: { type: :keyword }, - bibtex: { type: :keyword }, - citeproc: { type: :keyword }, - ris: { type: :keyword } - } - indexes :funding_references, type: :object, properties: { - funderName: { type: :keyword }, - funderIdentifier: { type: :keyword }, - funderIdentifierType: { type: :keyword }, - awardNumber: { type: :keyword }, - awardUri: { type: :keyword }, - awardTitle: { type: :keyword } - } - indexes :dates, type: :object, properties: { - date: { type: :date, format: "date_optional_time", ignore_malformed: true, fields: { raw: { type: :text }} }, - dateType: { type: :keyword } - } - indexes :geo_locations, type: :object, properties: { - geoLocationPoint: { type: :object }, - geoLocationBox: { type: :object }, - geoLocationPlace: { type: :keyword } - } - indexes :rights_list, type: :object, properties: { - rights: { type: :keyword }, - rightsUri: { type: :keyword }, - lang: { type: :keyword } - } - indexes :subjects, type: :object, properties: { - subject: { type: :keyword }, - subjectScheme: { type: :keyword }, - schemeUri: { type: :keyword }, - valueUri: { type: :keyword }, - lang: { type: :keyword } - } - indexes :container, type: :object, properties: { - type: { type: :keyword }, - identifier: { type: :keyword }, - identifierType: { type: :keyword }, - title: { type: :keyword }, - volume: { type: :keyword }, - issue: { type: :keyword }, - firstPage: { type: :keyword }, - lastPage: { type: :keyword } - } - - indexes :xml, type: :text, index: "false" - indexes :content_url, type: :keyword - indexes :version_info, type: :keyword - indexes :formats, type: :keyword - indexes :sizes, type: :keyword - indexes :language, type: :keyword - indexes :is_active, type: :keyword - indexes :aasm_state, type: :keyword - indexes :schema_version, type: :keyword - indexes :metadata_version, type: :keyword - indexes :agency, type: :keyword - indexes :source, type: :keyword - indexes :prefix, type: :keyword - indexes :suffix, type: :keyword - indexes :reason, type: :text - indexes :landing_page, type: :object, properties: { - checked: { type: :date, ignore_malformed: true }, - url: { type: :text, fields: { keyword: { type: "keyword" }}}, - status: { type: :integer }, - contentType: { type: :keyword }, - error: { type: :keyword }, - redirectCount: { type: :integer }, - redirectUrls: { type: :keyword }, - downloadLatency: { type: :scaled_float, scaling_factor: 100 }, - hasSchemaOrg: { type: :boolean }, - schemaOrgId: { type: :keyword }, - dcIdentifier: { type: :keyword }, - citationDoi: { type: :keyword }, - bodyHasPid: { type: :boolean } + settings index: { + analysis: { + analyzer: { + string_lowercase: { tokenizer: 'keyword', filter: %w(lowercase ascii_folding) } + }, + normalizer: { + keyword_lowercase: { type: "custom", filter: %w(lowercase) } + }, + filter: { + ascii_folding: { type: 'asciifolding', preserve_original: true } + } } - indexes :cache_key, type: :keyword - indexes :registered, type: :date, ignore_malformed: true - indexes :published, type: :date, ignore_malformed: true - indexes :created, type: :date, ignore_malformed: true - indexes :updated, type: :date, ignore_malformed: true - - # include parent objects - indexes :client, type: :object - indexes :provider, type: :object - indexes :resource_type, type: :object + } do + mapping dynamic: 'false' do + indexes :id, type: :keyword + indexes :uid, type: :keyword + indexes :doi, type: :keyword + indexes :identifier, type: :keyword + indexes :url, type: :text, fields: { keyword: { type: "keyword" }} + indexes :creators, type: :object, properties: { + nameType: { type: :keyword }, + nameIdentifiers: { type: :object, properties: { + nameIdentifier: { type: :keyword }, + nameIdentifierScheme: { type: :keyword }, + schemeUri: { type: :keyword } + }}, + name: { type: :text }, + givenName: { type: :text }, + familyName: { type: :text }, + affiliation: { type: :text } + } + indexes :contributors, type: :object, properties: { + nameType: { type: :keyword }, + nameIdentifiers: { type: :object, properties: { + nameIdentifier: { type: :keyword }, + nameIdentifierScheme: { type: :keyword }, + schemeUri: { type: :keyword } + }}, + name: { type: :text }, + givenName: { type: :text }, + familyName: { type: :text }, + affiliation: { type: :text }, + contributorType: { type: :keyword } + } + indexes :creator_names, type: :text + indexes :titles, type: :object, properties: { + title: { type: :text, fields: { keyword: { type: "keyword" }}}, + titleType: { type: :keyword }, + lang: { type: :keyword } + } + indexes :descriptions, type: :object, properties: { + description: { type: :text }, + descriptionType: { type: :keyword }, + lang: { type: :keyword } + } + indexes :publisher, type: :text, fields: { keyword: { type: "keyword" }} + indexes :publication_year, type: :date, format: "yyyy", ignore_malformed: true + indexes :client_id, type: :keyword + indexes :provider_id, type: :keyword + indexes :resource_type_id, type: :keyword + indexes :media_ids, type: :keyword + indexes :media, type: :object, properties: { + type: { type: :keyword }, + id: { type: :keyword }, + uid: { type: :keyword }, + url: { type: :text }, + media_type: { type: :keyword }, + version: { type: :keyword }, + created: { type: :date, ignore_malformed: true }, + updated: { type: :date, ignore_malformed: true } + } + indexes :identifiers, type: :object, properties: { + identifierType: { type: :keyword }, + identifier: { type: :keyword } + } + indexes :related_identifiers, type: :object, properties: { + relatedIdentifierType: { type: :keyword }, + relatedIdentifier: { type: :keyword }, + relationType: { type: :keyword }, + relatedMetadataScheme: { type: :keyword }, + schemeUri: { type: :keyword }, + schemeType: { type: :keyword }, + resourceTypeGeneral: { type: :keyword } + } + indexes :types, type: :object, properties: { + resourceTypeGeneral: { type: :keyword }, + resourceType: { type: :keyword }, + schemaOrg: { type: :keyword }, + bibtex: { type: :keyword }, + citeproc: { type: :keyword }, + ris: { type: :keyword } + } + indexes :funding_references, type: :object, properties: { + funderName: { type: :keyword }, + funderIdentifier: { type: :keyword }, + funderIdentifierType: { type: :keyword }, + awardNumber: { type: :keyword }, + awardUri: { type: :keyword }, + awardTitle: { type: :keyword } + } + indexes :dates, type: :object, properties: { + date: { type: :date, format: "date_optional_time", ignore_malformed: true, fields: { raw: { type: :text }} }, + dateType: { type: :keyword } + } + indexes :geo_locations, type: :object, properties: { + geoLocationPoint: { type: :object }, + geoLocationBox: { type: :object }, + geoLocationPlace: { type: :keyword } + } + indexes :rights_list, type: :object, properties: { + rights: { type: :keyword }, + rightsUri: { type: :keyword }, + lang: { type: :keyword } + } + indexes :subjects, type: :object, properties: { + subject: { type: :keyword }, + subjectScheme: { type: :keyword }, + schemeUri: { type: :keyword }, + valueUri: { type: :keyword }, + lang: { type: :keyword } + } + indexes :container, type: :object, properties: { + type: { type: :keyword }, + identifier: { type: :keyword }, + identifierType: { type: :keyword }, + title: { type: :keyword }, + volume: { type: :keyword }, + issue: { type: :keyword }, + firstPage: { type: :keyword }, + lastPage: { type: :keyword } + } + + indexes :xml, type: :text, index: "false" + indexes :content_url, type: :keyword + indexes :version_info, type: :keyword + indexes :formats, type: :keyword + indexes :sizes, type: :keyword + indexes :language, type: :keyword + indexes :is_active, type: :keyword + indexes :aasm_state, type: :keyword + indexes :schema_version, type: :keyword + indexes :metadata_version, type: :keyword + indexes :agency, type: :keyword + indexes :source, type: :keyword + indexes :prefix, type: :keyword + indexes :suffix, type: :keyword + indexes :reason, type: :text + indexes :landing_page, type: :object, properties: { + checked: { type: :date, ignore_malformed: true }, + url: { type: :text, fields: { keyword: { type: "keyword" }}}, + status: { type: :integer }, + contentType: { type: :keyword }, + error: { type: :keyword }, + redirectCount: { type: :integer }, + redirectUrls: { type: :keyword }, + downloadLatency: { type: :scaled_float, scaling_factor: 100 }, + hasSchemaOrg: { type: :boolean }, + schemaOrgId: { type: :keyword }, + dcIdentifier: { type: :keyword }, + citationDoi: { type: :keyword }, + bodyHasPid: { type: :boolean } + } + indexes :cache_key, type: :keyword + indexes :registered, type: :date, ignore_malformed: true + indexes :published, type: :date, ignore_malformed: true + indexes :created, type: :date, ignore_malformed: true + indexes :updated, type: :date, ignore_malformed: true + + # include parent objects + indexes :client, type: :object, properties: { + id: { type: :keyword }, + symbol: { type: :keyword }, + provider_id: { type: :keyword }, + repository_id: { type: :keyword }, + prefix_ids: { type: :keyword }, + name: { type: :text, fields: { keyword: { type: "keyword" }, raw: { type: "text", analyzer: "string_lowercase", "fielddata": true }} }, + description: { type: :text }, + contact_name: { type: :text }, + contact_email: { type: :text, fields: { keyword: { type: "keyword" }} }, + version: { type: :integer }, + is_active: { type: :keyword }, + domains: { type: :text }, + year: { type: :integer }, + url: { type: :text, fields: { keyword: { type: "keyword" }} }, + software: { type: :text, fields: { keyword: { type: "keyword" }, raw: { type: "text", analyzer: "string_lowercase", "fielddata": true }} }, + cache_key: { type: :keyword }, + created: { type: :date }, + updated: { type: :date }, + deleted_at: { type: :date }, + cumulative_years: { type: :integer, index: "false" } + } + indexes :provider, type: :object + indexes :resource_type, type: :object + end end def as_indexed_json(options={})