From c36a1f42447de472173227a505df2afebce31384 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Thu, 24 Feb 2022 14:36:06 -0500 Subject: [PATCH 1/9] Reference repository first pass --- app/models/reference_repository.rb | 11 +++++++++++ .../20220218154500_create_reference_repositories.rb | 10 ++++++++++ db/schema.rb | 11 ++++++++++- spec/models/reference_repository_spec.rb | 5 +++++ 4 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 app/models/reference_repository.rb create mode 100644 db/migrate/20220218154500_create_reference_repositories.rb create mode 100644 spec/models/reference_repository_spec.rb diff --git a/app/models/reference_repository.rb b/app/models/reference_repository.rb new file mode 100644 index 000000000..c63e57577 --- /dev/null +++ b/app/models/reference_repository.rb @@ -0,0 +1,11 @@ +class ReferenceRepository < ApplicationRecord + include Hashid::Rails + validates_uniqueness_of :re3doi, :allow_nil => true + + def client + @client ||= Client.find_by_id(self[:client_id]) + end + def re3repo + @re3repo ||= DataCatalog.find_by_id(self[:re3doi]) + end +end diff --git a/db/migrate/20220218154500_create_reference_repositories.rb b/db/migrate/20220218154500_create_reference_repositories.rb new file mode 100644 index 000000000..3b7855107 --- /dev/null +++ b/db/migrate/20220218154500_create_reference_repositories.rb @@ -0,0 +1,10 @@ +class CreateReferenceRepositories < ActiveRecord::Migration[5.2] + def change + create_table :reference_repositories do |t| + t.string :client_id, null:true + t.string :re3doi, null:true + + t.timestamps + end + end +end diff --git a/db/schema.rb b/db/schema.rb index f55b1eeb4..fb9c61950 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -13,7 +13,8 @@ # # It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema.define(version: 2022_02_17_020855) do +ActiveRecord::Schema.define(version: 2022_02_18_154500) do + create_table "active_storage_attachments", options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t| t.string "name", limit: 191, null: false t.string "record_type", null: false @@ -301,4 +302,12 @@ t.index ["provider_id"], name: "FKE7FBD67446EBD781" t.index ["uid"], name: "index_provider_prefixes_on_uid", length: 128 end + + create_table "reference_repositories", options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t| + t.string "client_id" + t.string "re3doi" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + end + end diff --git a/spec/models/reference_repository_spec.rb b/spec/models/reference_repository_spec.rb new file mode 100644 index 000000000..e85048bb5 --- /dev/null +++ b/spec/models/reference_repository_spec.rb @@ -0,0 +1,5 @@ +require 'rails_helper' + +RSpec.describe ReferenceRepository, type: :model do + pending "add some examples to (or delete) #{__FILE__}" +end From 8af6e5f446ab1656ed6df4e796baab3dbe30ec30 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Fri, 25 Feb 2022 10:53:47 -0500 Subject: [PATCH 2/9] Initial indexing for reference_repositories --- app/models/reference_repository.rb | 41 +++++++++-- .../reference_repository_denormalizer.rb | 72 +++++++++++++++++++ 2 files changed, 109 insertions(+), 4 deletions(-) create mode 100644 app/models/reference_repository_denormalizer.rb diff --git a/app/models/reference_repository.rb b/app/models/reference_repository.rb index c63e57577..9eed77dac 100644 --- a/app/models/reference_repository.rb +++ b/app/models/reference_repository.rb @@ -1,11 +1,44 @@ class ReferenceRepository < ApplicationRecord + include Elasticsearch::Model + include Elasticsearch::Model::Callbacks include Hashid::Rails + + before_save :force_index + validates_uniqueness_of :re3doi, :allow_nil => true - def client - @client ||= 
Client.find_by_id(self[:client_id]) + def client_repo + if @dsclient&.symbol == self[:client_id] + @dsclient + else + @dsclient = ::Client.where(symbol: self[:client_id]).where(deleted_at: nil).first + end end - def re3repo - @re3repo ||= DataCatalog.find_by_id(self[:re3doi]) + + def re3_repo + @re3repo ||= DataCatalog.find_by_id(self[:re3doi]).fetch(:data, []).first + end + + def as_indexed_json(_options = {}) + ReferenceRepositoryDenormalizer.new(self).to_hash + end + + settings index: { number_of_shards: 1 } do + mapping dynamic: 'false' do + indexes :id + indexes :client_id + indexes :re3doi + indexes :re3data_url + indexes :created_at, type: :date, format: :date_optional_time + indexes :updated_at, type: :date, format: :date_optional_time + indexes :name + indexes :description + indexes :pid_system, type: :keyword + indexes :url + end + end + + def force_index + __elasticsearch__.instance_variable_set(:@__changed_model_attributes, nil) end end diff --git a/app/models/reference_repository_denormalizer.rb b/app/models/reference_repository_denormalizer.rb new file mode 100644 index 000000000..209a22655 --- /dev/null +++ b/app/models/reference_repository_denormalizer.rb @@ -0,0 +1,72 @@ +class ReferenceRepositoryDenormalizer + attr_reader :repository + + def initialize(repository) + @repository = repository + end + + def to_hash + %w[ + id + client_id + re3doi + re3data_url + created_at + updated_at + name + description + pid_system + url + ].map { |method_name| [ method_name, send(method_name)] }.to_h + end + + def id + @repository.hashid + end + + def client_id + @repository.client_id + end + + def re3doi + @repository.re3doi + end + + def created_at + @repository.created_at + end + + def updated_at + @repository.updated_at + end + + def name + @repository.client_repo&.name || @repository.re3_repo.name + end + + def description + @repository.client_repo&.description || @repository.re3_repo&.description + end + + def url + @repository.client_repo&.url || 
@repository.re3_repo&.url + end + + def re3data_url + doi_as_url + end + + def pid_system + ret = Array.wrap(@repository.re3_repo&.pid_systems).map { |k| k.text } + ret += Array.wrap(@repository.client_id.nil? ? nil : 'DOI') + ret.uniq + end + + def doi_as_url + doi = @repository.re3doi + return nil if doi.blank? + "https://doi.org/#{doi.downcase}" + end + + +end From c9d72aefd6fbb9fd52b363b91cc8f258a20b3659 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Fri, 4 Mar 2022 12:08:33 -0500 Subject: [PATCH 3/9] Index additional fields for reference_repositories --- app/models/reference_repository.rb | 11 ++++ .../reference_repository_denormalizer.rb | 63 +++++++++++++++++-- 2 files changed, 70 insertions(+), 4 deletions(-) diff --git a/app/models/reference_repository.rb b/app/models/reference_repository.rb index 9eed77dac..abc3919ad 100644 --- a/app/models/reference_repository.rb +++ b/app/models/reference_repository.rb @@ -35,6 +35,17 @@ def as_indexed_json(_options = {}) indexes :description indexes :pid_system, type: :keyword indexes :url + indexes :keyword, type: :keyword + indexes :subject + indexes :contact + indexes :language, type: :keyword + indexes :certificate, type: :keyword + indexes :data_access, type: :keyword + indexes :data_upload, type: :keyword + indexes :provider_type, type: :keyword + indexes :repository_type, type: :keyword + indexes :data_upload_licenses, type: :keyword + indexes :software, type: :keyword end end diff --git a/app/models/reference_repository_denormalizer.rb b/app/models/reference_repository_denormalizer.rb index 209a22655..832bb291d 100644 --- a/app/models/reference_repository_denormalizer.rb +++ b/app/models/reference_repository_denormalizer.rb @@ -5,6 +5,12 @@ def initialize(repository) @repository = repository end + def doi_as_url + doi = @repository.re3doi + return nil if doi.blank? 
+ "https://doi.org/#{doi.downcase}" + end + def to_hash %w[ id @@ -17,6 +23,16 @@ def to_hash description pid_system url + keyword + contact + software + language + certificate + data_access + data_upload + provider_type + repository_type + subject ].map { |method_name| [ method_name, send(method_name)] }.to_h end @@ -62,11 +78,50 @@ def pid_system ret.uniq end - def doi_as_url - doi = @repository.re3doi - return nil if doi.blank? - "https://doi.org/#{doi.downcase}" + def keyword + ret = Array.wrap(@repository.re3_repo&.keywords).map { |k| k.text } + ret.uniq + end + + def contact + ret = Array.wrap(@repository.re3_repo&.contacts).map { |k| k.text} + ret.uniq end + def language + ret = Array.wrap(@repository.re3_repo&.repository_languages).map { |k| k.text } + ret += Array.wrap(@repository.client_repo&.language) + ret.uniq + end + def certificate + ret = Array.wrap(@repository.re3_repo&.certificates).map { |k| k.text } + ret += Array.wrap(@repository.client_repo&.certificate) + ret.uniq + end + + def software + ret = Array.wrap(@repository.re3_repo&.software).map { |k| k.name } + ret.uniq + end + + def data_access + Array.wrap(@repository.re3_repo&.data_accesses).map { |k| k.type } + end + + def data_upload + Array.wrap(@repository.re3_repo&.data_uploads).map { |k| k.type } + end + + def provider_type + Array.wrap(@repository.re3_repo&.provider_type).map { |k| k.text } + end + + def repository_type + Array.wrap(@repository.re3_repo&.types).map { |k| k.text } + end + + def subject + Array.wrap(@repository.re3_repo&.subjects).map { |k| k.text } + end end From 3f3aadeb504e34b827d2e3511bd53291d0892974 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Tue, 8 Mar 2022 21:03:35 -0500 Subject: [PATCH 4/9] Refactor into class methods --- app/models/reference_repository.rb | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/app/models/reference_repository.rb b/app/models/reference_repository.rb index abc3919ad..c4bf7b440 100644 --- 
a/app/models/reference_repository.rb +++ b/app/models/reference_repository.rb @@ -7,16 +7,24 @@ class ReferenceRepository < ApplicationRecord validates_uniqueness_of :re3doi, :allow_nil => true + def self.find_client(client_id) + ::Client.where(symbol: client_id).where(deleted_at: nil).first + end + + def self.find_re3(doi) + DataCatalog.find_by_id(doi).fetch(:data, []).first + end + def client_repo if @dsclient&.symbol == self[:client_id] @dsclient else - @dsclient = ::Client.where(symbol: self[:client_id]).where(deleted_at: nil).first + @dsclient = ReferenceRepository.find_client(self[:client_id]) end end def re3_repo - @re3repo ||= DataCatalog.find_by_id(self[:re3doi]).fetch(:data, []).first + @re3repo ||= ReferenceRepository.find_re3(self[:re3doi]) end def as_indexed_json(_options = {}) From 6befd21a30ed2a7a2d89079d61f0e4d41d0bd46f Mon Sep 17 00:00:00 2001 From: jrhoads Date: Tue, 8 Mar 2022 21:04:31 -0500 Subject: [PATCH 5/9] Guard against nil --- app/models/reference_repository_denormalizer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/reference_repository_denormalizer.rb b/app/models/reference_repository_denormalizer.rb index 832bb291d..639046102 100644 --- a/app/models/reference_repository_denormalizer.rb +++ b/app/models/reference_repository_denormalizer.rb @@ -57,7 +57,7 @@ def updated_at end def name - @repository.client_repo&.name || @repository.re3_repo.name + @repository.client_repo&.name || @repository.re3_repo&.name end def description From 66cf3797f94a7fa9ebaeb31108fd2a136d3b5ad6 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Tue, 8 Mar 2022 21:05:42 -0500 Subject: [PATCH 6/9] Add rake tasks to load Client and Re3data Repositories into ReferenceRepositories --- lib/tasks/repository.rake | 44 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 lib/tasks/repository.rake diff --git a/lib/tasks/repository.rake b/lib/tasks/repository.rake new file mode 100644 index 
000000000..99aa2d486 --- /dev/null +++ b/lib/tasks/repository.rake @@ -0,0 +1,44 @@ +namespace :repository do + desc "Load all Clients into Reference Repostories" + task load_client_repos: :environment do + puts "Processing Client Repositories" + progressbar = ProgressBar.create( + format: "%a %e %P% Processed: %c from %C %t", + title: "Client Repositories", + total: Client.all.count + ) + Client.all.each do |c| + progressbar.increment + ReferenceRepository.find_or_create_by( + client_id: c.symbol, + re3doi: c.re3data_id + ) + end + end + + desc "Load all Re3data Repositories into Reference Repostories" + task :load_re3data_repos, [:pages] => :environment do |t, args| + pages = (args[:pages] || 3).to_i + re3repos = [] + (1..pages).each do |page| + puts "Fetching Re3Data Repositories: Fetch Group #{page}" + re3repos += DataCatalog.query("", limit: 1000, offset:page).fetch(:data, []) + end + re3repos.uniq! + puts "Processing Re3Data Repositories" + progressbar = ProgressBar.create( + format: "%a %e %P% Processed: %c from %C %t", + title: "Re3data Repositories", + total: re3repos.length + ) + re3repos.each do |repo| + progressbar.increment + doi = repo.id&.gsub('https://doi.org/','') + if not doi.blank? 
+ ReferenceRepository.find_or_create_by( + re3doi: doi + ) + end + end + end +end From 13a976d6e2fb7385dd25504fcb24afbe38c41418 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Tue, 8 Mar 2022 22:06:13 -0500 Subject: [PATCH 7/9] Appease the rubocop --- app/models/reference_repository.rb | 102 +++---- .../reference_repository_denormalizer.rb | 252 +++++++++--------- ...218154500_create_reference_repositories.rb | 6 +- db/schema.rb | 2 - lib/tasks/repository.rake | 6 +- spec/models/reference_repository_spec.rb | 4 +- 6 files changed, 190 insertions(+), 182 deletions(-) diff --git a/app/models/reference_repository.rb b/app/models/reference_repository.rb index c4bf7b440..e8aa5fa72 100644 --- a/app/models/reference_repository.rb +++ b/app/models/reference_repository.rb @@ -1,63 +1,65 @@ +# frozen_string_literal: true + class ReferenceRepository < ApplicationRecord - include Elasticsearch::Model - include Elasticsearch::Model::Callbacks - include Hashid::Rails + include Elasticsearch::Model + include Elasticsearch::Model::Callbacks + include Hashid::Rails - before_save :force_index + before_save :force_index - validates_uniqueness_of :re3doi, :allow_nil => true + validates_uniqueness_of :re3doi, allow_nil: true - def self.find_client(client_id) - ::Client.where(symbol: client_id).where(deleted_at: nil).first - end + def self.find_client(client_id) + ::Client.where(symbol: client_id).where(deleted_at: nil).first + end - def self.find_re3(doi) - DataCatalog.find_by_id(doi).fetch(:data, []).first - end + def self.find_re3(doi) + DataCatalog.find_by_id(doi).fetch(:data, []).first + end - def client_repo - if @dsclient&.symbol == self[:client_id] - @dsclient - else - @dsclient = ReferenceRepository.find_client(self[:client_id]) - end + def client_repo + if @dsclient&.symbol == self[:client_id] + @dsclient + else + @dsclient = ReferenceRepository.find_client(self[:client_id]) end + end - def re3_repo - @re3repo ||= ReferenceRepository.find_re3(self[:re3doi]) - end + def re3_repo + 
@re3repo ||= ReferenceRepository.find_re3(self[:re3doi]) + end - def as_indexed_json(_options = {}) - ReferenceRepositoryDenormalizer.new(self).to_hash - end + def as_indexed_json(_options = {}) + ReferenceRepositoryDenormalizer.new(self).to_hash + end - settings index: { number_of_shards: 1 } do - mapping dynamic: 'false' do - indexes :id - indexes :client_id - indexes :re3doi - indexes :re3data_url - indexes :created_at, type: :date, format: :date_optional_time - indexes :updated_at, type: :date, format: :date_optional_time - indexes :name - indexes :description - indexes :pid_system, type: :keyword - indexes :url - indexes :keyword, type: :keyword - indexes :subject - indexes :contact - indexes :language, type: :keyword - indexes :certificate, type: :keyword - indexes :data_access, type: :keyword - indexes :data_upload, type: :keyword - indexes :provider_type, type: :keyword - indexes :repository_type, type: :keyword - indexes :data_upload_licenses, type: :keyword - indexes :software, type: :keyword - end + settings index: { number_of_shards: 1 } do + mapping dynamic: "false" do + indexes :id + indexes :client_id + indexes :re3doi + indexes :re3data_url + indexes :created_at, type: :date, format: :date_optional_time + indexes :updated_at, type: :date, format: :date_optional_time + indexes :name + indexes :description + indexes :pid_system, type: :keyword + indexes :url + indexes :keyword, type: :keyword + indexes :subject + indexes :contact + indexes :language, type: :keyword + indexes :certificate, type: :keyword + indexes :data_access, type: :keyword + indexes :data_upload, type: :keyword + indexes :provider_type, type: :keyword + indexes :repository_type, type: :keyword + indexes :data_upload_licenses, type: :keyword + indexes :software, type: :keyword end + end - def force_index - __elasticsearch__.instance_variable_set(:@__changed_model_attributes, nil) - end + def force_index + __elasticsearch__.instance_variable_set(:@__changed_model_attributes, nil) + 
end end diff --git a/app/models/reference_repository_denormalizer.rb b/app/models/reference_repository_denormalizer.rb index 639046102..cc07fd567 100644 --- a/app/models/reference_repository_denormalizer.rb +++ b/app/models/reference_repository_denormalizer.rb @@ -1,127 +1,129 @@ +# frozen_string_literal: true + class ReferenceRepositoryDenormalizer - attr_reader :repository - - def initialize(repository) - @repository = repository - end - - def doi_as_url - doi = @repository.re3doi - return nil if doi.blank? - "https://doi.org/#{doi.downcase}" - end - - def to_hash - %w[ - id - client_id - re3doi - re3data_url - created_at - updated_at - name - description - pid_system - url - keyword - contact - software - language - certificate - data_access - data_upload - provider_type - repository_type - subject - ].map { |method_name| [ method_name, send(method_name)] }.to_h - end - - def id - @repository.hashid - end - - def client_id - @repository.client_id - end - - def re3doi - @repository.re3doi - end - - def created_at - @repository.created_at - end - - def updated_at - @repository.updated_at - end - - def name - @repository.client_repo&.name || @repository.re3_repo&.name - end - - def description - @repository.client_repo&.description || @repository.re3_repo&.description - end - - def url - @repository.client_repo&.url || @repository.re3_repo&.url - end - - def re3data_url - doi_as_url - end - - def pid_system - ret = Array.wrap(@repository.re3_repo&.pid_systems).map { |k| k.text } - ret += Array.wrap(@repository.client_id.nil? ? 
nil : 'DOI') - ret.uniq - end - - def keyword - ret = Array.wrap(@repository.re3_repo&.keywords).map { |k| k.text } - ret.uniq - end - - def contact - ret = Array.wrap(@repository.re3_repo&.contacts).map { |k| k.text} - ret.uniq - end - - def language - ret = Array.wrap(@repository.re3_repo&.repository_languages).map { |k| k.text } - ret += Array.wrap(@repository.client_repo&.language) - ret.uniq - end - - def certificate - ret = Array.wrap(@repository.re3_repo&.certificates).map { |k| k.text } - ret += Array.wrap(@repository.client_repo&.certificate) - ret.uniq - end - - def software - ret = Array.wrap(@repository.re3_repo&.software).map { |k| k.name } - ret.uniq - end - - def data_access - Array.wrap(@repository.re3_repo&.data_accesses).map { |k| k.type } - end - - def data_upload - Array.wrap(@repository.re3_repo&.data_uploads).map { |k| k.type } - end - - def provider_type - Array.wrap(@repository.re3_repo&.provider_type).map { |k| k.text } - end - - def repository_type - Array.wrap(@repository.re3_repo&.types).map { |k| k.text } - end - - def subject - Array.wrap(@repository.re3_repo&.subjects).map { |k| k.text } - end + attr_reader :repository + + def initialize(repository) + @repository = repository + end + + def doi_as_url + doi = @repository.re3doi + return nil if doi.blank? 
+ "https://doi.org/#{doi.downcase}" + end + + def to_hash + %w[ + id + client_id + re3doi + re3data_url + created_at + updated_at + name + description + pid_system + url + keyword + contact + software + language + certificate + data_access + data_upload + provider_type + repository_type + subject + ].map { |method_name| [ method_name, send(method_name)] }.to_h + end + + def id + @repository.hashid + end + + def client_id + @repository.client_id + end + + def re3doi + @repository.re3doi + end + + def created_at + @repository.created_at + end + + def updated_at + @repository.updated_at + end + + def name + @repository.client_repo&.name || @repository.re3_repo&.name + end + + def description + @repository.client_repo&.description || @repository.re3_repo&.description + end + + def url + @repository.client_repo&.url || @repository.re3_repo&.url + end + + def re3data_url + doi_as_url + end + + def pid_system + ret = Array.wrap(@repository.re3_repo&.pid_systems).map { |k| k.text } + ret += Array.wrap(@repository.client_id.nil? ? 
nil : "DOI") + ret.uniq + end + + def keyword + ret = Array.wrap(@repository.re3_repo&.keywords).map { |k| k.text } + ret.uniq + end + + def contact + ret = Array.wrap(@repository.re3_repo&.contacts).map { |k| k.text } + ret.uniq + end + + def language + ret = Array.wrap(@repository.re3_repo&.repository_languages).map { |k| k.text } + ret += Array.wrap(@repository.client_repo&.language) + ret.uniq + end + + def certificate + ret = Array.wrap(@repository.re3_repo&.certificates).map { |k| k.text } + ret += Array.wrap(@repository.client_repo&.certificate) + ret.uniq + end + + def software + ret = Array.wrap(@repository.re3_repo&.software).map { |k| k.name } + ret.uniq + end + + def data_access + Array.wrap(@repository.re3_repo&.data_accesses).map { |k| k.type } + end + + def data_upload + Array.wrap(@repository.re3_repo&.data_uploads).map { |k| k.type } + end + + def provider_type + Array.wrap(@repository.re3_repo&.provider_type).map { |k| k.text } + end + + def repository_type + Array.wrap(@repository.re3_repo&.types).map { |k| k.text } + end + + def subject + Array.wrap(@repository.re3_repo&.subjects).map { |k| k.text } + end end diff --git a/db/migrate/20220218154500_create_reference_repositories.rb b/db/migrate/20220218154500_create_reference_repositories.rb index 3b7855107..f7456a994 100644 --- a/db/migrate/20220218154500_create_reference_repositories.rb +++ b/db/migrate/20220218154500_create_reference_repositories.rb @@ -1,8 +1,10 @@ +# frozen_string_literal: true + class CreateReferenceRepositories < ActiveRecord::Migration[5.2] def change create_table :reference_repositories do |t| - t.string :client_id, null:true - t.string :re3doi, null:true + t.string :client_id, null: true + t.string :re3doi, null: true t.timestamps end diff --git a/db/schema.rb b/db/schema.rb index fb9c61950..f730af62b 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -14,7 +14,6 @@ # It's strongly recommended that you check this file into your version control system. 
ActiveRecord::Schema.define(version: 2022_02_18_154500) do - create_table "active_storage_attachments", options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t| t.string "name", limit: 191, null: false t.string "record_type", null: false @@ -309,5 +308,4 @@ t.datetime "created_at", null: false t.datetime "updated_at", null: false end - end diff --git a/lib/tasks/repository.rake b/lib/tasks/repository.rake index 99aa2d486..c0381eadf 100644 --- a/lib/tasks/repository.rake +++ b/lib/tasks/repository.rake @@ -1,3 +1,5 @@ +# frozen_string_literal: true + namespace :repository do desc "Load all Clients into Reference Repostories" task load_client_repos: :environment do @@ -22,7 +24,7 @@ namespace :repository do re3repos = [] (1..pages).each do |page| puts "Fetching Re3Data Repositories: Fetch Group #{page}" - re3repos += DataCatalog.query("", limit: 1000, offset:page).fetch(:data, []) + re3repos += DataCatalog.query("", limit: 1000, offset: page).fetch(:data, []) end re3repos.uniq! puts "Processing Re3Data Repositories" @@ -33,7 +35,7 @@ namespace :repository do ) re3repos.each do |repo| progressbar.increment - doi = repo.id&.gsub('https://doi.org/','') + doi = repo.id&.gsub("https://doi.org/", "") if not doi.blank? 
+ ReferenceRepository.find_or_create_by( re3doi: doi diff --git a/spec/models/reference_repository_spec.rb b/spec/models/reference_repository_spec.rb index e85048bb5..c88d34f32 100644 --- a/spec/models/reference_repository_spec.rb +++ b/spec/models/reference_repository_spec.rb @@ -1,4 +1,6 @@ -require 'rails_helper' +# frozen_string_literal: true + +require "rails_helper" RSpec.describe ReferenceRepository, type: :model do pending "add some examples to (or delete) #{__FILE__}" From 48f484955479627f8316c02f05c347de722071ac Mon Sep 17 00:00:00 2001 From: jrhoads Date: Tue, 8 Mar 2022 22:43:16 -0500 Subject: [PATCH 8/9] Update Gemfile/lock --- Gemfile | 2 ++ Gemfile.lock | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Gemfile b/Gemfile index 62f9dceec..983668d4c 100644 --- a/Gemfile +++ b/Gemfile @@ -113,3 +113,5 @@ group :test do gem "vcr", "~> 5.1" gem "webmock", "~> 3.1" end + +gem "hashid-rails", "~> 1.4" diff --git a/Gemfile.lock b/Gemfile.lock index 755c54136..62fcdfbdc 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -273,6 +273,10 @@ GEM hamster (3.0.0) concurrent-ruby (~> 1.0) hashdiff (1.0.1) + hashid-rails (1.4.1) + activerecord (>= 4.0) + hashids (~> 1.0) + hashids (1.0.6) hashie (4.1.0) htmlentities (4.3.4) http-accept (1.7.0) @@ -669,6 +673,7 @@ DEPENDENCIES graphql-cache (~> 0.6.0) graphql-errors (~> 0.4.0) hashdiff (>= 1.0.0.beta1, < 2.0.0) + hashid-rails iso-639 (~> 0.3.5) iso8601 (~> 0.9.0) jsonlint (~> 0.3.0) @@ -722,4 +727,4 @@ DEPENDENCIES webmock (~> 3.1) BUNDLED WITH - 2.2.30 + 2.2.33 From 27fda4997e32375fe57345f313a0a083b497386c Mon Sep 17 00:00:00 2001 From: jrhoads Date: Tue, 15 Mar 2022 14:51:54 -0400 Subject: [PATCH 9/9] Add Indexable concern to ReferenceRepository model as well as associated rake tasks --- app/models/reference_repository.rb | 1 + lib/tasks/repository.rake | 65 ++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/app/models/reference_repository.rb
b/app/models/reference_repository.rb index e8aa5fa72..32f643453 100644 --- a/app/models/reference_repository.rb +++ b/app/models/reference_repository.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true class ReferenceRepository < ApplicationRecord + include Indexable include Elasticsearch::Model include Elasticsearch::Model::Callbacks include Hashid::Rails diff --git a/lib/tasks/repository.rake b/lib/tasks/repository.rake index c0381eadf..7606bd159 100644 --- a/lib/tasks/repository.rake +++ b/lib/tasks/repository.rake @@ -43,4 +43,69 @@ namespace :repository do end end end + + desc "Create index for reference_repositories" + task create_index: :environment do + puts ReferenceRepository.create_index + end + + desc "Delete index for reference_repositories" + task delete_index: :environment do + puts ReferenceRepository.delete_index(index: ENV["INDEX"]) + end + + desc "Upgrade index for reference_repositories" + task upgrade_index: :environment do + puts ReferenceRepository.upgrade_index + end + + desc "Show index stats for reference_repositories" + task index_stats: :environment do + puts ReferenceRepository.index_stats + end + + desc "Switch index for reference_repositories" + task switch_index: :environment do + puts ReferenceRepository.switch_index + end + + desc "Return active index for reference_repositories" + task active_index: :environment do + puts ReferenceRepository.active_index + " is the active index." 
+ end + + desc "Monitor reindexing for reference_repositories" + task monitor_reindex: :environment do + puts ReferenceRepository.monitor_reindex + end + + desc "Create alias for reference_repositories" + task create_alias: :environment do + puts ReferenceRepository.create_alias(index: ENV["INDEX"], alias: ENV["ALIAS"]) + end + + desc "List aliases for reference_repositories" + task list_aliases: :environment do + puts ReferenceRepository.list_aliases + end + + desc "Delete alias for reference_repositories" + task delete_alias: :environment do + puts ReferenceRepository.delete_alias(index: ENV["INDEX"], alias: ENV["ALIAS"]) + end + + desc "Import all reference_repositories" + task import: :environment do + ReferenceRepository.import(index: ReferenceRepository.inactive_index) + end + + desc "Delete from index by query" + task delete_by_query: :environment do + if ENV["QUERY"].nil? + puts "ENV['QUERY'] is required" + exit + end + + puts ReferenceRepository.delete_by_query(index: ENV["INDEX"], query: ENV["QUERY"]) + end end