-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #791 from datacite/reference-repository
Combined index for Client and Re3Data Repositories
- Loading branch information
Showing
8 changed files
with
341 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -113,3 +113,5 @@ group :test do | |
gem "vcr", "~> 5.1" | ||
gem "webmock", "~> 3.1" | ||
end | ||
|
||
gem "hashid-rails", "~> 1.4" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
# frozen_string_literal: true | ||
|
||
# Combined search-index record for repositories. A row references a DataCite
# client through +client_id+ (matched against the client symbol), a re3data
# repository through +re3doi+, or both. The indexed document is assembled from
# those two sources by ReferenceRepositoryDenormalizer.
class ReferenceRepository < ApplicationRecord
  include Indexable
  include Elasticsearch::Model
  include Elasticsearch::Model::Callbacks
  include Hashid::Rails

  before_save :force_index

  validates_uniqueness_of :re3doi, allow_nil: true

  # Looks up the client whose symbol equals +client_id+ and whose
  # +deleted_at+ is nil.
  #
  # @param client_id [String] client symbol
  # @return [Client, nil]
  def self.find_client(client_id)
    ::Client.where(symbol: client_id, deleted_at: nil).first
  end

  # Looks up a re3data record by DOI and returns the first entry of the
  # +:data+ list in the DataCatalog response (nil when there is none).
  #
  # @param doi [String] re3data DOI
  def self.find_re3(doi)
    DataCatalog.find_by_id(doi).fetch(:data, []).first
  end

  # The client record linked through +client_id+.
  #
  # The lookup result is cached per instance, keyed on the client_id that was
  # looked up. A miss (nil) is cached too, so repeated calls from the
  # denormalizer do not re-query, and changing +client_id+ triggers a new
  # lookup on the next call.
  #
  # @return [Client, nil]
  def client_repo
    wanted = self[:client_id]
    unless defined?(@dsclient_symbol) && @dsclient_symbol == wanted
      # A record without a client_id has no client to look up.
      @dsclient = wanted.blank? ? nil : ReferenceRepository.find_client(wanted)
      @dsclient_symbol = wanted
    end
    @dsclient
  end

  # The re3data record linked through +re3doi+.
  #
  # Cached the same way as #client_repo. The previous `||=` memoization never
  # cached a nil result, so every call on a record without a resolvable
  # re3data entry repeated the DataCatalog lookup.
  #
  # NOTE(review): a blank re3doi now skips the lookup entirely; this assumes
  # DataCatalog.find_by_id yields no data for a blank DOI — confirm.
  def re3_repo
    wanted = self[:re3doi]
    unless defined?(@re3repo_doi) && @re3repo_doi == wanted
      @re3repo = wanted.blank? ? nil : ReferenceRepository.find_re3(wanted)
      @re3repo_doi = wanted
    end
    @re3repo
  end

  # Document body sent to the search index.
  #
  # @return [Hash]
  def as_indexed_json(_options = {})
    ReferenceRepositoryDenormalizer.new(self).to_hash
  end

  settings index: { number_of_shards: 1 } do
    mapping dynamic: "false" do
      indexes :id
      indexes :client_id
      indexes :re3doi
      indexes :re3data_url
      indexes :created_at, type: :date, format: :date_optional_time
      indexes :updated_at, type: :date, format: :date_optional_time
      indexes :name
      indexes :description
      indexes :pid_system, type: :keyword
      indexes :url
      indexes :keyword, type: :keyword
      indexes :subject
      indexes :contact
      indexes :language, type: :keyword
      indexes :certificate, type: :keyword
      indexes :data_access, type: :keyword
      indexes :data_upload, type: :keyword
      indexes :provider_type, type: :keyword
      indexes :repository_type, type: :keyword
      indexes :data_upload_licenses, type: :keyword
      indexes :software, type: :keyword
    end
  end

  # before_save hook: clears the changed-attribute list that the
  # elasticsearch-model proxy keeps for this record.
  #
  # NOTE(review): presumably this makes the indexing callback write the whole
  # document instead of a partial update limited to changed columns (most
  # indexed fields are derived, not columns) — confirm against
  # elasticsearch-model's update_document.
  def force_index
    __elasticsearch__.instance_variable_set(:@__changed_model_attributes, nil)
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
# frozen_string_literal: true | ||
|
||
# Flattens a reference repository record into the hash that is sent to the
# search index. Values are read from the record itself, from its linked client
# record (+client_repo+) and from its linked re3data record (+re3_repo+);
# either linked record may be nil.
class ReferenceRepositoryDenormalizer
  # Keys of the generated document, in output order. Each key is also the
  # name of the public method that computes its value.
  INDEXED_FIELDS = %w[
    id
    client_id
    re3doi
    re3data_url
    created_at
    updated_at
    name
    description
    pid_system
    url
    keyword
    contact
    software
    language
    certificate
    data_access
    data_upload
    provider_type
    repository_type
    subject
  ].freeze

  attr_reader :repository

  # @param repository [ReferenceRepository] record to denormalize
  def initialize(repository)
    @repository = repository
  end

  # @return [String, nil] the record's re3doi, lower-cased, as an
  #   https://doi.org/ URL; nil when the record has no re3doi
  def doi_as_url
    doi = repository.re3doi
    return nil if doi.blank?

    "https://doi.org/#{doi.downcase}"
  end

  # @return [Hash{String => Object}] one entry per INDEXED_FIELDS key
  def to_hash
    INDEXED_FIELDS.each_with_object({}) do |field, document|
      document[field] = public_send(field)
    end
  end

  def id
    repository.hashid
  end

  def client_id
    repository.client_id
  end

  def re3doi
    repository.re3doi
  end

  def created_at
    repository.created_at
  end

  def updated_at
    repository.updated_at
  end

  # The client's value wins; the re3data value is the fallback.
  def name
    client&.name || re3&.name
  end

  # The client's value wins; the re3data value is the fallback.
  def description
    client&.description || re3&.description
  end

  # The client's value wins; the re3data value is the fallback.
  def url
    client&.url || re3&.url
  end

  def re3data_url
    doi_as_url
  end

  # PID systems listed by re3data, plus "DOI" whenever the record has a
  # client_id; duplicates removed.
  def pid_system
    systems = texts(re3&.pid_systems)
    systems << "DOI" unless repository.client_id.nil?
    systems.uniq
  end

  def keyword
    texts(re3&.keywords).uniq
  end

  def contact
    texts(re3&.contacts).uniq
  end

  # re3data languages first, then the client's language value(s); duplicates
  # removed.
  def language
    (texts(re3&.repository_languages) + Array.wrap(client&.language)).uniq
  end

  # re3data certificates first, then the client's certificate value(s);
  # duplicates removed.
  def certificate
    (texts(re3&.certificates) + Array.wrap(client&.certificate)).uniq
  end

  def software
    Array.wrap(re3&.software).map(&:name).uniq
  end

  def data_access
    Array.wrap(re3&.data_accesses).map(&:type)
  end

  def data_upload
    Array.wrap(re3&.data_uploads).map(&:type)
  end

  def provider_type
    texts(re3&.provider_type)
  end

  def repository_type
    texts(re3&.types)
  end

  def subject
    texts(re3&.subjects)
  end

  private

  # Linked client record, or nil.
  def client
    repository.client_repo
  end

  # Linked re3data record, or nil.
  def re3
    repository.re3_repo
  end

  # Collects +text+ from nil, a single item, or a list of items.
  def texts(items)
    Array.wrap(items).map(&:text)
  end
end
12 changes: 12 additions & 0 deletions
12
db/migrate/20220218154500_create_reference_repositories.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# frozen_string_literal: true | ||
|
||
# Creates the reference_repositories table with two nullable string columns,
# client_id and re3doi, plus the standard timestamp columns.
class CreateReferenceRepositories < ActiveRecord::Migration[5.2]
  def change
    create_table :reference_repositories do |table|
      # Both columns are declared nullable.
      table.string :client_id, :re3doi, null: true

      table.timestamps
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
# frozen_string_literal: true | ||
|
||
# Rake tasks for populating the reference_repositories table and managing its
# search index.
namespace :repository do
  # NOTE(review): the lookup below matches on client_id AND re3doi together,
  # so a row created earlier by load_re3data_repos for the same re3doi (with
  # no client_id) is not found, and the follow-up create would then run into
  # the model's re3doi uniqueness validation. Running this task before
  # load_re3data_repos avoids that — confirm the intended order.
  desc "Load all Clients into Reference Repositories"
  task load_client_repos: :environment do
    puts "Processing Client Repositories"
    progressbar = ProgressBar.create(
      format: "%a %e %P% Processed: %c from %C %t",
      title: "Client Repositories",
      total: Client.count
    )
    # find_each reads clients in batches instead of loading the whole table.
    Client.find_each do |client|
      progressbar.increment
      ReferenceRepository.find_or_create_by(
        client_id: client.symbol,
        re3doi: client.re3data_id
      )
    end
  end

  # Optional task argument +pages+ (default 3) is the number of fetch groups
  # requested from DataCatalog, 1000 entries each.
  desc "Load all Re3data Repositories into Reference Repositories"
  task :load_re3data_repos, [:pages] => :environment do |_task, args|
    pages = (args[:pages] || 3).to_i
    re3repos = (1..pages).flat_map do |page|
      puts "Fetching Re3Data Repositories: Fetch Group #{page}"
      DataCatalog.query("", limit: 1000, offset: page).fetch(:data, [])
    end
    re3repos.uniq!
    puts "Processing Re3Data Repositories"
    progressbar = ProgressBar.create(
      format: "%a %e %P% Processed: %c from %C %t",
      title: "Re3data Repositories",
      total: re3repos.length
    )
    re3repos.each do |repo|
      progressbar.increment
      # The catalog id is a DOI URL; only the bare DOI is stored.
      doi = repo.id&.gsub("https://doi.org/", "")
      next if doi.blank?

      ReferenceRepository.find_or_create_by(re3doi: doi)
    end
  end

  desc "Create index for reference_repositories"
  task create_index: :environment do
    puts ReferenceRepository.create_index
  end

  # Reads the index name from ENV["INDEX"].
  desc "Delete index for reference_repositories"
  task delete_index: :environment do
    puts ReferenceRepository.delete_index(index: ENV["INDEX"])
  end

  desc "Upgrade index for reference_repositories"
  task upgrade_index: :environment do
    puts ReferenceRepository.upgrade_index
  end

  desc "Show index stats for reference_repositories"
  task index_stats: :environment do
    puts ReferenceRepository.index_stats
  end

  desc "Switch index for reference_repositories"
  task switch_index: :environment do
    puts ReferenceRepository.switch_index
  end

  desc "Return active index for reference_repositories"
  task active_index: :environment do
    puts "#{ReferenceRepository.active_index} is the active index."
  end

  desc "Monitor reindexing for reference_repositories"
  task monitor_reindex: :environment do
    puts ReferenceRepository.monitor_reindex
  end

  # Reads the index and alias names from ENV["INDEX"] and ENV["ALIAS"].
  desc "Create alias for reference_repositories"
  task create_alias: :environment do
    puts ReferenceRepository.create_alias(index: ENV["INDEX"], alias: ENV["ALIAS"])
  end

  desc "List aliases for reference_repositories"
  task list_aliases: :environment do
    puts ReferenceRepository.list_aliases
  end

  # Reads the index and alias names from ENV["INDEX"] and ENV["ALIAS"].
  desc "Delete alias for reference_repositories"
  task delete_alias: :environment do
    puts ReferenceRepository.delete_alias(index: ENV["INDEX"], alias: ENV["ALIAS"])
  end

  # Imports into the currently inactive index.
  desc "Import all reference_repositories"
  task import: :environment do
    ReferenceRepository.import(index: ReferenceRepository.inactive_index)
  end

  # Requires ENV["QUERY"]; ENV["INDEX"] is passed through as given.
  desc "Delete from index by query"
  task delete_by_query: :environment do
    if ENV["QUERY"].nil?
      puts "ENV['QUERY'] is required"
      exit
    end

    puts ReferenceRepository.delete_by_query(index: ENV["INDEX"], query: ENV["QUERY"])
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# frozen_string_literal: true | ||
|
||
require "rails_helper" | ||
|
||
# Placeholder model spec for ReferenceRepository: it registers a single
# pending entry until real examples are written.
RSpec.describe(ReferenceRepository, type: :model) do
  pending("add some examples to (or delete) #{__FILE__}")
end