Skip to content

Commit

Permalink
define write index for es aliases. #677
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Nov 15, 2020
1 parent 946ea52 commit 57afb13
Show file tree
Hide file tree
Showing 15 changed files with 623 additions and 75 deletions.
7 changes: 4 additions & 3 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,10 @@ gem "rack-cors", "~> 1.0", require: "rack/cors"
gem "strip_attributes", "~> 1.8"
gem "slack-notifier", "~> 2.1"
gem "mini_magick", "~> 4.8"
gem "elasticsearch", "~> 7.1.0"
gem "elasticsearch-model", "~> 7.0", require: "elasticsearch/model"
gem "elasticsearch-rails", "~> 7.0"
gem "elasticsearch", "7.5"
gem 'elasticsearch-transport', '7.5'
gem 'elasticsearch-model', '~> 7.1', '>= 7.1.1', require: "elasticsearch/model"
gem 'elasticsearch-rails', '~> 7.1', '>= 7.1.1'
gem "faraday", "~> 0.17.3"
gem "faraday_middleware-aws-sigv4", "~> 0.3.0"
gem "rack-utf8_sanitizer", "~> 1.6"
Expand Down
64 changes: 32 additions & 32 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,16 @@ GEM
audited (4.9.0)
activerecord (>= 4.2, < 6.1)
aws-eventstream (1.1.0)
aws-partitions (1.381.0)
aws-sdk-core (3.109.1)
aws-partitions (1.393.0)
aws-sdk-core (3.109.2)
aws-eventstream (~> 1, >= 1.0.2)
aws-partitions (~> 1, >= 1.239.0)
aws-sigv4 (~> 1.1)
jmespath (~> 1.0)
aws-sdk-kms (1.39.0)
aws-sdk-core (~> 3, >= 3.109.0)
aws-sigv4 (~> 1.1)
aws-sdk-s3 (1.83.0)
aws-sdk-s3 (1.84.1)
aws-sdk-core (~> 3, >= 3.109.0)
aws-sdk-kms (~> 1)
aws-sigv4 (~> 1.1)
Expand All @@ -94,15 +94,15 @@ GEM
oj (>= 2.8.3)
pandoc-ruby (~> 2.0, >= 2.0.0)
safe_yaml (~> 1.0, >= 1.0.4)
better_errors (2.8.3)
better_errors (2.9.1)
coderay (>= 1.0.0)
erubi (>= 1.0.0)
rack (>= 0.9.0)
bibtex-ruby (5.1.4)
bibtex-ruby (5.1.5)
latex-decode (~> 0.0)
binding_of_caller (0.8.0)
debug_inspector (>= 0.0.1)
bolognese (1.8.13)
bolognese (1.8.18)
activesupport (>= 4.2.5)
benchmark_methods (~> 0.7)
bibtex-ruby (>= 5.1.0)
Expand All @@ -127,7 +127,7 @@ GEM
rdf-rdfxml (~> 3.1)
rdf-turtle (~> 3.1)
thor (>= 0.19)
bootsnap (1.4.8)
bootsnap (1.5.1)
msgpack (~> 1.0)
builder (3.2.4)
bullet (6.1.0)
Expand All @@ -147,7 +147,7 @@ GEM
activesupport
citeproc (1.0.10)
namae (~> 1.0)
citeproc-ruby (1.1.12)
citeproc-ruby (1.1.13)
citeproc (~> 1.0, >= 1.0.9)
csl (~> 1.5)
climate_control (0.2.0)
Expand All @@ -167,10 +167,9 @@ GEM
sort_alphabetical (~> 1.0)
crack (0.4.4)
crass (1.0.6)
crawler_detect (1.0.2)
oj (>= 3.0)
crawler_detect (1.1.0)
qonfig (~> 0.24)
csl (1.5.1)
csl (1.5.2)
namae (~> 1.0)
csl-styles (1.0.1.10)
csl (~> 1.0)
Expand All @@ -192,17 +191,17 @@ GEM
domain_name (0.5.20190701)
unf (>= 0.0.5, < 1.0.0)
dotenv (2.7.6)
ebnf (2.1.1)
ebnf (2.1.2)
htmlentities (~> 4.3)
rdf (~> 3.1)
scanf (~> 1.0)
sxp (~> 1.1)
edtf (3.0.5)
edtf (3.0.6)
activesupport (>= 3.0, < 7.0)
elasticsearch (7.1.0)
elasticsearch-api (= 7.1.0)
elasticsearch-transport (= 7.1.0)
elasticsearch-api (7.1.0)
elasticsearch (7.5.0)
elasticsearch-api (= 7.5.0)
elasticsearch-transport (= 7.5.0)
elasticsearch-api (7.5.0)
multi_json
elasticsearch-extensions (0.0.31)
ansi
Expand All @@ -212,12 +211,12 @@ GEM
elasticsearch (> 1)
hashie
elasticsearch-rails (7.1.1)
elasticsearch-transport (7.1.0)
faraday
elasticsearch-transport (7.5.0)
faraday (>= 0.14, < 1)
multi_json
equivalent-xml (0.6.0)
nokogiri (>= 1.4.3)
erubi (1.9.0)
erubi (1.10.0)
excon (0.71.1)
facets (3.1.0)
factory_bot (4.11.1)
Expand Down Expand Up @@ -310,7 +309,7 @@ GEM
mime-types
mimemagic (~> 0.3.0)
terrapin (~> 0.6.0)
latex-decode (0.3.1)
latex-decode (0.3.2)
link_header (0.0.8)
listen (3.1.5)
rb-fsevent (~> 0.9, >= 0.9.4)
Expand Down Expand Up @@ -349,9 +348,9 @@ GEM
method_source (1.0.0)
mime-types (3.3.1)
mime-types-data (~> 3.2015)
mime-types-data (3.2020.0512)
mime-types-data (3.2020.1104)
mimemagic (0.3.5)
mini_magick (4.10.1)
mini_magick (4.11.0)
mini_mime (1.0.2)
mini_portile2 (2.4.0)
minitest (5.14.2)
Expand All @@ -368,11 +367,11 @@ GEM
nio4r (2.5.4)
nokogiri (1.10.10)
mini_portile2 (~> 2.4.0)
oj (3.10.14)
oj (3.10.16)
oj_mimic_json (1.0.1)
optimist (3.0.1)
pandoc-ruby (2.1.4)
parallel (1.19.2)
parallel (1.20.0)
parser (2.7.2.0)
ast (~> 2.4.1)
postrank-uri (1.0.24)
Expand Down Expand Up @@ -470,7 +469,7 @@ GEM
rspec (>= 3.0.0, < 4.0.0)
rspec-core (3.9.3)
rspec-support (~> 3.9.3)
rspec-expectations (3.9.2)
rspec-expectations (3.9.4)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.9.0)
rspec-graphql_matchers (1.3.0)
Expand All @@ -486,7 +485,7 @@ GEM
rspec-expectations (~> 3.9.0)
rspec-mocks (~> 3.9.0)
rspec-support (~> 3.9.0)
rspec-support (3.9.3)
rspec-support (3.9.4)
rubocop (0.77.0)
jaro_winkler (~> 1.5.1)
parallel (~> 1.10)
Expand Down Expand Up @@ -570,7 +569,7 @@ GEM
rack (>= 1.3, < 3)
rack-accept (~> 0.4)
tilt (>= 1.4, < 3)
tzinfo (1.2.7)
tzinfo (1.2.8)
thread_safe (~> 0.1)
unf (0.1.4)
unf_ext
Expand All @@ -581,7 +580,7 @@ GEM
uuid (2.3.9)
macaddr (~> 1.0)
vcr (5.1.0)
webmock (3.9.2)
webmock (3.10.0)
addressable (>= 2.3.6)
crack (>= 0.3.2)
hashdiff (>= 0.4.0, < 2.0.0)
Expand Down Expand Up @@ -623,10 +622,11 @@ DEPENDENCIES
departure (~> 6.2)
diffy (~> 3.2, >= 3.2.1)
dotenv
elasticsearch (~> 7.1.0)
elasticsearch (= 7.5)
elasticsearch-extensions (~> 0.0.29)
elasticsearch-model (~> 7.0)
elasticsearch-rails (~> 7.0)
elasticsearch-model (~> 7.1, >= 7.1.1)
elasticsearch-rails (~> 7.1, >= 7.1.1)
elasticsearch-transport (= 7.5)
equivalent-xml (~> 0.6.0)
facets
factory_bot_rails (~> 4.8, >= 4.8.2)
Expand Down
31 changes: 28 additions & 3 deletions app/models/concerns/indexable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,21 @@ def create_alias(options={})
if client.indices.exists_alias?(name: alias_name, index: [index_name])
"Alias #{alias_name} for index #{index_name} already exists."
else
client.indices.put_alias index: index_name, name: alias_name
# alias index is writeable unless it is for OtherDoi index
client.indices.update_aliases(
body: {
actions: [
{
add: {
index: index_name,
alias: alias_name,
is_write_index: self.name != "OtherDoi"
}
}
]
}
)

"Created alias #{alias_name} for index #{index_name}."
end
# end
Expand Down Expand Up @@ -649,7 +663,15 @@ def index_stats(options={})
stats = client.indices.stats index: [active_index, inactive_index], docs: true
active_index_count = stats.dig("indices", active_index, "primaries", "docs", "count")
inactive_index_count = stats.dig("indices", inactive_index, "primaries", "docs", "count")
database_count = self.all.count

# workaround until STI is enabled
# Workaround until STI is enabled: for the DOI subclasses, count only the rows
# whose type column matches the class name; every other model counts all rows.
# The class is named DataciteDoi (lowercase "c"), so a comparison against
# "DataCiteDoi" never matches and silently falls through to all.count.
database_count =
  if %w[DataciteDoi OtherDoi].include?(self.name)
    self.where(type: self.name).count
  else
    self.all.count
  end

"Active index #{active_index} has #{active_index_count} documents, " \
"inactive index #{inactive_index} has #{inactive_index_count} documents, " \
Expand All @@ -658,18 +680,21 @@ def index_stats(options={})
end

# Switch the alias between the two indexes, i.e. change which index is aliased.
# By default the alias for OtherDoi is not a write index,
# because there is also a DataciteDoi alias.
def switch_index(options={})
alias_name = options[:alias] || self.index_name
index_name = (options[:index] || self.index_name) + "_v1"
alternate_index_name = (options[:index] || self.index_name) + "_v2"
# An explicit :is_write_index option takes precedence, including false (which
# `||` would discard). When the option is absent or nil, the alias is a write
# index unless the model is OtherDoi.
is_write_index = options[:is_write_index].nil? ? self.name != "OtherDoi" : options[:is_write_index]

client = Elasticsearch::Model.client

if client.indices.exists_alias?(name: alias_name, index: [index_name])
client.indices.update_aliases body: {
actions: [
{ remove: { index: index_name, alias: alias_name } },
{ add: { index: alternate_index_name, alias: alias_name } }
{ add: { index: alternate_index_name, alias: alias_name, is_write_index: is_write_index } }
]
}

Expand Down
11 changes: 7 additions & 4 deletions app/models/datacite_doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@ class DataciteDoi < Doi
# index_name "dois-datacite"
# end

# TODO remove query for type once STI is enabled
def self.import_by_ids(options={})
from_id = (options[:from_id] || DataciteDoi.minimum(:id)).to_i
until_id = (options[:until_id] || DataciteDoi.maximum(:id)).to_i
from_id = (options[:from_id] || DataciteDoi.where(type: "DataciteDoi").minimum(:id)).to_i
until_id = (options[:until_id] || DataciteDoi.where(type: "DataciteDoi").maximum(:id)).to_i

# get every id between from_id and end_id
(from_id..until_id).step(500).each do |id|
Expand All @@ -45,7 +46,8 @@ def self.import_by_id(options={})
errors = 0
count = 0

DataciteDoi.where(id: id..(id + 499)).find_in_batches(batch_size: 500) do |dois|
# TODO remove query for type once STI is enabled
DataciteDoi.where(type: "DataciteDoi").where(id: id..(id + 499)).find_in_batches(batch_size: 500) do |dois|
response = DataciteDoi.__elasticsearch__.client.bulk \
index: index,
type: DataciteDoi.document_type,
Expand Down Expand Up @@ -75,7 +77,8 @@ def self.import_by_id(options={})

count = 0

DataciteDoi.where(id: id..(id + 499)).find_each do |doi|
# TODO remove query for type once STI is enabled
DataciteDoi.where(type: "DataciteDoi").where(id: id..(id + 499)).find_each do |doi|
IndexJob.perform_later(doi)
count += 1
end
Expand Down
11 changes: 7 additions & 4 deletions app/models/other_doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ def set_defaults
end

def self.import_by_ids(options={})
from_id = (options[:from_id] || OtherDoi.minimum(:id)).to_i
until_id = (options[:until_id] || OtherDoi.maximum(:id)).to_i
# TODO remove query for type once STI is enabled
from_id = (options[:from_id] || OtherDoi.where(type: "OtherDoi").minimum(:id)).to_i
until_id = (options[:until_id] || OtherDoi.where(type: "OtherDoi").maximum(:id)).to_i

# get every id between from_id and end_id
(from_id..until_id).step(500).each do |id|
Expand All @@ -48,7 +49,8 @@ def self.import_by_id(options={})
errors = 0
count = 0

OtherDoi.where(id: id..(id + 499)).find_in_batches(batch_size: 500) do |dois|
# TODO remove query for type once STI is enabled
OtherDoi.where(type: "OtherDoi").where(id: id..(id + 499)).find_in_batches(batch_size: 500) do |dois|
response = OtherDoi.__elasticsearch__.client.bulk \
index: index,
type: OtherDoi.document_type,
Expand Down Expand Up @@ -78,7 +80,8 @@ def self.import_by_id(options={})

count = 0

OtherDoi.where(id: id..(id + 499)).find_each do |doi|
# TODO remove query for type once STI is enabled
OtherDoi.where(type: "OtherDoi").where(id: id..(id + 499)).find_each do |doi|
IndexJob.perform_later(doi)
count += 1
end
Expand Down
18 changes: 0 additions & 18 deletions spec/concerns/helpable_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -164,24 +164,6 @@
expect(response.body.dig("data", "values")).to eq([{"index"=>1, "type"=>"URL", "data"=>{"format"=>"string", "value"=>"https://blog.datacite.org/re3data-science-europe/"}, "ttl"=>86400, "timestamp"=>"2020-07-26T08:55:35Z"}])
end

it 'wrong domain' do
client = create(:client, provider: provider, symbol: ENV['MDS_USERNAME'], password: ENV['MDS_PASSWORD'], domains: "example.org")
subject = build(:doi, doi: "10.5438/mcnv-ga6n", url: "https://blog.datacite.org/", client: client, aasm_state: "findable")
expect { subject.register_url }.to raise_error(ActionController::BadRequest, "[Handle] Error updating DOI 10.5438/MCNV-GA6N: URL not allowed by repository domains settings.")
end

it 'wrong subdomain' do
client = create(:client, provider: provider, symbol: ENV['MDS_USERNAME'], password: ENV['MDS_PASSWORD'], domains: "datacite.org")
subject = build(:doi, doi: "10.5438/mcnv-ga6n", url: "https://blog.datacite.org/", client: client, aasm_state: "findable")
expect { subject.register_url }.to raise_error(ActionController::BadRequest, "[Handle] Error updating DOI 10.5438/MCNV-GA6N: URL not allowed by repository domains settings.")
end

it 'wildcard for subdomain but using naked domain' do
client = create(:client, provider: provider, symbol: ENV['MDS_USERNAME'], password: ENV['MDS_PASSWORD'], domains: "*.datacite.org")
subject = build(:doi, doi: "10.5438/mcnv-ga6n", url: "https://datacite.org/", client: client, aasm_state: "findable")
expect { subject.register_url }.to raise_error(ActionController::BadRequest, "[Handle] Error updating DOI 10.5438/MCNV-GA6N: URL not allowed by repository domains settings.")
end

it 'draft doi' do
subject = build(:doi, doi: "10.5438/mcnv-ga6n", url: "https://blog.datacite.org/", client: client, aasm_state: "draft")
expect { subject.register_url }.to raise_error(ActionController::BadRequest, "DOI is not registered or findable.")
Expand Down
1 change: 1 addition & 0 deletions spec/factories/default.rb
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@
]}
schema_version { "http://datacite.org/schema/kernel-4" }
source { "test" }
type { "DataciteDoi" }
regenerate { true }
created { Faker::Time.backward(14, :evening) }
minted { Faker::Time.backward(15, :evening) }
Expand Down
Loading

0 comments on commit 57afb13

Please sign in to comment.