From b074889524245a67e8ba054f9fdbb69931050cac Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Tue, 10 Dec 2019 10:54:27 +0000 Subject: [PATCH] properly handle expired scroll_id. #371 --- Gemfile | 1 + Gemfile.lock | 13 ++++++------ app/controllers/activities_controller.rb | 2 +- app/controllers/dois_controller.rb | 5 ++++- app/controllers/events_controller.rb | 2 +- app/controllers/old_events_controller.rb | 2 +- app/models/concerns/indexable.rb | 25 ++++++++++++++++-------- 7 files changed, 32 insertions(+), 18 deletions(-) diff --git a/Gemfile b/Gemfile index ba6ca2e5f..1b11575d3 100644 --- a/Gemfile +++ b/Gemfile @@ -51,6 +51,7 @@ gem 'mini_magick', '~> 4.8' gem 'elasticsearch', '~> 7.1.0' gem 'elasticsearch-model', '~> 7.0', require: 'elasticsearch/model' gem 'elasticsearch-rails', '~> 7.0' +gem 'faraday', '0.17.0' gem 'faraday_middleware-aws-sigv4', '~> 0.2.4' gem 'rack-utf8_sanitizer', '~> 1.6' gem 'oj_mimic_json', '~> 1.0', '>= 1.0.1' diff --git a/Gemfile.lock b/Gemfile.lock index e528f9ad6..9762d3ec8 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/rmosolgo/graphql-ruby - revision: 9859534ccbc4fe91ab26b58e7ea9d0e12a47d409 + revision: b0e97e759c850cfbc03d75dfbb115f1bc443a985 specs: graphql (1.9.16) @@ -73,13 +73,13 @@ GEM audited (4.9.0) activerecord (>= 4.2, < 6.1) aws-eventstream (1.0.3) - aws-partitions (1.251.0) - aws-sdk-core (3.84.0) + aws-partitions (1.252.0) + aws-sdk-core (3.85.0) aws-eventstream (~> 1.0, >= 1.0.2) aws-partitions (~> 1, >= 1.239.0) aws-sigv4 (~> 1.1) jmespath (~> 1.0) - aws-sdk-kms (1.26.0) + aws-sdk-kms (1.27.0) aws-sdk-core (~> 3, >= 3.71.0) aws-sigv4 (~> 1.1) aws-sdk-s3 (1.59.0) @@ -225,7 +225,7 @@ GEM railties (>= 3.0.0) faker (1.9.6) i18n (>= 0.7) - faraday (0.17.1) + faraday (0.17.0) multipart-post (>= 1.2, < 3) faraday-encoding (0.0.5) faraday @@ -247,7 +247,7 @@ GEM globalid (0.4.2) activesupport (>= 4.2.0) google-protobuf (3.10.0.rc.1) - graphql-batch (0.4.1) + graphql-batch (0.4.2) graphql (>= 1.3, < 2) promise.rb (~> 0.7.2) graphql-errors (0.4.0) @@ -574,6 +574,7 @@ DEPENDENCIES facets factory_bot_rails (~> 4.8, >= 4.8.2) faker (~> 1.9) + faraday (= 0.17.0) faraday_middleware-aws-sigv4 (~> 0.2.4) fast_jsonapi (~> 1.3) flipper (~> 0.16.0) diff --git a/app/controllers/activities_controller.rb b/app/controllers/activities_controller.rb index c1d542208..7a955b9a3 100644 --- a/app/controllers/activities_controller.rb +++ b/app/controllers/activities_controller.rb @@ -42,7 +42,7 @@ def index self: request.original_url, next: results.size < page[:size] || page[:size] == 0 ? nil : request.base_url + "/activities?" + { "scroll-id" => response.scroll_id, - "page[scroll]" => "3m", + "page[scroll]" => page[:scroll], "page[size]" => page[:size] }.compact.to_query }.compact options[:is_collection] = true diff --git a/app/controllers/dois_controller.rb b/app/controllers/dois_controller.rb index 7c76fa0cb..3a5256bf6 100644 --- a/app/controllers/dois_controller.rb +++ b/app/controllers/dois_controller.rb @@ -99,6 +99,9 @@ def index total = sample_dois.length total_pages = 1 elsif page[:scroll].present? + # if scroll_id has expired + fail ActiveRecord::RecordNotFound unless response.scroll_id.present? + results = response.results total = response.total else @@ -118,7 +121,7 @@ def index self: request.original_url, next: results.size < page[:size] || page[:size] == 0 ? nil : request.base_url + "/dois?" + { "scroll-id" => response.scroll_id, - "page[scroll]" => "3m", + "page[scroll]" => page[:scroll], "page[size]" => page[:size] }.compact.to_query }.compact options[:is_collection] = true diff --git a/app/controllers/events_controller.rb b/app/controllers/events_controller.rb index f16f71fe0..b81ec8bd0 100644 --- a/app/controllers/events_controller.rb +++ b/app/controllers/events_controller.rb @@ -126,7 +126,7 @@ def index self: request.original_url, next: results.size < page[:size] || page[:size] == 0 ? nil : request.base_url + "/events?" + { "scroll-id" => response.scroll_id, - "page[scroll]" => "3m", + "page[scroll]" => page[:scroll], "page[size]" => page[:size] }.compact.to_query }.compact options[:is_collection] = true diff --git a/app/controllers/old_events_controller.rb b/app/controllers/old_events_controller.rb index 819d2ddd8..2809e3626 100644 --- a/app/controllers/old_events_controller.rb +++ b/app/controllers/old_events_controller.rb @@ -122,7 +122,7 @@ def index self: request.original_url, next: results.size < page[:size] || page[:size] == 0 ? nil : request.base_url + "/events?" + { "scroll-id" => response.scroll_id, - "page[scroll]" => "3m", + "page[scroll]" => page[:scroll], "page[size]" => page[:size] }.compact.to_query }.compact options[:is_collection] = true diff --git a/app/models/concerns/indexable.rb b/app/models/concerns/indexable.rb index 0dcc5db41..366f3a131 100644 --- a/app/models/concerns/indexable.rb +++ b/app/models/concerns/indexable.rb @@ -112,15 +112,24 @@ def query(query, options={}) # support scroll api # map function is small performance hit if options[:scroll_id].present? && options.dig(:page, :scroll) - response = __elasticsearch__.client.scroll(body: - { scroll_id: options[:scroll_id], - scroll: options.dig(:page, :scroll) - }) - return Hashie::Mash.new({ - total: response.dig("hits", "total", "value"), - results: response.dig("hits", "hits").map { |r| r["_source"] }, - scroll_id: response["_scroll_id"] + begin + response = __elasticsearch__.client.scroll(body: + { scroll_id: options[:scroll_id], + scroll: options.dig(:page, :scroll) + }) + return Hashie::Mash.new({ + total: response.dig("hits", "total", "value"), + results: response.dig("hits", "hits").map { |r| r["_source"] }, + scroll_id: response["_scroll_id"] + }) + # handle expired scroll_id (Elasticsearch returns this error) + rescue Elasticsearch::Transport::Transport::Errors::NotFound + return Hashie::Mash.new({ + total: 0, + results: [], + scroll_id: nil }) + end end if options[:totals_agg] == "provider"