From bf676b85891d8b20342ee87c11f5f35c00e984cc Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Tue, 11 Jul 2023 19:47:58 +0100 Subject: [PATCH 01/35] Initial Data Dump model (datacite/1862) --- app/models/data_dump.rb | 77 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 app/models/data_dump.rb diff --git a/app/models/data_dump.rb b/app/models/data_dump.rb new file mode 100644 index 000000000..84c88e388 --- /dev/null +++ b/app/models/data_dump.rb @@ -0,0 +1,77 @@ +# frozen_string_literal: true + +class DataDump < ApplicationRecord + include Elasticsearch::Model + + include Indexable + include AASM + + validates_presence_of :uid + validates_presence_of :scope + validates_presence_of :start_date + validates_presence_of :end_date + + validates_uniqueness_of :uid, message: "This Data Dump UID is already in use" + + validates_inclusion_of :scope, in: %w(metadata, link), allow_blank: false + + aasm whiny_transitions: false do + # initial state should prevent public visibility + state :generating, initial: true + # we might add more here in the future depending on the granularity of status updates we wish to provide + # but for now, we have a state for when the dump is done and being transferred to S3 and one for when it is + # ready to be downloaded + state :storing, :complete + + event :store do + transitions from: :generating, to: :storing + end + + event :release do + transitions from: :storing, to: :complete + end + end + + if Rails.env.test? + index_name "data-dumps-test#{ENV['TEST_ENV_NUMBER']}" + elsif ENV["ES_PREFIX"].present? + index_name "data-dumps-#{ENV['ES_PREFIX']}" + else + index_name "data-dumps" + end + + settings index: { + number_of_shards: 1, + analysis: { + analyzer: { + string_lowercase: { + tokenizer: "keyword", filter: %w[lowercase] + }, + }, + normalizer: { + keyword_lowercase: { type: "custom", filter: %w[lowercase] }, + }, + }, + } do + mapping dynamic: "false" do + indexes :id + indexes :uid, type: :text + indexes :scope, type: :keyword + indexes :description, type: :text + indexes :start_date, type: :date, format: :date_optional_time + indexes :end_date, type: :date, format: :date_optional_time + indexes :records, type: :integer + indexes :checksum, type: :text + indexes :file_path, type: :text + indexes :aasm_state, type: :keyword + indexes :created_at, type: :date, format: :date_optional_time, + fields: { + created_sort: { type: :date } + } + indexes :updated_at, type: :date, format: :date_optional_time, + fields: { + updated_sort: { type: :date } + } + end + end +end From 87f9958c383872505f8325cfc45966614a3f6bcf Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Tue, 11 Jul 2023 19:48:22 +0100 Subject: [PATCH 02/35] Data dump DB migration (datacite/1862) --- .../20230711130313_create_data_dumps.rb | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 db/migrate/20230711130313_create_data_dumps.rb diff --git a/db/migrate/20230711130313_create_data_dumps.rb b/db/migrate/20230711130313_create_data_dumps.rb new file mode 100644 index 000000000..ee72893e3 --- /dev/null +++ b/db/migrate/20230711130313_create_data_dumps.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +class CreateDataDumps < ActiveRecord::Migration[6.1] + def change + create_table :data_dumps do |t| + t.string :uid, null: false + t.string :scope, null: false + t.text :description + t.datetime :start_date, null: false + t.datetime :end_date, null: false + t.bigint :records + t.string :checksum + t.string :file_path + t.string :aasm_state + + t.timestamps + + t.index %w[uid], { name: "index_data_dumps_on_uid", unique: true } + t.index %w[updated_at], name: "index_data_dumps_on_updated_at" + t.index %w[scope], name: "index_data_dumps_on_scope" + t.index %w[aasm_state], name: "index_data_dumps_on_aasm_state" + end + end +end From 53c45ca9d62974afe53ace89d3b3c394e41f702c Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Tue, 11 Jul 2023 19:48:45 +0100 Subject: [PATCH 03/35] Data dump model initial test suite (datacite/1862) --- spec/models/data_dump_spec.rb | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 spec/models/data_dump_spec.rb diff --git a/spec/models/data_dump_spec.rb b/spec/models/data_dump_spec.rb new file mode 100644 index 000000000..60335644b --- /dev/null +++ b/spec/models/data_dump_spec.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +require "rails_helper" + +RSpec.describe DataDump, type: :model, elasticsearch: true do + describe "Validations" do + it { should validate_presence_of(:uid) } + it { should validate_presence_of(:scope) } + it { should validate_presence_of(:start_date) } + it { should validate_presence_of(:end_date) } + # Temporarily disabled as these break with the other validators + # Potentially adding a factory will resolve this? + # Otherwise shift them to a separate second suite that _does_ create the object + # it { should allow_value("metadata").for(:scope) } + # it { should allow_value("link").for(:scope) } + # it { should_not allow_value("invalid").for(:scope) } + end +end From 20d5bdeb14b26e43baf559a2a2fc38e56032e75b Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Tue, 11 Jul 2023 19:49:35 +0100 Subject: [PATCH 04/35] Add Data dump model to the RSpec ElasticSearch helper (datacite/1862) --- spec/support/elasticsearch_helper.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/spec/support/elasticsearch_helper.rb b/spec/support/elasticsearch_helper.rb index f677494e9..f878f96dd 100644 --- a/spec/support/elasticsearch_helper.rb +++ b/spec/support/elasticsearch_helper.rb @@ -13,6 +13,7 @@ ProviderPrefix, Contact, ReferenceRepository, + DataDump, ].freeze RSpec.configure do |config| From a3af8ac8411671979583bac4d42eac9a53a67828 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Tue, 11 Jul 2023 19:49:53 +0100 Subject: [PATCH 05/35] Updated Schema after database migration (datacite/1862) --- db/schema.rb | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/db/schema.rb b/db/schema.rb index b266d86d6..9b9640691 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -12,8 +12,8 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2023_01_23_122711) do - create_table "active_storage_attachments", charset: "utf8mb4", force: :cascade do |t| +ActiveRecord::Schema.define(version: 2023_07_11_130313) do + create_table "active_storage_attachments", charset: "latin1", force: :cascade do |t| t.string "name", limit: 191, null: false t.string "record_type", null: false t.bigint "record_id", null: false @@ -23,7 +23,7 @@ t.index ["record_type", "record_id", "name", "blob_id"], name: "index_active_storage_attachments_uniqueness", unique: true end - create_table "active_storage_blobs", charset: "utf8mb4", force: :cascade do |t| + create_table "active_storage_blobs", charset: "latin1", force: :cascade do |t| t.string "key", limit: 191, null: false t.string "filename", limit: 191, null: false t.string "content_type", limit: 191 @@ -35,7 +35,7 @@ t.index ["key"], name: "index_active_storage_blobs_on_key", unique: true end - create_table "active_storage_variant_records", charset: "utf8mb4", force: :cascade do |t| + create_table "active_storage_variant_records", charset: "latin1", force: :cascade do |t| t.bigint "blob_id", null: false t.string "variation_digest", null: false t.index ["blob_id", "variation_digest"], name: "index_active_storage_variant_records_uniqueness", unique: true @@ -90,7 +90,7 @@ t.index ["symbol"], name: "symbol", unique: true end - create_table "audits", charset: "utf8mb4", force: :cascade do |t| + create_table "audits", charset: "latin1", force: :cascade do |t| t.integer "auditable_id" t.string "auditable_type" t.integer "associated_id" @@ -137,6 +137,24 @@ t.datetime "deleted_at" end + create_table "data_dumps", charset: "latin1", force: :cascade do |t| + t.string "uid", null: false + t.string "scope", null: false + t.text "description" + t.datetime "start_date", null: false + t.datetime "end_date", null: false + t.bigint "records" + t.string "checksum" + t.string "file_path" + t.string "aasm_state" + t.datetime "created_at", precision: 6, null: false + t.datetime "updated_at", precision: 6, null: false + t.index ["aasm_state"], name: "index_data_dumps_on_aasm_state" + t.index ["scope"], name: "index_data_dumps_on_scope" + t.index ["uid"], name: "index_data_dumps_on_uid", unique: true + t.index ["updated_at"], name: "index_data_dumps_on_updated_at" + end + create_table "datacentre", charset: "utf8", force: :cascade do |t| t.text "comments", size: :long t.string "system_email", null: false @@ -310,7 +328,7 @@ t.index ["uid"], name: "index_provider_prefixes_on_uid", length: 128 end - create_table "reference_repositories", charset: "utf8mb4", force: :cascade do |t| + create_table "reference_repositories", charset: "latin1", force: :cascade do |t| t.string "client_id" t.string "re3doi" t.datetime "created_at", null: false From 5a641af40ad806cfb140ec50fade267bbe2ce3e5 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Wed, 12 Jul 2023 13:51:28 +0100 Subject: [PATCH 06/35] Initial data dump controller (datacite/1863) --- app/controllers/data_dumps_controller.rb | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 app/controllers/data_dumps_controller.rb diff --git a/app/controllers/data_dumps_controller.rb b/app/controllers/data_dumps_controller.rb new file mode 100644 index 000000000..658134524 --- /dev/null +++ b/app/controllers/data_dumps_controller.rb @@ -0,0 +1,16 @@ +class DataDumpsController < ApplicationController + def index + end + + def show + data_dump = DataDump.where(uid: params[:id]).first + if data_dump.blank? || + ( + data_dump.aasm_state != "complete" + # TODO: Add conditional check for role here + ) + fail ActiveRecord::RecordNotFound + end + render json: DataDumpSerializer.new(data_dump).serialized_json, status: :ok + end +end From 7490c0bd107f6cd96ae7e034e531221947067b6e Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Wed, 12 Jul 2023 13:51:44 +0100 Subject: [PATCH 07/35] Data dump controller basic test suite (datacite/1863) --- .../controllers/data_dumps_controller_spec.rb | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 spec/controllers/data_dumps_controller_spec.rb diff --git a/spec/controllers/data_dumps_controller_spec.rb b/spec/controllers/data_dumps_controller_spec.rb new file mode 100644 index 000000000..c8f803658 --- /dev/null +++ b/spec/controllers/data_dumps_controller_spec.rb @@ -0,0 +1,19 @@ +require 'rails_helper' + +RSpec.describe DataDumpsController, type: :controller do + + describe "GET #index" do + it "returns http success" do + get :index + expect(response).to have_http_status(:success) + end + end + + describe "GET #show" do + it "returns http success" do + get :show + expect(response).to have_http_status(:success) + end + end + +end From b927152b27f027c7bdd25957bfa6a45bdcccdf83 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Wed, 12 Jul 2023 13:57:07 +0100 Subject: [PATCH 08/35] Initial data dump routes (datacite/1864) --- config/routes.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/config/routes.rb b/config/routes.rb index 84da67052..180bc586a 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -230,6 +230,8 @@ resources :repository_prefixes, path: "repository-prefixes" resources :resource_types, path: "resource-types", only: %i[show index] + resources :data_dumps, constraints: { id: /.+/ }, only: %i[show index] + # custom routes for maintenance tasks post ":username", to: "datacite_dois#show", as: :user From ed04430820c502ce579d9f378c04e29d60bc9057 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Wed, 12 Jul 2023 14:39:28 +0100 Subject: [PATCH 09/35] Data Dump index controller first pass (datacite/1866) --- app/controllers/data_dumps_controller.rb | 79 ++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/app/controllers/data_dumps_controller.rb b/app/controllers/data_dumps_controller.rb index 658134524..e92d77a15 100644 --- a/app/controllers/data_dumps_controller.rb +++ b/app/controllers/data_dumps_controller.rb @@ -1,5 +1,84 @@ class DataDumpsController < ApplicationController def index + sort = + case params[:sort] + when "created" + { created_at: { order: "asc" } } + when "-created" + { created_at: { order: "desc" } } + when "start" + { start_date: { order: "asc" } } + when "-start" + { start_date: { order: "desc" } } + when "end" + { end_date: { order: "asc" } } + when "-end" + { end_date: { order: "desc"} } + else + { created_at: { order: "desc" } } + end + + page = page_from_params(params) + + response = DataDump.query( + page: page, + sort: sort + ) + + begin + total = response.results.total + total_pages = page[:size].positive? ? (total.to_f / page[:size]).ceil : 0 + + data_dumps = response.results + + options = {} + options[:meta] = { + total: total, + "totalPages" => total_pages, + page: page[:number] + }.compact + + options[:links] = { + self: request.original_url, + next: + if data_dumps.blank? || page[:number] == total_pages + nil + else + request.base_url + "/data_dumps?" + + { "page[number" => page[:number] + 1, + "page[size]" => page[:size], + sort: params[:sort], + }.compact.to_query + end, + prev: + if data_dumps.blank? || page[:number] == 1 || page[:number] == 0 + nil + else + request.base_url + "/data_dumps?" + + { "page[number" => page[:number] - 1, + "page[size]" => page[:size], + sort: params[:sort], + }.compact.to_query + end + }.compact + + render json: + DataDumpSerializer.new(data_dumps, options).serialized_json, status: :ok + + rescue Elasticsearch::Transport::Transport::Errors::BadRequest => e + Raven.capture_exception(e) + + message = + JSON.parse(e.message[6..-1]).to_h.dig( + "error", + "root_cause", + 0, + "reason", + ) + + render json: { "errors" => { "title" => message } }.to_json, + status: :bad_request + end end def show From a367beb813d2a9eb305f6370046a75512b112d6a Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Wed, 12 Jul 2023 14:56:41 +0100 Subject: [PATCH 10/35] Add a factory for the test suites (datacite/1868) --- spec/factories/data_dump.rb | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 spec/factories/data_dump.rb diff --git a/spec/factories/data_dump.rb b/spec/factories/data_dump.rb new file mode 100644 index 000000000..fad546876 --- /dev/null +++ b/spec/factories/data_dump.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +require "faker" + +FactoryBot.define do + factory :data_dump do + uid { Faker::Internet.password(8).downcase } + scope { "metadata" } + description { "Test Metadata Data Dump Factory creation"} + records { 12345 } + checksum { Faker::Crypto.sha256} + created_at { Faker::Time.backward(1, :morning) } + updated_at { Faker::Time.backward(1, :evening) } + end +end \ No newline at end of file From 93048d821327bf878b4de3f37f479e4f1c9cbf85 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 10:52:18 +0100 Subject: [PATCH 11/35] Add a factory for an incomplete data dump (datacite/1868) --- spec/factories/data_dump.rb | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/spec/factories/data_dump.rb b/spec/factories/data_dump.rb index fad546876..378d0b03f 100644 --- a/spec/factories/data_dump.rb +++ b/spec/factories/data_dump.rb @@ -11,5 +11,12 @@ checksum { Faker::Crypto.sha256} created_at { Faker::Time.backward(1, :morning) } updated_at { Faker::Time.backward(1, :evening) } + aasm_state { :complete } + end + factory :data_dump_incomplete do + uid { Faker::Internet.password(8).downcase } + scope { "metadata" } + description { "Test Metadata Data Dump Factory creation - incomplete" } + created_at { Faker::Time.backward(2, :morning) } end end \ No newline at end of file From d7d3f9d60e7da7a5af442e0d5cad11d98583a8e7 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 11:13:29 +0100 Subject: [PATCH 12/35] Update test to create the data_dump from the factory (datacite/1868) --- spec/controllers/data_dumps_controller_spec.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spec/controllers/data_dumps_controller_spec.rb b/spec/controllers/data_dumps_controller_spec.rb index c8f803658..3e32a26af 100644 --- a/spec/controllers/data_dumps_controller_spec.rb +++ b/spec/controllers/data_dumps_controller_spec.rb @@ -10,8 +10,9 @@ end describe "GET #show" do + let(:data_dump) { create(:data_dump) } it "returns http success" do - get :show + get :show, { uid: data_dump.uid } expect(response).to have_http_status(:success) end end From ac6a7d5594c41e79014dd8c968bdde79d2f0befe Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 11:21:42 +0100 Subject: [PATCH 13/35] Update controller test to create an object and test presence (datacite/1868) --- spec/controllers/data_dumps_controller_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/controllers/data_dumps_controller_spec.rb b/spec/controllers/data_dumps_controller_spec.rb index 3e32a26af..b8eb47947 100644 --- a/spec/controllers/data_dumps_controller_spec.rb +++ b/spec/controllers/data_dumps_controller_spec.rb @@ -12,7 +12,7 @@ describe "GET #show" do let(:data_dump) { create(:data_dump) } it "returns http success" do - get :show, { uid: data_dump.uid } + get :show, params: { id: data_dump.uid } expect(response).to have_http_status(:success) end end From a73009adfea03ad3f70b553d77a938bedef2c233 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 11:22:04 +0100 Subject: [PATCH 14/35] Update data dump factory to add missing attributes (datacite/1868) --- spec/factories/data_dump.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spec/factories/data_dump.rb b/spec/factories/data_dump.rb index 378d0b03f..a90aff64d 100644 --- a/spec/factories/data_dump.rb +++ b/spec/factories/data_dump.rb @@ -7,6 +7,8 @@ uid { Faker::Internet.password(8).downcase } scope { "metadata" } description { "Test Metadata Data Dump Factory creation"} + start_date { "01/01/2022" } + end_date { "31/12/2022" } records { 12345 } checksum { Faker::Crypto.sha256} created_at { Faker::Time.backward(1, :morning) } From efcc34d87e11335cee5e9367d8d4b1226dc633bc Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 11:22:30 +0100 Subject: [PATCH 15/35] Remove erroneous comma in validator (datacite/1862) --- app/models/data_dump.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/data_dump.rb b/app/models/data_dump.rb index 84c88e388..4c04b602f 100644 --- a/app/models/data_dump.rb +++ b/app/models/data_dump.rb @@ -13,7 +13,7 @@ class DataDump < ApplicationRecord validates_uniqueness_of :uid, message: "This Data Dump UID is already in use" - validates_inclusion_of :scope, in: %w(metadata, link), allow_blank: false + validates_inclusion_of :scope, in: %w(metadata link), allow_blank: false aasm whiny_transitions: false do # initial state should prevent public visibility From 28998694bf9d5c338b5f86968693e7de80a1a955 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 11:50:50 +0100 Subject: [PATCH 16/35] Add missing `query_aggregations` property required by Indexable concern query function (datacite/1862) --- app/models/data_dump.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/app/models/data_dump.rb b/app/models/data_dump.rb index 4c04b602f..f1eb47029 100644 --- a/app/models/data_dump.rb +++ b/app/models/data_dump.rb @@ -74,4 +74,8 @@ class DataDump < ApplicationRecord } end end + + def self.query_aggregations + {} + end end From cc0b91c5a6a9491a2023619201a9f5c7d3be0c7f Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 11:51:21 +0100 Subject: [PATCH 17/35] Correctly pass the `query` parameter to the ES query function (datacite/1863) --- app/controllers/data_dumps_controller.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/controllers/data_dumps_controller.rb b/app/controllers/data_dumps_controller.rb index e92d77a15..0a8ad9c2b 100644 --- a/app/controllers/data_dumps_controller.rb +++ b/app/controllers/data_dumps_controller.rb @@ -21,6 +21,7 @@ def index page = page_from_params(params) response = DataDump.query( + "", page: page, sort: sort ) From 8a0a9c6bf93b3288a010c2a1c948ff727ab2da72 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 12:56:29 +0100 Subject: [PATCH 18/35] Update factory to use Faker for more attributes so it can be used to generate multiple instances (datacite/1868) --- spec/factories/data_dump.rb | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/spec/factories/data_dump.rb b/spec/factories/data_dump.rb index a90aff64d..42b30380e 100644 --- a/spec/factories/data_dump.rb +++ b/spec/factories/data_dump.rb @@ -4,13 +4,17 @@ FactoryBot.define do factory :data_dump do + transient do + year { Faker::Number.within(2010..2021).to_s } + end + uid { Faker::Internet.password(8).downcase } scope { "metadata" } description { "Test Metadata Data Dump Factory creation"} - start_date { "01/01/2022" } - end_date { "31/12/2022" } - records { 12345 } - checksum { Faker::Crypto.sha256} + start_date { "#{year}-01-01" } + end_date { "#{year}-12-31" } + records { Faker::Number.within(5_000_000..50_000_000) } + checksum { Faker::Crypto.sha256 } created_at { Faker::Time.backward(1, :morning) } updated_at { Faker::Time.backward(1, :evening) } aasm_state { :complete } From da56e40a36486f8242e9fdda175ec9f9bd94d59f Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 12:57:51 +0100 Subject: [PATCH 19/35] Update factory for incomplete objects (datacite/1868) --- spec/factories/data_dump.rb | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/spec/factories/data_dump.rb b/spec/factories/data_dump.rb index 42b30380e..9bf1e132f 100644 --- a/spec/factories/data_dump.rb +++ b/spec/factories/data_dump.rb @@ -19,10 +19,19 @@ updated_at { Faker::Time.backward(1, :evening) } aasm_state { :complete } end + factory :data_dump_incomplete do + transient do + year { Faker::Number.within(2010..2021).to_s } + end + uid { Faker::Internet.password(8).downcase } scope { "metadata" } - description { "Test Metadata Data Dump Factory creation - incomplete" } - created_at { Faker::Time.backward(2, :morning) } + description { "Test Metadata Data Dump Factory creation - incomplete"} + start_date { "#{year}-01-01" } + end_date { "#{year}-12-31" } + created_at { Faker::Time.backward(1, :morning) } + updated_at { Faker::Time.backward(1, :evening) } + aasm_state { :generating } end end \ No newline at end of file From 08c03cfd2a08042b4e4b4bfaf6339bc593af1a33 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 13:41:33 +0100 Subject: [PATCH 20/35] Initial Data Dump controller requests rspec suite (datacite/1868) --- spec/requests/data_dumps_spec.rb | 45 ++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 spec/requests/data_dumps_spec.rb diff --git a/spec/requests/data_dumps_spec.rb b/spec/requests/data_dumps_spec.rb new file mode 100644 index 000000000..4b3225eec --- /dev/null +++ b/spec/requests/data_dumps_spec.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +require "rails_helper" + +describe DataDumpsController, type: :request, elasticsearch: true do + let(:data_dump) { create(:data_dump, uid: "test_dump") } + + describe "GET /data_dumps", elasticsearch: true do + let!(:data_dumps) { create_list(:data_dump, 10) } + + before do + DataDump.import + sleep 1 + end + + it "returns data dumps" do + get "/data_dumps" + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(10) + expect(json.dig("meta", "total")).to eq(10) + end + end + + describe "GET /data_dumps/:id" do + context "when the record exists" do + it "returns the record" do + get "/data_dumps/#{data_dump.uid}" + + expect(last_response.status).to eq(200) + expect(json.dig("data", "attributes", "description")).to eq("Test Metadata Data Dump Factory creation") + expect(json.dig("data", "attributes", "start_date")).to eq(data_dump.start_date) + end + end + + context "when the record does not exist" do + it "returns status code 404" do + get "/data_dumps/invalid_id" + + expect(last_response.status).to eq(404) + expect(json["errors"].first).to eq("status" => "404", "title" => "The resource you are looking for doesn't exist.") + end + end + end +end From babb2ae4c0d58111a3072856667de8735f1f9fe1 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 13:51:06 +0100 Subject: [PATCH 21/35] Add pagination tests to Data Dump controller suite (datacite/1868) --- spec/requests/data_dumps_spec.rb | 69 ++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/spec/requests/data_dumps_spec.rb b/spec/requests/data_dumps_spec.rb index 4b3225eec..f91651f6b 100644 --- a/spec/requests/data_dumps_spec.rb +++ b/spec/requests/data_dumps_spec.rb @@ -20,6 +20,75 @@ expect(json["data"].size).to eq(10) expect(json.dig("meta", "total")).to eq(10) end + + it "returns data dumps with pagination" do + get "/data_dumps?page[number]=1&page[size]=4" + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(4) + expect(json.dig("meta", "total")).to eq(10) + next_link_absolute = Addressable::URI.parse(json.dig("links", "next")) + next_link = next_link_absolute.path + "?" + next_link_absolute.query + expect(next_link).to eq("/data_dumps?page%5Bnumber%5D=2&page%5Bsize%5D=4") + expect(json.dig("links", "prev")).to be_nil + + get next_link + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(4) + expect(json.dig("meta", "total")).to eq(10) + next_link_absolute = Addressable::URI.parse(json.dig("links", "next")) + next_link = next_link_absolute.path + "?" + next_link_absolute.query + expect(next_link).to eq("/data_dumps?page%5Bnumber%5D=3&page%5Bsize%5D=4") + prev_link_absolute = Addressable::URI.parse(json.dig("links", "prev")) + prev_link = prev_link_absolute.path + "?" + prev_link_absolute.query + expect(prev_link).to eq("/data_dumps?page%5Bnumber%5D=1&page%5Bsize%5D=4") + + get next_link, nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(2) + expect(json.dig("meta", "total")).to eq(10) + expect(json.dig("links", "next")).to be_nil + prev_link_absolute = Addressable::URI.parse(json.dig("links", "prev")) + prev_link = prev_link_absolute.path + "?" + prev_link_absolute.query + expect(prev_link).to eq("/data_dumps?page%5Bnumber%5D=2&page%5Bsize%5D=4") + end + + it "returns correct page links when results is exactly divisible by page size" do + get "/data_dumps?page[number]=1&page[size]=5", nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(5) + expect(json.dig("meta", "total")).to eq(10) + expect(json.dig("links", "prev")).to be_nil + next_link_absolute = Addressable::URI.parse(json.dig("links", "next")) + next_link = next_link_absolute.path + "?" + next_link_absolute.query + expect(next_link).to eq("/data_dumps?page%5Bnumber%5D=2&page%5Bsize%5D=5") + + get next_link, nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(5) + expect(json.dig("meta", "total")).to eq(10) + expect(json.dig("links", "next")).to be_nil + prev_link_absolute = Addressable::URI.parse(json.dig("links", "prev")) + prev_link = prev_link_absolute.path + "?" + prev_link_absolute.query + expect(prev_link).to eq("/data_dumps?page%5Bnumber%5D=1&page%5Bsize%5D=5") + end + + it "returns a blank resultset when page is above max page" do + get "/data_dumps?page[number]=3&page[size]=5", nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(0) + expect(json.dig("meta", "totalPages")).to eq(2) + expect(json.dig("meta", "page")).to eq(3) + expect(json.dig("links", "next")).to be_nil + prev_link_absolute = Addressable::URI.parse(json.dig("links", "prev")) + prev_link = prev_link_absolute.path + "?" + prev_link_absolute.query + expect(prev_link).to eq("/data_dumps?page%5Bnumber%5D=2&page%5Bsize%5D=5") + end end describe "GET /data_dumps/:id" do From 83fcfdece38a832321f2ccd104d682ad37d9b041 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 13:59:22 +0100 Subject: [PATCH 22/35] Fix bad requests in test suite (datacite/1868) --- spec/requests/data_dumps_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/requests/data_dumps_spec.rb b/spec/requests/data_dumps_spec.rb index f91651f6b..b8033bd6e 100644 --- a/spec/requests/data_dumps_spec.rb +++ b/spec/requests/data_dumps_spec.rb @@ -56,7 +56,7 @@ end it "returns correct page links when results is exactly divisible by page size" do - get "/data_dumps?page[number]=1&page[size]=5", nil, headers + get "/data_dumps?page[number]=1&page[size]=5" expect(last_response.status).to eq(200) expect(json["data"].size).to eq(5) @@ -78,7 +78,7 @@ end it "returns a blank resultset when page is above max page" do - get "/data_dumps?page[number]=3&page[size]=5", nil, headers + get "/data_dumps?page[number]=3&page[size]=5" expect(last_response.status).to eq(200) expect(json["data"].size).to eq(0) From 356eb6efebbff4dde638588b4d5ce8c56427c86b Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 14:36:48 +0100 Subject: [PATCH 23/35] Fix validate inclusion model tests (datacite/1868) --- spec/models/data_dump_spec.rb | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/spec/models/data_dump_spec.rb b/spec/models/data_dump_spec.rb index 60335644b..f84e27f8b 100644 --- a/spec/models/data_dump_spec.rb +++ b/spec/models/data_dump_spec.rb @@ -8,11 +8,9 @@ it { should validate_presence_of(:scope) } it { should validate_presence_of(:start_date) } it { should validate_presence_of(:end_date) } - # Temporarily disabled as these break with the other validators - # Potentially adding a factory will resolve this? - # Otherwise shift them to a separate second suite that _does_ create the object - # it { should allow_value("metadata").for(:scope) } - # it { should allow_value("link").for(:scope) } - # it { should_not allow_value("invalid").for(:scope) } + it { should validate_inclusion_of(:scope).in_array(%w(metadata link)) } + it { should allow_value("metadata").for(:scope).on(:create) } + it { should allow_value("link").for(:scope).on(:create) } + it { should_not allow_value("invalid").for(:scope).on(:create) } end end From 538ea2eccb0d7684bdbb85360c1436a23adeaad0 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 15:00:34 +0100 Subject: [PATCH 24/35] Fix accidental conversion of database table schema to latin1 --- db/schema.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/db/schema.rb b/db/schema.rb index 9b9640691..b78ef0f86 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -13,7 +13,7 @@ # It's strongly recommended that you check this file into your version control system. ActiveRecord::Schema.define(version: 2023_07_11_130313) do - create_table "active_storage_attachments", charset: "latin1", force: :cascade do |t| + create_table "active_storage_attachments", charset: "utf8mb4", force: :cascade do |t| t.string "name", limit: 191, null: false t.string "record_type", null: false t.bigint "record_id", null: false @@ -23,7 +23,7 @@ t.index ["record_type", "record_id", "name", "blob_id"], name: "index_active_storage_attachments_uniqueness", unique: true end - create_table "active_storage_blobs", charset: "latin1", force: :cascade do |t| + create_table "active_storage_blobs", charset: "utf8mb4", force: :cascade do |t| t.string "key", limit: 191, null: false t.string "filename", limit: 191, null: false t.string "content_type", limit: 191 @@ -35,7 +35,7 @@ t.index ["key"], name: "index_active_storage_blobs_on_key", unique: true end - create_table "active_storage_variant_records", charset: "latin1", force: :cascade do |t| + create_table "active_storage_variant_records", charset: "utf8mb4", force: :cascade do |t| t.bigint "blob_id", null: false t.string "variation_digest", null: false t.index ["blob_id", "variation_digest"], name: "index_active_storage_variant_records_uniqueness", unique: true @@ -90,7 +90,7 @@ t.index ["symbol"], name: "symbol", unique: true end - create_table "audits", charset: "latin1", force: :cascade do |t| + create_table "audits", charset: "utf8mb4", force: :cascade do |t| t.integer "auditable_id" t.string "auditable_type" t.integer "associated_id" @@ -137,7 +137,7 @@ t.datetime "deleted_at" end - create_table "data_dumps", charset: "latin1", force: :cascade do |t| + create_table "data_dumps", charset: "utf8mb4", force: :cascade do |t| t.string "uid", null: false t.string "scope", null: false t.text "description" @@ -328,7 +328,7 @@ t.index ["uid"], name: "index_provider_prefixes_on_uid", length: 128 end - create_table "reference_repositories", charset: "latin1", force: :cascade do |t| + create_table "reference_repositories", charset: "utf8mb4", force: :cascade do |t| t.string "client_id" t.string "re3doi" t.datetime "created_at", null: false From 1c478bbe1df9fa7fa23ca357b043c63930b42b12 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 16:28:48 +0100 Subject: [PATCH 25/35] First pass data dump serializer (#1867) --- app/serializers/data_dump_serializer.rb | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 app/serializers/data_dump_serializer.rb diff --git a/app/serializers/data_dump_serializer.rb b/app/serializers/data_dump_serializer.rb new file mode 100644 index 000000000..273c12eb7 --- /dev/null +++ b/app/serializers/data_dump_serializer.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +class DataDumpSerializer + include FastJsonapi::ObjectSerializer + set_key_transform :camel_lower + set_type "data-dump" + set_id :uid + + attributes :description, + :scope, + :start_date, + :end_date, + :records, + :checksum, + :download_link, + :created_at, + :updated_at + + attribute :download_link do |object| + "https://example.com/#{object.file_path}" + end +end \ No newline at end of file From c28a3528b25f3573259ba65e0d2bbe3e3e36afd7 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 16:45:59 +0100 Subject: [PATCH 26/35] Fix missing brackets in link generation (#1889) --- app/controllers/data_dumps_controller.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/controllers/data_dumps_controller.rb b/app/controllers/data_dumps_controller.rb index 0a8ad9c2b..ad431b7c7 100644 --- a/app/controllers/data_dumps_controller.rb +++ b/app/controllers/data_dumps_controller.rb @@ -46,7 +46,7 @@ def index nil else request.base_url + "/data_dumps?" + - { "page[number" => page[:number] + 1, + { "page[number]" => page[:number] + 1, "page[size]" => page[:size], sort: params[:sort], }.compact.to_query @@ -56,7 +56,7 @@ def index nil else request.base_url + "/data_dumps?" + - { "page[number" => page[:number] - 1, + { "page[number]" => page[:number] - 1, "page[size]" => page[:size], sort: params[:sort], }.compact.to_query From b5502dab91c2d65ae712245ef764f1b1a39b4fbd Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 16:47:55 +0100 Subject: [PATCH 27/35] Remove invalid parameters to spec requests (#1889) --- spec/requests/data_dumps_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/requests/data_dumps_spec.rb b/spec/requests/data_dumps_spec.rb index b8033bd6e..56e38d75d 100644 --- a/spec/requests/data_dumps_spec.rb +++ b/spec/requests/data_dumps_spec.rb @@ -44,7 +44,7 @@ prev_link = prev_link_absolute.path + "?" + prev_link_absolute.query expect(prev_link).to eq("/data_dumps?page%5Bnumber%5D=1&page%5Bsize%5D=4") - get next_link, nil, headers + get next_link expect(last_response.status).to eq(200) expect(json["data"].size).to eq(2) @@ -66,7 +66,7 @@ next_link = next_link_absolute.path + "?" + next_link_absolute.query expect(next_link).to eq("/data_dumps?page%5Bnumber%5D=2&page%5Bsize%5D=5") - get next_link, nil, headers + get next_link expect(last_response.status).to eq(200) expect(json["data"].size).to eq(5) From b07a5fdd30ef1668a06c8f7c6237dee0fca5a3df Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 16:52:17 +0100 Subject: [PATCH 28/35] Fix links to return max page when the current page is outside of the range (#1889) --- app/controllers/data_dumps_controller.rb | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/app/controllers/data_dumps_controller.rb b/app/controllers/data_dumps_controller.rb index ad431b7c7..3dae5a737 100644 --- a/app/controllers/data_dumps_controller.rb +++ b/app/controllers/data_dumps_controller.rb @@ -52,8 +52,15 @@ def index }.compact.to_query end, prev: - if data_dumps.blank? || page[:number] == 1 || page[:number] == 0 + if page[:number] == 1 || page[:number] == 0 nil + elsif data_dumps.blank? + # use the max page size + request.base_url + "/data_dumps?" + + { "page[number]" => total_pages, + "page[size]" => page[:size], + sort: params[:sort], + }.compact.to_query else request.base_url + "/data_dumps?" + { "page[number]" => page[:number] - 1, From 7775fb6f5285051a354fefd0acaeea2d9ad201a1 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 17:00:57 +0100 Subject: [PATCH 29/35] Correct name of tested attribute to account for lowerCamel transformation (#1889) --- spec/requests/data_dumps_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/requests/data_dumps_spec.rb b/spec/requests/data_dumps_spec.rb index 56e38d75d..31f00dbba 100644 --- a/spec/requests/data_dumps_spec.rb +++ b/spec/requests/data_dumps_spec.rb @@ -98,7 +98,7 @@ expect(last_response.status).to eq(200) expect(json.dig("data", "attributes", "description")).to eq("Test Metadata Data Dump Factory creation") - expect(json.dig("data", "attributes", "start_date")).to eq(data_dump.start_date) + expect(json.dig("data", "attributes", "startDate")).to eq(data_dump.start_date) end end From 3650925567262e4fdf0e35a2602b0b3bfb32d727 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 13 Jul 2023 17:05:09 +0100 Subject: [PATCH 30/35] Correct expected date format to account for serializer behaviour (#1889) --- spec/requests/data_dumps_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/requests/data_dumps_spec.rb b/spec/requests/data_dumps_spec.rb index 31f00dbba..a18e5d19a 100644 --- a/spec/requests/data_dumps_spec.rb +++ b/spec/requests/data_dumps_spec.rb @@ -98,7 +98,7 @@ expect(last_response.status).to eq(200) expect(json.dig("data", "attributes", "description")).to eq("Test Metadata Data Dump Factory creation") - expect(json.dig("data", "attributes", "startDate")).to eq(data_dump.start_date) + expect(json.dig("data", "attributes", "startDate")).to eq(data_dump.start_date.rfc3339(3)) end end From 18d560822428c315d3e8b62cc8b8c91450071d18 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Wed, 19 Jul 2023 19:41:05 +0100 Subject: [PATCH 31/35] Update data dump controller spec to acquire and use a token (datacite/1868) --- spec/controllers/data_dumps_controller_spec.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/spec/controllers/data_dumps_controller_spec.rb b/spec/controllers/data_dumps_controller_spec.rb index b8eb47947..7647b882f 100644 --- a/spec/controllers/data_dumps_controller_spec.rb +++ b/spec/controllers/data_dumps_controller_spec.rb @@ -2,8 +2,11 @@ RSpec.describe DataDumpsController, type: :controller do + let(:token) { User.generate_token } + describe "GET #index" do it "returns http success" do + request.headers["Authorization"] = "Bearer " + token get :index expect(response).to have_http_status(:success) end @@ -12,6 +15,7 @@ describe "GET #show" do let(:data_dump) { create(:data_dump) } it "returns http success" do + request.headers["Authorization"] = "Bearer " + token get :show, params: { id: data_dump.uid } expect(response).to have_http_status(:success) end From a45e6b76c2f69976f4b7df7dbb57a045d2ec9918 Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Wed, 19 Jul 2023 19:41:42 +0100 Subject: [PATCH 32/35] Update data dump request spec to test authorization and abilities (datacite/1868) (datacite/1865) --- spec/requests/data_dumps_spec.rb | 237 ++++++++++++++++++++----------- 1 file changed, 158 insertions(+), 79 deletions(-) diff --git a/spec/requests/data_dumps_spec.rb b/spec/requests/data_dumps_spec.rb index a18e5d19a..2ff8078a6 100644 --- a/spec/requests/data_dumps_spec.rb +++ b/spec/requests/data_dumps_spec.rb @@ -4,6 +4,12 @@ describe DataDumpsController, type: :request, elasticsearch: true do let(:data_dump) { create(:data_dump, uid: "test_dump") } + let(:token) { User.generate_token } + let(:headers) { { "HTTP_ACCEPT" => "application/vnd.api+json", "HTTP_AUTHORIZATION" => "Bearer " + token } } + let(:bad_headers) { { "HTTP_ACCEPT" => "application/vnd.api+json", "HTTP_AUTHORIZATION" => "Basic 12345" } } + let(:user_token) { User.generate_token(role_id: "user") } + let(:user_headers) { { "HTTP_ACCEPT" => "application/vnd.api+json", "HTTP_AUTHORIZATION" => "Bearer " + user_token } } + describe "GET /data_dumps", elasticsearch: true do let!(:data_dumps) { create_list(:data_dump, 10) } @@ -12,103 +18,176 @@ DataDump.import sleep 1 end + context "with valid authorization" do + it "returns data dumps" do + get "/data_dumps", nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(10) + expect(json.dig("meta", "total")).to eq(10) + end + + it "returns data dumps with pagination" do + get "/data_dumps?page[number]=1&page[size]=4", nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(4) + expect(json.dig("meta", "total")).to eq(10) + next_link_absolute = Addressable::URI.parse(json.dig("links", "next")) + next_link = next_link_absolute.path + "?" + next_link_absolute.query + expect(next_link).to eq("/data_dumps?page%5Bnumber%5D=2&page%5Bsize%5D=4") + expect(json.dig("links", "prev")).to be_nil + + get next_link, nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(4) + expect(json.dig("meta", "total")).to eq(10) + next_link_absolute = Addressable::URI.parse(json.dig("links", "next")) + next_link = next_link_absolute.path + "?" + next_link_absolute.query + expect(next_link).to eq("/data_dumps?page%5Bnumber%5D=3&page%5Bsize%5D=4") + prev_link_absolute = Addressable::URI.parse(json.dig("links", "prev")) + prev_link = prev_link_absolute.path + "?" + prev_link_absolute.query + expect(prev_link).to eq("/data_dumps?page%5Bnumber%5D=1&page%5Bsize%5D=4") + + get next_link, nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(2) + expect(json.dig("meta", "total")).to eq(10) + expect(json.dig("links", "next")).to be_nil + prev_link_absolute = Addressable::URI.parse(json.dig("links", "prev")) + prev_link = prev_link_absolute.path + "?" + prev_link_absolute.query + expect(prev_link).to eq("/data_dumps?page%5Bnumber%5D=2&page%5Bsize%5D=4") + end + + it "returns correct page links when results is exactly divisible by page size" do + get "/data_dumps?page[number]=1&page[size]=5", nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(5) + expect(json.dig("meta", "total")).to eq(10) + expect(json.dig("links", "prev")).to be_nil + next_link_absolute = Addressable::URI.parse(json.dig("links", "next")) + next_link = next_link_absolute.path + "?" + next_link_absolute.query + expect(next_link).to eq("/data_dumps?page%5Bnumber%5D=2&page%5Bsize%5D=5") + + get next_link, nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(5) + expect(json.dig("meta", "total")).to eq(10) + expect(json.dig("links", "next")).to be_nil + prev_link_absolute = Addressable::URI.parse(json.dig("links", "prev")) + prev_link = prev_link_absolute.path + "?" + prev_link_absolute.query + expect(prev_link).to eq("/data_dumps?page%5Bnumber%5D=1&page%5Bsize%5D=5") + end - it "returns data dumps" do - get "/data_dumps" + it "returns a blank resultset when page is above max page" do + get "/data_dumps?page[number]=3&page[size]=5", nil, headers - expect(last_response.status).to eq(200) - expect(json["data"].size).to eq(10) - expect(json.dig("meta", "total")).to eq(10) + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(0) + expect(json.dig("meta", "totalPages")).to eq(2) + expect(json.dig("meta", "page")).to eq(3) + expect(json.dig("links", "next")).to be_nil + prev_link_absolute = Addressable::URI.parse(json.dig("links", "prev")) + prev_link = prev_link_absolute.path + "?" + prev_link_absolute.query + expect(prev_link).to eq("/data_dumps?page%5Bnumber%5D=2&page%5Bsize%5D=5") + end end - it "returns data dumps with pagination" do - get "/data_dumps?page[number]=1&page[size]=4" - - expect(last_response.status).to eq(200) - expect(json["data"].size).to eq(4) - expect(json.dig("meta", "total")).to eq(10) - next_link_absolute = Addressable::URI.parse(json.dig("links", "next")) - next_link = next_link_absolute.path + "?" + next_link_absolute.query - expect(next_link).to eq("/data_dumps?page%5Bnumber%5D=2&page%5Bsize%5D=4") - expect(json.dig("links", "prev")).to be_nil - - get next_link - - expect(last_response.status).to eq(200) - expect(json["data"].size).to eq(4) - expect(json.dig("meta", "total")).to eq(10) - next_link_absolute = Addressable::URI.parse(json.dig("links", "next")) - next_link = next_link_absolute.path + "?" + next_link_absolute.query - expect(next_link).to eq("/data_dumps?page%5Bnumber%5D=3&page%5Bsize%5D=4") - prev_link_absolute = Addressable::URI.parse(json.dig("links", "prev")) - prev_link = prev_link_absolute.path + "?" + prev_link_absolute.query - expect(prev_link).to eq("/data_dumps?page%5Bnumber%5D=1&page%5Bsize%5D=4") - - get next_link - - expect(last_response.status).to eq(200) - expect(json["data"].size).to eq(2) - expect(json.dig("meta", "total")).to eq(10) - expect(json.dig("links", "next")).to be_nil - prev_link_absolute = Addressable::URI.parse(json.dig("links", "prev")) - prev_link = prev_link_absolute.path + "?" + prev_link_absolute.query - expect(prev_link).to eq("/data_dumps?page%5Bnumber%5D=2&page%5Bsize%5D=4") + context "without authorization" do + it "returns access denied" do + get "/data_dumps" + expect(last_response.status).to eq(401) + end end - it "returns correct page links when results is exactly divisible by page size" do - get "/data_dumps?page[number]=1&page[size]=5" - - expect(last_response.status).to eq(200) - expect(json["data"].size).to eq(5) - expect(json.dig("meta", "total")).to eq(10) - expect(json.dig("links", "prev")).to be_nil - next_link_absolute = Addressable::URI.parse(json.dig("links", "next")) - next_link = next_link_absolute.path + "?" + next_link_absolute.query - expect(next_link).to eq("/data_dumps?page%5Bnumber%5D=2&page%5Bsize%5D=5") - - get next_link - - expect(last_response.status).to eq(200) - expect(json["data"].size).to eq(5) - expect(json.dig("meta", "total")).to eq(10) - expect(json.dig("links", "next")).to be_nil - prev_link_absolute = Addressable::URI.parse(json.dig("links", "prev")) - prev_link = prev_link_absolute.path + "?" + prev_link_absolute.query - expect(prev_link).to eq("/data_dumps?page%5Bnumber%5D=1&page%5Bsize%5D=5") + context "with bad authorization" do + it "returns access denied" do + get "/data_dumps", nil, bad_headers + expect(last_response.status).to eq(401) + end end - it "returns a blank resultset when page is above max page" do - get "/data_dumps?page[number]=3&page[size]=5" - - expect(last_response.status).to eq(200) - expect(json["data"].size).to eq(0) - expect(json.dig("meta", "totalPages")).to eq(2) - expect(json.dig("meta", "page")).to eq(3) - expect(json.dig("links", "next")).to be_nil - prev_link_absolute = Addressable::URI.parse(json.dig("links", "prev")) - prev_link = prev_link_absolute.path + "?" + prev_link_absolute.query - expect(prev_link).to eq("/data_dumps?page%5Bnumber%5D=2&page%5Bsize%5D=5") + context "with insufficient permission" do + it "returns access denied" do + get "/data_dumps", nil, user_headers + expect(last_response.status).to eq(403) + end end end describe "GET /data_dumps/:id" do - context "when the record exists" do - it "returns the record" do - get "/data_dumps/#{data_dump.uid}" + context "with valid authorization" do + context "when the record exists" do + it "returns the record" do + get "/data_dumps/#{data_dump.uid}", nil, headers + + expect(last_response.status).to eq(200) + expect(json.dig("data", "attributes", "description")).to eq("Test Metadata Data Dump Factory creation") + expect(json.dig("data", "attributes", "startDate")).to eq(data_dump.start_date.rfc3339(3)) + end + end - expect(last_response.status).to eq(200) - expect(json.dig("data", "attributes", "description")).to eq("Test Metadata Data Dump Factory creation") - expect(json.dig("data", "attributes", "startDate")).to eq(data_dump.start_date.rfc3339(3)) + context "when the record does not exist" do + it "returns status code 404" do + get "/data_dumps/invalid_id", nil, headers + + expect(last_response.status).to eq(404) + expect(json["errors"].first).to eq("status" => "404", "title" => "The resource you are looking for doesn't exist.") + end + end + end + + context "without authorization" do + context "when the record exists" do + it "returns access denied" do + get "/data_dumps/#{data_dump.uid}" + expect(last_response.status).to eq(401) + end + end + + context "when the record does not exist" do + it "returns access denied" do + get "/data_dumps/invalid_id" + expect(last_response.status).to eq(401) + end + end + end + + context "with bad authorization" do + context "when the record exists" do + it "returns access denied" do + get "/data_dumps/#{data_dump.uid}", nil, bad_headers + expect(last_response.status).to eq(401) + end + end + + context "when the record does not exist" do + it "returns access denied" do + get "/data_dumps/invalid_id", nil, bad_headers + expect(last_response.status).to eq(401) + end end end - context "when the record does not exist" do - it "returns status code 404" do - get "/data_dumps/invalid_id" + context "with insufficient permission" do + context "when the record exists" do + it "returns access denied" do + get "/data_dumps/#{data_dump.uid}", nil, user_headers + expect(last_response.status).to eq(403) + end + end - expect(last_response.status).to eq(404) - expect(json["errors"].first).to eq("status" => "404", "title" => "The resource you are looking for doesn't exist.") + context "when the record does not exist" do + it "returns access denied" do + get "/data_dumps/invalid_id", nil, user_headers + expect(last_response.status).to eq(403) + end end end end + end From 7b6d94d38d5b655516b93d72bb14a011c50263cd Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Wed, 19 Jul 2023 19:42:13 +0100 Subject: [PATCH 33/35] Add ability to permit reading of data dump files (datacite/1865) --- app/models/ability.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/models/ability.rb b/app/models/ability.rb index 36e5d8eb3..ab62ddc72 100644 --- a/app/models/ability.rb +++ b/app/models/ability.rb @@ -23,6 +23,7 @@ def initialize(user) can :export, :contacts can :export, :organizations can :export, :repositories + can :read, :read_data_dumps elsif user.role_id == "staff_user" can %i[read read_billing_information read_contact_information read_analytics], :all elsif user.role_id == "consortium_admin" && user.provider_id.present? From cef0eed39057d17d14d8d24e77bb20cd3be5894f Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Wed, 19 Jul 2023 19:42:36 +0100 Subject: [PATCH 34/35] Require ability to access data dump controller methods (datacite/1865) --- app/controllers/data_dumps_controller.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/app/controllers/data_dumps_controller.rb b/app/controllers/data_dumps_controller.rb index 3dae5a737..b7d500783 100644 --- a/app/controllers/data_dumps_controller.rb +++ b/app/controllers/data_dumps_controller.rb @@ -1,5 +1,9 @@ class DataDumpsController < ApplicationController + + prepend_before_action :authenticate_user! + # load_and_authorize_resource def index + authorize! :read, :read_data_dumps sort = case params[:sort] when "created" @@ -90,6 +94,7 @@ def index end def show + authorize! :read, :read_data_dumps data_dump = DataDump.where(uid: params[:id]).first if data_dump.blank? || ( From 344f1eee199d9d8636e41e30d8fb5cbd950b1dcd Mon Sep 17 00:00:00 2001 From: Mike Bennett Date: Thu, 10 Aug 2023 18:10:22 +0100 Subject: [PATCH 35/35] Add data dump feature 2 --- app/controllers/data_dumps_controller.rb | 17 ++- app/models/data_dump.rb | 41 ++++++ config/routes.rb | 4 +- spec/requests/data_dumps_spec.rb | 172 +++++++++++++++++++++++ 4 files changed, 231 insertions(+), 3 deletions(-) diff --git a/app/controllers/data_dumps_controller.rb b/app/controllers/data_dumps_controller.rb index b7d500783..965c3ec3d 100644 --- a/app/controllers/data_dumps_controller.rb +++ b/app/controllers/data_dumps_controller.rb @@ -25,9 +25,9 @@ def index page = page_from_params(params) response = DataDump.query( - "", page: page, - sort: sort + sort: sort, + scope: params[:scope] ) begin @@ -105,4 +105,17 @@ def show end render json: DataDumpSerializer.new(data_dump).serialized_json, status: :ok end + + def latest + authorize! :read, :read_data_dumps + data_dump = DataDump.where(scope: params[:scope], aasm_state: "complete").order(end_date: :desc).first + if data_dump.blank? || + ( + data_dump.aasm_state != "complete" + # TODO: Add conditional check for role here + ) + fail ActiveRecord::RecordNotFound + end + render json: DataDumpSerializer.new(data_dump).serialized_json, status: :ok + end end diff --git a/app/models/data_dump.rb b/app/models/data_dump.rb index f1eb47029..6a9a9d74c 100644 --- a/app/models/data_dump.rb +++ b/app/models/data_dump.rb @@ -78,4 +78,45 @@ class DataDump < ApplicationRecord def self.query_aggregations {} end + + def self.query(options = {}) + + options[:page] ||= {} + options[:page][:number] ||= 1 + options[:page][:size] ||= 25 + + from = ((options.dig(:page, :number) || 1) - 1) * (options.dig(:page, :size) || 25) + sort = options[:sort] + + filter = [] + if options[:scope].present? + filter << { term: { scope: options[:scope].downcase } } + end + + es_query = {bool: {filter: filter}} + + if options.fetch(:page, {}).key?(:cursor) + __elasticsearch__.search( + { + size: options.dig(:page, :size), + search_after: search_after, + sort: sort, + query: es_query, + track_total_hits: true, + }.compact, + ) + else + __elasticsearch__.search( + { + size: options.dig(:page, :size), + from: from, + sort: sort, + query: es_query, + track_total_hits: true, + }.compact, + ) + end + + end + end diff --git a/config/routes.rb b/config/routes.rb index 180bc586a..f4277a1ea 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -230,7 +230,9 @@ resources :repository_prefixes, path: "repository-prefixes" resources :resource_types, path: "resource-types", only: %i[show index] - resources :data_dumps, constraints: { id: /.+/ }, only: %i[show index] + get "/data_dumps/:scope/latest", to: "data_dumps#latest", constraints: { scope: /(metadata|link)/ } + get "/data_dumps/:scope", to: "data_dumps#index", constraints: { scope: /(metadata|link)/ } + resources :data_dumps, constraints: { id: /[A-Za-z0-9_-]+/ }, only: %i[show index] # custom routes for maintenance tasks post ":username", to: "datacite_dois#show", as: :user diff --git a/spec/requests/data_dumps_spec.rb b/spec/requests/data_dumps_spec.rb index 2ff8078a6..5949caf2b 100644 --- a/spec/requests/data_dumps_spec.rb +++ b/spec/requests/data_dumps_spec.rb @@ -190,4 +190,176 @@ end end + describe "GET /data_dumps/:id" do + context "with valid authorization" do + context "when the record exists" do + it "returns the record" do + get "/data_dumps/#{data_dump.uid}", nil, headers + + expect(last_response.status).to eq(200) + expect(json.dig("data", "attributes", "description")).to eq("Test Metadata Data Dump Factory creation") + expect(json.dig("data", "attributes", "startDate")).to eq(data_dump.start_date.rfc3339(3)) + end + end + + context "when the record does not exist" do + it "returns status code 404" do + get "/data_dumps/invalid_id", nil, headers + + expect(last_response.status).to eq(404) + expect(json["errors"].first).to eq("status" => "404", "title" => "The resource you are looking for doesn't exist.") + end + end + end + + context "without authorization" do + context "when the record exists" do + it "returns access denied" do + get "/data_dumps/#{data_dump.uid}" + expect(last_response.status).to eq(401) + end + end + + context "when the record does not exist" do + it "returns access denied" do + get "/data_dumps/invalid_id" + expect(last_response.status).to eq(401) + end + end + end + + context "with bad authorization" do + context "when the record exists" do + it "returns access denied" do + get "/data_dumps/#{data_dump.uid}", nil, bad_headers + expect(last_response.status).to eq(401) + end + end + + context "when the record does not exist" do + it "returns access denied" do + get "/data_dumps/invalid_id", nil, bad_headers + expect(last_response.status).to eq(401) + end + end + end + + context "with insufficient permission" do + context "when the record exists" do + it "returns access denied" do + get "/data_dumps/#{data_dump.uid}", nil, user_headers + expect(last_response.status).to eq(403) + end + end + + context "when the record does not exist" do + it "returns access denied" do + get "/data_dumps/invalid_id", nil, user_headers + expect(last_response.status).to eq(403) + end + end + end + end + + describe "GET /data_dumps/:scope", elasticsearch: true do + let!(:data_dumps) { create_list(:data_dump, 10) } + let!(:link_dumps) { create_list(:data_dump, 10, {scope: "link"}) } + + before do + DataDump.import + sleep 1 + end + + context "with valid authorization" do + it "returns metadata data dumps" do + get "/data_dumps/metadata", nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(10) + expect(json.dig("meta", "total")).to eq(10) + end + + it "returns link data dumps" do + get "/data_dumps/link", nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(10) + expect(json.dig("meta", "total")).to eq(10) + end + end + + context "without authorization" do + it "returns access denied" do + get "/data_dumps/metadata" + expect(last_response.status).to eq(401) + end + end + + context "with bad authorization" do + it "returns access denied" do + get "/data_dumps/metadata", nil, bad_headers + expect(last_response.status).to eq(401) + end + end + + context "with insufficient permission" do + it "returns access denied" do + get "/data_dumps/metadata", nil, user_headers + expect(last_response.status).to eq(403) + end + end + end + + describe "GET /data_dumps/:scope/latest", elasticsearch: true do + let!(:data_dumps) { create_list(:data_dump, 10) } + let!(:link_dumps) { create_list(:data_dump, 10, {scope: "link"}) } + let!(:latest_data) { create(:data_dump, uid: "latest_data", end_date:"2023-12-31")} + let!(:latest_link) { create(:data_dump, uid: "latest_link", scope: "link", end_date:"2023-12-31")} + before do + DataDump.import + sleep 1 + end + + context "with valid authorization" do + it "returns latest metadata data dump" do + get "/data_dumps/metadata/latest", nil, headers + + expect(last_response.status).to eq(200) + expect(json.dig("data", "id")).to eq("latest_data") + expect(json.dig("data", "attributes", "endDate")).to eq("2023-12-31T00:00:00.000Z") + expect(json.dig("data", "attributes", "startDate")).to eq(latest_data.start_date.rfc3339(3)) + end + + it "returns latest link data dump" do + get "/data_dumps/link/latest", nil, headers + + expect(last_response.status).to eq(200) + expect(json.dig("data", "id")).to eq("latest_link") + expect(json.dig("data", "attributes", "endDate")).to eq("2023-12-31T00:00:00.000Z") + expect(json.dig("data", "attributes", "startDate")).to eq(latest_link.start_date.rfc3339(3)) + end + end + + context "without authorization" do + it "returns access denied" do + get "/data_dumps/metadata/latest" + expect(last_response.status).to eq(401) + end + end + + context "with bad authorization" do + it "returns access denied" do + get "/data_dumps/metadata/latest", nil, bad_headers + expect(last_response.status).to eq(401) + end + end + + context "with insufficient permission" do + it "returns access denied" do + get "/data_dumps/metadata/latest", nil, user_headers + expect(last_response.status).to eq(403) + end + end + end + end