From 90779b81675ea05263e291ae2fdf6417da3f9f1a Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Tue, 5 Dec 2023 14:15:37 +0000 Subject: [PATCH] Spike: serve content-store requests directly from Publishing API. Basic copy-and-paste of find_by_path, which now works on editions. Add controller in new content-store namespace to prove concept of serving content-store requests directly from publishing-api. The new controller responds on /content-store/(live or draft)/(path) and uses the DownstreamPayload to present the relevant edition as JSON. It takes just under 0.5s to respond on a local laptop. We'd need to speed that up for production use, with some combination of optimisations and caching. That's out-of-scope for this spike, we've proved that it's potentially possible and that's enough. linting --- Gemfile | 2 - Gemfile.lock | 71 ++++++++--------- .../content_store/content_items_controller.rb | 35 ++++++++ app/models/find_by_path.rb | 79 +++++++++++++++++++ config/routes.rb | 4 + .../content_items_controller_spec.rb | 60 ++++++++++++++ spec/routes_spec.rb | 15 ++++ 7 files changed, 228 insertions(+), 38 deletions(-) create mode 100644 app/controllers/content_store/content_items_controller.rb create mode 100644 app/models/find_by_path.rb create mode 100644 spec/controllers/content_store/content_items_controller_spec.rb create mode 100644 spec/routes_spec.rb diff --git a/Gemfile b/Gemfile index 1d42d905a..f701dddf6 100644 --- a/Gemfile +++ b/Gemfile @@ -3,7 +3,6 @@ source "https://rubygems.org" gem "rails", "7.1.2" gem "aws-sdk-s3" -gem "bootsnap", require: false gem "bunny" gem "dalli" gem "fuzzy_match" @@ -14,7 +13,6 @@ gem "govuk_app_config" gem "govuk_document_types" gem "govuk_schemas" gem "govuk_sidekiq" -gem "jsonnet" gem "json-schema", require: false gem "oj" gem "pg" diff --git a/Gemfile.lock b/Gemfile.lock index 855d4cd30..14c180266 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -81,13 +81,13 @@ GEM ast (2.4.2) awesome_print (1.9.2) aws-eventstream (1.3.0) - aws-partitions (1.873.0) - aws-sdk-core (3.190.1) + aws-partitions (1.878.0) + aws-sdk-core (3.190.2) aws-eventstream (~> 1, >= 1.3.0) aws-partitions (~> 1, >= 1.651.0) aws-sigv4 (~> 1.8) jmespath (~> 1, >= 1.6.1) - aws-sdk-kms (1.75.0) + aws-sdk-kms (1.76.0) aws-sdk-core (~> 3, >= 3.188.0) aws-sigv4 (~> 1.1) aws-sdk-s3 (1.142.0) @@ -138,7 +138,7 @@ GEM diff-lcs (1.5.0) dig_rb (1.0.1) docile (1.4.0) - domain_name (0.6.20231109) + domain_name (0.6.20240107) drb (2.2.0) ruby2_keywords erubi (1.12.0) @@ -149,14 +149,14 @@ GEM factory_bot_rails (6.4.3) factory_bot (~> 6.4) railties (>= 5.0.0) - faraday (2.7.10) - faraday-net_http (>= 2.0, < 3.1) - ruby2_keywords (>= 0.0.4) - faraday-net_http (3.0.2) - ffi (1.15.5) + faraday (2.9.0) + faraday-net_http (>= 2.0, < 3.2) + faraday-net_http (3.1.0) + net-http + ffi (1.16.3) find_a_port (1.0.1) fuzzy_match (2.1.0) - gds-api-adapters (92.0.0) + gds-api-adapters (92.1.0) addressable link_header null_logger @@ -172,10 +172,10 @@ GEM warden-oauth2 (~> 0.0.1) globalid (1.2.1) activesupport (>= 6.1) - google-protobuf (3.25.1) - google-protobuf (3.25.1-aarch64-linux) - google-protobuf (3.25.1-arm64-darwin) - google-protobuf (3.25.1-x86_64-linux) + google-protobuf (3.25.2) + google-protobuf (3.25.2-aarch64-linux) + google-protobuf (3.25.2-arm64-darwin) + google-protobuf (3.25.2-x86_64-linux) googleapis-common-protos-types (1.11.0) google-protobuf (~> 3.18) govspeak (8.3.2) @@ -204,7 +204,7 @@ GEM govuk_personalisation (0.15.0) plek (>= 1.9.0) rails (>= 6, < 8) - govuk_publishing_components (37.0.0) + govuk_publishing_components (37.2.1) govuk_app_config govuk_personalisation (>= 0.7.0) kramdown @@ -225,7 +225,7 @@ GEM capybara (>= 3.36) puma selenium-webdriver (>= 4.0) - hashdiff (1.0.1) + hashdiff (1.1.0) hashie (5.0.0) htmlentities (4.3.4) http-accept (1.7.0) @@ -237,15 +237,13 @@ GEM i18n (1.14.1) concurrent-ruby (~> 1.0) io-console (0.7.1) - irb (1.11.0) + irb (1.11.1) rdoc - reline (>= 0.3.8) + reline (>= 0.4.2) jmespath (1.6.2) json (2.7.1) json-schema (4.1.1) addressable (>= 2.8) - jsonnet (0.5.3) - mini_portile2 (>= 2.2.0) jwt (2.7.1) kramdown (2.4.0) rexml @@ -267,23 +265,24 @@ GEM net-smtp marcel (1.0.2) matrix (0.4.2) - mime-types (3.5.1) + mime-types (3.5.2) mime-types-data (~> 3.2015) mime-types-data (3.2023.1205) mini_mime (1.1.5) mini_portile2 (2.8.5) minitest (5.20.0) - msgpack (1.7.2) multi_xml (0.6.0) mutex_m (0.2.0) - net-imap (0.4.8) + net-http (0.4.1) + uri + net-imap (0.4.9.1) date net-protocol net-pop (0.1.2) net-protocol net-protocol (0.2.2) timeout - net-smtp (0.4.0) + net-smtp (0.4.0.1) net-protocol netrc (0.11.0) nio4r (2.7.0) @@ -306,7 +305,7 @@ GEM version_gem (~> 1.1) oj (3.16.3) bigdecimal (>= 3.0) - omniauth (2.1.1) + omniauth (2.1.2) hashie (>= 3.4.6) rack (>= 2.2.3) rack-protection @@ -536,13 +535,13 @@ GEM term-ansicolor (~> 1.7) thor (>= 0.20, < 2.0) parallel (1.24.0) - parser (3.2.2.4) + parser (3.3.0.2) ast (~> 2.4.1) racc parslet (2.0.0) pg (1.5.4) plek (5.0.0) - prometheus_exporter (2.0.8) + prometheus_exporter (2.1.0) webrick psych (5.1.2) stringio @@ -551,7 +550,8 @@ GEM nio4r (~> 2.0) racc (1.7.3) rack (2.2.8) - rack-protection (3.1.0) + rack-protection (3.2.0) + base64 (>= 0.1.0) rack (~> 2.2, >= 2.2.4) rack-proxy (0.7.7) rack @@ -602,8 +602,8 @@ GEM redis (4.8.1) redis-namespace (1.11.0) redis (>= 4) - regexp_parser (2.8.3) - reline (0.4.1) + regexp_parser (2.9.0) + reline (0.4.2) io-console (~> 0.5) request_store (1.5.1) rack (>= 1.4) @@ -649,10 +649,10 @@ GEM unicode-display_width (>= 2.4.0, < 3.0) rubocop-ast (1.30.0) parser (>= 3.2.1.0) - rubocop-capybara (2.19.0) + rubocop-capybara (2.20.0) + rubocop (~> 1.41) + rubocop-factory_bot (2.25.1) rubocop (~> 1.41) - rubocop-factory_bot (2.24.0) - rubocop (~> 1.33) rubocop-govuk (4.13.0) rubocop (= 1.59.0) rubocop-ast (= 1.30.0) @@ -688,7 +688,7 @@ GEM sentry-sidekiq (5.16.1) sentry-ruby (~> 5.16.1) sidekiq (>= 3.0) - set (1.0.3) + set (1.1.0) sidekiq (6.5.12) connection_pool (>= 2.2.5, < 3) rack (~> 2.0) @@ -732,6 +732,7 @@ GEM tzinfo (2.0.6) concurrent-ruby (~> 1.0) unicode-display_width (2.5.0) + uri (0.13.0) version_gem (1.1.3) warden (1.2.9) rack (>= 2.0.9) @@ -767,7 +768,6 @@ PLATFORMS DEPENDENCIES aws-sdk-s3 - bootsnap bunny byebug climate_control @@ -784,7 +784,6 @@ DEPENDENCIES govuk_sidekiq govuk_test json-schema - jsonnet listen oj pact diff --git a/app/controllers/content_store/content_items_controller.rb b/app/controllers/content_store/content_items_controller.rb new file mode 100644 index 000000000..e55f57509 --- /dev/null +++ b/app/controllers/content_store/content_items_controller.rb @@ -0,0 +1,35 @@ +class ContentStore::ContentItemsController < ApplicationController + skip_before_action :authenticate_user! + + def show + @edition = find_content_item(content_store: params[:content_store], path: base_path) + raise_error(404, "Could not find a content item for #{base_path}") unless @edition + # NOTE: version here is @edition.user_facing_version, not Event.maximum(:id) + # as that is only for managing conflicts between publishing-api and content-store + # + @content_item = DownstreamPayload.new(@edition, @edition.user_facing_version, draft: draft?) + render json: @content_item.content_store_payload + end + +private + + def find_content_item(content_store:, path:) + FindByPath.new(Edition.where(content_store:)).find(path) + end + + def draft? + params[:content_store] == "draft" + end + + def raise_error(code, message) + raise CommandError.new( + code:, + error_details: { + error: { + code:, + message:, + }, + }, + ) + end +end diff --git a/app/models/find_by_path.rb b/app/models/find_by_path.rb new file mode 100644 index 000000000..3c08a1d99 --- /dev/null +++ b/app/models/find_by_path.rb @@ -0,0 +1,79 @@ +class FindByPath + attr_reader :model_class + + def initialize(model_class) + @model_class = model_class + end + + def find(path) + exact_match = model_class.where(base_path: path).take(1).first + return exact_match if exact_match + + matches = find_route_matches(path) + + if matches.count.positive? + best_route_match(matches, path) + end + end + +private + + def find_route_matches(path) + query = model_class + .where("routes @> ?", json_path_element(path, "exact")) + # ANY will match any of the given array elements (similar to IN(), but for JSON arrays) + # the ARRAY [?]::jsonb[] is typecasting for PostgreSQL's JSON operators + .or(model_class.where("routes @> ANY (ARRAY [?]::jsonb[])", potential_prefix_json_matches(path))) + + if model_class.attribute_names.include?("redirects") + query = query + .or(model_class.where("redirects @> ?", json_path_element(path, "exact"))) + .or(model_class.where("redirects @> ANY (ARRAY [?]::jsonb[])", potential_prefix_json_matches(path))) + end + query + end + + # Given a path, will decompose the path into path prefixes, and + # return a JSON array element that can be matched against the + # routes or redirects array in the model_class + def potential_prefix_json_matches(path) + potential_prefixes(path).map { |p| json_path_element(p, "prefix") } + end + + def json_path_element(path, type) + [{ path:, type: }].to_json + end + + def best_route_match(matches, path) + exact_route_match(matches, path) || best_prefix_match(matches, path) + end + + def potential_prefixes(path) + paths = path.split("/").reject(&:empty?) + (0...paths.size).map { |i| "/#{paths[0..i].join('/')}" } + end + + def exact_route_match(matches, path) + matches.detect do |item| + routes_and_redirects(item).any? do |route| + route["path"] == path && route["type"] == "exact" + end + end + end + + def best_prefix_match(matches, path) + prefixes = potential_prefixes(path) + sorted = matches.sort_by do |item| + best_match = routes_and_redirects(item) + .select { |route| route["type"] == "prefix" && prefixes.include?(route["path"]) } + .min_by { |route| -route["path"].length } + + -best_match["path"].length + end + sorted.first + end + + def routes_and_redirects(item) + item.routes + (item.respond_to?(:redirects) ? item.redirects : []) + end +end diff --git a/config/routes.rb b/config/routes.rb index 75df1b192..8be70536e 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -38,6 +38,10 @@ def content_id_constraint(request) get "/links/changes", to: "link_changes#index" end + + namespace :content_store, path: "/content-store" do + get "/:content_store(/*base_path)", to: "content_items#show" + end end get "/healthcheck/live", to: proc { [200, {}, %w[OK]] } diff --git a/spec/controllers/content_store/content_items_controller_spec.rb b/spec/controllers/content_store/content_items_controller_spec.rb new file mode 100644 index 000000000..d7293417e --- /dev/null +++ b/spec/controllers/content_store/content_items_controller_spec.rb @@ -0,0 +1,60 @@ +require "spec_helper" + +RSpec.describe ContentStore::ContentItemsController, type: :controller do + let!(:document_en) do + create(:document, locale: "en") + end + let!(:live_edition) do + create( + :live_edition, + document: document_en, + base_path: "/content.en", + document_type: "guide", + schema_name: "topic", + user_facing_version: 1, + ) + end + let!(:draft_edition) do + create( + :draft_edition, + document: document_en, + base_path: "/content.en", + document_type: "topic", + schema_name: "topic", + user_facing_version: 2, + ) + end + + describe "#show" do + context "when there is an edition at the given base_path with the given content_store" do + let(:params) { { base_path: "content.en", content_store: "live" } } + + it "responds with the json content store representation of the edition" do + get(:show, params:) + expect(parsed_response).to include( + { + "title" => live_edition.title, + "description" => live_edition.description, + }, + ) + end + + it "has version set to the user-facing version number" do + get(:show, params:) + expect(parsed_response).to include( + { + "payload_version" => live_edition.user_facing_version, + }, + ) + end + end + + context "when there is not an edition at the given base_path with the given content_store" do + let(:params) { { base_path: "no-content-here", content_store: "live" } } + it "responds with a 404" do + get(:show, params:) + expect(response.status).to eq(404) + end + end + end +end diff --git a/spec/routes_spec.rb b/spec/routes_spec.rb new file mode 100644 index 000000000..fc8b51512 --- /dev/null +++ b/spec/routes_spec.rb @@ -0,0 +1,15 @@ +require "spec_helper" + +RSpec.describe "routes", type: :routing do + it "routes /content-store/live/ to the ContentStore::ContentItems controller" do + expect(get("/content-store/live/")).to route_to("content_store/content_items#show", content_store: "live") + end + + it "routes /content-store/draft/ to the ContentStore::ContentItems controller" do + expect(get("/content-store/draft/")).to route_to("content_store/content_items#show", content_store: "draft") + end + + it "parses the base_path correctly" do + expect(get("/content-store/live/guidance/foo")).to route_to("content_store/content_items#show", content_store: "live", base_path: "guidance/foo") + end +end