diff --git a/Gemfile b/Gemfile index e26ed9879..4dd59eabf 100644 --- a/Gemfile +++ b/Gemfile @@ -68,6 +68,9 @@ gem "google-protobuf", "3.10.0.rc.1" gem "sprockets", "~> 3.7", ">= 3.7.2" gem 'uuid', '~> 2.3', '>= 2.3.9' gem 'strong_migrations', '~> 0.6.0' +gem 'crawler_detect' + + group :development, :test do gem "rspec-rails", "~> 3.8", ">= 3.8.2" diff --git a/app/controllers/application_controller.rb b/app/controllers/application_controller.rb index 99fd3f4c1..992713e28 100644 --- a/app/controllers/application_controller.rb +++ b/app/controllers/application_controller.rb @@ -34,6 +34,14 @@ def set_jsonp_format end end + def detect_crawler + #### Crawlers shouldn't be making queries + if request.is_crawler? && params[:query].present? + render json: {}, status: :not_found + end + end + + def set_consumer_header if current_user response.headers['X-Credential-Username'] = current_user.uid diff --git a/app/controllers/events_controller.rb b/app/controllers/events_controller.rb index feb5e40e9..861043a70 100644 --- a/app/controllers/events_controller.rb +++ b/app/controllers/events_controller.rb @@ -8,6 +8,7 @@ class EventsController < ApplicationController include BatchLoaderHelper prepend_before_action :authenticate_user!, except: [:index, :show] + before_action :detect_crawler before_action :load_event, only: [:show, :destroy] before_action :set_include, only: [:index, :show, :create, :update] authorize_resource only: [:destroy] diff --git a/app/controllers/old_events_controller.rb b/app/controllers/old_events_controller.rb index dba7d2b62..7749d7928 100644 --- a/app/controllers/old_events_controller.rb +++ b/app/controllers/old_events_controller.rb @@ -5,6 +5,7 @@ class OldEventsController < ApplicationController include Facetable prepend_before_action :authenticate_user!, except: [:index, :show] + before_action :detect_crawler before_action :load_event, only: [:show, :destroy] before_action :set_include, only: [:index, :show, :create, :update] authorize_resource 
only: [:destroy] diff --git a/config/application.rb b/config/application.rb index abf55cad0..0ec87f9bf 100644 --- a/config/application.rb +++ b/config/application.rb @@ -65,6 +65,8 @@ class Application < Rails::Application # Initialize configuration defaults for originally generated Rails version. config.load_defaults 5.2 + config.middleware.use Rack::CrawlerDetect + # include graphql config.paths.add Rails.root.join('app', 'graphql', 'types').to_s, eager_load: true config.paths.add Rails.root.join('app', 'graphql', 'mutations').to_s, eager_load: true @@ -81,29 +83,29 @@ class Application < Rails::Application # secret_key_base is not used by Rails API, as there are no sessions config.secret_key_base = "blipblapblup" - config.lograge.enabled = true - config.lograge.formatter = Lograge::Formatters::Logstash.new - config.lograge.logger = ::LogStashLogger.new( - type: :stdout - ) - config.lograge.log_level = ENV["LOG_LEVEL"].to_sym - - config.active_job.logger = config.lograge.logger - - config.lograge.ignore_actions = ["HeartbeatController#index", "IndexController#index"] - config.lograge.ignore_custom = lambda do |event| - event.payload.inspect.length > 100000 - end - config.lograge.base_controller_class = "ActionController::API" - - config.lograge.custom_options = lambda do |event| - exceptions = %w(controller action format id) - { - params: event.payload[:params].except(*exceptions), - uid: event.payload[:uid], - } - end - config.logger = config.lograge.logger + config.lograge.enabled = true + config.lograge.formatter = Lograge::Formatters::Logstash.new + config.lograge.logger = ::LogStashLogger.new( + type: :stdout, + ) + config.lograge.log_level = ENV["LOG_LEVEL"].to_sym + + config.active_job.logger = config.lograge.logger + + config.lograge.ignore_actions = ["HeartbeatController#index", "IndexController#index"] + config.lograge.ignore_custom = lambda do |event| + event.payload.inspect.length > 100000 + end + config.lograge.base_controller_class = 
"ActionController::API" + + config.lograge.custom_options = lambda do |event| + exceptions = %w(controller action format id) + { + params: event.payload[:params].except(*exceptions), + uid: event.payload[:uid], + } + end + config.logger = config.lograge.logger config.active_record.logger = nil # configure caching diff --git a/spec/requests/events_spec.rb b/spec/requests/events_spec.rb index 930f50cd5..9e624c414 100644 --- a/spec/requests/events_spec.rb +++ b/spec/requests/events_spec.rb @@ -923,6 +923,20 @@ # expect(attributes["subj-id"]).to eq(event.subj_id) # end # end + context "query by source-id by Crawler" do + let(:uri) { "/events?query=datacite" } + + # Exclude the token header. + let(:headers) do + { "HTTP_ACCEPT" => "application/json", + "HTTP_USER_AGENT" => "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" } + end + + it "json" do + get uri, nil, headers + expect(last_response.status).to eq(404) + end + end end context "destroy" do