Skip to content

Commit

Permalink
Merge pull request #407 from datacite/block_googlebot_queries
Browse files Browse the repository at this point in the history
Block googlebot queries
  • Loading branch information
kjgarza authored Jan 31, 2020
2 parents dd885d2 + b3fd503 commit 437569f
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 23 deletions.
3 changes: 3 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ gem "google-protobuf", "3.10.0.rc.1"
gem "sprockets", "~> 3.7", ">= 3.7.2"
gem 'uuid', '~> 2.3', '>= 2.3.9'
gem 'strong_migrations', '~> 0.6.0'
gem 'crawler_detect'



group :development, :test do
gem "rspec-rails", "~> 3.8", ">= 3.8.2"
Expand Down
8 changes: 8 additions & 0 deletions app/controllers/application_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ def set_jsonp_format
end
end

# Before-action guard: crawlers should not be issuing search queries.
#
# When the request is flagged as coming from a crawler (via the
# `is_crawler?` predicate on the request) and a non-blank `query` parameter
# is present, an empty JSON body is rendered with a 404 status. In every
# other case this method does nothing.
def detect_crawler
  return unless request.is_crawler? && params[:query].present?

  render json: {}, status: :not_found
end


def set_consumer_header
if current_user
response.headers['X-Credential-Username'] = current_user.uid
Expand Down
1 change: 1 addition & 0 deletions app/controllers/events_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ class EventsController < ApplicationController
include BatchLoaderHelper

prepend_before_action :authenticate_user!, except: [:index, :show]
before_action :detect_crawler
before_action :load_event, only: [:show, :destroy]
before_action :set_include, only: [:index, :show, :create, :update]
authorize_resource only: [:destroy]
Expand Down
1 change: 1 addition & 0 deletions app/controllers/old_events_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ class OldEventsController < ApplicationController
include Facetable

prepend_before_action :authenticate_user!, except: [:index, :show]
before_action :detect_crawler
before_action :load_event, only: [:show, :destroy]
before_action :set_include, only: [:index, :show, :create, :update]
authorize_resource only: [:destroy]
Expand Down
48 changes: 25 additions & 23 deletions config/application.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ class Application < Rails::Application
# Initialize configuration defaults for originally generated Rails version.
config.load_defaults 5.2

config.middleware.use Rack::CrawlerDetect

# include graphql
config.paths.add Rails.root.join('app', 'graphql', 'types').to_s, eager_load: true
config.paths.add Rails.root.join('app', 'graphql', 'mutations').to_s, eager_load: true
Expand All @@ -81,29 +83,29 @@ class Application < Rails::Application
# secret_key_base is not used by Rails API, as there are no sessions
config.secret_key_base = "blipblapblup"

config.lograge.enabled = true
config.lograge.formatter = Lograge::Formatters::Logstash.new
config.lograge.logger = ::LogStashLogger.new(
type: :stdout
)
config.lograge.log_level = ENV["LOG_LEVEL"].to_sym

config.active_job.logger = config.lograge.logger

config.lograge.ignore_actions = ["HeartbeatController#index", "IndexController#index"]
config.lograge.ignore_custom = lambda do |event|
event.payload.inspect.length > 100000
end
config.lograge.base_controller_class = "ActionController::API"

config.lograge.custom_options = lambda do |event|
exceptions = %w(controller action format id)
{
params: event.payload[:params].except(*exceptions),
uid: event.payload[:uid],
}
end
config.logger = config.lograge.logger
# Structured request logging: enable lograge, format entries with the
# Logstash formatter and write them to a LogStashLogger of type :stdout.
config.lograge.enabled = true
config.lograge.formatter = Lograge::Formatters::Logstash.new
config.lograge.logger = ::LogStashLogger.new(
type: :stdout,
)
# The log level is read from the LOG_LEVEL environment variable.
# NOTE(review): ENV["LOG_LEVEL"] is nil when the variable is unset, and
# nil.to_sym raises NoMethodError -- presumably a default is assigned
# earlier in this file; confirm.
config.lograge.log_level = ENV["LOG_LEVEL"].to_sym

# Active Job reuses the same logger instance as lograge.
config.active_job.logger = config.lograge.logger

# Controller actions listed here are excluded from lograge output.
config.lograge.ignore_actions = ["HeartbeatController#index", "IndexController#index"]
# Custom ignore predicate: true when the inspected event payload is longer
# than 100000 characters.
config.lograge.ignore_custom = lambda do |event|
event.payload.inspect.length > 100000
end
config.lograge.base_controller_class = "ActionController::API"

# Extra fields per log entry: the request params without the
# controller/action/format/id keys, plus the uid taken from the payload.
config.lograge.custom_options = lambda do |event|
exceptions = %w(controller action format id)
{
params: event.payload[:params].except(*exceptions),
uid: event.payload[:uid],
}
end
# The application-wide logger is the same instance as the lograge logger.
config.logger = config.lograge.logger
config.active_record.logger = nil

# configure caching
Expand Down
14 changes: 14 additions & 0 deletions spec/requests/events_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -923,6 +923,20 @@
# expect(attributes["subj-id"]).to eq(event.subj_id)
# end
# end
context "query by source-id by Crawler" do
  # A request carrying a `query` parameter and a Googlebot User-Agent is
  # expected to be answered with 404.
  let(:uri) { "/events?query=datacite" }

  # Only Accept and User-Agent are sent; no token header is included.
  let(:headers) do
    {
      "HTTP_ACCEPT" => "application/json",
      "HTTP_USER_AGENT" => "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
    }
  end

  it "json" do
    get(uri, nil, headers)

    expect(last_response.status).to eq(404)
  end
end
end

context "destroy" do
Expand Down

0 comments on commit 437569f

Please sign in to comment.