Skip to content

Commit

Permalink
Merge branch 'master' into blexbot
Browse files Browse the repository at this point in the history
  • Loading branch information
alaz authored Sep 19, 2024
2 parents 6d71cf2 + 13d53f6 commit e5ad758
Show file tree
Hide file tree
Showing 7 changed files with 114 additions and 4 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,13 @@ end
## Supported

- [Ahrefs](https://ahrefs.com/robot)
- [Amazonbot](https://developer.amazon.com/amazonbot)
- [Amazon AdBot](https://adbot.amazon.com/index.html)
- [Applebot](https://support.apple.com/en-us/119829)
- [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
- [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
- [BLEXBot (WebMeUp)](http://webmeup-crawler.com/)
- [DataForSEO](https://dataforseo.com/dataforseo-bot)
- [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
- [Google crawlers](https://support.google.com/webmasters/answer/1061943)
- [IAS](https://integralads.com/ias-privacy-data-management/policies/site-indexing-policy/)
Expand Down
1 change: 1 addition & 0 deletions lib/legitbot.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
require_relative 'legitbot/baidu'
require_relative 'legitbot/bing'
require_relative 'legitbot/blexbot'
require_relative 'legitbot/dataforseo'
require_relative 'legitbot/duckduckgo'
require_relative 'legitbot/facebook'
require_relative 'legitbot/google'
Expand Down
5 changes: 3 additions & 2 deletions lib/legitbot/amazon.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

module Legitbot # :nodoc:
# Bot matcher for Amazon's crawlers; vendor documentation for each crawler:
# https://adbot.amazon.com/index.html
# https://developer.amazon.com/amazonbot
#
# NOTE(review): this listing appears to come from a diff view (3 additions,
# 2 deletions) whose +/- markers were lost, so each `domains` / `rule` pair
# below looks like the old line followed by its replacement. Confirm against
# the repository before relying on both lines being present.
class Amazon < BotMatch
# Presumably the DNS suffixes a genuine crawler host must resolve under
# (the trailing dot looks like a fully-qualified name) — verify in BotMatch.
domains 'amazonadbot.com.'
domains 'amazon.', 'amazonadbot.com.'
end

# Associates the listed user-agent tokens with the Amazon matcher.
rule Legitbot::Amazon, %w[AmazonAdBot]
rule Legitbot::Amazon, %w[Amazonbot AmazonAdBot]
end
10 changes: 10 additions & 0 deletions lib/legitbot/dataforseo.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# frozen_string_literal: true

module Legitbot # :nodoc:
  # Matcher for the DataForSEO crawler. Vendor documentation:
  # https://dataforseo.com/dataforseo-bot
  class DataForSEO < BotMatch
    # Presumably the DNS suffix a genuine crawler host must resolve under
    # (trailing dot = fully-qualified name) — verify against BotMatch.
    domains 'dataforseo.com.'
  end

  # Associate the crawler's user-agent token with the matcher above.
  rule DataForSEO, ['DataForSeoBot']
end
27 changes: 25 additions & 2 deletions test/amazon_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_malicious_ua
refute_predicate bot, :valid?
end

def test_valid_ua
def test_user_agent1
bot = Legitbot.bot(
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
'54.166.7.90'
Expand All @@ -40,7 +40,19 @@ def test_valid_ua
assert_predicate bot, :valid?
end

def test_valid_name
# rubocop:disable Layout/LineLength
# An Amazonbot (non-ad crawler) user agent presented from 52.70.240.171 must
# be recognised as a bot and pass validation.
def test_user_agent2
  bot = Legitbot.bot(
    # "KHTML, like Gecko" is written without a backslash: single-quoted Ruby
    # strings do not treat `\,` as an escape, so it would stay in the UA.
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)',
    '52.70.240.171'
  )

  assert bot
  assert_predicate bot, :valid?
end
# rubocop:enable Layout/LineLength

def test_valid_name1
bot = Legitbot.bot(
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
'54.166.7.90'
Expand All @@ -49,6 +61,17 @@ def test_valid_name
assert_equal :amazon, bot.detected_as
end

# rubocop:disable Layout/LineLength
# An Amazonbot (non-ad crawler) user agent presented from 52.70.240.171 must
# be reported as :amazon.
def test_valid_name2
  bot = Legitbot.bot(
    # "KHTML, like Gecko" is written without a backslash: single-quoted Ruby
    # strings do not treat `\,` as an escape, so it would stay in the UA.
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)',
    '52.70.240.171'
  )

  assert_equal :amazon, bot.detected_as
end
# rubocop:enable Layout/LineLength

def test_fake_name
bot = Legitbot.bot(
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
Expand Down
60 changes: 60 additions & 0 deletions test/dataforseo_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# frozen_string_literal: true

require_relative 'test_helper'

# Checks the DataForSEO matcher both directly (by IP) and through
# Legitbot.bot (by user agent + IP). DNS answers presumably come from
# DnsServerMock rather than the network — confirm in test_helper.
class DataForSEOTest < Minitest::Test
  include Minitest::Hooks
  include DnsServerMock

  # User agent string presented by every UA-based test below.
  CRAWLER_UA = 'Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)'
  # Address the tests expect to validate as a genuine crawler.
  GENUINE_IP = '136.243.228.176'
  # Address the tests expect to be rejected.
  IMPOSTOR_IP = '149.210.164.47'
  # Another address, used only to check naming for an unverified request.
  UNRELATED_IP = '81.1.172.108'

  def test_malicious_ip
    refute_predicate Legitbot::DataForSEO.new(IMPOSTOR_IP), :valid?
  end

  def test_valid_ip
    assert_predicate Legitbot::DataForSEO.new(GENUINE_IP), :valid?
  end

  # A matching user agent from a rejected address is still detected as a bot,
  # but must not validate.
  def test_malicious_ua
    bot = Legitbot.bot(CRAWLER_UA, IMPOSTOR_IP)

    assert bot
    refute_predicate bot, :valid?
  end

  def test_valid_ua
    bot = Legitbot.bot(CRAWLER_UA, GENUINE_IP)

    assert bot
    assert_predicate bot, :valid?
  end

  def test_valid_name
    assert_equal :dataforseo, Legitbot.bot(CRAWLER_UA, GENUINE_IP).detected_as
  end

  # The reported name follows the user agent, even for an unrelated address.
  def test_fake_name
    assert_equal :dataforseo, Legitbot.bot(CRAWLER_UA, UNRELATED_IP).detected_as
  end
end
13 changes: 13 additions & 0 deletions test/lib/dns_server_mock.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@
'54.166.7.90' => {
ptr: %w[crawler-54-166-7-90.amazonadbot.com]
},
'52-70-240-171.crawl.amazonbot.amazon' => {
a: %w[52.70.240.171]
},
'52.70.240.171' => {
ptr: %w[52-70-240-171.crawl.amazonbot.amazon]
},

# Apple
'17-58-98-60.applebot.apple.com' => {
Expand All @@ -45,6 +51,13 @@
'65.21.113.197' => {
ptr: %w[pot22.webmeup.com]
},
# DataForSEO
'crawling-gateway-136-243-228-176.dataforseo.com' => {
a: %w[136.243.228.176]
},
'136.243.228.176' => {
ptr: %w[crawling-gateway-136-243-228-176.dataforseo.com]
},

# Google
'crawl-66-249-64-141.googlebot.com' => {
Expand Down

0 comments on commit e5ad758

Please sign in to comment.