From 8453c39a222c6a208241b2d915c85933c87b4542 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Gallego?= Date: Mon, 7 Oct 2013 23:10:13 +0200 Subject: [PATCH] Assert that referer take into account blacklist --- README.md | 3 +- src/ZfrPrerender/Mvc/PrerenderListener.php | 11 ++-- .../Mvc/PrerenderListenerTest.php | 59 ++++++++++++++++++- 3 files changed, 66 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index a63be02..37a2c6a 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,8 @@ that happen very early in the MVC process, before the routing is actually done. #### Blacklist Blacklist a single url path or multiple url paths. Compares using regex, so be specific when possible. If a blacklist -is supplied, all url's will be prerendered except ones containing a blacklist path. +is supplied, all URLs will be pre-rendered except ones containing a blacklist part. Please note that if the referer +matches a blacklist entry, the page won't be pre-rendered either. Here is a sample configuration that prerender all URLs *excepting* the ones that contains "/users/": diff --git a/src/ZfrPrerender/Mvc/PrerenderListener.php b/src/ZfrPrerender/Mvc/PrerenderListener.php index fe64ff2..4fed043 100644 --- a/src/ZfrPrerender/Mvc/PrerenderListener.php +++ b/src/ZfrPrerender/Mvc/PrerenderListener.php @@ -146,10 +146,11 @@ public function shouldPrerenderPage(RequestInterface $request) return false; } - // Finally, return false if it is blacklisted + // Finally, return false if it is blacklisted (or the referer) + $referer = $request->getHeader('Referer') ? 
$request->getHeader('Referer')->getFieldValue() : null; $blacklistUrls = $this->moduleOptions->getBlacklistUrls(); - if (!empty($blacklistUrls) && $this->isBlacklisted($uri, $blacklistUrls)) { + if (!empty($blacklistUrls) && $this->isBlacklisted($uri, $referer, $blacklistUrls)) { return false; } @@ -199,13 +200,15 @@ protected function isWhitelisted($uri, array $whitelistUrls) * Check if the request is blacklisted * * @param string $uri + * @param string $referer * @param array $blacklistUrls * @return bool */ - protected function isBlacklisted($uri, array $blacklistUrls) + protected function isBlacklisted($uri, $referer, array $blacklistUrls) { foreach ($blacklistUrls as $blacklistUrl) { - $match = preg_match('`' . $blacklistUrl . '`i', $uri); + $pattern = '`' . $blacklistUrl . '`i'; + $match = preg_match($pattern, $uri) + preg_match($pattern, $referer); if ($match > 0) { return true; diff --git a/tests/ZfrPrerenderTest/Mvc/PrerenderListenerTest.php b/tests/ZfrPrerenderTest/Mvc/PrerenderListenerTest.php index c3e3598..0c4b952 100644 --- a/tests/ZfrPrerenderTest/Mvc/PrerenderListenerTest.php +++ b/tests/ZfrPrerenderTest/Mvc/PrerenderListenerTest.php @@ -40,6 +40,7 @@ public function shouldRenderProvider() array( 'user_agent' => '', 'uri' => 'http://www.example.com', + 'referer' => 'http://google.com', 'ignored_extensions' => array(), 'whitelist' => array(), 'blacklist' => array(), @@ -49,6 +50,7 @@ public function shouldRenderProvider() array( 'user_agent' => 'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25', 'uri' => 'http://www.example.com', + 'referer' => 'http://google.com', 'ignored_extensions' => array(), 'whitelist' => array(), 'blacklist' => array(), @@ -58,6 +60,7 @@ public function shouldRenderProvider() array( 'user_agent' => 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', 'uri' => 'http://www.example.com', + 'referer' => 'http://google.com', 
'ignored_extensions' => array(), 'whitelist' => array(), 'blacklist' => array(), @@ -67,6 +70,7 @@ public function shouldRenderProvider() array( 'user_agent' => 'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)', 'uri' => 'http://www.example.com', + 'referer' => 'http://google.com', 'ignored_extensions' => array(), 'whitelist' => array(), 'blacklist' => array(), @@ -76,6 +80,7 @@ public function shouldRenderProvider() array( 'user_agent' => 'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)', 'uri' => 'http://www.example.com', + 'referer' => 'http://google.com', 'ignored_extensions' => array(), 'whitelist' => array(), 'blacklist' => array(), @@ -85,6 +90,7 @@ public function shouldRenderProvider() array( 'user_agent' => 'Baiduspider+(+http://www.baidu.com/search/spider.htm)', 'uri' => 'http://www.example.com', + 'referer' => 'http://google.com', 'ignored_extensions' => array(), 'whitelist' => array(), 'blacklist' => array(), @@ -94,6 +100,7 @@ public function shouldRenderProvider() array( 'user_agent' => 'Baiduspider+(+http://www.baidu.com/search/spider.htm)', 'uri' => 'http://www.example.com/screen.css', + 'referer' => 'http://google.com', 'ignored_extensions' => array('.jpg', '.css'), 'whitelist' => array(), 'blacklist' => array(), @@ -103,6 +110,7 @@ public function shouldRenderProvider() array( 'user_agent' => 'Baiduspider+(+http://www.baidu.com/search/spider.htm)', 'uri' => 'http://www.example.com', + 'referer' => 'http://google.com', 'ignored_extensions' => array(), 'whitelist' => array('example.com'), 'blacklist' => array(), @@ -112,6 +120,7 @@ public function shouldRenderProvider() array( 'user_agent' => 'Baiduspider+(+http://www.baidu.com/search/spider.htm)', 'uri' => 'http://www.example.com/users/michael', + 'referer' => 'http://google.com', 'ignored_extensions' => array(), 'whitelist' => array('/users/.*'), 'blacklist' => array(), @@ -121,6 +130,7 @@ public function shouldRenderProvider() array( 
'user_agent' => 'Baiduspider+(+http://www.baidu.com/search/spider.htm)', 'uri' => 'http://www.example.com/foo', + 'referer' => 'http://google.com', 'ignored_extensions' => array(), 'whitelist' => array('/bar'), 'blacklist' => array(), @@ -130,6 +140,7 @@ public function shouldRenderProvider() array( 'user_agent' => 'Baiduspider+(+http://www.baidu.com/search/spider.htm)', 'uri' => 'http://www.example.com/foo', + 'referer' => 'http://google.com', 'ignored_extensions' => array(), 'whitelist' => array(), 'blacklist' => array('/foo'), @@ -139,15 +150,57 @@ public function shouldRenderProvider() array( 'user_agent' => 'Baiduspider+(+http://www.baidu.com/search/spider.htm)', 'uri' => 'http://www.example.com/users/julia', + 'referer' => 'http://google.com', 'ignored_extensions' => array(), 'whitelist' => array(), 'blacklist' => array('/users/*'), 'should_prerender' => false ), + // Test a bot crawler that is not blacklisted + array( + 'user_agent' => 'Baiduspider+(+http://www.baidu.com/search/spider.htm)', + 'uri' => 'http://www.example.com/bar', + 'referer' => 'http://google.com', + 'ignored_extensions' => array(), + 'whitelist' => array(), + 'blacklist' => array('/foo'), + 'should_prerender' => true + ), + // Test a bot crawler and a referer that is blacklisted + array( + 'user_agent' => 'Baiduspider+(+http://www.baidu.com/search/spider.htm)', + 'uri' => 'http://www.example.com/foo', + 'referer' => '/search', + 'ignored_extensions' => array(), + 'whitelist' => array(), + 'blacklist' => array('/search'), + 'should_prerender' => false + ), + // Test a bot crawler and a referer that is not blacklisted + array( + 'user_agent' => 'Baiduspider+(+http://www.baidu.com/search/spider.htm)', + 'uri' => 'http://www.example.com/foo', + 'referer' => '/search', + 'ignored_extensions' => array(), + 'whitelist' => array(), + 'blacklist' => array(), + 'should_prerender' => true + ), + // Test a bot crawler and a referer that is not blacklisted by a regex + array( + 'user_agent' => 
'Baiduspider+(+http://www.baidu.com/search/spider.htm)', + 'uri' => 'http://www.example.com/foo', + 'referer' => '/profile/search', + 'ignored_extensions' => array(), + 'whitelist' => array(), + 'blacklist' => array('^/search', 'help'), + 'should_prerender' => true + ), // Test a bot crawler that combines whitelist and blacklist (1) array( 'user_agent' => 'Baiduspider+(+http://www.baidu.com/search/spider.htm)', 'uri' => 'http://www.example.com/users/julia', + 'referer' => 'http://google.com', 'ignored_extensions' => array(), 'whitelist' => array('/users/*'), 'blacklist' => array('/users/julia'), @@ -157,6 +210,7 @@ public function shouldRenderProvider() array( 'user_agent' => 'Baiduspider+(+http://www.baidu.com/search/spider.htm)', 'uri' => 'http://www.example.com/users/julia', + 'referer' => 'http://google.com', 'ignored_extensions' => array(), 'whitelist' => array('/users/*'), 'blacklist' => array('/users/michael'), @@ -168,7 +222,7 @@ public function shouldRenderProvider() /** * @dataProvider shouldRenderProvider */ - public function testShouldRender($userAgent, $uri, $ignoredExtensions, $whitelist, $blacklist, $result) + public function testShouldRender($userAgent, $uri, $referer, $ignoredExtensions, $whitelist, $blacklist, $result) { $moduleOptions = ServiceManagerFactory::getServiceManager()->get('ZfrPrerender\Options\ModuleOptions'); $moduleOptions->setIgnoredExtensions($ignoredExtensions); @@ -177,7 +231,8 @@ public function testShouldRender($userAgent, $uri, $ignoredExtensions, $whitelis $request = new HttpRequest(); $request->setUri($uri); - $request->getHeaders()->addHeaderLine('User-Agent', $userAgent); + $request->getHeaders()->addHeaderLine('User-Agent', $userAgent) + ->addHeaderLine('Referer', $referer); $listener = new PrerenderListener($moduleOptions);