diff --git a/lib/cms_scanner/target.rb b/lib/cms_scanner/target.rb index 7f16536e..acf5d02f 100644 --- a/lib/cms_scanner/target.rb +++ b/lib/cms_scanner/target.rb @@ -37,6 +37,11 @@ def vulnerable? raise NotImplementedError end + # @return [ Regexp ] + def url_pattern + @url_pattern ||= Regexp.new(Regexp.escape(url).gsub(/https?/i, 'https?'), Regexp::IGNORECASE) + end + # @param [ String ] xpath # @param [ Regexp ] pattern # @param [ Typhoeus::Response, String ] page diff --git a/lib/cms_scanner/target/scope.rb b/lib/cms_scanner/target/scope.rb index 4524618e..8e41ba42 100644 --- a/lib/cms_scanner/target/scope.rb +++ b/lib/cms_scanner/target/scope.rb @@ -37,9 +37,22 @@ def in_scope_urls(res, xpath = '//@href|//@src|//@data-src') found end + # Similar to Target#url_pattern but considering the in scope domains as well + # + # @return [ Regexp ] + def scope_url_pattern + return @scope_url_pattern if @scope_url_pattern + + domains = [uri.host + uri.path] + scope.domains[1..-1]&.map(&:to_s) + scope.invalid_domains + + domains.map! { |d| Regexp.escape(d.gsub(%r{/$}, '')).sub('\*', '.*') } + + @scope_url_pattern = %r{https?://(?:#{domains.join('|')})/?}i + end + # Scope Implementation class Scope - # @return [ Array ] The valid domains in scope def domains @domains ||= [] end diff --git a/spec/lib/target/scope_spec.rb b/spec/lib/target/scope_spec.rb index 4dbe393b..dad8d9f3 100644 --- a/spec/lib/target/scope_spec.rb +++ b/spec/lib/target/scope_spec.rb @@ -106,4 +106,22 @@ end end end + + describe '#scope_url_pattern' do + context 'when no scope given' do + its(:scope_url_pattern) { should eql %r{https?://(?:e\.org)/?}i } + end + + context 'when scope given' do + let(:opts) { super().merge(scope: ['*.cdn.org', 'wp-lamp']) } + + its(:scope_url_pattern) { should eql %r{https?://(?:e\.org|.*\.cdn\.org|wp\-lamp)/?}i } + + context 'when target URL has a subdir' do + let(:url) { 'https://e.org/blog' } + + its(:scope_url_pattern) { should eql %r{https?://(?:e\.org/blog|.*\.cdn\.org|wp\-lamp)/?}i } + end + end + end end diff --git a/spec/lib/target_spec.rb b/spec/lib/target_spec.rb index 156fdd38..a1e793b1 100644 --- a/spec/lib/target_spec.rb +++ b/spec/lib/target_spec.rb @@ -35,6 +35,16 @@ end end + describe '#url_pattern' do + its(:url_pattern) { should eql %r{https?://e\.org/}i } + + context 'when already https protocol' do + let(:url) { 'htTpS://ex.com/' } + + its(:url_pattern) { should eql %r{https?://ex\.com/}i } + end + end + describe '#xpath_pattern_from_page' do # Handled in #comments_from_page & #javascripts_from_page end