From e5e8898c3ca3ea654871c719329a40e6a7d34c76 Mon Sep 17 00:00:00 2001
From: Botary
Date: Thu, 25 Oct 2018 04:45:16 +0900
Subject: [PATCH] Fixes #1443: Support both Python 2.7 and >2.7 (#1482)

* Support both python 2.7 and >2.7

* Print Python version
---
 build-disconnect.py => build-disconnect2.py |   0
 build-disconnect3.py                        | 158 ++++++++++++++++++++
 checkout.sh                                 |  17 ++-
 3 files changed, 174 insertions(+), 1 deletion(-)
 rename build-disconnect.py => build-disconnect2.py (100%)
 create mode 100755 build-disconnect3.py

diff --git a/build-disconnect.py b/build-disconnect2.py
similarity index 100%
rename from build-disconnect.py
rename to build-disconnect2.py
diff --git a/build-disconnect3.py b/build-disconnect3.py
new file mode 100755
index 0000000000..9e2061fd9f
--- /dev/null
+++ b/build-disconnect3.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+
+import json
+import urllib.parse
+
+categories = ("Advertising", "Analytics", "Social", "Content")
+
+def output_filename(category):
+    """Return the output path of the block list for ``category``."""
+    return "Lists/disconnect-{0}.json".format(category.lower())
+
+def url_filter(resource):
+    """Return the url-filter regex for ``resource`` and its subdomains."""
+    return "^https?://([^/]+\\.)?" + resource.replace(".", "\\.")
+
+
+def unless_domain(properties):
+    """Return ``properties`` with ``*`` prepended to every domain."""
+    return ["*" + domain for domain in properties]
+
+
+def create_blocklist_entry(resource, properties):
+    """Return a third-party "block" rule for ``resource``.
+
+    ``properties`` supplies the domains of the rule's "unless-domain" list.
+    """
+    return {"trigger": {"url-filter": url_filter(resource),
+                        "load-type": ["third-party"],
+                        "unless-domain": unless_domain(properties)},
+            "action": {"type": "block"}}
+
+
+def generate_entity_list(path="shavar-prod-lists/disconnect-entitylist.json"):
+    """Write Lists/disconnect.json with one rule per entity-list resource."""
+    with open(path) as fp:
+        entitylist = json.load(fp)
+
+    blocklist = []
+
+    for value in entitylist.values():
+        for resource in value['resources']:
+            entry = create_blocklist_entry(resource, value['properties'])
+            blocklist.append(entry)
+
+    out = json.dumps(blocklist, indent=0,
+                     separators=(',', ':')).replace('\n', '')
+    # Serialise first, then write inside ``with`` so the file is always closed.
+    with open('Lists/disconnect.json', 'w') as f:
+        f.write(out)
+
+    # Human-readable output.
+    # print json.dumps(blocklist, indent=2)
+
+def add_entry_to_blocklist(blocklist, entities, name, property_, resources):
+    """Append one rule per entry of ``resources`` to ``blocklist``.
+
+    The rules exempt the entity's "properties" when ``name`` is in
+    ``entities``; otherwise the host of ``property_`` minus a leading "www".
+    """
+    if property_ == "dnt":
+        return  # we don't handle dnt entries yet
+    if name in entities:
+        props = entities[name]["properties"]
+    else:
+        prop = urllib.parse.urlparse(property_).netloc.split(".")
+        if prop[0] == "www":
+            prop.pop(0)
+        props = [".".join(prop)]
+    for res in resources:
+        blocklist.append(create_blocklist_entry(res, props))
+
+
+def generate_blacklists(blacklist="shavar-prod-lists/disconnect-blacklist.json", entitylist="shavar-prod-lists/disconnect-entitylist.json"):
+    """Write a block list file for each category kept from ``blacklist``."""
+    # Generating the categorical lists requires some manual tweaking to the
+    # data at the moment.
+
+    def find_entry(entry, list_):
+        # Return the first dict whose only key is ``entry``, else None.
+        for d in list_:
+            if list(d.keys()) == [entry]:
+                return d
+
+    # First, massage the existing categorical data slightly
+    with open(blacklist) as fp:
+        categories = json.load(fp)["categories"]
+        # Move the Twitter and Facebook entries into the Social category from
+        # the Disconnect category
+        disconnect = categories["Disconnect"]
+        del categories["Disconnect"]
+        categories["Social"].append(find_entry("Facebook", disconnect))
+        categories["Social"].append(find_entry("Twitter", disconnect))
+
+    # Load the entitylist to map the whitelist entries.
+    with open(entitylist) as fp:
+        entities = json.load(fp)
+
+    # Change the Google entries for the respective categories
+    with open("shavar-prod-lists/google_mapping.json") as fp:
+        tweaks = json.load(fp)["categories"]
+        for category in ("Advertising", "Analytics", "Social"):
+            cat = categories[category]
+            goog = find_entry("Google", cat)
+            if goog is None:
+                # No data exist for this category, just append
+                cat.append(tweaks[category][0])
+            else:
+                # NOTE(review): this branch indexes tweaks[category] by key
+                # while the branch above indexes it by position; confirm the
+                # shape of google_mapping.json.
+                for prop, resources in list(tweaks[category]["Google"].items()):
+                    if prop not in goog:
+                        goog[prop] = resources
+                        continue
+                    for resource in resources:
+                        if resource not in goog[prop]:
+                            goog[prop].append(resource)
+                    goog[prop].sort()
+
+    for category in categories:
+        blocklist = []
+
+        for entity in categories[category]:
+            for name, domains in entity.items():
+                for property_, resources in domains.items():
+                    add_entry_to_blocklist(blocklist, entities, name, property_, resources)
+
+        print("{cat} blacklist has {count} entries."
+              .format(cat=category, count=len(blocklist)))
+
+        with open(output_filename(category), "w") as fp:
+            out = json.dumps(blocklist, indent=0,
+                             separators=(',', ':'), sort_keys=True).replace('\n', '')
+            fp.write(out)
+
+def format_one_rule_per_line():
+    """Insert a newline before each rule in every generated category file."""
+    for category in categories:
+        name = output_filename(category)
+        with open(name) as fp:
+            line = fp.read()
+        line = line.replace('{"action"', '\n{"action"')
+        with open(name, "w") as fp:
+            fp.write(line)
+
+
+if __name__ == "__main__":
+    # generate_entity_list()
+    generate_blacklists()
+
+    # format as one action per-line, which is easier to read and diff
+    format_one_rule_per_line()
diff --git a/checkout.sh b/checkout.sh
index ebe9d6f41b..411185f665 100755
--- a/checkout.sh
+++ b/checkout.sh
@@ -4,6 +4,21 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
-./build-disconnect.py
+if ! hash python; then
+    echo "python is not installed"
+    exit 1
+fi
+
+ver=$(python -V 2>&1 | sed 's/.* \([0-9]\).\([0-9]\).*/\1\2/')
+if [ "$ver" -lt "27" ]; then
+    echo "This script requires python 2.7 or greater"
+    exit 1
+elif [ "$ver" -eq "27" ]; then
+    echo "Python 27 detected. Running build-disconnect2.py"
+    ./build-disconnect2.py
+elif [ "$ver" -gt "27" ]; then
+    echo "Python ${ver} detected. Running build-disconnect3.py"
+    ./build-disconnect3.py
+fi
 
 carthage bootstrap --platform iOS