diff --git a/Makefile b/Makefile index 7109036b..c5bc178d 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,8 @@ # 2015-11-06 # James A. Overton # +# Last major modification: 2019-02-10, Michael Cuffaro +# # This file contains code for working with # Open Biomedical Ontoloiges (OBO) # Persistent Uniform Resource Locators (PURLs). @@ -12,10 +14,8 @@ # Required software: # # - [GNU Make](http://www.gnu.org/software/make/) to run this file -# - [kwalify](http://www.kuwata-lab.com/kwalify/) for YAML validation # - [Python 3](https://www.python.org/downloads/) to run scripts # - [PyYAML](http://pyyaml.org/wiki/PyYAML) for translation to Apache -# - [travis.rb](https://github.com/travis-ci/travis.rb) for Travis-CI ### Configuration @@ -29,9 +29,6 @@ # Defaults to the list of config/*.yml files. ONTOLOGY_IDS ?= $(patsubst config/%.yml,%,$(wildcard config/*.yml)) -# The GitHub owner/project -PROJECT ?= OBOFoundry/purl.obolibrary.org - # Local development server. DEVELOPMENT ?= localhost @@ -53,219 +50,107 @@ SHELL := bash ### Basic Operations -# Default goal: Remove generated files, validate config, and regenerate. +# Default goal: Remove generated files and regenerate. .PHONY: all -all: clean validate build +all: clean build -# Remove directories with generated files. +# Remove directories with generated files and tests. .PHONY: clean clean: rm -rf temp tests -### Validate YAML Config +### Build recipe for a single project. # -# Use kwalify and the tools/config.schema.yml -# to validate all YAML configuration files. -# If any INVALID results are found, exit with an error. -.PHONY: validate -validate: - kwalify -f tools/config.schema.yml config/*.yml \ - | awk '{print} /INVALID/ {status=1} END {exit status}' - -# Validate a single configuration file. -.PHONY: validate-% -validate-%: - kwalify -f tools/config.schema.yml config/$*.yml +# Convert the YAML file of a single project to a .htaccess file and place it +# in the temp/ directory. +.PHONY: build-% +build-%: + tools/translate_yaml.py --input_files config/$*.yml --output_dir temp + @echo "Built files in temp/$*" -### Generate Apache Config -# -# Convert the YAML configuration files -# to Apache .htaccess files with RedirectMatch directives. -# There are three types: -# -# - base_redirects: when the project's base_url points to something -# - product: for a project's main OWL file -# - term: for a project's terms -# - entries: PURLs under the project's base_url +# Build recipe for all projects # -# The first three are inserted into www/obo/.htaccess -# while the last is in the project's www/obo/project/.htaccess -# -# These files are built in the `temp/` directory -# then `temp/obo` replaces `www/obo` as the very last step -# to keep Apache downtime to an absolute minimum. -temp/obo temp/base_redirects temp/products temp/terms: - mkdir -p $@ +# Convert the YAML files of every project to .htaccess files and place them +# in the www/obo directory. -temp/base_redirects/%.htaccess: config/%.yml temp/base_redirects - tools/translate-base-redirects.py $< $@ - -temp/products/%.htaccess: config/%.yml temp/products - tools/translate-products.py $< $@ - -temp/terms/%.htaccess: config/%.yml temp/terms - tools/translate-terms.py $< $@ - -# Generate temp/obo/foo/.htaccess file -# and a symbolic link from the IDSPACE: -# temp/obo/FOO -> temp/obo/foo -# NOTE: The last line removes spurious links -# on case insensitive file systems such as Mac OS X. 
-temp/obo/%/.htaccess: config/%.yml - mkdir -p temp/obo/$* - tools/translate-entries.py $< $@ - < $< \ - grep '^idspace:' \ - | sed 's/^idspace://' \ - | tr -d ' ' \ - | awk '{print "$* temp/obo/" $$0}' \ - | xargs -t ln -s - rm -f temp/obo/$*/$* - -# Build temp files for a single project. -.PHONY: build-% -build-%: validate-% temp/obo/%/.htaccess temp/base_redirects/%.htaccess temp/products/%.htaccess temp/terms/%.htaccess - @echo "Built files in temp/$*" +# Final output directory: +www/obo/: + mkdir -p $@ +# When a new build is created, the old build's files are moved here, in a subdirectory +# whose name is generated in a portable way using python (see the target-specific +# variable BACKUP below). backup/: mkdir $@ -# Get name of a dated-backup directory, in a portable way. -BACKUP = backup/obo-$(shell python -c "import time,os;print(time.strftime('%Y%m%d-%H%M%S',time.gmtime(os.path.getmtime('www/obo'))))") - -# Convert all YAML configuration files to .htaccess -# and move the special `obo` .htaccess file. -# Generate .htaccess files for all YAML configuration files. +# The main build target: .PHONY: build -build: $(foreach o,$(ONTOLOGY_IDS),temp/obo/$o/.htaccess) -build: $(foreach o,$(ONTOLOGY_IDS),temp/base_redirects/$o.htaccess) -build: $(foreach o,$(ONTOLOGY_IDS),temp/products/$o.htaccess) -build: $(foreach o,$(ONTOLOGY_IDS),temp/terms/$o.htaccess) -build: | backup/ - cat temp/obo/obo/.htaccess > temp/obo/.htaccess - echo '' >> temp/obo/.htaccess - echo '### Generated from project configuration files' >> temp/obo/.htaccess - echo '' >> temp/obo/.htaccess - cat temp/base_redirects/*.htaccess >> temp/obo/.htaccess - cat temp/products/*.htaccess >> temp/obo/.htaccess - cat temp/terms/*.htaccess >> temp/obo/.htaccess - rm -rf temp/obo/obo - rm -rf temp/obo/OBO +build: BACKUP = backup/obo-$(shell python -c "import time,os;print(time.strftime('%Y%m%d-%H%M%S',time.gmtime(os.path.getmtime('www/obo'))))") +build: | backup/ www/obo/ + tools/translate_yaml.py --input_dir config --output_dir temp/obo + rm -rf temp/obo/obo temp/obo/OBO -test -e www/obo && mv www/obo $(BACKUP) mv temp/obo www/obo + rmdir temp ### Test Development Apache Config # # Make HTTP HEAD requests quickly against the DEVELOPMENT server # to ensure that redirects are working properly. -tests/development: - mkdir -p $@ - -# Run tests for a single YAML configuration file. -# against the DEVELOPMENT server, -# making requests every 0.01 seconds. -tests/development/%.tsv: config/%.yml tests/development - tools/test.py --delay=0.01 $(DEVELOPMENT) $< $@ - -# Run all tests against development and fail if any FAIL line is found. +# Fail if any FAIL line is found in any of them. .PHONY: test -test: $(foreach o,$(ONTOLOGY_IDS),tests/development/$o.tsv) - @cat tests/development/*.tsv \ - | awk '/^FAIL/ {status=1; print} END {exit status}' +test: + tools/test.py --delay=0.01 --output=tests/development --domain=$(DEVELOPMENT) config/*.yml ### Test Production Apache Config # # Make HTTP HEAD requests slowly against the PRODUCTION server # to ensure that redirects are working properly. -tests/production: - mkdir -p $@ - -# Run tests for a single YAML configuration file -# against the PRODUCTION server, -# making requests every 1 second. -tests/production/%.tsv: config/%.yml tests/production - tools/test.py --delay=1 $(PRODUCTION) $< $@ - -# Run all tests against production and fail if any FAIL line is found. 
.PHONY: test-production -test-production: $(foreach o,$(ONTOLOGY_IDS),tests/production/$o.tsv) - @cat tests/production/*.tsv \ - | awk '/^FAIL/ {status=1; print} END {exit status}' +test-production: + tools/test.py --delay=1 --output=tests/production --domain=$(PRODUCTION) config/*.yml ### Test Tools # # Test our tools on files in examples/ directory. -tests/examples: - mkdir -p $@ - -tests/examples/%.yml: tools/examples/%.xml tools/examples/%.yml tests/examples - tools/migrate.py $* $< $@ - diff tools/examples/$*.yml $@ - -tests/examples/%.base_redirects.htaccess: tools/examples/%.yml tests/examples - tools/translate-base-redirects.py $< $@ - diff tools/examples/$*.base_redirects.htaccess $@ - -tests/examples/%.products.htaccess: tools/examples/%.yml tests/examples - tools/translate-products.py $< $@ - diff tools/examples/$*.products.htaccess $@ - -tests/examples/%.terms.htaccess: tools/examples/%.yml tests/examples - tools/translate-terms.py $< $@ - diff tools/examples/$*.terms.htaccess $@ - -tests/examples/%.htaccess: tools/examples/%.yml tests/examples - tools/translate-entries.py $< $@ - diff tools/examples/$*.htaccess $@ +.PHONY: test-example1 +test-example1: + tools/migrate.py test1 tools/examples/test1/test1.xml tests/examples/test1/test1.yml + diff tools/examples/test1/test1.yml tests/examples/test1/test1.yml + +.PHONY: test-example2 +test-example2: + tools/translate_yaml.py --input_dir tools/examples/test2/ --output_dir tests/examples/test2/ + diff tools/examples/test2/test2.htaccess tests/examples/test2/.htaccess + diff tools/examples/test2/obo/obo.htaccess tests/examples/test2/obo/.htaccess + diff tools/examples/test2/test2/test2.htaccess tests/examples/test2/test2/.htaccess .PHONY: test-examples -test-examples: tests/examples/test1.yml -test-examples: tests/examples/test2.htaccess -test-examples: tests/examples/test2.base_redirects.htaccess -test-examples: tests/examples/test2.products.htaccess -test-examples: tests/examples/test2.terms.htaccess +test-examples: test-example1 test-example2 ### Update Repository # -# Check Travis-CI for the last build. -# If it did not pass, then fail. -# If it is the same as .current_build, then fail. -# Otherwise replace .current_build, -# pull from git, and run a new `make`. +# Run the safe-update.py script which does the following: +# - Check Travis-CI for the last build. +# - If it did not pass, or if it is the same as the current build, then do nothing. +# - Otherwise replace .current_build, pull from git, and run a new `make`. safe-update: - travis history --no-interactive \ - --repo $(PROJECT) --branch master --limit 1 \ - > .travis_build - @grep ' passed: ' .travis_build - @echo 'Last build is green, but might not be new' - @diff .current_build .travis_build && exit 1 || exit 0 - @echo 'New green build available' - @mv .travis_build .current_build - git pull - make - - -### Migrate Configuration from PURL.org + tools/safe-update.py + + +### Code style and lint checks for python source files. # -# Given an ontology ID (usually lower-case), -# fetch and translate a PURL.org XML file -# into a YAML configuration file. -# This should be a one-time migration. -# Do not overwrite existing config file. -PURL_XML = https://purl.org/admin/purl/?target=&seealso=&maintainers=&explicitmaintainers=&tombstone=false&p_id= - -.PHONY: migrate-% -migrate-%: - @test ! 
-s config/$*.yml \
-	|| (echo 'Refusing to overwrite config/$*.yml'; exit 1)
-	mkdir -p migrations
-	test -s migrations/$*.xml \
-	|| curl --fail -o migrations/$*.xml "$(PURL_XML)/obo/$**"
-	mkdir -p config
-	tools/migrate.py $* migrations/$*.xml config/$*.yml
+# Note that `|| true` is appended so that `make` ignores the exit code in both cases.
+.PHONY: style
+style:
+	pep8 --max-line-length=100 --ignore E129,E126,E121,E111,E114 tools/*.py || true
+
+.PHONY: delint
+delint:
+	python3 -m pyflakes tools/*.py || true
diff --git a/README.md b/README.md
index 76b51b00..34dcd1b3 100644
--- a/README.md
+++ b/README.md
@@ -202,6 +202,29 @@ or delete the VM with
 
 You can test against the production PURL server using `make test-production`. We only make one request per second, to avoid abusing the server, so this can take a long time.
 
+### Optional: Sync VirtualBox Guest Additions
+
+If you keep your development VM for any length of time, you may be presented with this message upon starting your VM:
+```
+==> default: A newer version of the box 'ubuntu/trusty64' is available! You currently
+==> default: have version '20190122.1.1'. The latest is version '20190206.0.0'. Run
+==> default: `vagrant box update` to update.
+```
+If you upgrade, then the next time you resume your box you may see this warning:
+```
+[default] The guest additions on this VM do not match the install version of
+VirtualBox! This may cause things such as forwarded ports, shared
+folders, and more to not work properly. If any of those things fail on
+this machine, please update the guest additions and repackage the
+box.
+```
+
+To automatically sync the VirtualBox Guest Additions at startup (and thus avoid this warning), you can install `vagrant-vbguest` like so:
+
+- `vagrant plugin install vagrant-vbguest` (run in the `tools` directory on the host machine)
+
+Now, whenever you bring up your VM, the plugin will check the version of the guest additions and automatically bring them up to date as needed.
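+
+For example, a full refresh of the box and its guest additions might then look like this (a sketch; run from the `tools` directory on the host, and note that the plugin does its check during the normal `vagrant up`):
+
+```
+vagrant plugin install vagrant-vbguest
+vagrant box update
+vagrant up
+```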
+ ## Deployment diff --git a/config/dpo.yml b/config/dpo.yml index bd251a45..420ea64e 100644 --- a/config/dpo.yml +++ b/config/dpo.yml @@ -10,6 +10,3 @@ products: - dpo.owl: https://raw.githubusercontent.com/FlyBase/flybase-controlled-vocabulary/master/releases/dpo.owl term_browser: ontobee -example_terms: - -entries: diff --git a/config/xao.yml b/config/xao.yml index 4698ef4f..a50314bb 100644 --- a/config/xao.yml +++ b/config/xao.yml @@ -8,7 +8,6 @@ products: - xao.obo: http://ontologies.berkeleybop.org/xao.obo term_browser: ontobee -example_terms: entries: - prefix: /tracker/ diff --git a/tools/config.schema.json b/tools/config.schema.json new file mode 100644 index 00000000..65dbbcda --- /dev/null +++ b/tools/config.schema.json @@ -0,0 +1,102 @@ +{ + "properties": { + "idspace": { + "_comment": "See issue #82", + "type": "string", + "pattern": "^[A-Za-z][A-Za-z0-9_]+$" + }, + "base_url": { + "type": "string", + "pattern": "^\\/obo" + }, + "base_redirect": { + "type": "string" + }, + "products": { + "type": "array", + "items": { + "type": "object", + "patternProperties": { + "\\.owl$": { "type": "string" }, + "\\.obo$": { "type": "string" } + } + } + }, + "term_browser": { + "type": "string", + "pattern": "^(ontobee|custom)$" + }, + "example_terms": { + "type": "array", + "items": { + "type": "string" + } + }, + "tests": { + "type": "array", + "items": { + "type": "object", + "properties": { + "from": { + "type": "string", + "pattern": "^\\/" + }, + "to": { + "type": "string", + "pattern": "^(https?|ftp)\\:\\/\\/[a-zA-Z0-9][\\/\\.\\-\\:\\?\\=\\&\\#\\%\\!\\$\\~\\+\\w]+$" + } + }, + "required": ["from", "to"] + } + }, + "entries": { + "type": "array", + "items": { + "type": "object", + "properties": { + "exact": { + "_comment": "Note: JSON-Schema cannot ensure that `exact` mappings are unique", + "type": "string", + "pattern": "^\\/" + }, + "prefix": { + "_comment": "Note: JSON-Schema cannot ensure that `prefix` mappings are unique", + "type": "string", + "pattern": "^\\/" + }, + "regex": { + "_comment": "Note: JSON-Schema cannot ensure that `regex` mappings are unique", + "type": "string" + }, + "replacement": { + "type": "string", + "pattern": "^(https?|ftp)\\:\\/\\/[a-zA-Z0-9][\\/\\.\\-\\:\\?\\=\\&\\#\\%\\!\\$\\~\\+\\w]+$" + }, + "status": { + "type": "string", + "pattern": "^(permanent|temporary|see other)$" + }, + "tests": { + "type": "array", + "items": { + "type": "object", + "properties": { + "from": { + "type": "string", + "pattern": "^\\/" + }, + "to": { + "type": "string", + "pattern": "^(https?|ftp)\\:\\/\\/[a-zA-Z0-9][\\/\\.\\-\\:\\?\\=\\&\\#\\%\\!\\$\\~\\+\\w]+$" + } + }, + "required": ["from", "to"] + } + } + }, + "required": ["replacement"] + } + } + }, + "required": ["idspace", "base_url", "term_browser"] +} diff --git a/tools/config.schema.yml b/tools/config.schema.yml deleted file mode 100644 index b98069e6..00000000 --- a/tools/config.schema.yml +++ /dev/null @@ -1,91 +0,0 @@ -#### -#### Use kwalify and this schema to validate the config files. 
-#### Example: -#### kwalify -E -m ./tools/config.schema.yml -#### kwalify -E -f ./tools/config.schema.yml ./config/ddanat.yml -#### -type: map -mapping: - "idspace": - type: str - pattern: /^[A-Za-z][A-Za-z0-9_]+$/ # See issue #82 - required: true - "base_url": - type: str - pattern: /^\/obo/ - required: true - "base_redirect": - type: str - required: false - "products": - type: seq - required: false - sequence: - - type: any - "term_browser": - type: str - pattern: /^(ontobee|custom)$/ - required: true - "example_terms": - type: seq - required: false - sequence: - - type: str - "tests": - type: seq - required: false - sequence: - - type: map - mapping: - "from": - type: str - pattern: /^\// - required: true - "to": - type: str - pattern: "/^(https?|ftp)\:\/\/[a-zA-Z0-9][\/\.\-\:\?\=\&\#\%\!\$\~\+\w]+$/" - required: true - "entries": - type: seq - required: false - sequence: - - type: map - mapping: - "exact": - type: str - pattern: /^\// - required: false - unique: true - "prefix": - type: str - pattern: /^\// - required: false - unique: true - "regex": - type: str - required: false - unique: true - "replacement": - type: str - pattern: "/^(https?|ftp)\:\/\/[a-zA-Z0-9][\/\.\-\:\?\=\&\#\%\!\$\~\+\w]+$/" - required: true - unique: false - "status": - type: str - required: false - pattern: /^(permanent|temporary|see other)$/ - "tests": - type: seq - required: false - sequence: - - type: map - mapping: - "from": - type: str - pattern: /^\// - required: true - "to": - type: str - pattern: "/^(https?|ftp)\:\/\/[a-zA-Z0-9][\/\.\-\:\?\=\&\#\%\!\$\~\+\w]+$/" - required: true - diff --git a/tools/examples/test1.htaccess b/tools/examples/test1/test1.htaccess similarity index 100% rename from tools/examples/test1.htaccess rename to tools/examples/test1/test1.htaccess diff --git a/tools/examples/test1.xml b/tools/examples/test1/test1.xml similarity index 100% rename from tools/examples/test1.xml rename to tools/examples/test1/test1.xml diff --git a/tools/examples/test1.yml b/tools/examples/test1/test1.yml similarity index 100% rename from tools/examples/test1.yml rename to tools/examples/test1/test1.yml diff --git a/tools/examples/test2.base_redirects.htaccess b/tools/examples/test2.base_redirects.htaccess deleted file mode 100644 index 38063a20..00000000 --- a/tools/examples/test2.base_redirects.htaccess +++ /dev/null @@ -1,3 +0,0 @@ -# Base redirect for TEST2 -RedirectMatch temp "(?i)^/obo/test2$" "http://example.org/test2" - diff --git a/tools/examples/test2.htaccess b/tools/examples/test2.htaccess deleted file mode 100644 index f6761a14..00000000 --- a/tools/examples/test2.htaccess +++ /dev/null @@ -1,7 +0,0 @@ -# DO NOT EDIT THIS FILE! -# Automatically generated from "tools/examples/test2.yml". -# Edit that source file then regenerate this file. 
- -RedirectMatch temp "(?i)^/obo/test2/project$" "http://example.org/project.html" -RedirectMatch temp "(?i)^/obo/test2/branches/(.*)$" "http://example.org/branches/$1" -RedirectMatch seeother "(?i)^/obo/test2/TEST_(\d+)$" "http://example.org/about/TEST_$1" diff --git a/tools/examples/test2.products.htaccess b/tools/examples/test2.products.htaccess deleted file mode 100644 index 6e4ffa4f..00000000 --- a/tools/examples/test2.products.htaccess +++ /dev/null @@ -1,4 +0,0 @@ -# Products for TEST2 -RedirectMatch temp "(?i)^/obo/test2.owl$" "http://example.org/test2.owl" -RedirectMatch temp "(?i)^/obo/test2.obo$" "http://example.org/test2.obo" - diff --git a/tools/examples/test2.terms.htaccess b/tools/examples/test2.terms.htaccess deleted file mode 100644 index bf5f71ea..00000000 --- a/tools/examples/test2.terms.htaccess +++ /dev/null @@ -1,3 +0,0 @@ -# Term redirect for TEST2 -RedirectMatch seeother "^/obo/TEST2_(\d+)$" "http://www.ontobee.org/browser/rdf.php?o=TEST2&iri=http://purl.obolibrary.org/obo/TEST2_$1" - diff --git a/tools/examples/test2.yml b/tools/examples/test2/obo.yml similarity index 50% rename from tools/examples/test2.yml rename to tools/examples/test2/obo.yml index 375a2775..5d03c42e 100644 --- a/tools/examples/test2.yml +++ b/tools/examples/test2/obo.yml @@ -1,17 +1,8 @@ -# PURL configuration for http://purl.obolibrary.org/obo/test2 +# PURL configuration for http://purl.obolibrary.org/obo/obo -idspace: TEST2 -base_url: /obo/test2 - -base_redirect: http://example.org/test2 - -products: -- test2.owl: http://example.org/test2.owl -- test2.obo: http://example.org/test2.obo - -term_browser: ontobee -example_terms: -- TEST2_0000001 +idspace: OBO +base_url: /obo/obo +term_browser: custom entries: - exact: /project @@ -28,4 +19,4 @@ entries: status: see other tests: - from: /TEST_1234 - replacement: http://example.org/about/TEST_1234 + to: http://example.org/about/TEST_1234 diff --git a/tools/examples/test2/obo/obo.htaccess b/tools/examples/test2/obo/obo.htaccess new file mode 100644 index 00000000..e1f35ae2 --- /dev/null +++ b/tools/examples/test2/obo/obo.htaccess @@ -0,0 +1,7 @@ +# DO NOT EDIT THIS FILE! +# Automatically generated from "tools/examples/test2/obo.yml". +# Edit that source file then regenerate this file. + +RedirectMatch temp "(?i)^/obo/obo/project$" "http://example.org/project.html" +RedirectMatch temp "(?i)^/obo/obo/branches/(.*)$" "http://example.org/branches/$1" +RedirectMatch seeother "(?i)^/obo/test2/TEST_(\d+)$" "http://example.org/about/TEST_$1" diff --git a/tools/examples/test2/test2.htaccess b/tools/examples/test2/test2.htaccess new file mode 100644 index 00000000..04f6bd21 --- /dev/null +++ b/tools/examples/test2/test2.htaccess @@ -0,0 +1,20 @@ +# DO NOT EDIT THIS FILE! +# Automatically generated from "tools/examples/test2/obo.yml". +# Edit that source file then regenerate this file. 
+ +RedirectMatch temp "(?i)^/obo/obo/project$" "http://example.org/project.html" +RedirectMatch temp "(?i)^/obo/obo/branches/(.*)$" "http://example.org/branches/$1" +RedirectMatch seeother "(?i)^/obo/test2/TEST_(\d+)$" "http://example.org/about/TEST_$1" + +### Generated from project configuration files + +# Base redirect for TEST2 +RedirectMatch temp "(?i)^/obo/test2$" "http://example.org/test2" + +# Products for TEST2 +RedirectMatch temp "(?i)^/obo/test2.owl$" "http://example.org/test2.owl" +RedirectMatch temp "(?i)^/obo/test2.obo$" "http://example.org/test2.obo" + +# Term redirect for TEST2 +RedirectMatch seeother "^/obo/TEST2_(\d+)$" "http://www.ontobee.org/browser/rdf.php?o=TEST2&iri=http://purl.obolibrary.org/obo/TEST2_$1" + diff --git a/tools/examples/test2/test2.yml b/tools/examples/test2/test2.yml new file mode 100644 index 00000000..2c8141ef --- /dev/null +++ b/tools/examples/test2/test2.yml @@ -0,0 +1,14 @@ +# PURL configuration for http://purl.obolibrary.org/obo/test2 + +idspace: TEST2 +base_url: /obo/test2 + +base_redirect: http://example.org/test2 + +products: +- test2.owl: http://example.org/test2.owl +- test2.obo: http://example.org/test2.obo + +term_browser: ontobee +example_terms: +- TEST2_0000001 \ No newline at end of file diff --git a/tools/examples/test2/test2/test2.htaccess b/tools/examples/test2/test2/test2.htaccess new file mode 100644 index 00000000..eaadaa74 --- /dev/null +++ b/tools/examples/test2/test2/test2.htaccess @@ -0,0 +1,4 @@ +# DO NOT EDIT THIS FILE! +# Automatically generated from "tools/examples/test2/test2.yml". +# Edit that source file then regenerate this file. + diff --git a/tools/migrate.py b/tools/migrate.py index 1140dab8..4c10fb14 100755 --- a/tools/migrate.py +++ b/tools/migrate.py @@ -33,7 +33,11 @@ # the `exact` entries are output first, # followed by `prefix` entries in descending order of `id` length. -import argparse, sys, xml.sax, re +import argparse +import re +import os +import sys +import xml.sax # Accumulate entries in these global lists for later sorting. exact = [] @@ -55,34 +59,43 @@ entries: ''' + entry_template = '''- %s: %s replacement: %s ''' + # Parse command line arguments, # run the SAX parser on the XML file, # and write results to the YAML file. def main(): parser = argparse.ArgumentParser(description='Migrate XML to YAML') parser.add_argument('idspace', - type=str, - help='the project IDSPACE, e.g. FOO') + type=str, + help='the project IDSPACE, e.g. 
FOO') parser.add_argument('xml_file', - type=argparse.FileType('r'), - default=sys.stdin, - nargs='?', - help='read from the XML file (or STDIN)') + type=argparse.FileType('r'), + default=sys.stdin, + nargs='?', + help='read from the XML file (or STDIN)') parser.add_argument('yaml_file', - type=argparse.FileType('w'), - default=sys.stdout, - nargs='?', - help='write to the YAML file (or STDOUT)') + type=str, + nargs='?', + help='write to the YAML file (or STDOUT)') args = parser.parse_args() args.upper_idspace = args.idspace.upper() args.lower_idspace = args.idspace.lower() args.base_url = '/obo/' + args.lower_idspace + if args.yaml_file is not None: + try: + args.yaml_file = open(args.yaml_file, 'w') + except FileNotFoundError: + os.makedirs(os.path.dirname(args.yaml_file)) + args.yaml_file = open(args.yaml_file, 'w') + else: + args.yaml_file = sys.stdout sax = xml.sax.make_parser() sax.setContentHandler(OCLCHandler(args)) @@ -93,18 +106,21 @@ def main(): raise ValueError('No entries to migrate') args.yaml_file.write(header_template % - (args.base_url, args.upper_idspace, args.base_url, args.lower_idspace, args.lower_idspace)) + (args.base_url, args.upper_idspace, args.base_url, args.lower_idspace, + args.lower_idspace)) for entry in entries: args.yaml_file.write(entry_template % - (entry['rule'], entry['id'], entry['url'])) + (entry['rule'], entry['id'], entry['url'])) + + args.yaml_file.close() # Define a SAX ContentHandler class to match the XML format, # and accumulate entry dictionaries into the global lists. # See example above for XML format. class OCLCHandler(xml.sax.ContentHandler): - # Initialize with results of argparse. def __init__(self, args): + # Initialize args with results of argparse. self.args = args self.count = 0 self.content = '' @@ -135,7 +151,7 @@ def endElement(self, name): elif name == 'purl': # The `` in the XML must begin with the base_url, # but we remove this prefix from the YAML output. - if not 'id' in self.entry: + if 'id' not in self.entry: raise ValueError('No for %d' % self.count) id_re = re.compile('^' + self.args.base_url, re.IGNORECASE) if not id_re.match(self.entry['id']): @@ -144,14 +160,14 @@ def endElement(self, name): % (self.count, self.entry['id'], self.args.base_url)) self.entry['id'] = id_re.sub('', self.entry['id']) - if not 'url' in self.entry: + if 'url' not in self.entry: raise ValueError('No for %d' % self.count) if not re.match(r'^(https?|ftp)\:\/\/.+', self.entry['url']): raise ValueError( 'In %d the "%s" is not an absolute HTTP or FTP URL' % (self.count, self.entry['url'])) - if not 'type' in self.entry: + if 'type' not in self.entry: raise ValueError('No for %d' % self.count) elif self.entry['type'] == '302': self.entry['rule'] = 'exact' @@ -161,7 +177,8 @@ def endElement(self, name): prefix.append(self.entry) else: raise ValueError('Unknown type "%s" for %d' % - (self.entry['type'], self.count)) + (self.entry['type'], self.count)) + if __name__ == "__main__": main() diff --git a/tools/safe-update.py b/tools/safe-update.py new file mode 100755 index 00000000..494c5bbb --- /dev/null +++ b/tools/safe-update.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 + +# Check Travis CI build status for the `master` branch of OBOFoundry/purl.obolibrary.org +# If `master` is green (i.e. all tests are passing), +# and the build number is greater than the current build +# (i.e. the last time we updated), +# then pull `master`, run Make, and update .current_build. 
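+#
+# A note on .current_build: it is expected to hold a single line describing the
+# last deployed build, in the same "#<number> <state>: <branch> <message>" form
+# that this script writes out below (see build_desc). If the first line of the
+# latest Travis build's description differs from it, we pull and rebuild.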
+ +import difflib +import requests +import subprocess +import sys + +api_url = 'https://api.travis-ci.org' +repo_slug = 'OBOFoundry/purl.obolibrary.org' +accept_header = {'Accept': 'application/vnd.travis-ci.2.1+json'} + +# Get the last build ID from Travis: +resp = requests.get('{}/repos/{}'.format(api_url, repo_slug), headers=accept_header) +if resp.status_code != requests.codes.ok: + resp.raise_for_status() +last_build_id = resp.json()['repo']['last_build_id'] + +# Now get the build details: +resp = requests.get('{}/repos/{}/builds/{}'.format(api_url, repo_slug, last_build_id), + headers=accept_header) +if resp.status_code != requests.codes.ok: + resp.raise_for_status() +content = resp.json() + +# If the last build did not pass, then do nothing and exit. +if content['build']['state'] != 'passed': + print("Last build is not green. Not updating.", file=sys.stderr) + sys.exit(0) + +# Otherwise see if the build description is different from the current build +print("Last build is green. Checking whether it is new ...") +build_desc = "#{} {}: {} {}".format(content['build']['number'], content['build']['state'], + content['commit']['branch'], content['commit']['message']) +# We only want to keep the first line of the last build's description for comparison purposes: +newbuild_lines = build_desc.splitlines(keepends=True)[:1] +with open('.current_build') as infile: + currbuild_lines = infile.readlines() + +diff = list(difflib.unified_diff(currbuild_lines, newbuild_lines)) +if not diff: + print("Last build is not new. Not updating.") + sys.exit(0) + +# Output a diff for information purposes and then do a `git pull` and `make` from the current +# working directory: +for d in diff: + print(d, end='') +print('\nNew green build available. Updating local repository ...') + +if subprocess.call(["git", "pull"]) == 0 and subprocess.call(["make"]) == 0: + with open('.current_build', 'w') as outfile: + outfile.write(newbuild_lines.pop()) diff --git a/tools/site.yml b/tools/site.yml index ce0dafa2..5a1a17f4 100644 --- a/tools/site.yml +++ b/tools/site.yml @@ -29,8 +29,6 @@ with_items: - ntp - git - - ruby - - ruby-dev - python3 - name: Install system-wide packages (All) @@ -39,16 +37,12 @@ - apache2 - python3-pip - - name: Install Travis command-line - when: mode == "production" - command: gem install travis --no-rdoc --no-ri creates=/usr/local/bin/travis - - - name: Install kwalify - command: gem install kwalify creates=/usr/local/bin/kwalify - - name: Install PyYAML pip: name=PyYAML executable=pip3 + - name: Install jsonschema + pip: name=jsonschema executable=pip3 + - name: Clone the Git repo when: mode == "production" git: repo={{ repo_url }} dest={{ repo_dir }} diff --git a/tools/test.py b/tools/test.py index 3983302a..93931fb6 100755 --- a/tools/test.py +++ b/tools/test.py @@ -2,11 +2,17 @@ # # Read a YAML configuration file, # make a series of HTTP HEAD requests to a target server, -# and report the results in a table. +# and report the results in a TSV file. # # NOTE: Currently only tests `example_terms` when `term_browser: ontobee`. 
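+#
+# Example invocation (this is how the new Makefile `test` target calls it;
+# `localhost` is the default DEVELOPMENT host):
+#
+#   tools/test.py --delay=0.01 --output=tests/development \
+#       --domain=localhost config/*.yml
+#
+# One TSV report per input YAML file is written to the --output directory.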
-import argparse, sys, yaml, http.client, time +import argparse +import http.client +import os +import re +import sys +import time +import yaml from urllib.parse import unquote @@ -17,105 +23,124 @@ def main(): parser = argparse.ArgumentParser(description='Test a YAML configuration by making HTTP requests') parser.add_argument('-d', '--delay', metavar='D', - type=float, - default=1, - help='delay between requests in seconds (default 1)') + type=float, + default=1, + help='delay between requests in seconds (default 1)') parser.add_argument('-t', '--timeout', metavar='T', - type=float, - default=10, - help='connection timeout in seconds (default 10)') - parser.add_argument('domain', - type=str, - default='172.16.100.10', - nargs='?', - help='target server (default 172.16.100.10)') - parser.add_argument('yaml_file', - type=argparse.FileType('r'), - default=sys.stdin, - nargs='?', - help='read from the YAML file (or STDIN)') - parser.add_argument('report_file', - type=argparse.FileType('w'), - default=sys.stdout, - nargs='?', - help='write to the TSV file (or STDOUT)') + type=float, + default=10, + help='connection timeout in seconds (default 10)') + parser.add_argument('-m', '--domain', metavar='DOM', + type=str, + default='172.16.100.10', + help='target server (default 172.16.100.10)') + parser.add_argument('-o', '--output', metavar='DIR', + type=str, + required=True, + help='Directory to write TSV files to') + parser.add_argument('yaml_files', metavar='YAML', + type=argparse.FileType('r'), + default=sys.stdin, + nargs='+', + help='YAML input file') args = parser.parse_args() - # Load YAML document and look for 'entries' list. - document = yaml.load(args.yaml_file) - - if not 'idspace' in document \ - or type(document['idspace']) is not str: - raise ValueError('YAML document must contain "idspace" string') - idspace = document['idspace'] - - if not 'base_url' in document \ - or type(document['base_url']) is not str: - raise ValueError('YAML document must contain "base_url" string') - base_url = document['base_url'] - - tests = [] - - # Collect the tests to run. - if 'base_redirect' in document: - tests += [{ - 'source': base_url, - 'replacement': document['base_redirect'], - 'status': '302' - }] - - if 'products' in document \ - and type(document['products']) is list: - i = 0 - for product in document['products']: - i += 1 - tests += process_product(i, product) - - if 'term_browser' in document \ - and document['term_browser'].strip().lower() == 'ontobee' \ - and 'example_terms' in document \ - and type(document['example_terms']) is list: - i = 0 - for example_term in document['example_terms']: - i += 1 - tests += process_ontobee(idspace, i, example_term) - - if 'tests' in document: - i = 0 - status = '302' - for test_entry in document['tests']: - i += 1 - test = {'status': status} - if 'from' in test_entry: - test['source'] = base_url + test_entry['from'] - if 'to' in test_entry: - test['replacement'] = test_entry['to'] - if 'source' in test and 'replacement' in test: - tests.append(test) - else: - raise ValueError('Invalid test %d in global tests' % i) - - if 'entries' in document \ - and type(document['entries']) is list: - i = 0 - for entry in document['entries']: - i += 1 - tests += process_entry(base_url, i, entry) - - # Write report table header. - args.report_file.write('\t'.join([ - 'Result', 'Source URL', - 'Expected Status', 'Expected URL', - 'Actual Status', 'Actual URL' - ]) + '\n') - - # Run the tests and add results to the report table. 
- conn = http.client.HTTPConnection(args.domain, timeout=args.timeout) - for test in tests: - results = run_test(conn, test) - args.report_file.write('\t'.join(results) + '\n') - args.report_file.flush() - time.sleep(args.delay) + # Create the output directory if it doesn't already exist + try: + os.makedirs(args.output) + except FileExistsError: + pass + + failures = [] + for yaml_file in args.yaml_files: + print("Checking {} ...".format(yaml_file.name)) + with open(os.path.normpath(args.output) + '/' + + re.sub('\.yml$', '.tsv', os.path.basename(yaml_file.name)), 'w') as report_file: + # Load YAML document and look for 'entries' list. + document = yaml.load(yaml_file) + + if 'idspace' not in document \ + or type(document['idspace']) is not str: + raise ValueError('YAML document must contain "idspace" string') + idspace = document['idspace'] + + if 'base_url' not in document \ + or type(document['base_url']) is not str: + raise ValueError('YAML document must contain "base_url" string') + base_url = document['base_url'] + + tests = [] + + # Collect the tests to run. + if 'base_redirect' in document: + tests += [{ + 'source': base_url, + 'replacement': document['base_redirect'], + 'status': '302' + }] + + if 'products' in document \ + and type(document['products']) is list: + i = 0 + for product in document['products']: + i += 1 + tests += process_product(i, product) + + if 'term_browser' in document \ + and document['term_browser'].strip().lower() == 'ontobee' \ + and 'example_terms' in document \ + and type(document['example_terms']) is list: + i = 0 + for example_term in document['example_terms']: + i += 1 + tests += process_ontobee(idspace, i, example_term) + + if 'tests' in document: + i = 0 + status = '302' + for test_entry in document['tests']: + i += 1 + test = {'status': status} + if 'from' in test_entry: + test['source'] = base_url + test_entry['from'] + if 'to' in test_entry: + test['replacement'] = test_entry['to'] + if 'source' in test and 'replacement' in test: + tests.append(test) + else: + raise ValueError('Invalid test %d in global tests' % i) + + if 'entries' in document \ + and type(document['entries']) is list: + i = 0 + for entry in document['entries']: + i += 1 + tests += process_entry(base_url, i, entry) + + # Write report table header. + report_file.write('\t'.join([ + 'Result', 'Source URL', + 'Expected Status', 'Expected URL', + 'Actual Status', 'Actual URL' + ]) + '\n') + + # Run the tests and add results to the report table. + conn = http.client.HTTPConnection(args.domain, timeout=args.timeout) + for test in tests: + results = run_test(conn, test) + if results[0] == 'FAIL': + print("FAILURE when checking {}. See {} for details." + .format(yaml_file.name, report_file.name)) + failures.append(idspace) + report_file.write('\t'.join(results) + '\n') + report_file.flush() + time.sleep(args.delay) + + if failures: + print("The following idspaces encountered failures: {}.\n" + "For more details, see their corresponding TSV files in '{}'.\n" + "To re-run tests for just those idspaces, use the script '{}'." 
+ .format(', '.join(failures), args.output, __file__)) def process_product(i, product): @@ -131,13 +156,14 @@ def process_product(i, product): ontobee = 'http://www.ontobee.org/browser/rdf.php?o=%s&iri=http://purl.obolibrary.org/obo/' + def process_ontobee(idspace, i, example_term): """Given an ontology IDSPACE, an index, and an example term ID, return a list with a test to run.""" return [{ 'source': '/obo/' + example_term, 'replacement': (ontobee % idspace) + example_term, - #'replacement': 'http://ontologies.berkeleybop.org/' + example_term, + # 'replacement': 'http://ontologies.berkeleybop.org/' + example_term, 'status': '303' }] @@ -153,9 +179,9 @@ def process_entry(base_url, i, entry): raise ValueError('Entry %d is invalid: "%s"' % (i, entry)) # Validate "replacement" field - if not 'replacement' in entry \ - or entry['replacement'] is None \ - or entry['replacement'].strip() == '': + if 'replacement' not in entry \ + or entry['replacement'] is None \ + or entry['replacement'].strip() == '': raise ValueError('Missing "replacement" field for entry %d' % i) # Validate status code. @@ -172,7 +198,6 @@ def process_entry(base_url, i, entry): test['status'] = status # Determine the type for this entry. - types = [] if 'exact' in entry: test['source'] = base_url + entry['exact'] test['replacement'] = entry['replacement'] diff --git a/tools/translate-base-redirects.py b/tools/translate-base-redirects.py deleted file mode 100755 index 108c76a4..00000000 --- a/tools/translate-base-redirects.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python3 -# -# Reads a YAML file with a `base_redirect` field -# and writes Apache mod_alias RedirectMatch directives. See: -# -# https://httpd.apache.org/docs/2.4/mod/mod_alias.html -# -# If the YAML file does not contain `base_redirect`, -# then no output is generated. - -import argparse, sys, yaml, re -from urllib.parse import unquote - -header_template = '''# Base redirect for %s -''' - -# Parse command line arguments, -# read entries from the YAML file, -# and write the Apache .htaccess file. -def main(): - parser = argparse.ArgumentParser(description='Translate YAML `base_redirect` to .htaccess') - parser.add_argument('yaml_file', - type=argparse.FileType('r'), - default=sys.stdin, - nargs='?', - help='read from the YAML file (or STDIN)') - parser.add_argument('htaccess_file', - type=argparse.FileType('w'), - default=sys.stdout, - nargs='?', - help='write to the .htaccess file (or STDOUT)') - args = parser.parse_args() - - # Load YAML document and look for 'entries' list. 
- document = yaml.load(args.yaml_file) - - if not 'idspace' in document \ - or type(document['idspace']) is not str: - raise ValueError('YAML document must contain "idspace" string') - idspace = document['idspace'] - - if not 'base_url' in document \ - or type(document['base_url']) is not str: - raise ValueError('YAML document must contain "base_url" string') - - if 'base_redirect' in document and type(document['base_redirect']) is str: - base_url = unquote(document['base_url']) - base_redirect = unquote(document['base_redirect']) - args.htaccess_file.write(header_template % idspace) - directive = 'RedirectMatch temp "(?i)^%s$" "%s"' % (base_url, base_redirect) - args.htaccess_file.write(directive + '\n\n') - - -if __name__ == "__main__": - main() diff --git a/tools/translate-entries.py b/tools/translate-entries.py deleted file mode 100755 index 0794795a..00000000 --- a/tools/translate-entries.py +++ /dev/null @@ -1,173 +0,0 @@ -#!/usr/bin/env python3 -# -# Reads a YAML file with a `base_url` and a list of `entries` -# and writes Apache mod_alias RedirectMatch directives. See: -# -# https://httpd.apache.org/docs/2.4/mod/mod_alias.html -# -# There are three types of entries: -# -# - exact: match an exact URL string -# and redirect to an exact URL -# - prefix: match a URL prefix string, -# from the start of the request URL, -# and redirect to the "replacement" field plus -# any string following the prefix in the request -# - regex: use any regular expression -# allowed by RedirectMatch -# -# Entries can have these fields: -# -# - exact/prefix/regex: the URL string or regex to match; -# exactly one required; -# should begin with a slash "/" except for some regexs -# - replacement: the URL string or regex to redirect to; -# exactly one required -# - status: HTTP status for redirect; -# zero or one value; defaults to "temporary"; -# can be "permanent" (301) or "temporary" (302); -# (Apache uses "temp" for "temporary") -# - tests: an optional list of tests -# each test requires a `from` value, like `exact`, -# and a `to` value, like `replacement` -# -# See the `tools/config.schema.yml` for more details. -# -# For the "exact" and "prefix" types, -# the URL strings are rewritten as escaped regular expressions, -# with a "^base_url" prefix and a "$" suffix. -# Any regular expression special characters (e.g. ., *, ?, []) -# will be escaped: they will not match as regular expressions. -# -# For the "prefix" type, "(.*)" is also appended to the "prefix" field -# and "$1" is appended to the "to" field, -# to configure the prefix match. -# -# For the "regex" type, the "" and "to" fields -# are assumed to be valid regular expressions, -# **including** the `base_url`, -# and are not checked or modified. -# -# **Only** use "regex" if "exact" or "prefix" are insufficient. -# -# The order of YAML objects will be the order -# of the Apache directives. -# If no entries are found, -# the generated file will have a header comment -# without any directives. - -import argparse, sys, yaml, re -from urllib.parse import unquote - -header_template = '''# DO NOT EDIT THIS FILE! -# Automatically generated from "%s". -# Edit that source file then regenerate this file. - -''' - -# Parse command line arguments, -# read entries from the YAML file, -# and write the Apache .htaccess file. 
-def main(): - parser = argparse.ArgumentParser(description='Translate YAML `entries` to .htaccess') - parser.add_argument('yaml_file', - type=argparse.FileType('r'), - default=sys.stdin, - nargs='?', - help='read from the YAML file (or STDIN)') - parser.add_argument('htaccess_file', - type=argparse.FileType('w'), - default=sys.stdout, - nargs='?', - help='write to the .htaccess file (or STDOUT)') - args = parser.parse_args() - - # Load YAML document and look for 'entries' list. - document = yaml.load(args.yaml_file) - - if not 'base_url' in document \ - or type(document['base_url']) is not str: - raise ValueError('YAML document must contain "base_url" string') - base_url = document['base_url'] - - args.htaccess_file.write(header_template % args.yaml_file.name) - - if 'entries' in document and type(document['entries']) is list: - i = 0 - for entry in document['entries']: - i += 1 - args.htaccess_file.write(process_entry(base_url, i, entry) + '\n') - - -def clean_source(s): - """Given a URL string, - return an escaped regular expression for matching that string. - Only forward-slashes are not escaped.""" - r = s.strip() - r = re.escape(r) - r = r.replace('\\/', '/') - return r - - -def process_entry(base_url, i, entry): - """Given a base URL, an index, and an entry dictionary, - ensure that the entry is valid, - and return an Apache RedirectMatch directive string.""" - source = '' - replacement = '' - - # Check entry data type - if type(entry) is not dict: - raise ValueError('Entry %d is not a YAML map: "%s"' % (i, entry)) - - # Validate "replacement" field - if not 'replacement' in entry \ - or entry['replacement'] is None \ - or entry['replacement'].strip() == '': - raise ValueError('Missing "replacement" field for entry %d' % i) - - # Determine the type for this entry. - types = [] - if 'exact' in entry: - source = '(?i)^%s%s$' % (base_url, clean_source(entry['exact'])) - replacement = entry['replacement'] - types.append('exact') - if 'prefix' in entry: - source = '(?i)^%s%s(.*)$' % (base_url, clean_source(entry['prefix'])) - replacement = entry['replacement'] + '$1' - types.append('prefix') - if 'regex' in entry: - source = entry['regex'] - replacement = entry['replacement'] - types.append('regex') - - # Ensure that there is no more than one "type" key. - if len(types) < 1: - raise ValueError('Entry %d does not have a type; see "replacement: %s"' - % (i, entry['replacement'])) - elif len(types) > 1: - raise ValueError('Entry %d has multiple types: %s; see "replacement: %s"' - % (i, ', '.join(types), entry['replacement'])) - - # Validate status code - status = 'temporary' - if 'status' in entry: - if entry['status'] in ('permanent', 'temporary', 'see other'): - status = entry['status'] - else: - raise ValueError('Invalid status "%s" for entry %d' % (entry['status'], i)) - - # Switch to Apache's preferred names - if status == 'temporary': - status = 'temp' - elif status == 'see other': - status = 'seeother' - - source = unquote(source) - replacement = unquote(replacement) - - return 'RedirectMatch %s "%s" "%s"' % (status, source, replacement) - - -if __name__ == "__main__": - main() diff --git a/tools/translate-products.py b/tools/translate-products.py deleted file mode 100755 index b8baa748..00000000 --- a/tools/translate-products.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python3 -# -# Reads a YAML file with a list of `products` -# and writes Apache mod_alias RedirectMatch directives. 
See: -# -# https://httpd.apache.org/docs/2.4/mod/mod_alias.html -# -# The order of YAML objects will be the order -# of the Apache directives. -# If no products are found, no output is generated. - -import argparse, sys, yaml, re -from urllib.parse import unquote - -header_template = '''# Products for %s -''' - -# Parse command line arguments, -# read entries from the YAML file, -# and write the Apache .htaccess file. -def main(): - parser = argparse.ArgumentParser(description='Translate YAML `products` to .htaccess') - parser.add_argument('yaml_file', - type=argparse.FileType('r'), - default=sys.stdin, - nargs='?', - help='read from the YAML file (or STDIN)') - parser.add_argument('htaccess_file', - type=argparse.FileType('w'), - default=sys.stdout, - nargs='?', - help='write to the .htaccess file (or STDOUT)') - args = parser.parse_args() - - # Load YAML document and look for 'entries' list. - document = yaml.load(args.yaml_file) - - if not 'idspace' in document \ - or type(document['idspace']) is not str: - raise ValueError('YAML document must contain "idspace" string') - idspace = document['idspace'] - - if 'products' in document and type(document['products']) is list: - args.htaccess_file.write(header_template % idspace) - i = 0 - for product in document['products']: - i += 1 - args.htaccess_file.write(process_product(i, product) + '\n') - args.htaccess_file.write('\n') - - -def process_product(i, product): - """Given an index, and a product dictionary with one key, - ensure that the entry is valid, - and return an Apache RedirectMatch directive string.""" - for key in product: - source = unquote('(?i)^/obo/%s$' % key) - replacement = unquote(product[key]) - - return 'RedirectMatch temp "%s" "%s"' % (source, replacement) - - -if __name__ == "__main__": - main() diff --git a/tools/translate-terms.py b/tools/translate-terms.py deleted file mode 100755 index ed614c1c..00000000 --- a/tools/translate-terms.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python3 -# -# Reads a YAML file with a `term_browser` entry -# and an `example_terms` list, -# and writes Apache mod_alias RedirectMatch directives. See: -# -# https://httpd.apache.org/docs/2.4/mod/mod_alias.html -# -# The order of YAML objects will be the order -# of the Apache directives. -# If no example_terms are found, no output is generated. -# -# Note: currently works only for `term_browser: ontobee`. -# When `term_browser: custom` no output is generated. - -import argparse, sys, yaml, re - -header_template = '''# Term redirect for %s -''' - -# Parse command line arguments, -# read entries from the YAML file, -# and write the Apache .htaccess file. -def main(): - parser = argparse.ArgumentParser(description='Translate YAML `example_terms` to .htaccess') - parser.add_argument('yaml_file', - type=argparse.FileType('r'), - default=sys.stdin, - nargs='?', - help='read from the YAML file (or STDIN)') - parser.add_argument('htaccess_file', - type=argparse.FileType('w'), - default=sys.stdout, - nargs='?', - help='write to the .htaccess file (or STDOUT)') - args = parser.parse_args() - - # Load YAML document and look for 'entries' list. 
- document = yaml.load(args.yaml_file) - - if not 'idspace' in document \ - or type(document['idspace']) is not str: - raise ValueError('YAML document must contain "idspace" string') - idspace = document['idspace'] - - if 'term_browser' in document and document['term_browser'].strip().lower() == 'ontobee': - args.htaccess_file.write(header_template % idspace) - replacement = 'http://www.ontobee.org/browser/rdf.php?o=%s&iri=http://purl.obolibrary.org/obo/%s_$1' % (idspace, idspace) - directive = 'RedirectMatch seeother "^/obo/%s_(\d+)$" "%s"' % (idspace, replacement) - args.htaccess_file.write(directive +'\n\n') - - -if __name__ == "__main__": - main() diff --git a/tools/translate_yaml.py b/tools/translate_yaml.py new file mode 100755 index 00000000..971b16dd --- /dev/null +++ b/tools/translate_yaml.py @@ -0,0 +1,456 @@ +#!/usr/bin/env python3 + +""" +Reads either a list of YAML files, or a directory containing YAML files, +and writes Apache mod_alias RedirectMatch directives to corresponding +.htaccess files. See: + +https://httpd.apache.org/docs/2.4/mod/mod_alias.html + +The `foo.yml` file will generate output for two targets: + +1. /www/obo/foo/.htaccess +2. /www/obo/.htaccess + +Target (1) only applies to project `foo`. +It is generated from `base_url` and the `entries` list. +Projects have wide discretion for this target. + +Target (2) applies to all projects. +The content is tightly constrained to avoid conflicts. +This content is generated from other YAML fields, +such as `products` and `term_browser`. + +Entries: +======= + +There are three types of entries: + +- exact: match an exact URL string + and redirect to an exact URL +- prefix: match a URL prefix string, + from the start of the request URL, + and redirect to the "replacement" field plus + any string following the prefix in the request +- regex: use any regular expression + allowed by RedirectMatch + +Entries can have these fields: + +- exact/prefix/regex: the URL string or regex to match; + exactly one required; + should begin with a slash "/" except for some regexs +- replacement: the URL string or regex to redirect to; + exactly one required +- status: HTTP status for redirect; + zero or one value; defaults to "temporary"; + can be "permanent" (301), "temporary" (302), or "see other" (303); + (Apache uses "temp" for "temporary") +- tests: an optional list of tests + each test requires a `from` value, like `exact`, + and a `to` value, like `replacement` + +See the `tools/config.schema.json` for more details. + +For the "exact" and "prefix" types, +the URL strings are rewritten as escaped regular expressions, +with a "^base_url" prefix and a "$" suffix. +Any regular expression special characters (e.g. ., *, ?, []) +will be escaped: they will not match as regular expressions. + +For the "prefix" type, "(.*)" is also appended to the "prefix" field +and "$1" is appended to the "to" field, +to configure the prefix match. + +For the "regex" type, the "" and "to" fields +are assumed to be valid regular expressions, +**including** the `base_url`, +and are not checked or modified. + +**Only** use "regex" if "exact" or "prefix" are insufficient. + +The order of YAML objects will be the order +of the Apache directives. +If no entries are found, +the generated file will have a header comment +without any directives. + +Base redirects, Products, and Terms +=================================== + +These fields are optional. If the YAML input does not contain them, no +corresponding output will be generated. 
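+
+As a concrete illustration (this pair is taken from the test2 example under
+tools/examples/), the product entry
+
+    products:
+    - test2.owl: http://example.org/test2.owl
+
+is translated into a directive for the shared top-level .htaccess file:
+
+    RedirectMatch temp "(?i)^/obo/test2.owl$" "http://example.org/test2.owl"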
+ +Note that in the case of terms, only `term_browser: ontobee` is currently +supported. When `term_browser: custom` is used no output is generated. +""" + +import functools +import json +import jsonschema +import re +import os +import sys +import yaml + +from argparse import ArgumentParser +from glob import glob +from urllib.parse import unquote + +pwd = os.path.dirname(os.path.realpath(__file__)) +schemafile = "{}/config.schema.json".format(pwd) + + +def load_and_validate(yamlname, schema): + try: + yamlfile = open(yamlname) + yamldoc = yaml.load(yamlfile) + jsonschema.validate(yamldoc, schema) + except (FileNotFoundError, IsADirectoryError, yaml.YAMLError) as e: + print(e, file=sys.stderr) + sys.exit(1) + except jsonschema.exceptions.ValidationError as e: + print("In file: {}:\n{}".format(yamlname, e), file=sys.stderr) + sys.exit(1) + + # The following two errors should not occur, since the presence of `base_url` and `idspace` + # should have been enforced by the above jsonschema validation step. But we double-check anyway. + if 'base_url' not in yamldoc \ + or type(yamldoc['base_url']) is not str: + print('YAML document must contain "base_url" string', file=sys.stderr) + sys.exit(1) + + if 'idspace' not in yamldoc \ + or type(yamldoc['idspace']) is not str: + print('YAML document must contain "idspace" string', file=sys.stderr) + sys.exit(1) + + # jsonschema is not sophisticated enough to validate this one, so we do it here: + if os.path.basename(yamldoc['base_url']).lower() != yamldoc['idspace'].lower(): + print("WARNING: Base URL '{}' must end with '{}', not '{}'" + .format(yamldoc['base_url'], yamldoc['idspace'], os.path.basename(yamldoc['base_url']))) + + return yamldoc + + +def clean_source(s): + """ + Given a URL string, + return an escaped regular expression for matching that string. + Only forward-slashes are not escaped. + """ + r = s.strip() + r = re.escape(r) + r = r.replace('\\/', '/') + return r + + +def process_entry(base_url, i, entry): + """ + Given a base URL, an index, and an entry dictionary, + ensure that the entry is valid, + and return an Apache RedirectMatch directive string. + """ + source = '' + replacement = '' + + # Check entry data type + if type(entry) is not dict: + raise ValueError('Entry %d is not a YAML map: "%s"' % (i, entry)) + + # Validate that "replacement" field exists. If it is missing it should have been caught by the + # jsonschema validation step (see above), but we double-check anyway: + if 'replacement' not in entry \ + or entry['replacement'] is None \ + or entry['replacement'].strip() == '': + raise ValueError('Missing "replacement" field for entry %d' % i) + + # Determine the type for this entry. + types = [] + if 'exact' in entry: + source = '(?i)^%s%s$' % (base_url, clean_source(entry['exact'])) + replacement = entry['replacement'] + types.append('exact') + if 'prefix' in entry: + source = '(?i)^%s%s(.*)$' % (base_url, clean_source(entry['prefix'])) + replacement = entry['replacement'] + '$1' + types.append('prefix') + if 'regex' in entry: + source = entry['regex'] + replacement = entry['replacement'] + types.append('regex') + + # Ensure that there is exactly one "type" key. + if len(types) < 1: + raise ValueError('Entry %d does not have a type; see "replacement: %s"' + % (i, entry['replacement'])) + elif len(types) > 1: + raise ValueError('Entry %d has multiple types: %s; see "replacement: %s"' + % (i, ', '.join(types), entry['replacement'])) + + # Validate status code. 
Any error here should have been caught by the jsonschema validation + # (see above), but we double-check here anyway: + status = 'temporary' + if 'status' in entry: + if entry['status'] in ('permanent', 'temporary', 'see other'): + status = entry['status'] + else: + raise ValueError('Invalid status "%s" for entry %d' % (entry['status'], i)) + + # Switch to Apache's preferred names + if status == 'temporary': + status = 'temp' + elif status == 'see other': + status = 'seeother' + + source = unquote(source) + replacement = unquote(replacement) + + return 'RedirectMatch %s "%s" "%s"' % (status, source, replacement) + + +def translate_entries(yamldoc, base_url): + """ + Reads the field `entries` from the YAML document, processes each entry that is read using the + given base_url, and appends them all to a list of processed entries that is then returned. + """ + if 'entries' in yamldoc and type(yamldoc['entries']) is list: + entries = [] + for i, entry in enumerate(yamldoc['entries']): + entries.append(process_entry(base_url, i, entry)) + return entries + + +def write_entries(entries, yamlname, outfile): + """ + Write the given entries to the given output stream, indicating the source YAML file + from which the entries were extracted. Note that it is assumed that the output stream, + `outfile` is open for writing. + """ + outfile.write('# DO NOT EDIT THIS FILE!\n' + '# Automatically generated from "%s".\n' + '# Edit that source file then regenerate this file.\n\n' + % yamlname) + for entry in entries or []: + outfile.write('{}\n'.format(entry)) + + +def translate_base_redirects(yamldoc): + """ + Reads the fields `base_redirect` and `base_url` from the given YAML document and + generates a corresponding Apache directive string that is then returned. + """ + if 'base_redirect' in yamldoc and type(yamldoc['base_redirect']) is str: + base_url = unquote(yamldoc['base_url']) + base_redirect = unquote(yamldoc['base_redirect']) + directive = 'RedirectMatch temp "(?i)^%s$" "%s"' % (base_url, base_redirect) + return directive + + +def append_base_redirect(base_redirect, idspace, outfile): + """ + Appends the given base_redirect string for the given idspace to the given output stream. + """ + if base_redirect: + outfile.write('# Base redirect for %s\n' % idspace) + outfile.write(base_redirect + '\n\n') + + +def process_product(product): + """ + Given a product dictionary with one key, + ensure that the entry is valid, + and return an Apache RedirectMatch directive string. + """ + key = [k for k in product].pop() + source = unquote('(?i)^/obo/%s$' % key) + replacement = unquote(product[key]) + return 'RedirectMatch temp "%s" "%s"' % (source, replacement) + + +def translate_products(yamldoc): + """ + Reads the `products` field from the given YAML document, processes each product that is read, + and appends them all to a list of processed products that is then returned. + """ + if 'products' in yamldoc and type(yamldoc['products']) is list: + products_have_owl = False + products = [] + for product in yamldoc['products']: + key = [k for k in product].pop() + if not (key.lower().endswith('.owl') or key.lower().endswith('.obo')): + # If we want to enforce this condition, the way to do it is to add + # `"additionalProperties": false` right after `patternProperties` in the schema file. 
+                print("WARNING: In project '{}', product: '{}' does not end with '.owl' or '.obo'"
+                      .format(yamldoc['idspace'], key))
+            if key.endswith('.owl'):
+                products_have_owl = True
+
+            products.append(process_product(product))
+
+        if not products_have_owl:
+            print("WARNING: In project '{}': Mandatory .owl entry missing from product list."
+                  .format(yamldoc['idspace']))
+
+        return products
+
+
+def append_products(products, idspace, outfile):
+    """
+    Appends the given list of products for the given idspace to the given output stream. Note that
+    it is assumed that the output stream `outfile` is open for appending.
+    """
+    if products:
+        outfile.write('# Products for %s\n' % idspace)
+        for product in products:
+            outfile.write(product + '\n')
+        outfile.write('\n')
+
+
+def translate_terms(yamldoc, idspace):
+    """
+    Reads the `term_browser` field from the given YAML document, validates that it is a supported
+    term browser, and returns a corresponding Apache redirect statement.
+    """
+    if 'term_browser' in yamldoc and yamldoc['term_browser'].strip().lower() == 'ontobee':
+        replacement = ('http://www.ontobee.org/browser/rdf.php?'
+                       'o=%s&iri=http://purl.obolibrary.org/obo/%s_$1'
+                       % (idspace, idspace))
+        return r'RedirectMatch seeother "^/obo/%s_(\d+)$" "%s"' % (idspace, replacement)
+
+
+def append_term(term, idspace, outfile):
+    """
+    Appends the given term for the given idspace to the given output stream. Note that it is
+    assumed that the output stream `outfile` is open for appending.
+    """
+    if term:
+        outfile.write('# Term redirect for %s\n' % idspace)
+        outfile.write(term + '\n\n')
+
+
+# Parse command line arguments,
+# read entries from the YAML file,
+# and write the Apache .htaccess files.
+def main():
+    parser = ArgumentParser(description='''
+    Translates YAML files to .htaccess.
+
+    If a list of input YAML files is specified, then a .htaccess file is generated
+    corresponding to each given YAML file, containing the `entries` specified in the
+    YAML file. If a directory containing YAML files is specified instead, then in
+    addition, the base redirects, terms, and products specified in the YAML file of
+    each project will be appended to the top-level obo/.htaccess file in the given
+    output directory.''')
+
+    # This option is required:
+    parser.add_argument('--output_dir', metavar='DIR', type=str, required=True,
+                        help='Root directory to write to for project-specific .htaccess files')
+    # The following options cannot be used simultaneously, but one of them needs to be specified:
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('--input_files', metavar='YML', type=str, nargs='+',
+                       help='List of YAML input files')
+    group.add_argument('--input_dir', metavar='DIR', type=str,
+                       help='Directory containing YAML input files')
+    args = parser.parse_args()
+
+    # Create the output directory, if it does not already exist. If this isn't possible, fail. Note
+    # that if the directory already exists, then the files inside will be overwritten.
+    normalised_output_dir = os.path.normpath(args.output_dir)
+    try:
+        os.makedirs(normalised_output_dir)
+    except FileExistsError:
+        pass
+
+    with open(schemafile) as f:
+        schema = json.load(f)
+    entries = {}
+    base_redirects = {}
+    products = {}
+    terms = {}
+    if args.input_files:
+        # If only a sequence of YAML filenames is given, then just write the entries found within
+        # those files but not the base redirects, products, or terms.
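+        # For example (hypothetical project name), the invocation
+        #
+        #   tools/translate_yaml.py --input_files config/foo.yml --output_dir temp
+        #
+        # writes only temp/foo/.htaccess.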
+
+        for yamlname in args.input_files:
+            yamldoc = load_and_validate(yamlname, schema)
+            base_url = yamldoc['base_url']
+            # Extract the entries for the project from the YAML file:
+            entries = translate_entries(yamldoc, base_url)
+            # Write the entries for the given project to its project-specific .htaccess file, located
+            # in a subdirectory under the given output directory. Note that if the subdirectory already
+            # exists, the files inside will simply be overwritten:
+            yamlroot = re.sub(r'\.yml$', '', os.path.basename(yamlname))
+            try:
+                os.mkdir('{}/{}'.format(normalised_output_dir, yamlroot))
+            except FileExistsError:
+                pass
+            with open('{}/{}/.htaccess'.format(normalised_output_dir, yamlroot), 'w') as outfile:
+                write_entries(entries, yamlname, outfile)
+    elif args.input_dir:
+        if not os.path.isdir(args.input_dir):
+            print("{} is not a directory.".format(args.input_dir), file=sys.stderr)
+            sys.exit(1)
+
+        @functools.cmp_to_key
+        def cmp(s, t):
+            "Case-insensitive sort, longer names first"
+            s = s.lower()
+            t = t.lower()
+            s_pad = (s + t[len(s):] + 'z') if len(s) < len(t) else s
+            t_pad = (t + s[len(t):] + 'z') if len(t) < len(s) else t
+            if s_pad < t_pad:
+                return -1
+            if s_pad > t_pad:
+                return 1
+            return 0
+
+        normalised_input_dir = os.path.normpath(args.input_dir)
+        for yamlname in sorted(glob("{}/*.yml".format(normalised_input_dir)), key=cmp):
+            yamldoc = load_and_validate(yamlname, schema)
+            base_url = yamldoc['base_url']
+            # `idspace` and `yamlroot` are synonyms. The former is taken from the `idspace` specified
+            # within the given YAML file, while the latter is derived from the filename. They need to
+            # match (up to a change of case: idspace is always uppercase while yamlroot is lowercase).
+            # If they do not match, emit a warning.
+            idspace = yamldoc['idspace']
+            yamlroot = re.sub(r'\.yml$', '', os.path.basename(yamlname))
+            if idspace.lower() != yamlroot.lower():
+                print("WARNING: idspace: {} does not match filename {}".format(idspace, yamlname))
+
+            # Collect the entries for the current idspace:
+            entries[idspace] = translate_entries(yamldoc, base_url)
+            # Write the entries to the idspace's project-specific file located in its own subdirectory
+            # under the output directory, as well as a symlink to the project subdirectory in the
+            # output directory. If the files/directories already exist, they will be overwritten.
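+            # For example (hypothetical project): config/foo.yml with "idspace: FOO" produces
+            #   <output_dir>/foo/.htaccess    (the project-specific file)
+            #   <output_dir>/FOO -> foo       (symlink to the project subdirectory)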
+ try: + projdir = '{}/{}'.format(normalised_output_dir, yamlroot) + symlink = '{}/{}'.format(normalised_output_dir, idspace) + os.mkdir(projdir) + os.symlink(os.path.basename(projdir), symlink, target_is_directory=True) + except FileExistsError: + pass + with open('{}/{}/.htaccess'.format(normalised_output_dir, yamlroot), 'w') as outfile: + write_entries(entries[idspace], yamlname, outfile) + + # Extract the idspace's base redirects, products, and terms but do not write them yet: + base_redirects[idspace] = translate_base_redirects(yamldoc) + products[idspace] = translate_products(yamldoc) + terms[idspace] = translate_terms(yamldoc, idspace) + + # Now write the entries for the 'OBO' idspace to a global .htaccess file located at the top + # level of the output directory: + with open('{}/.htaccess'.format(normalised_output_dir), 'w') as outfile: + write_entries(entries['OBO'], '{}/obo.yml'.format(normalised_input_dir), outfile) + + # Append the base redirects, products, and terms to the global .htaccess file: + with open('{}/.htaccess'.format(normalised_output_dir), 'a') as outfile: + outfile.write('\n### Generated from project configuration files\n\n') + for idspace in sorted(base_redirects, key=cmp): + append_base_redirect(base_redirects[idspace], idspace, outfile) + for idspace in sorted(products, key=cmp): + append_products(products[idspace], idspace, outfile) + for idspace in sorted(terms, key=cmp): + append_term(terms[idspace], idspace, outfile) + + +if __name__ == "__main__": + main()
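+
+
+# Example invocations (illustrative paths):
+#
+#   translate_yaml.py --input_files config/foo.yml --output_dir temp
+#     writes temp/foo/.htaccess with only that project's entries.
+#
+#   translate_yaml.py --input_dir config --output_dir temp/obo
+#     also writes temp/obo/.htaccess, which starts with the entries from config/obo.yml
+#     and is followed by each project's base redirect, products, and term redirect.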