From fe95b9cb8eadb719c80715ff37f83a39a0102faa Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Wed, 6 Feb 2019 16:32:29 -0500 Subject: [PATCH 01/20] add new YAML translation python script, intended to replace much of the Makefile logic as well as some or all of the existing python scripts --- tools/translate_yaml.py | 369 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 369 insertions(+) create mode 100755 tools/translate_yaml.py diff --git a/tools/translate_yaml.py b/tools/translate_yaml.py new file mode 100755 index 00000000..0efff172 --- /dev/null +++ b/tools/translate_yaml.py @@ -0,0 +1,369 @@ +#!/usr/bin/env python3 + +""" +Reads either a list of YAML files, or a directory containing YAML files, +and writes Apache mod_alias RedirectMatch directives to corresponding +.htaccess files. See: + +https://httpd.apache.org/docs/2.4/mod/mod_alias.html + +The YAML files will have a `base_url`, a list of `entries`, a `base_redirect` +field (optional), a `products` field (optional), and a `terms` field (optional). + +Entries: +======= + +There are three types of entries: + +- exact: match an exact URL string + and redirect to an exact URL +- prefix: match a URL prefix string, + from the start of the request URL, + and redirect to the "replacement" field plus + any string following the prefix in the request +- regex: use any regular expression + allowed by RedirectMatch + +Entries can have these fields: + +- exact/prefix/regex: the URL string or regex to match; + exactly one required; + should begin with a slash "/" except for some regexs +- replacement: the URL string or regex to redirect to; + exactly one required +- status: HTTP status for redirect; + zero or one value; defaults to "temporary"; + can be "permanent" (301) or "temporary" (302); + (Apache uses "temp" for "temporary") +- tests: an optional list of tests + each test requires a `from` value, like `exact`, + and a `to` value, like `replacement` + +See the `tools/config.schema.yml` for more details. + +For the "exact" and "prefix" types, +the URL strings are rewritten as escaped regular expressions, +with a "^base_url" prefix and a "$" suffix. +Any regular expression special characters (e.g. ., *, ?, []) +will be escaped: they will not match as regular expressions. + +For the "prefix" type, "(.*)" is also appended to the "prefix" field +and "$1" is appended to the "to" field, +to configure the prefix match. + +For the "regex" type, the "" and "to" fields +are assumed to be valid regular expressions, +**including** the `base_url`, +and are not checked or modified. + +**Only** use "regex" if "exact" or "prefix" are insufficient. + +The order of YAML objects will be the order +of the Apache directives. +If no entries are found, +the generated file will have a header comment +without any directives. + +Base redirects, Products, and Terms +=================================== +These fields are optional. If the YAML input does not contain them, no +corresponding output will be generated. + +Note that in the case of terms, only `term_browser: ontobee` is currently +supported. When `term_browser: custom` is used no output is generated. +""" + +import re +import os +import sys +import yaml + +from argparse import ArgumentParser +from glob import glob +from urllib.parse import unquote + + +def clean_source(s): + """ + Given a URL string, + return an escaped regular expression for matching that string. + Only forward-slashes are not escaped. 
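+ For example (illustrative): clean_source('/ido.owl') returns '/ido\.owl';
+ the dot is escaped so that it cannot match as a regex wildcard, while the
+ slashes are left as-is.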
+ """ + r = s.strip() + r = re.escape(r) + r = r.replace('\\/', '/') + return r + + +def process_entry(base_url, i, entry): + """ + Given a base URL, an index, and an entry dictionary, + ensure that the entry is valid, + and return an Apache RedirectMatch directive string. + """ + source = '' + replacement = '' + + # Check entry data type + if type(entry) is not dict: + raise ValueError('Entry %d is not a YAML map: "%s"' % (i, entry)) + + # Validate "replacement" field + if 'replacement' not in entry \ + or entry['replacement'] is None \ + or entry['replacement'].strip() == '': + raise ValueError('Missing "replacement" field for entry %d' % i) + + # Determine the type for this entry. + types = [] + if 'exact' in entry: + source = '(?i)^%s%s$' % (base_url, clean_source(entry['exact'])) + replacement = entry['replacement'] + types.append('exact') + if 'prefix' in entry: + source = '(?i)^%s%s(.*)$' % (base_url, clean_source(entry['prefix'])) + replacement = entry['replacement'] + '$1' + types.append('prefix') + if 'regex' in entry: + source = entry['regex'] + replacement = entry['replacement'] + types.append('regex') + + # Ensure that there is no more than one "type" key. + if len(types) < 1: + raise ValueError('Entry %d does not have a type; see "replacement: %s"' + % (i, entry['replacement'])) + elif len(types) > 1: + raise ValueError('Entry %d has multiple types: %s; see "replacement: %s"' + % (i, ', '.join(types), entry['replacement'])) + + # Validate status code + status = 'temporary' + if 'status' in entry: + if entry['status'] in ('permanent', 'temporary', 'see other'): + status = entry['status'] + else: + raise ValueError('Invalid status "%s" for entry %d' % (entry['status'], i)) + + # Switch to Apache's preferred names + if status == 'temporary': + status = 'temp' + elif status == 'see other': + status = 'seeother' + + source = unquote(source) + replacement = unquote(replacement) + + return 'RedirectMatch %s "%s" "%s"' % (status, source, replacement) + + +def process_product(i, product): + """ + Given an index, and a product dictionary with one key, + ensure that the entry is valid, + and return an Apache RedirectMatch directive string. + """ + for key in product: + source = unquote('(?i)^/obo/%s$' % key) + replacement = unquote(product[key]) + + return 'RedirectMatch temp "%s" "%s"' % (source, replacement) + + +def translate_entries(yamldoc, base_url): + """ + Reads the field `entries` from the YAML document, processes each entry that is read using the + given base_url, and appends them all to a list of processed entries that is then returned. + """ + if 'entries' in yamldoc and type(yamldoc['entries']) is list: + entries = [] + for i, entry in enumerate(yamldoc['entries']): + entries.append(process_entry(base_url, i, entry)) + return entries + + +def write_entries(entries, yamlname, outfile): + """ + Write the given entries to the given outfile, indicating the source YAML file + from which the entries were extracted. + """ + outfile.write('# DO NOT EDIT THIS FILE!\n' + '# Automatically generated from "%s".\n' + '# Edit that source file then regenerate this file.\n\n' + % yamlname) + for entry in entries or []: + outfile.write('{}\n'.format(entry)) + + +def translate_base_redirects(yamldoc): + """ + Reads the fields `base_redirect` and `base_url` from the given YAML document and + generates a corresponding Apache directive string that is then returned. 
+ """ + if 'base_redirect' in yamldoc and type(yamldoc['base_redirect']) is str: + base_url = unquote(yamldoc['base_url']) + base_redirect = unquote(yamldoc['base_redirect']) + directive = 'RedirectMatch temp "(?i)^%s$" "%s"' % (base_url, base_redirect) + return directive + + +def append_base_redirect(base_redirect, idspace, outfile): + """ + Appends the given base_redirect string for the given idspace to the given output stream. + """ + if base_redirect: + outfile.write('# Base redirect for %s\n' % idspace) + outfile.write(base_redirect + '\n\n') + + +def translate_products(yamldoc): + """ + Reads the `products` field from the given YAML document, processes each product that is read, + and appends them all to a list of processed products that is then returned. + """ + if 'products' in yamldoc and type(yamldoc['products']) is list: + products = [] + for i, product in enumerate(yamldoc['products']): + products.append(process_product(i, product)) + return products + + +def append_products(products, idspace, outfile): + """ + Appends the given list of products for the given idspace to the given output stream. + """ + if products: + outfile.write('# Products for %s\n' % idspace) + for product in products: + outfile.write(product + '\n') + outfile.write('\n') + + +def translate_terms(yamldoc, idspace): + """ + Reads the `term_browser` field from the given YAML document, validates that it is a supported + term browser, and returns a corresponding Apache redirect statement. + """ + if 'term_browser' in yamldoc and yamldoc['term_browser'].strip().lower() == 'ontobee': + replacement = ('http://www.ontobee.org/browser/rdf.php?' + 'o=%s&iri=http://purl.obolibrary.org/obo/%s_$1' + % (idspace, idspace)) + return 'RedirectMatch seeother "^/obo/%s_(\d+)$" "%s"' % (idspace, replacement) + + +def append_term(term, idspace, outfile): + """ + Appends the given term for the given idspace to the given output stream. + """ + if term: + outfile.write('# Term redirect for %s\n' % idspace) + outfile.write(term + '\n\n') + + +# Parse command line arguments, +# read entries from the YAML file, +# and write the Apache .htaccess files. +def main(): + parser = ArgumentParser(description=''' + Translates YAML files to .htaccess. + + If a list of input YAML files is specified, then a .htaccess file is generated + corresponding to each given YAML file, containing the `entries` specified in the + YAML file. 
If a directory containing YAML files is specified instead, then in + addition, the base redirects, terms, and products specified in the YAML file of + each project will be appended to the top-level obo/.htaccess file in the given + output directory.''') + + # This option is required: + parser.add_argument('--output_dir', metavar='DIR', type=str, required=True, + help='Root directory to write to for project-specific .htaccess files') + # The following options cannot be used simultaneously, but one of them needs to be specified: + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument('--input_files', metavar='YML', type=str, nargs='+', + help='List of YAML input files') + group.add_argument('--input_dir', metavar='DIR', type=str, + help='Directory containing YAML input files') + args = parser.parse_args() + + # Create the output directory, failing if it already exists or if unsuccessful for another reason: + normalised_output_dir = os.path.normpath(args.output_dir) + try: + os.makedirs(normalised_output_dir) + except OSError as e: + print(e, file=sys.stderr) + sys.exit(1) + + entries = {} + base_redirects = {} + products = {} + terms = {} + if args.input_files: + # If only a sequence of YAML filenames is given, then just write the entries found within + # those files but not the base redirects, products, or terms. + for yamlname in args.input_files: + yamldoc = yaml.load(open(yamlname)) + if 'base_url' not in yamldoc \ + or type(yamldoc['base_url']) is not str: + raise ValueError('YAML document must contain "base_url" string') + base_url = yamldoc['base_url'] + + entries = translate_entries(yamldoc, base_url) + # Write the entries for the given project to its project-specific .htaccess file, located + # in a subdirectory under the given output directory: + ontid = re.sub('\.yml', '', os.path.basename(yamlname)) + os.mkdir('{}/{}'.format(normalised_output_dir, ontid)) + with open('{}/{}/.htaccess'.format(normalised_output_dir, ontid), 'w') as outfile: + write_entries(entries, yamlname, outfile) + elif args.input_dir: + normalised_input_dir = os.path.normpath(args.input_dir) + for yamlname in glob("{}/*.yml".format(normalised_input_dir)): + yamldoc = yaml.load(open(yamlname)) + + if 'base_url' not in yamldoc \ + or type(yamldoc['base_url']) is not str: + raise ValueError('YAML document must contain "base_url" string') + base_url = yamldoc['base_url'] + + if 'idspace' not in yamldoc \ + or type(yamldoc['idspace']) is not str: + raise ValueError('YAML document must contain "idspace" string') + + # `idspace` and `yamlroot` are synonyms. The former is taken from the `idspace` specified + # within the given YAML file, while the latter is derived from the filename. They need to + # match (up to a change of case - idspace is always uppercase while ontid is lower). + # If they do not match, emit a warning. 
+ idspace = yamldoc['idspace'] + yamlroot = re.sub('\.yml', '', os.path.basename(yamlname)) + if idspace.lower() != yamlroot.lower(): + print("WARNING: idspace: {} does not match filename {}".format(idspace, yamlname)) + + # Collect the entries for the current idspace: + entries[idspace] = translate_entries(yamldoc, base_url) + # Write the entries to the idspace's project-specific file located in its own subdirectory + # under the output directory: + os.mkdir('{}/{}'.format(normalised_output_dir, yamlroot)) + with open('{}/{}/.htaccess'.format(normalised_output_dir, yamlroot), 'w') as outfile: + write_entries(entries[idspace], yamlname, outfile) + # Extract the idspace's base redirects, products, and terms but do not write them yet: + base_redirects[idspace] = translate_base_redirects(yamldoc) + products[idspace] = translate_products(yamldoc) + terms[idspace] = translate_terms(yamldoc, idspace) + + # Now write the entries for the 'OBO' idspace to a global .htaccess file located at the top + # level of the output directory: + with open('{}/.htaccess'.format(normalised_output_dir), 'w') as outfile: + write_entries(entries['OBO'], '{}/obo.yml'.format(normalised_input_dir), outfile) + + # Append the base redirects, products, and terms to the global .htaccess file: + with open('{}/.htaccess'.format(normalised_output_dir), 'a') as outfile: + outfile.write('\n### Generated from project configuration files\n\n') + for idspace in sorted(base_redirects): + append_base_redirect(base_redirects[idspace], idspace, outfile) + for idspace in sorted(products): + append_products(products[idspace], idspace, outfile) + for idspace in sorted(terms): + append_term(terms[idspace], idspace, outfile) + + +if __name__ == "__main__": + main() From bcc220f3e60ac2025887588ff825b63e3bdf2b16 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Wed, 6 Feb 2019 18:34:10 -0500 Subject: [PATCH 02/20] small change to translate_yaml.py; will now clobber files if they exist rather than failing --- tools/translate_yaml.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/tools/translate_yaml.py b/tools/translate_yaml.py index 0efff172..370415f2 100755 --- a/tools/translate_yaml.py +++ b/tools/translate_yaml.py @@ -285,13 +285,13 @@ def main(): help='Directory containing YAML input files') args = parser.parse_args() - # Create the output directory, failing if it already exists or if unsuccessful for another reason: + # Create the output directory, if it already exist. If this isn't possible, fail. Note that if + # the directory already exists, then the files inside will be overwritten. normalised_output_dir = os.path.normpath(args.output_dir) try: os.makedirs(normalised_output_dir) - except OSError as e: - print(e, file=sys.stderr) - sys.exit(1) + except FileExistsError as e: + pass entries = {} base_redirects = {} @@ -310,9 +310,15 @@ def main(): entries = translate_entries(yamldoc, base_url) # Write the entries for the given project to its project-specific .htaccess file, located # in a subdirectory under the given output directory: - ontid = re.sub('\.yml', '', os.path.basename(yamlname)) - os.mkdir('{}/{}'.format(normalised_output_dir, ontid)) - with open('{}/{}/.htaccess'.format(normalised_output_dir, ontid), 'w') as outfile: + yamlroot = re.sub('\.yml', '', os.path.basename(yamlname)) + + # Create the subdirectory; if it already exists, the files inside will be overwritten. 
+ try: + os.mkdir('{}/{}'.format(normalised_output_dir, yamlroot)) + except FileExistsError as e: + pass + + with open('{}/{}/.htaccess'.format(normalised_output_dir, yamlroot), 'w') as outfile: write_entries(entries, yamlname, outfile) elif args.input_dir: normalised_input_dir = os.path.normpath(args.input_dir) @@ -330,7 +336,7 @@ def main(): # `idspace` and `yamlroot` are synonyms. The former is taken from the `idspace` specified # within the given YAML file, while the latter is derived from the filename. They need to - # match (up to a change of case - idspace is always uppercase while ontid is lower). + # match (up to a change of case - idspace is always uppercase while yamlroot is lower). # If they do not match, emit a warning. idspace = yamldoc['idspace'] yamlroot = re.sub('\.yml', '', os.path.basename(yamlname)) @@ -340,8 +346,12 @@ def main(): # Collect the entries for the current idspace: entries[idspace] = translate_entries(yamldoc, base_url) # Write the entries to the idspace's project-specific file located in its own subdirectory - # under the output directory: - os.mkdir('{}/{}'.format(normalised_output_dir, yamlroot)) + # under the output directory. If it already exists, the files inside will be overwritten. + try: + os.mkdir('{}/{}'.format(normalised_output_dir, yamlroot)) + except FileExistsError: + pass + with open('{}/{}/.htaccess'.format(normalised_output_dir, yamlroot), 'w') as outfile: write_entries(entries[idspace], yamlname, outfile) # Extract the idspace's base redirects, products, and terms but do not write them yet: From f5cf7c1e1d8d1ba5b7148e97e54cd56e6e2169f0 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 7 Feb 2019 18:29:12 -0500 Subject: [PATCH 03/20] move validation out of Makefile to python script (leave old Makefile receipe steps in there for now, though) --- Makefile | 47 +++++++++++++----- tools/config.schema.json | 103 +++++++++++++++++++++++++++++++++++++++ tools/translate_yaml.py | 68 +++++++++++++++++--------- 3 files changed, 183 insertions(+), 35 deletions(-) create mode 100644 tools/config.schema.json diff --git a/Makefile b/Makefile index 7109036b..8464dcba 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,6 @@ # Required software: # # - [GNU Make](http://www.gnu.org/software/make/) to run this file -# - [kwalify](http://www.kuwata-lab.com/kwalify/) for YAML validation # - [Python 3](https://www.python.org/downloads/) to run scripts # - [PyYAML](http://pyyaml.org/wiki/PyYAML) for translation to Apache # - [travis.rb](https://github.com/travis-ci/travis.rb) for Travis-CI @@ -53,9 +52,12 @@ SHELL := bash ### Basic Operations -# Default goal: Remove generated files, validate config, and regenerate. +# Default goal: Remove generated files and regenerate. .PHONY: all -all: clean validate build +all: clean build + +.PHONY: old +old: clean validateold buildold # Remove directories with generated files. .PHONY: clean @@ -68,16 +70,20 @@ clean: # Use kwalify and the tools/config.schema.yml # to validate all YAML configuration files. # If any INVALID results are found, exit with an error. -.PHONY: validate -validate: +.PHONY: validateold +validateold: kwalify -f tools/config.schema.yml config/*.yml \ | awk '{print} /INVALID/ {status=1} END {exit status}' # Validate a single configuration file. -.PHONY: validate-% -validate-%: +.PHONY: validateold-% +validateold-%: kwalify -f tools/config.schema.yml config/$*.yml +# Check code style for python source files. 
+# || true is appended to force make to ignore the exit code from pycodestyle +style: + pep8 --max-line-length=100 --ignore E129,E126,E121,E111,E114 tools/translate_yaml.py || true ### Generate Apache Config # @@ -126,12 +132,20 @@ temp/obo/%/.htaccess: config/%.yml # Build temp files for a single project. .PHONY: build-% -build-%: validate-% temp/obo/%/.htaccess temp/base_redirects/%.htaccess temp/products/%.htaccess temp/terms/%.htaccess +build-%: + tools/translate_yaml.py --input_files config/$*.yml --output_dir temp + @echo "Built files in temp/$*" + +.PHONY: buildold-% +buildold-%: validateold-% temp/obo/%/.htaccess temp/base_redirects/%.htaccess temp/products/%.htaccess temp/terms/%.htaccess @echo "Built files in temp/$*" backup/: mkdir $@ +www/obo/: + mkdir -p $@ + # Get name of a dated-backup directory, in a portable way. BACKUP = backup/obo-$(shell python -c "import time,os;print(time.strftime('%Y%m%d-%H%M%S',time.gmtime(os.path.getmtime('www/obo'))))") @@ -139,11 +153,18 @@ BACKUP = backup/obo-$(shell python -c "import time,os;print(time.strftime('%Y%m% # and move the special `obo` .htaccess file. # Generate .htaccess files for all YAML configuration files. .PHONY: build -build: $(foreach o,$(ONTOLOGY_IDS),temp/obo/$o/.htaccess) -build: $(foreach o,$(ONTOLOGY_IDS),temp/base_redirects/$o.htaccess) -build: $(foreach o,$(ONTOLOGY_IDS),temp/products/$o.htaccess) -build: $(foreach o,$(ONTOLOGY_IDS),temp/terms/$o.htaccess) -build: | backup/ +build: | backup/ www/obo/ + tools/translate_yaml.py --input_dir config --output_dir temp/obo + rm -rf temp/obo/obo + -test -e www/obo && mv www/obo $(BACKUP) + mv temp/obo www/obo + +.PHONY: buildold +buildold: $(foreach o,$(ONTOLOGY_IDS),temp/obo/$o/.htaccess) +buildold: $(foreach o,$(ONTOLOGY_IDS),temp/base_redirects/$o.htaccess) +buildold: $(foreach o,$(ONTOLOGY_IDS),temp/products/$o.htaccess) +buildold: $(foreach o,$(ONTOLOGY_IDS),temp/terms/$o.htaccess) +buildold: | backup/ cat temp/obo/obo/.htaccess > temp/obo/.htaccess echo '' >> temp/obo/.htaccess echo '### Generated from project configuration files' >> temp/obo/.htaccess diff --git a/tools/config.schema.json b/tools/config.schema.json new file mode 100644 index 00000000..34d43c4f --- /dev/null +++ b/tools/config.schema.json @@ -0,0 +1,103 @@ +{ + "properties": { + "idspace": { + "_comment": "See issue #82", + "type": "string", + "pattern": "^[A-Za-z][A-Za-z0-9_]+$" + }, + "base_url": { + "type": "string", + "pattern": "^\\/obo" + }, + "base_redirect": { + "type": "string" + }, + "products": { + "type": "array", + "items": { + "type": "object", + "_comment": "How to make *.owl mandatory?", + "patternProperties": { + "\\.owl$": { "type": "string" }, + "\\.obo$": { "type": "string" } + } + } + }, + "term_browser": { + "type": "string", + "pattern": "^(ontobee|custom)$" + }, + "example_terms": { + "type": "array", + "items": { + "type": "string" + } + }, + "tests": { + "type": "array", + "items": { + "type": "object", + "properties": { + "from": { + "type": "string", + "pattern": "^\\/" + }, + "to": { + "type": "string", + "pattern": "^(https?|ftp)\\:\\/\\/[a-zA-Z0-9][\\/\\.\\-\\:\\?\\=\\&\\#\\%\\!\\$\\~\\+\\w]+$" + } + }, + "required": ["from", "to"] + } + }, + "entries": { + "type": "array", + "items": { + "type": "object", + "properties": { + "exact": { + "_comment": "Note: JSON-Schema cannot ensure that `exact` mappings are unique", + "type": "string", + "pattern": "^\\/" + }, + "prefix": { + "_comment": "Note: JSON-Schema cannot ensure that `prefix` mappings are unique", + "type": 
"string", + "pattern": "^\\/" + }, + "regex": { + "_comment": "Note: JSON-Schema cannot ensure that `regex` mappings are unique", + "type": "string" + }, + "replacement": { + "type": "string", + "pattern": "^(https?|ftp)\\:\\/\\/[a-zA-Z0-9][\\/\\.\\-\\:\\?\\=\\&\\#\\%\\!\\$\\~\\+\\w]+$" + }, + "status": { + "type": "string", + "pattern": "^(permanent|temporary|see other)$" + }, + "tests": { + "type": "array", + "items": { + "type": "object", + "properties": { + "from": { + "type": "string", + "pattern": "^\\/" + }, + "to": { + "type": "string", + "pattern": "^(https?|ftp)\\:\\/\\/[a-zA-Z0-9][\\/\\.\\-\\:\\?\\=\\&\\#\\%\\!\\$\\~\\+\\w]+$" + } + }, + "required": ["from", "to"] + } + } + }, + "required": ["replacement"] + } + } + }, + "required": ["idspace", "base_url", "term_browser"] +} diff --git a/tools/translate_yaml.py b/tools/translate_yaml.py index 370415f2..f3a605a9 100755 --- a/tools/translate_yaml.py +++ b/tools/translate_yaml.py @@ -73,6 +73,8 @@ supported. When `term_browser: custom` is used no output is generated. """ +import json +import jsonschema import re import os import sys @@ -82,6 +84,39 @@ from glob import glob from urllib.parse import unquote +pwd = os.path.dirname(os.path.realpath(__file__)) +schemafile = "{}/config.schema.json".format(pwd) + + +def load_and_validate(yamlname, schema): + try: + yamlfile = open(yamlname) + yamldoc = yaml.load(yamlfile) + jsonschema.validate(yamldoc, schema) + except FileNotFoundError as e: + print(e, file=sys.stderr) + sys.exit(1) + except yaml.YAMLError as e: + print(e, file=sys.stderr) + sys.exit(1) + except jsonschema.exceptions.ValidationError as e: + print("In file: {}:\n{}".format(yamlname, e), file=sys.stderr) + sys.exit(1) + + # These errors should not occur, since they should have been caught by the above jsonschema + # validation step, but double-check anyway: + if 'base_url' not in yamldoc \ + or type(yamldoc['base_url']) is not str: + print('YAML document must contain "base_url" string', file=sys.stderr) + sys.exit(1) + + if 'idspace' not in yamldoc \ + or type(yamldoc['idspace']) is not str: + print('YAML document must contain "idspace" string', file=sys.stderr) + sys.exit(1) + + return yamldoc + def clean_source(s): """ @@ -108,7 +143,8 @@ def process_entry(base_url, i, entry): if type(entry) is not dict: raise ValueError('Entry %d is not a YAML map: "%s"' % (i, entry)) - # Validate "replacement" field + # Validate that "replacement" field exists. If it is missing it should have been caught by the + # jsonschema validation step (see above), but we double-check anyway: if 'replacement' not in entry \ or entry['replacement'] is None \ or entry['replacement'].strip() == '': @@ -137,7 +173,8 @@ def process_entry(base_url, i, entry): raise ValueError('Entry %d has multiple types: %s; see "replacement: %s"' % (i, ', '.join(types), entry['replacement'])) - # Validate status code + # Validate status code. Any error here should have been caught by the jsonschema validation + # (see above), but we double-check here anyway: status = 'temporary' if 'status' in entry: if entry['status'] in ('permanent', 'temporary', 'see other'): @@ -293,6 +330,7 @@ def main(): except FileExistsError as e: pass + schema = json.load(open(schemafile)) entries = {} base_redirects = {} products = {} @@ -301,39 +339,25 @@ def main(): # If only a sequence of YAML filenames is given, then just write the entries found within # those files but not the base redirects, products, or terms. 
for yamlname in args.input_files: - yamldoc = yaml.load(open(yamlname)) - if 'base_url' not in yamldoc \ - or type(yamldoc['base_url']) is not str: - raise ValueError('YAML document must contain "base_url" string') + yamldoc = load_and_validate(yamlname, schema) base_url = yamldoc['base_url'] - + # Extract the entries for the project from the YAML file: entries = translate_entries(yamldoc, base_url) # Write the entries for the given project to its project-specific .htaccess file, located - # in a subdirectory under the given output directory: + # in a subdirectory under the given output directory. Note that if the subdirectory already + # exists, the files inside will simply be overriden: yamlroot = re.sub('\.yml', '', os.path.basename(yamlname)) - - # Create the subdirectory; if it already exists, the files inside will be overwritten. try: os.mkdir('{}/{}'.format(normalised_output_dir, yamlroot)) except FileExistsError as e: pass - with open('{}/{}/.htaccess'.format(normalised_output_dir, yamlroot), 'w') as outfile: write_entries(entries, yamlname, outfile) elif args.input_dir: normalised_input_dir = os.path.normpath(args.input_dir) for yamlname in glob("{}/*.yml".format(normalised_input_dir)): - yamldoc = yaml.load(open(yamlname)) - - if 'base_url' not in yamldoc \ - or type(yamldoc['base_url']) is not str: - raise ValueError('YAML document must contain "base_url" string') + yamldoc = load_and_validate(yamlname, schema) base_url = yamldoc['base_url'] - - if 'idspace' not in yamldoc \ - or type(yamldoc['idspace']) is not str: - raise ValueError('YAML document must contain "idspace" string') - # `idspace` and `yamlroot` are synonyms. The former is taken from the `idspace` specified # within the given YAML file, while the latter is derived from the filename. They need to # match (up to a change of case - idspace is always uppercase while yamlroot is lower). @@ -351,9 +375,9 @@ def main(): os.mkdir('{}/{}'.format(normalised_output_dir, yamlroot)) except FileExistsError: pass - with open('{}/{}/.htaccess'.format(normalised_output_dir, yamlroot), 'w') as outfile: write_entries(entries[idspace], yamlname, outfile) + # Extract the idspace's base redirects, products, and terms but do not write them yet: base_redirects[idspace] = translate_base_redirects(yamldoc) products[idspace] = translate_products(yamldoc) From 7040c17e4f2250d00dbb7050a8368ef93b625718 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 10 Feb 2019 09:11:10 -0500 Subject: [PATCH 04/20] add instructions to readme for optionally automatically syncing guest additions --- README.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/README.md b/README.md index 76b51b00..34dcd1b3 100644 --- a/README.md +++ b/README.md @@ -202,6 +202,29 @@ or delete the VM with You can test against the production PURL server using `make test-production`. We only make one request per second, to avoid abusing the server, so this can take along time. +### Optional: Sync VirtualBox Guest Additions + +If you keep your development VM for any length of time you may be presented with this message upon starting your VM: +``` +==> default: A newer version of the box 'ubuntu/trusty64' is available! You currently +==> default: have version '20190122.1.1'. The latest is version '20190206.0.0'. Run +==> default: `vagrant box update` to update. +``` +If you upgrade, then the next time you resume your box you may receive the warning: +``` +[default] The guest additions on this VM do not match the install version of +VirtualBox! 
This may cause things such as forwarded ports, shared +folders, and more to not work properly. If any of those things fail on +this machine, please update the guest additions and repackage the +box. +``` + +To automatically sync with VirtualBox's Guest Additions at startup (and thus avoid this warning) you can install `vagrant-vbguest` like so: + +- `vagrant plugin install vagrant-vbguest` (in the tools directory on the host machine) + +Now, whenever you bring up your VM, it will check the version of the VM's guest additions and automatically bring them up to date whenever this is needed. + ## Deployment From 6ed13267d5e0b9e3d1af3ba0bfed3c32fba7da2c Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Tue, 12 Feb 2019 13:22:18 -0500 Subject: [PATCH 05/20] remove old directives from Makefile, small improvements to translate_yaml.py --- Makefile | 109 +++---------------- tools/config.schema.yml | 91 ---------------- tools/translate-base-redirects.py | 55 ---------- tools/translate-entries.py | 173 ------------------------------ tools/translate-products.py | 64 ----------- tools/translate-terms.py | 54 ---------- tools/translate_yaml.py | 40 +++---- 7 files changed, 35 insertions(+), 551 deletions(-) delete mode 100644 tools/config.schema.yml delete mode 100755 tools/translate-base-redirects.py delete mode 100755 tools/translate-entries.py delete mode 100755 tools/translate-products.py delete mode 100755 tools/translate-terms.py diff --git a/Makefile b/Makefile index 8464dcba..44e556eb 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,8 @@ # 2015-11-06 # James A. Overton # +# Last major modification: 2019-02-10, Michael Cuffaro +# # This file contains code for working with # Open Biomedical Ontoloiges (OBO) # Persistent Uniform Resource Locators (PURLs). @@ -56,90 +58,17 @@ SHELL := bash .PHONY: all all: clean build -.PHONY: old -old: clean validateold buildold - # Remove directories with generated files. .PHONY: clean clean: rm -rf temp tests - -### Validate YAML Config -# -# Use kwalify and the tools/config.schema.yml -# to validate all YAML configuration files. -# If any INVALID results are found, exit with an error. -.PHONY: validateold -validateold: - kwalify -f tools/config.schema.yml config/*.yml \ - | awk '{print} /INVALID/ {status=1} END {exit status}' - -# Validate a single configuration file. -.PHONY: validateold-% -validateold-%: - kwalify -f tools/config.schema.yml config/$*.yml - -# Check code style for python source files. -# || true is appended to force make to ignore the exit code from pycodestyle -style: - pep8 --max-line-length=100 --ignore E129,E126,E121,E111,E114 tools/translate_yaml.py || true - -### Generate Apache Config -# -# Convert the YAML configuration files -# to Apache .htaccess files with RedirectMatch directives. -# There are three types: -# -# - base_redirects: when the project's base_url points to something -# - product: for a project's main OWL file -# - term: for a project's terms -# - entries: PURLs under the project's base_url -# -# The first three are inserted into www/obo/.htaccess -# while the last is in the project's www/obo/project/.htaccess -# -# These files are built in the `temp/` directory -# then `temp/obo` replaces `www/obo` as the very last step -# to keep Apache downtime to an absolute minimum. 
-temp/obo temp/base_redirects temp/products temp/terms: - mkdir -p $@ - -temp/base_redirects/%.htaccess: config/%.yml temp/base_redirects - tools/translate-base-redirects.py $< $@ - -temp/products/%.htaccess: config/%.yml temp/products - tools/translate-products.py $< $@ - -temp/terms/%.htaccess: config/%.yml temp/terms - tools/translate-terms.py $< $@ - -# Generate temp/obo/foo/.htaccess file -# and a symbolic link from the IDSPACE: -# temp/obo/FOO -> temp/obo/foo -# NOTE: The last line removes spurious links -# on case insensitive file systems such as Mac OS X. -temp/obo/%/.htaccess: config/%.yml - mkdir -p temp/obo/$* - tools/translate-entries.py $< $@ - < $< \ - grep '^idspace:' \ - | sed 's/^idspace://' \ - | tr -d ' ' \ - | awk '{print "$* temp/obo/" $$0}' \ - | xargs -t ln -s - rm -f temp/obo/$*/$* - # Build temp files for a single project. .PHONY: build-% build-%: tools/translate_yaml.py --input_files config/$*.yml --output_dir temp @echo "Built files in temp/$*" -.PHONY: buildold-% -buildold-%: validateold-% temp/obo/%/.htaccess temp/base_redirects/%.htaccess temp/products/%.htaccess temp/terms/%.htaccess - @echo "Built files in temp/$*" - backup/: mkdir $@ @@ -149,34 +78,14 @@ www/obo/: # Get name of a dated-backup directory, in a portable way. BACKUP = backup/obo-$(shell python -c "import time,os;print(time.strftime('%Y%m%d-%H%M%S',time.gmtime(os.path.getmtime('www/obo'))))") -# Convert all YAML configuration files to .htaccess -# and move the special `obo` .htaccess file. -# Generate .htaccess files for all YAML configuration files. +# Convert all YAML configuration files to .htaccess. .PHONY: build build: | backup/ www/obo/ tools/translate_yaml.py --input_dir config --output_dir temp/obo rm -rf temp/obo/obo -test -e www/obo && mv www/obo $(BACKUP) mv temp/obo www/obo - -.PHONY: buildold -buildold: $(foreach o,$(ONTOLOGY_IDS),temp/obo/$o/.htaccess) -buildold: $(foreach o,$(ONTOLOGY_IDS),temp/base_redirects/$o.htaccess) -buildold: $(foreach o,$(ONTOLOGY_IDS),temp/products/$o.htaccess) -buildold: $(foreach o,$(ONTOLOGY_IDS),temp/terms/$o.htaccess) -buildold: | backup/ - cat temp/obo/obo/.htaccess > temp/obo/.htaccess - echo '' >> temp/obo/.htaccess - echo '### Generated from project configuration files' >> temp/obo/.htaccess - echo '' >> temp/obo/.htaccess - cat temp/base_redirects/*.htaccess >> temp/obo/.htaccess - cat temp/products/*.htaccess >> temp/obo/.htaccess - cat temp/terms/*.htaccess >> temp/obo/.htaccess - rm -rf temp/obo/obo - rm -rf temp/obo/OBO - -test -e www/obo && mv www/obo $(BACKUP) - mv temp/obo www/obo - + rmdir temp ### Test Development Apache Config # @@ -290,3 +199,13 @@ migrate-%: || curl --fail -o migrations/$*.xml "$(PURL_XML)/obo/$**" mkdir -p config tools/migrate.py $* migrations/$*.xml config/$*.yml + +### Check code style for python source files. +# || true is appended to force make to ignore the exit code from pycodestyle +.PHONY: style +style: + pep8 --max-line-length=100 --ignore E129,E126,E121,E111,E114 tools/*.py || true + +# Run the delinter +lint: + python3 -m pyflakes tools/*.py || true diff --git a/tools/config.schema.yml b/tools/config.schema.yml deleted file mode 100644 index b98069e6..00000000 --- a/tools/config.schema.yml +++ /dev/null @@ -1,91 +0,0 @@ -#### -#### Use kwalify and this schema to validate the config files. 
-#### Example: -#### kwalify -E -m ./tools/config.schema.yml -#### kwalify -E -f ./tools/config.schema.yml ./config/ddanat.yml -#### -type: map -mapping: - "idspace": - type: str - pattern: /^[A-Za-z][A-Za-z0-9_]+$/ # See issue #82 - required: true - "base_url": - type: str - pattern: /^\/obo/ - required: true - "base_redirect": - type: str - required: false - "products": - type: seq - required: false - sequence: - - type: any - "term_browser": - type: str - pattern: /^(ontobee|custom)$/ - required: true - "example_terms": - type: seq - required: false - sequence: - - type: str - "tests": - type: seq - required: false - sequence: - - type: map - mapping: - "from": - type: str - pattern: /^\// - required: true - "to": - type: str - pattern: "/^(https?|ftp)\:\/\/[a-zA-Z0-9][\/\.\-\:\?\=\&\#\%\!\$\~\+\w]+$/" - required: true - "entries": - type: seq - required: false - sequence: - - type: map - mapping: - "exact": - type: str - pattern: /^\// - required: false - unique: true - "prefix": - type: str - pattern: /^\// - required: false - unique: true - "regex": - type: str - required: false - unique: true - "replacement": - type: str - pattern: "/^(https?|ftp)\:\/\/[a-zA-Z0-9][\/\.\-\:\?\=\&\#\%\!\$\~\+\w]+$/" - required: true - unique: false - "status": - type: str - required: false - pattern: /^(permanent|temporary|see other)$/ - "tests": - type: seq - required: false - sequence: - - type: map - mapping: - "from": - type: str - pattern: /^\// - required: true - "to": - type: str - pattern: "/^(https?|ftp)\:\/\/[a-zA-Z0-9][\/\.\-\:\?\=\&\#\%\!\$\~\+\w]+$/" - required: true - diff --git a/tools/translate-base-redirects.py b/tools/translate-base-redirects.py deleted file mode 100755 index 108c76a4..00000000 --- a/tools/translate-base-redirects.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python3 -# -# Reads a YAML file with a `base_redirect` field -# and writes Apache mod_alias RedirectMatch directives. See: -# -# https://httpd.apache.org/docs/2.4/mod/mod_alias.html -# -# If the YAML file does not contain `base_redirect`, -# then no output is generated. - -import argparse, sys, yaml, re -from urllib.parse import unquote - -header_template = '''# Base redirect for %s -''' - -# Parse command line arguments, -# read entries from the YAML file, -# and write the Apache .htaccess file. -def main(): - parser = argparse.ArgumentParser(description='Translate YAML `base_redirect` to .htaccess') - parser.add_argument('yaml_file', - type=argparse.FileType('r'), - default=sys.stdin, - nargs='?', - help='read from the YAML file (or STDIN)') - parser.add_argument('htaccess_file', - type=argparse.FileType('w'), - default=sys.stdout, - nargs='?', - help='write to the .htaccess file (or STDOUT)') - args = parser.parse_args() - - # Load YAML document and look for 'entries' list. 
- document = yaml.load(args.yaml_file) - - if not 'idspace' in document \ - or type(document['idspace']) is not str: - raise ValueError('YAML document must contain "idspace" string') - idspace = document['idspace'] - - if not 'base_url' in document \ - or type(document['base_url']) is not str: - raise ValueError('YAML document must contain "base_url" string') - - if 'base_redirect' in document and type(document['base_redirect']) is str: - base_url = unquote(document['base_url']) - base_redirect = unquote(document['base_redirect']) - args.htaccess_file.write(header_template % idspace) - directive = 'RedirectMatch temp "(?i)^%s$" "%s"' % (base_url, base_redirect) - args.htaccess_file.write(directive + '\n\n') - - -if __name__ == "__main__": - main() diff --git a/tools/translate-entries.py b/tools/translate-entries.py deleted file mode 100755 index 0794795a..00000000 --- a/tools/translate-entries.py +++ /dev/null @@ -1,173 +0,0 @@ -#!/usr/bin/env python3 -# -# Reads a YAML file with a `base_url` and a list of `entries` -# and writes Apache mod_alias RedirectMatch directives. See: -# -# https://httpd.apache.org/docs/2.4/mod/mod_alias.html -# -# There are three types of entries: -# -# - exact: match an exact URL string -# and redirect to an exact URL -# - prefix: match a URL prefix string, -# from the start of the request URL, -# and redirect to the "replacement" field plus -# any string following the prefix in the request -# - regex: use any regular expression -# allowed by RedirectMatch -# -# Entries can have these fields: -# -# - exact/prefix/regex: the URL string or regex to match; -# exactly one required; -# should begin with a slash "/" except for some regexs -# - replacement: the URL string or regex to redirect to; -# exactly one required -# - status: HTTP status for redirect; -# zero or one value; defaults to "temporary"; -# can be "permanent" (301) or "temporary" (302); -# (Apache uses "temp" for "temporary") -# - tests: an optional list of tests -# each test requires a `from` value, like `exact`, -# and a `to` value, like `replacement` -# -# See the `tools/config.schema.yml` for more details. -# -# For the "exact" and "prefix" types, -# the URL strings are rewritten as escaped regular expressions, -# with a "^base_url" prefix and a "$" suffix. -# Any regular expression special characters (e.g. ., *, ?, []) -# will be escaped: they will not match as regular expressions. -# -# For the "prefix" type, "(.*)" is also appended to the "prefix" field -# and "$1" is appended to the "to" field, -# to configure the prefix match. -# -# For the "regex" type, the "" and "to" fields -# are assumed to be valid regular expressions, -# **including** the `base_url`, -# and are not checked or modified. -# -# **Only** use "regex" if "exact" or "prefix" are insufficient. -# -# The order of YAML objects will be the order -# of the Apache directives. -# If no entries are found, -# the generated file will have a header comment -# without any directives. - -import argparse, sys, yaml, re -from urllib.parse import unquote - -header_template = '''# DO NOT EDIT THIS FILE! -# Automatically generated from "%s". -# Edit that source file then regenerate this file. - -''' - -# Parse command line arguments, -# read entries from the YAML file, -# and write the Apache .htaccess file. 
-def main(): - parser = argparse.ArgumentParser(description='Translate YAML `entries` to .htaccess') - parser.add_argument('yaml_file', - type=argparse.FileType('r'), - default=sys.stdin, - nargs='?', - help='read from the YAML file (or STDIN)') - parser.add_argument('htaccess_file', - type=argparse.FileType('w'), - default=sys.stdout, - nargs='?', - help='write to the .htaccess file (or STDOUT)') - args = parser.parse_args() - - # Load YAML document and look for 'entries' list. - document = yaml.load(args.yaml_file) - - if not 'base_url' in document \ - or type(document['base_url']) is not str: - raise ValueError('YAML document must contain "base_url" string') - base_url = document['base_url'] - - args.htaccess_file.write(header_template % args.yaml_file.name) - - if 'entries' in document and type(document['entries']) is list: - i = 0 - for entry in document['entries']: - i += 1 - args.htaccess_file.write(process_entry(base_url, i, entry) + '\n') - - -def clean_source(s): - """Given a URL string, - return an escaped regular expression for matching that string. - Only forward-slashes are not escaped.""" - r = s.strip() - r = re.escape(r) - r = r.replace('\\/', '/') - return r - - -def process_entry(base_url, i, entry): - """Given a base URL, an index, and an entry dictionary, - ensure that the entry is valid, - and return an Apache RedirectMatch directive string.""" - source = '' - replacement = '' - - # Check entry data type - if type(entry) is not dict: - raise ValueError('Entry %d is not a YAML map: "%s"' % (i, entry)) - - # Validate "replacement" field - if not 'replacement' in entry \ - or entry['replacement'] is None \ - or entry['replacement'].strip() == '': - raise ValueError('Missing "replacement" field for entry %d' % i) - - # Determine the type for this entry. - types = [] - if 'exact' in entry: - source = '(?i)^%s%s$' % (base_url, clean_source(entry['exact'])) - replacement = entry['replacement'] - types.append('exact') - if 'prefix' in entry: - source = '(?i)^%s%s(.*)$' % (base_url, clean_source(entry['prefix'])) - replacement = entry['replacement'] + '$1' - types.append('prefix') - if 'regex' in entry: - source = entry['regex'] - replacement = entry['replacement'] - types.append('regex') - - # Ensure that there is no more than one "type" key. - if len(types) < 1: - raise ValueError('Entry %d does not have a type; see "replacement: %s"' - % (i, entry['replacement'])) - elif len(types) > 1: - raise ValueError('Entry %d has multiple types: %s; see "replacement: %s"' - % (i, ', '.join(types), entry['replacement'])) - - # Validate status code - status = 'temporary' - if 'status' in entry: - if entry['status'] in ('permanent', 'temporary', 'see other'): - status = entry['status'] - else: - raise ValueError('Invalid status "%s" for entry %d' % (entry['status'], i)) - - # Switch to Apache's preferred names - if status == 'temporary': - status = 'temp' - elif status == 'see other': - status = 'seeother' - - source = unquote(source) - replacement = unquote(replacement) - - return 'RedirectMatch %s "%s" "%s"' % (status, source, replacement) - - -if __name__ == "__main__": - main() diff --git a/tools/translate-products.py b/tools/translate-products.py deleted file mode 100755 index b8baa748..00000000 --- a/tools/translate-products.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python3 -# -# Reads a YAML file with a list of `products` -# and writes Apache mod_alias RedirectMatch directives. 
See: -# -# https://httpd.apache.org/docs/2.4/mod/mod_alias.html -# -# The order of YAML objects will be the order -# of the Apache directives. -# If no products are found, no output is generated. - -import argparse, sys, yaml, re -from urllib.parse import unquote - -header_template = '''# Products for %s -''' - -# Parse command line arguments, -# read entries from the YAML file, -# and write the Apache .htaccess file. -def main(): - parser = argparse.ArgumentParser(description='Translate YAML `products` to .htaccess') - parser.add_argument('yaml_file', - type=argparse.FileType('r'), - default=sys.stdin, - nargs='?', - help='read from the YAML file (or STDIN)') - parser.add_argument('htaccess_file', - type=argparse.FileType('w'), - default=sys.stdout, - nargs='?', - help='write to the .htaccess file (or STDOUT)') - args = parser.parse_args() - - # Load YAML document and look for 'entries' list. - document = yaml.load(args.yaml_file) - - if not 'idspace' in document \ - or type(document['idspace']) is not str: - raise ValueError('YAML document must contain "idspace" string') - idspace = document['idspace'] - - if 'products' in document and type(document['products']) is list: - args.htaccess_file.write(header_template % idspace) - i = 0 - for product in document['products']: - i += 1 - args.htaccess_file.write(process_product(i, product) + '\n') - args.htaccess_file.write('\n') - - -def process_product(i, product): - """Given an index, and a product dictionary with one key, - ensure that the entry is valid, - and return an Apache RedirectMatch directive string.""" - for key in product: - source = unquote('(?i)^/obo/%s$' % key) - replacement = unquote(product[key]) - - return 'RedirectMatch temp "%s" "%s"' % (source, replacement) - - -if __name__ == "__main__": - main() diff --git a/tools/translate-terms.py b/tools/translate-terms.py deleted file mode 100755 index ed614c1c..00000000 --- a/tools/translate-terms.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python3 -# -# Reads a YAML file with a `term_browser` entry -# and an `example_terms` list, -# and writes Apache mod_alias RedirectMatch directives. See: -# -# https://httpd.apache.org/docs/2.4/mod/mod_alias.html -# -# The order of YAML objects will be the order -# of the Apache directives. -# If no example_terms are found, no output is generated. -# -# Note: currently works only for `term_browser: ontobee`. -# When `term_browser: custom` no output is generated. - -import argparse, sys, yaml, re - -header_template = '''# Term redirect for %s -''' - -# Parse command line arguments, -# read entries from the YAML file, -# and write the Apache .htaccess file. -def main(): - parser = argparse.ArgumentParser(description='Translate YAML `example_terms` to .htaccess') - parser.add_argument('yaml_file', - type=argparse.FileType('r'), - default=sys.stdin, - nargs='?', - help='read from the YAML file (or STDIN)') - parser.add_argument('htaccess_file', - type=argparse.FileType('w'), - default=sys.stdout, - nargs='?', - help='write to the .htaccess file (or STDOUT)') - args = parser.parse_args() - - # Load YAML document and look for 'entries' list. 
- document = yaml.load(args.yaml_file) - - if not 'idspace' in document \ - or type(document['idspace']) is not str: - raise ValueError('YAML document must contain "idspace" string') - idspace = document['idspace'] - - if 'term_browser' in document and document['term_browser'].strip().lower() == 'ontobee': - args.htaccess_file.write(header_template % idspace) - replacement = 'http://www.ontobee.org/browser/rdf.php?o=%s&iri=http://purl.obolibrary.org/obo/%s_$1' % (idspace, idspace) - directive = 'RedirectMatch seeother "^/obo/%s_(\d+)$" "%s"' % (idspace, replacement) - args.htaccess_file.write(directive +'\n\n') - - -if __name__ == "__main__": - main() diff --git a/tools/translate_yaml.py b/tools/translate_yaml.py index f3a605a9..7dcae7b1 100755 --- a/tools/translate_yaml.py +++ b/tools/translate_yaml.py @@ -39,7 +39,7 @@ each test requires a `from` value, like `exact`, and a `to` value, like `replacement` -See the `tools/config.schema.yml` for more details. +See the `tools/config.schema.json` for more details. For the "exact" and "prefix" types, the URL strings are rewritten as escaped regular expressions, @@ -165,7 +165,7 @@ def process_entry(base_url, i, entry): replacement = entry['replacement'] types.append('regex') - # Ensure that there is no more than one "type" key. + # Ensure that there is exactly one "type" key. if len(types) < 1: raise ValueError('Entry %d does not have a type; see "replacement: %s"' % (i, entry['replacement'])) @@ -194,17 +194,16 @@ def process_entry(base_url, i, entry): return 'RedirectMatch %s "%s" "%s"' % (status, source, replacement) -def process_product(i, product): +def process_product(product): """ - Given an index, and a product dictionary with one key, + Given a product dictionary with one key, ensure that the entry is valid, and return an Apache RedirectMatch directive string. """ - for key in product: - source = unquote('(?i)^/obo/%s$' % key) - replacement = unquote(product[key]) - - return 'RedirectMatch temp "%s" "%s"' % (source, replacement) + key = [k for k in product].pop() + source = unquote('(?i)^/obo/%s$' % key) + replacement = unquote(product[key]) + return 'RedirectMatch temp "%s" "%s"' % (source, replacement) def translate_entries(yamldoc, base_url): @@ -221,8 +220,9 @@ def translate_entries(yamldoc, base_url): def write_entries(entries, yamlname, outfile): """ - Write the given entries to the given outfile, indicating the source YAML file - from which the entries were extracted. + Write the given entries to the given output stream, indicating the source YAML file + from which the entries were extracted. Note that it is assumed that the output stream, + `outfile` is open for writing. """ outfile.write('# DO NOT EDIT THIS FILE!\n' '# Automatically generated from "%s".\n' @@ -260,14 +260,15 @@ def translate_products(yamldoc): """ if 'products' in yamldoc and type(yamldoc['products']) is list: products = [] - for i, product in enumerate(yamldoc['products']): - products.append(process_product(i, product)) + for product in yamldoc['products']: + products.append(process_product(product)) return products def append_products(products, idspace, outfile): """ - Appends the given list of products for the given idspace to the given output stream. + Appends the given list of products for the given idspace to the given output stream. Note that it + it is assumed that the output stream `outfile` is open for appending. 
""" if products: outfile.write('# Products for %s\n' % idspace) @@ -290,7 +291,8 @@ def translate_terms(yamldoc, idspace): def append_term(term, idspace, outfile): """ - Appends the given term for the given idspace to the given output stream. + Appends the given term for the given idspace to the given output stream. Note that it is + assumed that the output stream `outfile` is open for appending. """ if term: outfile.write('# Term redirect for %s\n' % idspace) @@ -322,12 +324,12 @@ def main(): help='Directory containing YAML input files') args = parser.parse_args() - # Create the output directory, if it already exist. If this isn't possible, fail. Note that if - # the directory already exists, then the files inside will be overwritten. + # Create the output directory, if it does not already exist. If this isn't possible, fail. Note + # that if the directory already exists, then the files inside will be overwritten. normalised_output_dir = os.path.normpath(args.output_dir) try: os.makedirs(normalised_output_dir) - except FileExistsError as e: + except FileExistsError: pass schema = json.load(open(schemafile)) @@ -349,7 +351,7 @@ def main(): yamlroot = re.sub('\.yml', '', os.path.basename(yamlname)) try: os.mkdir('{}/{}'.format(normalised_output_dir, yamlroot)) - except FileExistsError as e: + except FileExistsError: pass with open('{}/{}/.htaccess'.format(normalised_output_dir, yamlroot), 'w') as outfile: write_entries(entries, yamlname, outfile) From 6b02ae9b21f2230424c14913fe6c34e6714f9ea5 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Tue, 12 Feb 2019 15:08:00 -0500 Subject: [PATCH 06/20] add some validation around products and base_url --- tools/translate_yaml.py | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/tools/translate_yaml.py b/tools/translate_yaml.py index 7dcae7b1..432ac217 100755 --- a/tools/translate_yaml.py +++ b/tools/translate_yaml.py @@ -93,10 +93,7 @@ def load_and_validate(yamlname, schema): yamlfile = open(yamlname) yamldoc = yaml.load(yamlfile) jsonschema.validate(yamldoc, schema) - except FileNotFoundError as e: - print(e, file=sys.stderr) - sys.exit(1) - except yaml.YAMLError as e: + except (FileNotFoundError, IsADirectoryError, yaml.YAMLError) as e: print(e, file=sys.stderr) sys.exit(1) except jsonschema.exceptions.ValidationError as e: @@ -115,6 +112,30 @@ def load_and_validate(yamlname, schema): print('YAML document must contain "idspace" string', file=sys.stderr) sys.exit(1) + # This is a possible error, since jsonschema is not sophisticated enough to validate this: + if os.path.basename(yamldoc['base_url']).lower() != yamldoc['idspace'].lower(): + print("WARNING: Base URL '{}' must end with '{}', not '{}'" + .format(yamldoc['base_url'], yamldoc['idspace'], os.path.basename(yamldoc['base_url']))) + + # There may be problems with the product list as well, which can't be validated in jsonschema: + if 'products' in yamldoc and type(yamldoc['products']) is list: + products_have_owl = False + for product_map in yamldoc['products']: + # Each product map has one key: + key = [k for k in product_map].pop() + if not (key.lower().endswith('.owl') or key.lower().endswith('.obo')): + # This actually could be validated using the schema by adding + # `"additionalProperties": false` right after `patternProperties`: + print("WARNING: In project '{}', product: '{}' does not end with '.owl' or '.obo'" + .format(yamldoc['idspace'], key)) + # This, however, cannot be validated using json.schema and must be done here. 
+ if key.endswith('.owl'): + products_have_owl = True + + if not products_have_owl: + print("WARNING: In project '{}': Mandatory .owl entry missing from product list." + .format(yamldoc['idspace'])) + return yamldoc @@ -356,6 +377,10 @@ def main(): with open('{}/{}/.htaccess'.format(normalised_output_dir, yamlroot), 'w') as outfile: write_entries(entries, yamlname, outfile) elif args.input_dir: + if not os.path.isdir(args.input_dir): + print("{} is not a directory.".format(args.input_dir)) + sys.exit(1) + normalised_input_dir = os.path.normpath(args.input_dir) for yamlname in glob("{}/*.yml".format(normalised_input_dir)): yamldoc = load_and_validate(yamlname, schema) From ed62b101cb7474cdbf499ce23897a15852423e28 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Tue, 12 Feb 2019 16:01:34 -0500 Subject: [PATCH 07/20] move products validation to translate_products() --- tools/config.schema.json | 1 - tools/translate_yaml.py | 34 +++++++++++++++------------------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/tools/config.schema.json b/tools/config.schema.json index 34d43c4f..65dbbcda 100644 --- a/tools/config.schema.json +++ b/tools/config.schema.json @@ -16,7 +16,6 @@ "type": "array", "items": { "type": "object", - "_comment": "How to make *.owl mandatory?", "patternProperties": { "\\.owl$": { "type": "string" }, "\\.obo$": { "type": "string" } diff --git a/tools/translate_yaml.py b/tools/translate_yaml.py index 432ac217..5e709b07 100755 --- a/tools/translate_yaml.py +++ b/tools/translate_yaml.py @@ -117,25 +117,6 @@ def load_and_validate(yamlname, schema): print("WARNING: Base URL '{}' must end with '{}', not '{}'" .format(yamldoc['base_url'], yamldoc['idspace'], os.path.basename(yamldoc['base_url']))) - # There may be problems with the product list as well, which can't be validated in jsonschema: - if 'products' in yamldoc and type(yamldoc['products']) is list: - products_have_owl = False - for product_map in yamldoc['products']: - # Each product map has one key: - key = [k for k in product_map].pop() - if not (key.lower().endswith('.owl') or key.lower().endswith('.obo')): - # This actually could be validated using the schema by adding - # `"additionalProperties": false` right after `patternProperties`: - print("WARNING: In project '{}', product: '{}' does not end with '.owl' or '.obo'" - .format(yamldoc['idspace'], key)) - # This, however, cannot be validated using json.schema and must be done here. - if key.endswith('.owl'): - products_have_owl = True - - if not products_have_owl: - print("WARNING: In project '{}': Mandatory .owl entry missing from product list." - .format(yamldoc['idspace'])) - return yamldoc @@ -280,9 +261,24 @@ def translate_products(yamldoc): and appends them all to a list of processed products that is then returned. """ if 'products' in yamldoc and type(yamldoc['products']) is list: + products_have_owl = False products = [] for product in yamldoc['products']: + key = [k for k in product].pop() + if not (key.lower().endswith('.owl') or key.lower().endswith('.obo')): + # If we really do want to enforce this condition, the better way to do it is to add + # `"additionalProperties": false` right after `patternProperties` in the schema file. 
+ print("WARNING: In project '{}', product: '{}' does not end with '.owl' or '.obo'" + .format(yamldoc['idspace'], key)) + if key.endswith('.owl'): + products_have_owl = True + products.append(process_product(product)) + + if not products_have_owl: + print("WARNING: In project '{}': Mandatory .owl entry missing from product list." + .format(yamldoc['idspace'])) + return products From cb6b5578d2e78cfe3abce8195242b83935054609 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Tue, 12 Feb 2019 16:30:35 -0500 Subject: [PATCH 08/20] add some comments (checking in to avoid conflicts) --- tools/translate_yaml.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/translate_yaml.py b/tools/translate_yaml.py index 5e709b07..f2b3ff13 100755 --- a/tools/translate_yaml.py +++ b/tools/translate_yaml.py @@ -100,8 +100,8 @@ def load_and_validate(yamlname, schema): print("In file: {}:\n{}".format(yamlname, e), file=sys.stderr) sys.exit(1) - # These errors should not occur, since they should have been caught by the above jsonschema - # validation step, but double-check anyway: + # The following two errors should not occur, since the presence of `base_url` and `idspace` + # should have been enforced by the above jsonschema validation step. But we double-check anyway. if 'base_url' not in yamldoc \ or type(yamldoc['base_url']) is not str: print('YAML document must contain "base_url" string', file=sys.stderr) @@ -112,7 +112,7 @@ def load_and_validate(yamlname, schema): print('YAML document must contain "idspace" string', file=sys.stderr) sys.exit(1) - # This is a possible error, since jsonschema is not sophisticated enough to validate this: + # jsonschema is not sophisticated enough to validate this one, so we do it here: if os.path.basename(yamldoc['base_url']).lower() != yamldoc['idspace'].lower(): print("WARNING: Base URL '{}' must end with '{}', not '{}'" .format(yamldoc['base_url'], yamldoc['idspace'], os.path.basename(yamldoc['base_url']))) @@ -266,7 +266,7 @@ def translate_products(yamldoc): for product in yamldoc['products']: key = [k for k in product].pop() if not (key.lower().endswith('.owl') or key.lower().endswith('.obo')): - # If we really do want to enforce this condition, the better way to do it is to add + # If we want to enforce this condition, the way to do it is to add # `"additionalProperties": false` right after `patternProperties` in the schema file. print("WARNING: In project '{}', product: '{}' does not end with '.owl' or '.obo'" .format(yamldoc['idspace'], key)) From 36aebcd81a369290055a5341b6baa30206f641bc Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 14 Feb 2019 16:22:21 -0500 Subject: [PATCH 09/20] move safe-update logic from Makefile to a python script --- Makefile | 21 +++++------------- tools/safe-update.py | 53 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 16 deletions(-) create mode 100755 tools/safe-update.py diff --git a/Makefile b/Makefile index 44e556eb..325bd974 100644 --- a/Makefile +++ b/Makefile @@ -163,23 +163,12 @@ test-examples: tests/examples/test2.terms.htaccess ### Update Repository # -# Check Travis-CI for the last build. -# If it did not pass, then fail. -# If it is the same as .current_build, then fail. -# Otherwise replace .current_build, -# pull from git, and run a new `make`. +# Run the safe-update.py script which does the following: +# - Check Travis-CI for the last build. +# - If it did not pass, or if it is the same as the current build, then do nothing. 
+# - Otherwise replace .current_build, pull from git, and run a new `make`. safe-update: - travis history --no-interactive \ - --repo $(PROJECT) --branch master --limit 1 \ - > .travis_build - @grep ' passed: ' .travis_build - @echo 'Last build is green, but might not be new' - @diff .current_build .travis_build && exit 1 || exit 0 - @echo 'New green build available' - @mv .travis_build .current_build - git pull - make - + tools/safe-update.py ### Migrate Configuration from PURL.org # diff --git a/tools/safe-update.py b/tools/safe-update.py new file mode 100755 index 00000000..246f1af7 --- /dev/null +++ b/tools/safe-update.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 + +import difflib +import requests +import subprocess +import sys + +api_url = 'https://api.travis-ci.org' +repo_slug = 'OBOFoundry/purl.obolibrary.org' +accept_header = {'Accept': 'application/vnd.travis-ci.2.1+json'} + +# Get the last build ID from Travis: +resp = requests.get('{}/repos/{}'.format(api_url, repo_slug), headers=accept_header) +if resp.status_code != requests.codes.ok: + resp.raise_for_status() +last_build_id = resp.json()['repo']['last_build_id'] + +# Now get the build details: +resp = requests.get('{}/repos/{}/builds/{}'.format(api_url, repo_slug, last_build_id), + headers=accept_header) +if resp.status_code != requests.codes.ok: + resp.raise_for_status() +content = resp.json() + +# If the last build did not pass, then do nothing and exit. +if content['build']['state'] != 'passed': + print("Last build is not green. Not updating.", file=sys.stderr) + sys.exit(0) + +# Otherwise see if the build description is different from the current build +print("Last build is green. Checking whether it is new ...") +build_desc = "#{} {}: {} {}".format(content['build']['number'], content['build']['state'], + content['commit']['branch'], content['commit']['message']) +# We only want to keep the first line of the last build's description for comparison purposes: +newbuild_lines = build_desc.splitlines(keepends=True)[:1] +with open('.current_build') as infile: + currbuild_lines = infile.readlines() + +diff = list(difflib.unified_diff(currbuild_lines, newbuild_lines)) +if not diff: + print("Last build is not new. Not updating.") + sys.exit(0) + +# Output a diff for information purposes and then do a `git pull` and `make` from the current +# working directory: +for d in diff: + print(d, end='') +print('\nNew green build available. 
Updating local repository ...') + +subprocess.call(["git", "pull"]) +subprocess.call(["make"]) +with open('.current_build', 'w') as outfile: + outfile.write(newbuild_lines.pop()) From 9407ad89618aff882c9aac14b2f2898b47fb5c70 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Fri, 15 Feb 2019 12:19:53 -0500 Subject: [PATCH 10/20] reorganise tests examples --- Makefile | 47 +++++------------- tools/examples/{ => test1}/test1.htaccess | 0 tools/examples/{ => test1}/test1.xml | 0 tools/examples/{ => test1}/test1.yml | 0 tools/examples/test2.base_redirects.htaccess | 3 -- tools/examples/test2.htaccess | 7 --- tools/examples/test2.products.htaccess | 4 -- tools/examples/test2.terms.htaccess | 3 -- tools/examples/{test2.yml => test2/obo.yml} | 19 ++----- tools/examples/test2/obo/obo.htaccess | 7 +++ tools/examples/test2/test2.htaccess | 20 ++++++++ tools/examples/test2/test2.yml | 14 ++++++ tools/examples/test2/test2/test2.htaccess | 4 ++ tools/migrate.py | 52 +++++++++++++------- 14 files changed, 97 insertions(+), 83 deletions(-) rename tools/examples/{ => test1}/test1.htaccess (100%) rename tools/examples/{ => test1}/test1.xml (100%) rename tools/examples/{ => test1}/test1.yml (100%) delete mode 100644 tools/examples/test2.base_redirects.htaccess delete mode 100644 tools/examples/test2.htaccess delete mode 100644 tools/examples/test2.products.htaccess delete mode 100644 tools/examples/test2.terms.htaccess rename tools/examples/{test2.yml => test2/obo.yml} (50%) create mode 100644 tools/examples/test2/obo/obo.htaccess create mode 100644 tools/examples/test2/test2.htaccess create mode 100644 tools/examples/test2/test2.yml create mode 100644 tools/examples/test2/test2/test2.htaccess diff --git a/Makefile b/Makefile index 325bd974..3110868e 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,6 @@ # - [GNU Make](http://www.gnu.org/software/make/) to run this file # - [Python 3](https://www.python.org/downloads/) to run scripts # - [PyYAML](http://pyyaml.org/wiki/PyYAML) for translation to Apache -# - [travis.rb](https://github.com/travis-ci/travis.rb) for Travis-CI ### Configuration @@ -30,9 +29,6 @@ # Defaults to the list of config/*.yml files. ONTOLOGY_IDS ?= $(patsubst config/%.yml,%,$(wildcard config/*.yml)) -# The GitHub owner/project -PROJECT ?= OBOFoundry/purl.obolibrary.org - # Local development server. DEVELOPMENT ?= localhost @@ -58,7 +54,7 @@ SHELL := bash .PHONY: all all: clean build -# Remove directories with generated files. +# Remove directories with generated files and tests. .PHONY: clean clean: rm -rf temp tests @@ -69,6 +65,8 @@ build-%: tools/translate_yaml.py --input_files config/$*.yml --output_dir temp @echo "Built files in temp/$*" +# The following two directories Must exist in order to execute the code that +# assigns the variable BACKUP (see below) backup/: mkdir $@ @@ -130,35 +128,16 @@ test-production: $(foreach o,$(ONTOLOGY_IDS),tests/production/$o.tsv) ### Test Tools # # Test our tools on files in examples/ directory. 
-tests/examples: - mkdir -p $@ - -tests/examples/%.yml: tools/examples/%.xml tools/examples/%.yml tests/examples - tools/migrate.py $* $< $@ - diff tools/examples/$*.yml $@ - -tests/examples/%.base_redirects.htaccess: tools/examples/%.yml tests/examples - tools/translate-base-redirects.py $< $@ - diff tools/examples/$*.base_redirects.htaccess $@ - -tests/examples/%.products.htaccess: tools/examples/%.yml tests/examples - tools/translate-products.py $< $@ - diff tools/examples/$*.products.htaccess $@ - -tests/examples/%.terms.htaccess: tools/examples/%.yml tests/examples - tools/translate-terms.py $< $@ - diff tools/examples/$*.terms.htaccess $@ - -tests/examples/%.htaccess: tools/examples/%.yml tests/examples - tools/translate-entries.py $< $@ - diff tools/examples/$*.htaccess $@ - -.PHONY: test-examples -test-examples: tests/examples/test1.yml -test-examples: tests/examples/test2.htaccess -test-examples: tests/examples/test2.base_redirects.htaccess -test-examples: tests/examples/test2.products.htaccess -test-examples: tests/examples/test2.terms.htaccess +.PHONY: test-examples test-example1 test-example2 +test-example1: + tools/migrate.py test1 tools/examples/test1/test1.xml tests/examples/test1/test1.yml + diff tools/examples/test1/test1.yml tests/examples/test1/test1.yml +test-example2: + tools/translate_yaml.py --input_dir tools/examples/test2/ --output_dir tests/examples/test2/ + diff tools/examples/test2/test2.htaccess tests/examples/test2/.htaccess + diff tools/examples/test2/obo/obo.htaccess tests/examples/test2/obo/.htaccess + diff tools/examples/test2/test2/test2.htaccess tests/examples/test2/test2/.htaccess +test-examples: test-example1 test-example2 ### Update Repository diff --git a/tools/examples/test1.htaccess b/tools/examples/test1/test1.htaccess similarity index 100% rename from tools/examples/test1.htaccess rename to tools/examples/test1/test1.htaccess diff --git a/tools/examples/test1.xml b/tools/examples/test1/test1.xml similarity index 100% rename from tools/examples/test1.xml rename to tools/examples/test1/test1.xml diff --git a/tools/examples/test1.yml b/tools/examples/test1/test1.yml similarity index 100% rename from tools/examples/test1.yml rename to tools/examples/test1/test1.yml diff --git a/tools/examples/test2.base_redirects.htaccess b/tools/examples/test2.base_redirects.htaccess deleted file mode 100644 index 38063a20..00000000 --- a/tools/examples/test2.base_redirects.htaccess +++ /dev/null @@ -1,3 +0,0 @@ -# Base redirect for TEST2 -RedirectMatch temp "(?i)^/obo/test2$" "http://example.org/test2" - diff --git a/tools/examples/test2.htaccess b/tools/examples/test2.htaccess deleted file mode 100644 index f6761a14..00000000 --- a/tools/examples/test2.htaccess +++ /dev/null @@ -1,7 +0,0 @@ -# DO NOT EDIT THIS FILE! -# Automatically generated from "tools/examples/test2.yml". -# Edit that source file then regenerate this file. 
- -RedirectMatch temp "(?i)^/obo/test2/project$" "http://example.org/project.html" -RedirectMatch temp "(?i)^/obo/test2/branches/(.*)$" "http://example.org/branches/$1" -RedirectMatch seeother "(?i)^/obo/test2/TEST_(\d+)$" "http://example.org/about/TEST_$1" diff --git a/tools/examples/test2.products.htaccess b/tools/examples/test2.products.htaccess deleted file mode 100644 index 6e4ffa4f..00000000 --- a/tools/examples/test2.products.htaccess +++ /dev/null @@ -1,4 +0,0 @@ -# Products for TEST2 -RedirectMatch temp "(?i)^/obo/test2.owl$" "http://example.org/test2.owl" -RedirectMatch temp "(?i)^/obo/test2.obo$" "http://example.org/test2.obo" - diff --git a/tools/examples/test2.terms.htaccess b/tools/examples/test2.terms.htaccess deleted file mode 100644 index bf5f71ea..00000000 --- a/tools/examples/test2.terms.htaccess +++ /dev/null @@ -1,3 +0,0 @@ -# Term redirect for TEST2 -RedirectMatch seeother "^/obo/TEST2_(\d+)$" "http://www.ontobee.org/browser/rdf.php?o=TEST2&iri=http://purl.obolibrary.org/obo/TEST2_$1" - diff --git a/tools/examples/test2.yml b/tools/examples/test2/obo.yml similarity index 50% rename from tools/examples/test2.yml rename to tools/examples/test2/obo.yml index 375a2775..5d03c42e 100644 --- a/tools/examples/test2.yml +++ b/tools/examples/test2/obo.yml @@ -1,17 +1,8 @@ -# PURL configuration for http://purl.obolibrary.org/obo/test2 +# PURL configuration for http://purl.obolibrary.org/obo/obo -idspace: TEST2 -base_url: /obo/test2 - -base_redirect: http://example.org/test2 - -products: -- test2.owl: http://example.org/test2.owl -- test2.obo: http://example.org/test2.obo - -term_browser: ontobee -example_terms: -- TEST2_0000001 +idspace: OBO +base_url: /obo/obo +term_browser: custom entries: - exact: /project @@ -28,4 +19,4 @@ entries: status: see other tests: - from: /TEST_1234 - replacement: http://example.org/about/TEST_1234 + to: http://example.org/about/TEST_1234 diff --git a/tools/examples/test2/obo/obo.htaccess b/tools/examples/test2/obo/obo.htaccess new file mode 100644 index 00000000..e1f35ae2 --- /dev/null +++ b/tools/examples/test2/obo/obo.htaccess @@ -0,0 +1,7 @@ +# DO NOT EDIT THIS FILE! +# Automatically generated from "tools/examples/test2/obo.yml". +# Edit that source file then regenerate this file. + +RedirectMatch temp "(?i)^/obo/obo/project$" "http://example.org/project.html" +RedirectMatch temp "(?i)^/obo/obo/branches/(.*)$" "http://example.org/branches/$1" +RedirectMatch seeother "(?i)^/obo/test2/TEST_(\d+)$" "http://example.org/about/TEST_$1" diff --git a/tools/examples/test2/test2.htaccess b/tools/examples/test2/test2.htaccess new file mode 100644 index 00000000..04f6bd21 --- /dev/null +++ b/tools/examples/test2/test2.htaccess @@ -0,0 +1,20 @@ +# DO NOT EDIT THIS FILE! +# Automatically generated from "tools/examples/test2/obo.yml". +# Edit that source file then regenerate this file. 
+ +RedirectMatch temp "(?i)^/obo/obo/project$" "http://example.org/project.html" +RedirectMatch temp "(?i)^/obo/obo/branches/(.*)$" "http://example.org/branches/$1" +RedirectMatch seeother "(?i)^/obo/test2/TEST_(\d+)$" "http://example.org/about/TEST_$1" + +### Generated from project configuration files + +# Base redirect for TEST2 +RedirectMatch temp "(?i)^/obo/test2$" "http://example.org/test2" + +# Products for TEST2 +RedirectMatch temp "(?i)^/obo/test2.owl$" "http://example.org/test2.owl" +RedirectMatch temp "(?i)^/obo/test2.obo$" "http://example.org/test2.obo" + +# Term redirect for TEST2 +RedirectMatch seeother "^/obo/TEST2_(\d+)$" "http://www.ontobee.org/browser/rdf.php?o=TEST2&iri=http://purl.obolibrary.org/obo/TEST2_$1" + diff --git a/tools/examples/test2/test2.yml b/tools/examples/test2/test2.yml new file mode 100644 index 00000000..2c8141ef --- /dev/null +++ b/tools/examples/test2/test2.yml @@ -0,0 +1,14 @@ +# PURL configuration for http://purl.obolibrary.org/obo/test2 + +idspace: TEST2 +base_url: /obo/test2 + +base_redirect: http://example.org/test2 + +products: +- test2.owl: http://example.org/test2.owl +- test2.obo: http://example.org/test2.obo + +term_browser: ontobee +example_terms: +- TEST2_0000001 \ No newline at end of file diff --git a/tools/examples/test2/test2/test2.htaccess b/tools/examples/test2/test2/test2.htaccess new file mode 100644 index 00000000..eaadaa74 --- /dev/null +++ b/tools/examples/test2/test2/test2.htaccess @@ -0,0 +1,4 @@ +# DO NOT EDIT THIS FILE! +# Automatically generated from "tools/examples/test2/test2.yml". +# Edit that source file then regenerate this file. + diff --git a/tools/migrate.py b/tools/migrate.py index 1140dab8..3db01e75 100755 --- a/tools/migrate.py +++ b/tools/migrate.py @@ -33,7 +33,11 @@ # the `exact` entries are output first, # followed by `prefix` entries in descending order of `id` length. -import argparse, sys, xml.sax, re +import argparse +import re +import os +import sys +import xml.sax # Accumulate entries in these global lists for later sorting. exact = [] @@ -55,34 +59,43 @@ entries: ''' + entry_template = '''- %s: %s replacement: %s ''' + # Parse command line arguments, # run the SAX parser on the XML file, # and write results to the YAML file. def main(): parser = argparse.ArgumentParser(description='Migrate XML to YAML') parser.add_argument('idspace', - type=str, - help='the project IDSPACE, e.g. FOO') + type=str, + help='the project IDSPACE, e.g. 
FOO') parser.add_argument('xml_file', - type=argparse.FileType('r'), - default=sys.stdin, - nargs='?', - help='read from the XML file (or STDIN)') + type=argparse.FileType('r'), + default=sys.stdin, + nargs='?', + help='read from the XML file (or STDIN)') parser.add_argument('yaml_file', - type=argparse.FileType('w'), - default=sys.stdout, - nargs='?', - help='write to the YAML file (or STDOUT)') + type=str, + nargs='?', + help='write to the YAML file (or STDOUT)') args = parser.parse_args() args.upper_idspace = args.idspace.upper() args.lower_idspace = args.idspace.lower() args.base_url = '/obo/' + args.lower_idspace + if args.yaml_file is not None: + try: + args.yaml_file = open(args.yaml_file, 'w') + except FileNotFoundError: + os.makedirs(os.path.dirname(args.yaml_file)) + args.yaml_file = open(args.yaml_file, 'w') + else: + args.yaml_file = sys.stdout sax = xml.sax.make_parser() sax.setContentHandler(OCLCHandler(args)) @@ -93,18 +106,20 @@ def main(): raise ValueError('No entries to migrate') args.yaml_file.write(header_template % - (args.base_url, args.upper_idspace, args.base_url, args.lower_idspace, args.lower_idspace)) + (args.base_url, args.upper_idspace, args.base_url, args.lower_idspace, + args.lower_idspace)) for entry in entries: args.yaml_file.write(entry_template % - (entry['rule'], entry['id'], entry['url'])) + (entry['rule'], entry['id'], entry['url'])) + args.yaml_file.close() # Define a SAX ContentHandler class to match the XML format, # and accumulate entry dictionaries into the global lists. # See example above for XML format. class OCLCHandler(xml.sax.ContentHandler): - # Initialize with results of argparse. def __init__(self, args): + # Initialize args with results of argparse. self.args = args self.count = 0 self.content = '' @@ -135,7 +150,7 @@ def endElement(self, name): elif name == 'purl': # The `` in the XML must begin with the base_url, # but we remove this prefix from the YAML output. - if not 'id' in self.entry: + if 'id' not in self.entry: raise ValueError('No for %d' % self.count) id_re = re.compile('^' + self.args.base_url, re.IGNORECASE) if not id_re.match(self.entry['id']): @@ -144,14 +159,14 @@ def endElement(self, name): % (self.count, self.entry['id'], self.args.base_url)) self.entry['id'] = id_re.sub('', self.entry['id']) - if not 'url' in self.entry: + if 'url' not in self.entry: raise ValueError('No for %d' % self.count) if not re.match(r'^(https?|ftp)\:\/\/.+', self.entry['url']): raise ValueError( 'In %d the "%s" is not an absolute HTTP or FTP URL' % (self.count, self.entry['url'])) - if not 'type' in self.entry: + if 'type' not in self.entry: raise ValueError('No for %d' % self.count) elif self.entry['type'] == '302': self.entry['rule'] = 'exact' @@ -161,7 +176,8 @@ def endElement(self, name): prefix.append(self.entry) else: raise ValueError('Unknown type "%s" for %d' % - (self.entry['type'], self.count)) + (self.entry['type'], self.count)) + if __name__ == "__main__": main() From f0f07d84782eba5162f82e41d8ad81993df5484e Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 18 Feb 2019 09:00:12 -0500 Subject: [PATCH 11/20] do not write to .current_build if 'git pull' or 'make' fails --- tools/safe-update.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/safe-update.py b/tools/safe-update.py index 246f1af7..8d738ccd 100755 --- a/tools/safe-update.py +++ b/tools/safe-update.py @@ -47,7 +47,6 @@ print(d, end='') print('\nNew green build available. 
Updating local repository ...') -subprocess.call(["git", "pull"]) -subprocess.call(["make"]) -with open('.current_build', 'w') as outfile: - outfile.write(newbuild_lines.pop()) +if subprocess.call(["git", "pull"]) == 0 and subprocess.call(["make"]) == 0: + with open('.current_build', 'w') as outfile: + outfile.write(newbuild_lines.pop()) From cbed803bfc8a987a93eaa51a41090b80a3f8dc51 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 18 Feb 2019 18:07:00 -0500 Subject: [PATCH 12/20] create the target directory if it does not exist in 'test.py' --- Makefile | 52 +++++++++++++++++++------------ tools/migrate.py | 1 + tools/test.py | 81 +++++++++++++++++++++++++++++------------------- 3 files changed, 83 insertions(+), 51 deletions(-) diff --git a/Makefile b/Makefile index 3110868e..06226b36 100644 --- a/Makefile +++ b/Makefile @@ -59,25 +59,35 @@ all: clean build clean: rm -rf temp tests -# Build temp files for a single project. + +### Build recipe for a single project. +# +# Convert the YAML file of a single project to a .htaccess file and place it +# in the temp/ directory. .PHONY: build-% build-%: tools/translate_yaml.py --input_files config/$*.yml --output_dir temp @echo "Built files in temp/$*" -# The following two directories Must exist in order to execute the code that -# assigns the variable BACKUP (see below) -backup/: - mkdir $@ +# Build recipe for all projects +# +# Convert the YAML files of every project to .htaccess files and place them +# in the www/obo directory. + +# Final output directory: www/obo/: mkdir -p $@ -# Get name of a dated-backup directory, in a portable way. -BACKUP = backup/obo-$(shell python -c "import time,os;print(time.strftime('%Y%m%d-%H%M%S',time.gmtime(os.path.getmtime('www/obo'))))") +# When a new build is created, the old build's files are moved here, in a subdirectory +# whose name is generated in a portable way using python (see the target-specific +# variable BACKUP below). +backup/: + mkdir $@ -# Convert all YAML configuration files to .htaccess. +# The main build target: .PHONY: build +build: BACKUP = backup/obo-$(shell python -c "import time,os;print(time.strftime('%Y%m%d-%H%M%S',time.gmtime(os.path.getmtime('www/obo'))))") build: | backup/ www/obo/ tools/translate_yaml.py --input_dir config --output_dir temp/obo rm -rf temp/obo/obo @@ -85,17 +95,16 @@ build: | backup/ www/obo/ mv temp/obo www/obo rmdir temp + ### Test Development Apache Config # # Make HTTP HEAD requests quickly against the DEVELOPMENT server # to ensure that redirects are working properly. -tests/development: - mkdir -p $@ # Run tests for a single YAML configuration file. # against the DEVELOPMENT server, # making requests every 0.01 seconds. -tests/development/%.tsv: config/%.yml tests/development +tests/development/%.tsv: config/%.yml tools/test.py --delay=0.01 $(DEVELOPMENT) $< $@ # Run all tests against development and fail if any FAIL line is found. @@ -109,13 +118,11 @@ test: $(foreach o,$(ONTOLOGY_IDS),tests/development/$o.tsv) # # Make HTTP HEAD requests slowly against the PRODUCTION server # to ensure that redirects are working properly. -tests/production: - mkdir -p $@ # Run tests for a single YAML configuration file # against the PRODUCTION server, # making requests every 1 second. -tests/production/%.tsv: config/%.yml tests/production +tests/production/%.tsv: config/%.yml tools/test.py --delay=1 $(PRODUCTION) $< $@ # Run all tests against production and fail if any FAIL line is found. 
@@ -128,15 +135,19 @@ test-production: $(foreach o,$(ONTOLOGY_IDS),tests/production/$o.tsv) ### Test Tools # # Test our tools on files in examples/ directory. -.PHONY: test-examples test-example1 test-example2 +.PHONY: test-example1 test-example1: tools/migrate.py test1 tools/examples/test1/test1.xml tests/examples/test1/test1.yml diff tools/examples/test1/test1.yml tests/examples/test1/test1.yml + +.PHONY: test-example2 test-example2: tools/translate_yaml.py --input_dir tools/examples/test2/ --output_dir tests/examples/test2/ diff tools/examples/test2/test2.htaccess tests/examples/test2/.htaccess diff tools/examples/test2/obo/obo.htaccess tests/examples/test2/obo/.htaccess diff tools/examples/test2/test2/test2.htaccess tests/examples/test2/test2/.htaccess + +.PHONY: test-examples test-examples: test-example1 test-example2 @@ -149,6 +160,7 @@ test-examples: test-example1 test-example2 safe-update: tools/safe-update.py + ### Migrate Configuration from PURL.org # # Given an ontology ID (usually lower-case), @@ -168,12 +180,14 @@ migrate-%: mkdir -p config tools/migrate.py $* migrations/$*.xml config/$*.yml -### Check code style for python source files. -# || true is appended to force make to ignore the exit code from pycodestyle + +### Code style and lint checks for python source files. +# +# Note that `|| true` is appended to force make to ignore the exit code in both cases .PHONY: style style: pep8 --max-line-length=100 --ignore E129,E126,E121,E111,E114 tools/*.py || true -# Run the delinter -lint: +.PHONY: delint +delint: python3 -m pyflakes tools/*.py || true diff --git a/tools/migrate.py b/tools/migrate.py index 3db01e75..4c10fb14 100755 --- a/tools/migrate.py +++ b/tools/migrate.py @@ -114,6 +114,7 @@ def main(): args.yaml_file.close() + # Define a SAX ContentHandler class to match the XML format, # and accumulate entry dictionaries into the global lists. # See example above for XML format. diff --git a/tools/test.py b/tools/test.py index 3983302a..9b8f167e 100755 --- a/tools/test.py +++ b/tools/test.py @@ -6,7 +6,12 @@ # # NOTE: Currently only tests `example_terms` when `term_browser: ontobee`. 
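# For illustration (hypothetical idspace and term): with `term_browser: ontobee`,
# an example term FOO_0000001 in project FOO is expected to redirect with status 303 from
#   /obo/FOO_0000001
# to
#   http://www.ontobee.org/browser/rdf.php?o=FOO&iri=http://purl.obolibrary.org/obo/FOO_0000001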
-import argparse, sys, yaml, http.client, time +import argparse +import http.client +import os +import sys +import time +import yaml from urllib.parse import unquote @@ -17,40 +22,49 @@ def main(): parser = argparse.ArgumentParser(description='Test a YAML configuration by making HTTP requests') parser.add_argument('-d', '--delay', metavar='D', - type=float, - default=1, - help='delay between requests in seconds (default 1)') + type=float, + default=1, + help='delay between requests in seconds (default 1)') parser.add_argument('-t', '--timeout', metavar='T', - type=float, - default=10, - help='connection timeout in seconds (default 10)') + type=float, + default=10, + help='connection timeout in seconds (default 10)') parser.add_argument('domain', - type=str, - default='172.16.100.10', - nargs='?', - help='target server (default 172.16.100.10)') + type=str, + default='172.16.100.10', + nargs='?', + help='target server (default 172.16.100.10)') parser.add_argument('yaml_file', - type=argparse.FileType('r'), - default=sys.stdin, - nargs='?', - help='read from the YAML file (or STDIN)') + type=argparse.FileType('r'), + default=sys.stdin, + nargs='?', + help='read from the YAML file (or STDIN)') parser.add_argument('report_file', - type=argparse.FileType('w'), - default=sys.stdout, - nargs='?', - help='write to the TSV file (or STDOUT)') + type=str, + nargs='?', + help='write to the TSV file (or STDOUT)') args = parser.parse_args() + # Create the report file if has been specified, otherwise set it to sys.stdout: + if args.report_file is not None: + try: + args.report_file = open(args.report_file, 'w') + except FileNotFoundError: + os.makedirs(os.path.dirname(args.report_file)) + args.report_file = open(args.report_file, 'w') + else: + args.report_file = sys.stdout + # Load YAML document and look for 'entries' list. 
document = yaml.load(args.yaml_file) - if not 'idspace' in document \ - or type(document['idspace']) is not str: + if 'idspace' not in document \ + or type(document['idspace']) is not str: raise ValueError('YAML document must contain "idspace" string') idspace = document['idspace'] - if not 'base_url' in document \ - or type(document['base_url']) is not str: + if 'base_url' not in document \ + or type(document['base_url']) is not str: raise ValueError('YAML document must contain "base_url" string') base_url = document['base_url'] @@ -65,16 +79,16 @@ def main(): }] if 'products' in document \ - and type(document['products']) is list: + and type(document['products']) is list: i = 0 for product in document['products']: i += 1 tests += process_product(i, product) if 'term_browser' in document \ - and document['term_browser'].strip().lower() == 'ontobee' \ - and 'example_terms' in document \ - and type(document['example_terms']) is list: + and document['term_browser'].strip().lower() == 'ontobee' \ + and 'example_terms' in document \ + and type(document['example_terms']) is list: i = 0 for example_term in document['example_terms']: i += 1 @@ -96,7 +110,7 @@ def main(): raise ValueError('Invalid test %d in global tests' % i) if 'entries' in document \ - and type(document['entries']) is list: + and type(document['entries']) is list: i = 0 for entry in document['entries']: i += 1 @@ -117,6 +131,8 @@ def main(): args.report_file.flush() time.sleep(args.delay) + args.report_file.close() + def process_product(i, product): """Given an index, and a product dictionary, @@ -131,13 +147,14 @@ def process_product(i, product): ontobee = 'http://www.ontobee.org/browser/rdf.php?o=%s&iri=http://purl.obolibrary.org/obo/' + def process_ontobee(idspace, i, example_term): """Given an ontology IDSPACE, an index, and an example term ID, return a list with a test to run.""" return [{ 'source': '/obo/' + example_term, 'replacement': (ontobee % idspace) + example_term, - #'replacement': 'http://ontologies.berkeleybop.org/' + example_term, + # 'replacement': 'http://ontologies.berkeleybop.org/' + example_term, 'status': '303' }] @@ -153,9 +170,9 @@ def process_entry(base_url, i, entry): raise ValueError('Entry %d is invalid: "%s"' % (i, entry)) # Validate "replacement" field - if not 'replacement' in entry \ - or entry['replacement'] is None \ - or entry['replacement'].strip() == '': + if 'replacement' not in entry \ + or entry['replacement'] is None \ + or entry['replacement'].strip() == '': raise ValueError('Missing "replacement" field for entry %d' % i) # Validate status code. From 08f95c087089b10a34c79ccab437d8ee82efba6d Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 18 Feb 2019 18:43:16 -0500 Subject: [PATCH 13/20] accept an output directory rather than an output file as the argument to test.py --- Makefile | 4 +- tools/test.py | 181 +++++++++++++++++++++++++------------------------- 2 files changed, 91 insertions(+), 94 deletions(-) diff --git a/Makefile b/Makefile index 06226b36..d4e3e8e4 100644 --- a/Makefile +++ b/Makefile @@ -105,7 +105,7 @@ build: | backup/ www/obo/ # against the DEVELOPMENT server, # making requests every 0.01 seconds. tests/development/%.tsv: config/%.yml - tools/test.py --delay=0.01 $(DEVELOPMENT) $< $@ + tools/test.py --delay=0.01 --output $(@D) $(DEVELOPMENT) $< # Run all tests against development and fail if any FAIL line is found. 
.PHONY: test @@ -123,7 +123,7 @@ test: $(foreach o,$(ONTOLOGY_IDS),tests/development/$o.tsv) # against the PRODUCTION server, # making requests every 1 second. tests/production/%.tsv: config/%.yml - tools/test.py --delay=1 $(PRODUCTION) $< $@ + tools/test.py --delay=1 --output $(@D) $(PRODUCTION) $< # Run all tests against production and fail if any FAIL line is found. .PHONY: test-production diff --git a/tools/test.py b/tools/test.py index 9b8f167e..3ef7d880 100755 --- a/tools/test.py +++ b/tools/test.py @@ -2,13 +2,14 @@ # # Read a YAML configuration file, # make a series of HTTP HEAD requests to a target server, -# and report the results in a table. +# and report the results in a TSV file. # # NOTE: Currently only tests `example_terms` when `term_browser: ontobee`. import argparse import http.client import os +import re import sys import time import yaml @@ -29,6 +30,10 @@ def main(): type=float, default=10, help='connection timeout in seconds (default 10)') + parser.add_argument('-o', '--output', metavar='DIR', + type=str, + required=True, + help='Directory to write TSV files to') parser.add_argument('domain', type=str, default='172.16.100.10', @@ -39,99 +44,91 @@ def main(): default=sys.stdin, nargs='?', help='read from the YAML file (or STDIN)') - parser.add_argument('report_file', - type=str, - nargs='?', - help='write to the TSV file (or STDOUT)') args = parser.parse_args() - # Create the report file if has been specified, otherwise set it to sys.stdout: - if args.report_file is not None: - try: - args.report_file = open(args.report_file, 'w') - except FileNotFoundError: - os.makedirs(os.path.dirname(args.report_file)) - args.report_file = open(args.report_file, 'w') - else: - args.report_file = sys.stdout - - # Load YAML document and look for 'entries' list. - document = yaml.load(args.yaml_file) - - if 'idspace' not in document \ - or type(document['idspace']) is not str: - raise ValueError('YAML document must contain "idspace" string') - idspace = document['idspace'] - - if 'base_url' not in document \ - or type(document['base_url']) is not str: - raise ValueError('YAML document must contain "base_url" string') - base_url = document['base_url'] - - tests = [] - - # Collect the tests to run. - if 'base_redirect' in document: - tests += [{ - 'source': base_url, - 'replacement': document['base_redirect'], - 'status': '302' - }] - - if 'products' in document \ - and type(document['products']) is list: - i = 0 - for product in document['products']: - i += 1 - tests += process_product(i, product) - - if 'term_browser' in document \ - and document['term_browser'].strip().lower() == 'ontobee' \ - and 'example_terms' in document \ - and type(document['example_terms']) is list: - i = 0 - for example_term in document['example_terms']: - i += 1 - tests += process_ontobee(idspace, i, example_term) - - if 'tests' in document: - i = 0 - status = '302' - for test_entry in document['tests']: - i += 1 - test = {'status': status} - if 'from' in test_entry: - test['source'] = base_url + test_entry['from'] - if 'to' in test_entry: - test['replacement'] = test_entry['to'] - if 'source' in test and 'replacement' in test: - tests.append(test) - else: - raise ValueError('Invalid test %d in global tests' % i) - - if 'entries' in document \ - and type(document['entries']) is list: - i = 0 - for entry in document['entries']: - i += 1 - tests += process_entry(base_url, i, entry) - - # Write report table header. 
- args.report_file.write('\t'.join([ - 'Result', 'Source URL', - 'Expected Status', 'Expected URL', - 'Actual Status', 'Actual URL' - ]) + '\n') - - # Run the tests and add results to the report table. - conn = http.client.HTTPConnection(args.domain, timeout=args.timeout) - for test in tests: - results = run_test(conn, test) - args.report_file.write('\t'.join(results) + '\n') - args.report_file.flush() - time.sleep(args.delay) - - args.report_file.close() + # Create the output directory if it doesn't already exist + try: + os.makedirs(args.output) + except FileExistsError: + pass + + with open(os.path.normpath(args.output) + '/' + + re.sub('\.yml$', '.tsv', os.path.basename(args.yaml_file.name)), 'w') as report_file: + # Load YAML document and look for 'entries' list. + document = yaml.load(args.yaml_file) + + if 'idspace' not in document \ + or type(document['idspace']) is not str: + raise ValueError('YAML document must contain "idspace" string') + idspace = document['idspace'] + + if 'base_url' not in document \ + or type(document['base_url']) is not str: + raise ValueError('YAML document must contain "base_url" string') + base_url = document['base_url'] + + tests = [] + + # Collect the tests to run. + if 'base_redirect' in document: + tests += [{ + 'source': base_url, + 'replacement': document['base_redirect'], + 'status': '302' + }] + + if 'products' in document \ + and type(document['products']) is list: + i = 0 + for product in document['products']: + i += 1 + tests += process_product(i, product) + + if 'term_browser' in document \ + and document['term_browser'].strip().lower() == 'ontobee' \ + and 'example_terms' in document \ + and type(document['example_terms']) is list: + i = 0 + for example_term in document['example_terms']: + i += 1 + tests += process_ontobee(idspace, i, example_term) + + if 'tests' in document: + i = 0 + status = '302' + for test_entry in document['tests']: + i += 1 + test = {'status': status} + if 'from' in test_entry: + test['source'] = base_url + test_entry['from'] + if 'to' in test_entry: + test['replacement'] = test_entry['to'] + if 'source' in test and 'replacement' in test: + tests.append(test) + else: + raise ValueError('Invalid test %d in global tests' % i) + + if 'entries' in document \ + and type(document['entries']) is list: + i = 0 + for entry in document['entries']: + i += 1 + tests += process_entry(base_url, i, entry) + + # Write report table header. + report_file.write('\t'.join([ + 'Result', 'Source URL', + 'Expected Status', 'Expected URL', + 'Actual Status', 'Actual URL' + ]) + '\n') + + # Run the tests and add results to the report table. 
+ conn = http.client.HTTPConnection(args.domain, timeout=args.timeout) + for test in tests: + results = run_test(conn, test) + report_file.write('\t'.join(results) + '\n') + report_file.flush() + time.sleep(args.delay) def process_product(i, product): From 6a7ab0e79909e0757bc9bb9d58547f912cf43f27 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 18 Feb 2019 19:38:49 -0500 Subject: [PATCH 14/20] accept list of yaml files for dev and prod tests, and sort .htaccess files case-insensitively, longer names first (to ensure no regex conflicts) --- Makefile | 27 ++---- tools/test.py | 182 +++++++++++++++++++++------------------- tools/translate_yaml.py | 26 ++++-- 3 files changed, 121 insertions(+), 114 deletions(-) diff --git a/Makefile b/Makefile index d4e3e8e4..3e078e2c 100644 --- a/Makefile +++ b/Makefile @@ -100,36 +100,19 @@ build: | backup/ www/obo/ # # Make HTTP HEAD requests quickly against the DEVELOPMENT server # to ensure that redirects are working properly. - -# Run tests for a single YAML configuration file. -# against the DEVELOPMENT server, -# making requests every 0.01 seconds. -tests/development/%.tsv: config/%.yml - tools/test.py --delay=0.01 --output $(@D) $(DEVELOPMENT) $< - -# Run all tests against development and fail if any FAIL line is found. +# Fail if any FAIL line is found in any of them. .PHONY: test -test: $(foreach o,$(ONTOLOGY_IDS),tests/development/$o.tsv) - @cat tests/development/*.tsv \ - | awk '/^FAIL/ {status=1; print} END {exit status}' +test: + tools/test.py --delay=0.01 --output=tests/development --domain=$(DEVELOPMENT) config/*.yml ### Test Production Apache Config # # Make HTTP HEAD requests slowly against the PRODUCTION server # to ensure that redirects are working properly. - -# Run tests for a single YAML configuration file -# against the PRODUCTION server, -# making requests every 1 second. -tests/production/%.tsv: config/%.yml - tools/test.py --delay=1 --output $(@D) $(PRODUCTION) $< - -# Run all tests against production and fail if any FAIL line is found. .PHONY: test-production -test-production: $(foreach o,$(ONTOLOGY_IDS),tests/production/$o.tsv) - @cat tests/production/*.tsv \ - | awk '/^FAIL/ {status=1; print} END {exit status}' +test-production: + tools/test.py --delay=1 --output=tests/production --domain=$(PRODUCTION) config/*.yml ### Test Tools diff --git a/tools/test.py b/tools/test.py index 3ef7d880..5f4b0675 100755 --- a/tools/test.py +++ b/tools/test.py @@ -30,20 +30,19 @@ def main(): type=float, default=10, help='connection timeout in seconds (default 10)') + parser.add_argument('-m', '--domain', metavar='DOM', + type=str, + default='172.16.100.10', + help='target server (default 172.16.100.10)') parser.add_argument('-o', '--output', metavar='DIR', type=str, required=True, help='Directory to write TSV files to') - parser.add_argument('domain', - type=str, - default='172.16.100.10', - nargs='?', - help='target server (default 172.16.100.10)') - parser.add_argument('yaml_file', + parser.add_argument('yaml_files', metavar='YAML', type=argparse.FileType('r'), default=sys.stdin, - nargs='?', - help='read from the YAML file (or STDIN)') + nargs='+', + help='YAML input file') args = parser.parse_args() # Create the output directory if it doesn't already exist @@ -52,84 +51,95 @@ def main(): except FileExistsError: pass - with open(os.path.normpath(args.output) + '/' + - re.sub('\.yml$', '.tsv', os.path.basename(args.yaml_file.name)), 'w') as report_file: - # Load YAML document and look for 'entries' list. 
- document = yaml.load(args.yaml_file) - - if 'idspace' not in document \ - or type(document['idspace']) is not str: - raise ValueError('YAML document must contain "idspace" string') - idspace = document['idspace'] - - if 'base_url' not in document \ - or type(document['base_url']) is not str: - raise ValueError('YAML document must contain "base_url" string') - base_url = document['base_url'] - - tests = [] - - # Collect the tests to run. - if 'base_redirect' in document: - tests += [{ - 'source': base_url, - 'replacement': document['base_redirect'], - 'status': '302' - }] - - if 'products' in document \ - and type(document['products']) is list: - i = 0 - for product in document['products']: - i += 1 - tests += process_product(i, product) - - if 'term_browser' in document \ - and document['term_browser'].strip().lower() == 'ontobee' \ - and 'example_terms' in document \ - and type(document['example_terms']) is list: - i = 0 - for example_term in document['example_terms']: - i += 1 - tests += process_ontobee(idspace, i, example_term) - - if 'tests' in document: - i = 0 - status = '302' - for test_entry in document['tests']: - i += 1 - test = {'status': status} - if 'from' in test_entry: - test['source'] = base_url + test_entry['from'] - if 'to' in test_entry: - test['replacement'] = test_entry['to'] - if 'source' in test and 'replacement' in test: - tests.append(test) - else: - raise ValueError('Invalid test %d in global tests' % i) - - if 'entries' in document \ - and type(document['entries']) is list: - i = 0 - for entry in document['entries']: - i += 1 - tests += process_entry(base_url, i, entry) - - # Write report table header. - report_file.write('\t'.join([ - 'Result', 'Source URL', - 'Expected Status', 'Expected URL', - 'Actual Status', 'Actual URL' - ]) + '\n') - - # Run the tests and add results to the report table. - conn = http.client.HTTPConnection(args.domain, timeout=args.timeout) - for test in tests: - results = run_test(conn, test) - report_file.write('\t'.join(results) + '\n') - report_file.flush() - time.sleep(args.delay) - + failures = [] + for yaml_file in args.yaml_files: + print("Checking {} ...".format(yaml_file.name)) + with open(os.path.normpath(args.output) + '/' + + re.sub('\.yml$', '.tsv', os.path.basename(yaml_file.name)), 'w') as report_file: + # Load YAML document and look for 'entries' list. + document = yaml.load(yaml_file) + + if 'idspace' not in document \ + or type(document['idspace']) is not str: + raise ValueError('YAML document must contain "idspace" string') + idspace = document['idspace'] + + if 'base_url' not in document \ + or type(document['base_url']) is not str: + raise ValueError('YAML document must contain "base_url" string') + base_url = document['base_url'] + + tests = [] + + # Collect the tests to run. 
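            # A sketch of what gets collected (hypothetical config values): a document with
            #   base_url: /obo/foo
            #   base_redirect: http://example.org/foo
            # contributes the test dictionary
            #   {'source': '/obo/foo', 'replacement': 'http://example.org/foo', 'status': '302'}
            # and a document-level test such as
            #   tests:
            #   - from: /contact
            #     to: http://example.org/contact.html
            # contributes
            #   {'source': '/obo/foo/contact', 'replacement': 'http://example.org/contact.html', 'status': '302'}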
+ if 'base_redirect' in document: + tests += [{ + 'source': base_url, + 'replacement': document['base_redirect'], + 'status': '302' + }] + + if 'products' in document \ + and type(document['products']) is list: + i = 0 + for product in document['products']: + i += 1 + tests += process_product(i, product) + + if 'term_browser' in document \ + and document['term_browser'].strip().lower() == 'ontobee' \ + and 'example_terms' in document \ + and type(document['example_terms']) is list: + i = 0 + for example_term in document['example_terms']: + i += 1 + tests += process_ontobee(idspace, i, example_term) + + if 'tests' in document: + i = 0 + status = '302' + for test_entry in document['tests']: + i += 1 + test = {'status': status} + if 'from' in test_entry: + test['source'] = base_url + test_entry['from'] + if 'to' in test_entry: + test['replacement'] = test_entry['to'] + if 'source' in test and 'replacement' in test: + tests.append(test) + else: + raise ValueError('Invalid test %d in global tests' % i) + + if 'entries' in document \ + and type(document['entries']) is list: + i = 0 + for entry in document['entries']: + i += 1 + tests += process_entry(base_url, i, entry) + + # Write report table header. + report_file.write('\t'.join([ + 'Result', 'Source URL', + 'Expected Status', 'Expected URL', + 'Actual Status', 'Actual URL' + ]) + '\n') + + # Run the tests and add results to the report table. + conn = http.client.HTTPConnection(args.domain, timeout=args.timeout) + for test in tests: + results = run_test(conn, test) + if results[0] == 'FAIL': + print("FAILURE when checking {}. See {} for details." + .format(yaml_file.name, report_file.name)) + failures.append(idspace) + report_file.write('\t'.join(results) + '\n') + report_file.flush() + time.sleep(args.delay) + + if failures: + print("The following idspaces encountered failures: {}.\n" + "Use the script {} to run tests for just those idspaces." + .format(', '.join(failures), __file__)) def process_product(i, product): """Given an index, and a product dictionary, diff --git a/tools/translate_yaml.py b/tools/translate_yaml.py index f2b3ff13..82ed66db 100755 --- a/tools/translate_yaml.py +++ b/tools/translate_yaml.py @@ -73,6 +73,7 @@ supported. When `term_browser: custom` is used no output is generated. """ +import functools import json import jsonschema import re @@ -365,7 +366,7 @@ def main(): # Write the entries for the given project to its project-specific .htaccess file, located # in a subdirectory under the given output directory. 
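    # For illustration only (made-up idspaces), the cmp comparator defined further below
    # sorts case-insensitively but puts longer names first when one name is a prefix of
    # another: sorted(['GO', 'BFO', 'GOREL'], key=cmp) gives ['BFO', 'GOREL', 'GO'].
    # This is the "longer names first" ordering intended to avoid regex conflicts between
    # directives for idspaces that share a prefix.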
Note that if the subdirectory already # exists, the files inside will simply be overriden: - yamlroot = re.sub('\.yml', '', os.path.basename(yamlname)) + yamlroot = re.sub('\.yml$', '', os.path.basename(yamlname)) try: os.mkdir('{}/{}'.format(normalised_output_dir, yamlroot)) except FileExistsError: @@ -377,8 +378,21 @@ def main(): print("{} is not a directory.".format(args.input_dir)) sys.exit(1) + @functools.cmp_to_key + def cmp(s, t): + "Case-insensitive sort, longer names first" + s = s.lower() + t = t.lower() + s_pad = (s + t[len(s):] + 'z') if len(s) < len(t) else s + t_pad = (t + s[len(t):] + 'z') if len(t) < len(s) else t + if s_pad < t_pad: + return -1 + if s_pad > t_pad: + return 1 + return 0 + normalised_input_dir = os.path.normpath(args.input_dir) - for yamlname in glob("{}/*.yml".format(normalised_input_dir)): + for yamlname in sorted(glob("{}/*.yml".format(normalised_input_dir)), key=cmp): yamldoc = load_and_validate(yamlname, schema) base_url = yamldoc['base_url'] # `idspace` and `yamlroot` are synonyms. The former is taken from the `idspace` specified @@ -386,7 +400,7 @@ def main(): # match (up to a change of case - idspace is always uppercase while yamlroot is lower). # If they do not match, emit a warning. idspace = yamldoc['idspace'] - yamlroot = re.sub('\.yml', '', os.path.basename(yamlname)) + yamlroot = re.sub('\.yml$', '', os.path.basename(yamlname)) if idspace.lower() != yamlroot.lower(): print("WARNING: idspace: {} does not match filename {}".format(idspace, yamlname)) @@ -414,11 +428,11 @@ def main(): # Append the base redirects, products, and terms to the global .htaccess file: with open('{}/.htaccess'.format(normalised_output_dir), 'a') as outfile: outfile.write('\n### Generated from project configuration files\n\n') - for idspace in sorted(base_redirects): + for idspace in sorted(base_redirects, key=cmp): append_base_redirect(base_redirects[idspace], idspace, outfile) - for idspace in sorted(products): + for idspace in sorted(products, key=cmp): append_products(products[idspace], idspace, outfile) - for idspace in sorted(terms): + for idspace in sorted(terms, key=cmp): append_term(terms[idspace], idspace, outfile) From 658982d69970546fa250fdfa5ed73ff99a4a9de3 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 18 Feb 2019 19:47:04 -0500 Subject: [PATCH 15/20] better error message --- tools/test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/test.py b/tools/test.py index 5f4b0675..d4288831 100755 --- a/tools/test.py +++ b/tools/test.py @@ -138,8 +138,10 @@ def main(): if failures: print("The following idspaces encountered failures: {}.\n" - "Use the script {} to run tests for just those idspaces." - .format(', '.join(failures), __file__)) + "See their corresponding TSV files in '{}' for more detail.\n" + "Use the script '{}' to run tests for just those idspaces." 
+ .format(', '.join(failures), args.output, __file__)) + def process_product(i, product): """Given an index, and a product dictionary, From 301231262cadf1296e0a2b59b8e6b51cfd2a9b1c Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Tue, 19 Feb 2019 10:42:16 -0500 Subject: [PATCH 16/20] create symlink to project in output directory --- Makefile | 2 +- tools/test.py | 4 ++-- tools/translate_yaml.py | 8 ++++++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 3e078e2c..275a7683 100644 --- a/Makefile +++ b/Makefile @@ -90,7 +90,7 @@ backup/: build: BACKUP = backup/obo-$(shell python -c "import time,os;print(time.strftime('%Y%m%d-%H%M%S',time.gmtime(os.path.getmtime('www/obo'))))") build: | backup/ www/obo/ tools/translate_yaml.py --input_dir config --output_dir temp/obo - rm -rf temp/obo/obo + rm -rf temp/obo/obo temp/obo/OBO -test -e www/obo && mv www/obo $(BACKUP) mv temp/obo www/obo rmdir temp diff --git a/tools/test.py b/tools/test.py index d4288831..f60c5570 100755 --- a/tools/test.py +++ b/tools/test.py @@ -138,8 +138,8 @@ def main(): if failures: print("The following idspaces encountered failures: {}.\n" - "See their corresponding TSV files in '{}' for more detail.\n" - "Use the script '{}' to run tests for just those idspaces." + "For more details, see their corresponding TSV files in '{}'.\n" + "To re-run tests for just those idspaces, use the script '{}'." .format(', '.join(failures), args.output, __file__)) diff --git a/tools/translate_yaml.py b/tools/translate_yaml.py index 82ed66db..8c47d26b 100755 --- a/tools/translate_yaml.py +++ b/tools/translate_yaml.py @@ -407,9 +407,13 @@ def cmp(s, t): # Collect the entries for the current idspace: entries[idspace] = translate_entries(yamldoc, base_url) # Write the entries to the idspace's project-specific file located in its own subdirectory - # under the output directory. If it already exists, the files inside will be overwritten. + # under the output directory, as well as a symlink to the project subdirectory in the + # output directory. If the files/directories already exist, they will be overwritten. try: - os.mkdir('{}/{}'.format(normalised_output_dir, yamlroot)) + projdir = '{}/{}'.format(normalised_output_dir, yamlroot) + symlink = '{}/{}'.format(normalised_output_dir, idspace) + os.mkdir(projdir) + os.symlink(os.path.basename(projdir), symlink, target_is_directory=True) except FileExistsError: pass with open('{}/{}/.htaccess'.format(normalised_output_dir, yamlroot), 'w') as outfile: From 9598d3910966fc3253e4ac28b68d23fa64b3f50c Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Tue, 19 Feb 2019 11:08:04 -0500 Subject: [PATCH 17/20] remove variable 'types' that is never used --- tools/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/test.py b/tools/test.py index f60c5570..93931fb6 100755 --- a/tools/test.py +++ b/tools/test.py @@ -198,7 +198,6 @@ def process_entry(base_url, i, entry): test['status'] = status # Determine the type for this entry. 
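    # A worked example (values borrowed from the test2/obo example above): with
    #   base_url: /obo/obo
    # an entry like
    #   - exact: /project
    #     replacement: http://example.org/project.html
    # becomes the test
    #   {'source': '/obo/obo/project', 'replacement': 'http://example.org/project.html', 'status': '302'}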
- types = [] if 'exact' in entry: test['source'] = base_url + entry['exact'] test['replacement'] = entry['replacement'] From b5ea77f63895486c5c133855f553b80158628e2a Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 21 Feb 2019 15:06:12 -0500 Subject: [PATCH 18/20] Remove section of Makefile: 'Migrate Configuration from PURL.org' and remove empty example_terms and entries arrays from dpo.yml and xao.yml --- Makefile | 20 -------------------- config/dpo.yml | 3 --- config/xao.yml | 1 - 3 files changed, 24 deletions(-) diff --git a/Makefile b/Makefile index 275a7683..c5bc178d 100644 --- a/Makefile +++ b/Makefile @@ -144,26 +144,6 @@ safe-update: tools/safe-update.py -### Migrate Configuration from PURL.org -# -# Given an ontology ID (usually lower-case), -# fetch and translate a PURL.org XML file -# into a YAML configuration file. -# This should be a one-time migration. -# Do not overwrite existing config file. -PURL_XML = https://purl.org/admin/purl/?target=&seealso=&maintainers=&explicitmaintainers=&tombstone=false&p_id= - -.PHONY: migrate-% -migrate-%: - @test ! -s config/$*.yml \ - || (echo 'Refusing to overwrite config/$*.yml'; exit 1) - mkdir -p migrations - test -s migrations/$*.xml \ - || curl --fail -o migrations/$*.xml "$(PURL_XML)/obo/$**" - mkdir -p config - tools/migrate.py $* migrations/$*.xml config/$*.yml - - ### Code style and lint checks for python source files. # # Note that `|| true` is appended to force make to ignore the exit code in both cases diff --git a/config/dpo.yml b/config/dpo.yml index bd251a45..420ea64e 100644 --- a/config/dpo.yml +++ b/config/dpo.yml @@ -10,6 +10,3 @@ products: - dpo.owl: https://raw.githubusercontent.com/FlyBase/flybase-controlled-vocabulary/master/releases/dpo.owl term_browser: ontobee -example_terms: - -entries: diff --git a/config/xao.yml b/config/xao.yml index 4698ef4f..a50314bb 100644 --- a/config/xao.yml +++ b/config/xao.yml @@ -8,7 +8,6 @@ products: - xao.obo: http://ontologies.berkeleybop.org/xao.obo term_browser: ontobee -example_terms: entries: - prefix: /tracker/ From 18cdac34cf1c38f1176b55750d99dd1983ca067c Mon Sep 17 00:00:00 2001 From: "James A. Overton" Date: Wed, 27 Feb 2019 11:59:23 -0500 Subject: [PATCH 19/20] Drop kwalify, travis, ruby; add jsonschema --- tools/site.yml | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tools/site.yml b/tools/site.yml index ce0dafa2..5a1a17f4 100644 --- a/tools/site.yml +++ b/tools/site.yml @@ -29,8 +29,6 @@ with_items: - ntp - git - - ruby - - ruby-dev - python3 - name: Install system-wide packages (All) @@ -39,16 +37,12 @@ - apache2 - python3-pip - - name: Install Travis command-line - when: mode == "production" - command: gem install travis --no-rdoc --no-ri creates=/usr/local/bin/travis - - - name: Install kwalify - command: gem install kwalify creates=/usr/local/bin/kwalify - - name: Install PyYAML pip: name=PyYAML executable=pip3 + - name: Install jsonschema + pip: name=jsonschema executable=pip3 + - name: Clone the Git repo when: mode == "production" git: repo={{ repo_url }} dest={{ repo_dir }} From cb576916b29036b15296c3d3759a75cc11df6a1b Mon Sep 17 00:00:00 2001 From: "James A. 
Overton" Date: Wed, 27 Feb 2019 12:00:59 -0500 Subject: [PATCH 20/20] Tweak documentation for scripts --- tools/safe-update.py | 6 ++++++ tools/translate_yaml.py | 42 ++++++++++++++++++++++++++--------------- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/tools/safe-update.py b/tools/safe-update.py index 8d738ccd..494c5bbb 100755 --- a/tools/safe-update.py +++ b/tools/safe-update.py @@ -1,5 +1,11 @@ #!/usr/bin/env python3 +# Check Travis CI build status for the `master` branch of OBOFoundry/purl.obolibrary.org +# If `master` is green (i.e. all tests are passing), +# and the build number is greater than the current build +# (i.e. the last time we updated), +# then pull `master`, run Make, and update .current_build. + import difflib import requests import subprocess diff --git a/tools/translate_yaml.py b/tools/translate_yaml.py index 8c47d26b..971b16dd 100755 --- a/tools/translate_yaml.py +++ b/tools/translate_yaml.py @@ -7,8 +7,19 @@ https://httpd.apache.org/docs/2.4/mod/mod_alias.html -The YAML files will have a `base_url`, a list of `entries`, a `base_redirect` -field (optional), a `products` field (optional), and a `terms` field (optional). +The `foo.yml` file will generate output for two targets: + +1. /www/obo/foo/.htaccess +2. /www/obo/.htaccess + +Target (1) only applies to project `foo`. +It is generated from `base_url` and the `entries` list. +Projects have wide discretion for this target. + +Target (2) applies to all projects. +The content is tightly constrained to avoid conflicts. +This content is generated from other YAML fields, +such as `products` and `term_browser`. Entries: ======= @@ -33,7 +44,7 @@ exactly one required - status: HTTP status for redirect; zero or one value; defaults to "temporary"; - can be "permanent" (301) or "temporary" (302); + can be "permanent" (301), "temporary" (302), or "see other" (303); (Apache uses "temp" for "temporary") - tests: an optional list of tests each test requires a `from` value, like `exact`, @@ -66,6 +77,7 @@ Base redirects, Products, and Terms =================================== + These fields are optional. If the YAML input does not contain them, no corresponding output will be generated. @@ -197,18 +209,6 @@ def process_entry(base_url, i, entry): return 'RedirectMatch %s "%s" "%s"' % (status, source, replacement) -def process_product(product): - """ - Given a product dictionary with one key, - ensure that the entry is valid, - and return an Apache RedirectMatch directive string. - """ - key = [k for k in product].pop() - source = unquote('(?i)^/obo/%s$' % key) - replacement = unquote(product[key]) - return 'RedirectMatch temp "%s" "%s"' % (source, replacement) - - def translate_entries(yamldoc, base_url): """ Reads the field `entries` from the YAML document, processes each entry that is read using the @@ -256,6 +256,18 @@ def append_base_redirect(base_redirect, idspace, outfile): outfile.write(base_redirect + '\n\n') +def process_product(product): + """ + Given a product dictionary with one key, + ensure that the entry is valid, + and return an Apache RedirectMatch directive string. + """ + key = [k for k in product].pop() + source = unquote('(?i)^/obo/%s$' % key) + replacement = unquote(product[key]) + return 'RedirectMatch temp "%s" "%s"' % (source, replacement) + + def translate_products(yamldoc): """ Reads the `products` field from the given YAML document, processes each product that is read,