diff --git a/helper/fieldValue.js b/helper/fieldValue.js index 11ddef2d8..8060fafd4 100644 --- a/helper/fieldValue.js +++ b/helper/fieldValue.js @@ -1,4 +1,5 @@ const _ = require('lodash'); +const htmlSanitize = require('./htmlSanitize'); function getStringValue(property) { // numeric value, cast to string @@ -41,5 +42,5 @@ function getArrayValue(property) { return [property]; } -module.exports.getStringValue = getStringValue; -module.exports.getArrayValue = getArrayValue; +module.exports.getStringValue = (property) => htmlSanitize(getStringValue(property)); +module.exports.getArrayValue = (property) => htmlSanitize(getArrayValue(property)); diff --git a/helper/geojsonify.js b/helper/geojsonify.js index 91165f834..a82fb3926 100644 --- a/helper/geojsonify.js +++ b/helper/geojsonify.js @@ -6,6 +6,7 @@ const _ = require('lodash'); const Document = require('pelias-model').Document; const codec = require('pelias-model').codec; const field = require('./fieldValue'); +const htmlSanitize = require('./htmlSanitize'); const decode_gid = require('./decode_gid'); function geojsonifyPlaces( params, docs ){ @@ -42,13 +43,17 @@ function geojsonifyPlaces( params, docs ){ function geojsonifyPlace(params, place) { const gid_components = decode_gid(place._id); + const source = htmlSanitize(place.source); + const source_id = htmlSanitize(gid_components.id); + const layer = htmlSanitize(place.layer); + // setup the base doc const doc = { - id: gid_components.id, - gid: new Document(place.source, place.layer, gid_components.id).getGid(), - layer: place.layer, - source: place.source, - source_id: gid_components.id, + id: source_id, + gid: new Document(source, layer, source_id).getGid(), + layer, + source, + source_id, bounding_box: place.bounding_box, lat: parseFloat(place.center_point.lat), lng: parseFloat(place.center_point.lon), diff --git a/helper/htmlSanitize.js b/helper/htmlSanitize.js new file mode 100644 index 000000000..cc21a3e8d --- /dev/null +++ b/helper/htmlSanitize.js @@ 
-0,0 +1,31 @@
+const _ = require('lodash');
+const { stripHtml } = require('string-strip-html');
+
+// elements named here are removed together with everything nested inside them
+const STRIP_OPTIONS = { stripTogetherWithTheirContents: ['link', 'script', 'style', 'xml'] };
+
+// clean a single value; anything which is not a string is handed back unchanged
+function sanitizeValue(value) {
+  return _.isString(value) ? stripHtml(value, STRIP_OPTIONS).result : value;
+}
+
+/**
+ * Strip HTML markup from strings.
+ *
+ * 'link', 'script', 'style' and 'xml' elements are deleted along with their
+ * contents; for all other elements only the tags themselves are removed and
+ * whatever was inside them is kept.
+ *
+ * A string is cleaned directly. For an Array or a plain Object each immediate
+ * member is cleaned, one level deep only: nested Arrays and Objects are not
+ * descended into. Any other kind of input is returned as-is.
+ *
+ * see: https://www.npmjs.com/package/string-strip-html
+ */
+function htmlSanitize(data) {
+  if (_.isArray(data)) { return _.map(data, (item) => sanitizeValue(item)); }
+  if (_.isPlainObject(data)) { return _.mapValues(data, (item) => sanitizeValue(item)); }
+  return sanitizeValue(data);
+}
+
+module.exports = htmlSanitize;
diff --git a/package.json b/package.json
index a0271e643..1d58678b0 100644
--- a/package.json
+++ b/package.json
@@ -64,6 +64,7 @@
     "retry": "^0.12.0",
     "stable": "^0.1.8",
     "stats-lite": "^2.0.4",
+    "string-strip-html": "^8.3.0",
     "through2": "^3.0.0"
   },
   "devDependencies": {
diff --git a/test/unit/helper/geojsonify.js b/test/unit/helper/geojsonify.js
index 1f1523f0e..cf35c84aa 100644
--- a/test/unit/helper/geojsonify.js
+++ b/test/unit/helper/geojsonify.js
@@ -768,6 +768,49 @@ module.exports.tests.addendum = function(test, common) {
   });
 };
 
+// strip HTML entities from the response
+module.exports.tests.sanitizeHTML = function (test, common) {
+  test('sanitize HTML', function (t) {
+    var aliases = [{
+      '_id': 'example:

example

:1', + 'source': 'example', + 'layer': '

example

', + 'name': { + 'default': 'Example