diff --git a/helper/fieldValue.js b/helper/fieldValue.js index 11ddef2d8..8060fafd4 100644 --- a/helper/fieldValue.js +++ b/helper/fieldValue.js @@ -1,4 +1,5 @@ const _ = require('lodash'); +const htmlSanitize = require('./htmlSanitize'); function getStringValue(property) { // numeric value, cast to string @@ -41,5 +42,5 @@ function getArrayValue(property) { return [property]; } -module.exports.getStringValue = getStringValue; -module.exports.getArrayValue = getArrayValue; +module.exports.getStringValue = (property) => htmlSanitize(getStringValue(property)); +module.exports.getArrayValue = (property) => htmlSanitize(getArrayValue(property)); diff --git a/helper/geojsonify.js b/helper/geojsonify.js index 91165f834..a82fb3926 100644 --- a/helper/geojsonify.js +++ b/helper/geojsonify.js @@ -6,6 +6,7 @@ const _ = require('lodash'); const Document = require('pelias-model').Document; const codec = require('pelias-model').codec; const field = require('./fieldValue'); +const htmlSanitize = require('./htmlSanitize'); const decode_gid = require('./decode_gid'); function geojsonifyPlaces( params, docs ){ @@ -42,13 +43,17 @@ function geojsonifyPlaces( params, docs ){ function geojsonifyPlace(params, place) { const gid_components = decode_gid(place._id); + const source = htmlSanitize(place.source); + const source_id = htmlSanitize(gid_components.id); + const layer = htmlSanitize(place.layer); + // setup the base doc const doc = { - id: gid_components.id, - gid: new Document(place.source, place.layer, gid_components.id).getGid(), - layer: place.layer, - source: place.source, - source_id: gid_components.id, + id: source_id, + gid: new Document(source, layer, source_id).getGid(), + layer, + source, + source_id, bounding_box: place.bounding_box, lat: parseFloat(place.center_point.lat), lng: parseFloat(place.center_point.lon), diff --git a/helper/htmlSanitize.js b/helper/htmlSanitize.js new file mode 100644 index 000000000..cc21a3e8d --- /dev/null +++ b/helper/htmlSanitize.js @@ -0,0 +1,27 @@ +const _ = require('lodash'); +const stripHTML = require('string-strip-html').stripHtml; +const options = { stripTogetherWithTheirContents: ['link', 'script', 'style', 'xml'] }; + +/** + * Sanitize HTML in strings by completely removing 'dangerous' elements + * while keeping the inner HTML of non-dangerous elements. + * + * note: Arrays and Objects of strings are supported but currently + * they are not sanitized *recursively*. + * + * see: https://www.npmjs.com/package/string-strip-html + */ + +function htmlSanitizeValue(value) { + if (!_.isString(value)) { return value; } + return stripHTML(value, options).result; +} + +function htmlSanitize(data) { + if (_.isString(data)) { return htmlSanitizeValue(data); } + if (_.isArray(data)) { return _.map(data, htmlSanitizeValue); } + if (_.isPlainObject(data)) { return _.mapValues(data, htmlSanitizeValue); } + return data; +} + +module.exports = htmlSanitize; diff --git a/package.json b/package.json index a0271e643..1d58678b0 100644 --- a/package.json +++ b/package.json @@ -64,6 +64,7 @@ "retry": "^0.12.0", "stable": "^0.1.8", "stats-lite": "^2.0.4", + "string-strip-html": "^8.3.0", "through2": "^3.0.0" }, "devDependencies": { diff --git a/test/unit/helper/geojsonify.js b/test/unit/helper/geojsonify.js index 1f1523f0e..cf35c84aa 100644 --- a/test/unit/helper/geojsonify.js +++ b/test/unit/helper/geojsonify.js @@ -768,6 +768,49 @@ module.exports.tests.addendum = function(test, common) { }); }; +// strip HTML entities from the response +module.exports.tests.sanitizeHTML = function (test, common) { + test('sanitize HTML', function (t) { + var aliases = [{ + '_id': 'example:
example
:1', + 'source': 'example', + 'layer': 'example
', + 'name': { + 'default': 'Example Place' + }, + 'center_point': { + 'lon': 0, + 'lat': 0 + } + }]; + + const expected = { + type: 'FeatureCollection', + features: [{ + type: 'Feature', + geometry: { + type: 'Point', + coordinates: [0, 0] + }, + properties: { + id: '1', + gid: 'example:example:1', + layer: 'example', + source: 'example', + source_id: '1', + name: 'Example Place' + } + }], + bbox: [0, 0, 0, 0] + }; + + var actual = geojsonify({}, aliases); + t.deepEquals(actual, expected); + t.end(); + }); + +}; + module.exports.all = (tape, common) => { function test(name, testFunction) { return tape(`geojsonify: ${name}`, testFunction); diff --git a/test/unit/helper/htmlSanitize.js b/test/unit/helper/htmlSanitize.js new file mode 100644 index 000000000..f6602e19b --- /dev/null +++ b/test/unit/helper/htmlSanitize.js @@ -0,0 +1,102 @@ +const sanitize = require('../../../helper/htmlSanitize'); +module.exports.tests = {}; + +module.exports.tests.remove = function (test, common) { + test('remove: LINK tags', (t) => { + t.deepEquals(sanitize('AAA BBB'), 'AAA BBB'); + t.end(); + }); + test('remove: SCRIPT tags', (t) => { + t.deepEquals(sanitize('AAA BBB'), 'AAA BBB'); + t.end(); + }); + test('remove: STYLE tags', (t) => { + t.deepEquals(sanitize('AAA BBB'), 'AAA BBB'); + t.end(); + }); + test('remove: XML tags', (t) => { + t.deepEquals(sanitize('AAACCC
BBB'), 'AAA CCC BBB'); + t.end(); + }); + test('keep contents: nested safe tags', (t) => { + t.deepEquals(sanitize('AAACCC
BBB'), 'AAA CCC BBB'); + t.end(); + }); + test('keep contents: invalid nested safe tags (missing closing tag)', (t) => { + t.deepEquals(sanitize('AAACCC
BBB'), 'AAA CCC BBB'); + t.end(); + }); +}; + +module.exports.tests.types = function (test, common) { + test('string', (t) => { + t.deepEquals(sanitize('AAA BBB'), 'AAA BBB'); + t.end(); + }); + test('string - empty', (t) => { + t.deepEquals(sanitize(''), ''); + t.end(); + }); + test('number', (t) => { + t.deepEquals(sanitize(0.1), 0.1); + t.end(); + }); + test('number - empty', (t) => { + t.deepEquals(sanitize(NaN), NaN); + t.end(); + }); + test('array', (t) => { + t.deepEquals(sanitize([ + 'AAA BBB', + 'CCC DDD' + ]), [ + 'AAA BBB', + 'CCC DDD' + ]); + t.end(); + }); + test('array - empty', (t) => { + t.deepEquals(sanitize([]), []); + t.end(); + }); + test('object', (t) => { + t.deepEquals(sanitize({ + a: 'AAA BBB', + b: 'CCC DDD' + }), { + a: 'AAA BBB', + b: 'CCC DDD' + }); + t.end(); + }); + test('object - empty', (t) => { + t.deepEquals(sanitize({}), {}); + t.end(); + }); + test('null', (t) => { + t.deepEquals(sanitize(null), null); + t.end(); + }); + test('undefined', (t) => { + t.deepEquals(sanitize(undefined), undefined); + t.end(); + }); +}; + +module.exports.all = function (tape, common) { + + function test(name, testFunction) { + return tape('[helper] htmlSanitize: ' + name, testFunction); + } + + for (var testCase in module.exports.tests) { + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/run.js b/test/unit/run.js index d6da58903..fd603ca10 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -36,6 +36,7 @@ var tests = [ require('./helper/decode_gid'), require('./helper/diffPlaces'), require('./helper/fieldValue'), + require('./helper/htmlSanitize'), require('./helper/geojsonify_place_details'), require('./helper/geojsonify'), require('./helper/iso3166'),