From 89e1b323dd1e189dba64100a5401f4c764b4183f Mon Sep 17 00:00:00 2001 From: Kimmo Brunfeldt Date: Thu, 28 Sep 2017 18:04:23 +0300 Subject: [PATCH] Initial commit --- .env.sample | 15 ++++++++ .eslintrc | 17 +++++++++ .gitignore | 50 +++++++++++++++++++++++++ Procfile | 1 + README.md | 33 +++++++++++++++++ app.json | 27 ++++++++++++++ package.json | 33 +++++++++++++++++ src/app.js | 47 ++++++++++++++++++++++++ src/config.js | 11 ++++++ src/core/pdf-core.js | 37 +++++++++++++++++++ src/http/pdf-http.js | 14 +++++++ src/index.js | 39 ++++++++++++++++++++ src/middleware/error-logger.js | 59 ++++++++++++++++++++++++++++++ src/middleware/error-responder.js | 52 ++++++++++++++++++++++++++ src/middleware/require-https.js | 12 ++++++ src/router.js | 19 ++++++++++ src/util/express.js | 61 +++++++++++++++++++++++++++++++ src/util/logger.js | 29 +++++++++++++++ src/util/require-envs.js | 11 ++++++ src/util/validation.js | 9 +++++ 20 files changed, 576 insertions(+) create mode 100644 .env.sample create mode 100644 .eslintrc create mode 100644 .gitignore create mode 100644 Procfile create mode 100644 README.md create mode 100644 app.json create mode 100644 package.json create mode 100644 src/app.js create mode 100644 src/config.js create mode 100644 src/core/pdf-core.js create mode 100644 src/http/pdf-http.js create mode 100644 src/index.js create mode 100644 src/middleware/error-logger.js create mode 100644 src/middleware/error-responder.js create mode 100644 src/middleware/require-https.js create mode 100644 src/router.js create mode 100644 src/util/express.js create mode 100644 src/util/logger.js create mode 100644 src/util/require-envs.js create mode 100644 src/util/validation.js diff --git a/.env.sample b/.env.sample new file mode 100644 index 00000000..9cb600fa --- /dev/null +++ b/.env.sample @@ -0,0 +1,15 @@ +#!/bin/bash + +# Guide: +# +# 1. Copy this file to .env +# +# cp .env.sample .env +# +# 2. 
Fill the blanks + +export NODE_ENV=development +export PORT=9000 +export ALLOW_HTTP=true + +echo "Environment variables set!" diff --git a/.eslintrc b/.eslintrc new file mode 100644 index 00000000..c829e65b --- /dev/null +++ b/.eslintrc @@ -0,0 +1,17 @@ +{ + "env": { + "browser": true, + "amd": true, + "node": true, + "es6": true + }, + "extends": "airbnb-base", + "rules": { + "no-implicit-coercion": "error", + "no-process-env": "error", + "no-path-concat": "error", + "import/no-extraneous-dependencies": ["error", {"devDependencies": true}], + "no-use-before-define": ["error", { "functions": false }], + "no-underscore-dangle": "off" + } +} \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..80825b22 --- /dev/null +++ b/.gitignore @@ -0,0 +1,50 @@ +.DS_Store +.idea + +# Logs +logs +*.log +npm-debug.log* + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (http://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules +jspm_packages + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity diff --git a/Procfile b/Procfile new file mode 100644 index 00000000..9cbc91a2 --- /dev/null +++ b/Procfile @@ -0,0 +1 @@ +web: NODE_ENV=production node src/index.js \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 00000000..d95d6f25 --- /dev/null +++ b/README.md @@ -0,0 +1,33 @@ +# URL to PDF + +> Web page PDF rendering done right. Packaged to an easy API. 
+ +A simple API which converts a given URL to a PDF. **Why is it "done right"?** + +* Rendered with Headless Chrome, using [Puppeteer](https://github.com/GoogleChrome/puppeteer) +* Sensible defaults +* Easy deployment to Heroku. I love Lambda, but... Deploy to Heroku button. + + +**Requires Node 8+ (async, await).** + +## Get started + +* `cp .env.sample .env` +* Fill in the blanks in `.env` +* `source .env` (running `bash .env` instead only sets the variables in a child shell) + + Or use [autoenv](https://github.com/kennethreitz/autoenv). + +* `npm install` +* `npm start` to start the Express server locally +* The server runs at http://localhost:9000, or on the port set in the `$PORT` env variable + + +## Tech stack + +* Node 8+ (async, await), written in ES2017 +* [Express.js](https://expressjs.com/) app with a nice internal architecture, based on [these conventions](https://github.com/kimmobrunfeldt/express-example). +* Hapi-style Joi validation with [express-validation](https://github.com/andrewkeig/express-validation) +* Heroku + [Puppeteer buildpack](https://github.com/jontewks/puppeteer-heroku-buildpack) +* [Puppeteer](https://github.com/GoogleChrome/puppeteer) to control Chrome diff --git a/app.json b/app.json new file mode 100644 index 00000000..056b49e3 --- /dev/null +++ b/app.json @@ -0,0 +1,27 @@ +{ + "name": "url-to-pdf-api", + "description": "Web page PDF rendering done right. 
Packaged to an easy API.", + "keywords": [ + "pdf", + "html", + "html to pdf", + "html 2 pdf", + "render" + ], + "website": "https://github.com/kimmobrunfeldt/url-to-pdf-api", + "repository": "https://github.com/kimmobrunfeldt/url-to-pdf-api", + "env": { + "ALLOW_HTTP": { + "description": "When set to \"true\", unsecure requests are allowed.", + "value": "false" + } + }, + "buildpacks": [ + { + "url": "https://github.com/jontewks/puppeteer-heroku-buildpack" + }, + { + "url": "http://github.com/heroku/heroku-buildpack-nodejs.git" + } + ] +} \ No newline at end of file diff --git a/package.json b/package.json new file mode 100644 index 00000000..15f8c4a0 --- /dev/null +++ b/package.json @@ -0,0 +1,33 @@ +{ + "name": "url-to-pdf-api", + "version": "1.0.0", + "description": "Web page PDF rendering done right. Packaged to an easy API.", + "main": "src/index.js", + "scripts": { + "start": "node src/index.js" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/kimmobrunfeldt/url-to-pdf-api.git" + }, + "author": "Kimmo Brunfeldt", + "license": "MIT", + "bugs": { + "url": "https://github.com/kimmobrunfeldt/url-to-pdf-api/issues" + }, + "homepage": "https://github.com/kimmobrunfeldt/url-to-pdf-api#readme", + "dependencies": { + "bluebird": "^3.5.0", + "body-parser": "^1.18.2", + "compression": "^1.7.1", + "cors": "^2.8.4", + "express": "^4.15.5", + "express-validation": "^1.0.2", + "joi": "^11.1.1", + "lodash": "^4.17.4", + "morgan": "^1.9.0", + "puppeteer": "^0.11.0", + "server-destroy": "^1.0.1", + "winston": "^2.3.1" + } +} diff --git a/src/app.js b/src/app.js new file mode 100644 index 00000000..f9857d5d --- /dev/null +++ b/src/app.js @@ -0,0 +1,47 @@ +const express = require('express'); +const morgan = require('morgan'); +const bodyParser = require('body-parser'); +const compression = require('compression'); +const cors = require('cors'); +const logger = require('./util/logger')(__filename); +const errorResponder = 
require('./middleware/error-responder'); +const ipLogger = require('./middleware/ip-logger'); +const errorLogger = require('./middleware/error-logger'); +const requireHttps = require('./middleware/require-https'); +const createRouter = require('./router'); +const config = require('./config'); + +function createApp() { + const app = express(); + // App is served behind Heroku's router. + // This is needed to be able to use req.ip or req.secure + app.enable('trust proxy', 1); + app.disable('x-powered-by'); + + if (config.NODE_ENV !== 'production') { + app.use(morgan('dev')); + } + + const corsOpts = { + origin: config.CORS_ORIGIN, + methods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS', 'HEAD', 'PATCH'], + }; + logger.info('Using CORS options:', corsOpts); + app.use(cors(corsOpts)); + app.use(bodyParser.json({ limit: '1mb' })); + app.use(compression({ + // Compress everything over 10 bytes + threshold: 10, + })); + + // Initialize routes + const router = createRouter(); + app.use('/', router); + + app.use(errorLogger()); + app.use(errorResponder()); + + return app; +} + +module.exports = createApp; diff --git a/src/config.js b/src/config.js new file mode 100644 index 00000000..7c8bf207 --- /dev/null +++ b/src/config.js @@ -0,0 +1,11 @@ +/* eslint-disable no-process-env */ +const requireEnvs = require('./util/require-envs'); + +// Env vars should be casted to correct types +const config = { + PORT: Number(process.env.PORT) || 9000, + NODE_ENV: process.env.NODE_ENV, + LOG_LEVEL: process.env.LOG_LEVEL, +}; + +module.exports = config; diff --git a/src/core/pdf-core.js b/src/core/pdf-core.js new file mode 100644 index 00000000..1c579039 --- /dev/null +++ b/src/core/pdf-core.js @@ -0,0 +1,37 @@ +const fs = require('fs'); +const puppeteer = require('puppeteer'); +const BPromise = require('bluerbird'); +const _ = require('lodash'); + +BPromise.promisifyAll(fs); + +async function render(_opts = {}) { + const opts = _.merge({ + viewport: { + width: 1200, + height: 800, + }, + 
goto: { + waitUntil: 'networkidle', + }, + pdf: { + format: 'A4', + } + }, _opts); + + const browser = await puppeteer.launch(); + const page = await browser.newPage(); + await page.setViewport(opts.viewport) + await page.goto(params.url, opts.goto); + await page.pdf(_.merge({}, opts.pdf, { + path: 'page.pdf', + })); + + await browser.close(); + + return fs.readFileAsync('page.pdf', { encoding: null }); +} + +module.exports = { + render, +}; diff --git a/src/http/pdf-http.js b/src/http/pdf-http.js new file mode 100644 index 00000000..b47227cf --- /dev/null +++ b/src/http/pdf-http.js @@ -0,0 +1,14 @@ +const ex = require('../util/express'); +const pdfCore = require('../core/pdf-core'); + +const getRender = ex.createJsonRoute((req) => { + const params = { + url: req.query.url, + }; + + return pdfCore.render(params); +}); + +module.exports = { + getRender, +}; diff --git a/src/index.js b/src/index.js new file mode 100644 index 00000000..c4a9dce9 --- /dev/null +++ b/src/index.js @@ -0,0 +1,39 @@ +const createApp = require('./app'); +const enableDestroy = require('server-destroy'); +const BPromise = require('bluebird'); +const logger = require('./util/logger')(__filename); +const config = require('./config'); + +BPromise.config({ + warnings: config.NODE_ENV !== 'production', + longStackTraces: true, +}); + +const app = createApp(); +const server = app.listen(config.PORT, () => { + logger.info( + 'Express server listening on http://localhost:%d/ in %s mode', + config.PORT, + app.get('env') + ); +}); +enableDestroy(server); + +function closeServer(signal) { + logger.info(`${signal} received`); + logger.info('Closing http.Server ..'); + server.destroy(); +} + +// Handle signals gracefully. Heroku will send SIGTERM before idle. 
+process.on('SIGTERM', closeServer.bind(this, 'SIGTERM')); +process.on('SIGINT', closeServer.bind(this, 'SIGINT(Ctrl-C)')); + +server.on('close', () => { + logger.info('Server closed'); + process.emit('cleanup'); + + logger.info('Giving 100ms time to cleanup..'); + // Give a small time frame to clean up + setTimeout(process.exit, 100); +}); diff --git a/src/middleware/error-logger.js b/src/middleware/error-logger.js new file mode 100644 index 00000000..c7de1bf4 --- /dev/null +++ b/src/middleware/error-logger.js @@ -0,0 +1,59 @@ +const _ = require('lodash'); +const logger = require('../util/logger')(__filename); + +function createErrorLogger(opts) { + opts = _.merge({ + logRequest: status => { + return status >= 400 && status !== 404 && status !== 503; + }, + logStackTrace: status => { + return status >= 500 && status !== 503; + } + }, opts); + + return function errorHandler(err, req, res, next) { + const status = err.status ? err.status : 500; + const logLevel = getLogLevel(status); + const log = logger[logLevel]; + + if (opts.logRequest(status)) { + logRequestDetails(logLevel, req, status); + } + + if (opts.logStackTrace(status)) { + log(err, err.stack); + } + else { + log(err.toString()); + } + + next(err); + }; +} + +function getLogLevel(status) { + return status >= 500 ? 'error' : 'warn'; +} + +function logRequestDetails(logLevel, req, status) { + logger[logLevel]('Request headers:', deepSupressLongStrings(req.headers)); + logger[logLevel]('Request parameters:', deepSupressLongStrings(req.params)); + logger.logEncrypted(logLevel, 'Request body:', req.body); +} + +function deepSupressLongStrings(obj) { + let newObj = {}; + _.each(obj, (val, key) => { + if (_.isString(val) && val.length > 100) { + newObj[key] = val.slice(0, 100) + '... 
[CONTENT SLICED]'; + } else if (_.isPlainObject(val)) { + return deepSupressLongStrings(val); + } else { + newObj[key] = val; + } + }); + + return newObj; +} + +module.exports = createErrorLogger; diff --git a/src/middleware/error-responder.js b/src/middleware/error-responder.js new file mode 100644 index 00000000..c912a89a --- /dev/null +++ b/src/middleware/error-responder.js @@ -0,0 +1,52 @@ +const http = require('http'); +const _ = require('lodash'); + +// This reponder is assuming that all <500 errors are safe to be responded +// with their .message attribute. +// DO NOT write sensitive data into error messages. +function createErrorResponder(opts) { + opts = _.merge({ + isErrorSafeToRespond: function(status) { + return status < 500; + }, + }, opts); + + return function errorResponder(err, req, res, next) { + var message; + var status = err.status ? err.status : 500; + switch (err.type) { + case 'StripeCardError': + // A declined card error + status = 402; + break; + case 'StripeInvalidRequestError': + status = 402; + break; + case 'StripeConnectionError': + status = 503; + break; + case 'StripeRateLimitError': + status = 429; + break; + default: + break; + } + + var httpMessage = http.STATUS_CODES[status]; + if (opts.isErrorSafeToRespond(status)) { + message = err.message; + } else { + message = httpMessage; + } + + const isPrettyValidationErr = _.has(err, 'errors'); + const body = isPrettyValidationErr + ? 
err // pass the object itself so Express serialises it and replies with application/json + : { status, statusText: httpMessage, messages: [message] }; + + res.status(status); + res.send(body); + }; +} + +module.exports = createErrorResponder; diff --git a/src/middleware/require-https.js b/src/middleware/require-https.js new file mode 100644 index 00000000..429b7423 --- /dev/null +++ b/src/middleware/require-https.js @@ -0,0 +1,12 @@ +const createRequireHttps = () => function RequireHttps(req, res, next) { + if (req.secure) { + // Allow requests only over https + return next(); + } + + const err = new Error('Only HTTPS allowed.'); + err.status = 403; + next(err); +}; + +module.exports = createRequireHttps; diff --git a/src/router.js b/src/router.js new file mode 100644 index 00000000..3bfb3b53 --- /dev/null +++ b/src/router.js @@ -0,0 +1,19 @@ +const _ = require('lodash'); +const Joi = require('joi'); +const validate = require('express-validation'); +const express = require('express'); +const pdf = require('./http/pdf-http'); +const { renderQueryParams } = require('./util/validation'); + +function createRouter() { + const router = express.Router(); + + const getRenderSchema = { + query: renderQueryParams, + }; + router.get('/api/render', validate(getRenderSchema), pdf.getRender); + + return router; +} + +module.exports = createRouter; diff --git a/src/util/express.js b/src/util/express.js new file mode 100644 index 00000000..256583fa --- /dev/null +++ b/src/util/express.js @@ -0,0 +1,61 @@ +const _ = require('lodash'); +const BPromise = require('bluebird'); + +// Route which assumes that the Promise `func` returns, will be resolved +// with data which will be sent as json response. +function createJsonRoute(func) { + return createRoute(func, function sendJsonResponse(data, req, res, next) { + res.json(data); + }); +} + +// Generic route creator +// Factory function to create a new route to reduce boilerplate in controllers +// and make it easier to interact with promises. 
+// `func` must return a promise +// `responseHandler` receives the data from asynchronous `func` as the first +// parameter +// Factory function to create a new 'raw' route handler. +// When using this function directly instead of `createJsonRoute`, you must +// send a response to express' `res` object. +function createRoute(func, responseHandler) { + return function route(req, res, next) { + try { + var callback = _.isFunction(responseHandler) ? func.bind(this, req, res) : + func.bind(this, req, res, next); + + var valuePromise = callback(); + if (!_.isFunction(_.get(valuePromise, 'then'))) { + // It was a not a Promise, so wrap it as a Promise + valuePromise = BPromise.resolve(valuePromise); + } + + if (_.isFunction(responseHandler)) { + valuePromise + .then(function(data) { + return responseHandler(data, req, res, next); + }) + .catch(next); + } + else { + valuePromise.catch(next); + } + + } + catch (err) { + next(err); + } + }; +} + +function throwStatus(status, message) { + const err = new Error(message); + err.status = status; + throw err; +} + +module.exports = { + createRoute, + createJsonRoute, + throwStatus +}; diff --git a/src/util/logger.js b/src/util/logger.js new file mode 100644 index 00000000..b6846ab9 --- /dev/null +++ b/src/util/logger.js @@ -0,0 +1,29 @@ +const path = require('path'); +const winston = require('winston'); +const _ = require('lodash'); +const config = require('../config'); + +const COLORIZE = config.NODE_ENV === 'development'; + +function createLogger(filePath) { + const fileName = path.basename(filePath); + + const logger = new winston.Logger({ + transports: [new winston.transports.Console({ + colorize: COLORIZE, + label: fileName, + timestamp: true, + })], + }); + + _setLevelForTransports(logger, config.LOG_LEVEL || 'info'); + return logger; +} + +function _setLevelForTransports(logger, level) { + _.each(logger.transports, function(transport) { + transport.level = level; + }); +} + +module.exports = createLogger; diff --git 
a/src/util/require-envs.js b/src/util/require-envs.js new file mode 100644 index 00000000..938a0241 --- /dev/null +++ b/src/util/require-envs.js @@ -0,0 +1,11 @@ +const _ = require('lodash'); + +function requireEnvs(arr) { + _.each(arr, varName => { + if (!process.env[varName]) { + throw new Error('Environment variable not set: ' + varName); + } + }); +} + +module.exports = requireEnvs; \ No newline at end of file diff --git a/src/util/validation.js b/src/util/validation.js new file mode 100644 index 00000000..5da25b05 --- /dev/null +++ b/src/util/validation.js @@ -0,0 +1,9 @@ +const Joi = require('joi'); + +const renderQueryParams = Joi.object({ + url: Joi.string().required(), +}).unknown(); + +module.exports = { + renderQueryParams, +};