-
Notifications
You must be signed in to change notification settings - Fork 5
/
helpers.js
48 lines (37 loc) · 1.25 KB
/
helpers.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
const got = require('got');
const Entities = require('html-entities').AllHtmlEntities;
const entities = new Entities();
/**
 * Fetch a documentation page and read out its <h1> and meta description.
 * We use the <h1> rather than the <title> as the title is a little more verbose.
 *
 * Pages that contain a deprecation notecard, or that lack either an <h1> or a
 * meta description, yield `[null, null]`.
 *
 * @param {String} url - Address of the documentation page to fetch
 * @returns {Promise<Array<?String>>} Resolves to `[title, description]` with HTML
 *   entities decoded and the title capped at 40 characters plus an ellipsis,
 *   or to `[null, null]`. Errors raised by the request are not caught here.
 */
const getTitleAndDescription = async (url) => {
  const MAX_TITLE_LENGTH = 40;
  const DESCRIPTION_REGEX = /<meta name="description" content="(.*?)"\/>/i;
  const TITLE_REGEX = /<h1>(.*?)<\/h1>/i;
  // to not rely on exact words this matches the deprecation container
  const DEPRECATION_REGEX = /class="notecard deprecated"/;

  const { body: doc } = await got(url);

  if (DEPRECATION_REGEX.test(doc)) {
    return [null, null];
  }

  const titleMatch = doc.match(TITLE_REGEX);
  const descriptionMatch = doc.match(DESCRIPTION_REGEX);
  if (!titleMatch || !descriptionMatch) {
    return [null, null];
  }

  // Decode BEFORE measuring/truncating: cutting the still-encoded markup can
  // split an entity in half (e.g. "&amp;" -> "&a") and counts entity syntax
  // towards the limit even though it collapses to a single character.
  const title = entities.decode(titleMatch[1]);
  const description = entities.decode(descriptionMatch[1]);

  // Work on code points so the cut never lands inside a surrogate pair.
  const titleChars = Array.from(title);
  const shortTitle =
    titleChars.length > MAX_TITLE_LENGTH
      ? `${titleChars.slice(0, MAX_TITLE_LENGTH).join('')}…`
      : title;

  return [shortTitle, description];
};
// Public surface of this helper module.
module.exports = { getTitleAndDescription };