Skip to content

Commit

Permalink
download external images (#1188)
Browse files Browse the repository at this point in the history
* download external images

Part of #1103

* feedbacked
  • Loading branch information
peterbe authored Sep 2, 2020
1 parent 028b100 commit a478d30
Show file tree
Hide file tree
Showing 11 changed files with 79 additions and 42 deletions.
18 changes: 7 additions & 11 deletions build/check-images.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@ function checkImageReferences(doc, $, options, { url, rawContent }) {

const checkImages = options.flawLevels.get("images") !== FLAW_LEVELS.IGNORE;

function addImageFlaw($img, src, { explanation, suggestion = null }) {
function addImageFlaw(
$img,
src,
{ explanation, externalImage = false, suggestion = null }
) {
for (const match of findMatchesInText(src, rawContent, {
attribute: "src",
})) {
Expand All @@ -39,6 +43,7 @@ function checkImageReferences(doc, $, options, { url, rawContent }) {
fixable,
suggestion,
explanation,
externalImage,
...match,
});
}
Expand Down Expand Up @@ -94,18 +99,9 @@ function checkImageReferences(doc, $, options, { url, rawContent }) {
} else {
addImageFlaw(img, src, {
explanation: "External image URL",
externalImage: true,
});
}

// TODO: It might be prudent to cease allowing any remote images.
// If you rely on an external domain that isn't our designated
// default domain, we should probably download it and check it in.
// It means less SSL and network overheads if we can download all
// images on an existing HTTP2 connection, and if the domain is
// something out of our control we're potentially at mercy of the
// images suddenly disappearing.
// Due to so much else going on, let's not make a huge stink about
// it at the moment (peterbe, Aug 2020).
}
} else {
// Remember, you can not have search parameters on local images.
Expand Down
46 changes: 38 additions & 8 deletions build/flaws.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
const fs = require("fs");
const path = require("path");

const chalk = require("chalk");
const got = require("got");

const { Document, Redirect } = require("../content");
const { FLAW_LEVELS } = require("./constants");
Expand All @@ -7,6 +11,7 @@ const {
findMatchesInText,
replaceMatchesInText,
} = require("./matches-in-text");
const { fstat } = require("fs-extra");

function injectFlaws(doc, $, options, { rawContent }) {
if (doc.isArchive) return;
Expand Down Expand Up @@ -169,7 +174,7 @@ function injectFlaws(doc, $, options, { rawContent }) {
}
}

function fixFixableFlaws(doc, options, document) {
async function fixFixableFlaws(doc, options, document) {
if (!options.fixFlaws || document.isArchive) return;

let newRawHTML = document.rawHTML;
Expand Down Expand Up @@ -229,27 +234,52 @@ function fixFixableFlaws(doc, options, document) {
}
}

// Any 'images' flaws with a suggestion...
// Any 'images' flaws with a suggestion or external image...
for (const flaw of doc.flaws.images || []) {
if (!flaw.suggestion) {
if (!(flaw.suggestion || flaw.externalImage)) {
continue;
}
// The reason we're not using the parse HTML, as a cheerio object `$`
// is because the raw HTML we're dealing with isn't actually proper
// HTML. It's only proper HTML when the kumascript macros have been
// expanded.
const htmlBefore = newRawHTML;
newRawHTML = replaceMatchesInText(flaw.src, newRawHTML, flaw.suggestion, {
let newSrc;
if (flaw.externalImage) {
// Sanity check that it's an external image
const url = new URL(flaw.src);
if (url.protocol !== "https:") {
throw new Error(`Insecure image URL ${flaw.src}`);
}
try {
const imageBuffer = await got(flaw.src, {
responseType: "buffer",
resolveBodyOnly: true,
timeout: 10000,
retry: 3,
});
const destination = path.join(
Document.getFolderPath(document.metadata),
path.basename(url.pathname)
);
fs.writeFileSync(destination, imageBuffer);
console.log(`Downloaded ${flaw.src} to ${destination}`);
newSrc = path.basename(destination);
} catch (error) {
console.error(error);
throw error;
}
} else {
newSrc = flaw.suggestion;
}
newRawHTML = replaceMatchesInText(flaw.src, newRawHTML, newSrc, {
inAttribute: "src",
});
if (htmlBefore !== newRawHTML) {
// flaw.fixed = !options.fixFlawsDryRun;
}
if (loud) {
console.log(
chalk.grey(
`Fixed image ${chalk.white.bold(flaw.src)} to ${chalk.white.bold(
flaw.suggestion
newSrc
)}`
)
);
Expand Down
10 changes: 8 additions & 2 deletions build/index.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
const childProcess = require("child_process");

const chalk = require("chalk");
const PACKAGE_REPOSITORY_URL = require("../package.json").repository;

const PACKAGE_REPOSITORY_URL = require("../package.json").repository;
const { buildURL, Document } = require("../content");
const kumascript = require("../kumascript");

Expand Down Expand Up @@ -198,7 +199,12 @@ async function buildDocument(document, documentOptions = {}) {

// If fixFlaws is on and the doc has fixable flaws, this returned
// raw HTML string will be different.
fixFixableFlaws(doc, options, document);
try {
await fixFixableFlaws(doc, options, document);
} catch (error) {
console.error(error);
throw error;
}

// Apply syntax highlighting all <pre> tags.
syntaxHighlight($, doc);
Expand Down
6 changes: 4 additions & 2 deletions client/src/document/toolbar/flaws.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,8 @@ function Flaws({ doc, flaws }: { doc: Doc; flaws: FlawCount[] }) {
const fixableFlaws = Object.values(doc.flaws)
.map((flaws) => {
return flaws.filter(
(flaw) => !flaw.fixed && (flaw.suggestion || flaw.fixable)
(flaw) =>
!flaw.fixed && (flaw.suggestion || flaw.fixable || flaw.externalImage)
);
})
.flat();
Expand Down Expand Up @@ -611,7 +612,8 @@ function Images({
>
line {flaw.line}:{flaw.column}
</a>{" "}
{flaw.fixable && <FixableFlawBadge />} <br />
{(flaw.fixable || flaw.externalImage) && <FixableFlawBadge />}{" "}
<br />
{flaw.suggestion && (
<small>
<b>Suggestion:</b>
Expand Down
1 change: 1 addition & 0 deletions client/src/document/types.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export interface GenericFlaw {
suggestion: string | null;
fixable?: boolean;
fixed?: true;
externalImage?: boolean;
}

export interface BrokenLink extends GenericFlaw {
Expand Down
10 changes: 6 additions & 4 deletions content/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,16 +79,17 @@ function urlToFolderPath(url) {
}

function create(html, metadata) {
const folderPath = buildPath(
path.join(CONTENT_ROOT, metadata.locale),
metadata.slug
);
const folderPath = getFolderPath(metadata);

fs.mkdirSync(folderPath, { recursive: true });

saveHTMLFile(getHTMLPath(folderPath), trimLineEndings(html), metadata);
}

function getFolderPath(metadata) {
return buildPath(path.join(CONTENT_ROOT, metadata.locale), metadata.slug);
}

function archive(renderedHTML, rawHTML, metadata, wikiHistory) {
if (!CONTENT_ARCHIVE_ROOT) {
throw new Error("Can't archive when CONTENT_ARCHIVE_ROOT is not set");
Expand Down Expand Up @@ -284,6 +285,7 @@ module.exports = {
update,
del,
urlToFolderPath,
getFolderPath,

findByURL,
findAll,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
---
title: theme-color
slug: Web/HTML/Element/meta/name/theme-color
summary: >-
The theme-color value for the name attribute of the meta element indicates a
suggested color that user agents should use to customize the display of the
page or of the surrounding user interface. If specified, the content attribute
must contain a valid CSS color.
tags:
- Attribute
- HTML
Expand All @@ -23,7 +18,7 @@ <h2 id="Example">Example</h2>

<p>The following image shows the effect that the {{htmlelement("meta")}} element above will have on a document displayed in Chrome running on an Android mobile device.</p>

<figure><img alt="Image showing the effect of using theme-color" src="https://mdn.mozillademos.org/files/17199/theme-color.png" style="height: 686px; width: 894px;">
<figure><img alt="Image showing the effect of using theme-color" src="theme-color.png" style="height: 686px; width: 894px;">
<figcaption><small>Image credit: from <a href="https://developers.google.com/web/fundamentals/design-and-ux/browser-customization">Icons &amp; Browser Colors</a>, created and <a href="https://developers.google.com/readme/policies">shared by Google</a> and used according to terms described in the <a href="https://creativecommons.org/licenses/by/4.0/">Creative Commons 4.0 Attribution License</a>.</small></figcaption>
</figure>

Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
"front-matter": "4.0.2",
"fuzzysearch": "1.0.3",
"glob": "^7.1.6",
"got": "11.5.2",
"ignore-loader": "0.1.2",
"imagemin": "7.0.1",
"imagemin-gifsicle": "7.0.0",
Expand Down Expand Up @@ -89,7 +90,6 @@
"extend": "^3.0.2",
"foreman": "3.0.1",
"fs-extra": "9.0.1",
"got": "^11.5.2",
"husky": "4.2.5",
"jest": "^24.0.0",
"jest-environment-jsdom-sixteen": "^1.0.3",
Expand Down
13 changes: 9 additions & 4 deletions server/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,15 @@ router.put("/fixfixableflaws", withDocument, async (req, res) => {
return res.status(400).send("Can't fix archived documents");
}
// To get the 'doc' we have to find the built art
await buildDocument(req.document, {
fixFlaws: true,
fixFlawsVerbose: true,
});
try {
await buildDocument(req.document, {
fixFlaws: true,
fixFlawsVerbose: true,
});
} catch (error) {
console.error(`Error in buildDocument(${req.document.url})`, error);
return res.status(500).send(error.toString());
}

// Just because you *build* document, doesn't mean it writes the new
// built content to disk. So, if *was* already built, with all the flaws
Expand Down
8 changes: 4 additions & 4 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1538,9 +1538,9 @@
integrity sha512-ONhaKPIufzzrlNbqtWFFd+jlnemX6lJAgq9ZeiZtS7I1PIf/la7CW4m83rTXRnVnsMbW2k56pGYu7AUFJD9Pow==

"@sindresorhus/is@^3.0.0":
version "3.1.0"
resolved "https://registry.yarnpkg.com/@sindresorhus/is/-/is-3.1.0.tgz#d8735532635bea69ad39119df5f0f10099bd09dc"
integrity sha512-n4J+zu52VdY43kdi/XdI9DzuMr1Mur8zFL5ZRG2opCans9aiFwkPxHYFEb5Xgy7n1Z4K6WfI4FpqUqsh3E8BPQ==
version "3.1.2"
resolved "https://registry.yarnpkg.com/@sindresorhus/is/-/is-3.1.2.tgz#548650de521b344e3781fbdb0ece4aa6f729afb8"
integrity sha512-JiX9vxoKMmu8Y3Zr2RVathBL1Cdu4Nt4MuNWemt1Nc06A0RAin9c5FArkhGsyMBWfCu4zj+9b+GxtjAnE4qqLQ==

"@sinonjs/commons@^1.7.0":
version "1.7.2"
Expand Down Expand Up @@ -6365,7 +6365,7 @@ good-listener@^1.2.2:
dependencies:
delegate "^3.1.2"

got@^11.5.2:
[email protected]:
version "11.5.2"
resolved "https://registry.yarnpkg.com/got/-/got-11.5.2.tgz#772e3f3a06d9c7589c7c94dc3c83cdb31ddbf742"
integrity sha512-yUhpEDLeuGiGJjRSzEq3kvt4zJtAcjKmhIiwNp/eUs75tRlXfWcHo5tcBaMQtnjHWC7nQYT5HkY/l0QOQTkVww==
Expand Down

0 comments on commit a478d30

Please sign in to comment.