Skip to content

Commit

Permalink
fix(ssr): avoid duplicate hreflang for zh-CN/zh-TW alternate links (#…
Browse files Browse the repository at this point in the history
…7921)

Keeps `zh` (for `zh-CN`), but adds `zh-Hans` (for `zh-CN`) and `zh-Hant` (for `zh-TW`).

Co-authored-by: Claas Augner <[email protected]>
  • Loading branch information
yin1999 and caugner authored Feb 16, 2023
1 parent a153c87 commit 79fb3fc
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 12 deletions.
29 changes: 20 additions & 9 deletions ssr/render.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,15 @@ const PREFERRED_LOCALE = {
zh: "zh-CN",
};

function htmlEscape(s) {
// We should use the language tag (e.g. "zh-Hans") instead of the locale.
// This is a map of locale => language tag; locales that are not listed here
// have no replacement tag in this map.
// The object is frozen so the mapping cannot be modified at runtime.
// See https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
const LANGUAGE_TAGS = Object.freeze({
// "Hans" is the script subtag for Han (Simplified variant).
"zh-CN": "zh-Hans",
// "Hant" is the script subtag for Han (Traditional variant).
"zh-TW": "zh-Hant",
});

function htmlEscape(s: string) {
if (!s) {
return s;
}
Expand All @@ -28,7 +36,7 @@ function htmlEscape(s) {
.replace(/'/gim, "&apos;");
}

function getHrefLang(locale, otherLocales) {
function getHrefLang(locale: string, allLocales: Array<string>) {
// In most cases, just return the language code, removing the country
// code if present (so, for example, 'en-US' becomes 'en').
const hreflang = locale.split("-")[0];
Expand All @@ -37,17 +45,17 @@ function getHrefLang(locale, otherLocales) {
// a preferred one. For example, if the document is available in 'zh-CN' and
// in 'zh-TW', we need to output something like this:
// <link rel=alternate hreflang=zh href=...>
// <link rel=alternate hreflang=zh-TW href=...>
// <link rel=alternate hreflang=zh-Hant href=...>
//
// But only bother if both of the ambiguous locales (the ones that map to the same hreflang) are present.
const preferred = PREFERRED_LOCALE[hreflang];
if (preferred) {
// e.g. `preferred===zh-CN` if hreflang was `zh`
if (locale !== preferred) {
// e.g. `locale===zh-TW`
if (otherLocales.includes(preferred)) {
if (allLocales.includes(preferred)) {
// If the more preferred one was there, use the locale + region format.
return locale;
return LANGUAGE_TAGS[locale] ?? locale;
}
}
}
Expand Down Expand Up @@ -186,28 +194,31 @@ export default function render(
hydrationData.doc = doc;

if (doc.other_translations) {
const allOtherLocales = doc.other_translations.map((t) => t.locale);
// Note, we also always include "self" as a locale. That's why we concat
// this doc's locale plus doc.other_translations.
const thisLocale = {
locale: doc.locale,
title: doc.title,
url: doc.mdn_url,
};
for (const translation of [...doc.other_translations, thisLocale]) {

const allTranslations = [...doc.other_translations, thisLocale];
const allLocales = allTranslations.map((t) => t.locale);

for (const translation of allTranslations) {
const translationURL = doc.mdn_url.replace(
`/${doc.locale}/`,
() => `/${translation.locale}/`
);
// The locale used in `<link rel="alternate">` needs to be the ISO-639-1
// code. For example, it's "en", not "en-US". And it's "sv" not "sv-SE".
// See https://developers.google.com/search/docs/advanced/crawling/localized-versions?hl=en&visit_id=637411409912568511-3980844248&rd=1#language-codes
// See https://developers.google.com/search/docs/specialty/international/localized-versions#language-codes
translations.push(
`<link rel="alternate" title="${htmlEscape(
translation.title
)}" href="https://developer.mozilla.org${translationURL}" hreflang="${getHrefLang(
translation.locale,
allOtherLocales
allLocales
)}"/>`
);
}
Expand Down
40 changes: 37 additions & 3 deletions testing/tests/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,11 @@ test("content built foo page", () => {
expect($('script[src="/static/js/ga.js"]')).toHaveLength(1);

// Because this en-US page has French, zh-CN and zh-TW translations (and the page itself is included)
expect($('link[rel="alternate"]')).toHaveLength(3);
expect($('link[rel="alternate"]')).toHaveLength(4);
expect($('link[rel="alternate"][hreflang="en"]')).toHaveLength(1);
expect($('link[rel="alternate"][hreflang="fr"]')).toHaveLength(1);
expect($('link[rel="alternate"][hreflang="zh"]')).toHaveLength(1);
expect($('link[rel="alternate"][hreflang="zh-Hant"]')).toHaveLength(1);
const toEnUSURL = $('link[rel="alternate"][hreflang="en"]').attr("href");
const toFrURL = $('link[rel="alternate"][hreflang="fr"]').attr("href");
// The domain is hardcoded because the URL needs to be absolute and when
Expand Down Expand Up @@ -175,10 +176,11 @@ test("content built French foo page", () => {
const htmlFile = path.join(builtFolder, "index.html");
const html = fs.readFileSync(htmlFile, "utf-8");
const $ = cheerio.load(html);
expect($('link[rel="alternate"]')).toHaveLength(3);
expect($('link[rel="alternate"]')).toHaveLength(4);
expect($('link[rel="alternate"][hreflang="en"]')).toHaveLength(1);
expect($('link[rel="alternate"][hreflang="fr"]')).toHaveLength(1);
expect($('link[rel="alternate"][hreflang="zh"]')).toHaveLength(1);
expect($('link[rel="alternate"][hreflang="zh-Hant"]')).toHaveLength(1);
expect($('meta[property="og:locale"]').attr("content")).toBe("fr");
expect($('meta[property="og:title"]').attr("content")).toBe(
"<foo>: Une page de test | MDN"
Expand Down Expand Up @@ -210,6 +212,37 @@ test("French translation using English front-matter bits", () => {
expect(bcd.value.query).toBe("javascript.builtins.Array.toLocaleString");
});

// Verifies the built zh-CN translation of Web/Foo: first the JSON metadata,
// then the <link rel="alternate"> hreflang values in the rendered HTML.
test("content built zh-CN page for hreflang tag testing", () => {
const builtFolder = path.join(buildRoot, "zh-cn", "docs", "web", "foo");
const jsonFile = path.join(builtFolder, "index.json");
expect(fs.existsSync(jsonFile)).toBeTruthy();
const { doc } = JSON.parse(fs.readFileSync(jsonFile, "utf-8")) as {
doc: Doc;
};
// Exactly one flaw category is expected, holding a single
// translation_differences entry.
expect(Object.keys(doc.flaws)).toHaveLength(1);
expect(doc.flaws.translation_differences).toHaveLength(1);
expect(doc.title).toBe("<foo>: 测试网页");
expect(doc.isTranslated).toBe(true);
// The first listed other translation is expected to be the en-US document.
expect(doc.other_translations[0].locale).toBe("en-US");
expect(doc.other_translations[0].native).toBe("English (US)");
expect(doc.other_translations[0].title).toBe("<foo>: A test tag");

const htmlFile = path.join(builtFolder, "index.html");
const html = fs.readFileSync(htmlFile, "utf-8");
const $ = cheerio.load(html);
// The built page should not have duplicate hreflang tags,
// when zh-TW translation is also available.
// Four alternates in total, one each for hreflang "en", "fr", "zh" and
// "zh-Hant".
expect($('link[rel="alternate"]')).toHaveLength(4);
expect($('link[rel="alternate"][hreflang="en"]')).toHaveLength(1);
expect($('link[rel="alternate"][hreflang="fr"]')).toHaveLength(1);
expect($('link[rel="alternate"][hreflang="zh"]')).toHaveLength(1);
expect($('link[rel="alternate"][hreflang="zh-Hant"]')).toHaveLength(1);
// Open Graph metadata uses the full locale, not the hreflang value.
expect($('meta[property="og:locale"]').attr("content")).toBe("zh-CN");
expect($('meta[property="og:title"]').attr("content")).toBe(
"<foo>: 测试网页 | MDN"
);
});

test("content built zh-TW page with en-US fallback image", () => {
const builtFolder = path.join(buildRoot, "zh-tw", "docs", "web", "foo");
const jsonFile = path.join(builtFolder, "index.json");
Expand All @@ -228,10 +261,11 @@ test("content built zh-TW page with en-US fallback image", () => {
const htmlFile = path.join(builtFolder, "index.html");
const html = fs.readFileSync(htmlFile, "utf-8");
const $ = cheerio.load(html);
expect($('link[rel="alternate"]')).toHaveLength(3);
expect($('link[rel="alternate"]')).toHaveLength(4);
expect($('link[rel="alternate"][hreflang="en"]')).toHaveLength(1);
expect($('link[rel="alternate"][hreflang="fr"]')).toHaveLength(1);
expect($('link[rel="alternate"][hreflang="zh"]')).toHaveLength(1);
expect($('link[rel="alternate"][hreflang="zh-Hant"]')).toHaveLength(1);
expect($('meta[property="og:locale"]').attr("content")).toBe("zh-TW");
expect($('meta[property="og:title"]').attr("content")).toBe(
"<foo>: 測試網頁 | MDN"
Expand Down
8 changes: 8 additions & 0 deletions testing/translated-content/files/zh-cn/web/foo/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
title: "<foo>: 测试网页"
slug: Web/Foo
translation_of: Web/Foo
---

This is a test page for hreflang tag testing. When zh-TW locale is also
available, this page should not have duplicate hreflang tags.

0 comments on commit 79fb3fc

Please sign in to comment.