Skip to content

Commit

Permalink
Refactor: fetch directly
Browse files Browse the repository at this point in the history
  • Loading branch information
SukkaW committed Jan 11, 2025
1 parent b426f66 commit 6895c7e
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 398 deletions.
29 changes: 10 additions & 19 deletions Build/build-reject-ip-list.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ import { createReadlineInterfaceFromResponse, readFileIntoProcessedArray } from
import { task } from './trace';
import { SHARED_DESCRIPTION } from './constants/description';
import { isProbablyIpv4, isProbablyIpv6 } from 'foxts/is-probably-ip';
import { fsFetchCache, getFileContentHash } from './lib/cache-filesystem';
import { processLine } from './lib/process-line';
import { RulesetOutput } from './lib/create-file';
import { SOURCE_DIR } from './constants/dir';
import { $$fetch } from './lib/fetch-retry';
import { fetchAssetsWithout304 } from './lib/fetch-assets';

const BOGUS_NXDOMAIN_URL = 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf';
const getBogusNxDomainIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = $$fetch(BOGUS_NXDOMAIN_URL).then(async (resp) => {
Expand Down Expand Up @@ -37,26 +37,17 @@ const BOTNET_FILTER_MIRROR_URL = [
// https://curbengh.github.io/malware-filter/botnet-filter-dnscrypt-blocked-ips.txt
];

const getBotNetFilterIPsPromise = fsFetchCache.applyWithHttp304AndMirrors<[ipv4: string[], ipv6: string[]]>(
BOTNET_FILTER_URL,
BOTNET_FILTER_MIRROR_URL,
getFileContentHash(__filename),
(text) => text.split('\n').reduce<[ipv4: string[], ipv6: string[]]>((acc, cur) => {
const ip = processLine(cur);
if (ip) {
if (isProbablyIpv4(ip)) {
acc[0].push(ip);
} else if (isProbablyIpv6(ip)) {
acc[1].push(ip);
}
const getBotNetFilterIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = fetchAssetsWithout304(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL).then(text => text.split('\n').reduce<[ipv4: string[], ipv6: string[]]>((acc, cur) => {
const ip = processLine(cur);
if (ip) {
if (isProbablyIpv4(ip)) {
acc[0].push(ip);
} else if (isProbablyIpv6(ip)) {
acc[1].push(ip);
}
return acc;
}, [[], []]),
{
serializer: JSON.stringify,
deserializer: JSON.parse
}
);
return acc;
}, [[], []]));

export const buildRejectIPList = task(require.main === module, __filename)(async (span) => {
const [bogusNxDomainIPs, botNetIPs] = await Promise.all([
Expand Down
256 changes: 7 additions & 249 deletions Build/lib/cache-filesystem.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,12 @@ import os from 'node:os';
import path from 'node:path';
import { mkdirSync } from 'node:fs';
import picocolors from 'picocolors';
import { mergeHeaders } from 'foxts/merge-headers';
import { headersToObject } from 'foxts/headers-to-object';
import { identity } from 'foxts/identity';
import { fastStringArrayJoin } from 'foxts/fast-string-array-join';
import { performance } from 'node:perf_hooks';
import fs from 'node:fs';
import { simpleStringHash } from 'foxts/simple-string-hash';
import { defaultRequestInit, requestWithLog, ResponseError } from './fetch-retry';
import type { UndiciResponseData } from './fetch-retry';
// import type { UndiciResponseData } from './fetch-retry';
import { Custom304NotModifiedError, CustomAbortError, CustomNoETagFallbackError, fetchAssetsWithout304, sleepWithAbort } from './fetch-assets';

import type { IncomingHttpHeaders } from 'undici/types/header';
import { Headers } from 'undici';
import { CACHE_DIR } from '../constants/dir';

export interface CacheOptions<S = string> {
Expand Down Expand Up @@ -69,20 +61,6 @@ export const TTL = {
TWO_WEEKS: () => randomInt(10, 14) * ONE_DAY
};

function ensureETag(headers: IncomingHttpHeaders | Headers) {
if (headers instanceof Headers && headers.has('etag')) {
return headers.get('etag');
}

if ('etag' in headers && typeof headers.etag === 'string' && headers.etag.length > 0) {
return headers.etag;
}
if ('ETag' in headers && typeof headers.ETag === 'string' && headers.ETag.length > 0) {
return headers.ETag;
}
return null;
}

export class Cache<S = string> {
private db: Database;
/** Time before deletion */
Expand Down Expand Up @@ -199,238 +177,18 @@ export class Cache<S = string> {
this.statement.del.run(key);
}

async applyWithHttp304<T>(
url: string,
extraCacheKey: string,
fn: (resp: UndiciResponseData) => Promise<T>,
opt: Omit<CacheApplyOption<T, S>, 'incrementTtlWhenHit'>
// requestInit?: RequestInit
): Promise<T> {
if (opt.temporaryBypass) {
return fn(await requestWithLog(url));
}

const baseKey = url + '$' + extraCacheKey;
const etagKey = baseKey + '$etag';
const cachedKey = baseKey + '$cached';

const etag = this.get(etagKey);

const onMiss = async (resp: UndiciResponseData) => {
const serializer = 'serializer' in opt ? opt.serializer : identity as any;

const value = await fn(resp);

let serverETag = ensureETag(resp.headers);
if (serverETag) {
// FUCK someonewhocares.org
if (url.includes('someonewhocares.org')) {
serverETag = serverETag.replace('-gzip', '');
}

console.log(picocolors.yellow('[cache] miss'), url, { status: resp.statusCode, cachedETag: etag, serverETag });

this.set(etagKey, serverETag, TTL.ONE_WEEK_STATIC);
this.set(cachedKey, serializer(value), TTL.ONE_WEEK_STATIC);
return value;
}

this.del(etagKey);
console.log(picocolors.red('[cache] no etag'), picocolors.gray(url));
if (opt.ttl) {
this.set(cachedKey, serializer(value), opt.ttl);
}

return value;
};

const cached = this.get(cachedKey);
if (cached == null) {
return onMiss(await requestWithLog(url));
}

const resp = await requestWithLog(
url,
{
...defaultRequestInit,
headers: (typeof etag === 'string' && etag.length > 0)
? headersToObject(mergeHeaders(defaultRequestInit.headers, { 'If-None-Match': etag }))
: defaultRequestInit.headers
}
);

// Only miss if previously a ETag was present and the server responded with a 304
if (!ensureETag(resp.headers) && resp.statusCode !== 304) {
return onMiss(resp);
}

console.log(picocolors.green(`[cache] ${resp.statusCode === 304 ? 'http 304' : 'cache hit'}`), picocolors.gray(url));
this.updateTtl(cachedKey, TTL.ONE_WEEK_STATIC);

const deserializer = 'deserializer' in opt ? opt.deserializer : identity as any;
return deserializer(cached);
}

async applyWithHttp304AndMirrors<T>(
primaryUrl: string,
mirrorUrls: string[],
extraCacheKey: string,
fn: (resp: string) => Promise<T> | T,
opt: Omit<CacheApplyOption<T, S>, 'incrementTtlWhenHit'>
): Promise<T> {
if (opt.temporaryBypass) {
return fn(await fetchAssetsWithout304(primaryUrl, mirrorUrls));
}

const baseKey = primaryUrl + '$' + extraCacheKey;
const getETagKey = (url: string) => baseKey + '$' + url + '$etag';
const cachedKey = baseKey + '$cached';

const controller = new AbortController();

const previouslyCached = this.get(cachedKey);

const createFetchFallbackPromise = async (url: string, index: number) => {
// Most assets can be downloaded within 250ms. To avoid wasting bandwidth, we will wait for 500ms before downloading from the fallback URL.
if (index >= 0) {
try {
await sleepWithAbort(50 + (index + 1) * 100, controller.signal);
} catch {
console.log(picocolors.gray('[fetch cancelled early]'), picocolors.gray(url));
throw new CustomAbortError();
}
if (controller.signal.aborted) {
console.log(picocolors.gray('[fetch cancelled]'), picocolors.gray(url));
throw new CustomAbortError();
}
}

const etag = this.get(getETagKey(url));
const res = await requestWithLog(
url,
{
signal: controller.signal,
...defaultRequestInit,
headers: (typeof etag === 'string' && etag.length > 0 && typeof previouslyCached === 'string' && previouslyCached.length > 1)
? headersToObject(mergeHeaders(defaultRequestInit.headers, { 'If-None-Match': etag }))
: defaultRequestInit.headers
}
);

const serverETag = ensureETag(res.headers);
if (serverETag) {
this.set(getETagKey(url), serverETag, TTL.ONE_WEEK_STATIC);
}
// If we do not have a cached value, we ignore 304
if (res.statusCode === 304 && typeof previouslyCached === 'string' && previouslyCached.length > 1) {
const err = new Custom304NotModifiedError(url, previouslyCached);
controller.abort(err);
throw err;
}
if (!serverETag && !this.get(getETagKey(primaryUrl)) && typeof previouslyCached === 'string') {
const err = new CustomNoETagFallbackError(previouslyCached);
controller.abort(err);
throw err;
}

// either no etag and not cached
// or has etag but not 304
const text = await res.body.text();

if (text.length < 2) {
throw new ResponseError(res, url, 'empty response');
}

controller.abort();
return text;
};

try {
const text = mirrorUrls.length === 0
? await createFetchFallbackPromise(primaryUrl, -1)
: await Promise.any([
createFetchFallbackPromise(primaryUrl, -1),
...mirrorUrls.map(createFetchFallbackPromise)
]);

console.log(picocolors.yellow('[cache] miss'), primaryUrl);
const serializer = 'serializer' in opt ? opt.serializer : identity as any;

const value = await fn(text);

this.set(cachedKey, serializer(value), opt.ttl ?? TTL.ONE_WEEK_STATIC);

return value;
} catch (e) {
const deserializer = 'deserializer' in opt ? opt.deserializer : identity as any;

const on304 = (error: Custom304NotModifiedError): NonNullable<T> => {
console.log(picocolors.green('[cache] http 304'), picocolors.gray(primaryUrl));
this.updateTtl(cachedKey, TTL.ONE_WEEK_STATIC);
return deserializer(error.data);
};

const onNoETagFallback = (error: CustomNoETagFallbackError): NonNullable<T> => {
console.log(picocolors.green('[cache] hit'), picocolors.gray(primaryUrl));
return deserializer(error.data);
};

const onSingleError = (error: object & {}) => {
if ('name' in error) {
if (error.name === 'Custom304NotModifiedError') {
return on304(error as Custom304NotModifiedError);
}
if (error.name === 'CustomNoETagFallbackError') {
return onNoETagFallback(error as CustomNoETagFallbackError);
}
if (error.name === 'CustomAbortError' || error.name === 'AbortError') {
// noop
}
}
if ('digest' in error) {
if (error.digest === 'Custom304NotModifiedError') {
return on304(error as Custom304NotModifiedError);
}
if (error.digest === 'CustomNoETagFallbackError') {
return onNoETagFallback(error as CustomNoETagFallbackError);
}
}
return null;
};

if (e && typeof e === 'object') {
if ('errors' in e && Array.isArray(e.errors)) {
for (let i = 0, len = e.errors.length; i < len; i++) {
const error = e.errors[i];

const result = onSingleError(error);
if (result !== null) {
return result;
}

console.log(picocolors.red('[fetch error 1]'), picocolors.gray(`[${primaryUrl}]`), error);
}
} else {
const result = onSingleError(e);
if (result !== null) {
return result;
}

console.log(picocolors.red('[fetch error 2]'), picocolors.gray(`[${primaryUrl}]`), e);
}
}

console.log(`Download Rule for [${primaryUrl}] failed`, { e, name: (e as any).name });
throw e;
}
}

destroy() {
this.db.close();
}

deleteTable(tableName: string) {
this.db.exec(`DROP TABLE IF EXISTS ${tableName};`);
}
}

export const fsFetchCache = new Cache({ cachePath: CACHE_DIR });
// drop deprecated cache
new Cache({ cachePath: CACHE_DIR }).deleteTable('cache');

// process.on('exit', () => {
// fsFetchCache.destroy();
// });
Expand Down
11 changes: 7 additions & 4 deletions Build/lib/fetch-assets.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import picocolors from 'picocolors';
import { defaultRequestInit, requestWithLog, ResponseError } from './fetch-retry';
import { $$fetch, defaultRequestInit, ResponseError } from './fetch-retry';
import { wait } from 'foxts/wait';

// eslint-disable-next-line sukka/unicorn/custom-error-definition -- typescript is better
Expand Down Expand Up @@ -41,7 +41,7 @@ export function sleepWithAbort(ms: number, signal: AbortSignal) {
});
}

export async function fetchAssetsWithout304(url: string, fallbackUrls: string[] | readonly string[]) {
export async function fetchAssetsWithout304(url: string, fallbackUrls: null | undefined | string[] | readonly string[]) {
const controller = new AbortController();

const createFetchFallbackPromise = async (url: string, index: number) => {
Expand All @@ -58,8 +58,8 @@ export async function fetchAssetsWithout304(url: string, fallbackUrls: string[]
console.log(picocolors.gray('[fetch cancelled]'), picocolors.gray(url));
throw new CustomAbortError();
}
const res = await requestWithLog(url, { signal: controller.signal, ...defaultRequestInit });
const text = await res.body.text();
const res = await $$fetch(url, { signal: controller.signal, ...defaultRequestInit });
const text = await res.text();

if (text.length < 2) {
throw new ResponseError(res, url, 'empty response w/o 304');
Expand All @@ -69,6 +69,9 @@ export async function fetchAssetsWithout304(url: string, fallbackUrls: string[]
return text;
};

if (!fallbackUrls || fallbackUrls.length === 0) {
return createFetchFallbackPromise(url, -1);
}
return Promise.any([
createFetchFallbackPromise(url, -1),
...fallbackUrls.map(createFetchFallbackPromise)
Expand Down
Loading

0 comments on commit 6895c7e

Please sign in to comment.