Skip to content

Commit

Permalink
Update Reject Hosts / Add mirror support for Hosts Source
Browse files Browse the repository at this point in the history
  • Loading branch information
SukkaW committed Jan 22, 2024
1 parent 41b2f54 commit af8cce4
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 60 deletions.
58 changes: 30 additions & 28 deletions Build/build-common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,34 +91,36 @@ const processFile = (span: Span, sourcePath: string) => {
});
};

async function transformDomainset(parentSpan: Span, sourcePath: string, relativePath: string) {
const span = parentSpan.traceChild(`transform domainset: ${path.basename(sourcePath, path.extname(sourcePath))}`);

const res = await processFile(span, sourcePath);
if (!res) return;

const [title, descriptions, lines] = res;

const deduped = domainDeduper(lines);
const description = [
...SHARED_DESCRIPTION,
...(
descriptions.length
? ['', ...descriptions]
: []
)
];

return span.traceAsyncFn(() => createRuleset(
span,
title,
description,
new Date(),
deduped,
'domainset',
path.resolve(outputSurgeDir, relativePath),
path.resolve(outputClashDir, `${relativePath.slice(0, -path.extname(relativePath).length)}.txt`)
));
/**
 * Transform one domainset source file into its ruleset outputs.
 *
 * The work runs inside a child span of `parentSpan` named
 * `transform domainset: <source file name without extension>`.
 *
 * @param parentSpan - span under which the child trace span is created
 * @param sourcePath - path of the source file handed to `processFile`
 * @param relativePath - path resolved against `outputSurgeDir` as-is, and against
 * `outputClashDir` with its extension replaced by `.txt`
 * @returns the promise produced by `traceAsyncFn`; it resolves to `undefined` when
 * `processFile` yields a falsy result, otherwise to whatever `createRuleset` returns
 */
function transformDomainset(parentSpan: Span, sourcePath: string, relativePath: string) {
  return parentSpan
    .traceChild(`transform domainset: ${path.basename(sourcePath, path.extname(sourcePath))}`)
    .traceAsyncFn(async (span) => {
      const res = await processFile(span, sourcePath);
      // Nothing to emit when processFile produced no result for this file.
      if (!res) return;

      const [title, descriptions, lines] = res;

      const deduped = domainDeduper(lines);
      // Shared header first; file-specific descriptions follow after a blank separator line.
      const description = [
        ...SHARED_DESCRIPTION,
        ...(
          descriptions.length
            ? ['', ...descriptions]
            : []
        )
      ];

      // Swap the extension for `.txt`. An extension-less path must be guarded:
      // `slice(0, -0)` is `slice(0, 0)` and would drop the whole file name.
      const ext = path.extname(relativePath);
      const clashRelativePath = `${ext ? relativePath.slice(0, -ext.length) : relativePath}.txt`;

      return createRuleset(
        span,
        title,
        description,
        new Date(),
        deduped,
        'domainset',
        path.resolve(outputSurgeDir, relativePath),
        path.resolve(outputClashDir, clashRelativePath)
      );
    });
}

/**
Expand Down
10 changes: 6 additions & 4 deletions Build/build-reject-domainset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,15 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {

const domainSets = new Set<string>();

let shouldStop = false;
// Parse from AdGuard Filters
await span
const shouldStop = await span
.traceChild('download and process hosts / adblock filter rules')
.traceAsyncFn(async (childSpan) => {
// eslint-disable-next-line sukka/no-single-return -- not single return
let shouldStop = false;
await Promise.all([
// Parse from remote hosts & domain lists
...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2]).then(hosts => SetHelpers.add(domainSets, hosts))),
...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2], entry[3]).then(hosts => SetHelpers.add(domainSets, hosts))),

...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, entry[0], entry[1], entry[2]).then(hosts => SetHelpers.add(domainSets, hosts))),

Expand All @@ -44,6 +45,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
: processFilterRules(childSpan, input[0], input[1], input[2])
).then(({ white, black, foundDebugDomain }) => {
if (foundDebugDomain) {
// eslint-disable-next-line sukka/no-single-return -- not single return
shouldStop = true;
// we should not break here, as we want to see full matches from all data source
}
Expand All @@ -65,7 +67,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
setAddFromArray(domainSets, await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf')));
})
]);

// eslint-disable-next-line sukka/no-single-return -- not single return
return shouldStop;
});

Expand Down
31 changes: 25 additions & 6 deletions Build/lib/parse-filter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,32 +44,51 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl
}
));
}
export function processHosts(span: Span, hostsUrl: string, includeAllSubDomain = false, ttl: number | null = null) {
return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn(() => fsCache.apply(
export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null) {
return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn((childSpan) => fsCache.apply(
hostsUrl,
async () => {
const domainSets = new Set<string>();

for await (const l of await fetchRemoteTextByLine(hostsUrl)) {
const lineCb = (l: string) => {
const line = processLine(l);
if (!line) {
continue;
return;
}

const _domain = line.split(/\s/)[1]?.trim();
if (!_domain) {
continue;
return;
}
const domain = normalizeDomain(_domain);
if (!domain) {
continue;
return;
}
if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
console.warn(picocolors.red(hostsUrl), '(black)', domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
foundDebugDomain = true;
}

domainSets.add(includeAllSubDomain ? `.${domain}` : domain);
};

if (mirrors == null || mirrors.length === 0) {
for await (const l of await fetchRemoteTextByLine(hostsUrl)) {
lineCb(l);
}
} else {
// Avoid event loop starvation: yield once before we start fetching. Note that awaiting an already-resolved promise only yields a microtask, not a macrotask.
await Promise.resolve();

const filterRules = await childSpan.traceChild('download hosts').traceAsyncFn(() => {
return fetchAssets(hostsUrl, mirrors).then(text => text.split('\n'));
});

childSpan.traceChild('parse hosts').traceSyncFn(() => {
for (let i = 0, len = filterRules.length; i < len; i++) {
lineCb(filterRules[i]);
}
});
}

console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size));
Expand Down
37 changes: 25 additions & 12 deletions Build/lib/reject-data-source.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,34 @@
import { TTL } from './cache-filesystem';

export const HOSTS = [
['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, TTL.THREE_HOURS()],
['https://someonewhocares.org/hosts/hosts', true, TTL.THREE_HOURS()],
type HostsSource = [main: string, mirrors: string[] | null, includeAllSubDomain: boolean, ttl: number];

export const HOSTS: HostsSource[] = [
[
'https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext',
['https://raw.githubusercontent.com/uBlockOrigin/uAssets/master/thirdparties/pgl.yoyo.org/as/serverlist'],
true,
TTL.THREE_HOURS()
],
['https://someonewhocares.org/hosts/hosts', null, true, TTL.THREE_HOURS()],
// no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', true, TTL.THREE_DAYS()],
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', null, true, TTL.THREE_DAYS()],
// have not been updated for more than a year, so we set a 14 days cache ttl
['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', true, TTL.TWO_WEEKS()],
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false, TTL.THREE_DAYS()],
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', false, TTL.THREE_DAYS()],
['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', null, true, TTL.TWO_WEEKS()],
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', null, false, TTL.THREE_DAYS()],
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', null, false, TTL.THREE_DAYS()],
// ad-wars is not actively maintained, so we set a 7 days cache ttl
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false, TTL.ONE_WEEK()],
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true, TTL.THREE_HOURS()],
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.ONE_WEEK()],
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', null, true, TTL.THREE_HOURS()],
// Curben's UrlHaus Malicious URL Blocklist
// 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
// 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, TTL.THREE_HOURS()]
[
'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt',
[
'https://urlhaus-filter.pages.dev/urlhaus-filter-hosts.txt',
'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-hosts.txt'
],
true,
TTL.THREE_HOURS()
]
// Curben's Phishing URL Blocklist
// Covered by lib/get-phishing-domains.ts
// 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt'
Expand Down
8 changes: 1 addition & 7 deletions Build/lib/trace-runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ export function traceSync<T>(prefix: string, fn: () => T, timeFormatter: Formatt
console.log(`${timeFormatter(`[${((end - start) / 1e6).toFixed(3)}ms]`)} ${prefix}`);
return result;
}
traceSync.skip = <T>(_prefix: string, fn: () => T): T => fn();
// traceSync.skip = <T>(_prefix: string, fn: () => T): T => fn();

export const traceAsync = async <T>(prefix: string, fn: () => Promise<T>, timeFormatter: Formatter = picocolors.blue): Promise<T> => {
const start = Bun.nanoseconds();
Expand All @@ -18,9 +18,3 @@ export const traceAsync = async <T>(prefix: string, fn: () => Promise<T>, timeFo
console.log(`${timeFormatter(`[${((end - start) / 1e6).toFixed(3)}ms]`)} ${prefix}`);
return result;
};

export interface TaskResult {
readonly start: number,
readonly end: number,
readonly taskName: string
}
2 changes: 1 addition & 1 deletion Build/trace/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ export const createSpan = (name: string, parentTraceResult?: TraceResult): Span

const stop = (time?: number) => {
if (status === SPAN_STATUS_END) {
throw new Error('span already stopped');
throw new Error(`span already stopped: ${name}`);
}
const end = time ?? Bun.nanoseconds();

Expand Down
5 changes: 3 additions & 2 deletions Source/non_ip/reject.conf
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,7 @@ DOMAIN-KEYWORD,adjust.
DOMAIN-KEYWORD,appsflyer
DOMAIN-KEYWORD,dnserror
DOMAIN-KEYWORD,marketing.net
AND,((DOMAIN-KEYWORD,f-log), (DOMAIN-SUFFIX,grammarly.io))
DOMAIN,stun.smartgslb.com
AND,((DOMAIN-SUFFIX,prod.hosts.ooklaserver.net), (DOMAIN-KEYWORD,.ad.))

DOMAIN-KEYWORD,_vmind.qqvideo.tc.qq.com
DOMAIN-KEYWORD,-logging.nextmedia.com
Expand All @@ -120,7 +118,10 @@ DOMAIN-KEYWORD,.engage.3m.
# -telemetry.officeapps.live.com
DOMAIN-KEYWORD,telemetry.officeapps.live.com
DOMAIN-KEYWORD,-launches.appsflyersdk.com
DOMAIN-KEYWORD,-s2s.sensic.net

AND,((DOMAIN-KEYWORD,f-log), (DOMAIN-SUFFIX,grammarly.io))
AND,((DOMAIN-SUFFIX,prod.hosts.ooklaserver.net), (DOMAIN-KEYWORD,.ad.))
AND,((DOMAIN-KEYWORD,genuine), (DOMAIN-KEYWORD,autodesk))

# Important: Force add the following domains without whitelisting
Expand Down

0 comments on commit af8cce4

Please sign in to comment.