Skip to content

Commit

Permalink
Perf: prefer hosts over AdBlock syntax
Browse files Browse the repository at this point in the history
  • Loading branch information
SukkaW committed Nov 29, 2023
1 parent 07b3951 commit 4cda4df
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 41 deletions.
21 changes: 11 additions & 10 deletions Build/build-phishing-domainset.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { processFilterRules } from './lib/parse-filter';
import { processFilterRules, processHosts } from './lib/parse-filter';
import path from 'path';
import { createRuleset } from './lib/create-file';
import { processLine } from './lib/process-line';
Expand Down Expand Up @@ -65,15 +65,16 @@ const BLACK_TLD = new Set([
]);

export const buildPhishingDomainSet = task(import.meta.path, async () => {
const [{ black: domainSet }, gorhill] = await Promise.all([
processFilterRules(
'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
[
'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
]
),
const [domainSet, gorhill] = await Promise.all([
processHosts('https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true),
// processFilterRules(
// 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
// [
// 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
// // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// // 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
// ]
// ),
getGorhillPublicSuffixPromise()
]);

Expand Down
4 changes: 2 additions & 2 deletions Build/lib/parse-filter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ export async function processDomainLists(domainListsUrl: string | URL) {
return domainSets;
}

export async function processHosts(hostsUrl: string | URL, includeAllSubDomain = false) {
export async function processHosts(hostsUrl: string | URL, includeAllSubDomain = false, skipDomainCheck = false) {
console.time(`- processHosts: ${hostsUrl}`);

if (typeof hostsUrl === 'string') {
Expand All @@ -85,7 +85,7 @@ export async function processHosts(hostsUrl: string | URL, includeAllSubDomain =
foundDebugDomain = true;
}

const domain = normalizeDomain(_domain);
const domain = skipDomainCheck ? _domain : normalizeDomain(_domain);
if (domain) {
if (includeAllSubDomain) {
domainSets.add(`.${domain}`);
Expand Down
66 changes: 37 additions & 29 deletions Build/lib/reject-data-source.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
/**
 * Hosts-format block list sources.
 *
 * Each entry is a tuple whose first element is the URL of a hosts file,
 * followed by one or two boolean flags. The list is declared `as const`,
 * so entries are readonly tuples of differing length (two or three items).
 *
 * NOTE(review): the flags presumably line up with the positional parameters
 * of `processHosts(hostsUrl, includeAllSubDomain, skipDomainCheck)` — the
 * consumer of this list is not visible here, so confirm against the caller.
 */
export const HOSTS: [string, boolean][] = [
export const HOSTS = [
// ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', false],
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false],
['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', false],
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false],
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false],
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', false]
];
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', false],
// Curben's UrlHaus Malicious URL Blocklist
['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, true],
// Curben's Phishing URL Blocklist
['https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true],
// Curben's PUP Domains Blocklist
['https://curbengh.github.io/pup-filter/pup-filter-hosts.txt', true, true],
// BarbBlock
['https://paulgb.github.io/BarbBlock/blacklists/hosts-file.txt', true, true]
] as const;

export const ADGUARD_FILTERS = [
// EasyList
Expand Down Expand Up @@ -103,33 +111,33 @@ export const ADGUARD_FILTERS = [
]
],
// Curben's UrlHaus Malicious URL Blocklist
[
'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
[
'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// 'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
'https://ublockorigin.github.io/uAssets/thirdparties/urlhaus-filter/urlhaus-filter-online.txt',
]
],
// [
// 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
// [
// 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
// // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// // 'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
// 'https://ublockorigin.github.io/uAssets/thirdparties/urlhaus-filter/urlhaus-filter-online.txt',
// ]
// ],
// Curben's Phishing URL Blocklist
[
'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
[
'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
]
],
// [
// 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
// [
// 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
// // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// // 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
// ]
// ],
// Curben's PUP Domains Blocklist
[
'https://curbengh.github.io/pup-filter/pup-filter-agh.txt',
[
'https://pup-filter.pages.dev/pup-filter-agh.txt'
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// 'https://malware-filter.gitlab.io/malware-filter/pup-filter-agh.txt'
]
],
// [
// 'https://curbengh.github.io/pup-filter/pup-filter-agh.txt',
// [
// 'https://pup-filter.pages.dev/pup-filter-agh.txt'
// // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// // 'https://malware-filter.gitlab.io/malware-filter/pup-filter-agh.txt'
// ]
// ],
// GameConsoleAdblockList
'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt',
// PiHoleBlocklist
Expand All @@ -142,7 +150,7 @@ export const ADGUARD_FILTERS = [
// Spam404
'https://raw.githubusercontent.com/Spam404/lists/master/adblock-list.txt',
// BarbBlock
'https://paulgb.github.io/BarbBlock/blacklists/ublock-origin.txt',
// 'https://paulgb.github.io/BarbBlock/blacklists/ublock-origin.txt',
// Brave First Party & First Party CNAME
'https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty.txt'
] as const;
Expand Down

0 comments on commit 4cda4df

Please sign in to comment.