From 7229179612b92913614549b73826cd8e9abd7dba Mon Sep 17 00:00:00 2001 From: mindofmar Date: Wed, 29 Apr 2026 00:45:49 -0500 Subject: [PATCH 1/2] feat: path-based scanning of urls --- .../PhishingController-method-action-types.ts | 5 +- .../src/PhishingController.test.ts | 52 +++++++++++++++++- .../src/PhishingController.ts | 18 ++++--- packages/phishing-controller/src/index.ts | 5 ++ .../phishing-controller/src/utils.test.ts | 49 +++++++++++++++++ packages/phishing-controller/src/utils.ts | 54 +++++++++++++++++++ 6 files changed, 173 insertions(+), 10 deletions(-) diff --git a/packages/phishing-controller/src/PhishingController-method-action-types.ts b/packages/phishing-controller/src/PhishingController-method-action-types.ts index dfda91ec84..ed5d94cd2c 100644 --- a/packages/phishing-controller/src/PhishingController-method-action-types.ts +++ b/packages/phishing-controller/src/PhishingController-method-action-types.ts @@ -59,8 +59,9 @@ export type PhishingControllerBypassAction = { }; /** - * Scan a URL for phishing. It will only scan the hostname of the URL. It also only supports - * web URLs. + * Scan a URL for phishing. For most hosts only the hostname is sent to the API; for known + * shared gateways the pathname is included (see `PHISHING_DETECTION_PATH_BASED_ROOT_DOMAINS`). + * Only supports web URLs (`http:` / `https:`). * * @param url - The URL to scan. * @returns The phishing detection scan result. 
diff --git a/packages/phishing-controller/src/PhishingController.test.ts b/packages/phishing-controller/src/PhishingController.test.ts index 046be4253d..9e931217d7 100644 --- a/packages/phishing-controller/src/PhishingController.test.ts +++ b/packages/phishing-controller/src/PhishingController.test.ts @@ -2813,7 +2813,7 @@ describe('PhishingController', () => { it('should return a PhishingDetectionScanResult with a fetchError on timeout', async () => { const scope = nock(PHISHING_DETECTION_BASE_URL) .get(`/${PHISHING_DETECTION_SCAN_ENDPOINT}`) - .query({ url: testUrl }) + .query({ url: 'example.com' }) .delayConnection(10000) .reply(200, {}); @@ -2935,6 +2935,56 @@ describe('PhishingController', () => { expect(response).toMatchObject(mockResponse); expect(scope.isDone()).toBe(true); }); + + it('should send hostname and path for path-based gateways and cache per path', async () => { + const urlA = 'https://ipfs.io/ipfs/QmAAA'; + const urlB = 'https://ipfs.io/ipfs/QmBBB'; + + const scopeA = nock(PHISHING_DETECTION_BASE_URL) + .get(`/${PHISHING_DETECTION_SCAN_ENDPOINT}`) + .query({ url: 'ipfs.io/ipfs/QmAAA' }) + .reply(200, { + recommendedAction: RecommendedAction.Warn, + }); + + const scopeB = nock(PHISHING_DETECTION_BASE_URL) + .get(`/${PHISHING_DETECTION_SCAN_ENDPOINT}`) + .query({ url: 'ipfs.io/ipfs/QmBBB' }) + .reply(200, { + recommendedAction: RecommendedAction.Block, + }); + + const fetchSpy = jest.spyOn(global, 'fetch'); + + const resultA1 = await rootMessenger.call( + 'PhishingController:scanUrl', + urlA, + ); + const resultB = await rootMessenger.call( + 'PhishingController:scanUrl', + urlB, + ); + const resultA2 = await rootMessenger.call( + 'PhishingController:scanUrl', + urlA, + ); + + expect(resultA1).toMatchObject({ + hostname: 'ipfs.io', + recommendedAction: RecommendedAction.Warn, + }); + expect(resultB).toMatchObject({ + hostname: 'ipfs.io', + recommendedAction: RecommendedAction.Block, + }); + expect(resultA2).toStrictEqual(resultA1); + + 
expect(scopeA.isDone()).toBe(true); + expect(scopeB.isDone()).toBe(true); + expect(fetchSpy).toHaveBeenCalledTimes(2); + + fetchSpy.mockRestore(); + }); }); describe('bulkScanUrls', () => { diff --git a/packages/phishing-controller/src/PhishingController.ts b/packages/phishing-controller/src/PhishingController.ts index a7821fc454..2619313124 100644 --- a/packages/phishing-controller/src/PhishingController.ts +++ b/packages/phishing-controller/src/PhishingController.ts @@ -48,6 +48,7 @@ import { getHostnameFromUrl, roundToNearestMinute, getHostnameFromWebUrl, + getPhishingDetectionScanUrlParam, buildCacheKey, splitCacheHits, resolveChainName, @@ -910,15 +911,16 @@ export class PhishingController extends BaseController< } /** - * Scan a URL for phishing. It will only scan the hostname of the URL. It also only supports - * web URLs. + * Scan a URL for phishing. For most hosts only the hostname is sent to the API; for known + * shared gateways the pathname is included (see `PHISHING_DETECTION_PATH_BASED_ROOT_DOMAINS`). + * Only supports web URLs (`http:` / `https:`). * * @param url - The URL to scan. * @returns The phishing detection scan result. 
*/ async scanUrl(url: string): Promise { - const [hostname, ok] = getHostnameFromWebUrl(url); - if (!ok) { + const [scanUrlParam, scanParamOk] = getPhishingDetectionScanUrlParam(url); + if (!scanParamOk) { return { hostname: '', recommendedAction: RecommendedAction.None, @@ -926,7 +928,9 @@ export class PhishingController extends BaseController< }; } - const cachedResult = this.#urlScanCache.get(hostname); + const [hostname] = getHostnameFromWebUrl(url); + + const cachedResult = this.#urlScanCache.get(scanUrlParam); if (cachedResult) { return cachedResult; } @@ -934,7 +938,7 @@ export class PhishingController extends BaseController< const apiResponse = await safelyExecuteWithTimeout( async () => { const res = await fetch( - `${PHISHING_DETECTION_BASE_URL}/${PHISHING_DETECTION_SCAN_ENDPOINT}?url=${encodeURIComponent(hostname)}`, + `${PHISHING_DETECTION_BASE_URL}/${PHISHING_DETECTION_SCAN_ENDPOINT}?url=${encodeURIComponent(scanUrlParam)}`, { method: 'GET', headers: { @@ -974,7 +978,7 @@ export class PhishingController extends BaseController< recommendedAction: apiResponse.recommendedAction, }; - this.#urlScanCache.set(hostname, result); + this.#urlScanCache.set(scanUrlParam, result); return result; } diff --git a/packages/phishing-controller/src/index.ts b/packages/phishing-controller/src/index.ts index e4d44b72d8..5233dbad7a 100644 --- a/packages/phishing-controller/src/index.ts +++ b/packages/phishing-controller/src/index.ts @@ -29,6 +29,11 @@ export { ApprovalFeatureType, } from './types'; export type { CacheEntry } from './CacheManager'; +export { + PHISHING_DETECTION_PATH_BASED_ROOT_DOMAINS, + getPhishingDetectionScanUrlParam, + isPhishingDetectionPathBasedHostname, +} from './utils'; export type { PhishingControllerMaybeUpdateStateAction, diff --git a/packages/phishing-controller/src/utils.test.ts b/packages/phishing-controller/src/utils.test.ts index 83b14756f8..0d6a282d68 100644 --- a/packages/phishing-controller/src/utils.test.ts +++ 
b/packages/phishing-controller/src/utils.test.ts @@ -10,6 +10,8 @@ import { getHostnameAndPathComponents, getHostnameFromUrl, getHostnameFromWebUrl, + getPhishingDetectionScanUrlParam, + isPhishingDetectionPathBasedHostname, matchPartsAgainstList, processConfigs, processDomainList, @@ -981,6 +983,53 @@ describe('getHostnameFromWebUrl', () => { ); }); +describe('isPhishingDetectionPathBasedHostname', () => { + it('returns true for registered roots and subdomains', () => { + expect(isPhishingDetectionPathBasedHostname('ipfs.io')).toBe(true); + expect(isPhishingDetectionPathBasedHostname('gateway.ipfs.io')).toBe(true); + expect(isPhishingDetectionPathBasedHostname('dweb.link')).toBe(true); + expect(isPhishingDetectionPathBasedHostname('sites.google.com')).toBe(true); + }); + + it('is case-insensitive', () => { + expect(isPhishingDetectionPathBasedHostname('IPFS.IO')).toBe(true); + expect(isPhishingDetectionPathBasedHostname('Gateway.IPFS.IO')).toBe(true); + }); + + it('returns false for unrelated hosts', () => { + expect(isPhishingDetectionPathBasedHostname('example.com')).toBe(false); + expect(isPhishingDetectionPathBasedHostname('evil-ipfs.io')).toBe(false); + }); +}); + +describe('getPhishingDetectionScanUrlParam', () => { + it('returns hostname only for non-gateway hosts', () => { + expect(getPhishingDetectionScanUrlParam('https://example.com/path?q=1#h')).toStrictEqual([ + 'example.com', + true, + ]); + }); + + it('returns hostname plus path for path-based gateway hosts', () => { + expect( + getPhishingDetectionScanUrlParam( + 'https://ipfs.io/ipfs/QmAAA/foo?x=1#frag', + ), + ).toStrictEqual(['ipfs.io/ipfs/QmAAA/foo', true]); + }); + + it('does not append path when pathname is /', () => { + expect(getPhishingDetectionScanUrlParam('https://dweb.link/')).toStrictEqual([ + 'dweb.link', + true, + ]); + }); + + it('returns ok false for invalid web URLs', () => { + expect(getPhishingDetectionScanUrlParam('not-a-url')).toStrictEqual(['', false]); + }); +}); + /** * 
Extracts the domain name (e.g., example.com) from a given hostname. * diff --git a/packages/phishing-controller/src/utils.ts b/packages/phishing-controller/src/utils.ts index a74506b624..ff1ac2e7a7 100644 --- a/packages/phishing-controller/src/utils.ts +++ b/packages/phishing-controller/src/utils.ts @@ -365,6 +365,60 @@ export const getHostnameFromWebUrl = (url: string): [string, boolean] => { return [hostname || '', Boolean(hostname)]; }; +/** + * Hosts where PDS single-URL scans include the URL path (shared gateways / hosts where many sites + * share one origin). For all other hosts, only the hostname is sent. + */ +export const PHISHING_DETECTION_PATH_BASED_ROOT_DOMAINS = [ + 'ipfs.io', + 'dweb.link', + 'cf-ipfs.com', + 'cloudflare-ipfs.com', + 'irys.xyz', + 'sites.google.com', +] as const; + +/** + * @param hostname - Hostname to check; it is lowercased before being matched against the registered roots and their subdomains. + * @returns Whether {@link getPhishingDetectionScanUrlParam} appends pathname for this hostname. + */ +export function isPhishingDetectionPathBasedHostname( + hostname: string, +): boolean { + const normalizedHost = hostname.toLowerCase(); + return PHISHING_DETECTION_PATH_BASED_ROOT_DOMAINS.some( + (root) => normalizedHost === root || normalizedHost.endsWith(`.${root}`), + ); +} + +/** + * Builds the `url` query parameter for {@link PhishingController.scanUrl}. For hosts in + * {@link PHISHING_DETECTION_PATH_BASED_ROOT_DOMAINS} (and their subdomains), the value is hostname + * plus pathname, without protocol, query, or fragment. For all other hosts, only hostname is used. + * + * @param url - A web URL string (must use `http:` or `https:` — same rules as {@link getHostnameFromWebUrl}). + * @returns A tuple of `[scanUrlParam, ok]` where `ok` is false when the URL is not a valid web URL. 
+ */ +export const getPhishingDetectionScanUrlParam = ( + url: string, +): [scanUrlParam: string, ok: boolean] => { + const [hostname, ok] = getHostnameFromWebUrl(url); + if (!ok) { + return ['', false]; + } + + if (!isPhishingDetectionPathBasedHostname(hostname)) { + return [hostname, true]; + } + + // `getHostnameFromWebUrl` already required a successful `new URL(url)` parse. + const { pathname } = new URL(url); + const pathSuffix = pathname === '/' ? '' : pathname; + const scanUrlParam = pathSuffix ? `${hostname}${pathSuffix}` : hostname; + + return [scanUrlParam, true]; +}; + export const getPathnameFromUrl = (url: string): string => { try { const { pathname } = new URL(url); From d028d0d7189f8d9ff0152f7b8cd95237e08126e6 Mon Sep 17 00:00:00 2001 From: mindofmar Date: Fri, 1 May 2026 09:05:54 -0500 Subject: [PATCH 2/2] fix: linting and ci issues --- packages/phishing-controller/CHANGELOG.md | 4 ++++ .../phishing-controller/src/utils.test.ts | 19 ++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/packages/phishing-controller/CHANGELOG.md b/packages/phishing-controller/CHANGELOG.md index 74e9b53d1a..4e84083de2 100644 --- a/packages/phishing-controller/CHANGELOG.md +++ b/packages/phishing-controller/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Support path-based phishing lists (`blocklistPaths`, `whitelistPaths`) and path-aware URL scanning for shared gateways (for example IPFS gateways and `sites.google.com`) via `getPhishingDetectionScanUrlParam`, `isPhishingDetectionPathBasedHostname`, and `PHISHING_DETECTION_PATH_BASED_ROOT_DOMAINS` ([#8662](https://github.com/MetaMask/core/pull/8662)) + ### Changed - Bump `@metamask/messenger` from `^1.0.0` to `^1.1.1` ([#8364](https://github.com/MetaMask/core/pull/8364), [#8373](https://github.com/MetaMask/core/pull/8373)) diff --git a/packages/phishing-controller/src/utils.test.ts 
b/packages/phishing-controller/src/utils.test.ts index 0d6a282d68..882d7c3089 100644 --- a/packages/phishing-controller/src/utils.test.ts +++ b/packages/phishing-controller/src/utils.test.ts @@ -1004,10 +1004,9 @@ describe('isPhishingDetectionPathBasedHostname', () => { describe('getPhishingDetectionScanUrlParam', () => { it('returns hostname only for non-gateway hosts', () => { - expect(getPhishingDetectionScanUrlParam('https://example.com/path?q=1#h')).toStrictEqual([ - 'example.com', - true, - ]); + expect( + getPhishingDetectionScanUrlParam('https://example.com/path?q=1#h'), + ).toStrictEqual(['example.com', true]); }); it('returns hostname plus path for path-based gateway hosts', () => { @@ -1019,14 +1018,16 @@ describe('getPhishingDetectionScanUrlParam', () => { }); it('does not append path when pathname is /', () => { - expect(getPhishingDetectionScanUrlParam('https://dweb.link/')).toStrictEqual([ - 'dweb.link', - true, - ]); + expect( + getPhishingDetectionScanUrlParam('https://dweb.link/'), + ).toStrictEqual(['dweb.link', true]); }); it('returns ok false for invalid web URLs', () => { - expect(getPhishingDetectionScanUrlParam('not-a-url')).toStrictEqual(['', false]); + expect(getPhishingDetectionScanUrlParam('not-a-url')).toStrictEqual([ + '', + false, + ]); }); });