Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 16 additions & 10 deletions src/review/content-lane/safe-url.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
// SSRF-safe URL guard (content-lane primitive).
//
// SELF-CONTAINED NATIVE PORT (reviewbot→gittensory convergence). Byte-faithful to reviewbot's
// SELF-CONTAINED NATIVE PORT (reviewbot→gittensory convergence). Ported from reviewbot's
// core/source-url.ts isSafeHttpUrl + isSafeEndpointUrl (the host/IP guard, including the encoded-IP
// decoding that a dotted-quad regex misses). PURE — no imports, no I/O.
// decoding that a dotted-quad regex misses), hardened so a trailing-dot or `*.localhost` host can't
// dodge the loopback check. PURE — no imports, no I/O.
//
// Rejects non-HTTPS (isSafeHttpUrl), localhost/.local/.internal, and private/loopback/link-local
// IPs in any literal notation (decimal `2130706433`, hex `0x7f000001`, octal, short `127.1`, and the
// IPv6 forms). isSafeEndpointUrl additionally permits wss:/ws: for base-layer chain endpoints.
// Rejects non-HTTPS (isSafeHttpUrl), localhost / `*.localhost` / .local / .internal, and private/
// loopback/link-local IPs in any literal notation (decimal `2130706433`, hex `0x7f000001`, octal,
// short `127.1`, and the IPv6 forms). isSafeEndpointUrl additionally permits wss:/ws: for base-layer
// chain endpoints.

function parseIpv4Component(part: string): number | null {
if (/^0x[0-9a-f]+$/i.test(part)) return parseInt(part, 16);
Expand All @@ -29,7 +31,7 @@ function ipv4ToInt(host: string): number | null {
// isSafe*Url entry points: a host reaches here only after `new URL()`, and the WHATWG parser
// rejects any all-numeric dotted host whose components overflow (so the >0xff / >lastMax / >2^32
// cases never arrive), while a host that survives parsing as a domain has a non-numeric label that
// makes parseIpv4Component bail (line 24) before these run. Retained for source parity + defense.
// makes parseIpv4Component bail (line 26) before these run. Retained for source parity + defense.
/* v8 ignore start -- @preserve unreachable through new URL() host normalization (see note above) */
for (let i = 0; i < n - 1; i += 1) if ((vals[i] as number) > 0xff) return null;
const lastMax = [0xffffffff, 0xffffff, 0xffff, 0xff][n - 1] as number;
Expand Down Expand Up @@ -78,8 +80,12 @@ function ipv6IsPrivateOrLocal(host: string): boolean {
}

function hostIsPrivateOrLocal(host: string): boolean {
const h = host.toLowerCase();
if (h === "localhost" || h.endsWith(".local") || h.endsWith(".internal")) return true;
// Normalize once: lower-case, then strip the FQDN root dot(s) the parser keeps on named hosts
// (`localhost.`) but not on IP literals — else `localhost.` / `foo.internal.` would read as public.
const h = host.toLowerCase().replace(/\.+$/, "");
// localhost + its RFC 6761 `*.localhost` namespace, plus the reserved `.local` (mDNS) / `.internal`.
if (h === "localhost" || h.endsWith(".localhost")) return true;
if (h.endsWith(".local") || h.endsWith(".internal")) return true;
if (h === "0.0.0.0" || h === "::1" || h === "[::1]") return true;
if (h.includes(":")) return ipv6IsPrivateOrLocal(h);
return ipv4IsPrivateOrLocal(h);
Expand All @@ -94,7 +100,7 @@ export function isSafeHttpUrl(raw: string): boolean {
return false;
}
if (url.protocol !== "https:") return false;
return !hostIsPrivateOrLocal(url.hostname.toLowerCase());
return !hostIsPrivateOrLocal(url.hostname);
}

/** Like isSafeHttpUrl but also permits secure WebSocket endpoints (`wss:`, `ws:`) — base-layer chain
Expand All @@ -107,5 +113,5 @@ export function isSafeEndpointUrl(raw: string): boolean {
return false;
}
if (!["https:", "wss:", "ws:"].includes(url.protocol)) return false;
return !hostIsPrivateOrLocal(url.hostname.toLowerCase());
return !hostIsPrivateOrLocal(url.hostname);
}
19 changes: 19 additions & 0 deletions test/unit/content-lane-safe-url.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,25 @@ describe("isSafeHttpUrl", () => {
expect(isSafeHttpUrl("https://printer.local")).toBe(false);
});

it("rejects the RFC 6761 *.localhost loopback namespace (not just bare localhost)", () => {
// RFC 6761 makes every `*.localhost` name loopback (systemd-resolved, browsers), so the bare
// `=== "localhost"` check leaked sub-labelled forms; `.endsWith(".localhost")` closes them.
expect(isSafeHttpUrl("https://test.localhost")).toBe(false);
expect(isSafeHttpUrl("https://foo.bar.localhost")).toBe(false);
expect(isSafeEndpointUrl("wss://api.localhost")).toBe(false);
});

it("rejects trailing-dot FQDN forms of named loopback hosts (SSRF bypass regression)", () => {
// The parser keeps the root dot on named hosts: `new URL("https://localhost./").hostname` ===
// "localhost.", which still resolves to loopback — so the guard must strip it before its checks.
expect(isSafeHttpUrl("https://localhost./")).toBe(false);
expect(isSafeHttpUrl("https://foo.local./")).toBe(false);
expect(isSafeHttpUrl("https://bar.internal./")).toBe(false);
expect(isSafeHttpUrl("https://localhost../")).toBe(false); // strip the whole run, not one dot
expect(isSafeHttpUrl("https://db.localhost./")).toBe(false); // subdomain + trailing dot
expect(isSafeEndpointUrl("wss://localhost./")).toBe(false); // shared guard → wss hardened too
});

it("rejects encoded-IP SSRF bypasses that a dotted-quad regex misses", () => {
expect(isSafeHttpUrl("https://2130706433")).toBe(false); // decimal 127.0.0.1
expect(isSafeHttpUrl("https://0x7f000001")).toBe(false); // hex 127.0.0.1
Expand Down