Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,031 changes: 1,031 additions & 0 deletions crates/common/src/integrations/google_tag_manager.rs

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions crates/common/src/integrations/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pub mod adserver_mock;
pub mod aps;
pub mod datadome;
pub mod didomi;
pub mod google_tag_manager;
pub mod lockr;
pub mod nextjs;
pub mod permutive;
Expand All @@ -31,6 +32,7 @@ pub(crate) fn builders() -> &'static [IntegrationBuilder] {
permutive::register,
lockr::register,
didomi::register,
google_tag_manager::register,
datadome::register,
]
}
31 changes: 31 additions & 0 deletions crates/js/lib/src/integrations/google_tag_manager/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { log } from '../../core/log';

import { installGtmBeaconGuard } from './script_guard';
import { installGtmGuard } from './script_guard';

/**
* Google Tag Manager integration for tsjs
*
* Installs guards to intercept GTM and Google Analytics traffic:
*
* 1. **Script guard** — intercepts dynamically inserted `<script>` and
* `<link>` elements and rewrites their URLs to the first-party proxy.
*
* 2. **Beacon guard** — intercepts `navigator.sendBeacon()` and `fetch()`
* calls to Google Analytics domains (www.google-analytics.com,
* analytics.google.com) and rewrites them to the first-party proxy.
* This is necessary because gtag.js constructs beacon URLs dynamically
* from bare domain strings, which can't be safely rewritten at the
* script level.
*
* URLs are rewritten to preserve the original path:
* - https://www.googletagmanager.com/gtm.js?id=GTM-XXXX -> /integrations/google_tag_manager/gtm.js?id=GTM-XXXX
* - https://www.google-analytics.com/g/collect -> /integrations/google_tag_manager/g/collect
* - https://analytics.google.com/g/collect -> /integrations/google_tag_manager/g/collect
*/

// Module side effect: when a `window` global exists, install the script guard
// first and the beacon guard second, then log that the integration is ready.
// Nothing happens when `window` is undefined.
if (typeof window !== 'undefined') {
  for (const install of [installGtmGuard, installGtmBeaconGuard]) {
    install();
  }
  log.info('Google Tag Manager integration initialized');
}
91 changes: 91 additions & 0 deletions crates/js/lib/src/integrations/google_tag_manager/script_guard.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import { createBeaconGuard } from '../../shared/beacon_guard';
import { createScriptGuard } from '../../shared/script_guard';

/**
* Google Tag Manager Script Interception Guard
*
* Intercepts dynamically inserted script tags that load GTM or Google Analytics
* and rewrites their URLs to use the first-party proxy endpoint. This catches
* scripts inserted via appendChild, insertBefore, or any other dynamic DOM
* manipulation (e.g. Next.js dynamic imports).
*
* Built on the shared script_guard factory with custom URL rewriting to preserve
* the original path and query string.
*/

/**
 * Host matcher for GTM / Google Analytics URLs.
 *
 * Accepts an optional `http:` / `https:` scheme and an optional `//`, followed
 * by exactly one of `www.googletagmanager.com`, `www.google-analytics.com` or
 * `analytics.google.com`, which must be terminated by `/` or end of input.
 * Matching is case-insensitive.
 */
const GTM_URL_PATTERN =
  /^(?:https?:)?(?:\/\/)?(www\.(googletagmanager|google-analytics)\.com|analytics\.google\.com)(?:\/|$)/i;

/**
 * Decide whether a URL points at a GTM or Google Analytics host.
 * Intended to mirror GTM_URL_PATTERN in google_tag_manager.rs.
 *
 * Accepted:
 * - https://www.googletagmanager.com/gtm.js?id=GTM-XXXX
 * - https://www.google-analytics.com/g/collect
 * - https://analytics.google.com/g/collect
 * - //www.googletagmanager.com/gtm.js?id=GTM-XXXX
 *
 * Rejected:
 * - https://googletagmanager.com/gtm.js (no `www.` prefix)
 * - https://example.com/www.googletagmanager.com (host is example.com)
 *
 * @param url Candidate URL; an empty string is never a match.
 * @returns `true` when the URL starts with one of the recognised hosts.
 */
function isGtmUrl(url: string): boolean {
  if (!url) {
    return false;
  }
  return GTM_URL_PATTERN.test(url);
}

/**
 * Extract the path and query string from a GTM/GA URL.
 * e.g., "https://www.googletagmanager.com/gtm.js?id=GTM-XXXX" -> "/gtm.js?id=GTM-XXXX"
 *       "https://www.google-analytics.com/g/collect?v=2" -> "/g/collect?v=2"
 *
 * Protocol-relative (`//host/path`) and bare-host (`host/path`) inputs are
 * normalised to an absolute https URL before parsing. Any fragment is dropped.
 *
 * @param url A GTM/GA URL in absolute, protocol-relative or bare-host form.
 * @returns The pathname plus search string. If the URL cannot be parsed, the
 *          path is taken from the text following a known GTM/GA host, and
 *          "/gtm.js" is returned when no such path is found.
 */
function extractGtmPath(url: string): string {
  try {
    let absoluteUrl: string;
    if (url.startsWith('//')) {
      absoluteUrl = `https:${url}`;
    } else if (/^https?:/i.test(url)) {
      // The scheme test is case-insensitive: the GTM host matcher accepts
      // "HTTPS://..." too, and treating that as a bare host would prepend a
      // second scheme and yield a path containing the original host.
      absoluteUrl = url;
    } else {
      absoluteUrl = `https://${url}`;
    }

    const parsed = new URL(absoluteUrl);
    return parsed.pathname + parsed.search;
  } catch {
    // Fallback: extract path after the domain
    console.debug('[GTM Guard] URL parsing failed, using fallback for:', url);
    const match = url.match(
      /(?:www\.(?:googletagmanager|google-analytics)\.com|analytics\.google\.com)(\/[^'"\s]*)/i
    );
    return match?.[1] ?? '/gtm.js';
  }
}

/**
 * Map a GTM/GA URL onto the first-party proxy.
 *
 * The result is the current page origin, the fixed
 * `/integrations/google_tag_manager` prefix, and the original URL's path and
 * query string as returned by extractGtmPath.
 *
 * @param originalUrl The GTM/GA URL to rewrite.
 * @returns Absolute first-party proxy URL.
 */
function rewriteGtmUrl(originalUrl: string): string {
  const { origin } = window.location;
  const proxiedPath = extractGtmPath(originalUrl);
  return origin + '/integrations/google_tag_manager' + proxiedPath;
}

// Script guard instance for GTM, built by the shared createScriptGuard factory.
// It is configured with the GTM/GA URL matcher and the first-party rewriter
// defined in this module.
const guard = createScriptGuard({
name: 'GTM',
isTargetUrl: isGtmUrl,
rewriteUrl: rewriteGtmUrl,
});

// Beacon guard instance for GTM, built by the shared createBeaconGuard factory.
// It reuses the same matcher and rewriter as the script guard above, so both
// guards agree on which URLs are rewritten and to what.
const beaconGuard = createBeaconGuard({
name: 'GTM',
isTargetUrl: isGtmUrl,
rewriteUrl: rewriteGtmUrl,
});

// Public handles for the GTM script guard: install, status query, and state reset.
export const {
  install: installGtmGuard,
  isInstalled: isGuardInstalled,
  reset: resetGuardState,
} = guard;

// Public handles for the GTM beacon guard: install, status query, and state reset.
export const {
  install: installGtmBeaconGuard,
  isInstalled: isBeaconGuardInstalled,
  reset: resetBeaconGuardState,
} = beaconGuard;

// Export for testing
export { isGtmUrl, extractGtmPath, rewriteGtmUrl };
123 changes: 123 additions & 0 deletions crates/js/lib/src/shared/beacon_guard.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import { log } from '../core/log';

/**
* Shared Beacon Guard Factory
*
* Creates a network interception guard that patches `navigator.sendBeacon`
* and `window.fetch` to intercept outgoing beacon/analytics requests whose
* URLs match an integration's target domains. Matched URLs are rewritten to
* a first-party proxy endpoint.
*
* This complements the script_guard (which intercepts DOM insertions) by
* handling the _runtime_ network calls that analytics SDKs use to send data.
*
* Each call to createBeaconGuard() produces an independent guard with its
* own installation state, so multiple integrations can coexist.
*/

/**
 * Per-integration settings consumed by createBeaconGuard().
 */
export interface BeaconGuardConfig {
/** Integration name (e.g. "GTM"); prepended to every log message as "<name> beacon guard". */
name: string;
/**
 * Return true if the URL belongs to this integration's analytics domain.
 * Called with the URL of each intercepted sendBeacon()/fetch() call; only
 * URLs for which this returns true are rewritten.
 */
isTargetUrl: (url: string) => boolean;
/**
 * Rewrite the original URL to a first-party proxy URL.
 * Called only for URLs accepted by isTargetUrl; the returned string is what
 * is passed on to the original sendBeacon()/fetch().
 */
rewriteUrl: (url: string) => string;
}

/**
 * Handle returned by createBeaconGuard() for controlling one guard instance.
 */
export interface BeaconGuard {
/**
 * Patch sendBeacon/fetch to intercept matching beacon requests.
 * Does nothing if this guard is already installed or if there is no `window`.
 */
install: () => void;
/** Whether the guard has already been installed. */
isInstalled: () => boolean;
/**
 * Reset installation state (primarily for testing).
 * Only the installed flag is cleared; functions that were already patched
 * are not restored.
 */
reset: () => void;
}

/**
 * Resolve the first argument of fetch() to a plain URL string.
 *
 * @param input A string URL, a `URL` object, or a `Request`.
 * @returns The string itself, the `URL`'s href, or the `Request`'s url;
 *          `null` for any other kind of value.
 */
function extractUrl(input: RequestInfo | URL): string | null {
  return typeof input === 'string'
    ? input
    : input instanceof URL
      ? input.href
      : input instanceof Request
        ? input.url
        : null;
}

/**
 * Create an independent beacon guard for a specific integration.
 *
 * The returned guard keeps its own "installed" flag. Installing it wraps
 * `navigator.sendBeacon` and `window.fetch` (whichever exist) so that requests
 * whose URL satisfies `config.isTargetUrl` are sent to `config.rewriteUrl(url)`
 * instead; all other requests are forwarded to the original function untouched.
 *
 * @param config Integration name plus the URL matcher and rewriter callbacks.
 * @returns Handle exposing `install`, `isInstalled` and `reset`.
 */
export function createBeaconGuard(config: BeaconGuardConfig): BeaconGuard {
  let installed = false;
  const prefix = `${config.name} beacon guard`;

  function install(): void {
    if (installed) {
      log.debug(`${prefix}: already installed, skipping`);
      return;
    }

    if (typeof window === 'undefined') {
      log.debug(`${prefix}: not in browser environment, skipping`);
      return;
    }

    log.info(`${prefix}: installing network interception`);

    // --- Patch navigator.sendBeacon ---
    if (typeof navigator !== 'undefined' && typeof navigator.sendBeacon === 'function') {
      // Bind so the original keeps `navigator` as its receiver when called later.
      const originalSendBeacon = navigator.sendBeacon.bind(navigator);

      navigator.sendBeacon = function (url: string | URL, data?: BodyInit | null): boolean {
        // sendBeacon accepts URL objects as well as strings, while the config
        // callbacks are written for strings; normalise before matching so a
        // URL object cannot reach string-only code in isTargetUrl/rewriteUrl.
        const target = typeof url === 'string' ? url : String(url);

        if (config.isTargetUrl(target)) {
          const rewritten = config.rewriteUrl(target);
          log.info(`${prefix}: rewriting sendBeacon`, { original: target, rewritten });
          return originalSendBeacon(rewritten, data);
        }
        // Not ours: forward the caller's argument exactly as received.
        return originalSendBeacon(url, data);
      };
    }

    // --- Patch window.fetch ---
    if (typeof window.fetch === 'function') {
      const originalFetch = window.fetch.bind(window);

      window.fetch = function (input: RequestInfo | URL, init?: RequestInit): Promise<Response> {
        const url = extractUrl(input);

        if (url && config.isTargetUrl(url)) {
          const rewritten = config.rewriteUrl(url);
          log.info(`${prefix}: rewriting fetch`, { original: url, rewritten });

          // If the input was a Request, create a new one with the rewritten URL,
          // using the original Request as the init source for the new one.
          if (input instanceof Request) {
            const newRequest = new Request(rewritten, input);
            return originalFetch(newRequest, init);
          }
          return originalFetch(rewritten, init);
        }

        return originalFetch(input, init);
      };
    }

    installed = true;
    log.info(`${prefix}: network interception installed successfully`);
  }

  function isInstalled(): boolean {
    return installed;
  }

  // Clears only the flag; already-patched sendBeacon/fetch are left in place.
  function reset(): void {
    installed = false;
  }

  return { install, isInstalled, reset };
}
Loading