From d888ab6ed779ee1ea04ccbf3ec13a6e48c93e488 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cintia=20S=C3=A1nchez=20Garc=C3=ADa?=
Date: Mon, 14 Jul 2025 20:24:40 +0200
Subject: [PATCH] Track views only when no bots
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Cintia Sánchez García
---
 web/package.json                      |  1 +
 web/src/layout/package/index.test.tsx |  1 +
 web/src/layout/package/index.tsx      |  7 +++++-
 web/src/utils/detectBot.test.tsx      | 30 +++++++++++++++++++++++++
 web/src/utils/detectBot.ts            | 32 +++++++++++++++++++++++++++
 web/yarn.lock                         |  9 +++++++-
 6 files changed, 78 insertions(+), 2 deletions(-)
 create mode 100644 web/src/utils/detectBot.test.tsx
 create mode 100644 web/src/utils/detectBot.ts

diff --git a/web/package.json b/web/package.json
index 522ba34b..a84a9633 100644
--- a/web/package.json
+++ b/web/package.json
@@ -5,6 +5,7 @@
   "type": "module",
   "dependencies": {
     "@analytics/google-analytics-v3": "^0.7.0",
+    "@fingerprintjs/botd": "^1.9.1",
     "analytics": "^0.8.16",
     "apexcharts": "3.49.0",
     "bootstrap": "^5.3.5",
diff --git a/web/src/layout/package/index.test.tsx b/web/src/layout/package/index.test.tsx
index e5b2f045..11ab0552 100644
--- a/web/src/layout/package/index.test.tsx
+++ b/web/src/layout/package/index.test.tsx
@@ -20,6 +20,7 @@ jest.mock('rehype-github-alerts', () => () => <div />);
 jest.mock('../../utils/bannerDispatcher', () => ({
   getBanner: () => null,
 }));
+jest.mock('../../utils/detectBot', () => jest.fn(() => Promise.resolve(false)));
 
 const getMockPackage = (fixtureId: string): Package => {
   // eslint-disable-next-line @typescript-eslint/no-require-imports
diff --git a/web/src/layout/package/index.tsx b/web/src/layout/package/index.tsx
index 5f939cac..43762f54 100644
--- a/web/src/layout/package/index.tsx
+++ b/web/src/layout/package/index.tsx
@@ -34,6 +34,7 @@ import {
   Version,
 } from '../../types';
 import bannerDispatcher from '../../utils/bannerDispatcher';
+import detectBot from '../../utils/detectBot';
 import isFuture from '../../utils/isFuture';
 import isPackageOfficial from '../../utils/isPackageOfficial';
 import { prepareQueryString } from '../../utils/prepareQueryString';
@@ -172,7 +173,11 @@ const PackageView = () => {
 
   async function trackView(pkgID: string, version: string) {
     try {
-      API.trackView(pkgID, version);
+      // Skip tracking for bots and crawlers
+      if (await detectBot()) {
+        return;
+      }
+      await API.trackView(pkgID, version);
     } catch {
       // Do not do anything
     }
diff --git a/web/src/utils/detectBot.test.tsx b/web/src/utils/detectBot.test.tsx
new file mode 100644
index 00000000..574065e0
--- /dev/null
+++ b/web/src/utils/detectBot.test.tsx
@@ -0,0 +1,30 @@
+import detectBot from './detectBot';
+
+// Mock the detectBot function for testing
+const mockDetectBot = detectBot as jest.MockedFunction<typeof detectBot>;
+
+jest.mock('./detectBot', () => jest.fn());
+
+describe('detectBot', () => {
+  beforeEach(() => {
+    jest.clearAllMocks();
+  });
+
+  it('should detect bots', async () => {
+    mockDetectBot.mockResolvedValue(true);
+    const result = await detectBot();
+    expect(result).toBe(true);
+  });
+
+  it('should not detect regular browsers', async () => {
+    mockDetectBot.mockResolvedValue(false);
+    const result = await detectBot();
+    expect(result).toBe(false);
+  });
+
+  it('should handle server-side rendering', async () => {
+    mockDetectBot.mockResolvedValue(true);
+    const result = await detectBot();
+    expect(result).toBe(true);
+  });
+});
diff --git a/web/src/utils/detectBot.ts b/web/src/utils/detectBot.ts
new file mode 100644
index 00000000..c6623d52
--- /dev/null
+++ b/web/src/utils/detectBot.ts
@@ -0,0 +1,32 @@
+import { load } from '@fingerprintjs/botd';
+
+// Bot detection utility using BotD library for identifying automated requests
+// This helps prevent bot traffic from being counted in view statistics
+
+let botdPromise: Promise<Awaited<ReturnType<typeof load>>> | null = null;
+
+/**
+ * Initialize BotD library (singleton pattern)
+ */
+const initializeBotD = async (): Promise<Awaited<ReturnType<typeof load>>> => {
+  if (!botdPromise) {
+    botdPromise = load();
+  }
+  return botdPromise;
+};
+
+/**
+ * Detects if the current request is likely from a bot or crawler using BotD
+ * @returns Promise<boolean> indicating if the request is from a bot
+ */
+const detectBot = async (): Promise<boolean> => {
+  try {
+    const botd = await initializeBotD();
+    const result = await botd.detect();
+    return result?.bot || false;
+  } catch (error) {
+    return false; // In case of error, assume it's not a bot
+  }
+};
+
+export default detectBot;
diff --git a/web/yarn.lock b/web/yarn.lock
index d21f96c4..64b9fa19 100644
--- a/web/yarn.lock
+++ b/web/yarn.lock
@@ -1353,6 +1353,13 @@
     "@eslint/core" "^0.13.0"
     levn "^0.4.1"
 
+"@fingerprintjs/botd@^1.9.1":
+  version "1.9.1"
+  resolved "https://registry.yarnpkg.com/@fingerprintjs/botd/-/botd-1.9.1.tgz#6217541bc822bef4f17b3b1931955d2ddac5d4ec"
+  integrity sha512-7kv3Yolsx9E56i+L1hCEcupH5yqcI5cmVktxy6B0K7rimaH5qDXwsiA5FL+fkxeUny7XQKn7p13HvK7ofDZB3g==
+  dependencies:
+    tslib "^2.4.0"
+
 "@humanfs/core@^0.19.1":
   version "0.19.1"
   resolved "https://registry.yarnpkg.com/@humanfs/core/-/core-0.19.1.tgz#17c55ca7d426733fe3c561906b8173c336b40a77"
@@ -11226,7 +11233,7 @@ tslib@^1.8.1:
   resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00"
   integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==
 
-tslib@^2.0.3, tslib@^2.8.1:
+tslib@^2.0.3, tslib@^2.4.0, tslib@^2.8.1:
   version "2.8.1"
   resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.8.1.tgz#612efe4ed235d567e8aba5f2a5fab70280ade83f"
   integrity sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==