From 8d183d6b25c51ed307f387d87349b0c3fb36f496 Mon Sep 17 00:00:00 2001
From: Sergei Zharinov <zharinov@users.noreply.github.com>
Date: Mon, 22 Jul 2024 13:59:29 -0300
Subject: [PATCH] feat(http): Force default rate limits for some known hosts
 (#30207)

Co-authored-by: HonkingGoose <34918129+HonkingGoose@users.noreply.github.com>
Co-authored-by: Michael Kriese <michael.kriese@visualon.de>
---
 lib/modules/datasource/rubygems/http.ts  | 18 -----
 lib/modules/datasource/rubygems/index.ts |  5 +-
 lib/util/host-rules.ts                   |  2 +-
 lib/util/http/host-rules.ts              | 14 ----
 lib/util/http/index.ts                   |  8 +--
 lib/util/http/queue.ts                   |  2 +-
 lib/util/http/rate-limit.spec.ts         | 83 ++++++++++++++++++++++
 lib/util/http/rate-limits.ts             | 87 ++++++++++++++++++++++++
 lib/util/http/throttle.ts                | 11 ++-
 lib/util/http/types.ts                   |  6 ++
 lib/workers/global/initialize.ts         |  2 +
 11 files changed, 189 insertions(+), 49 deletions(-)
 delete mode 100644 lib/modules/datasource/rubygems/http.ts
 create mode 100644 lib/util/http/rate-limit.spec.ts
 create mode 100644 lib/util/http/rate-limits.ts

diff --git a/lib/modules/datasource/rubygems/http.ts b/lib/modules/datasource/rubygems/http.ts
deleted file mode 100644
index 31ee0e0f1b..0000000000
--- a/lib/modules/datasource/rubygems/http.ts
+++ /dev/null
@@ -1,18 +0,0 @@
-import { Http } from '../../../util/http';
-import { Throttle } from '../../../util/http/throttle';
-import { parseUrl } from '../../../util/url';
-
-export class RubygemsHttp extends Http {
-  protected override getThrottle(url: string): Throttle | null {
-    const host = parseUrl(url)?.host;
-
-    if (host === 'rubygems.org') {
-      // rubygems.org has a rate limit of 10 per second, so we use a more conservative 8
-      // See: https://guides.rubygems.org/rubygems-org-rate-limits/
-      const intervalMs = 125;
-      return new Throttle(intervalMs);
-    }
-
-    return super.getThrottle(url);
-  }
-}
diff --git a/lib/modules/datasource/rubygems/index.ts b/lib/modules/datasource/rubygems/index.ts
index 942bfd6395..2c92242bed 100644
--- a/lib/modules/datasource/rubygems/index.ts
+++ b/lib/modules/datasource/rubygems/index.ts
@@ -2,14 +2,13 @@ import { Marshal } from '@qnighy/marshal';
 import type { ZodError } from 'zod';
 import { logger } from '../../../logger';
 import { cache } from '../../../util/cache/package/decorator';
-import { HttpError } from '../../../util/http';
+import { Http, HttpError } from '../../../util/http';
 import { AsyncResult, Result } from '../../../util/result';
 import { getQueryString, joinUrlParts, parseUrl } from '../../../util/url';
 import * as rubyVersioning from '../../versioning/ruby';
 import { Datasource } from '../datasource';
 import type { GetReleasesConfig, ReleaseResult } from '../types';
 import { getV1Releases } from './common';
-import { RubygemsHttp } from './http';
 import { MetadataCache } from './metadata-cache';
 import { GemInfo, MarshalledVersionInfo } from './schema';
 import { VersionsEndpointCache } from './versions-endpoint-cache';
@@ -34,7 +33,7 @@ export class RubyGemsDatasource extends Datasource {
 
   constructor() {
     super(RubyGemsDatasource.id);
-    this.http = new RubygemsHttp(RubyGemsDatasource.id);
+    this.http = new Http(RubyGemsDatasource.id);
     this.versionsEndpointCache = new VersionsEndpointCache(this.http);
     this.metadataCache = new MetadataCache(this.http);
   }
diff --git a/lib/util/host-rules.ts b/lib/util/host-rules.ts
index 55d5c4d85a..dd39f0f2ad 100644
--- a/lib/util/host-rules.ts
+++ b/lib/util/host-rules.ts
@@ -77,7 +77,7 @@ export interface HostRuleSearch {
   readOnly?: boolean;
 }
 
-function matchesHost(url: string, matchHost: string): boolean {
+export function matchesHost(url: string, matchHost: string): boolean {
   if (isHttpUrl(url) && isHttpUrl(matchHost)) {
     return url.startsWith(matchHost);
   }
diff --git a/lib/util/http/host-rules.ts b/lib/util/http/host-rules.ts
index 09775fe9f4..ebbf1ed339 100644
--- a/lib/util/http/host-rules.ts
+++ b/lib/util/http/host-rules.ts
@@ -217,17 +217,3 @@ export function applyHostRule<GotOptions extends HostRulesGotOptions>(
 
   return options;
 }
-
-export function getConcurrentRequestsLimit(url: string): number | null {
-  const { concurrentRequestLimit } = hostRules.find({ url });
-  return is.number(concurrentRequestLimit) && concurrentRequestLimit > 0
-    ? concurrentRequestLimit
-    : null;
-}
-
-export function getThrottleIntervalMs(url: string): number | null {
-  const { maxRequestsPerSecond } = hostRules.find({ url });
-  return is.number(maxRequestsPerSecond) && maxRequestsPerSecond > 0
-    ? Math.ceil(1000 / maxRequestsPerSecond)
-    : null;
-}
diff --git a/lib/util/http/index.ts b/lib/util/http/index.ts
index 0a9f2e313b..4f7539da94 100644
--- a/lib/util/http/index.ts
+++ b/lib/util/http/index.ts
@@ -18,7 +18,7 @@ import { hooks } from './hooks';
 import { applyHostRule, findMatchingRule } from './host-rules';
 import { getQueue } from './queue';
 import { getRetryAfter, wrapWithRetry } from './retry-after';
-import { Throttle, getThrottle } from './throttle';
+import { getThrottle } from './throttle';
 import type {
   GotJSONOptions,
   GotOptions,
@@ -134,10 +134,6 @@ export class Http<Opts extends HttpOptions = HttpOptions> {
     );
   }
 
-  protected getThrottle(url: string): Throttle | null {
-    return getThrottle(url);
-  }
-
   protected async request<T>(
     requestUrl: string | URL,
     httpOptions: InternalHttpOptions,
@@ -212,7 +208,7 @@ export class Http<Opts extends HttpOptions = HttpOptions> {
         return gotTask(url, options, { queueMs });
       };
 
-      const throttle = this.getThrottle(url);
+      const throttle = getThrottle(url);
       const throttledTask: GotTask<T> = throttle
         ? () => throttle.add<HttpResponse<T>>(httpTask)
         : httpTask;
diff --git a/lib/util/http/queue.ts b/lib/util/http/queue.ts
index 1c127d2f26..9764b6309d 100644
--- a/lib/util/http/queue.ts
+++ b/lib/util/http/queue.ts
@@ -1,7 +1,7 @@
 import PQueue from 'p-queue';
 import { logger } from '../../logger';
 import { parseUrl } from '../url';
-import { getConcurrentRequestsLimit } from './host-rules';
+import { getConcurrentRequestsLimit } from './rate-limits';
 
 const hostQueues = new Map<string, PQueue | null>();
 
diff --git a/lib/util/http/rate-limit.spec.ts b/lib/util/http/rate-limit.spec.ts
new file mode 100644
index 0000000000..5fc1f75add
--- /dev/null
+++ b/lib/util/http/rate-limit.spec.ts
@@ -0,0 +1,83 @@
+import * as hostRules from '../host-rules';
+import {
+  getConcurrentRequestsLimit,
+  getThrottleIntervalMs,
+  setHttpRateLimits,
+} from './rate-limits';
+
+describe('util/http/rate-limit', () => {
+  beforeEach(() => {
+    hostRules.clear();
+    setHttpRateLimits([]); // empty list, so built-in defaults are not used
+  });
+
+  describe('getConcurrentRequestsLimit', () => {
+    it('returns null if no limits are set', () => {
+      expect(getConcurrentRequestsLimit('https://example.com')).toBeNull();
+    });
+
+    it('returns null if host does not match', () => {
+      setHttpRateLimits([
+        { matchHost: 'https://crates.io/api/', throttleMs: 1000 },
+      ]);
+      expect(getConcurrentRequestsLimit('https://index.crates.io')).toBeNull();
+    });
+
+    it('gets the limit from the host rules', () => {
+      hostRules.add({ matchHost: 'example.com', concurrentRequestLimit: 123 });
+      expect(getConcurrentRequestsLimit('https://example.com')).toBe(123);
+    });
+
+    it('selects default value if host rule is greater', () => {
+      setHttpRateLimits([{ matchHost: 'example.com', concurrency: 123 }]);
+      hostRules.add({ matchHost: 'example.com', concurrentRequestLimit: 456 });
+      expect(getConcurrentRequestsLimit('https://example.com')).toBe(123); // lower wins
+    });
+
+    it('selects host rule value if default is greater', () => {
+      setHttpRateLimits([{ matchHost: 'example.com', concurrency: 456 }]);
+      hostRules.add({ matchHost: 'example.com', concurrentRequestLimit: 123 });
+      expect(getConcurrentRequestsLimit('https://example.com')).toBe(123); // lower wins
+    });
+
+    it('matches wildcard host', () => {
+      setHttpRateLimits([{ matchHost: '*', concurrency: 123 }]);
+      expect(getConcurrentRequestsLimit('https://example.com')).toBe(123);
+    });
+  });
+
+  describe('getThrottleIntervalMs', () => {
+    it('returns null if no limits are set', () => {
+      expect(getThrottleIntervalMs('https://example.com')).toBeNull();
+    });
+
+    it('returns null if host does not match', () => {
+      setHttpRateLimits([
+        { matchHost: 'https://crates.io/api/', concurrency: 123 },
+      ]);
+      expect(getThrottleIntervalMs('https://index.crates.io')).toBeNull();
+    });
+
+    it('gets the limit from the host rules', () => {
+      hostRules.add({ matchHost: 'example.com', maxRequestsPerSecond: 8 });
+      expect(getThrottleIntervalMs('https://example.com')).toBe(125); // ceil(1000 / 8)
+    });
+
+    it('selects maximum throttle when default is greater', () => {
+      setHttpRateLimits([{ matchHost: 'example.com', throttleMs: 500 }]);
+      hostRules.add({ matchHost: 'example.com', maxRequestsPerSecond: 8 });
+      expect(getThrottleIntervalMs('https://example.com')).toBe(500); // 500 > 125
+    });
+
+    it('selects maximum throttle when host rule is greater', () => {
+      setHttpRateLimits([{ matchHost: 'example.com', throttleMs: 125 }]);
+      hostRules.add({ matchHost: 'example.com', maxRequestsPerSecond: 2 });
+      expect(getThrottleIntervalMs('https://example.com')).toBe(500); // ceil(1000 / 2)
+    });
+
+    it('matches wildcard host', () => {
+      setHttpRateLimits([{ matchHost: '*', throttleMs: 123 }]);
+      expect(getThrottleIntervalMs('https://example.com')).toBe(123);
+    });
+  });
+});
diff --git a/lib/util/http/rate-limits.ts b/lib/util/http/rate-limits.ts
new file mode 100644
index 0000000000..ffcab7a223
--- /dev/null
+++ b/lib/util/http/rate-limits.ts
@@ -0,0 +1,87 @@
+import is from '@sindresorhus/is';
+import { matchesHost } from '../host-rules';
+import * as hostRules from '../host-rules';
+import type { RateLimitRule } from './types';
+
+const defaults: RateLimitRule[] = [
+  {
+    // https://guides.rubygems.org/rubygems-org-rate-limits/
+    matchHost: 'rubygems.org',
+    throttleMs: 125, // 8 req/s, a conservative margin under 10 req/s
+  },
+  {
+    // https://crates.io/data-access#api
+    matchHost: 'https://crates.io/api/', // URL prefix: API paths only
+    throttleMs: 1000, // one request per second
+  },
+  {
+    matchHost: '*', // wildcard: matches every URL
+    concurrency: 16,
+  },
+];
+
+let limits: RateLimitRule[] = []; // active rules; empty until set below
+
+export function setHttpRateLimits(rules?: RateLimitRule[]): void {
+  limits = rules ?? defaults; // omitted argument selects built-in defaults
+}
+
+function matches(url: string, host: string): boolean {
+  if (host === '*') {
+    return true; // '*' matches any URL without consulting matchesHost
+  }
+
+  return matchesHost(url, host);
+}
+
+export function getConcurrentRequestsLimit(url: string): number | null {
+  let result: number | null = null; // stays null when nothing matches
+
+  const { concurrentRequestLimit: hostRuleLimit } = hostRules.find({ url });
+  if (
+    is.number(hostRuleLimit) &&
+    hostRuleLimit > 0 &&
+    hostRuleLimit < Number.MAX_SAFE_INTEGER
+  ) {
+    result = hostRuleLimit; // start from the matching host rule
+  }
+
+  for (const { matchHost, concurrency: limit } of limits) {
+    if (!matches(url, matchHost) || !is.number(limit)) {
+      continue; // rule is for another host or sets no concurrency
+    }
+
+    if (result && result <= limit) {
+      continue; // host rule is already at least as strict
+    }
+
+    result = limit; // first stricter (lower) rule wins
+    break;
+  }
+
+  return result;
+}
+
+export function getThrottleIntervalMs(url: string): number | null {
+  let result: number | null = null; // stays null when nothing matches
+
+  const { maxRequestsPerSecond } = hostRules.find({ url });
+  if (is.number(maxRequestsPerSecond) && maxRequestsPerSecond > 0) {
+    result = Math.ceil(1000 / maxRequestsPerSecond); // rate -> interval ms
+  }
+
+  for (const { matchHost, throttleMs: limit } of limits) {
+    if (!matches(url, matchHost) || !is.number(limit)) {
+      continue; // rule is for another host or sets no throttleMs
+    }
+
+    if (result && result >= limit) {
+      continue; // host rule interval is already at least as long
+    }
+
+    result = limit; // first longer interval wins
+    break;
+  }
+
+  return result;
+}
diff --git a/lib/util/http/throttle.ts b/lib/util/http/throttle.ts
index c868354ce5..2250a10ed9 100644
--- a/lib/util/http/throttle.ts
+++ b/lib/util/http/throttle.ts
@@ -1,7 +1,7 @@
 import pThrottle from 'p-throttle';
 import { logger } from '../../logger';
 import { parseUrl } from '../url';
-import { getThrottleIntervalMs } from './host-rules';
+import { getThrottleIntervalMs } from './rate-limits';
 
 const hostThrottles = new Map<string, Throttle | null>();
 
@@ -33,11 +33,10 @@ export function getThrottle(url: string): Throttle | null {
   let throttle = hostThrottles.get(host);
   if (throttle === undefined) {
     throttle = null; // null represents "no throttle", as opposed to undefined
-    const throttleOptions = getThrottleIntervalMs(url);
-    if (throttleOptions) {
-      const intervalMs = throttleOptions;
-      logger.debug(`Using throttle ${intervalMs} intervalMs for host ${host}`);
-      throttle = new Throttle(intervalMs);
+    const throttleMs = getThrottleIntervalMs(url);
+    if (throttleMs) {
+      logger.debug(`Using throttle ${throttleMs} intervalMs for host ${host}`);
+      throttle = new Throttle(throttleMs);
     } else {
       logger.trace({ host }, 'No throttle');
     }
diff --git a/lib/util/http/types.ts b/lib/util/http/types.ts
index a767c29c5b..899766bc86 100644
--- a/lib/util/http/types.ts
+++ b/lib/util/http/types.ts
@@ -91,3 +91,9 @@ export interface HttpResponse<T = string> {
 
 export type Task<T> = () => Promise<T>;
 export type GotTask<T> = Task<HttpResponse<T>>;
+
+export interface RateLimitRule {
+  matchHost: string; // host name, URL prefix, or '*' for any URL
+  throttleMs?: number; // throttle interval in milliseconds
+  concurrency?: number; // concurrent request limit for matching URLs
+}
diff --git a/lib/workers/global/initialize.ts b/lib/workers/global/initialize.ts
index 3212fcecc4..9719553e04 100644
--- a/lib/workers/global/initialize.ts
+++ b/lib/workers/global/initialize.ts
@@ -10,6 +10,7 @@ import * as packageCache from '../../util/cache/package';
 import { setEmojiConfig } from '../../util/emoji';
 import { validateGitVersion } from '../../util/git';
 import * as hostRules from '../../util/host-rules';
+import { setHttpRateLimits } from '../../util/http/rate-limits';
 import { initMergeConfidence } from '../../util/merge-confidence';
 import { setMaxLimit } from './limits';
 
@@ -79,6 +80,7 @@ export async function globalInitialize(
   config_: AllConfig,
 ): Promise<RenovateConfig> {
   let config = config_;
+  setHttpRateLimits();
   await checkVersions();
   setGlobalHostRules(config);
   config = await initPlatform(config);
-- 
GitLab