From 8d183d6b25c51ed307f387d87349b0c3fb36f496 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov <zharinov@users.noreply.github.com> Date: Mon, 22 Jul 2024 13:59:29 -0300 Subject: [PATCH] feat(http): Force default rate limits for some known hosts (#30207) Co-authored-by: HonkingGoose <34918129+HonkingGoose@users.noreply.github.com> Co-authored-by: Michael Kriese <michael.kriese@visualon.de> --- lib/modules/datasource/rubygems/http.ts | 18 ----- lib/modules/datasource/rubygems/index.ts | 5 +- lib/util/host-rules.ts | 2 +- lib/util/http/host-rules.ts | 14 ---- lib/util/http/index.ts | 8 +-- lib/util/http/queue.ts | 2 +- lib/util/http/rate-limit.spec.ts | 83 ++++++++++++++++++++++ lib/util/http/rate-limits.ts | 87 ++++++++++++++++++++++++ lib/util/http/throttle.ts | 11 ++- lib/util/http/types.ts | 6 ++ lib/workers/global/initialize.ts | 2 + 11 files changed, 189 insertions(+), 49 deletions(-) delete mode 100644 lib/modules/datasource/rubygems/http.ts create mode 100644 lib/util/http/rate-limit.spec.ts create mode 100644 lib/util/http/rate-limits.ts diff --git a/lib/modules/datasource/rubygems/http.ts b/lib/modules/datasource/rubygems/http.ts deleted file mode 100644 index 31ee0e0f1b..0000000000 --- a/lib/modules/datasource/rubygems/http.ts +++ /dev/null @@ -1,18 +0,0 @@ -import { Http } from '../../../util/http'; -import { Throttle } from '../../../util/http/throttle'; -import { parseUrl } from '../../../util/url'; - -export class RubygemsHttp extends Http { - protected override getThrottle(url: string): Throttle | null { - const host = parseUrl(url)?.host; - - if (host === 'rubygems.org') { - // rubygems.org has a rate limit of 10 per second, so we use a more conservative 8 - // See: https://guides.rubygems.org/rubygems-org-rate-limits/ - const intervalMs = 125; - return new Throttle(intervalMs); - } - - return super.getThrottle(url); - } -} diff --git a/lib/modules/datasource/rubygems/index.ts b/lib/modules/datasource/rubygems/index.ts index 942bfd6395..2c92242bed 100644 --- a/lib/modules/datasource/rubygems/index.ts +++ b/lib/modules/datasource/rubygems/index.ts @@ -2,14 +2,13 @@ import { Marshal } from '@qnighy/marshal'; import type { ZodError } from 'zod'; import { logger } from '../../../logger'; import { cache } from '../../../util/cache/package/decorator'; -import { HttpError } from '../../../util/http'; +import { Http, HttpError } from '../../../util/http'; import { AsyncResult, Result } from '../../../util/result'; import { getQueryString, joinUrlParts, parseUrl } from '../../../util/url'; import * as rubyVersioning from '../../versioning/ruby'; import { Datasource } from '../datasource'; import type { GetReleasesConfig, ReleaseResult } from '../types'; import { getV1Releases } from './common'; -import { RubygemsHttp } from './http'; import { MetadataCache } from './metadata-cache'; import { GemInfo, MarshalledVersionInfo } from './schema'; import { VersionsEndpointCache } from './versions-endpoint-cache'; @@ -34,7 +33,7 @@ export class RubyGemsDatasource extends Datasource { constructor() { super(RubyGemsDatasource.id); - this.http = new RubygemsHttp(RubyGemsDatasource.id); + this.http = new Http(RubyGemsDatasource.id); this.versionsEndpointCache = new VersionsEndpointCache(this.http); this.metadataCache = new MetadataCache(this.http); } diff --git a/lib/util/host-rules.ts b/lib/util/host-rules.ts index 55d5c4d85a..dd39f0f2ad 100644 --- a/lib/util/host-rules.ts +++ b/lib/util/host-rules.ts @@ -77,7 +77,7 @@ export interface HostRuleSearch { readOnly?: boolean; } -function matchesHost(url: string, matchHost: string): boolean { +export function matchesHost(url: string, matchHost: string): boolean { if (isHttpUrl(url) && isHttpUrl(matchHost)) { return url.startsWith(matchHost); } diff --git a/lib/util/http/host-rules.ts b/lib/util/http/host-rules.ts index 09775fe9f4..ebbf1ed339 100644 --- a/lib/util/http/host-rules.ts +++ b/lib/util/http/host-rules.ts @@ -217,17 +217,3 @@ export function applyHostRule<GotOptions extends HostRulesGotOptions>( return options; } - -export function getConcurrentRequestsLimit(url: string): number | null { - const { concurrentRequestLimit } = hostRules.find({ url }); - return is.number(concurrentRequestLimit) && concurrentRequestLimit > 0 - ? concurrentRequestLimit - : null; -} - -export function getThrottleIntervalMs(url: string): number | null { - const { maxRequestsPerSecond } = hostRules.find({ url }); - return is.number(maxRequestsPerSecond) && maxRequestsPerSecond > 0 - ? Math.ceil(1000 / maxRequestsPerSecond) - : null; -} diff --git a/lib/util/http/index.ts b/lib/util/http/index.ts index 0a9f2e313b..4f7539da94 100644 --- a/lib/util/http/index.ts +++ b/lib/util/http/index.ts @@ -18,7 +18,7 @@ import { hooks } from './hooks'; import { applyHostRule, findMatchingRule } from './host-rules'; import { getQueue } from './queue'; import { getRetryAfter, wrapWithRetry } from './retry-after'; -import { Throttle, getThrottle } from './throttle'; +import { getThrottle } from './throttle'; import type { GotJSONOptions, GotOptions, @@ -134,10 +134,6 @@ export class Http<Opts extends HttpOptions = HttpOptions> { ); } - protected getThrottle(url: string): Throttle | null { - return getThrottle(url); - } - protected async request<T>( requestUrl: string | URL, httpOptions: InternalHttpOptions, @@ -212,7 +208,7 @@ export class Http<Opts extends HttpOptions = HttpOptions> { return gotTask(url, options, { queueMs }); }; - const throttle = this.getThrottle(url); + const throttle = getThrottle(url); const throttledTask: GotTask<T> = throttle ? () => throttle.add<HttpResponse<T>>(httpTask) : httpTask; diff --git a/lib/util/http/queue.ts b/lib/util/http/queue.ts index 1c127d2f26..9764b6309d 100644 --- a/lib/util/http/queue.ts +++ b/lib/util/http/queue.ts @@ -1,7 +1,7 @@ import PQueue from 'p-queue'; import { logger } from '../../logger'; import { parseUrl } from '../url'; -import { getConcurrentRequestsLimit } from './host-rules'; +import { getConcurrentRequestsLimit } from './rate-limits'; const hostQueues = new Map<string, PQueue | null>(); diff --git a/lib/util/http/rate-limit.spec.ts b/lib/util/http/rate-limit.spec.ts new file mode 100644 index 0000000000..5fc1f75add --- /dev/null +++ b/lib/util/http/rate-limit.spec.ts @@ -0,0 +1,83 @@ +import * as hostRules from '../host-rules'; +import { + getConcurrentRequestsLimit, + getThrottleIntervalMs, + setHttpRateLimits, +} from './rate-limits'; + +describe('util/http/rate-limit', () => { + beforeEach(() => { + hostRules.clear(); + setHttpRateLimits([]); + }); + + describe('getConcurrentRequestsLimit', () => { + it('returns null if no limits are set', () => { + expect(getConcurrentRequestsLimit('https://example.com')).toBeNull(); + }); + + it('returns null if host does not match', () => { + setHttpRateLimits([ + { matchHost: 'https://crates.io/api/', throttleMs: 1000 }, + ]); + expect(getConcurrentRequestsLimit('https://index.crates.io')).toBeNull(); + }); + + it('gets the limit from the host rules', () => { + hostRules.add({ matchHost: 'example.com', concurrentRequestLimit: 123 }); + expect(getConcurrentRequestsLimit('https://example.com')).toBe(123); + }); + + it('selects default value if host rule is greater', () => { + setHttpRateLimits([{ matchHost: 'example.com', concurrency: 123 }]); + hostRules.add({ matchHost: 'example.com', concurrentRequestLimit: 456 }); + expect(getConcurrentRequestsLimit('https://example.com')).toBe(123); + }); + + it('selects host rule value if default is greater', () => { + setHttpRateLimits([{ matchHost: 'example.com', concurrency: 456 }]); + hostRules.add({ matchHost: 'example.com', concurrentRequestLimit: 123 }); + expect(getConcurrentRequestsLimit('https://example.com')).toBe(123); + }); + + it('matches wildcard host', () => { + setHttpRateLimits([{ matchHost: '*', concurrency: 123 }]); + expect(getConcurrentRequestsLimit('https://example.com')).toBe(123); + }); + }); + + describe('getThrottleIntervalMs', () => { + it('returns null if no limits are set', () => { + expect(getThrottleIntervalMs('https://example.com')).toBeNull(); + }); + + it('returns null if host does not match', () => { + setHttpRateLimits([ + { matchHost: 'https://crates.io/api/', concurrency: 123 }, + ]); + expect(getThrottleIntervalMs('https://index.crates.io')).toBeNull(); + }); + + it('gets the limit from the host rules', () => { + hostRules.add({ matchHost: 'example.com', maxRequestsPerSecond: 8 }); + expect(getThrottleIntervalMs('https://example.com')).toBe(125); + }); + + it('selects maximum throttle when default is greater', () => { + setHttpRateLimits([{ matchHost: 'example.com', throttleMs: 500 }]); + hostRules.add({ matchHost: 'example.com', maxRequestsPerSecond: 8 }); + expect(getThrottleIntervalMs('https://example.com')).toBe(500); + }); + + it('selects maximum throttle when host rule is greater', () => { + setHttpRateLimits([{ matchHost: 'example.com', throttleMs: 125 }]); + hostRules.add({ matchHost: 'example.com', maxRequestsPerSecond: 2 }); + expect(getThrottleIntervalMs('https://example.com')).toBe(500); + }); + + it('matches wildcard host', () => { + setHttpRateLimits([{ matchHost: '*', throttleMs: 123 }]); + expect(getThrottleIntervalMs('https://example.com')).toBe(123); + }); + }); +}); diff --git a/lib/util/http/rate-limits.ts b/lib/util/http/rate-limits.ts new file mode 100644 index 0000000000..ffcab7a223 --- /dev/null +++ b/lib/util/http/rate-limits.ts @@ -0,0 +1,87 @@ +import is from '@sindresorhus/is'; +import { matchesHost } from '../host-rules'; +import * as hostRules from '../host-rules'; +import type { RateLimitRule } from './types'; + +const defaults: RateLimitRule[] = [ + { + // https://guides.rubygems.org/rubygems-org-rate-limits/ + matchHost: 'rubygems.org', + throttleMs: 125, + }, + { + // https://crates.io/data-access#api + matchHost: 'https://crates.io/api/', + throttleMs: 1000, + }, + { + matchHost: '*', + concurrency: 16, + }, +]; + +let limits: RateLimitRule[] = []; + +export function setHttpRateLimits(rules?: RateLimitRule[]): void { + limits = rules ?? defaults; +} + +function matches(url: string, host: string): boolean { + if (host === '*') { + return true; + } + + return matchesHost(url, host); +} + +export function getConcurrentRequestsLimit(url: string): number | null { + let result: number | null = null; + + const { concurrentRequestLimit: hostRuleLimit } = hostRules.find({ url }); + if ( + is.number(hostRuleLimit) && + hostRuleLimit > 0 && + hostRuleLimit < Number.MAX_SAFE_INTEGER + ) { + result = hostRuleLimit; + } + + for (const { matchHost, concurrency: limit } of limits) { + if (!matches(url, matchHost) || !is.number(limit)) { + continue; + } + + if (result && result <= limit) { + continue; + } + + result = limit; + break; + } + + return result; +} + +export function getThrottleIntervalMs(url: string): number | null { + let result: number | null = null; + + const { maxRequestsPerSecond } = hostRules.find({ url }); + if (is.number(maxRequestsPerSecond) && maxRequestsPerSecond > 0) { + result = Math.ceil(1000 / maxRequestsPerSecond); + } + + for (const { matchHost, throttleMs: limit } of limits) { + if (!matches(url, matchHost) || !is.number(limit)) { + continue; + } + + if (result && result >= limit) { + continue; + } + + result = limit; + break; + } + + return result; +} diff --git a/lib/util/http/throttle.ts b/lib/util/http/throttle.ts index c868354ce5..2250a10ed9 100644 --- a/lib/util/http/throttle.ts +++ b/lib/util/http/throttle.ts @@ -1,7 +1,7 @@ import pThrottle from 'p-throttle'; import { logger } from '../../logger'; import { parseUrl } from '../url'; -import { getThrottleIntervalMs } from './host-rules'; +import { getThrottleIntervalMs } from './rate-limits'; const hostThrottles = new Map<string, Throttle | null>(); @@ -33,11 +33,10 @@ export function getThrottle(url: string): Throttle | null { let throttle = hostThrottles.get(host); if (throttle === undefined) { throttle = null; // null represents "no throttle", as opposed to undefined - const throttleOptions = getThrottleIntervalMs(url); - if (throttleOptions) { - const intervalMs = throttleOptions; - logger.debug(`Using throttle ${intervalMs} intervalMs for host ${host}`); - throttle = new Throttle(intervalMs); + const throttleMs = getThrottleIntervalMs(url); + if (throttleMs) { + logger.debug(`Using throttle ${throttleMs} intervalMs for host ${host}`); + throttle = new Throttle(throttleMs); } else { logger.trace({ host }, 'No throttle'); } diff --git a/lib/util/http/types.ts b/lib/util/http/types.ts index a767c29c5b..899766bc86 100644 --- a/lib/util/http/types.ts +++ b/lib/util/http/types.ts @@ -91,3 +91,9 @@ export interface HttpResponse<T = string> { export type Task<T> = () => Promise<T>; export type GotTask<T> = Task<HttpResponse<T>>; + +export interface RateLimitRule { + matchHost: string; + throttleMs?: number; + concurrency?: number; +} diff --git a/lib/workers/global/initialize.ts b/lib/workers/global/initialize.ts index 3212fcecc4..9719553e04 100644 --- a/lib/workers/global/initialize.ts +++ b/lib/workers/global/initialize.ts @@ -10,6 +10,7 @@ import * as packageCache from '../../util/cache/package'; import { setEmojiConfig } from '../../util/emoji'; import { validateGitVersion } from '../../util/git'; import * as hostRules from '../../util/host-rules'; +import { setHttpRateLimits } from '../../util/http/rate-limits'; import { initMergeConfidence } from '../../util/merge-confidence'; import { setMaxLimit } from './limits'; @@ -79,6 +80,7 @@ export async function globalInitialize( config_: AllConfig, ): Promise<RenovateConfig> { let config = config_; + setHttpRateLimits(); await checkVersions(); setGlobalHostRules(config); config = await initPlatform(config); -- GitLab