diff --git a/lib/modules/datasource/rubygems/index.spec.ts b/lib/modules/datasource/rubygems/index.spec.ts index 6c7a23804c457e7084e711bff70e5cba02a19a5d..86f09ab2a12119150076073afd69b3e122912301 100644 --- a/lib/modules/datasource/rubygems/index.spec.ts +++ b/lib/modules/datasource/rubygems/index.spec.ts @@ -2,7 +2,7 @@ import { getPkgReleases } from '..'; import { Fixtures } from '../../../../test/fixtures'; import * as httpMock from '../../../../test/http-mock'; import * as rubyVersioning from '../../versioning/ruby'; -import { VersionsDatasource, memCache } from './versions-datasource'; +import { memCache } from './versions-datasource'; import { RubyGemsDatasource } from '.'; const rubygemsOrgVersions = Fixtures.get('rubygems-org.txt'); @@ -63,26 +63,6 @@ describe('modules/datasource/rubygems/index', () => { expect(res).toBeNull(); }); - it('returns null for an error without "not_supported" reason', async () => { - const versionsdataSourceSpy = jest - .spyOn(VersionsDatasource.prototype, 'syncVersions') - .mockImplementationOnce(() => { - throw new Error(); - }); - - try { - const res = await getPkgReleases({ - versioning: rubyVersioning.id, - datasource: RubyGemsDatasource.id, - packageName: 'rails', - registryUrls: [], - }); - expect(res).toBeNull(); - } finally { - versionsdataSourceSpy.mockRestore(); - } - }); - it('returns a dep for rubygems.org package hit', async () => { httpMock .scope('https://rubygems.org') diff --git a/lib/modules/datasource/rubygems/versions-datasource.spec.ts b/lib/modules/datasource/rubygems/versions-datasource.spec.ts new file mode 100644 index 0000000000000000000000000000000000000000..c13c5c5124343a28b47ee7cad6fda4c12308bbfd --- /dev/null +++ b/lib/modules/datasource/rubygems/versions-datasource.spec.ts @@ -0,0 +1,310 @@ +import { codeBlock } from 'common-tags'; +import * as httpMock from '../../../../test/http-mock'; +import { PAGE_NOT_FOUND_ERROR } from '../../../constants/error-messages'; +import type { GetReleasesConfig } from '../types'; +import { VersionsDatasource, memCache } from './versions-datasource'; + +const rubygems = new VersionsDatasource('rubygems'); + +const fullBody = + codeBlock` + created_at: 2021-05-04T00:00:00.000Z + --- + foo 1.1.1 11111111111111111111111111111111 + bar 2.2.2 22222222222222222222222222222222 + baz 3.3.3 33333333333333333333333333333333 + ` + '\n'; + +describe('modules/datasource/rubygems/versions-datasource', () => { + beforeEach(() => { + memCache.clear(); + jest.resetAllMocks(); + }); + + describe('Full sync', () => { + it('supports sequential access', async () => { + httpMock + .scope('https://rubygems.org') + .get('/versions') + .reply(200, fullBody); + const config: Omit<GetReleasesConfig, 'packageName'> = { + registryUrl: 'https://rubygems.org', + }; + + const foo = await rubygems.getReleases({ ...config, packageName: 'foo' }); + const bar = await rubygems.getReleases({ ...config, packageName: 'bar' }); + const baz = await rubygems.getReleases({ ...config, packageName: 'baz' }); + const qux = await rubygems.getReleases({ ...config, packageName: 'qux' }); + + expect(foo).toEqual({ releases: [{ version: '1.1.1' }] }); + expect(bar).toEqual({ releases: [{ version: '2.2.2' }] }); + expect(baz).toEqual({ releases: [{ version: '3.3.3' }] }); + expect(qux).toBeNull(); + + expect( + memCache.get('rubygems-versions-cache:https://rubygems.org') + ).toMatchObject({ + contentTail: '33333333333333333333333333333333\n', + }); + }); + + it('supports concurrent access', async () => { + httpMock + .scope('https://rubygems.org') + .get('/versions') + .reply(200, fullBody); + const config: Omit<GetReleasesConfig, 'packageName'> = { + registryUrl: 'https://rubygems.org', + }; + + const [foo, bar, baz] = await Promise.all([ + rubygems.getReleases({ ...config, packageName: 'foo' }), + rubygems.getReleases({ ...config, packageName: 'bar' }), + rubygems.getReleases({ ...config, packageName: 'baz' }), + ]); + + expect(foo).toEqual({ releases: [{ version: '1.1.1' }] }); + expect(bar).toEqual({ releases: [{ version: '2.2.2' }] }); + expect(baz).toEqual({ releases: [{ version: '3.3.3' }] }); + }); + + it('handles 404', async () => { + httpMock.scope('https://rubygems.org').get('/versions').reply(404); + + await expect( + rubygems.getReleases({ + registryUrl: 'https://rubygems.org', + packageName: 'foo', + }) + ).rejects.toThrow(PAGE_NOT_FOUND_ERROR); + + await expect( + rubygems.getReleases({ + registryUrl: 'https://rubygems.org', + packageName: 'bar', + }) + ).rejects.toThrow(PAGE_NOT_FOUND_ERROR); + + expect(memCache.size).toBe(1); + }); + + it('handles unknown error', async () => { + httpMock + .scope('https://rubygems.org') + .get('/versions') + .replyWithError('Unknown error'); + + await expect( + rubygems.getReleases({ + registryUrl: 'https://rubygems.org', + packageName: 'foo', + }) + ).rejects.toThrow('Unknown error'); + expect(memCache.size).toBe(0); + }); + }); + + describe('Delta sync', () => { + beforeAll(() => { + jest.useFakeTimers({ advanceTimers: true }); + }); + + beforeEach(() => { + jest.setSystemTime(new Date('2021-05-04T00:00:00.000Z')); + }); + + it('refreshes after 15 minutes', async () => { + httpMock + .scope('https://rubygems.org') + .get('/versions') + .reply(200, fullBody); + + const res1 = await rubygems.getReleases({ + registryUrl: 'https://rubygems.org', + packageName: 'foo', + }); + expect(res1).toEqual({ releases: [{ version: '1.1.1' }] }); + + jest.advanceTimersByTime(15 * 60 * 1000); + httpMock + .scope('https://rubygems.org') + .get('/versions') + .reply( + 206, + codeBlock` + 33333333333333333333333333333333 + foo -1.1.1,1.2.3 44444444444444444444444444444444 + ` + '\n' + ); + + const res2 = await rubygems.getReleases({ + registryUrl: 'https://rubygems.org', + packageName: 'foo', + }); + expect(res2).toEqual({ releases: [{ version: '1.2.3' }] }); + + expect( + memCache.get('rubygems-versions-cache:https://rubygems.org') + ).toMatchObject({ + contentTail: '44444444444444444444444444444444\n', + }); + }); + + it('handles tail-head mismatch', async () => { + httpMock + .scope('https://rubygems.org') + .get('/versions') + .reply(200, fullBody); + + const res1 = await rubygems.getReleases({ + registryUrl: 'https://rubygems.org', + packageName: 'foo', + }); + expect(res1).toEqual({ releases: [{ version: '1.1.1' }] }); + + jest.advanceTimersByTime(15 * 60 * 1000); + httpMock + .scope('https://rubygems.org') + .get('/versions') + .reply( + 206, + codeBlock` + 01010101010101010101010101010101 + foo -1.1.1,1.2.3 44444444444444444444444444444444 + ` + '\n' + ) + .get('/versions') + .reply( + 200, + codeBlock` + created_at: 2021-05-04T00:00:00.000Z + --- + foo 1.2.3 11111111111111111111111111111111 + bar 2.2.2 22222222222222222222222222222222 + baz 3.3.3 01010101010101010101010101010101 + ` + '\n' + ); + + const res2 = await rubygems.getReleases({ + registryUrl: 'https://rubygems.org', + packageName: 'foo', + }); + expect(res2).toEqual({ releases: [{ version: '1.2.3' }] }); + + expect( + memCache.get('rubygems-versions-cache:https://rubygems.org') + ).toMatchObject({ + contentTail: '01010101010101010101010101010101\n', + }); + }); + + it('handles full body response', async () => { + httpMock + .scope('https://rubygems.org') + .get('/versions') + .reply(200, fullBody); + + const res1 = await rubygems.getReleases({ + registryUrl: 'https://rubygems.org', + packageName: 'foo', + }); + expect(res1).toEqual({ releases: [{ version: '1.1.1' }] }); + + jest.advanceTimersByTime(15 * 60 * 1000); + httpMock + .scope('https://rubygems.org') + .get('/versions') + .reply( + 200, + fullBody + `foo -1.1.1,1.2.3 44444444444444444444444444444444\n` + ); + + const res2 = await rubygems.getReleases({ + registryUrl: 'https://rubygems.org', + packageName: 'foo', + }); + expect(res2).toEqual({ releases: [{ version: '1.2.3' }] }); + + expect( + memCache.get('rubygems-versions-cache:https://rubygems.org') + ).toMatchObject({ + contentTail: '44444444444444444444444444444444\n', + }); + }); + + describe('Error handling', () => { + beforeEach(async () => { + httpMock + .scope('https://rubygems.org') + .get('/versions') + .reply(200, fullBody); + + await rubygems.getReleases({ + registryUrl: 'https://rubygems.org', + packageName: 'foo', + }); + + jest.advanceTimersByTime(15 * 60 * 1000); + }); + + it('handles 404', async () => { + httpMock.scope('https://rubygems.org').get('/versions').reply(404); + + await expect( + rubygems.getReleases({ + registryUrl: 'https://rubygems.org', + packageName: 'foo', + }) + ).rejects.toThrow(PAGE_NOT_FOUND_ERROR); + + await expect( + rubygems.getReleases({ + registryUrl: 'https://rubygems.org', + packageName: 'foo', + }) + ).rejects.toThrow(PAGE_NOT_FOUND_ERROR); + }); + + it('handles 416', async () => { + httpMock + .scope('https://rubygems.org') + .get('/versions') + .reply(416) + .get('/versions') + .reply( + 200, + codeBlock` + created_at: 2021-05-05T00:00:00.000Z + --- + foo 9.9.9 99999999999999999999999999999999 + ` + '\n' + ); + + const res = await rubygems.getReleases({ + registryUrl: 'https://rubygems.org', + packageName: 'foo', + }); + + expect(res).toEqual({ releases: [{ version: '9.9.9' }] }); + }); + + it('handles unknown errors', async () => { + httpMock + .scope('https://rubygems.org') + .get('/versions') + .replyWithError('Unknown error'); + + await expect( + rubygems.getReleases({ + registryUrl: 'https://rubygems.org', + packageName: 'foo', + }) + ).rejects.toThrow('Unknown error'); + + expect( + memCache.get('rubygems-versions-cache:https://rubygems.org') + ).toBeUndefined(); + }); + }); + }); +}); diff --git a/lib/modules/datasource/rubygems/versions-datasource.ts b/lib/modules/datasource/rubygems/versions-datasource.ts index a6d5de0bda0cf3264c7d3b410aa175f4412240d9..9c56c1e34eb539cdeb275f4013dc01776631317d 100644 --- a/lib/modules/datasource/rubygems/versions-datasource.ts +++ b/lib/modules/datasource/rubygems/versions-datasource.ts @@ -1,28 +1,69 @@ import { z } from 'zod'; import { PAGE_NOT_FOUND_ERROR } from '../../../constants/error-messages'; import { logger } from '../../../logger'; -import { ExternalHostError } from '../../../types/errors/external-host-error'; import { getElapsedMinutes } from '../../../util/date'; import { HttpError } from '../../../util/http'; +import type { HttpOptions } from '../../../util/http/types'; import { newlineRegex } from '../../../util/regex'; import { LooseArray } from '../../../util/schema-utils'; import { copystr } from '../../../util/string'; import { Datasource } from '../datasource'; import type { GetReleasesConfig, ReleaseResult } from '../types'; -type PackageReleases = Map<string, string[]>; +interface VersionsEndpointUnsupported { + versionsEndpointSupported: false; +} + +type PackageVersions = Map<string, string[]>; -interface RegistryCache { - lastSync: Date; - packageReleases: PackageReleases; +interface VersionsEndpointData { + versionsEndpointSupported: true; + packageVersions: PackageVersions; + syncedAt: Date; contentLength: number; - isSupported: boolean; - registryUrl: string; + + /** + * Last 33 characters of the response (32 hex digits + newline) + */ + contentTail: string; +} + +function getContentTail(content: string): string { + return content.slice(-33); } -export const memCache = new Map<string, RegistryCache>(); +function getContentHead(content: string): string { + return content.slice(0, 33); +} + +function stripContentHead(content: string): string { + return content.slice(33); +} + +function parseFullBody(body: string): VersionsEndpointData { + const versionsEndpointSupported = true; + const packageVersions = VersionsDatasource.reconcilePackageVersions( + new Map<string, string[]>(), + VersionLines.parse(body) + ); + const syncedAt = new Date(); + const contentLength = body.length; + const contentTail = getContentTail(body); -const Lines = z + return { + versionsEndpointSupported, + packageVersions, + syncedAt, + contentLength, + contentTail, + }; +} + +type VersionsEndpointCache = VersionsEndpointUnsupported | VersionsEndpointData; + +export const memCache = new Map<string, VersionsEndpointCache>(); + +const VersionLines = z .string() .transform((x) => x.split(newlineRegex)) .pipe( @@ -49,132 +90,209 @@ const Lines = z }) ) ); -type Lines = z.infer<typeof Lines>; +type VersionLines = z.infer<typeof VersionLines>; export class VersionsDatasource extends Datasource { constructor(override readonly id: string) { super(id); } - getRegistryCache(registryUrl: string): RegistryCache { + static isStale(regCache: VersionsEndpointData): boolean { + return getElapsedMinutes(regCache.syncedAt) >= 15; + } + + static reconcilePackageVersions( + packageVersions: PackageVersions, + versionLines: VersionLines + ): PackageVersions { + for (const line of versionLines) { + const packageName = copystr(line.packageName); + let versions = packageVersions.get(packageName) ?? []; + + const { deletedVersions, addedVersions } = line; + + if (deletedVersions.size > 0) { + versions = versions.filter((v) => !deletedVersions.has(v)); + } + + if (addedVersions.length > 0) { + const existingVersions = new Set(versions); + for (const addedVersion of addedVersions) { + if (!existingVersions.has(addedVersion)) { + const version = copystr(addedVersion); + versions.push(version); + } + } + } + + packageVersions.set(packageName, versions); + } + + return packageVersions; + } + + private cacheRequests = new Map<string, Promise<VersionsEndpointCache>>(); + + /** + * At any given time, there should only be one request for a given registryUrl. + */ + private async getCache(registryUrl: string): Promise<VersionsEndpointCache> { const cacheKey = `rubygems-versions-cache:${registryUrl}`; - const regCache = memCache.get(cacheKey) ?? { - lastSync: new Date('2000-01-01'), - packageReleases: new Map<string, string[]>(), - contentLength: 0, - isSupported: false, - registryUrl, - }; - memCache.set(cacheKey, regCache); - return regCache; + + const oldCache = memCache.get(cacheKey); + memCache.delete(cacheKey); // If no error is thrown, we'll re-set the cache + + let newCache: VersionsEndpointCache; + + if (!oldCache) { + newCache = await this.fullSync(registryUrl); + } else if (oldCache.versionsEndpointSupported === false) { + newCache = oldCache; + } else if (VersionsDatasource.isStale(oldCache)) { + newCache = await this.deltaSync(oldCache, registryUrl); + } else { + newCache = oldCache; + } + memCache.set(cacheKey, newCache); + return newCache; } async getReleases({ registryUrl, packageName, }: GetReleasesConfig): Promise<ReleaseResult | null> { - logger.debug(`getRubygemsOrgDependency(${packageName})`); - // istanbul ignore if if (!registryUrl) { return null; } - const regCache = this.getRegistryCache(registryUrl); - await this.syncVersions(regCache); + /** + * Ensure that only one request for a given registryUrl is in flight at a time. + */ + let cacheRequest = this.cacheRequests.get(registryUrl); + if (!cacheRequest) { + cacheRequest = this.getCache(registryUrl); + this.cacheRequests.set(registryUrl, cacheRequest); + } + let cache: VersionsEndpointCache; + try { + cache = await cacheRequest; + } finally { + this.cacheRequests.delete(registryUrl); + } - if (!regCache.isSupported) { + if (cache.versionsEndpointSupported === false) { + logger.debug( + { packageName, registryUrl }, + 'Rubygems: endpoint not supported' + ); throw new Error(PAGE_NOT_FOUND_ERROR); } - const versions = regCache.packageReleases.get(packageName); - if (!versions) { + const packageVersions = cache.packageVersions.get(packageName); + if (!packageVersions?.length) { + logger.debug( + { packageName, registryUrl }, + 'Rubygems: versions not found' + ); return null; } - const releases = versions.map((version) => ({ version })); + const releases = packageVersions.map((version) => ({ version })); return { releases }; } - private updatePackageReleases( - packageReleases: PackageReleases, - lines: Lines - ): void { - for (const line of lines) { - const packageName = copystr(line.packageName); - let versions = packageReleases.get(packageName) ?? []; - - const { deletedVersions, addedVersions } = line; - - if (deletedVersions.size > 0) { - versions = versions.filter((v) => !deletedVersions.has(v)); - } - - if (addedVersions.length > 0) { - const existingVersions = new Set(versions); - for (const addedVersion of addedVersions) { - if (!existingVersions.has(addedVersion)) { - const version = copystr(addedVersion); - versions.push(version); - } - } + async fullSync(registryUrl: string): Promise<VersionsEndpointCache> { + try { + const url = `${registryUrl}/versions`; + const opts: HttpOptions = { headers: { 'Accept-Encoding': 'gzip' } }; + const { body } = await this.http.get(url, opts); + return parseFullBody(body); + } catch (err) { + if (err instanceof HttpError && err.response?.statusCode === 404) { + return { versionsEndpointSupported: false }; } - packageReleases.set(packageName, versions); + this.handleGenericErrors(err); } } - async updateRubyGemsVersions(regCache: RegistryCache): Promise<void> { - const url = `${regCache.registryUrl}/versions`; - const options = { - headers: { - 'accept-encoding': 'gzip', - range: `bytes=${regCache.contentLength}-`, - }, - }; - let newLines: string; + async deltaSync( + oldCache: VersionsEndpointData, + registryUrl: string + ): Promise<VersionsEndpointCache> { try { - logger.debug('Rubygems: Fetching rubygems.org versions'); - const startTime = Date.now(); - newLines = (await this.http.get(url, options)).body; - const durationMs = Math.round(Date.now() - startTime); - logger.debug(`Rubygems: Fetched rubygems.org versions in ${durationMs}`); - } catch (err) /* istanbul ignore next */ { - if (err instanceof HttpError && err.response?.statusCode === 404) { - regCache.isSupported = false; - return; - } + const url = `${registryUrl}/versions`; + const startByte = oldCache.contentLength - oldCache.contentTail.length; + const opts: HttpOptions = { + headers: { + ['Accept-Encoding']: 'deflate, compress, br', // Note: `gzip` usage breaks http client, when used with `Range` header + ['Range']: `bytes=${startByte}-`, + }, + }; + const { statusCode, body } = await this.http.get(url, opts); - if (err.statusCode === 416) { - logger.debug('Rubygems: No update'); - regCache.lastSync = new Date(); - return; + /** + * Rubygems will return the full body instead of `416 Range Not Satisfiable`. + * In this case, status code will be 200 instead of 206. + */ + if (statusCode === 200) { + return parseFullBody(body); } - regCache.contentLength = 0; - regCache.packageReleases.clear(); + /** + * We request data in range that overlaps previously fetched data. + * If the head of the response doesn't match the tail of the previous response, + * it means that the data we have is no longer valid. + * In this case we start over with a full sync. + */ + const contentHead = getContentHead(body); + if (contentHead !== oldCache.contentTail) { + return this.fullSync(registryUrl); + } - logger.debug({ err }, 'Rubygems fetch error'); - throw new ExternalHostError(err); - } + /** + * Update the cache with the new data. + */ + const versionsEndpointSupported = true; + const delta = stripContentHead(body); + const packageVersions = VersionsDatasource.reconcilePackageVersions( + oldCache.packageVersions, + VersionLines.parse(delta) + ); + const syncedAt = new Date(); + const contentLength = oldCache.contentLength + delta.length; + const contentTail = getContentTail(body); - regCache.isSupported = true; - regCache.lastSync = new Date(); + return { + versionsEndpointSupported, + packageVersions, + syncedAt, + contentLength, + contentTail, + }; + } catch (err) { + if (err instanceof HttpError) { + const responseStatus = err.response?.statusCode; - const lines = Lines.parse(newLines); - this.updatePackageReleases(regCache.packageReleases, lines); - } + /** + * In case of `416 Range Not Satisfiable` we do a full sync. + * This is unlikely to happen in real life, but anyway. + */ + if (responseStatus === 416) { + return this.fullSync(registryUrl); + } - private updateRubyGemsVersionsPromise: Promise<void> | null = null; + /** + * If the endpoint is not supported, we stop trying. + * This is unlikely to happen in real life, but still. + */ + if (responseStatus === 404) { + return { versionsEndpointSupported: false }; + } + } - async syncVersions(regCache: RegistryCache): Promise<void> { - const isStale = getElapsedMinutes(regCache.lastSync) >= 15; - if (isStale) { - this.updateRubyGemsVersionsPromise = - this.updateRubyGemsVersionsPromise ?? - this.updateRubyGemsVersions(regCache); - await this.updateRubyGemsVersionsPromise; - this.updateRubyGemsVersionsPromise = null; + this.handleGenericErrors(err); } } }