From 4461796836af9ce05b2f109b27e80ea107fd5140 Mon Sep 17 00:00:00 2001 From: Jamie Magee <jamie.magee@gmail.com> Date: Tue, 12 Oct 2021 21:42:12 -0700 Subject: [PATCH] refactor(crate): convert to class-based datasource (#12033) --- lib/datasource/api.ts | 4 +- lib/datasource/crate/index.spec.ts | 39 ++- lib/datasource/crate/index.ts | 440 ++++++++++++++-------------- lib/manager/cargo/extract.ts | 4 +- lib/util/cache/package/decorator.ts | 12 + 5 files changed, 268 insertions(+), 231 deletions(-) diff --git a/lib/datasource/api.ts b/lib/datasource/api.ts index 3e45c789a1..9cf8796dc8 100644 --- a/lib/datasource/api.ts +++ b/lib/datasource/api.ts @@ -3,7 +3,7 @@ import { ArtifactoryDatasource } from './artifactory'; import { BitBucketTagsDatasource } from './bitbucket-tags'; import { CdnJsDatasource } from './cdnjs'; import { ClojureDatasource } from './clojure'; -import * as crate from './crate'; +import { CrateDatasource } from './crate'; import { DartDatasource } from './dart'; import * as docker from './docker'; import { GalaxyDatasource } from './galaxy'; @@ -45,7 +45,7 @@ api.set(ArtifactoryDatasource.id, new ArtifactoryDatasource()); api.set('bitbucket-tags', new BitBucketTagsDatasource()); api.set('cdnjs', new CdnJsDatasource()); api.set('clojure', new ClojureDatasource()); -api.set('crate', crate); +api.set('crate', new CrateDatasource()); api.set('dart', new DartDatasource()); api.set('docker', docker); api.set('galaxy', new GalaxyDatasource()); diff --git a/lib/datasource/crate/index.spec.ts b/lib/datasource/crate/index.spec.ts index afaac1b826..9da0f5a7e4 100644 --- a/lib/datasource/crate/index.spec.ts +++ b/lib/datasource/crate/index.spec.ts @@ -10,7 +10,7 @@ import { setGlobalConfig } from '../../config/global'; import type { RepoGlobalConfig } from '../../config/types'; import * as memCache from '../../util/cache/memory'; import { RegistryFlavor, RegistryInfo } from './types'; -import { id as datasource, fetchCrateRecordsPayload, getIndexSuffix } from 
'.'; +import { CrateDatasource } from '.'; jest.mock('simple-git'); const simpleGit: any = _simpleGit; @@ -22,6 +22,8 @@ const res3 = loadFixture('mypkg'); const baseUrl = 'https://raw.githubusercontent.com/rust-lang/crates.io-index/master/'; +const datasource = CrateDatasource.id; + function setupGitMocks(delayMs?: number): { mockClone: jest.Mock<any, any> } { const mockClone = jest .fn() @@ -63,13 +65,29 @@ function setupErrorGitMock(): { mockClone: jest.Mock<any, any> } { describe('datasource/crate/index', () => { describe('getIndexSuffix', () => { it('returns correct suffixes', () => { - expect(getIndexSuffix('a')).toStrictEqual(['1', 'a']); - expect(getIndexSuffix('1')).toStrictEqual(['1', '1']); - expect(getIndexSuffix('1234567')).toStrictEqual(['12', '34', '1234567']); - expect(getIndexSuffix('ab')).toStrictEqual(['2', 'ab']); - expect(getIndexSuffix('abc')).toStrictEqual(['3', 'a', 'abc']); - expect(getIndexSuffix('abcd')).toStrictEqual(['ab', 'cd', 'abcd']); - expect(getIndexSuffix('abcde')).toStrictEqual(['ab', 'cd', 'abcde']); + expect(CrateDatasource.getIndexSuffix('a')).toStrictEqual(['1', 'a']); + expect(CrateDatasource.getIndexSuffix('1')).toStrictEqual(['1', '1']); + expect(CrateDatasource.getIndexSuffix('1234567')).toStrictEqual([ + '12', + '34', + '1234567', + ]); + expect(CrateDatasource.getIndexSuffix('ab')).toStrictEqual(['2', 'ab']); + expect(CrateDatasource.getIndexSuffix('abc')).toStrictEqual([ + '3', + 'a', + 'abc', + ]); + expect(CrateDatasource.getIndexSuffix('abcd')).toStrictEqual([ + 'ab', + 'cd', + 'abcd', + ]); + expect(CrateDatasource.getIndexSuffix('abcde')).toStrictEqual([ + 'ab', + 'cd', + 'abcde', + ]); }); }); @@ -323,7 +341,10 @@ describe('datasource/crate/index', () => { const info: RegistryInfo = { flavor: RegistryFlavor.Cloudsmith, }; - await expect(fetchCrateRecordsPayload(info, 'benedict')).toReject(); + const crateDatasource = new CrateDatasource(); + await expect( + crateDatasource.fetchCrateRecordsPayload(info, 
'benedict') + ).toReject(); }); }); }); diff --git a/lib/datasource/crate/index.ts b/lib/datasource/crate/index.ts index 39dcaf5b7c..e66807e199 100644 --- a/lib/datasource/crate/index.ts +++ b/lib/datasource/crate/index.ts @@ -3,274 +3,278 @@ import Git from 'simple-git'; import { join } from 'upath'; import { getGlobalConfig } from '../../config/global'; import { logger } from '../../logger'; -import { ExternalHostError } from '../../types/errors/external-host-error'; import * as memCache from '../../util/cache/memory'; -import * as packageCache from '../../util/cache/package'; +import { cache } from '../../util/cache/package/decorator'; import { privateCacheDir, readFile } from '../../util/fs'; import { simpleGitConfig } from '../../util/git/config'; -import { Http } from '../../util/http'; import * as cargoVersioning from '../../versioning/cargo'; +import { Datasource } from '../datasource'; import type { GetReleasesConfig, Release, ReleaseResult } from '../types'; import { CrateRecord, RegistryFlavor, RegistryInfo } from './types'; -export const id = 'crate'; -export const customRegistrySupport = true; -export const defaultRegistryUrls = ['https://crates.io']; -export const defaultVersioning = cargoVersioning.id; -export const registryStrategy = 'first'; +export class CrateDatasource extends Datasource { + static readonly id = 'crate'; -const http = new Http(id); + constructor() { + super(CrateDatasource.id); + } -const CRATES_IO_BASE_URL = - 'https://raw.githubusercontent.com/rust-lang/crates.io-index/master/'; + override defaultRegistryUrls = ['https://crates.io']; -export function getIndexSuffix(lookupName: string): string[] { - const len = lookupName.length; + override defaultVersioning = cargoVersioning.id; - if (len === 1) { - return ['1', lookupName]; - } - if (len === 2) { - return ['2', lookupName]; - } - if (len === 3) { - return ['3', lookupName[0], lookupName]; - } + static readonly CRATES_IO_BASE_URL = + 
'https://raw.githubusercontent.com/rust-lang/crates.io-index/master/'; - return [lookupName.slice(0, 2), lookupName.slice(2, 4), lookupName]; -} + @cache({ + namespace: `datasource-${CrateDatasource.id}`, + key: ({ registryUrl, lookupName }: GetReleasesConfig) => + `${registryUrl}/${lookupName}`, + cacheable: ({ registryUrl }: GetReleasesConfig) => + CrateDatasource.areReleasesCacheable(registryUrl), + }) + async getReleases({ + lookupName, + registryUrl, + }: GetReleasesConfig): Promise<ReleaseResult | null> { + // istanbul ignore if + if (!registryUrl) { + logger.warn( + 'crate datasource: No registryUrl specified, cannot perform getReleases' + ); + return null; + } -export async function fetchCrateRecordsPayload( - info: RegistryInfo, - lookupName: string -): Promise<string> { - if (info.clonePath) { - const path = join(info.clonePath, ...getIndexSuffix(lookupName)); - return readFile(path, 'utf8'); - } + const registryInfo = await CrateDatasource.fetchRegistryInfo({ + lookupName, + registryUrl, + }); + if (!registryInfo) { + logger.debug({ registryUrl }, 'Could not fetch registry info'); + return null; + } - if (info.flavor === RegistryFlavor.CratesIo) { - const crateUrl = CRATES_IO_BASE_URL + getIndexSuffix(lookupName).join('/'); - try { - return (await http.get(crateUrl)).body; - } catch (err) { - if ( - err.statusCode === 429 || - (err.statusCode >= 500 && err.statusCode < 600) - ) { - throw new ExternalHostError(err); - } + const dependencyUrl = CrateDatasource.getDependencyUrl( + registryInfo, + lookupName + ); - throw err; + const payload = await this.fetchCrateRecordsPayload( + registryInfo, + lookupName + ); + const lines = payload + .split('\n') // break into lines + .map((line) => line.trim()) // remove whitespace + .filter((line) => line.length !== 0) // remove empty lines + .map((line) => JSON.parse(line) as CrateRecord); // parse + const result: ReleaseResult = { + dependencyUrl, + releases: [], + }; + result.releases = lines + .map((version) => { 
+ const release: Release = { + version: version.vers, + }; + if (version.yanked) { + release.isDeprecated = true; + } + return release; + }) + .filter((release) => release.version); + if (!result.releases.length) { + return null; } - } - throw new Error(`unsupported crate registry flavor: ${info.flavor}`); -} + return result; + } -/** - * Computes the dependency URL for a crate, given - * registry information - */ -function getDependencyUrl(info: RegistryInfo, lookupName: string): string { - switch (info.flavor) { - case RegistryFlavor.CratesIo: - return `https://crates.io/crates/${lookupName}`; - case RegistryFlavor.Cloudsmith: { - // input: https://dl.cloudsmith.io/basic/$org/$repo/cargo/index.git - const tokens = info.url.pathname.split('/'); - const org = tokens[2]; - const repo = tokens[3]; - return `https://cloudsmith.io/~${org}/repos/${repo}/packages/detail/cargo/${lookupName}`; + public async fetchCrateRecordsPayload( + info: RegistryInfo, + lookupName: string + ): Promise<string> { + if (info.clonePath) { + const path = join( + info.clonePath, + ...CrateDatasource.getIndexSuffix(lookupName) + ); + return readFile(path, 'utf8'); } - default: - return `${info.rawUrl}/${lookupName}`; - } -} -/** - * Given a Git URL, computes a semi-human-readable name for a folder in which to - * clone the repository. - */ -function cacheDirFromUrl(url: URL): string { - const proto = url.protocol.replace(/:$/, ''); - const host = url.hostname; - const hash = hasha(url.pathname, { - algorithm: 'sha256', - }).substr(0, 7); - - return `crate-registry-${proto}-${host}-${hash}`; -} + if (info.flavor === RegistryFlavor.CratesIo) { + const crateUrl = + CrateDatasource.CRATES_IO_BASE_URL + + CrateDatasource.getIndexSuffix(lookupName).join('/'); + try { + return (await this.http.get(crateUrl)).body; + } catch (err) { + this.handleGenericErrors(err); + } + } -/** - * Fetches information about a registry, by url. - * If no url is given, assumes crates.io. 
- * If an url is given, assumes it's a valid Git repository - * url and clones it to cache. - */ -async function fetchRegistryInfo( - config: GetReleasesConfig, - registryUrl: string -): Promise<RegistryInfo | null> { - let url: URL; - try { - url = new URL(registryUrl); - } catch (err) { - logger.debug({ registryUrl }, 'could not parse registry URL'); - return null; + throw new Error(`unsupported crate registry flavor: ${info.flavor}`); } - let flavor: RegistryFlavor; - if (url.hostname === 'crates.io') { - flavor = RegistryFlavor.CratesIo; - } else if (url.hostname === 'dl.cloudsmith.io') { - flavor = RegistryFlavor.Cloudsmith; - } else { - flavor = RegistryFlavor.Other; + /** + * Computes the dependency URL for a crate, given + * registry information + */ + private static getDependencyUrl( + info: RegistryInfo, + lookupName: string + ): string { + switch (info.flavor) { + case RegistryFlavor.CratesIo: + return `https://crates.io/crates/${lookupName}`; + case RegistryFlavor.Cloudsmith: { + // input: https://dl.cloudsmith.io/basic/$org/$repo/cargo/index.git + const tokens = info.url.pathname.split('/'); + const org = tokens[2]; + const repo = tokens[3]; + return `https://cloudsmith.io/~${org}/repos/${repo}/packages/detail/cargo/${lookupName}`; + } + default: + return `${info.rawUrl}/${lookupName}`; + } } - const registry: RegistryInfo = { - flavor, - rawUrl: registryUrl, - url, - }; + /** + * Given a Git URL, computes a semi-human-readable name for a folder in which to + * clone the repository. 
+ */ + private static cacheDirFromUrl(url: URL): string { + const proto = url.protocol.replace(/:$/, ''); + const host = url.hostname; + const hash = hasha(url.pathname, { + algorithm: 'sha256', + }).substr(0, 7); + + return `crate-registry-${proto}-${host}-${hash}`; + } - if (flavor !== RegistryFlavor.CratesIo) { - if (!getGlobalConfig().allowCustomCrateRegistries) { - logger.warn( - 'crate datasource: allowCustomCrateRegistries=true is required for registries other than crates.io, bailing out' - ); + /** + * Fetches information about a registry, by URL. + * Returns null if the URL cannot be parsed. + * For hosts other than crates.io, assumes the URL is a valid Git + * repository URL and clones it to cache. + */ + private static async fetchRegistryInfo({ + lookupName, + registryUrl, + }: GetReleasesConfig): Promise<RegistryInfo | null> { + let url: URL; + try { + url = new URL(registryUrl); + } catch (err) { + logger.debug({ registryUrl }, 'could not parse registry URL'); return null; } - const cacheKey = `crate-datasource/registry-clone-path/${registryUrl}`; - const cacheKeyForError = `crate-datasource/registry-clone-path/${registryUrl}/error`; - - // We need to ensure we don't run `git clone` in parallel. Therefore we store - // a promise of the running operation in the mem cache, which in the end resolves - // to the file path of the cloned repository. 
- - const clonePathPromise: Promise<string> | null = memCache.get(cacheKey); - let clonePath: string; - - // eslint-disable-next-line @typescript-eslint/no-misused-promises - if (clonePathPromise) { - clonePath = await clonePathPromise; + let flavor: RegistryFlavor; + if (url.hostname === 'crates.io') { + flavor = RegistryFlavor.CratesIo; + } else if (url.hostname === 'dl.cloudsmith.io') { + flavor = RegistryFlavor.Cloudsmith; } else { - clonePath = join(privateCacheDir(), cacheDirFromUrl(url)); - logger.info({ clonePath, registryUrl }, `Cloning private cargo registry`); - - const git = Git(simpleGitConfig()); - const clonePromise = git.clone(registryUrl, clonePath, { - '--depth': 1, - }); + flavor = RegistryFlavor.Other; + } - memCache.set( - cacheKey, - clonePromise.then(() => clonePath).catch(() => null) - ); + const registry: RegistryInfo = { + flavor, + rawUrl: registryUrl, + url, + }; - try { - await clonePromise; - } catch (err) { + if (flavor !== RegistryFlavor.CratesIo) { + if (!getGlobalConfig().allowCustomCrateRegistries) { logger.warn( - { err, lookupName: config.lookupName, registryUrl }, - 'failed cloning git registry' + 'crate datasource: allowCustomCrateRegistries=true is required for registries other than crates.io, bailing out' ); - memCache.set(cacheKeyForError, err); - return null; } - } - if (!clonePath) { - const err = memCache.get(cacheKeyForError); - logger.warn( - { err, lookupName: config.lookupName, registryUrl }, - 'Previous git clone failed, bailing out.' - ); + const cacheKey = `crate-datasource/registry-clone-path/${registryUrl}`; + const cacheKeyForError = `crate-datasource/registry-clone-path/${registryUrl}/error`; - return null; - } + // We need to ensure we don't run `git clone` in parallel. Therefore we store + // a promise of the running operation in the mem cache, which in the end resolves + // to the file path of the cloned repository. 
- registry.clonePath = clonePath; - } + const clonePathPromise: Promise<string> | null = memCache.get(cacheKey); + let clonePath: string; - return registry; -} + // eslint-disable-next-line @typescript-eslint/no-misused-promises + if (clonePathPromise) { + clonePath = await clonePathPromise; + } else { + clonePath = join( + privateCacheDir(), + CrateDatasource.cacheDirFromUrl(url) + ); + logger.info( + { clonePath, registryUrl }, + `Cloning private cargo registry` + ); -export function areReleasesCacheable(registryUrl: string): boolean { - // We only cache public releases, we don't want to cache private - // cloned data between runs. - return registryUrl === 'https://crates.io'; -} + const git = Git(simpleGitConfig()); + const clonePromise = git.clone(registryUrl, clonePath, { + '--depth': 1, + }); -export async function getReleases( - config: GetReleasesConfig -): Promise<ReleaseResult | null> { - const { lookupName, registryUrl } = config; + memCache.set( + cacheKey, + clonePromise.then(() => clonePath).catch(() => null) + ); - // istanbul ignore if - if (!registryUrl) { - logger.warn( - 'crate datasource: No registryUrl specified, cannot perform getReleases' - ); - return null; - } + try { + await clonePromise; + } catch (err) { + logger.warn( + { err, lookupName, registryUrl }, + 'failed cloning git registry' + ); + memCache.set(cacheKeyForError, err); + + return null; + } + } - const cacheable = areReleasesCacheable(registryUrl); - const cacheNamespace = 'datasource-crate'; - const cacheKey = `${registryUrl}/${lookupName}`; + if (!clonePath) { + const err = memCache.get(cacheKeyForError); + logger.warn( + { err, lookupName, registryUrl }, + 'Previous git clone failed, bailing out.' 
+ ); - if (cacheable) { - const cachedResult = await packageCache.get<ReleaseResult>( - cacheNamespace, - cacheKey - ); - // istanbul ignore if - if (cachedResult) { - logger.debug({ cacheKey }, 'Returning cached resource'); - return cachedResult; + return null; + } + + registry.clonePath = clonePath; } - } - const registryInfo = await fetchRegistryInfo(config, registryUrl); - if (!registryInfo) { - logger.debug({ registryUrl }, 'Could not fetch registry info'); - return null; + return registry; } - const dependencyUrl = getDependencyUrl(registryInfo, lookupName); - - const payload = await fetchCrateRecordsPayload(registryInfo, lookupName); - const lines = payload - .split('\n') // break into lines - .map((line) => line.trim()) // remove whitespace - .filter((line) => line.length !== 0) // remove empty lines - .map((line) => JSON.parse(line) as CrateRecord); // parse - const result: ReleaseResult = { - dependencyUrl, - releases: [], - }; - result.releases = lines - .map((version) => { - const release: Release = { - version: version.vers, - }; - if (version.yanked) { - release.isDeprecated = true; - } - return release; - }) - .filter((release) => release.version); - if (!result.releases.length) { - return null; + private static areReleasesCacheable(registryUrl: string): boolean { + // We only cache public releases, we don't want to cache private + // cloned data between runs. 
+ return registryUrl === 'https://crates.io'; } - if (cacheable) { - const cacheMinutes = 10; - await packageCache.set(cacheNamespace, cacheKey, result, cacheMinutes); - } + public static getIndexSuffix(lookupName: string): string[] { + const len = lookupName.length; - return result; + if (len === 1) { + return ['1', lookupName]; + } + if (len === 2) { + return ['2', lookupName]; + } + if (len === 3) { + return ['3', lookupName[0], lookupName]; + } + + return [lookupName.slice(0, 2), lookupName.slice(2, 4), lookupName]; + } } diff --git a/lib/manager/cargo/extract.ts b/lib/manager/cargo/extract.ts index af76dedd3a..d10b89bf3d 100644 --- a/lib/manager/cargo/extract.ts +++ b/lib/manager/cargo/extract.ts @@ -1,5 +1,5 @@ import { parse } from '@iarna/toml'; -import * as datasourceCrate from '../../datasource/crate'; +import { CrateDatasource } from '../../datasource/crate'; import { logger } from '../../logger'; import { SkipReason } from '../../types'; import { findLocalSiblingOrParent, readLocalFile } from '../../util/fs'; @@ -70,7 +70,7 @@ function extractFromSection( depType: section, currentValue: currentValue as any, managerData: { nestedVersion }, - datasource: datasourceCrate.id, + datasource: CrateDatasource.id, }; if (registryUrls) { dep.registryUrls = registryUrls; diff --git a/lib/util/cache/package/decorator.ts b/lib/util/cache/package/decorator.ts index 0e547bb1f7..cf7382e2d6 100644 --- a/lib/util/cache/package/decorator.ts +++ b/lib/util/cache/package/decorator.ts @@ -57,6 +57,7 @@ function decorate<T>(fn: Handler<T>): Decorator<T> { } type HashFunction<T extends any[] = any[]> = (...args: T) => string; +type BooleanFunction<T extends any[] = any[]> = (...args: T) => boolean; /** * The cache decorator parameters. 
@@ -74,6 +75,12 @@ interface CacheParameters { */ key: string | HashFunction; + /** + * A function, called with the decorated method's arguments, that returns true if the result is cacheable. + * Used to prevent caching of private or sensitive results. + */ + cacheable?: BooleanFunction; + /** * The TTL (or expiry) of the key in minutes */ @@ -86,9 +93,14 @@ export function cache<T>({ namespace, key, + cacheable = () => true, ttlMinutes = 30, }: CacheParameters): Decorator<T> { return decorate(async ({ args, instance, callback }) => { + if (!cacheable.apply(instance, args)) { + return callback(); + } + let finalNamespace: string; if (is.string(namespace)) { finalNamespace = namespace; -- GitLab