From 4461796836af9ce05b2f109b27e80ea107fd5140 Mon Sep 17 00:00:00 2001
From: Jamie Magee <jamie.magee@gmail.com>
Date: Tue, 12 Oct 2021 21:42:12 -0700
Subject: [PATCH] refactor(crate): convert to class-based datasource (#12033)

---
 lib/datasource/api.ts               |   4 +-
 lib/datasource/crate/index.spec.ts  |  39 ++-
 lib/datasource/crate/index.ts       | 440 ++++++++++++++--------------
 lib/manager/cargo/extract.ts        |   4 +-
 lib/util/cache/package/decorator.ts |  12 +
 5 files changed, 268 insertions(+), 231 deletions(-)

diff --git a/lib/datasource/api.ts b/lib/datasource/api.ts
index 3e45c789a1..9cf8796dc8 100644
--- a/lib/datasource/api.ts
+++ b/lib/datasource/api.ts
@@ -3,7 +3,7 @@ import { ArtifactoryDatasource } from './artifactory';
 import { BitBucketTagsDatasource } from './bitbucket-tags';
 import { CdnJsDatasource } from './cdnjs';
 import { ClojureDatasource } from './clojure';
-import * as crate from './crate';
+import { CrateDatasource } from './crate';
 import { DartDatasource } from './dart';
 import * as docker from './docker';
 import { GalaxyDatasource } from './galaxy';
@@ -45,7 +45,7 @@ api.set(ArtifactoryDatasource.id, new ArtifactoryDatasource());
 api.set('bitbucket-tags', new BitBucketTagsDatasource());
 api.set('cdnjs', new CdnJsDatasource());
 api.set('clojure', new ClojureDatasource());
-api.set('crate', crate);
+api.set('crate', new CrateDatasource());
 api.set('dart', new DartDatasource());
 api.set('docker', docker);
 api.set('galaxy', new GalaxyDatasource());
diff --git a/lib/datasource/crate/index.spec.ts b/lib/datasource/crate/index.spec.ts
index afaac1b826..9da0f5a7e4 100644
--- a/lib/datasource/crate/index.spec.ts
+++ b/lib/datasource/crate/index.spec.ts
@@ -10,7 +10,7 @@ import { setGlobalConfig } from '../../config/global';
 import type { RepoGlobalConfig } from '../../config/types';
 import * as memCache from '../../util/cache/memory';
 import { RegistryFlavor, RegistryInfo } from './types';
-import { id as datasource, fetchCrateRecordsPayload, getIndexSuffix } from '.';
+import { CrateDatasource } from '.';
 
 jest.mock('simple-git');
 const simpleGit: any = _simpleGit;
@@ -22,6 +22,8 @@ const res3 = loadFixture('mypkg');
 const baseUrl =
   'https://raw.githubusercontent.com/rust-lang/crates.io-index/master/';
 
+const datasource = CrateDatasource.id;
+
 function setupGitMocks(delayMs?: number): { mockClone: jest.Mock<any, any> } {
   const mockClone = jest
     .fn()
@@ -63,13 +65,29 @@ function setupErrorGitMock(): { mockClone: jest.Mock<any, any> } {
 describe('datasource/crate/index', () => {
   describe('getIndexSuffix', () => {
     it('returns correct suffixes', () => {
-      expect(getIndexSuffix('a')).toStrictEqual(['1', 'a']);
-      expect(getIndexSuffix('1')).toStrictEqual(['1', '1']);
-      expect(getIndexSuffix('1234567')).toStrictEqual(['12', '34', '1234567']);
-      expect(getIndexSuffix('ab')).toStrictEqual(['2', 'ab']);
-      expect(getIndexSuffix('abc')).toStrictEqual(['3', 'a', 'abc']);
-      expect(getIndexSuffix('abcd')).toStrictEqual(['ab', 'cd', 'abcd']);
-      expect(getIndexSuffix('abcde')).toStrictEqual(['ab', 'cd', 'abcde']);
+      expect(CrateDatasource.getIndexSuffix('a')).toStrictEqual(['1', 'a']);
+      expect(CrateDatasource.getIndexSuffix('1')).toStrictEqual(['1', '1']);
+      expect(CrateDatasource.getIndexSuffix('1234567')).toStrictEqual([
+        '12',
+        '34',
+        '1234567',
+      ]);
+      expect(CrateDatasource.getIndexSuffix('ab')).toStrictEqual(['2', 'ab']);
+      expect(CrateDatasource.getIndexSuffix('abc')).toStrictEqual([
+        '3',
+        'a',
+        'abc',
+      ]);
+      expect(CrateDatasource.getIndexSuffix('abcd')).toStrictEqual([
+        'ab',
+        'cd',
+        'abcd',
+      ]);
+      expect(CrateDatasource.getIndexSuffix('abcde')).toStrictEqual([
+        'ab',
+        'cd',
+        'abcde',
+      ]);
     });
   });
 
@@ -323,7 +341,10 @@ describe('datasource/crate/index', () => {
       const info: RegistryInfo = {
         flavor: RegistryFlavor.Cloudsmith,
       };
-      await expect(fetchCrateRecordsPayload(info, 'benedict')).toReject();
+      const crateDatasource = new CrateDatasource();
+      await expect(
+        crateDatasource.fetchCrateRecordsPayload(info, 'benedict')
+      ).toReject();
     });
   });
 });
diff --git a/lib/datasource/crate/index.ts b/lib/datasource/crate/index.ts
index 39dcaf5b7c..e66807e199 100644
--- a/lib/datasource/crate/index.ts
+++ b/lib/datasource/crate/index.ts
@@ -3,274 +3,278 @@ import Git from 'simple-git';
 import { join } from 'upath';
 import { getGlobalConfig } from '../../config/global';
 import { logger } from '../../logger';
-import { ExternalHostError } from '../../types/errors/external-host-error';
 import * as memCache from '../../util/cache/memory';
-import * as packageCache from '../../util/cache/package';
+import { cache } from '../../util/cache/package/decorator';
 import { privateCacheDir, readFile } from '../../util/fs';
 import { simpleGitConfig } from '../../util/git/config';
-import { Http } from '../../util/http';
 import * as cargoVersioning from '../../versioning/cargo';
+import { Datasource } from '../datasource';
 import type { GetReleasesConfig, Release, ReleaseResult } from '../types';
 import { CrateRecord, RegistryFlavor, RegistryInfo } from './types';
 
-export const id = 'crate';
-export const customRegistrySupport = true;
-export const defaultRegistryUrls = ['https://crates.io'];
-export const defaultVersioning = cargoVersioning.id;
-export const registryStrategy = 'first';
+export class CrateDatasource extends Datasource {
+  static readonly id = 'crate';
 
-const http = new Http(id);
+  constructor() {
+    super(CrateDatasource.id);
+  }
 
-const CRATES_IO_BASE_URL =
-  'https://raw.githubusercontent.com/rust-lang/crates.io-index/master/';
+  override defaultRegistryUrls = ['https://crates.io'];
 
-export function getIndexSuffix(lookupName: string): string[] {
-  const len = lookupName.length;
+  override defaultVersioning = cargoVersioning.id;
 
-  if (len === 1) {
-    return ['1', lookupName];
-  }
-  if (len === 2) {
-    return ['2', lookupName];
-  }
-  if (len === 3) {
-    return ['3', lookupName[0], lookupName];
-  }
+  static readonly CRATES_IO_BASE_URL =
+    'https://raw.githubusercontent.com/rust-lang/crates.io-index/master/';
 
-  return [lookupName.slice(0, 2), lookupName.slice(2, 4), lookupName];
-}
+  @cache({
+    namespace: `datasource-${CrateDatasource.id}`,
+    key: ({ registryUrl, lookupName }: GetReleasesConfig) =>
+      `${registryUrl}/${lookupName}`,
+    cacheable: ({ registryUrl }: GetReleasesConfig) =>
+      CrateDatasource.areReleasesCacheable(registryUrl),
+  })
+  async getReleases({
+    lookupName,
+    registryUrl,
+  }: GetReleasesConfig): Promise<ReleaseResult | null> {
+    // istanbul ignore if
+    if (!registryUrl) {
+      logger.warn(
+        'crate datasource: No registryUrl specified, cannot perform getReleases'
+      );
+      return null;
+    }
 
-export async function fetchCrateRecordsPayload(
-  info: RegistryInfo,
-  lookupName: string
-): Promise<string> {
-  if (info.clonePath) {
-    const path = join(info.clonePath, ...getIndexSuffix(lookupName));
-    return readFile(path, 'utf8');
-  }
+    const registryInfo = await CrateDatasource.fetchRegistryInfo({
+      lookupName,
+      registryUrl,
+    });
+    if (!registryInfo) {
+      logger.debug({ registryUrl }, 'Could not fetch registry info');
+      return null;
+    }
 
-  if (info.flavor === RegistryFlavor.CratesIo) {
-    const crateUrl = CRATES_IO_BASE_URL + getIndexSuffix(lookupName).join('/');
-    try {
-      return (await http.get(crateUrl)).body;
-    } catch (err) {
-      if (
-        err.statusCode === 429 ||
-        (err.statusCode >= 500 && err.statusCode < 600)
-      ) {
-        throw new ExternalHostError(err);
-      }
+    const dependencyUrl = CrateDatasource.getDependencyUrl(
+      registryInfo,
+      lookupName
+    );
 
-      throw err;
+    const payload = await this.fetchCrateRecordsPayload(
+      registryInfo,
+      lookupName
+    );
+    const lines = payload
+      .split('\n') // break into lines
+      .map((line) => line.trim()) // remove whitespace
+      .filter((line) => line.length !== 0) // remove empty lines
+      .map((line) => JSON.parse(line) as CrateRecord); // parse
+    const result: ReleaseResult = {
+      dependencyUrl,
+      releases: [],
+    };
+    result.releases = lines
+      .map((version) => {
+        const release: Release = {
+          version: version.vers,
+        };
+        if (version.yanked) {
+          release.isDeprecated = true;
+        }
+        return release;
+      })
+      .filter((release) => release.version);
+    if (!result.releases.length) {
+      return null;
     }
-  }
 
-  throw new Error(`unsupported crate registry flavor: ${info.flavor}`);
-}
+    return result;
+  }
 
-/**
- * Computes the dependency URL for a crate, given
- * registry information
- */
-function getDependencyUrl(info: RegistryInfo, lookupName: string): string {
-  switch (info.flavor) {
-    case RegistryFlavor.CratesIo:
-      return `https://crates.io/crates/${lookupName}`;
-    case RegistryFlavor.Cloudsmith: {
-      // input: https://dl.cloudsmith.io/basic/$org/$repo/cargo/index.git
-      const tokens = info.url.pathname.split('/');
-      const org = tokens[2];
-      const repo = tokens[3];
-      return `https://cloudsmith.io/~${org}/repos/${repo}/packages/detail/cargo/${lookupName}`;
+  public async fetchCrateRecordsPayload(
+    info: RegistryInfo,
+    lookupName: string
+  ): Promise<string> {
+    if (info.clonePath) {
+      const path = join(
+        info.clonePath,
+        ...CrateDatasource.getIndexSuffix(lookupName)
+      );
+      return readFile(path, 'utf8');
     }
-    default:
-      return `${info.rawUrl}/${lookupName}`;
-  }
-}
 
-/**
- * Given a Git URL, computes a semi-human-readable name for a folder in which to
- * clone the repository.
- */
-function cacheDirFromUrl(url: URL): string {
-  const proto = url.protocol.replace(/:$/, '');
-  const host = url.hostname;
-  const hash = hasha(url.pathname, {
-    algorithm: 'sha256',
-  }).substr(0, 7);
-
-  return `crate-registry-${proto}-${host}-${hash}`;
-}
+    if (info.flavor === RegistryFlavor.CratesIo) {
+      const crateUrl =
+        CrateDatasource.CRATES_IO_BASE_URL +
+        CrateDatasource.getIndexSuffix(lookupName).join('/');
+      try {
+        return (await this.http.get(crateUrl)).body;
+      } catch (err) {
+        this.handleGenericErrors(err);
+      }
+    }
 
-/**
- * Fetches information about a registry, by url.
- * If no url is given, assumes crates.io.
- * If an url is given, assumes it's a valid Git repository
- * url and clones it to cache.
- */
-async function fetchRegistryInfo(
-  config: GetReleasesConfig,
-  registryUrl: string
-): Promise<RegistryInfo | null> {
-  let url: URL;
-  try {
-    url = new URL(registryUrl);
-  } catch (err) {
-    logger.debug({ registryUrl }, 'could not parse registry URL');
-    return null;
+    throw new Error(`unsupported crate registry flavor: ${info.flavor}`);
   }
 
-  let flavor: RegistryFlavor;
-  if (url.hostname === 'crates.io') {
-    flavor = RegistryFlavor.CratesIo;
-  } else if (url.hostname === 'dl.cloudsmith.io') {
-    flavor = RegistryFlavor.Cloudsmith;
-  } else {
-    flavor = RegistryFlavor.Other;
+  /**
+   * Computes the dependency URL for a crate, given
+   * registry information
+   */
+  private static getDependencyUrl(
+    info: RegistryInfo,
+    lookupName: string
+  ): string {
+    switch (info.flavor) {
+      case RegistryFlavor.CratesIo:
+        return `https://crates.io/crates/${lookupName}`;
+      case RegistryFlavor.Cloudsmith: {
+        // input: https://dl.cloudsmith.io/basic/$org/$repo/cargo/index.git
+        const tokens = info.url.pathname.split('/');
+        const org = tokens[2];
+        const repo = tokens[3];
+        return `https://cloudsmith.io/~${org}/repos/${repo}/packages/detail/cargo/${lookupName}`;
+      }
+      default:
+        return `${info.rawUrl}/${lookupName}`;
+    }
   }
 
-  const registry: RegistryInfo = {
-    flavor,
-    rawUrl: registryUrl,
-    url,
-  };
+  /**
+   * Given a Git URL, computes a semi-human-readable name for a folder in which to
+   * clone the repository.
+   */
+  private static cacheDirFromUrl(url: URL): string {
+    const proto = url.protocol.replace(/:$/, '');
+    const host = url.hostname;
+    const hash = hasha(url.pathname, {
+      algorithm: 'sha256',
+    }).substr(0, 7);
+
+    return `crate-registry-${proto}-${host}-${hash}`;
+  }
 
-  if (flavor !== RegistryFlavor.CratesIo) {
-    if (!getGlobalConfig().allowCustomCrateRegistries) {
-      logger.warn(
-        'crate datasource: allowCustomCrateRegistries=true is required for registries other than crates.io, bailing out'
-      );
+  /**
+   * Fetches information about a registry, by URL.
+   * Returns null for an unparseable URL, a disallowed custom registry, or a
+   * failed clone. crates.io needs no clone; any other URL is assumed to be a
+   * valid Git repository URL and is cloned to the cache.
+   */
+  private static async fetchRegistryInfo({
+    lookupName,
+    registryUrl,
+  }: GetReleasesConfig): Promise<RegistryInfo | null> {
+    let url: URL;
+    try {
+      url = new URL(registryUrl);
+    } catch (err) {
+      logger.debug({ registryUrl }, 'could not parse registry URL');
       return null;
     }
 
-    const cacheKey = `crate-datasource/registry-clone-path/${registryUrl}`;
-    const cacheKeyForError = `crate-datasource/registry-clone-path/${registryUrl}/error`;
-
-    // We need to ensure we don't run `git clone` in parallel. Therefore we store
-    // a promise of the running operation in the mem cache, which in the end resolves
-    // to the file path of the cloned repository.
-
-    const clonePathPromise: Promise<string> | null = memCache.get(cacheKey);
-    let clonePath: string;
-
-    // eslint-disable-next-line @typescript-eslint/no-misused-promises
-    if (clonePathPromise) {
-      clonePath = await clonePathPromise;
+    let flavor: RegistryFlavor;
+    if (url.hostname === 'crates.io') {
+      flavor = RegistryFlavor.CratesIo;
+    } else if (url.hostname === 'dl.cloudsmith.io') {
+      flavor = RegistryFlavor.Cloudsmith;
     } else {
-      clonePath = join(privateCacheDir(), cacheDirFromUrl(url));
-      logger.info({ clonePath, registryUrl }, `Cloning private cargo registry`);
-
-      const git = Git(simpleGitConfig());
-      const clonePromise = git.clone(registryUrl, clonePath, {
-        '--depth': 1,
-      });
+      flavor = RegistryFlavor.Other;
+    }
 
-      memCache.set(
-        cacheKey,
-        clonePromise.then(() => clonePath).catch(() => null)
-      );
+    const registry: RegistryInfo = {
+      flavor,
+      rawUrl: registryUrl,
+      url,
+    };
 
-      try {
-        await clonePromise;
-      } catch (err) {
+    if (flavor !== RegistryFlavor.CratesIo) {
+      if (!getGlobalConfig().allowCustomCrateRegistries) {
         logger.warn(
-          { err, lookupName: config.lookupName, registryUrl },
-          'failed cloning git registry'
+          'crate datasource: allowCustomCrateRegistries=true is required for registries other than crates.io, bailing out'
         );
-        memCache.set(cacheKeyForError, err);
-
         return null;
       }
-    }
 
-    if (!clonePath) {
-      const err = memCache.get(cacheKeyForError);
-      logger.warn(
-        { err, lookupName: config.lookupName, registryUrl },
-        'Previous git clone failed, bailing out.'
-      );
+      const cacheKey = `crate-datasource/registry-clone-path/${registryUrl}`;
+      const cacheKeyForError = `crate-datasource/registry-clone-path/${registryUrl}/error`;
 
-      return null;
-    }
+      // We need to ensure we don't run `git clone` in parallel. Therefore we store
+      // a promise of the running operation in the mem cache, which in the end resolves
+      // to the file path of the cloned repository.
 
-    registry.clonePath = clonePath;
-  }
+      const clonePathPromise: Promise<string> | null = memCache.get(cacheKey);
+      let clonePath: string;
 
-  return registry;
-}
+      // eslint-disable-next-line @typescript-eslint/no-misused-promises
+      if (clonePathPromise) {
+        clonePath = await clonePathPromise;
+      } else {
+        clonePath = join(
+          privateCacheDir(),
+          CrateDatasource.cacheDirFromUrl(url)
+        );
+        logger.info(
+          { clonePath, registryUrl },
+          `Cloning private cargo registry`
+        );
 
-export function areReleasesCacheable(registryUrl: string): boolean {
-  // We only cache public releases, we don't want to cache private
-  // cloned data between runs.
-  return registryUrl === 'https://crates.io';
-}
+        const git = Git(simpleGitConfig());
+        const clonePromise = git.clone(registryUrl, clonePath, {
+          '--depth': 1,
+        });
 
-export async function getReleases(
-  config: GetReleasesConfig
-): Promise<ReleaseResult | null> {
-  const { lookupName, registryUrl } = config;
+        memCache.set(
+          cacheKey,
+          clonePromise.then(() => clonePath).catch(() => null)
+        );
 
-  // istanbul ignore if
-  if (!registryUrl) {
-    logger.warn(
-      'crate datasource: No registryUrl specified, cannot perform getReleases'
-    );
-    return null;
-  }
+        try {
+          await clonePromise;
+        } catch (err) {
+          logger.warn(
+            { err, lookupName, registryUrl },
+            'failed cloning git registry'
+          );
+          memCache.set(cacheKeyForError, err);
+
+          return null;
+        }
+      }
 
-  const cacheable = areReleasesCacheable(registryUrl);
-  const cacheNamespace = 'datasource-crate';
-  const cacheKey = `${registryUrl}/${lookupName}`;
+      if (!clonePath) {
+        const err = memCache.get(cacheKeyForError);
+        logger.warn(
+          { err, lookupName, registryUrl },
+          'Previous git clone failed, bailing out.'
+        );
 
-  if (cacheable) {
-    const cachedResult = await packageCache.get<ReleaseResult>(
-      cacheNamespace,
-      cacheKey
-    );
-    // istanbul ignore if
-    if (cachedResult) {
-      logger.debug({ cacheKey }, 'Returning cached resource');
-      return cachedResult;
+        return null;
+      }
+
+      registry.clonePath = clonePath;
     }
-  }
 
-  const registryInfo = await fetchRegistryInfo(config, registryUrl);
-  if (!registryInfo) {
-    logger.debug({ registryUrl }, 'Could not fetch registry info');
-    return null;
+    return registry;
   }
 
-  const dependencyUrl = getDependencyUrl(registryInfo, lookupName);
-
-  const payload = await fetchCrateRecordsPayload(registryInfo, lookupName);
-  const lines = payload
-    .split('\n') // break into lines
-    .map((line) => line.trim()) // remove whitespace
-    .filter((line) => line.length !== 0) // remove empty lines
-    .map((line) => JSON.parse(line) as CrateRecord); // parse
-  const result: ReleaseResult = {
-    dependencyUrl,
-    releases: [],
-  };
-  result.releases = lines
-    .map((version) => {
-      const release: Release = {
-        version: version.vers,
-      };
-      if (version.yanked) {
-        release.isDeprecated = true;
-      }
-      return release;
-    })
-    .filter((release) => release.version);
-  if (!result.releases.length) {
-    return null;
+  private static areReleasesCacheable(registryUrl: string): boolean {
+    // We only cache public releases, we don't want to cache private
+    // cloned data between runs.
+    return registryUrl === 'https://crates.io';
   }
 
-  if (cacheable) {
-    const cacheMinutes = 10;
-    await packageCache.set(cacheNamespace, cacheKey, result, cacheMinutes);
-  }
+  public static getIndexSuffix(lookupName: string): string[] {
+    const len = lookupName.length;
 
-  return result;
+    if (len === 1) {
+      return ['1', lookupName];
+    }
+    if (len === 2) {
+      return ['2', lookupName];
+    }
+    if (len === 3) {
+      return ['3', lookupName[0], lookupName];
+    }
+
+    return [lookupName.slice(0, 2), lookupName.slice(2, 4), lookupName];
+  }
 }
diff --git a/lib/manager/cargo/extract.ts b/lib/manager/cargo/extract.ts
index af76dedd3a..d10b89bf3d 100644
--- a/lib/manager/cargo/extract.ts
+++ b/lib/manager/cargo/extract.ts
@@ -1,5 +1,5 @@
 import { parse } from '@iarna/toml';
-import * as datasourceCrate from '../../datasource/crate';
+import { CrateDatasource } from '../../datasource/crate';
 import { logger } from '../../logger';
 import { SkipReason } from '../../types';
 import { findLocalSiblingOrParent, readLocalFile } from '../../util/fs';
@@ -70,7 +70,7 @@ function extractFromSection(
       depType: section,
       currentValue: currentValue as any,
       managerData: { nestedVersion },
-      datasource: datasourceCrate.id,
+      datasource: CrateDatasource.id,
     };
     if (registryUrls) {
       dep.registryUrls = registryUrls;
diff --git a/lib/util/cache/package/decorator.ts b/lib/util/cache/package/decorator.ts
index 0e547bb1f7..cf7382e2d6 100644
--- a/lib/util/cache/package/decorator.ts
+++ b/lib/util/cache/package/decorator.ts
@@ -57,6 +57,7 @@ function decorate<T>(fn: Handler<T>): Decorator<T> {
 }
 
 type HashFunction<T extends any[] = any[]> = (...args: T) => string;
+type BooleanFunction<T extends any[] = any[]> = (...args: T) => boolean;
 
 /**
  * The cache decorator parameters.
@@ -74,6 +75,12 @@ interface CacheParameters {
    */
   key: string | HashFunction;
 
+  /**
+   * Predicate called with the method's arguments; returns true if the result
+   * may be cached. Used to prevent caching of private or sensitive results.
+   */
+  cacheable?: BooleanFunction;
+
   /**
    * The TTL (or expiry) of the key in minutes
    */
@@ -86,9 +93,14 @@ interface CacheParameters {
 export function cache<T>({
   namespace,
   key,
+  cacheable = () => true,
   ttlMinutes = 30,
 }: CacheParameters): Decorator<T> {
   return decorate(async ({ args, instance, callback }) => {
+    if (!cacheable.apply(instance, args)) {
+      return callback();
+    }
+
     let finalNamespace: string;
     if (is.string(namespace)) {
       finalNamespace = namespace;
-- 
GitLab