From 78b8483953ac23f79bb9f5fb7d50eb1b639450f5 Mon Sep 17 00:00:00 2001
From: Rhys Arkins <rhys@arkins.net>
Date: Fri, 13 Jan 2023 22:20:32 +0100
Subject: [PATCH] feat(datasource/npm): cache etag for reuse (#19823)

---
 docs/usage/self-hosted-configuration.md | 11 ++++
 lib/config/global.ts                    |  1 +
 lib/config/options/index.ts             | 10 ++++
 lib/config/types.ts                     |  1 +
 lib/modules/datasource/npm/get.spec.ts  | 55 ++++++++++++++++++
 lib/modules/datasource/npm/get.ts       | 75 ++++++++++++++++++++++---
 lib/modules/datasource/npm/types.ts     |  7 +++
 7 files changed, 151 insertions(+), 9 deletions(-)

diff --git a/docs/usage/self-hosted-configuration.md b/docs/usage/self-hosted-configuration.md
index 47b3c2a5b2..b96e7861a4 100644
--- a/docs/usage/self-hosted-configuration.md
+++ b/docs/usage/self-hosted-configuration.md
@@ -200,6 +200,17 @@ For example:
 }
 ```
 
+## cacheHardTtlMinutes
+
+This experimental feature is used to implement the concept of a "soft" cache expiry for datasources, starting with `npm`.
+It should be set to a non-zero value; at least 60 (i.e. one hour) is recommended.
+
+When this value is set higher than the `npm` datasource's default expiry of 15 minutes, `cacheHardTtlMinutes` is used as the cache expiry instead, and the default expiry becomes the "soft" expiry value.
+Results which are soft expired are reused in the following manner:
+
+- The `etag` from the cached result is sent with the next registry request; if the registry responds with a 304, the cached result is revalidated and reused
+- If an error occurs when querying the `npmjs` registry, then soft expired results will be reused if they are present
+
 ## containerbaseDir
 
 This directory is used to cache downloads when `binarySource=docker` or `binarySource=install`.
diff --git a/lib/config/global.ts b/lib/config/global.ts
index 8051cd74d6..c1b4e92455 100644
--- a/lib/config/global.ts
+++ b/lib/config/global.ts
@@ -10,6 +10,7 @@ export class GlobalConfig {
     'allowScripts',
     'binarySource',
     'cacheDir',
+    'cacheHardTtlMinutes',
     'containerbaseDir',
     'customEnvVariables',
     'dockerChildPrefix',
diff --git a/lib/config/options/index.ts b/lib/config/options/index.ts
index 7d8c7aa990..6203eeef4d 100644
--- a/lib/config/options/index.ts
+++ b/lib/config/options/index.ts
@@ -2217,6 +2217,16 @@ const options: RenovateOptions[] = [
     env: false,
     experimental: true,
   },
+  {
+    name: 'cacheHardTtlMinutes',
+    description:
+      'Maximum duration in minutes to keep datasource cache entries.',
+    type: 'integer',
+    stage: 'repository',
+    default: 0,
+    globalOnly: true,
+    experimental: true,
+  },
   {
     name: 'prBodyDefinitions',
     description: 'Table column definitions to use in PR tables.',
diff --git a/lib/config/types.ts b/lib/config/types.ts
index 380289033b..5e67b80df5 100644
--- a/lib/config/types.ts
+++ b/lib/config/types.ts
@@ -114,6 +114,7 @@ export interface RepoGlobalConfig {
   allowScripts?: boolean;
   allowedPostUpgradeCommands?: string[];
   binarySource?: 'docker' | 'global' | 'install' | 'hermit';
+  cacheHardTtlMinutes?: number;
   customEnvVariables?: Record<string, string>;
   dockerChildPrefix?: string;
   dockerImagePrefix?: string;
diff --git a/lib/modules/datasource/npm/get.spec.ts b/lib/modules/datasource/npm/get.spec.ts
index edbccebe62..c0a7850746 100644
--- a/lib/modules/datasource/npm/get.spec.ts
+++ b/lib/modules/datasource/npm/get.spec.ts
@@ -1,10 +1,16 @@
 import * as httpMock from '../../../../test/http-mock';
+import { mocked } from '../../../../test/util';
 import { ExternalHostError } from '../../../types/errors/external-host-error';
+import * as _packageCache from '../../../util/cache/package';
 import * as hostRules from '../../../util/host-rules';
 import { Http } from '../../../util/http';
 import { getDependency } from './get';
 import { resolveRegistryUrl, setNpmrc } from './npmrc';
 
+jest.mock('../../../util/cache/package');
+
+const packageCache = mocked(_packageCache);
+
 function getPath(s = ''): string {
   const [x] = s.split('\n');
   const prePath = x.replace(/^.*https:\/\/test\.org/, '');
@@ -463,4 +469,53 @@ describe('modules/datasource/npm/get', () => {
       ]
     `);
   });
+
+  it('returns cached legacy', async () => {
+    packageCache.get.mockResolvedValueOnce({ some: 'result' });
+    const dep = await getDependency(http, 'https://some.url', 'some-package');
+    expect(dep).toMatchObject({ some: 'result' });
+  });
+
+  it('returns unexpired cache', async () => {
+    packageCache.get.mockResolvedValueOnce({
+      some: 'result',
+      cacheData: { softExpireAt: '2099' },
+    });
+    const dep = await getDependency(http, 'https://some.url', 'some-package');
+    expect(dep).toMatchObject({ some: 'result' });
+  });
+
+  it('returns soft expired cache if revalidated', async () => {
+    packageCache.get.mockResolvedValueOnce({
+      some: 'result',
+      cacheData: {
+        softExpireAt: '2020',
+        etag: 'some-etag',
+      },
+    });
+    setNpmrc('registry=https://test.org\n_authToken=XXX');
+
+    httpMock.scope('https://test.org').get('/@neutrinojs%2Freact').reply(304);
+    const registryUrl = resolveRegistryUrl('@neutrinojs/react');
+    const dep = await getDependency(http, registryUrl, '@neutrinojs/react');
+    expect(dep).toMatchObject({ some: 'result' });
+  });
+
+  it('returns soft expired cache on npmjs error', async () => {
+    packageCache.get.mockResolvedValueOnce({
+      some: 'result',
+      cacheData: {
+        softExpireAt: '2020',
+        etag: 'some-etag',
+      },
+    });
+
+    httpMock
+      .scope('https://registry.npmjs.org')
+      .get('/@neutrinojs%2Freact')
+      .reply(500);
+    const registryUrl = resolveRegistryUrl('@neutrinojs/react');
+    const dep = await getDependency(http, registryUrl, '@neutrinojs/react');
+    expect(dep).toMatchObject({ some: 'result' });
+  });
 });
diff --git a/lib/modules/datasource/npm/get.ts b/lib/modules/datasource/npm/get.ts
index 2136bdbc86..05315fb9a4 100644
--- a/lib/modules/datasource/npm/get.ts
+++ b/lib/modules/datasource/npm/get.ts
@@ -1,13 +1,21 @@
 import url from 'url';
 import is from '@sindresorhus/is';
+import { DateTime } from 'luxon';
+import { GlobalConfig } from '../../../config/global';
 import { logger } from '../../../logger';
 import { ExternalHostError } from '../../../types/errors/external-host-error';
 import * as packageCache from '../../../util/cache/package';
 import type { Http } from '../../../util/http';
+import type { HttpOptions } from '../../../util/http/types';
 import { regEx } from '../../../util/regex';
 import { joinUrlParts } from '../../../util/url';
 import { id } from './common';
-import type { NpmDependency, NpmRelease, NpmResponse } from './types';
+import type {
+  CachedNpmDependency,
+  NpmDependency,
+  NpmRelease,
+  NpmResponse,
+} from './types';
 
 interface PackageSource {
   sourceUrl?: string;
@@ -56,19 +64,57 @@ export async function getDependency(
 
   // Now check the persistent cache
   const cacheNamespace = 'datasource-npm';
-  const cachedResult = await packageCache.get<NpmDependency>(
+  const cachedResult = await packageCache.get<CachedNpmDependency>(
     cacheNamespace,
     packageUrl
   );
-  // istanbul ignore if
   if (cachedResult) {
-    return cachedResult;
+    if (cachedResult.cacheData) {
+      const softExpireAt = DateTime.fromISO(
+        cachedResult.cacheData.softExpireAt
+      );
+      if (softExpireAt.isValid && softExpireAt > DateTime.local()) {
+        logger.trace('Cached result is not expired - reusing');
+        delete cachedResult.cacheData;
+        return cachedResult;
+      }
+      logger.trace('Cached result is soft expired');
+    } else {
+      logger.trace('Reusing legacy cached result');
+      return cachedResult;
+    }
+  }
+  const cacheMinutes = process.env.RENOVATE_CACHE_NPM_MINUTES
+    ? parseInt(process.env.RENOVATE_CACHE_NPM_MINUTES, 10)
+    : 15;
+  const softExpireAt = DateTime.local().plus({ minutes: cacheMinutes }).toISO();
+  let { cacheHardTtlMinutes } = GlobalConfig.get();
+  if (!(is.number(cacheHardTtlMinutes) && cacheHardTtlMinutes > cacheMinutes)) {
+    cacheHardTtlMinutes = cacheMinutes;
   }
 
   const uri = url.parse(packageUrl);
 
   try {
-    const raw = await http.getJson<NpmResponse>(packageUrl);
+    const options: HttpOptions = {};
+    if (cachedResult?.cacheData?.etag) {
+      logger.debug('Using cached etag');
+      options.headers = { 'If-None-Match': cachedResult.cacheData.etag };
+    }
+    const raw = await http.getJson<NpmResponse>(packageUrl, options);
+    if (cachedResult?.cacheData && raw.statusCode === 304) {
+      logger.trace('Cached data is unchanged and can be reused');
+      cachedResult.cacheData.softExpireAt = softExpireAt;
+      await packageCache.set(
+        cacheNamespace,
+        packageUrl,
+        cachedResult,
+        cacheHardTtlMinutes
+      );
+      delete cachedResult.cacheData;
+      return cachedResult;
+    }
+    const etag = raw.headers.etag;
     const res = raw.body;
     if (!res.versions || !Object.keys(res.versions).length) {
       // Registry returned a 200 OK but with no versions
@@ -125,9 +171,6 @@ export async function getDependency(
     });
     logger.trace({ dep }, 'dep');
     // serialize first before saving
-    const cacheMinutes = process.env.RENOVATE_CACHE_NPM_MINUTES
-      ? parseInt(process.env.RENOVATE_CACHE_NPM_MINUTES, 10)
-      : 15;
     // TODO: use dynamic detection of public repos instead of a static list (#9587)
     const whitelistedPublicScopes = [
       '@graphql-codegen',
@@ -140,7 +183,13 @@ export async function getDependency(
       (whitelistedPublicScopes.includes(packageName.split('/')[0]) ||
         !packageName.startsWith('@'))
     ) {
-      await packageCache.set(cacheNamespace, packageUrl, dep, cacheMinutes);
+      const cacheData = { softExpireAt, etag };
+      await packageCache.set(
+        cacheNamespace,
+        packageUrl,
+        { ...dep, cacheData },
+        etag ? cacheHardTtlMinutes : cacheMinutes
+      );
     }
     return dep;
   } catch (err) {
@@ -153,6 +202,14 @@ export async function getDependency(
       return null;
     }
     if (uri.host === 'registry.npmjs.org') {
+      if (cachedResult) {
+        logger.warn(
+          { err },
+          'npmjs error, reusing expired cached result instead'
+        );
+        delete cachedResult.cacheData;
+        return cachedResult;
+      }
       // istanbul ignore if
       if (err.name === 'ParseError' && err.body) {
         err.body = 'err.body deleted by Renovate';
diff --git a/lib/modules/datasource/npm/types.ts b/lib/modules/datasource/npm/types.ts
index d4dbcb2493..36330d0411 100644
--- a/lib/modules/datasource/npm/types.ts
+++ b/lib/modules/datasource/npm/types.ts
@@ -46,4 +46,11 @@ export interface NpmDependency extends ReleaseResult {
   sourceDirectory?: string;
 }
 
+export interface CachedNpmDependency extends NpmDependency {
+  cacheData?: {
+    etag: string | undefined;
+    softExpireAt: string;
+  };
+}
+
 export type Npmrc = Record<string, any>;
-- 
GitLab