From ba13dc547bdcea5cddd5960418e125d6eccf4458 Mon Sep 17 00:00:00 2001
From: Gabriel-Ladzaretti
 <97394622+Gabriel-Ladzaretti@users.noreply.github.com>
Date: Fri, 26 Aug 2022 07:47:14 +0300
Subject: [PATCH] feat(cache/repository): Implement S3 client (#17322)

Co-authored-by: Michael Kriese <michael.kriese@visualon.de>
---
 docs/usage/self-hosted-configuration.md       |  20 +++
 docs/usage/self-hosted-experimental.md        |  12 ++
 .../cache/repository/impl/cache-factory.ts    |   6 +-
 lib/util/cache/repository/impl/s3.spec.ts     | 133 ++++++++++++++++++
 lib/util/cache/repository/impl/s3.ts          |  70 +++++++++
 lib/util/s3.spec.ts                           |  25 +++-
 lib/util/s3.ts                                |   7 +-
 7 files changed, 270 insertions(+), 3 deletions(-)
 create mode 100644 lib/util/cache/repository/impl/s3.spec.ts
 create mode 100644 lib/util/cache/repository/impl/s3.ts

diff --git a/docs/usage/self-hosted-configuration.md b/docs/usage/self-hosted-configuration.md
index e4ca94e93f..f4f2b8c1c0 100644
--- a/docs/usage/self-hosted-configuration.md
+++ b/docs/usage/self-hosted-configuration.md
@@ -638,6 +638,26 @@ JSON files will be stored inside the `cacheDir` beside the existing file-based p
 
 ## repositoryCacheType
 
+Set this to an S3 URI to enable an S3-backed repository cache.
+
+```ts
+{
+  repositoryCacheType: 's3://bucket-name';
+}
+```
+
+<!-- prettier-ignore -->
+!!! note
+    [IAM is supported](https://docs.aws.amazon.com/sdk-for-javascript/v3/developer-guide/loading-node-credentials-iam.html) when running Renovate within an EC2 instance in an ECS cluster. In this case, no additional environment variables are required.
+    Otherwise, the following environment variables should be set in order for the S3 client to work.
+
+```
+    AWS_ACCESS_KEY_ID
+    AWS_SECRET_ACCESS_KEY
+    AWS_SESSION_TOKEN
+    AWS_REGION
+```
+
 ## requireConfig
 
 By default, Renovate needs a Renovate config file in each repository where it runs before it will propose any dependency updates.
diff --git a/docs/usage/self-hosted-experimental.md b/docs/usage/self-hosted-experimental.md
index 841bf1f168..c63539f9f1 100644
--- a/docs/usage/self-hosted-experimental.md
+++ b/docs/usage/self-hosted-experimental.md
@@ -46,3 +46,15 @@ If set, Renovate will use this string as GitLab server version instead of checki
 This can be useful when you use the GitLab `CI_JOB_TOKEN` to authenticate Renovate.
 
 Read [platform details](modules/platform/gitlab/index.md) to learn why we need the server version on GitLab.
+
+## `RENOVATE_X_S3_ENDPOINT`
+
+If set, Renovate will use this string as the `endpoint` when instantiating the AWS S3 client.
+
+## `RENOVATE_X_S3_PATH_STYLE`
+
+If set, Renovate will enable `forcePathStyle` when instantiating the AWS S3 client.
+
+> Whether to force path style URLs for S3 objects (e.g., `https://s3.amazonaws.com/<bucketName>/<key>` instead of `https://<bucketName>.s3.amazonaws.com/<key>`).
+
+Source: [AWS S3 documentation - Interface BucketEndpointInputConfig](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-s3/interfaces/bucketendpointinputconfig.html)
diff --git a/lib/util/cache/repository/impl/cache-factory.ts b/lib/util/cache/repository/impl/cache-factory.ts
index 1ebfa5136b..165917bf3b 100644
--- a/lib/util/cache/repository/impl/cache-factory.ts
+++ b/lib/util/cache/repository/impl/cache-factory.ts
@@ -2,15 +2,19 @@ import type { RepositoryCacheType } from '../../../../config/types';
 import { logger } from '../../../../logger';
 import type { RepoCache } from '../types';
 import { RepoCacheLocal } from './local';
+import { RepoCacheS3 } from './s3';
 
 export class CacheFactory {
   static get(
     repository: string,
     cacheType: RepositoryCacheType = 'local'
   ): RepoCache {
-    switch (cacheType) {
+    const type = cacheType.split('://')[0].trim().toLowerCase();
+    switch (type) {
       case 'local':
         return new RepoCacheLocal(repository);
+      case 's3':
+        return new RepoCacheS3(repository, cacheType);
       default:
         logger.warn(
           { cacheType },
diff --git a/lib/util/cache/repository/impl/s3.spec.ts b/lib/util/cache/repository/impl/s3.spec.ts
new file mode 100644
index 0000000000..e8fbdfaf90
--- /dev/null
+++ b/lib/util/cache/repository/impl/s3.spec.ts
@@ -0,0 +1,133 @@
+import { Readable } from 'stream';
+import {
+  GetObjectCommand,
+  GetObjectCommandInput,
+  PutObjectCommand,
+  PutObjectCommandInput,
+  PutObjectCommandOutput,
+  S3Client,
+} from '@aws-sdk/client-s3';
+import { mockClient } from 'aws-sdk-client-mock';
+import { partial } from '../../../../../test/util';
+import { GlobalConfig } from '../../../../config/global';
+import { logger } from '../../../../logger';
+import { parseS3Url } from '../../../s3';
+import type { RepoCacheRecord } from '../types';
+import { CacheFactory } from './cache-factory';
+import { RepoCacheS3 } from './s3';
+
+function createGetObjectCommandInput(
+  repository: string,
+  url: string
+): GetObjectCommandInput {
+  return {
+    Bucket: parseS3Url(url)?.Bucket,
+    Key: `github/${repository}/cache.json`,
+  };
+}
+
+function createPutObjectCommandInput(
+  repository: string,
+  url: string,
+  data: RepoCacheRecord
+): PutObjectCommandInput {
+  return {
+    ...createGetObjectCommandInput(repository, url),
+    Body: JSON.stringify(data),
+    ContentType: 'application/json',
+  };
+}
+
+/*
+ * Note: MockedClient.on(Command, input) will match input (using Sinon matchers) to the actual
+ *       'new Command(actualInput)' call within the tested code segment.
+ */
+
+describe('util/cache/repository/impl/s3', () => {
+  const s3Mock = mockClient(S3Client);
+  const repository = 'org/repo';
+  const repoCache = partial<RepoCacheRecord>({ payload: 'payload' });
+  const url = 's3://bucket-name';
+  const err = new Error('error');
+  let getObjectCommandInput: GetObjectCommandInput;
+  let putObjectCommandInput: PutObjectCommandInput;
+  let s3Cache: RepoCacheS3;
+
+  beforeEach(() => {
+    GlobalConfig.set({ platform: 'github' });
+    jest.clearAllMocks();
+    s3Mock.reset();
+    s3Cache = new RepoCacheS3(repository, url);
+    getObjectCommandInput = createGetObjectCommandInput(repository, url);
+    putObjectCommandInput = createPutObjectCommandInput(
+      repository,
+      url,
+      repoCache
+    );
+  });
+
+  it('successfully reads from s3', async () => {
+    const json = '{}';
+    s3Mock
+      .on(GetObjectCommand, getObjectCommandInput)
+      .resolvesOnce({ Body: Readable.from([json]) });
+    await expect(s3Cache.read()).resolves.toBe(json);
+    expect(logger.warn).toHaveBeenCalledTimes(0);
+    expect(logger.debug).toHaveBeenCalledWith('RepoCacheS3.read() - success');
+  });
+
+  it('gets an unexpected response from s3', async () => {
+    s3Mock.on(GetObjectCommand, getObjectCommandInput).resolvesOnce({});
+    await expect(s3Cache.read()).resolves.toBeNull();
+    expect(logger.warn).toHaveBeenCalledWith(
+      "RepoCacheS3.read() - failure - expecting Readable return type got 'undefined' type instead"
+    );
+  });
+
+  it('doesnt warn when no cache is found', async () => {
+    const NoSuchKeyErr = new Error('NoSuchKey');
+    NoSuchKeyErr.name = 'NoSuchKey';
+    s3Mock
+      .on(GetObjectCommand, getObjectCommandInput)
+      .rejectsOnce(NoSuchKeyErr);
+    await expect(s3Cache.read()).resolves.toBeNull();
+    expect(logger.warn).toHaveBeenCalledTimes(0);
+    expect(logger.debug).toHaveBeenCalledWith(
+      `RepoCacheS3.read() - No cached file found`
+    );
+  });
+
+  it('fails to read from s3', async () => {
+    s3Mock.on(GetObjectCommand, getObjectCommandInput).rejectsOnce(err);
+    await expect(s3Cache.read()).resolves.toBeNull();
+    expect(logger.warn).toHaveBeenCalledWith(
+      { err },
+      'RepoCacheS3.read() - failure'
+    );
+  });
+
+  it('successfully writes to s3', async () => {
+    const putObjectCommandOutput: PutObjectCommandOutput = {
+      $metadata: { attempts: 1, httpStatusCode: 200, totalRetryDelay: 0 },
+    };
+    s3Mock
+      .on(PutObjectCommand, putObjectCommandInput)
+      .resolvesOnce(putObjectCommandOutput);
+    await expect(s3Cache.write(repoCache)).toResolve();
+    expect(logger.warn).toHaveBeenCalledTimes(0);
+  });
+
+  it('fails to write to s3', async () => {
+    s3Mock.on(PutObjectCommand, putObjectCommandInput).rejectsOnce(err);
+    await expect(s3Cache.write(repoCache)).toResolve();
+    expect(logger.warn).toHaveBeenCalledWith(
+      { err },
+      'RepoCacheS3.write() - failure'
+    );
+  });
+
+  it('creates an S3 client using the cache factory', () => {
+    const cache = CacheFactory.get(repository, url);
+    expect(cache instanceof RepoCacheS3).toBeTrue();
+  });
+});
diff --git a/lib/util/cache/repository/impl/s3.ts b/lib/util/cache/repository/impl/s3.ts
new file mode 100644
index 0000000000..432c0e9ed8
--- /dev/null
+++ b/lib/util/cache/repository/impl/s3.ts
@@ -0,0 +1,70 @@
+import { Readable } from 'stream';
+import {
+  GetObjectCommand,
+  GetObjectCommandInput,
+  PutObjectCommand,
+  PutObjectCommandInput,
+} from '@aws-sdk/client-s3';
+import { logger } from '../../../../logger';
+import { getS3Client, parseS3Url } from '../../../s3';
+import { streamToString } from '../../../streams';
+import type { RepoCacheRecord } from '../types';
+import { RepoCacheBase } from './base';
+
+export class RepoCacheS3 extends RepoCacheBase {
+  private readonly s3Client;
+  private readonly bucket;
+
+  constructor(repository: string, url: string) {
+    super(repository);
+    this.bucket = parseS3Url(url)?.Bucket;
+    this.s3Client = getS3Client();
+  }
+
+  async read(): Promise<string | null> {
+    const cacheFileName = this.getCacheFileName();
+    const s3Params: GetObjectCommandInput = {
+      Bucket: this.bucket,
+      Key: cacheFileName,
+    };
+    try {
+      const { Body: res } = await this.s3Client.send(
+        new GetObjectCommand(s3Params)
+      );
+      if (res instanceof Readable) {
+        logger.debug('RepoCacheS3.read() - success');
+        return await streamToString(res);
+      }
+      logger.warn(
+        `RepoCacheS3.read() - failure - expecting Readable return type got '${typeof res}' type instead`
+      );
+    } catch (err) {
+      // https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html
+      if (err.name === 'NoSuchKey') {
+        logger.debug('RepoCacheS3.read() - No cached file found');
+      } else {
+        logger.warn({ err }, 'RepoCacheS3.read() - failure');
+      }
+    }
+    return null;
+  }
+
+  async write(data: RepoCacheRecord): Promise<void> {
+    const cacheFileName = this.getCacheFileName();
+    const s3Params: PutObjectCommandInput = {
+      Bucket: this.bucket,
+      Key: cacheFileName,
+      Body: JSON.stringify(data),
+      ContentType: 'application/json',
+    };
+    try {
+      await this.s3Client.send(new PutObjectCommand(s3Params));
+    } catch (err) {
+      logger.warn({ err }, 'RepoCacheS3.write() - failure');
+    }
+  }
+
+  private getCacheFileName(): string {
+    return `${this.platform}/${this.repository}/cache.json`;
+  }
+}
diff --git a/lib/util/s3.spec.ts b/lib/util/s3.spec.ts
index 09afe49675..31929e296d 100644
--- a/lib/util/s3.spec.ts
+++ b/lib/util/s3.spec.ts
@@ -1,6 +1,12 @@
 import { getS3Client, parseS3Url } from './s3';
 
 describe('util/s3', () => {
+  afterEach(() => {
+    delete process.env.RENOVATE_X_S3_ENDPOINT;
+    delete process.env.RENOVATE_X_S3_PATH_STYLE;
+    jest.resetModules();
+  });
+
   it('parses S3 URLs', () => {
     expect(parseS3Url('s3://bucket/key/path')).toEqual({
       Bucket: 'bucket',
@@ -9,7 +15,7 @@ describe('util/s3', () => {
   });
 
   it('returns null for non-S3 URLs', () => {
-    expect(parseS3Url('http://example.com/key/path')).toBeNull();
+    expect(parseS3Url(new URL('http://example.com/key/path'))).toBeNull();
   });
 
   it('returns null for invalid URLs', () => {
@@ -21,4 +27,21 @@ describe('util/s3', () => {
     const client2 = getS3Client();
     expect(client1).toBe(client2);
   });
+
+  it('is uses experimental env', async () => {
+    process.env.RENOVATE_X_S3_ENDPOINT = 'https://minio.domain.test';
+    process.env.RENOVATE_X_S3_PATH_STYLE = 'true';
+    const s3 = await import('./s3');
+    const client1 = s3.getS3Client();
+    const client2 = getS3Client();
+    expect(client1).not.toBe(client2);
+    expect(await client1.config.endpoint()).toStrictEqual({
+      hostname: 'minio.domain.test',
+      path: '/',
+      port: undefined,
+      protocol: 'https:',
+      query: undefined,
+    });
+    expect(client1.config.forcePathStyle).toBeTrue();
+  });
 });
diff --git a/lib/util/s3.ts b/lib/util/s3.ts
index 9b2eff1e4b..59bfcf6ab3 100644
--- a/lib/util/s3.ts
+++ b/lib/util/s3.ts
@@ -5,7 +5,12 @@ import { parseUrl } from './url';
 let s3Instance: S3Client | undefined;
 export function getS3Client(): S3Client {
   if (!s3Instance) {
-    s3Instance = new S3Client({});
+    const endpoint = process.env.RENOVATE_X_S3_ENDPOINT;
+    const forcePathStyle = process.env.RENOVATE_X_S3_PATH_STYLE;
+    s3Instance = new S3Client({
+      ...(endpoint && { endpoint }),
+      ...(forcePathStyle && { forcePathStyle: true }),
+    });
   }
   return s3Instance;
 }
-- 
GitLab