// lib/util/github/graphql/cache-strategies/abstract-cache-strategy.ts
import { DateTime } from 'luxon';
import type {
  GithubDatasourceItem,
  GithubGraphqlCacheRecord,
  GithubGraphqlCacheStrategy,
} from '../types';
import { isDateExpired } from '../util';

/**
 * Shared logic of the GitHub GraphQL cache strategies.
 *
 * Pages of freshly fetched items are merged into a previously persisted
 * record by `reconcile()`; `finalize()` then drops items that vanished
 * upstream and writes the record back. Where the record is kept is decided
 * by the subclass through `load()` and `persist()`.
 */
export abstract class AbstractGithubGraphqlCacheStrategy<
  GithubItem extends GithubDatasourceItem
> implements GithubGraphqlCacheStrategy<GithubItem>
{
  /**
   * Lifetime of a cache record, in days. The same period decides
   * whether a single item counts as stabilized.
   */
  protected static readonly cacheTTLDays = 30;

  /**
   * Moment captured when the strategy is constructed. Every expiry check
   * and the persisted `updatedAt` value are derived from it.
   */
  protected readonly now = DateTime.now();

  /**
   * Versions written by `reconcile()` through this instance.
   */
  private readonly reconciledVersions = new Set<string>();

  /**
   * State that ends up in the persisted record.
   * `items` stays `undefined` until `getItems()` has run once.
   */
  private items: Record<string, GithubItem> | undefined;
  protected createdAt = this.now;
  protected updatedAt = this.now;

  constructor(
    protected readonly cacheNs: string,
    protected readonly cacheKey: string
  ) {}

  /**
   * Returns the item map, reading it through `load()` on first use.
   * A missing record, or one whose `createdAt` is older than the TTL,
   * is replaced by an empty record stamped with the current timestamps.
   */
  private async getItems(): Promise<Record<string, GithubItem>> {
    const alreadyLoaded = this.items;
    if (alreadyLoaded) {
      return alreadyLoaded;
    }

    // Fallback used when nothing usable is stored.
    const emptyRecord: GithubGraphqlCacheRecord<GithubItem> = {
      items: {},
      createdAt: this.createdAt.toISO(),
      updatedAt: this.updatedAt.toISO(),
    };

    const stored = await this.load();
    const ttl = { days: AbstractGithubGraphqlCacheStrategy.cacheTTLDays };
    const record =
      stored && !isDateExpired(this.now, stored.createdAt, ttl)
        ? stored
        : emptyRecord;

    const { createdAt, updatedAt, items } = record;
    this.createdAt = DateTime.fromISO(createdAt);
    this.updatedAt = DateTime.fromISO(updatedAt);
    this.items = items;
    return items;
  }

  /**
   * An item is stabilized once its release is at least as old as the
   * cache TTL; such an item is assumed not to be updated or removed
   * on the GitHub side anymore.
   */
  private isStabilized(item: GithubItem): boolean {
    const { releaseTimestamp } = item;
    return isDateExpired(this.now, releaseTimestamp, {
      days: AbstractGithubGraphqlCacheStrategy.cacheTTLDays,
    });
  }

  /**
   * Merges one GraphQL page into the cache. The page is expected to be
   * sorted by `releaseTimestamp`, newest first.
   *
   * @returns `true` when an already cached, stabilized item was reached,
   *   i.e. further pages are assumed to contain nothing new
   */
  async reconcile(items: GithubItem[]): Promise<boolean> {
    const cache = await this.getItems();

    for (const freshItem of items) {
      const known = cache[freshItem.version];

      // The item that ends pagination is left untouched in the cache.
      if (known && this.isStabilized(known)) {
        this.items = cache;
        return true;
      }

      cache[freshItem.version] = freshItem;
      this.reconciledVersions.add(freshItem.version);
    }

    this.items = cache;
    return false;
  }

  /**
   * Keeps only items that are stabilized or were seen by `reconcile()`
   * (anything else is treated as removed upstream), persists the result
   * and returns the surviving items.
   */
  async finalize(): Promise<GithubItem[]> {
    const cache = await this.getItems();

    const kept: Record<string, GithubItem> = {};
    Object.entries(cache).forEach(([version, item]) => {
      if (this.isStabilized(item) || this.reconciledVersions.has(version)) {
        kept[version] = item;
      }
    });

    await this.store(kept);
    return Object.values(kept);
  }

  /**
   * Hands the record to `persist()`, stamping `updatedAt` with `now`.
   */
  private async store(cachedItems: Record<string, GithubItem>): Promise<void> {
    await this.persist({
      items: cachedItems,
      createdAt: this.createdAt.toISO(),
      updatedAt: this.now.toISO(),
    });
  }

  /**
   * Reading and writing the record is left to the concrete strategy.
   */
  abstract load(): Promise<GithubGraphqlCacheRecord<GithubItem> | undefined>;
  abstract persist(
    cacheRecord: GithubGraphqlCacheRecord<GithubItem>
  ): Promise<void>;
}
// lib/util/github/graphql/cache-strategies/memory-cache-strategy.spec.ts
// Each test seeds the in-memory cache, pins the clock with `mockTime`,
// and then checks what `reconcile()` / `finalize()` return and store.
describe('util/github/graphql/cache-strategies/memory-cache-strategy', () => {
  beforeEach(() => {
    jest.resetAllMocks();
    // Re-initialise the memory cache before every test.
    memCache.init();
  });

  it('resets old cache', async () => {
    const items = {
      '1': { version: '1', releaseTimestamp: isoTs('2020-01-01 10:00') },
    };
    // Created at 2022-10-01 15:30: with the 30-day TTL the record
    // expires at 2022-10-31 15:30.
    const cacheRecord: CacheRecord = {
      items,
      createdAt: isoTs('2022-10-01 15:30'),
      updatedAt: isoTs('2022-10-30 12:35'),
    };
    memCache.set('github-graphql-cache:foo:bar', clone(cacheRecord));

    // At this moment, cache is valid
    let now = '2022-10-31 15:29:59';
    mockTime(now);

    let strategy = new GithubGraphqlMemoryCacheStrategy('foo', 'bar');
    let isPaginationDone = await strategy.reconcile([items['1']]);
    let res = await strategy.finalize();

    // The only item is already cached and stabilized, so pagination stops
    // and the record is stored back with only `updatedAt` changed.
    expect(res).toEqual(Object.values(items));
    expect(isPaginationDone).toBe(true);
    expect(memCache.get('github-graphql-cache:foo:bar')).toEqual({
      ...cacheRecord,
      updatedAt: isoTs(now),
    });

    // One second later, the cache is invalid
    now = '2022-10-31 15:30:00';
    mockTime(now);

    strategy = new GithubGraphqlMemoryCacheStrategy('foo', 'bar');
    isPaginationDone = await strategy.reconcile([]);
    res = await strategy.finalize();

    // The expired record is discarded: an empty record stamped
    // with the current time replaces it.
    expect(res).toEqual([]);
    expect(isPaginationDone).toBe(false);
    expect(memCache.get('github-graphql-cache:foo:bar')).toEqual({
      items: {},
      createdAt: isoTs(now),
      updatedAt: isoTs(now),
    });
  });

  it('reconciles old cache record with new items', async () => {
    const oldItems = {
      '1': { version: '1', releaseTimestamp: isoTs('2020-01-01 10:00') },
      '2': { version: '2', releaseTimestamp: isoTs('2020-01-01 11:00') },
      '3': { version: '3', releaseTimestamp: isoTs('2020-01-01 12:00') },
    };
    const cacheRecord: CacheRecord = {
      items: oldItems,
      createdAt: isoTs('2022-10-30 12:00'),
      updatedAt: isoTs('2022-10-30 12:00'),
    };
    memCache.set('github-graphql-cache:foo:bar', clone(cacheRecord));

    const now = '2022-10-31 15:30';
    mockTime(now);

    // Version 4 is not in the cache yet.
    const newItem = {
      version: '4',
      releaseTimestamp: isoTs('2022-10-15 18:00'),
    };
    const page = [newItem];

    const strategy = new GithubGraphqlMemoryCacheStrategy('foo', 'bar');
    const isPaginationDone = await strategy.reconcile(page);
    const res = await strategy.finalize();

    // The new item is added next to the old ones; no cached item was
    // reached on the page, so pagination is not reported as done.
    expect(res).toEqual([...Object.values(oldItems), newItem]);
    expect(isPaginationDone).toBe(false);
    expect(memCache.get('github-graphql-cache:foo:bar')).toEqual({
      items: {
        ...oldItems,
        '4': newItem,
      },
      createdAt: isoTs('2022-10-30 12:00'),
      updatedAt: isoTs(now),
    });
  });

  it('signals to stop pagination', async () => {
    const oldItems = {
      '1': { releaseTimestamp: isoTs('2020-01-01 10:00'), version: '1' },
      '2': { releaseTimestamp: isoTs('2020-01-01 11:00'), version: '2' },
      '3': { releaseTimestamp: isoTs('2020-01-01 12:00'), version: '3' },
    };
    const cacheRecord: CacheRecord = {
      items: oldItems,
      createdAt: isoTs('2022-10-30 12:00'),
      updatedAt: isoTs('2022-10-30 12:00'),
    };
    memCache.set('github-graphql-cache:foo:bar', clone(cacheRecord));

    const now = '2022-10-31 15:30';
    mockTime(now);

    // Newest first: the new version 4, then the cached versions 3, 2, 1.
    const page = [
      ...Object.values(oldItems),
      { version: '4', releaseTimestamp: isoTs('2022-10-15 18:00') },
    ].reverse();

    const strategy = new GithubGraphqlMemoryCacheStrategy('foo', 'bar');
    const isPaginationDone = await strategy.reconcile(page);

    // Version 3 is already cached and stabilized, which ends pagination.
    expect(isPaginationDone).toBe(true);
  });

  it('detects removed packages', async () => {
    const items = {
      // stabilized
      '0': { version: '0', releaseTimestamp: isoTs('2022-09-30 10:00') }, // to be preserved
      '1': { version: '1', releaseTimestamp: isoTs('2022-10-01 10:00') }, // to be preserved
      // not stabilized
      '2': { version: '2', releaseTimestamp: isoTs('2022-10-02 10:00') },
      '3': { version: '3', releaseTimestamp: isoTs('2022-10-03 10:00') }, // to be deleted
      '4': { version: '4', releaseTimestamp: isoTs('2022-10-04 10:00') },
      '5': { version: '5', releaseTimestamp: isoTs('2022-10-05 10:00') }, // to be deleted
      '6': { version: '6', releaseTimestamp: isoTs('2022-10-06 10:00') },
      '7': { version: '7', releaseTimestamp: isoTs('2022-10-07 10:00') }, // to be deleted
      '8': { version: '8', releaseTimestamp: isoTs('2022-10-08 10:00') },
    };
    const cacheRecord: CacheRecord = {
      items,
      createdAt: isoTs('2022-10-30 12:00'),
      updatedAt: isoTs('2022-10-30 12:00'),
    };
    memCache.set('github-graphql-cache:foo:bar', clone(cacheRecord));

    const now = '2022-10-31 15:30';
    mockTime(now);

    // Newest first; versions 3, 5 and 7 are missing from the page.
    const page = [
      items['1'],
      items['2'],
      items['4'],
      items['6'],
      items['8'],
    ].reverse();

    const strategy = new GithubGraphqlMemoryCacheStrategy('foo', 'bar');
    const isPaginationDone = await strategy.reconcile(page);
    const res = await strategy.finalize();

    // Stabilized items (0, 1) and items seen on the page (2, 4, 6, 8) stay;
    // the non-stabilized items missing from the page are dropped.
    expect(res).toEqual([
      { version: '0', releaseTimestamp: isoTs('2022-09-30 10:00') },
      { version: '1', releaseTimestamp: isoTs('2022-10-01 10:00') },
      { version: '2', releaseTimestamp: isoTs('2022-10-02 10:00') },
      { version: '4', releaseTimestamp: isoTs('2022-10-04 10:00') },
      { version: '6', releaseTimestamp: isoTs('2022-10-06 10:00') },
      { version: '8', releaseTimestamp: isoTs('2022-10-08 10:00') },
    ]);
    // Reaching the stabilized version 1 on the page ended pagination.
    expect(isPaginationDone).toBe(true);
    expect(memCache.get('github-graphql-cache:foo:bar')).toEqual({
      items: {
        '0': { version: '0', releaseTimestamp: isoTs('2022-09-30 10:00') },
        '1': { version: '1', releaseTimestamp: isoTs('2022-10-01 10:00') },
        '2': { version: '2', releaseTimestamp: isoTs('2022-10-02 10:00') },
        '4': { version: '4', releaseTimestamp: isoTs('2022-10-04 10:00') },
        '6': { version: '6', releaseTimestamp: isoTs('2022-10-06 10:00') },
        '8': { version: '8', releaseTimestamp: isoTs('2022-10-08 10:00') },
      },
      createdAt: isoTs('2022-10-30 12:00'),
      updatedAt: isoTs('2022-10-31 15:30'),
    });
  });
});
// lib/util/github/graphql/cache-strategies/memory-cache-strategy.ts
import * as memCache from '../../../cache/memory';
import type { GithubDatasourceItem, GithubGraphqlCacheRecord } from '../types';
import { AbstractGithubGraphqlCacheStrategy } from './abstract-cache-strategy';

/**
 * Strategy that keeps the cache record in the in-memory cache.
 * Intended for private packages and for tests.
 */
export class GithubGraphqlMemoryCacheStrategy<
  GithubItem extends GithubDatasourceItem
> extends AbstractGithubGraphqlCacheStrategy<GithubItem> {
  /**
   * Memory-cache key built from the namespace and the cache key.
   */
  private get memCacheKey(): string {
    const { cacheNs, cacheKey } = this;
    return `github-graphql-cache:${cacheNs}:${cacheKey}`;
  }

  /**
   * Reads the record from the memory cache; resolves with whatever
   * the cache holds under the key.
   */
  load(): Promise<GithubGraphqlCacheRecord<GithubItem> | undefined> {
    const cached = memCache.get(this.memCacheKey);
    return Promise.resolve(cached);
  }

  /**
   * Writes the record to the memory cache under the same key.
   */
  persist(cacheRecord: GithubGraphqlCacheRecord<GithubItem>): Promise<void> {
    memCache.set(this.memCacheKey, cacheRecord);
    return Promise.resolve();
  }
}
// lib/util/github/graphql/cache-strategies/package-cache-strategy.spec.ts
// Shape of the record exchanged with the (mocked) package cache.
type CacheRecord = GithubGraphqlCacheRecord<GithubDatasourceItem>;

describe('util/github/graphql/cache-strategies/package-cache-strategy', () => {
  // Spies on the package cache, so reads can be stubbed and writes inspected.
  const cacheGet = jest.spyOn(packageCache, 'get');
  const cacheSet = jest.spyOn(packageCache, 'set');

  beforeEach(() => {
    jest.resetAllMocks();
  });

  it('reconciles old cache record with new items', async () => {
    const oldItems = {
      '1': { version: '1', releaseTimestamp: isoTs('2020-01-01 10:00') },
      '2': { version: '2', releaseTimestamp: isoTs('2020-01-01 11:00') },
      '3': { version: '3', releaseTimestamp: isoTs('2020-01-01 12:00') },
    };
    const cacheRecord: CacheRecord = {
      items: oldItems,
      createdAt: isoTs('2022-10-15 12:00'),
      updatedAt: isoTs('2022-10-15 12:00'),
    };
    // The strategy reads this record through `packageCache.get`.
    cacheGet.mockResolvedValueOnce(clone(cacheRecord));

    // 15 days after the record was created.
    const now = '2022-10-30 12:00';
    mockTime(now);

    const newItem = {
      version: '4',
      releaseTimestamp: isoTs('2022-10-15 18:00'),
    };
    const page = [newItem];

    const strategy = new GithubGraphqlPackageCacheStrategy('foo', 'bar');
    const isPaginationDone = await strategy.reconcile(page);
    const res = await strategy.finalize();

    expect(res).toEqual([...Object.values(oldItems), newItem]);
    expect(isPaginationDone).toBe(false);
    // Exactly one write: the merged record, with a TTL equal to the time
    // left until `createdAt` + 30 days (15 days, expressed in minutes).
    expect(cacheSet.mock.calls).toEqual([
      [
        'foo',
        'bar',
        {
          items: {
            '1': { version: '1', releaseTimestamp: isoTs('2020-01-01 10:00') },
            '2': { version: '2', releaseTimestamp: isoTs('2020-01-01 11:00') },
            '3': { version: '3', releaseTimestamp: isoTs('2020-01-01 12:00') },
            '4': { version: '4', releaseTimestamp: isoTs('2022-10-15 18:00') },
          },
          createdAt: isoTs('2022-10-15 12:00'),
          updatedAt: isoTs('2022-10-30 12:00'),
        },
        15 * 24 * 60,
      ],
    ]);
  });
});
// lib/util/github/graphql/cache-strategies/package-cache-strategy.ts
import * as packageCache from '../../../cache/package';
import type { GithubDatasourceItem, GithubGraphqlCacheRecord } from '../types';
import { AbstractGithubGraphqlCacheStrategy } from './abstract-cache-strategy';

/**
 * Strategy that keeps the cache record in the package cache.
 * Intended for public packages.
 */
export class GithubGraphqlPackageCacheStrategy<
  GithubItem extends GithubDatasourceItem
> extends AbstractGithubGraphqlCacheStrategy<GithubItem> {
  /**
   * Reads the record stored under `cacheNs` / `cacheKey`.
   */
  load(): Promise<GithubGraphqlCacheRecord<GithubItem> | undefined> {
    const { cacheNs, cacheKey } = this;
    return packageCache.get(cacheNs, cacheKey);
  }

  /**
   * Writes the record with a TTL equal to the time left until
   * `createdAt` plus the cache TTL. Nothing is written when
   * no time is left.
   */
  async persist(
    cacheRecord: GithubGraphqlCacheRecord<GithubItem>
  ): Promise<void> {
    const expiresAt = this.createdAt.plus({
      days: AbstractGithubGraphqlCacheStrategy.cacheTTLDays,
    });
    const minutesLeft = expiresAt.diff(this.now, ['minutes']).as('minutes');

    // Skip the write for a zero, negative or non-numeric remainder.
    if (!minutesLeft || minutesLeft <= 0) {
      return;
    }

    await packageCache.set(
      this.cacheNs,
      this.cacheKey,
      cacheRecord,
      minutesLeft
    );
  }
}
releaseTimestamp: t3, bar: '3' }, + { version: v2, releaseTimestamp: t2, bar: '2' }, + { version: v1, releaseTimestamp: t1, bar: '1' }, ]); }); it('handles paginated data', async () => { const page1 = resp( - [{ version: v1, releaseTimestamp: t1, foo: '1' }], + [{ version: v3, releaseTimestamp: t3, foo: '3' }], 'aaa' ); const page2 = resp( [{ version: v2, releaseTimestamp: t2, foo: '2' }], 'bbb' ); - const page3 = resp([{ version: v3, releaseTimestamp: t3, foo: '3' }]); + const page3 = resp([{ version: v1, releaseTimestamp: t1, foo: '1' }]); httpMock .scope('https://api.github.com/') .post('/graphql') @@ -251,9 +251,9 @@ describe('util/github/graphql/datasource-fetcher', () => { ); expect(res).toEqual([ - { version: v1, releaseTimestamp: t1, bar: '1' }, - { version: v2, releaseTimestamp: t2, bar: '2' }, { version: v3, releaseTimestamp: t3, bar: '3' }, + { version: v2, releaseTimestamp: t2, bar: '2' }, + { version: v1, releaseTimestamp: t1, bar: '1' }, ]); }); @@ -369,9 +369,9 @@ describe('util/github/graphql/datasource-fetcher', () => { describe('Cacheable flag', () => { const data = [ - { version: v1, releaseTimestamp: t1, foo: '1' }, - { version: v2, releaseTimestamp: t2, foo: '2' }, { version: v3, releaseTimestamp: t3, foo: '3' }, + { version: v2, releaseTimestamp: t2, foo: '2' }, + { version: v1, releaseTimestamp: t1, foo: '1' }, ]; test.each` diff --git a/lib/util/github/graphql/datasource-fetcher.ts b/lib/util/github/graphql/datasource-fetcher.ts index 658fca7e907a04fe03938eb11a3566e64cbc1418..6ab09ebb56f75df5c453286827da86da3c751d50 100644 --- a/lib/util/github/graphql/datasource-fetcher.ts +++ b/lib/util/github/graphql/datasource-fetcher.ts @@ -9,8 +9,11 @@ import type { } from '../../http/github'; import type { HttpResponse } from '../../http/types'; import { getApiBaseUrl } from '../url'; +import { GithubGraphqlMemoryCacheStrategy } from './cache-strategies/memory-cache-strategy'; +import { GithubGraphqlPackageCacheStrategy } from 
'./cache-strategies/package-cache-strategy'; import type { GithubDatasourceItem, + GithubGraphqlCacheStrategy, GithubGraphqlDatasourceAdapter, GithubGraphqlPayload, GithubGraphqlRepoParams, @@ -65,7 +68,7 @@ export class GithubGraphqlDatasourceFetcher< private cursor: string | null = null; - private isCacheable = false; + private isCacheable: boolean | null = null; constructor( packageConfig: GithubPackageConfig, @@ -80,13 +83,12 @@ export class GithubGraphqlDatasourceFetcher< this.baseUrl = getApiBaseUrl(registryUrl).replace(/\/v3\/$/, '/'); // Replace for GHE } - private getFingerprint(): string { - return [ - this.baseUrl, - this.repoOwner, - this.repoName, - this.datasourceAdapter.key, - ].join(':'); + private getCacheNs(): string { + return this.datasourceAdapter.key; + } + + private getCacheKey(): string { + return [this.baseUrl, this.repoOwner, this.repoName].join(':'); } private getRawQueryOptions(): GithubHttpOptions { @@ -157,8 +159,10 @@ export class GithubGraphqlDatasourceFetcher< this.queryCount += 1; - if (!this.isCacheable && data.repository.isRepoPrivate === false) { - this.isCacheable = true; + if (this.isCacheable === null) { + // For values other than explicit `false`, + // we assume that items can not be cached. 
+ this.isCacheable = data.repository.isRepoPrivate === false; } const res = data.repository.payload; @@ -211,14 +215,28 @@ export class GithubGraphqlDatasourceFetcher< return res; } - private async doPaginatedQuery(): Promise<ResultItem[]> { - const resultItems: ResultItem[] = []; + private _cacheStrategy: GithubGraphqlCacheStrategy<ResultItem> | undefined; - let hasNextPage: boolean | undefined = true; - let cursor: string | undefined; - while (hasNextPage && !this.hasReachedQueryLimit()) { + private cacheStrategy(): GithubGraphqlCacheStrategy<ResultItem> { + if (this._cacheStrategy) { + return this._cacheStrategy; + } + const cacheNs = this.getCacheNs(); + const cacheKey = this.getCacheKey(); + this._cacheStrategy = this.isCacheable + ? new GithubGraphqlPackageCacheStrategy<ResultItem>(cacheNs, cacheKey) + : new GithubGraphqlMemoryCacheStrategy<ResultItem>(cacheNs, cacheKey); + return this._cacheStrategy; + } + + private async doPaginatedQuery(): Promise<ResultItem[]> { + let hasNextPage = true; + let isPaginationDone = false; + let nextCursor: string | undefined; + while (hasNextPage && !isPaginationDone && !this.hasReachedQueryLimit()) { const queryResult = await this.doShrinkableQuery(); + const resultItems: ResultItem[] = []; for (const node of queryResult.nodes) { const item = this.datasourceAdapter.transform(node); // istanbul ignore if: will be tested later @@ -228,14 +246,21 @@ export class GithubGraphqlDatasourceFetcher< resultItems.push(item); } - hasNextPage = queryResult?.pageInfo?.hasNextPage; - cursor = queryResult?.pageInfo?.endCursor; - if (hasNextPage && cursor) { - this.cursor = cursor; + // It's important to call `getCacheStrategy()` after `doShrinkableQuery()` + // because `doShrinkableQuery()` may change `this.isCacheable`. + // + // Otherwise, cache items for public packages will never be persisted + // in long-term cache. 
+ isPaginationDone = await this.cacheStrategy().reconcile(resultItems); + + hasNextPage = !!queryResult?.pageInfo?.hasNextPage; + nextCursor = queryResult?.pageInfo?.endCursor; + if (hasNextPage && nextCursor) { + this.cursor = nextCursor; } } - return resultItems; + return this.cacheStrategy().finalize(); } /** @@ -244,8 +269,7 @@ export class GithubGraphqlDatasourceFetcher< * Instead, it ensures that same package release is not fetched twice. */ private doConcurrentQuery(): Promise<ResultItem[]> { - const packageFingerprint = this.getFingerprint(); - const cacheKey = `github-datasource-promises:${packageFingerprint}`; + const cacheKey = `github-pending:${this.getCacheNs()}:${this.getCacheKey()}`; const resultPromise = memCache.get<Promise<ResultItem[]>>(cacheKey) ?? this.doPaginatedQuery(); memCache.set(cacheKey, resultPromise); diff --git a/lib/util/github/graphql/readme.md b/lib/util/github/graphql/readme.md new file mode 100644 index 0000000000000000000000000000000000000000..002a45c11530c12e375dbccf5b72d7955b1c173f --- /dev/null +++ b/lib/util/github/graphql/readme.md @@ -0,0 +1,166 @@ +GraphQL can be used to efficiently retrieve all the tags and releases from GitHub. +The approach involves fetching items in reverse chronological order by the `created_at` field. +Items can be retrieved page by page until the first cached item is reached. + +Although sorting by the `updated_at` field would be more precise, this option is not available in the API. +As a result, we also re-fetch all items that are relatively _fresh_. +The freshness period is equal to the TTL of the entire cache, and this allows for updates or deletions to be reflected in the cache during that time. + +In most cases, only one page of releases will need to be fetched during a Renovate run. +While it is possible to reduce this to zero for most runs, the practical implementation is complex and prone to errors. 
+ +# Components overview + +``` +lib/util/github/graphql +│ +├── cache-strategies +│ ├── abstract-cache-strategy.ts <- common logic: `reconcile()` and `finalize()` +│ ├── memory-cache-strategy.ts <- single Renovate run (private packages) +│ └── package-cache-strategy.ts <- long-term persistence (public packages) +│ +├── query-adapters +│ ├── releases-query-adapter.ts <- GitHub releases +│ └── tags-query-adapter.ts <- GitHub tags +│ +├── datasource-fetcher.ts <- Complex pagination loop +│ +└── index.ts <- Facade that hides whole thing +``` + +The datasource-fetcher.ts file contains the core component that implements the logic for looping over paginated GraphQL results. +This class is meant to be instantiated every time we need to paginate over GraphQL results. +It is responsible for handling several aspects of the fetch process, including: + +- Making HTTP requests to the `/graphql` endpoint +- Handling and aggregating errors that may occur during the fetch process +- Dynamically adjusting the page size and retrying in the event of server errors +- Enforcing a maximum limit on the number of queries that can be made +- Detecting whether a package is private or public, and selecting the appropriate cache strategy (in-memory or long-term) accordingly +- Ensuring proper concurrency when querying the same package simultaneously. + +The `cache-strategies/` directory is responsible for caching implementation. +The core function is `reconcile()` which updates the cache data structure with pages of items one-by-one. + +The files in `query-adapters/` directory allow for GitHub releases and tags to be fetched according to their specifics and to be transformed to the form suitable for caching. +For cached items, only `version` and `releaseTimestamp` fields are mandatory. +Other fields are specific to GitHub tags or GitHub releases. + +# Process overview + +## Initial fetch + +Let's suppose we perform fetching for the first time. 
+For simplicity, this example assumes that we are retrieving items in small batches of 5 at a time. +The cache TTL is assumed to be 30 days. + +```js +// Page 1 +[ + { "version": "3.1.1", "releaseTimestamp": "2022-12-18" }, + { "version": "3.1.0", "releaseTimestamp": "2022-12-15" }, + { "version": "3.0.2", "releaseTimestamp": "2022-12-09" }, + { "version": "3.0.1", "releaseTimestamp": "2022-12-08" }, + { "version": "3.0.0", "releaseTimestamp": "2022-12-05" }, +] + +// Page 2 +[ + { "version": "2.2.2", "releaseTimestamp": "2022-11-23" }, + { "version": "2.2.1", "releaseTimestamp": "2022-10-17" }, + { "version": "2.2.0", "releaseTimestamp": "2022-10-13" }, + { "version": "2.1.1", "releaseTimestamp": "2022-10-07" }, + { "version": "2.1.0", "releaseTimestamp": "2022-09-21" }, +] + +// Page 3 +[ + { "version": "2.0.1", "releaseTimestamp": "2022-09-18" }, + { "version": "2.0.0", "releaseTimestamp": "2022-09-01" }, +] +``` + +As we retrieve items during the fetch process, we gradually construct a data structure in the following form: + +```js +{ + "items": { + "3.1.1": { "version": "3.1.1", "releaseTimestamp": "2022-12-18" }, + "3.1.0": { "version": "3.1.0", "releaseTimestamp": "2022-12-15" }, + "3.0.2": { "version": "3.0.2", "releaseTimestamp": "2022-12-09" }, + "3.0.1": { "version": "3.0.1", "releaseTimestamp": "2022-12-08" }, + "3.0.0": { "version": "3.0.0", "releaseTimestamp": "2022-12-05" }, + "2.2.2": { "version": "2.2.2", "releaseTimestamp": "2022-11-23" }, + "2.2.1": { "version": "2.2.1", "releaseTimestamp": "2022-10-17" }, + "2.2.0": { "version": "2.2.0", "releaseTimestamp": "2022-10-13" }, + "2.1.1": { "version": "2.1.1", "releaseTimestamp": "2022-10-07" }, + "2.1.0": { "version": "2.1.0", "releaseTimestamp": "2022-09-21" }, + "2.0.1": { "version": "2.0.1", "releaseTimestamp": "2022-09-18" }, + "2.0.0": { "version": "2.0.0", "releaseTimestamp": "2022-09-01" }, + }, + "createdAt": "2022-12-20", + "updatedAt": "2022-12-20", +} +``` + +Internally, we index each 
release by version name for quicker access. +When the fetch process is complete, we return the values of the items object. +If the repository is public, we also persist this data structure in a long-term cache for future use. + +## Recurring fetches + +In the case where we already have items stored in the cache, we can model the fetch process as follows. +Suppose we have a new release that changes the pagination of our items. +Also note that versions `3.0.1` and `3.0.2` have been deleted since the last fetch. +The resulting pagination would look like this: + +```js +// Page 1 --- FETCHED AND RECONCILED --- +[ + { "version": "4.0.0", "releaseTimestamp": "2022-12-30" }, // new <- item cached + { "version": "3.1.1", "releaseTimestamp": "2022-12-18" }, // fresh <- item updated + { "version": "3.1.0", "releaseTimestamp": "2022-12-15" }, // fresh <- item updated +//{ "version": "3.0.2", "releaseTimestamp": "2022-12-09" }, // fresh <- item deleted +//{ "version": "3.0.1", "releaseTimestamp": "2022-12-08" }, // fresh <- item deleted + { "version": "3.0.0", "releaseTimestamp": "2022-12-05" }, // fresh <- item updated + { "version": "2.2.2", "releaseTimestamp": "2022-11-23" }, // old <- fetching stopped +] + +// Page 2 --- NOT FETCHED --- +[ + { "version": "2.2.1", "releaseTimestamp": "2022-10-17" }, // old + { "version": "2.2.0", "releaseTimestamp": "2022-10-13" }, // old + { "version": "2.1.1", "releaseTimestamp": "2022-10-07" }, // old + { "version": "2.1.0", "releaseTimestamp": "2022-09-21" }, // old + { "version": "2.0.1", "releaseTimestamp": "2022-09-18" }, // old +] + +// Page 3 --- NOT FETCHED --- +[ + { "version": "2.0.0", "releaseTimestamp": "2022-09-01" }, // old +] +``` + +Given that we performed the fetch on the day of the latest release, the new cache looks like this: + +```js +{ + "items": { + "4.0.0": { "version": "4.0.0", "releaseTimestamp": "2022-12-30" }, + "3.1.1": { "version": "3.1.1", "releaseTimestamp": "2022-12-18" }, + "3.1.0": { "version": "3.1.0", "releaseTimestamp": "2022-12-15" 
}, + "3.0.0": { "version": "3.0.0", "releaseTimestamp": "2022-12-05" }, + "2.2.2": { "version": "2.2.2", "releaseTimestamp": "2022-11-23" }, + "2.2.1": { "version": "2.2.1", "releaseTimestamp": "2022-10-17" }, + "2.2.0": { "version": "2.2.0", "releaseTimestamp": "2022-10-13" }, + "2.1.1": { "version": "2.1.1", "releaseTimestamp": "2022-10-07" }, + "2.1.0": { "version": "2.1.0", "releaseTimestamp": "2022-09-21" }, + "2.0.1": { "version": "2.0.1", "releaseTimestamp": "2022-09-18" }, + "2.0.0": { "version": "2.0.0", "releaseTimestamp": "2022-09-01" }, + }, + "createdAt": "2022-12-20", + "updatedAt": "2022-12-30", +} +``` + +It will be updated by further fetches until the cache is reset on `2023-01-19`, 30 days after `createdAt`. diff --git a/lib/util/github/graphql/types.ts b/lib/util/github/graphql/types.ts index a1c4195190c6c9f19593967bc560c28875dd8898..7b20caeed7b8b2f63c3a73ff80d11faa6ff8c017 100644 --- a/lib/util/github/graphql/types.ts +++ b/lib/util/github/graphql/types.ts @@ -122,3 +122,18 @@ export interface GithubGraphqlRepoParams { cursor: string | null; count: number; } + +export interface GithubGraphqlCacheRecord< + GithubItem extends GithubDatasourceItem +> { + items: Record<string, GithubItem>; + createdAt: string; + updatedAt: string; +} + +export interface GithubGraphqlCacheStrategy< + GithubItem extends GithubDatasourceItem +> { + reconcile(items: GithubItem[]): Promise<boolean>; + finalize(): Promise<GithubItem[]>; +} diff --git a/lib/util/github/graphql/util.spec.ts b/lib/util/github/graphql/util.spec.ts index 9af5472026d6866152435ebf040aa8f05ff14afa..e33c31b917e4c00eab5555a96fe8a14bc286c6bd 100644 --- a/lib/util/github/graphql/util.spec.ts +++ b/lib/util/github/graphql/util.spec.ts @@ -1,6 +1,9 @@ import { codeBlock } from 'common-tags'; import { parse as graphqlParse } from 'graphql'; -import { prepareQuery } from './util'; +import { DateTime } from 'luxon'; +import { isDateExpired, prepareQuery } from './util'; + +const isoTs = (t: string) => DateTime.fromJSDate(new 
Date(t)).toISO(); describe('util/github/graphql/util', () => { describe('prepareQuery', () => { @@ -28,4 +31,27 @@ describe('util/github/graphql/util', () => { expect(() => graphqlParse(prepareQuery(payloadQuery))).toThrow(); }); }); + + test.each` + currentTime | initialTimestamp | duration | expected + ${'2022-11-25 15:58'} | ${'2022-11-25 15:00'} | ${{ hours: 1 }} | ${false} + ${'2022-11-25 15:59'} | ${'2022-11-25 15:00'} | ${{ hours: 1 }} | ${false} + ${'2022-11-25 16:00'} | ${'2022-11-25 15:00'} | ${{ hours: 1 }} | ${true} + ${'2022-11-25 16:01'} | ${'2022-11-25 15:00'} | ${{ hours: 1 }} | ${true} + ${'2022-11-25 14:58'} | ${'2022-11-24 15:00'} | ${{ days: 1 }} | ${false} + ${'2022-11-25 14:59'} | ${'2022-11-24 15:00'} | ${{ days: 1 }} | ${false} + ${'2022-11-25 15:00'} | ${'2022-11-24 15:00'} | ${{ days: 1 }} | ${true} + ${'2022-11-25 15:01'} | ${'2022-11-24 15:00'} | ${{ days: 1 }} | ${true} + `( + 'isDateExpired($currentTime, $initialTimestamp, $duration) === $expected', + ({ currentTime, initialTimestamp, duration, expected }) => { + expect( + isDateExpired( + DateTime.fromISO(isoTs(currentTime)), + isoTs(initialTimestamp), + duration + ) + ).toBe(expected); + } + ); }); diff --git a/lib/util/github/graphql/util.ts b/lib/util/github/graphql/util.ts index 2da426c2159c9d2c95feb1dced25a429ad5cd437..d595c39de488e27c00d4b1a66ace15aaded53535 100644 --- a/lib/util/github/graphql/util.ts +++ b/lib/util/github/graphql/util.ts @@ -1,3 +1,5 @@ +import { DateTime, DurationLikeObject } from 'luxon'; + export function prepareQuery(payloadQuery: string): string { return ` query($owner: String!, $name: String!, $cursor: String, $count: Int!) { @@ -8,3 +10,16 @@ export function prepareQuery(payloadQuery: string): string { } `; } + +/** + * Tells whether the time `duration` is expired starting + * from the `date` (ISO date format) at the moment of `now`. 
/**
 * Checks whether `duration` has fully elapsed since `initialTimestamp`
 * when observed at `currentTime`.
 *
 * @param currentTime moment at which the check is made
 * @param initialTimestamp ISO string marking the start of the period
 * @param duration length of the period
 * @returns `true` once `currentTime` is at or past
 *   `initialTimestamp` plus `duration`
 */
export function isDateExpired(
  currentTime: DateTime,
  initialTimestamp: string,
  duration: DurationLikeObject
): boolean {
  const startedAt = DateTime.fromISO(initialTimestamp);
  const deadline = startedAt.plus(duration);
  // The boundary is inclusive: reaching the deadline counts as expired.
  return deadline <= currentTime;
}