From e2a9b5f4c6d9d8b214b71ea48fb8abbc57c6560f Mon Sep 17 00:00:00 2001
From: Sergei Zharinov <zharinov@users.noreply.github.com>
Date: Wed, 25 Sep 2024 13:52:28 -0300
Subject: [PATCH] refactor(sbt): Reimplement `parseIndexDir` usage (#31613)

---
 .../datasource/sbt-package/index.spec.ts      | 79 +++++++++++--------
 lib/modules/datasource/sbt-package/index.ts   | 52 ++++++------
 lib/modules/datasource/sbt-package/util.ts    | 21 ++---
 .../datasource/sbt-plugin/index.spec.ts       | 37 ++++++---
 lib/modules/datasource/sbt-plugin/index.ts    | 23 ++++--
 5 files changed, 125 insertions(+), 87 deletions(-)

diff --git a/lib/modules/datasource/sbt-package/index.spec.ts b/lib/modules/datasource/sbt-package/index.spec.ts
index 620a7958f8..1855156b97 100644
--- a/lib/modules/datasource/sbt-package/index.spec.ts
+++ b/lib/modules/datasource/sbt-package/index.spec.ts
@@ -1,27 +1,26 @@
+import { codeBlock } from 'common-tags';
 import { getPkgReleases } from '..';
 import { Fixtures } from '../../../../test/fixtures';
 import * as httpMock from '../../../../test/http-mock';
 import { regEx } from '../../../util/regex';
 import * as mavenVersioning from '../../versioning/maven';
 import { MAVEN_REPO } from '../maven/common';
-import { parseIndexDir } from './util';
+import { extractPageLinks } from './util';
 import { SbtPackageDatasource } from '.';
 
 describe('modules/datasource/sbt-package/index', () => {
   it('parses Maven index directory', () => {
     expect(
-      parseIndexDir(
-        Fixtures.get(`maven-index.html`),
-        (x) => !regEx(/^\.+/).test(x),
+      extractPageLinks(Fixtures.get(`maven-index.html`), (x) =>
+        regEx(/^\.+/).test(x) ? null : x,
       ),
     ).toMatchSnapshot();
   });
 
   it('parses sbt index directory', () => {
     expect(
-      parseIndexDir(
-        Fixtures.get(`sbt-plugins-index.html`),
-        (x) => !regEx(/^\.+/).test(x),
+      extractPageLinks(Fixtures.get(`sbt-plugins-index.html`), (x) =>
+        regEx(/^\.+/).test(x) ? null : x,
       ),
     ).toMatchSnapshot();
   });
@@ -59,7 +58,12 @@ describe('modules/datasource/sbt-package/index', () => {
       httpMock
         .scope('https://repo.maven.apache.org')
         .get('/maven2/com/example/')
-        .reply(200, '<a href="empty/">empty_2.12/</a>\n')
+        .reply(
+          200,
+          codeBlock`
+            <a href="empty/">empty_2.12/</a>
+          `,
+        )
         .get('/maven2/com/example/empty/')
         .reply(200, '')
         .get('/maven2/com.example/')
@@ -85,24 +89,37 @@ describe('modules/datasource/sbt-package/index', () => {
         .get('/org/example/')
         .reply(
           200,
-          [
-            `<a href="example/" title='example/'>example_2.12/</a>`,
-            `<a href="example_2.12/" title='example_2.12/'>example_2.12/</a>`,
-            `<a href="example_native/" title='example_native/'>example_native/</a>`,
-            `<a href="example_sjs/" title='example_sjs/'>example_sjs/</a>`,
-          ].join('\n'),
+          codeBlock`
+            <a href="../" title='../'>../</a>
+            <a href="example/" title='example/'>example_2.12/</a>
+            <a href="example_2.12/" title='example_2.12/'>example_2.12/</a>
+            <a href="example_native/" title='example_native/'>example_native/</a>
+            <a href="example_sjs/" title='example_sjs/'>example_sjs/</a>
+          `,
         )
         .get('/org/example/example/')
-        .reply(200, `<a href='1.2.0/'>1.2.0/</a>`)
+        .reply(
+          200,
+          codeBlock`
+            <a href='../'>../</a>
+            <a href='1.2.0/'>1.2.0/</a>
+            `,
+        )
         .get('/org/example/example_2.12/')
-        .reply(200, `<a href='1.2.3/'>1.2.3/</a>`)
+        .reply(
+          200,
+          codeBlock`
+            <a href='../'>../</a>
+            <a href='1.2.3/'>1.2.3/</a>
+          `,
+        )
         .get('/org/example/example/1.2.3/example-1.2.3.pom')
         .twice()
-        .reply(200, ``)
+        .reply(200, '')
         .get('/org/example/example_2.12/1.2.3/example-1.2.3.pom')
-        .reply(200, ``)
+        .reply(200, '')
         .get('/org/example/example_2.12/1.2.3/example_2.12-1.2.3.pom')
-        .reply(200, ``);
+        .reply(200, '');
 
       const res = await getPkgReleases({
         versioning: mavenVersioning.id,
@@ -163,15 +180,15 @@ describe('modules/datasource/sbt-package/index', () => {
         .get('/kafka-avro-serializer/7.0.1/kafka-avro-serializer-7.0.1.pom')
         .reply(
           200,
-          `
-          <project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-          xmlns="http://maven.apache.org/POM/4.0.0"
-          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">z
-            <artifactId>kafka-avro-serializer</artifactId>
-            <packaging>jar</packaging>
-            <name>kafka-avro-serializer</name>
-          </project>
-        `,
+          codeBlock`
+            <project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+            xmlns="http://maven.apache.org/POM/4.0.0"
+            xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">z
+              <artifactId>kafka-avro-serializer</artifactId>
+              <packaging>jar</packaging>
+              <name>kafka-avro-serializer</name>
+            </project>
+          `,
         );
 
       const res = await getPkgReleases({
@@ -197,7 +214,7 @@ describe('modules/datasource/sbt-package/index', () => {
         .get('/org/example/example/1.2.3/example-1.2.3.pom')
         .reply(
           200,
-          `
+          codeBlock`
             <project>
               <url>https://package.example.org/about</url>
               <scm>
@@ -231,8 +248,8 @@ describe('modules/datasource/sbt-package/index', () => {
         .get('/org/example/example_2.13/maven-metadata.xml')
         .reply(
           200,
-          `
-          <?xml version="1.0" encoding="UTF-8"?>
+          codeBlock`
+            <?xml version="1.0" encoding="UTF-8"?>
             <metadata>
               <groupId>org.example</groupId>
               <artifactId>package</artifactId>
diff --git a/lib/modules/datasource/sbt-package/index.ts b/lib/modules/datasource/sbt-package/index.ts
index 96f68b0fbf..afbb629cc5 100644
--- a/lib/modules/datasource/sbt-package/index.ts
+++ b/lib/modules/datasource/sbt-package/index.ts
@@ -13,11 +13,7 @@ import type {
   RegistryStrategy,
   ReleaseResult,
 } from '../types';
-import {
-  getLatestVersion,
-  normalizeRootRelativeUrls,
-  parseIndexDir,
-} from './util';
+import { extractPageLinks, getLatestVersion } from './util';
 
 export class SbtPackageDatasource extends MavenDatasource {
   static override id = 'sbt-package';
@@ -48,21 +44,23 @@ export class SbtPackageDatasource extends MavenDatasource {
       pkgUrl,
     );
     if (indexContent) {
-      const parseSubdirs = (content: string): string[] =>
-        parseIndexDir(content, (x) => {
-          if (x === artifact) {
-            return true;
-          }
-          if (x.startsWith(`${artifact}_native`)) {
-            return false;
-          }
-          if (x.startsWith(`${artifact}_sjs`)) {
-            return false;
-          }
-          return x.startsWith(`${artifact}_`);
-        });
-      const normalizedContent = normalizeRootRelativeUrls(indexContent, pkgUrl);
-      let artifactSubdirs = parseSubdirs(normalizedContent);
+      const rootPath = new URL(pkgUrl).pathname;
+      let artifactSubdirs = extractPageLinks(indexContent, (href) => {
+        const path = href.replace(rootPath, '');
+        if (
+          path.startsWith(`${artifact}_native`) ||
+          path.startsWith(`${artifact}_sjs`)
+        ) {
+          return null;
+        }
+
+        if (path === artifact || path.startsWith(`${artifact}_`)) {
+          return path;
+        }
+
+        return null;
+      });
+
       if (
         scalaVersion &&
         artifactSubdirs.includes(`${artifact}_${scalaVersion}`)
@@ -81,14 +79,20 @@ export class SbtPackageDatasource extends MavenDatasource {
   ): Promise<string[] | null> {
     if (artifactSubdirs) {
       const releases: string[] = [];
-      const parseReleases = (content: string): string[] =>
-        parseIndexDir(content, (x) => !regEx(/^\.+$/).test(x));
       for (const searchSubdir of artifactSubdirs) {
         const pkgUrl = ensureTrailingSlash(`${searchRoot}/${searchSubdir}`);
         const { body: content } = await downloadHttpProtocol(this.http, pkgUrl);
         if (content) {
-          const normalizedContent = normalizeRootRelativeUrls(content, pkgUrl);
-          const subdirReleases = parseReleases(normalizedContent);
+          const rootPath = new URL(pkgUrl).pathname;
+          const subdirReleases = extractPageLinks(content, (href) => {
+            const path = href.replace(rootPath, '');
+            if (path.startsWith('.')) {
+              return null;
+            }
+
+            return path;
+          });
+
           subdirReleases.forEach((x) => releases.push(x));
         }
       }
diff --git a/lib/modules/datasource/sbt-package/util.ts b/lib/modules/datasource/sbt-package/util.ts
index 7b567b7147..4a790d2aed 100644
--- a/lib/modules/datasource/sbt-package/util.ts
+++ b/lib/modules/datasource/sbt-package/util.ts
@@ -1,24 +1,15 @@
 import { coerceArray } from '../../../util/array';
+import { filterMap } from '../../../util/filter-map';
 import { compare } from '../../versioning/maven/compare';
 
 const linkRegExp = /(?<=href=['"])[^'"]*(?=\/['"])/gi;
 
-export function parseIndexDir(
-  content: string,
-  filterFn: (x: string) => boolean,
+export function extractPageLinks(
+  html: string,
+  filterMapHref: (href: string) => string | null | undefined,
 ): string[] {
-  const unfiltered = coerceArray(content.match(linkRegExp));
-  return unfiltered.filter(filterFn);
-}
-
-export function normalizeRootRelativeUrls(
-  content: string,
-  rootUrl: string | URL,
-): string {
-  const rootRelativePath = new URL(rootUrl.toString()).pathname;
-  return content.replace(linkRegExp, (href: string) =>
-    href.replace(rootRelativePath, ''),
-  );
+  const unfiltered = coerceArray(html.match(linkRegExp));
+  return filterMap(unfiltered, filterMapHref);
 }
 
 export function getLatestVersion(versions: string[] | null): string | null {
diff --git a/lib/modules/datasource/sbt-plugin/index.spec.ts b/lib/modules/datasource/sbt-plugin/index.spec.ts
index 8b0d53481d..c541938f76 100644
--- a/lib/modules/datasource/sbt-plugin/index.spec.ts
+++ b/lib/modules/datasource/sbt-plugin/index.spec.ts
@@ -1,11 +1,11 @@
-import { codeBlock, html } from 'common-tags';
+import { codeBlock } from 'common-tags';
 import { getPkgReleases } from '..';
 import { Fixtures } from '../../../../test/fixtures';
 import * as httpMock from '../../../../test/http-mock';
 import { regEx } from '../../../util/regex';
 import * as mavenVersioning from '../../versioning/maven';
 import { MAVEN_REPO } from '../maven/common';
-import { parseIndexDir } from '../sbt-package/util';
+import { extractPageLinks } from '../sbt-package/util';
 import { SbtPluginDatasource } from '.';
 
 const mavenIndexHtml = Fixtures.get(`maven-index.html`);
@@ -14,13 +14,17 @@ const sbtPluginIndex = Fixtures.get(`sbt-plugins-index.html`);
 describe('modules/datasource/sbt-plugin/index', () => {
   it('parses Maven index directory', () => {
     expect(
-      parseIndexDir(mavenIndexHtml, (x) => !regEx(/^\.+/).test(x)),
+      extractPageLinks(mavenIndexHtml, (x) =>
+        regEx(/^\.+/).test(x) ? null : x,
+      ),
     ).toMatchSnapshot();
   });
 
   it('parses sbt index directory', () => {
     expect(
-      parseIndexDir(sbtPluginIndex, (x) => !regEx(/^\.+/).test(x)),
+      extractPageLinks(sbtPluginIndex, (x) =>
+        regEx(/^\.+/).test(x) ? null : x,
+      ),
     ).toMatchSnapshot();
   });
 
@@ -43,7 +47,8 @@ describe('modules/datasource/sbt-plugin/index', () => {
         .get('/maven2/org/scalatest/')
         .reply(
           200,
-          html`
+          codeBlock`
+            <a href="../">../</a>
             <a href="scalatest/">scalatest/</a>
             <a href="scalatest_2.12/">scalatest_2.12/</a>
             <a href="scalatest_sjs2.12/">scalatest_sjs2.12/</a>
@@ -53,7 +58,13 @@ describe('modules/datasource/sbt-plugin/index', () => {
       httpMock
         .scope('https://repo.maven.apache.org')
         .get('/maven2/org/scalatest/scalatest/')
-        .reply(200, "<a href='1.2.0/'>1.2.0/</a>");
+        .reply(
+          200,
+          codeBlock`
+            <a href='../'>../</a>
+            <a href='1.2.0/'>1.2.0/</a>
+          `,
+        );
       httpMock
         .scope('https://repo.maven.apache.org')
         .get('/maven2/org/scalatest/scalatest_2.12/')
@@ -64,10 +75,11 @@ describe('modules/datasource/sbt-plugin/index', () => {
         .get('/maven2/org/foundweekends/sbt-bintray/')
         .reply(
           200,
-          html`
+          codeBlock`
             <html>
               <head> </head>
               <body>
+                <pre><a href="../">../</a></pre>
                 <pre><a href="scala_2.12/">scala_2.12/</a></pre>
               </body>
             </html>
@@ -78,10 +90,11 @@ describe('modules/datasource/sbt-plugin/index', () => {
         .get('/maven2/org/foundweekends/sbt-bintray/scala_2.12/')
         .reply(
           200,
-          html`
+          codeBlock`
             <html>
               <head> </head>
               <body>
+                <pre><a href="../">../</a></pre>
                 <pre><a href="sbt_1.0/">sbt_1.0/</a></pre>
               </body>
             </html>
@@ -92,10 +105,11 @@ describe('modules/datasource/sbt-plugin/index', () => {
         .get('/maven2/org/foundweekends/sbt-bintray/scala_2.12/sbt_1.0/')
         .reply(
           200,
-          html`
+          codeBlock`
             <html>
               <head> </head>
               <body>
+                <pre><a href="../">../</a></pre>
                 <pre><a href="0.5.5/">0.5.5/</a></pre>
               </body>
             </html>
@@ -107,7 +121,8 @@ describe('modules/datasource/sbt-plugin/index', () => {
         .get('/maven2/io/get-coursier/')
         .reply(
           200,
-          html`
+          codeBlock`
+            <a href="../">../</a>
             <a href="sbt-coursier_2.10_0.13/">sbt-coursier_2.10_0.13/</a>
             <a href="sbt-coursier_2.12_1.0/">sbt-coursier_2.12_1.0/</a>
             <a href="sbt-coursier_2.12_1.0.0-M5/"
@@ -123,7 +138,7 @@ describe('modules/datasource/sbt-plugin/index', () => {
         .get('/maven2/io/get-coursier/sbt-coursier_2.12_1.0/')
         .reply(
           200,
-          html`
+          codeBlock`
             <a href="2.0.0-RC2/">2.0.0-RC2/</a>
             <a href="2.0.0-RC6-1/">2.0.0-RC6-1/</a>
             <a href="2.0.0-RC6-2/">2.0.0-RC6-2/</a>
diff --git a/lib/modules/datasource/sbt-plugin/index.ts b/lib/modules/datasource/sbt-plugin/index.ts
index de9ca82afe..e316d1e220 100644
--- a/lib/modules/datasource/sbt-plugin/index.ts
+++ b/lib/modules/datasource/sbt-plugin/index.ts
@@ -7,7 +7,7 @@ import { compare } from '../../versioning/maven/compare';
 import { MAVEN_REPO } from '../maven/common';
 import { downloadHttpProtocol } from '../maven/util';
 import { SbtPackageDatasource } from '../sbt-package';
-import { getLatestVersion, parseIndexDir } from '../sbt-package/util';
+import { extractPageLinks, getLatestVersion } from '../sbt-package/util';
 import type {
   GetReleasesConfig,
   RegistryStrategy,
@@ -43,15 +43,20 @@ export class SbtPluginDatasource extends SbtPackageDatasource {
     scalaVersion: string,
   ): Promise<string[] | null> {
     const searchRoot = `${rootUrl}/${artifact}`;
-    const parse = (content: string): string[] =>
-      parseIndexDir(content, (x) => !regEx(/^\.+$/).test(x));
+    const hrefFilterMap = (href: string): string | null => {
+      if (href.startsWith('.')) {
+        return null;
+      }
+
+      return href;
+    };
     const { body: indexContent } = await downloadHttpProtocol(
       this.http,
       ensureTrailingSlash(searchRoot),
     );
     if (indexContent) {
       const releases: string[] = [];
-      const scalaVersionItems = parse(indexContent);
+      const scalaVersionItems = extractPageLinks(indexContent, hrefFilterMap);
       const scalaVersions = scalaVersionItems.map((x) =>
         x.replace(regEx(/^scala_/), ''),
       );
@@ -65,7 +70,10 @@ export class SbtPluginDatasource extends SbtPackageDatasource {
           ensureTrailingSlash(searchSubRoot),
         );
         if (subRootContent) {
-          const sbtVersionItems = parse(subRootContent);
+          const sbtVersionItems = extractPageLinks(
+            subRootContent,
+            hrefFilterMap,
+          );
           for (const sbtItem of sbtVersionItems) {
             const releasesRoot = `${searchSubRoot}/${sbtItem}`;
             const { body: releasesIndexContent } = await downloadHttpProtocol(
@@ -73,7 +81,10 @@ export class SbtPluginDatasource extends SbtPackageDatasource {
               ensureTrailingSlash(releasesRoot),
             );
             if (releasesIndexContent) {
-              const releasesParsed = parse(releasesIndexContent);
+              const releasesParsed = extractPageLinks(
+                releasesIndexContent,
+                hrefFilterMap,
+              );
               releasesParsed.forEach((x) => releases.push(x));
             }
           }
-- 
GitLab