From 76a123640a0d6542c7d8c1dc4deadfa76c904fa7 Mon Sep 17 00:00:00 2001
From: Sergei Zharinov <zharinov@users.noreply.github.com>
Date: Wed, 10 Mar 2021 22:10:27 +0400
Subject: [PATCH] fix(bazel): Handle comment and string parsing (#8830)

---
 lib/manager/bazel/__fixtures__/WORKSPACE3     |  27 ++
 .../bazel/__snapshots__/extract.spec.ts.snap  | 247 ++++++++++--------
 lib/manager/bazel/extract.spec.ts             |  10 +
 lib/manager/bazel/extract.ts                  | 154 +++++++----
 4 files changed, 271 insertions(+), 167 deletions(-)
 create mode 100644 lib/manager/bazel/__fixtures__/WORKSPACE3

diff --git a/lib/manager/bazel/__fixtures__/WORKSPACE3 b/lib/manager/bazel/__fixtures__/WORKSPACE3
new file mode 100644
index 0000000000..4485bacb86
--- /dev/null
+++ b/lib/manager/bazel/__fixtures__/WORKSPACE3
@@ -0,0 +1,27 @@
+    # Eigen already had a good BUILD file from Tensorflow.
+# http_archive(
+#     name = "rules_foreign_cc",
+#     url = "https://github.com/bazelbuild/rules_foreign_cc/archive/dfccdce2c9d1063c59ddd331b94eb7cb528a96ee.tar.gz",
+#     sha256 = "5469ef8b4e2c475de443c13290cf91ba7d1255899442b1e42fcb7fcdee8ceed8",
+#     strip_prefix = "rules_foreign_cc-dfccdce2c9d1063c59ddd331b94eb7cb528a96ee",
+# )
+# load("@rules_foreign_cc//:workspace_definitions.bzl", "rules_foreign_cc_dependencies")
+# rules_foreign_cc_dependencies()
+# # Usage is a little weird, and depends
+# FOREIGN_CC_EXPOSE_ALL_FILES = """filegroup(name = "all", srcs = glob(["**"]), visibility = ["//visibility:public"])"""
+
+
+########################################
+# C++ & Cross-Platform Libraries
+
+# Boost.
+# Famous C++ library that gives rise to many new additions in the C++ standard library.
+# See https://github.com/nelhage/rules_boost, recommended from https://docs.bazel.build/versions/master/rules.html
+http_archive(
+    name = "com_github_nelhage_rules_boost",
+    url = "https://github.com/nelhage/rules_boost/archive/98495a618246683c9058dd87c2c78a2c06087999.tar.gz",
+    sha256 = "f92cb7ed66a5b24f97a7fc3917407f808c70d2689273bdd68f93d70a379d22d3",
+    strip_prefix = "rules_boost-98495a618246683c9058dd87c2c78a2c06087999",
+)
+load("@com_github_nelhage_rules_boost//:boost/boost.bzl", "boost_deps")
+boost_deps() # Also pulls in a bunch of boost dependencies if you don't have them already. See https://github.com/nelhage/rules_boost/blob/master/boost/boost.bzl
diff --git a/lib/manager/bazel/__snapshots__/extract.spec.ts.snap b/lib/manager/bazel/__snapshots__/extract.spec.ts.snap
index 8296f39ca2..ac88eb6787 100644
--- a/lib/manager/bazel/__snapshots__/extract.spec.ts.snap
+++ b/lib/manager/bazel/__snapshots__/extract.spec.ts.snap
@@ -137,25 +137,77 @@ Array [
 exports[`lib/manager/bazel/extract extractPackageFile() extracts multiple types of dependencies 1`] = `
 Array [
   Object {
-    "currentDigest": "sha256:d5a717649fd93ea5b9c430d7f84e4c37ba219eb53bd73ed1d4a5a98e9edd84a7",
-    "currentValue": "latest",
-    "datasource": "docker",
-    "depName": "py3_image_base",
-    "depType": "container_pull",
-    "lookupName": "distroless/python3-debian10",
+    "currentValue": "v1.0.5",
+    "datasource": "go",
+    "depName": "com_github_bitly_go-nsq",
+    "depType": "go_repository",
+    "lookupName": "github.com/bitly/go-nsq",
     "managerData": Object {
-      "def": "container_pull(
-    name = \\"py3_image_base\\",
-    digest = \\"sha256:d5a717649fd93ea5b9c430d7f84e4c37ba219eb53bd73ed1d4a5a98e9edd84a7\\",
-    registry = \\"gcr.io\\",
-    repository = \\"distroless/python3-debian10\\",
-    tag = \\"latest\\",
+      "def": "go_repository(
+    name = \\"com_github_bitly_go-nsq\\",
+    importpath = \\"github.com/bitly/go-nsq\\",
+    tag = \\"v1.0.5\\"
+)",
+    },
+  },
+  Object {
+    "currentDigest": "dec09d789f3dba190787f8b4454c7d3c936fed9e",
+    "currentDigestShort": "dec09d7",
+    "currentValue": "v0.0.0",
+    "datasource": "go",
+    "depName": "com_github_google_uuid",
+    "depType": "go_repository",
+    "digestOneAndOnly": true,
+    "lookupName": "github.com/google/uuid",
+    "managerData": Object {
+      "def": "go_repository(
+   name = \\"com_github_google_uuid\\",
+   importpath = \\"github.com/google/uuid\\",
+   commit = \\"dec09d789f3dba190787f8b4454c7d3c936fed9e\\"
+)",
+    },
+  },
+  Object {
+    "currentValue": "v2",
+    "datasource": "go",
+    "depName": "com_gopkgin_mgo_v2",
+    "depType": "go_repository",
+    "lookupName": "gopkg.in/mgo.v2",
+    "managerData": Object {
+      "def": "go_repository(
+    name = \\"com_gopkgin_mgo_v2\\",
+    importpath = \\"gopkg.in/mgo.v2\\",
+    tag = \\"v2\\"
+)",
+    },
+  },
+  Object {
+    "currentValue": "0.3.1",
+    "datasource": "github-releases",
+    "depName": "build_bazel_rules_nodejs",
+    "depType": "git_repository",
+    "lookupName": "bazelbuild/rules_nodejs",
+    "managerData": Object {
+      "def": "git_repository(
+    name = \\"build_bazel_rules_nodejs\\",
+    remote = \\"https://github.com/bazelbuild/rules_nodejs.git\\",
+    tag = \\"0.3.1\\",
+)",
+    },
+  },
+  Object {
+    "currentValue": "0.6.1",
+    "datasource": "github-releases",
+    "depName": "build_bazel_rules_typescript",
+    "depType": "git_repository",
+    "lookupName": "bazelbuild/rules_typescript",
+    "managerData": Object {
+      "def": "git_repository(
+    name = \\"build_bazel_rules_typescript\\",
+    remote = \\"https://github.com/bazelbuild/rules_typescript.git\\",
+    tag = \\"0.6.1\\",
 )",
     },
-    "registryUrls": Array [
-      "gcr.io",
-    ],
-    "versioning": "docker",
   },
   Object {
     "currentDigest": "446923c3756ceeaa75888f52fcbdd48bb314fbf8",
@@ -192,6 +244,35 @@ Array [
     },
     "repo": "bazelbuild/bazel-toolchains",
   },
+  Object {
+    "currentValue": "0.0.3",
+    "datasource": "github-releases",
+    "depName": "io_bazel_rules_sass",
+    "depType": "git_repository",
+    "lookupName": "bazelbuild/rules_sass",
+    "managerData": Object {
+      "def": "git_repository(
+    name = \\"io_bazel_rules_sass\\",
+    remote = \\"https://github.com/bazelbuild/rules_sass.git\\",
+    tag = \\"0.0.3\\",
+)",
+    },
+  },
+  Object {
+    "currentDigest": "b3b620e8bcff18ed3378cd3f35ebeb7016d71f71",
+    "datasource": "github-releases",
+    "depName": "com_github_bazelbuild_buildtools",
+    "depType": "git_repository",
+    "lookupName": "bazelbuild/buildtools",
+    "managerData": Object {
+      "def": "git_repository(
+    name = \\"com_github_bazelbuild_buildtools\\",
+    remote = \\"https://github.com/bazelbuild/buildtools.git\\",
+    # Note, this commit matches the version of buildifier in angular/ngcontainer
+    commit = \\"b3b620e8bcff18ed3378cd3f35ebeb7016d71f71\\",
+)",
+    },
+  },
   Object {
     "currentValue": "0.7.1",
     "datasource": "github-releases",
@@ -242,6 +323,27 @@ Array [
     },
     "repo": "GoogleContainerTools/distroless",
   },
+  Object {
+    "currentDigest": "sha256:d5a717649fd93ea5b9c430d7f84e4c37ba219eb53bd73ed1d4a5a98e9edd84a7",
+    "currentValue": "latest",
+    "datasource": "docker",
+    "depName": "py3_image_base",
+    "depType": "container_pull",
+    "lookupName": "distroless/python3-debian10",
+    "managerData": Object {
+      "def": "container_pull(
+    name = \\"py3_image_base\\",
+    digest = \\"sha256:d5a717649fd93ea5b9c430d7f84e4c37ba219eb53bd73ed1d4a5a98e9edd84a7\\",
+    registry = \\"gcr.io\\",
+    repository = \\"distroless/python3-debian10\\",
+    tag = \\"latest\\",
+)",
+    },
+    "registryUrls": Array [
+      "gcr.io",
+    ],
+    "versioning": "docker",
+  },
   Object {
     "currentDigest": "446923c3756ceeaa75888f52fcbdd48bb314fbf8",
     "datasource": "github-tags",
@@ -258,107 +360,26 @@ Array [
     },
     "repo": "GoogleContainerTools/distroless",
   },
+]
+`;
+
+exports[`lib/manager/bazel/extract extractPackageFile() handle comments and strings 1`] = `
+Array [
   Object {
-    "currentValue": "v1.0.5",
-    "datasource": "go",
-    "depName": "com_github_bitly_go-nsq",
-    "depType": "go_repository",
-    "lookupName": "github.com/bitly/go-nsq",
-    "managerData": Object {
-      "def": "go_repository(
-    name = \\"com_github_bitly_go-nsq\\",
-    importpath = \\"github.com/bitly/go-nsq\\",
-    tag = \\"v1.0.5\\"
-)",
-    },
-  },
-  Object {
-    "currentDigest": "dec09d789f3dba190787f8b4454c7d3c936fed9e",
-    "currentDigestShort": "dec09d7",
-    "currentValue": "v0.0.0",
-    "datasource": "go",
-    "depName": "com_github_google_uuid",
-    "depType": "go_repository",
-    "digestOneAndOnly": true,
-    "lookupName": "github.com/google/uuid",
-    "managerData": Object {
-      "def": "go_repository(
-   name = \\"com_github_google_uuid\\",
-   importpath = \\"github.com/google/uuid\\",
-   commit = \\"dec09d789f3dba190787f8b4454c7d3c936fed9e\\"
-)",
-    },
-  },
-  Object {
-    "currentValue": "v2",
-    "datasource": "go",
-    "depName": "com_gopkgin_mgo_v2",
-    "depType": "go_repository",
-    "lookupName": "gopkg.in/mgo.v2",
-    "managerData": Object {
-      "def": "go_repository(
-    name = \\"com_gopkgin_mgo_v2\\",
-    importpath = \\"gopkg.in/mgo.v2\\",
-    tag = \\"v2\\"
-)",
-    },
-  },
-  Object {
-    "currentValue": "0.3.1",
-    "datasource": "github-releases",
-    "depName": "build_bazel_rules_nodejs",
-    "depType": "git_repository",
-    "lookupName": "bazelbuild/rules_nodejs",
-    "managerData": Object {
-      "def": "git_repository(
-    name = \\"build_bazel_rules_nodejs\\",
-    remote = \\"https://github.com/bazelbuild/rules_nodejs.git\\",
-    tag = \\"0.3.1\\",
-)",
-    },
-  },
-  Object {
-    "currentValue": "0.6.1",
-    "datasource": "github-releases",
-    "depName": "build_bazel_rules_typescript",
-    "depType": "git_repository",
-    "lookupName": "bazelbuild/rules_typescript",
-    "managerData": Object {
-      "def": "git_repository(
-    name = \\"build_bazel_rules_typescript\\",
-    remote = \\"https://github.com/bazelbuild/rules_typescript.git\\",
-    tag = \\"0.6.1\\",
-)",
-    },
-  },
-  Object {
-    "currentValue": "0.0.3",
-    "datasource": "github-releases",
-    "depName": "io_bazel_rules_sass",
-    "depType": "git_repository",
-    "lookupName": "bazelbuild/rules_sass",
-    "managerData": Object {
-      "def": "git_repository(
-    name = \\"io_bazel_rules_sass\\",
-    remote = \\"https://github.com/bazelbuild/rules_sass.git\\",
-    tag = \\"0.0.3\\",
-)",
-    },
-  },
-  Object {
-    "currentDigest": "b3b620e8bcff18ed3378cd3f35ebeb7016d71f71",
-    "datasource": "github-releases",
-    "depName": "com_github_bazelbuild_buildtools",
-    "depType": "git_repository",
-    "lookupName": "bazelbuild/buildtools",
+    "currentDigest": "98495a618246683c9058dd87c2c78a2c06087999",
+    "datasource": "github-tags",
+    "depName": "com_github_nelhage_rules_boost",
+    "depType": "http_archive",
+    "lookupName": "nelhage/rules_boost",
     "managerData": Object {
-      "def": "git_repository(
-    name = \\"com_github_bazelbuild_buildtools\\",
-    remote = \\"https://github.com/bazelbuild/buildtools.git\\",
-    # Note, this commit matches the version of buildifier in angular/ngcontainer
-    commit = \\"b3b620e8bcff18ed3378cd3f35ebeb7016d71f71\\",
+      "def": "http_archive(
+    name = \\"com_github_nelhage_rules_boost\\",
+    url = \\"https://github.com/nelhage/rules_boost/archive/98495a618246683c9058dd87c2c78a2c06087999.tar.gz\\",
+    sha256 = \\"f92cb7ed66a5b24f97a7fc3917407f808c70d2689273bdd68f93d70a379d22d3\\",
+    strip_prefix = \\"rules_boost-98495a618246683c9058dd87c2c78a2c06087999\\",
 )",
     },
+    "repo": "nelhage/rules_boost",
   },
 ]
 `;
diff --git a/lib/manager/bazel/extract.spec.ts b/lib/manager/bazel/extract.spec.ts
index 4df84266eb..9bc9c200bc 100644
--- a/lib/manager/bazel/extract.spec.ts
+++ b/lib/manager/bazel/extract.spec.ts
@@ -11,6 +11,11 @@ const workspace2File = readFileSync(
   'utf8'
 );
 
+const workspace3File = readFileSync(
+  'lib/manager/bazel/__fixtures__/WORKSPACE3',
+  'utf8'
+);
+
 const fileWithBzlExtension = readFileSync(
   'lib/manager/bazel/__fixtures__/repositories.bzl',
   'utf8'
@@ -28,12 +33,17 @@ describe('lib/manager/bazel/extract', () => {
     });
     it('extracts multiple types of dependencies', () => {
       const res = extractPackageFile(workspaceFile);
+      expect(res.deps).toHaveLength(14);
       expect(res.deps).toMatchSnapshot();
     });
     it('extracts github tags', () => {
       const res = extractPackageFile(workspace2File);
       expect(res.deps).toMatchSnapshot();
     });
+    it('handle comments and strings', () => {
+      const res = extractPackageFile(workspace3File);
+      expect(res.deps).toMatchSnapshot();
+    });
     it('extracts dependencies from *.bzl files', () => {
       const res = extractPackageFile(fileWithBzlExtension);
       expect(res.deps).toMatchSnapshot();
diff --git a/lib/manager/bazel/extract.ts b/lib/manager/bazel/extract.ts
index 769c07af83..5c42fade64 100644
--- a/lib/manager/bazel/extract.ts
+++ b/lib/manager/bazel/extract.ts
@@ -1,13 +1,13 @@
 /* eslint no-plusplus: 0  */
 import { parse as _parse } from 'url';
 import parse from 'github-url-from-git';
+import moo from 'moo';
 import * as datasourceDocker from '../../datasource/docker';
 import * as datasourceGithubReleases from '../../datasource/github-releases';
 import * as datasourceGithubTags from '../../datasource/github-tags';
 import * as datasourceGo from '../../datasource/go';
 import { logger } from '../../logger';
 import { SkipReason } from '../../types';
-import { regEx } from '../../util/regex';
 import * as dockerVersioning from '../../versioning/docker';
 import type { PackageDependency, PackageFile } from '../types';
 
@@ -53,63 +53,109 @@ function parseUrl(urlString: string): UrlParsedResult | null {
   return null;
 }
 
-function findBalancedParenIndex(longString: string): number {
-  /**
-   * Minimalistic string parser with single task -> find last char in def.
-   * It treats [)] as the last char.
-   * To find needed closing parenthesis we need to increment
-   * nesting depth when parser feeds opening parenthesis
-   * if one opening parenthesis -> 1
-   * if two opening parenthesis -> 2
-   * if two opening and one closing parenthesis -> 1
-   * if ["""] found then ignore all [)] until closing ["""] parsed.
-   * https://github.com/renovatebot/renovate/pull/3459#issuecomment-478249702
-   */
-  let intShouldNotBeOdd = 0; // openClosePythonMultiLineComment
-  let parenNestingDepth = 1;
-  return [...longString].findIndex((char, i, arr) => {
-    switch (char) {
-      case '(':
-        parenNestingDepth++;
-        break;
-      case ')':
-        parenNestingDepth--;
-        break;
-      case '"':
-        if (i > 1 && arr.slice(i - 2, i).every((prev) => char === prev)) {
-          intShouldNotBeOdd++;
-        }
-        break;
-      default:
-        break;
+const dummyLexer = {
+  main: {
+    lineComment: { match: /#.*?$/ },
+    leftParen: { match: '(' },
+    rightParen: { match: ')' },
+    longDoubleQuoted: {
+      match: '"""',
+      push: 'longDoubleQuoted',
+    },
+    doubleQuoted: {
+      match: '"',
+      push: 'doubleQuoted',
+    },
+    longSingleQuoted: {
+      match: "'''",
+      push: 'longSingleQuoted',
+    },
+    singleQuoted: {
+      match: "'",
+      push: 'singleQuoted',
+    },
+    def: {
+      match: new RegExp(
+        [
+          'container_pull',
+          'http_archive',
+          'http_file',
+          'go_repository',
+          'git_repository',
+        ].join('|')
+      ),
+    },
+    unknown: { match: /[^]/, lineBreaks: true },
+  },
+  longDoubleQuoted: {
+    stringFinish: { match: '"""', pop: 1 },
+    char: { match: /[^]/, lineBreaks: true },
+  },
+  doubleQuoted: {
+    stringFinish: { match: '"', pop: 1 },
+    char: { match: /[^]/, lineBreaks: true },
+  },
+  longSingleQuoted: {
+    stringFinish: { match: "'''", pop: 1 },
+    char: { match: /[^]/, lineBreaks: true },
+  },
+  singleQuoted: {
+    stringFinish: { match: "'", pop: 1 },
+    char: { match: /[^]/, lineBreaks: true },
+  },
+};
+
+function parseContent(content: string): string[] {
+  const lexer = moo.states(dummyLexer);
+  lexer.reset(content);
+  let balance = 0;
+
+  let def: null | string = null;
+  const result: string[] = [];
+
+  const finishDef = (): void => {
+    if (def !== null) {
+      result.push(def);
     }
+    def = null;
+  };
 
-    return !parenNestingDepth && !(intShouldNotBeOdd % 2) && char === ')';
-  });
-}
+  const startDef = (): void => {
+    finishDef();
+    def = '';
+  };
 
-function parseContent(content: string): string[] {
-  return [
-    'container_pull',
-    'http_archive',
-    'http_file',
-    'go_repository',
-    'git_repository',
-  ].reduce(
-    (acc, prefix) => [
-      ...acc,
-      ...content
-        .split(regEx(prefix + '\\s*\\(', 'g'))
-        .slice(1)
-        .map((base) => {
-          const ind = findBalancedParenIndex(base);
+  const updateDef = (chunk: string): void => {
+    if (def !== null) {
+      def += chunk;
+    }
+  };
+
+  let token = lexer.next();
+  while (token) {
+    const { type, value } = token;
+
+    if (type === 'def') {
+      startDef();
+    }
+
+    updateDef(value);
+
+    if (type === 'leftParen') {
+      balance += 1;
+    }
+
+    if (type === 'rightParen') {
+      balance -= 1;
+      if (balance <= 0) {
+        finishDef();
+      }
+    }
+
+    token = lexer.next();
+  }
 
-          return ind >= 0 && `${prefix}(${base.slice(0, ind)})`;
-        })
-        .filter(Boolean),
-    ],
-    [] as string[]
-  );
+  return result;
 }
 
 export function extractPackageFile(
-- 
GitLab