From 5c517f4adaeb7efd555c57184c1c7376dfc190f6 Mon Sep 17 00:00:00 2001
From: lluiscab <lluiscab@gmail.com>
Date: Fri, 16 Jun 2023 09:27:17 +0200
Subject: [PATCH] feat(CODEOWNERS): Improve pr files owner detection (#21955)

Co-authored-by: HonkingGoose <34918129+HonkingGoose@users.noreply.github.com>
Co-authored-by: Michael Kriese <michael.kriese@visualon.de>
Co-authored-by: Rhys Arkins <rhys@arkins.net>
---
 .../repository/update/pr/code-owners.spec.ts  | 187 +++++++++++++++++-
 .../repository/update/pr/code-owners.ts       | 150 +++++++++++---
 2 files changed, 313 insertions(+), 24 deletions(-)

diff --git a/lib/workers/repository/update/pr/code-owners.spec.ts b/lib/workers/repository/update/pr/code-owners.spec.ts
index 3fa999750a..191b66e928 100644
--- a/lib/workers/repository/update/pr/code-owners.spec.ts
+++ b/lib/workers/repository/update/pr/code-owners.spec.ts
@@ -1,3 +1,4 @@
+import { codeBlock } from 'common-tags';
 import { mock } from 'jest-mock-extended';
 import { fs, git } from '../../../../../test/util';
 import type { Pr } from '../../../../modules/platform';
@@ -22,13 +23,195 @@ describe('workers/repository/update/pr/code-owners', () => {
       expect(codeOwners).toEqual(['@jimmy']);
     });
 
+    it('respects orphan files', async () => {
+      fs.readLocalFile.mockResolvedValueOnce(
+        codeBlock`
+          * @jimmy
+          yarn.lock
+        `
+      );
+      git.getBranchFiles.mockResolvedValueOnce(['yarn.lock']);
+      const codeOwners = await codeOwnersForPr(pr);
+      expect(codeOwners).toEqual([]);
+    });
+
+    it('does not return any owners if PR has no changes', async () => {
+      fs.readLocalFile.mockResolvedValueOnce('* @jimmy');
+      git.getBranchFiles.mockResolvedValueOnce([]);
+      const codeOwners = await codeOwnersForPr(pr);
+      expect(codeOwners).toEqual([]);
+    });
+
     it('returns more specific code owners', async () => {
       fs.readLocalFile.mockResolvedValueOnce(
         ['* @jimmy', 'package.json @john @maria'].join('\n')
       );
       git.getBranchFiles.mockResolvedValueOnce(['package.json']);
       const codeOwners = await codeOwnersForPr(pr);
-      expect(codeOwners).toEqual(['@john', '@maria']);
+      expect(codeOwners).toEqual(['@john', '@maria', '@jimmy']);
+    });
+
+    describe('returns more specific code owners in monorepos', () => {
+      const mockCodeOwners = codeBlock`
+        # By default, assign to @john
+        #
+        * @john
+
+        # Lockfiles are not owned by anyone, any package dependency update may modify them.
+        # Assigning lockfiles an owner will cause issues as merge requests to be assigned to incorrect users
+        yarn.lock
+
+        # Assign each package to its respective user
+        #
+        packages/a/ @maria
+        packages/b/ @jimmy
+        packages/c/ @dan
+        packages/d/ @maria @jimmy
+        packages/e/ @jimmy
+
+      `;
+
+      it('does not assign changes for yarn.lock', async () => {
+        fs.readLocalFile.mockResolvedValueOnce(mockCodeOwners);
+        git.getBranchFiles.mockResolvedValueOnce(['yarn.lock']);
+        const codeOwners = await codeOwnersForPr(pr);
+        expect(codeOwners).toEqual([]);
+      });
+
+      it('assigns root changes to @john (*)', async () => {
+        fs.readLocalFile.mockResolvedValueOnce(mockCodeOwners);
+        git.getBranchFiles.mockResolvedValueOnce(['package.json', 'yarn.lock']);
+        const codeOwners = await codeOwnersForPr(pr);
+        expect(codeOwners).toEqual(['@john']);
+      });
+
+      it('assigns changes in package A to @maria (a), @john (*)', async () => {
+        fs.readLocalFile.mockResolvedValueOnce(mockCodeOwners);
+        git.getBranchFiles.mockResolvedValueOnce([
+          'packages/a/package.json',
+          'yarn.lock',
+        ]);
+        const codeOwners = await codeOwnersForPr(pr);
+        expect(codeOwners).toEqual(['@maria', '@john']);
+      });
+
+      it('assigns changes in package B to @jimmy (b), @john (*)', async () => {
+        fs.readLocalFile.mockResolvedValueOnce(mockCodeOwners);
+        git.getBranchFiles.mockResolvedValueOnce([
+          'packages/b/package.json',
+          'yarn.lock',
+        ]);
+        const codeOwners = await codeOwnersForPr(pr);
+        expect(codeOwners).toEqual(['@jimmy', '@john']);
+      });
+
+      it('assigns changes in package C to @dan (c), @john (*)', async () => {
+        fs.readLocalFile.mockResolvedValueOnce(mockCodeOwners);
+        git.getBranchFiles.mockResolvedValueOnce([
+          'packages/c/package.json',
+          'yarn.lock',
+        ]);
+        const codeOwners = await codeOwnersForPr(pr);
+        expect(codeOwners).toEqual(['@dan', '@john']);
+      });
+
+      it('assigns changes in package D to @maria (d), @jimmy (d), @john (*)', async () => {
+        fs.readLocalFile.mockResolvedValueOnce(mockCodeOwners);
+        git.getBranchFiles.mockResolvedValueOnce([
+          'packages/d/package.json',
+          'yarn.lock',
+        ]);
+        const codeOwners = await codeOwnersForPr(pr);
+        expect(codeOwners).toEqual(['@maria', '@jimmy', '@john']);
+      });
+
+      it('assigns changes in package A and B to @maria (a), @jimmy (b), @john (*)', async () => {
+        fs.readLocalFile.mockResolvedValueOnce(mockCodeOwners);
+        git.getBranchFiles.mockResolvedValueOnce([
+          'packages/a/package.json',
+          'packages/b/package.json',
+          'yarn.lock',
+        ]);
+        const codeOwners = await codeOwnersForPr(pr);
+        expect(codeOwners).toEqual(['@maria', '@jimmy', '@john']);
+      });
+
+      it('assigns changes in package A, B and C to @john, @maria (a), @jimmy (b), @dan (c), @john (*)', async () => {
+        fs.readLocalFile.mockResolvedValueOnce(mockCodeOwners);
+        git.getBranchFiles.mockResolvedValueOnce([
+          'packages/a/package.json',
+          'packages/b/package.json',
+          'packages/c/package.json',
+          'yarn.lock',
+        ]);
+        const codeOwners = await codeOwnersForPr(pr);
+        expect(codeOwners).toEqual(['@maria', '@jimmy', '@dan', '@john']);
+      });
+
+      it('assigns changes in package C and D to @dan (c), @maria (d), @jimmy (e), @john (*)', async () => {
+        fs.readLocalFile.mockResolvedValueOnce(mockCodeOwners);
+        git.getBranchFiles.mockResolvedValueOnce([
+          'packages/c/package.json',
+          'packages/d/package.json',
+          'yarn.lock',
+        ]);
+        const codeOwners = await codeOwnersForPr(pr);
+        expect(codeOwners).toEqual(['@dan', '@maria', '@jimmy', '@john']);
+      });
+
+      it('assigns changes in package D and E to @jimmy (d, e), @maria (d), @john (*)', async () => {
+        fs.readLocalFile.mockResolvedValueOnce(mockCodeOwners);
+        git.getBranchFiles.mockResolvedValueOnce([
+          'packages/d/package.json',
+          'packages/e/package.json',
+          'yarn.lock',
+        ]);
+        const codeOwners = await codeOwnersForPr(pr);
+        expect(codeOwners).toEqual(['@jimmy', '@maria', '@john']);
+      });
+    });
+
+    it('does not require all files to match a single rule, regression test for #12611', async () => {
+      fs.readLocalFile.mockResolvedValueOnce(
+        codeBlock`
+          * @reviewer-1 @reviewer-2 @reviewer-3 @reviewer-4 @reviewer-5
+
+          server/pom.xml @reviewer-1
+          client/package.json @reviewer-1
+          client/package-lock.json @reviewer-1
+        `
+      );
+      git.getBranchFiles.mockResolvedValueOnce(['server/pom.xml']);
+      const codeOwners = await codeOwnersForPr(pr);
+      expect(codeOwners).toEqual([
+        '@reviewer-1', // matched by file
+        '@reviewer-2', // matched by wildcard
+        '@reviewer-3',
+        '@reviewer-4',
+        '@reviewer-5',
+      ]);
+
+      fs.readLocalFile.mockResolvedValueOnce(
+        codeBlock`
+          * @reviewer-1 @reviewer-2 @reviewer-3 @reviewer-4 @reviewer-5
+
+          server/pom.xml @reviewer-1
+          client/package.json @reviewer-1
+          client/package-lock.json @reviewer-1
+        `
+      );
+      git.getBranchFiles.mockResolvedValueOnce([
+        'client/package.json',
+        'client/package-lock.json',
+      ]);
+      const codeOwners2 = await codeOwnersForPr(pr);
+      expect(codeOwners2).toEqual([
+        '@reviewer-1', // matched by file
+        '@reviewer-2', // matched by wildcard
+        '@reviewer-3',
+        '@reviewer-4',
+        '@reviewer-5',
+      ]);
     });
 
     it('ignores comments and leading/trailing whitespace', async () => {
@@ -43,7 +226,7 @@ describe('workers/repository/update/pr/code-owners', () => {
       );
       git.getBranchFiles.mockResolvedValueOnce(['package.json']);
       const codeOwners = await codeOwnersForPr(pr);
-      expect(codeOwners).toEqual(['@john', '@maria']);
+      expect(codeOwners).toEqual(['@john', '@maria', '@jimmy']);
     });
 
     it('returns empty array when no code owners set', async () => {
diff --git a/lib/workers/repository/update/pr/code-owners.ts b/lib/workers/repository/update/pr/code-owners.ts
index 081fa6797e..59d6d643c4 100644
--- a/lib/workers/repository/update/pr/code-owners.ts
+++ b/lib/workers/repository/update/pr/code-owners.ts
@@ -5,9 +5,79 @@ import { readLocalFile } from '../../../../util/fs';
 import { getBranchFiles } from '../../../../util/git';
 import { newlineRegex, regEx } from '../../../../util/regex';
 
+interface FileOwnerRule {
+  usernames: string[];
+  pattern: string;
+  score: number;
+  match: (path: string) => boolean;
+}
+
+function extractOwnersFromLine(line: string): FileOwnerRule {
+  const [pattern, ...usernames] = line.split(regEx(/\s+/));
+  const matchPattern = ignore().add(pattern);
+  return {
+    usernames,
+    pattern,
+    score: pattern.length,
+    match: (path: string) => matchPattern.ignores(path),
+  };
+}
+
+interface FileOwnersScore {
+  file: string;
+  userScoreMap: Map<string, number>;
+}
+
+function matchFileToOwners(
+  file: string,
+  rules: FileOwnerRule[]
+): FileOwnersScore {
+  const usernames = new Map<string, number>();
+
+  for (const rule of rules) {
+    if (!rule.match(file)) {
+      continue;
+    }
+
+    for (const user of rule.usernames) {
+      usernames.set(user, rule.score);
+    }
+  }
+
+  return { file, userScoreMap: usernames };
+}
+
+interface OwnerFileScore {
+  username: string;
+  fileScoreMap: Map<string, number>;
+}
+
+function getOwnerList(filesWithOwners: FileOwnersScore[]): OwnerFileScore[] {
+  const userFileMap = new Map<string, Map<string, number>>();
+
+  for (const fileMatch of filesWithOwners) {
+    for (const [username, score] of fileMatch.userScoreMap.entries()) {
+      // Get / create user file score
+      const fileMap = userFileMap.get(username) ?? new Map<string, number>();
+      if (!userFileMap.has(username)) {
+        userFileMap.set(username, fileMap);
+      }
+
+      // Add file to user
+      fileMap.set(fileMatch.file, (fileMap.get(fileMatch.file) ?? 0) + score);
+    }
+  }
+
+  return Array.from(userFileMap.entries()).map(([key, value]) => ({
+    username: key,
+    fileScoreMap: value,
+  }));
+}
+
 export async function codeOwnersForPr(pr: Pr): Promise<string[]> {
   logger.debug('Searching for CODEOWNERS file');
   try {
+    // Find CODEOWNERS file
     const codeOwnersFile =
       (await readLocalFile('CODEOWNERS', 'utf8')) ??
       (await readLocalFile('.github/CODEOWNERS', 'utf8')) ??
@@ -21,37 +91,73 @@ export async function codeOwnersForPr(pr: Pr): Promise<string[]> {
 
     logger.debug(`Found CODEOWNERS file: ${codeOwnersFile}`);
 
+    // Get list of modified files in PR
     const prFiles = await getBranchFiles(pr.sourceBranch);
-    const rules = codeOwnersFile
+
+    if (!prFiles?.length) {
+      logger.debug('PR includes no files');
+      return [];
+    }
+
+    // Convert CODEOWNERS file into list of matching rules
+    const fileOwnerRules = codeOwnersFile
       .split(newlineRegex)
+      // Remove empty and commented lines
       .map((line) => line.trim())
       .filter((line) => line && !line.startsWith('#'))
-      .map((line) => {
-        const [pattern, ...usernames] = line.split(regEx(/\s+/));
-        return {
-          usernames,
-          match: (path: string) => {
-            const matcher = ignore().add(pattern);
-            return matcher.ignores(path);
-          },
-        };
-      })
-      .reverse();
+      // Extract pattern & usernames
+      .map(extractOwnersFromLine);
+
     logger.debug(
-      { prFiles, rules },
+      { prFiles, fileOwnerRules },
       'PR files and rules to match for CODEOWNERS'
     );
-    const matchingRule = rules.find((rule) => prFiles?.every(rule.match));
-    if (!matchingRule) {
-      logger.debug('No matching CODEOWNERS rule found');
-      return [];
-    }
+
+    // Apply rules & get list of owners for each prFile
+    const emptyRules = fileOwnerRules.filter(
+      (rule) => rule.usernames.length === 0
+    );
+    const fileOwners =
+      // Map through all prFiles and match said file(s) with all the rules
+      prFiles
+        .map((file) => matchFileToOwners(file, fileOwnerRules))
+
+        // Match file again but this time only with emptyRules, to ensure that files which have no owner set remain owner-less
+        .map((fileMatch) => {
+          const matchEmpty = emptyRules.find((rule) =>
+            rule.match(fileMatch.file)
+          );
+          if (matchEmpty) {
+            return { ...fileMatch, userScoreMap: new Map<string, number>() };
+          }
+          return fileMatch;
+        });
+
+    logger.debug(
+      `CODEOWNERS matched the following files: ${fileOwners
+        .map((f) => f.file)
+        .join(', ')}`
+    );
+
+    // Get list of all matched users and the files they own (reverse keys of fileOwners)
+    const usersWithOwnedFiles = getOwnerList(fileOwners);
+
+    // Calculate a match score for each user. This allows sorting of the final user array in a way that guarantees that users matched with more precise patterns are first and users matched with less precise patterns are last (wildcards)
+    const userScore = usersWithOwnedFiles
+      .map((userMatch) => ({
+        user: userMatch.username,
+        score: Array.from(userMatch.fileScoreMap.values()).reduce(
+          (acc, score) => acc + score,
+          0
+        ),
+      }))
+      .sort((a, b) => b.score - a.score);
+
     logger.debug(
-      `CODEOWNERS matched the following usernames: ${JSON.stringify(
-        matchingRule.usernames
-      )}`
+      `CODEOWNERS matched the following users: ${JSON.stringify(userScore)}`
     );
-    return matchingRule.usernames;
+
+    return userScore.map((u) => u.user);
   } catch (err) {
     logger.warn({ err, pr }, 'Failed to determine CODEOWNERS for PR.');
     return [];
-- 
GitLab