From 52fccc0086aeab96d5e25cd32ef040cc265ba791 Mon Sep 17 00:00:00 2001 From: Oliver Eggert Date: Wed, 27 May 2026 12:41:03 -0700 Subject: [PATCH] update release notes script to filter cherry-picked commits already in release build --- tools/generate-release-notes.py | 151 +++++++++++++++++++++++++++----- 1 file changed, 129 insertions(+), 22 deletions(-) diff --git a/tools/generate-release-notes.py b/tools/generate-release-notes.py index e89ec06676..49e5f71edb 100644 --- a/tools/generate-release-notes.py +++ b/tools/generate-release-notes.py @@ -53,6 +53,15 @@ SKIP_PATTERNS = [ re.compile(r"^Merge tag ", re.IGNORECASE), ] +# Patterns for normalizing commit titles when detecting cherry-pick duplicates. +# Strips trailing "(#NNNN)" PR-number suffixes and conventional-commit prefixes. +PR_NUM_RE = re.compile(r"\s*\(#\d+\)") +CONV_COMMIT_RE = re.compile( + r"^(fix|feat|refactor|chore|docs|test|tests|ci|build|style|perf|revert|release|bugfix)" + r"(\([^)]*\))?\s*:\s*", + re.IGNORECASE, +) + # --- API helpers --- @@ -164,19 +173,54 @@ def fetch_version_info(ref): def fetch_commits(from_ref, to_ref): - """Fetch all commits between two refs using the GitHub compare API.""" - commits = [] - page = 1 - while True: - data = run_gh_rest( - f"repos/XRPLF/rippled/compare/{from_ref}...{to_ref}?per_page=250&page={page}" - ) - batch = data.get("commits", []) - commits.extend(batch) - if len(batch) < 250: - break - page += 1 - return commits + """Fetch commits between two refs, filtering out incoming cherry-pick duplicates.""" + + def key(c): + t = c["commit"]["message"].split("\n")[0] + t = PR_NUM_RE.sub("", t) + t = CONV_COMMIT_RE.sub("", t) + return re.sub(r"\s+", " ", t).strip().lower() + + def paginate(base, head): + results, page = [], 1 + while True: + data = run_gh_rest( + f"repos/XRPLF/rippled/compare/{base}...{head}?per_page=250&page={page}" + ) + batch = data.get("commits", []) + results.extend(batch) + if len(batch) < 250: + break + page += 1 + return results + + incoming = paginate(from_ref, to_ref) + shipped = paginate(to_ref, from_ref) + incoming_keys = {key(c) for c in incoming} + shipped_keys = {key(c) for c in shipped} + + before = len(incoming) + deduped = [c for c in incoming if key(c) not in shipped_keys] + dropped = before - len(deduped) + if dropped: + print(f" Filtered {dropped} cherry-pick duplicates.") + + # Surface backward-diff commits with no forward-diff match. These are + # either real release-branch originals or cherry-pick dupes that drifted + # enough to escape matching. + unmatched = [ + c for c in shipped + if key(c) not in incoming_keys + and not should_skip(c["commit"]["message"].split("\n")[0]) + ] + for c in unmatched: + c["_potential_dupe"] = True + if unmatched: + print(f" Adding {len(unmatched)} unmatched {from_ref} commit(s) to draft " + f"flagged as [POTENTIAL DUPE — VERIFY].") + deduped.extend(unmatched) + + return deduped def parse_features_macro(text): @@ -542,6 +586,13 @@ def main(): pr_shas = {} # PR/issue number → commit SHA (for file lookups on Issues) pr_bodies = {} # PR/issue number → commit body (for fallback descriptions) orphan_commits = [] # Commits with no PR/Issues link + # Potential dupe commits are kept in their own parallel buckets so they + # don't collide with real entries by PR number. They go through the same + # PR-enrichment pipeline to give reviewers full side-by-side context. + dupe_pr_numbers = {} + dupe_pr_shas = {} + dupe_pr_bodies = {} + dupe_orphan_commits = [] authors = set() for commit in commits: @@ -552,18 +603,28 @@ def main(): author = commit["commit"]["author"]["name"] email = commit["commit"]["author"].get("email", "") - # Skip Ripple employees from credits - login = (commit.get("author") or {}).get("login") - if not email.lower().endswith("@ripple.com") and email not in EXCLUDED_EMAILS: - if login: - authors.add(f"@{login}") - else: - authors.add(author) + # Skip Ripple employees from credits and skip potential-dupe commits entirely + if not commit.get("_potential_dupe"): + login = (commit.get("author") or {}).get("login") + if not email.lower().endswith("@ripple.com") and email not in EXCLUDED_EMAILS: + if login: + authors.add(f"@{login}") + else: + authors.add(author) if should_skip(message): continue pr_number = extract_pr_number(message) + if commit.get("_potential_dupe"): + if pr_number: + dupe_pr_numbers[pr_number] = message + dupe_pr_shas[pr_number] = sha + dupe_pr_bodies[pr_number] = body + else: + dupe_orphan_commits.append({"sha": sha, "message": message, "body": body}) + continue + if pr_number: pr_numbers[pr_number] = message pr_shas[pr_number] = sha @@ -586,12 +647,14 @@ def main(): print(f"Building changelog entries...") - # Fetch all PR details in batches via GraphQL - pr_details = fetch_prs_graphql(list(pr_numbers.keys())) + # Fetch all PR details in batches via GraphQL. + all_pr_numbers = list(set(pr_numbers.keys()) | set(dupe_pr_numbers.keys())) + pr_details = fetch_prs_graphql(all_pr_numbers) # Build entries, sorting amendments automatically amendment_entries = [] entries = [] + DUPE_MARKER = "[POTENTIAL DUPE — VERIFY]" for pr_number, commit_msg in pr_numbers.items(): pr_data = pr_details.get(pr_number) @@ -638,6 +701,50 @@ def main(): entry = format_commit_entry(sha, orphan["message"], orphan["body"], files) entries.append(entry) + # Build entries for potential dupes + for pr_number, commit_msg in dupe_pr_numbers.items(): + sha = dupe_pr_shas[pr_number] + pr_data = pr_details.get(pr_number) + print(f" Building potential-dupe entry for #{pr_number} ({sha[:7]})...") + + if pr_data: + title = f"{DUPE_MARKER} {pr_data['title']}" + body = pr_data.get("body", "") + labels = pr_data.get("labels", []) + files = pr_data.get("files", []) + link_type = pr_data.get("type", "pull") + if not files: + files = fetch_commit_files(sha) + if is_amendment(files) and amendment_diff: + entry = format_uncategorized_entry(pr_number, title, labels, body, link_type=link_type) + amendment_entries.append(entry) + else: + entry = format_uncategorized_entry(pr_number, title, labels, body, files, link_type) + entries.append(entry) + else: + # PR/Issue lookup failed — fall back to commit-only entry + files = fetch_commit_files(sha) + title = f"{DUPE_MARKER} {commit_msg}" + if is_amendment(files) and amendment_diff: + entry = format_commit_entry(sha, title, dupe_pr_bodies[pr_number]) + amendment_entries.append(entry) + else: + entry = format_commit_entry(sha, title, dupe_pr_bodies[pr_number], files) + entries.append(entry) + + # Potential dupe orphans (no PR link at all) + for orphan in dupe_orphan_commits: + sha = orphan["sha"] + print(f" Building potential-dupe orphan entry for {sha[:7]}...") + files = fetch_commit_files(sha) + title = f"{DUPE_MARKER} {orphan['message']}" + if is_amendment(files) and amendment_diff: + entry = format_commit_entry(sha, title, orphan["body"]) + amendment_entries.append(entry) + else: + entry = format_commit_entry(sha, title, orphan["body"], files) + entries.append(entry) + # Generate markdown markdown = generate_markdown(version, args.date, amendment_diff, amendment_unchanged, amendment_entries, entries, authors, version_commit)