update release notes script to filter cherry-picked commits already in release build

This commit is contained in:
Oliver Eggert
2026-05-27 12:41:03 -07:00
parent d7456276b9
commit 52fccc0086

View File

@@ -53,6 +53,15 @@ SKIP_PATTERNS = [
re.compile(r"^Merge tag ", re.IGNORECASE),
]
# Patterns for normalizing commit titles when detecting cherry-pick duplicates.
# Strips trailing "(#NNNN)" PR-number suffixes and conventional-commit prefixes.
PR_NUM_RE = re.compile(r"\s*\(#\d+\)")
CONV_COMMIT_RE = re.compile(
r"^(fix|feat|refactor|chore|docs|test|tests|ci|build|style|perf|revert|release|bugfix)"
r"(\([^)]*\))?\s*:\s*",
re.IGNORECASE,
)
# --- API helpers ---
@@ -164,19 +173,54 @@ def fetch_version_info(ref):
def fetch_commits(from_ref, to_ref):
"""Fetch all commits between two refs using the GitHub compare API."""
commits = []
page = 1
while True:
data = run_gh_rest(
f"repos/XRPLF/rippled/compare/{from_ref}...{to_ref}?per_page=250&page={page}"
)
batch = data.get("commits", [])
commits.extend(batch)
if len(batch) < 250:
break
page += 1
return commits
"""Fetch commits between two refs, filtering out incoming cherry-pick duplicates."""
def key(c):
t = c["commit"]["message"].split("\n")[0]
t = PR_NUM_RE.sub("", t)
t = CONV_COMMIT_RE.sub("", t)
return re.sub(r"\s+", " ", t).strip().lower()
def paginate(base, head):
results, page = [], 1
while True:
data = run_gh_rest(
f"repos/XRPLF/rippled/compare/{base}...{head}?per_page=250&page={page}"
)
batch = data.get("commits", [])
results.extend(batch)
if len(batch) < 250:
break
page += 1
return results
incoming = paginate(from_ref, to_ref)
shipped = paginate(to_ref, from_ref)
incoming_keys = {key(c) for c in incoming}
shipped_keys = {key(c) for c in shipped}
before = len(incoming)
deduped = [c for c in incoming if key(c) not in shipped_keys]
dropped = before - len(deduped)
if dropped:
print(f" Filtered {dropped} cherry-pick duplicates.")
# Surface backward-diff commits with no forward-diff match. These are
# either real release-branch originals or cherry-pick dupes that drifted
# enough to escape matching.
unmatched = [
c for c in shipped
if key(c) not in incoming_keys
and not should_skip(c["commit"]["message"].split("\n")[0])
]
for c in unmatched:
c["_potential_dupe"] = True
if unmatched:
print(f" Adding {len(unmatched)} unmatched {from_ref} commit(s) to draft "
f"flagged as [POTENTIAL DUPE — VERIFY].")
deduped.extend(unmatched)
return deduped
def parse_features_macro(text):
@@ -542,6 +586,13 @@ def main():
pr_shas = {} # PR/issue number → commit SHA (for file lookups on Issues)
pr_bodies = {} # PR/issue number → commit body (for fallback descriptions)
orphan_commits = [] # Commits with no PR/Issues link
# Potential dupe commits are kept in their own parallel buckets so they
# don't collide with real entries by PR number. They go through the same
# PR-enrichment pipeline to give reviewers full side-by-side context.
dupe_pr_numbers = {}
dupe_pr_shas = {}
dupe_pr_bodies = {}
dupe_orphan_commits = []
authors = set()
for commit in commits:
@@ -552,18 +603,28 @@ def main():
author = commit["commit"]["author"]["name"]
email = commit["commit"]["author"].get("email", "")
# Skip Ripple employees from credits
login = (commit.get("author") or {}).get("login")
if not email.lower().endswith("@ripple.com") and email not in EXCLUDED_EMAILS:
if login:
authors.add(f"@{login}")
else:
authors.add(author)
# Skip Ripple employees from credits and skip potential-dupe commits entirely
if not commit.get("_potential_dupe"):
login = (commit.get("author") or {}).get("login")
if not email.lower().endswith("@ripple.com") and email not in EXCLUDED_EMAILS:
if login:
authors.add(f"@{login}")
else:
authors.add(author)
if should_skip(message):
continue
pr_number = extract_pr_number(message)
if commit.get("_potential_dupe"):
if pr_number:
dupe_pr_numbers[pr_number] = message
dupe_pr_shas[pr_number] = sha
dupe_pr_bodies[pr_number] = body
else:
dupe_orphan_commits.append({"sha": sha, "message": message, "body": body})
continue
if pr_number:
pr_numbers[pr_number] = message
pr_shas[pr_number] = sha
@@ -586,12 +647,14 @@ def main():
print(f"Building changelog entries...")
# Fetch all PR details in batches via GraphQL
pr_details = fetch_prs_graphql(list(pr_numbers.keys()))
# Fetch all PR details in batches via GraphQL.
all_pr_numbers = list(set(pr_numbers.keys()) | set(dupe_pr_numbers.keys()))
pr_details = fetch_prs_graphql(all_pr_numbers)
# Build entries, sorting amendments automatically
amendment_entries = []
entries = []
DUPE_MARKER = "[POTENTIAL DUPE — VERIFY]"
for pr_number, commit_msg in pr_numbers.items():
pr_data = pr_details.get(pr_number)
@@ -638,6 +701,50 @@ def main():
entry = format_commit_entry(sha, orphan["message"], orphan["body"], files)
entries.append(entry)
# Build entries for potential dupes
for pr_number, commit_msg in dupe_pr_numbers.items():
sha = dupe_pr_shas[pr_number]
pr_data = pr_details.get(pr_number)
print(f" Building potential-dupe entry for #{pr_number} ({sha[:7]})...")
if pr_data:
title = f"{DUPE_MARKER} {pr_data['title']}"
body = pr_data.get("body", "")
labels = pr_data.get("labels", [])
files = pr_data.get("files", [])
link_type = pr_data.get("type", "pull")
if not files:
files = fetch_commit_files(sha)
if is_amendment(files) and amendment_diff:
entry = format_uncategorized_entry(pr_number, title, labels, body, link_type=link_type)
amendment_entries.append(entry)
else:
entry = format_uncategorized_entry(pr_number, title, labels, body, files, link_type)
entries.append(entry)
else:
# PR/Issue lookup failed — fall back to commit-only entry
files = fetch_commit_files(sha)
title = f"{DUPE_MARKER} {commit_msg}"
if is_amendment(files) and amendment_diff:
entry = format_commit_entry(sha, title, dupe_pr_bodies[pr_number])
amendment_entries.append(entry)
else:
entry = format_commit_entry(sha, title, dupe_pr_bodies[pr_number], files)
entries.append(entry)
# Potential dupe orphans (no PR link at all)
for orphan in dupe_orphan_commits:
sha = orphan["sha"]
print(f" Building potential-dupe orphan entry for {sha[:7]}...")
files = fetch_commit_files(sha)
title = f"{DUPE_MARKER} {orphan['message']}"
if is_amendment(files) and amendment_diff:
entry = format_commit_entry(sha, title, orphan["body"])
amendment_entries.append(entry)
else:
entry = format_commit_entry(sha, title, orphan["body"], files)
entries.append(entry)
# Generate markdown
markdown = generate_markdown(version, args.date, amendment_diff, amendment_unchanged, amendment_entries, entries, authors, version_commit)