-
Notifications
You must be signed in to change notification settings - Fork 492
/
Copy pathpick_doc_commits.py
executable file
·271 lines (229 loc) · 9.02 KB
/
pick_doc_commits.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Helps find commits to cherrypick into a release branch.
Usage:
    pick_doc_commits.py --main=origin/main --release=origin/release/5.5

It will find commits on the main branch that are not on the release branch, and
filter them down to the docs-only commits that should be cherry-picked. It will
also print the commits that were filtered out.

This tool will not actually modify the git repo; it will only print the
commands to run.

Must be run from inside the repo, ideally after a recent `git pull`. Does not
care which branch is currently checked out.
"""
import argparse
import datetime
import re
import subprocess
import sys
import textwrap
from typing import List
# Module-wide verbosity level. The script prints extra info (to stderr, via
# debug_log) when this is > 0, and more at higher levels. main() sets it from
# the number of times the --verbose flag was given.
verbosity = 0
def debug_log(message: str):
    """Writes a "VERBOSE: "-prefixed line to stderr when verbose output is on.

    Does nothing unless the module-level `verbosity` is greater than zero.

    Args:
        message: The text to log. A trailing newline is appended.
    """
    # Only reading the module-level setting, so no `global` statement is needed.
    if verbosity <= 0:
        return
    line = f"VERBOSE: {message}\n"
    sys.stderr.write(line)
def run_git(command: List[str]) -> List[str]:
    """Runs a git command and returns its stdout as a list of lines.

    Prints the command and its output to debug_log() if verbosity is greater
    than 1.

    Args:
        command: The args to pass to `git`, without the leading `git` itself.

    Returns:
        A list of the non-empty lines printed to stdout, with surrounding
        whitespace (including trailing newlines) removed.

    Raises:
        RuntimeError: git could not be started, its output could not be
            decoded, or it exited with a non-zero status. (RuntimeError is a
            subclass of Exception, so `except Exception` handlers still work.)
    """
    # Human-readable form of the command, used in logs and error messages.
    command_str = "git " + " ".join(str(arg) for arg in command)

    if verbosity > 1:  # Higher verbosity required
        debug_log(f"Running command: '{command_str}'")

    # Keep the try body minimal: only launching the process is guarded, so the
    # non-zero-exit error raised below is not caught and wrapped a second time.
    try:
        result = subprocess.run(["git", *command], capture_output=True, text=True)
    except (OSError, ValueError, TypeError, subprocess.SubprocessError) as e:
        raise RuntimeError(f"Error running command '{command_str}': {e}") from e

    if result.returncode != 0:
        raise RuntimeError(
            f"Error running command '{command_str}':\n{result.stderr}"
        )

    # Remove empty and whitespace-only lines.
    stripped = (line.strip() for line in result.stdout.split("\n"))
    lines = [line for line in stripped if line]

    if verbosity > 1:
        debug_log("-----BEGIN GIT OUTPUT-----")
        for line in lines:
            debug_log(line)
        debug_log("-----END GIT OUTPUT-----")
    return lines
class Commit:
    """A git commit hash and its one-line message."""

    def __init__(self, hash: str, message: str = ""):
        """Creates a new Commit with the given hash.

        Args:
            hash: The hexadecimal hash of the commit. Surrounding whitespace
                is stripped.
            message: The one-line summary of the commit. If empty (or only
                whitespace), this method will ask git for the commit message.
        """
        self.hash = hash.strip()
        # Strip before testing so a whitespace-only message also triggers the
        # git lookup instead of silently becoming an empty message.
        message = message.strip()
        if not message:
            # Ask git for the commit message.
            lines = run_git(["log", "-1", "--pretty=%s", self.hash])
            # Should just be one line, but could be zero.
            message = " ".join(lines).strip()
        self.message = message

    @classmethod
    def from_line(cls, line: str) -> "Commit":
        """Creates a Commit from a string of the form '<hash> [<message>]'.

        The hash and message may be separated (and surrounded) by any amount
        of whitespace.

        Raises:
            ValueError: The line is empty or contains only whitespace.
        """
        # Splitting on runs of whitespace (rather than a single " ") keeps
        # leading spaces or tabs from ending up in the hash.
        parts = line.split(maxsplit=1)
        if not parts:
            raise ValueError(f"Expected at least one part in line '{line}'")
        message = parts[1] if len(parts) > 1 else ""
        return cls(hash=parts[0], message=message)

    def __repr__(self):
        # !r quotes and escapes the fields, so messages that themselves
        # contain quotes stay unambiguous.
        return f"Commit({self.hash[:8]!r}, {self.message!r})"

    def __str__(self):
        # Abbreviated hash followed by the summary, as shown to the user.
        return f"{self.hash[:8]} {self.message}"
def is_doc_only_commit(commit: Commit) -> bool:
    """Reports whether every file touched by the commit is a "documentation file".

    When verbosity is greater than zero and the commit touches non-doc files,
    logs how many there are along with one example path.

    Args:
        commit: The commit whose modified files are inspected via git.

    Returns:
        True if no non-documentation file was modified, False otherwise.
    """
    doc_suffixes = (".md", ".rst")

    def is_doc_file(path: str) -> bool:
        """Tells whether the path counts as a "documentation file"."""
        # Anything under docs/ qualifies regardless of file type; elsewhere
        # in the repo only markdown and RST files do.
        return path.startswith("docs/") or path.endswith(doc_suffixes)

    # Line one of the output is the full hash; every following line is a file
    # modified by the commit, relative to the root of the repo.
    output = run_git(["diff-tree", "--name-only", "-r", commit.hash])
    touched = frozenset(output[1:])
    non_doc_files = frozenset(path for path in touched if not is_doc_file(path))

    if not non_doc_files:
        return True

    if verbosity > 0:
        # Alphabetically first offender, as a representative example.
        example = min(non_doc_files)
        debug_log(
            f"{repr(commit)} touches {len(non_doc_files)} non-doc files, "
            + f"like '{example}'."
        )
    return False
def print_wrapped(text: str, width: int = 80) -> None:
    """Print text wrapped to fit within the given width.

    Indents additional lines by four spaces.

    Args:
        text: The text to print.
        width: The maximum total line width, including the four-space indent
            on continuation lines.
    """
    indent = " " * 4
    # Every line (including the first) is wrapped to `width - 4` columns so
    # that continuation lines still fit in `width` once the indent is added.
    wrapped = textwrap.wrap(text, width=width - len(indent), break_on_hyphens=False)
    print(("\n" + indent).join(wrapped))
def parse_args() -> argparse.Namespace:
    """Parses the command-line arguments from sys.argv.

    Returns:
        A namespace with `main` (source branch, default 'origin/main'),
        `release` (destination branch, always set) and `verbose` (how many
        times -v/--verbose was given, default 0).

    Raises:
        SystemExit: The arguments are invalid or --release is missing
            (argparse prints a usage message and exits).
    """
    parser = argparse.ArgumentParser(
        description="Prints differences between git branches."
    )
    parser.add_argument(
        "--main",
        default="origin/main",
        type=str,
        help="The name of the main (source) branch to pick commits from.",
    )
    parser.add_argument(
        "--release",
        # There is no sensible default release branch; without this flag the
        # rest of the script would receive None and fail with an obscure error.
        required=True,
        type=str,
        help="The name of the release (destination) branch to pick commits onto, "
        + "ideally with the 'origin/' prefix",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="Log extra output. Specify more times (-vv) for more output.",
    )
    return parser.parse_args()
def _print_commit_list(commits: List["Commit"]) -> None:
    """Prints one '- <commit>' bullet per commit, or '- <none>' if there are none."""
    if not commits:
        print("- <none>")
        return
    for commit in commits:
        print(f"- {commit}")


def _print_cherry_pick_instructions(
    main_branch: str, release_branch: str, commits: List["Commit"]
) -> None:
    """Prints copy-pasteable commands for cherry-picking `commits`."""
    # Recommend a unique branch name, stamped with the current UTC time.
    # (datetime.utcnow() is deprecated; use an aware "now" in UTC instead.)
    suffix = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d%H%M")
    branch_name = "cherrypick-" + release_branch.replace("/", "-") + "-" + suffix

    print("Cherry pick by running the commands:")
    print("```")
    print(f"git checkout {release_branch}")
    print(
        # Split lines with backslashes to make long lists more legible but
        # still copy-pasteable.
        "git cherry-pick \\\n "
        + " \\\n ".join([commit.hash for commit in commits])
    )
    print(f"git checkout -b {branch_name}")
    print("```")
    print("")
    print("To verify that this worked, re-run this script with the arguments:")
    print("```")
    print(f"--main={main_branch} --release={branch_name}")
    print("```")
    print("It should show no doc-only commits to cherry-pick.")
    print("")
    print(f"Then, push {branch_name} to GitHub:")
    print("```")
    print(f"git push --set-upstream origin {branch_name}")
    print("```")
    print("")
    print_wrapped(
        "When creating the PR, remember to set the 'into' branch to be "
        # Remove "origin/" if present since it won't appear in the GitHub
        # UI.
        + f"'{re.sub('^origin/', '', release_branch)}'."
    )


def main():
    """Lists doc-only commits to cherry-pick and prints the commands to do so.

    Prints three lists: commits already cherry-picked onto the release branch,
    remaining commits that touch non-documentation files (skipped), and
    remaining doc-only commits. If any doc-only commits remain, it then prints
    the git commands for picking them. Nothing in the repo is modified.
    """
    global verbosity
    args = parse_args()
    main_branch = args.main
    release_branch = args.release
    verbosity = args.verbose

    # Fail early with a clear message rather than passing None to git.
    if not release_branch:
        sys.exit("error: --release is required (e.g. --release=origin/release/5.5)")

    # Returns a list of hashes that are on the main branch but not the release
    # branch. Each hash is preceded by `+ ` if the commit has not been cherry
    # picked onto the release branch, or `- ` if it has.
    cherry_lines = run_git(["cherry", release_branch, main_branch])

    print_wrapped(
        f"Commits on '{main_branch}' that have already been cherry-picked into '{release_branch}':"
    )
    already_picked = []
    candidate_commits = []
    for line in cherry_lines:
        marker, rest = line[:2], line[2:]
        if marker == "+ ":
            candidate_commits.append(Commit.from_line(rest))
        elif marker == "- ":
            already_picked.append(Commit.from_line(rest))
        else:
            debug_log(f"Ignoring unrecognized 'git cherry' line: '{line}'")
    # Print '<none>' whenever this list is empty, even if other commits exist,
    # so the header is never followed by nothing.
    _print_commit_list(already_picked)
    print("")

    # Filter out and print the commits that touch non-documentation files.
    print_wrapped(
        f"Will not pick these commits on '{main_branch}' that touch non-documentation files:"
    )
    doc_only_commits = []
    skipped_commits = []
    for commit in candidate_commits:
        if is_doc_only_commit(commit):
            doc_only_commits.append(commit)
        else:
            skipped_commits.append(commit)
    _print_commit_list(skipped_commits)
    print("")

    # Print the commits to cherry-pick.
    print_wrapped(
        f"Remaining '{main_branch}' commits that touch only documentation files; "
        + f"will be cherry-picked into '{release_branch}':"
    )
    _print_commit_list(doc_only_commits)
    print("")

    # Print instructions for cherry-picking the commits.
    if doc_only_commits:
        _print_cherry_pick_instructions(main_branch, release_branch, doc_only_commits)
    else:
        print_wrapped(
            "It looks like there are no doc-only commits "
            + f"on '{main_branch}' to cherry-pick into '{release_branch}'."
        )
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()