forked from mantidproject/mantid
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcommits-report.py
317 lines (297 loc) · 11.4 KB
/
commits-report.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
# Mantid Repository : https://github.com/mantidproject/mantid
#
# Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
# NScD Oak Ridge National Laboratory, European Spallation Source,
# Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
# SPDX - License - Identifier: GPL - 3.0 +
# pylint: disable=invalid-name
import datetime
import csv
import os
import re
# Scratch file that receives the raw `git log --shortstat` output before it is parsed.
temp_filename = "all-commits.stdout"
# Matches one commit entry of the git log text. Capture groups, as they are
# consumed by parse_log_entry:
#   1 - the author field (text between "Author:" and "Date:")
#   2 - the date field
#   3 - the first whitespace-delimited word after the date; compared with
#       "Merge" to skip merge commits
#   5 - number of files changed   (optional, from the shortstat line)
#   7 - number of insertions      (optional)
#   9 - number of deletions       (optional)
# re.DOTALL lets "." cross newlines, because one entry spans several lines.
regex_git_log_entry = re.compile(
    r"Author:\s+(.+?)\s+Date:\s+(.+?)\B\s+(\S+).*?((\d+)\sfile.+?)?((\d+)+\sinsertion.+?)?((\d+)+\sdeletion.+?)?(commit\s[0-9a-f]{40}|$)",
    re.DOTALL,
)
# Recognises the "commit <40 hex digits>" header line that starts a new log entry.
regex_git_log_splitter = re.compile(r"commit\s[0-9a-f]{40}")
# Splits an author field of the form "Name <address>" into name (group 1)
# and address (group 2).
regex_name_email_address = re.compile(r"(.*?)<(\S+)>")
# Facility labels. The values of the `domains` mapping below are drawn from this list.
organisations = ["STFC", "ORNL", "ESS", "ILL", "PSI", "ANSTO", "KITWARE", "JUELICH", "OTHERS", "CSNS"]
# Maps an e-mail fragment (a bare domain, a whole address, or any other
# substring) to the facility that a commit from that address is credited to.
# get_user_facility tests every key with `key in email` and does not stop at
# the first hit, so when several keys match, the one that comes last in
# iteration order decides the result.
# NOTE(review): a large number of keys read "[email protected]". That looks
# like an e-mail-obfuscation placeholder introduced by whatever produced this
# copy of the file, not a real address. Duplicate keys in a dict literal
# collapse to the last value given, so all of those lines currently behave as
# ONE entry mapping "[email protected]" to "ORNL". The real addresses need to
# be restored from the upstream file - TODO confirm.
domains = {
    "stfc.ac.uk": "STFC",
    "clrc.ac.uk": "STFC",
    "tessella.com": "STFC",
    "ornl.gov": "ORNL",
    "sns.gov": "ORNL",
    "esss.se": "ESS",
    "ill.fr": "ILL",
    "ill.eu": "ILL",
    "psi.ch": "PSI",
    "ansto.gov.au": "ANSTO",
    "ansto": "ANSTO",
    "mantidproject.org": "OTHERS",
    "[email protected]": "PSI",
    "[email protected]": "ORNL",
    "uwstout.edu": "ORNL",
    "kitware.com": "KITWARE",
    "juelich.de": "JUELICH",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "ORNL",
    "[email protected]": "ORNL",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "ESS",
    "[email protected]": "ORNL",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "ORNL",
    "[email protected]": "ORNL",
    "[email protected]": "ORNL",
    "[email protected]": "ORNL",
    "[email protected]": "STFC",
    "[email protected]": "ORNL",
    "owen@laptop-ubuntu": "STFC",
    "[email protected]": "STFC",
    "Janik@Janik": "ORNL",
    "[email protected]": "OTHERS",
    "[email protected]": "OTHERS",
    "[email protected]": "OTHERS",
    "[email protected]": "OTHERS",
    "[email protected]": "ORNL",
    "[email protected]": "STFC",
    "[email protected]": "ORNL",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "ILL",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "ILL",
    "[email protected]": "JUELICH",
    "[email protected]": "ILL",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "OTHERS",
    "[email protected]": "OTHERS",
    "[email protected]": "OTHERS",
    "[email protected]": "ILL",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "ORNL",
    "[email protected]": "ESS",
    "[email protected]": "ORNL",
    "[email protected]": "STFC",
    "[email protected]": "ESS",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "JUELICH",
    "[email protected]": "STFC",
    "[email protected]": "JUELICH",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "OTHERS",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "ESS",
    "[email protected]": "ESS",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "STFC",
    "Waite": "STFC",
    "[email protected]": "STFC",
    "[email protected]": "ORNL",
    "[email protected]": "STFC",
    "[email protected]": "ORNL",
}
# Maps an author name exactly as it appears in the git log to the canonical
# display name used in the report, so that one person committing under several
# names is counted once.
# Keys are looked up with the whitespace-stripped name taken from the author
# field, so neither keys nor values may carry leading or trailing blanks: a
# padded key can never match, and a padded value would produce a second,
# distinct author in the CSV.
aliases = {
    "Anthony": "Anthony Lim",
    "AnthonyLim23": "Anthony Lim",
    "abuts": "Alex Buts",
    "Ayomide Bamidele": "Andre Bamidele",
    "DanielMurphy22": "Daniel Murphy",
    "Harrietbrown": "Harriet Brown",
    "PhilColebrooke": "Phil Colebrooke",
    "Phil": "Phil Colebrooke",
    "Richard": "Richard Waite",
    "RichardWaiteSTFC": "Richard Waite",
    "Stephen": "Stephen Smith",
    "StephenSmith25": "Stephen Smith",
    "StephenSmith": "Stephen Smith",
    "Anders-Markvardsen": "Anders Markvardsen",
    "AndreiSavici": "Andrei Savici",
    "Antti Soininnen": "Antti Soininen",
    "Bilheux": "Jean Bilheux",
    "brandonhewer": "Brandon Hewer",
    "celinedurniak": "Celine Durniak",
    "DavidFair": "David Fairbrother",
    "DiegoMonserrat": "Diego Monserrat",
    "Dimitar Borislavov Tasev": "Dimitar Tasev",
    "Tasev": "Dimitar Tasev",
    "giovannidisiena": "Giovanni Di Siena",
    "hankwustfc": "Hank Wu",
    "igudich": "Igor Gudich",
    "josephframsay": "Joseph Ramsay",
    "LamarMoore": "Lamar Moore",
    "Moore": "Lamar Moore",
    "LolloB": "Lorenzo Basso",
    "NickDraper": "Nick Draper",
    "Pete Peterson": "Peter Peterson",
    "Parker, Peter G": "Peter Parker",
    "Raquel Alvarez": "Raquel Alvarez Banos",
    "reimundILL": "Verena Reimund",
    "Ricardo Leal": "Ricardo Ferraz Leal",
    "Ricardo M. Ferraz Leal": "Ricardo Ferraz Leal",
    "Rob": "Robert Applin",
    "Rob Applin": "Robert Applin",
    "robertapplin": "Robert Applin",
    "Sam": "Sam Jenkins",
    "SamJenkins1": "Sam Jenkins",
    "simonfernandes": "Simon Fernandes",
    "MichaelWedel": "Michael Wedel",
    "Steven E. Hahn": "Steven Hahn",
    "VickieLynch": "Vickie Lynch",
}
def generate_commit_data():
    """Dump the repository history, with per-commit change statistics, to disk.

    Runs ``git log --shortstat`` through the shell (pager disabled) and
    redirects its standard output into the file named by ``temp_filename``.
    The exit status of the command is not inspected.
    """
    print("Generating git commit data...")
    git_log_command = "git --no-pager log --shortstat"
    os.system("{} > {}".format(git_log_command, temp_filename))
def parse_commit_data():
    """Split the dumped git log into commits and write one CSV row per commit.

    Reads the file named by ``temp_filename`` line by line and groups the
    lines into one text chunk per commit; a new chunk starts at every line
    matched by ``regex_git_log_splitter``. It then creates
    ``commits-report.csv``, writes the header row, and hands every chunk to
    ``parse_log_entry`` together with the CSV writer.
    """
    print("Reading the file")
    commit_entries = []
    current_lines = []
    with open(temp_filename, "r", encoding="utf-8") as log_file:
        for log_line in log_file:
            # A commit header closes the chunk collected so far.
            if regex_git_log_splitter.match(log_line) and current_lines:
                commit_entries.append("".join(current_lines))
                current_lines = []
            current_lines.append(log_line)
    # The last commit in the file has no following header to close it, so it
    # must be flushed explicitly; otherwise it would be silently dropped.
    if current_lines:
        commit_entries.append("".join(current_lines))

    print("searching for regex matches")
    # The log is read as UTF-8, so write the report as UTF-8 too; relying on
    # the platform default can fail on author names it cannot encode.
    with open("commits-report.csv", mode="w", newline="", encoding="utf-8") as output_file:
        commit_writer = csv.writer(output_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
        commit_writer.writerow(
            [
                "Author",
                "Email",
                "Facility",
                "Date_time",
                "Year",
                "Quarter",
                "Month",
                "Week",
                "Commits",
                "Files Changed",
                "Insertions",
                "Deletions",
                "Net Lines Changed",
            ]
        )
        for commit_text in commit_entries:
            parse_log_entry(commit_text, commit_writer)
def parse_log_entry(commit_text, commit_writer):
    """Parse the log text of one commit and append it to the report as a row.

    Nothing is written when the text is empty, when it belongs to one of the
    two black-listed commits, when it cannot be parsed (a diagnostic is
    printed instead), or when the first word after the date is "Merge".

    Args:
        commit_text: the text of a single entry of the git log dump.
        commit_writer: object with a ``writerow`` method (a ``csv.writer``)
            that receives the row: name, email, facility, date-time, year,
            quarter, month, ISO week, commit count (always 1), files changed,
            insertions, deletions and net lines changed.
    """
    if not commit_text:
        return
    # black listed log entries that crash the regex engine
    if commit_text.startswith(
        (
            "commit 4a6c0077a1dff965d767dc45a1517c7411a69070",
            "commit 16a4f16c99e3dc3b59d214067781c932f5a9eb8a",
        )
    ):
        return

    try:
        match = regex_git_log_entry.search(commit_text)
    except RuntimeError as e:
        print("Match failed", e)
        print(commit_text)
        return
    if match is None:
        # search() signals "no match" by returning None, not by raising.
        print("Match failed", "no Author/Date fields found")
        print(commit_text)
        return

    # skip merges
    if match.group(3) == "Merge":
        return

    name_and_email = extract_name_email_from_author(match.group(1))
    if name_and_email is None:
        # The author field did not have the "Name <address>" form.
        print("Author parsing failed")
        print(commit_text)
        return
    name, email = name_and_email

    date_time_str = match.group(2).strip()
    try:
        date_time = datetime.datetime.strptime(date_time_str, "%a %b %d %H:%M:%S %Y %z")
    except ValueError as e:
        print("Date Parsing failed")
        print(date_time_str, e)
        print(commit_text)
        return

    # The shortstat figures are optional in the log; a missing one counts as 0.
    files = 0 if match.group(5) is None else int(match.group(5))
    insertions = 0 if match.group(7) is None else int(match.group(7))
    deletions = 0 if match.group(9) is None else int(match.group(9))
    facility = get_user_facility(email, date_time)
    commit_writer.writerow(
        [
            name,
            email,
            facility,
            date_time.strftime("%Y-%m-%d %H:%M"),
            date_time.strftime("%Y"),
            (date_time.month - 1) // 3 + 1,  # calendar quarter, 1-4
            date_time.strftime("%m"),
            date_time.isocalendar()[1],  # ISO week number
            1,
            files,
            insertions,
            deletions,
            insertions - deletions,
        ]
    )
def extract_name_email_from_author(author):
    """Split a git author field into a display name and an address.

    The text before the ``<...>`` part is stripped of surrounding whitespace
    and, if it is a key of ``aliases``, replaced by the mapped name.

    Args:
        author: author field, expected to look like ``Name <address>``.

    Returns:
        A ``(name, email)`` tuple, or ``None`` when ``author`` contains no
        ``<address>`` part.
    """
    found = regex_name_email_address.search(author)
    if found is None:
        return None
    raw_name = found.group(1).strip()
    return aliases.get(raw_name, raw_name), found.group(2)
def get_user_facility(email, datetime):
    """Return the facility label credited for a commit.

    Every key of ``domains`` that occurs as a substring of ``email`` is
    considered; the scan does not stop at the first hit, so the last matching
    key decides. A match that maps to "ORNL" for a commit dated before 2009 is
    credited to the facility of "stfc.ac.uk" instead.

    Args:
        email: address of the commit author.
        datetime: commit date; only its ``year`` attribute is read.

    Returns:
        The facility label, or "UNKNOWN" (after printing the address) when no
        key matches.
    """
    facility = "UNKNOWN"
    for fragment, organisation in domains.items():
        if fragment not in email:
            continue
        # ORNL didn't join until 2009
        if organisation == "ORNL" and datetime.year < 2009:
            facility = domains["stfc.ac.uk"]
        else:
            facility = organisation
    if facility == "UNKNOWN":
        print("Unmatached email", email)
    return facility
# Script entry point: dump the git log, convert it to commits-report.csv,
# then delete the intermediate dump.
if __name__ == "__main__":
    print("Generating github commit metrics...\n")
    generate_commit_data()
    try:
        parse_commit_data()
    finally:
        # Remove the scratch log dump even when parsing fails part-way, so a
        # failed run does not leave the (potentially large) file behind.
        os.remove(temp_filename)
    print("\n\nAll done!\n")