diff-cache.py

#!/usr/bin/env python3
"""Produce a diff between mypy caches.
With some infrastructure, this can allow for distributing small cache diffs to users in
many cases instead of full cache artifacts.
"""

import argparse
import json
import os
import sys
from collections import defaultdict
from typing import Any, Dict, Optional, Set

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from mypy.metastore import FilesystemMetadataStore, MetadataStore, SqliteMetadataStore


def make_cache(input_dir: str, sqlite: bool) -> MetadataStore:
    if sqlite:
        return SqliteMetadataStore(input_dir)
    else:
        return FilesystemMetadataStore(input_dir)


def merge_deps(all_deps: Dict[str, Set[str]], new: Dict[str, Set[str]]) -> None:
    # Union each module's new dependency set into the accumulated mapping.
    # (Renamed from `all` to avoid shadowing the builtin.)
    for k, v in new.items():
        all_deps.setdefault(k, set()).update(v)


def load(cache: MetadataStore, s: str) -> Any:
    data = cache.read(s)
    obj = json.loads(data)
    if s.endswith(".meta.json"):
        # For meta files, zero out the mtimes and sort the
        # dependencies to avoid spurious conflicts.
        obj["mtime"] = 0
        obj["data_mtime"] = 0
        if "dependencies" in obj:
            all_deps = obj["dependencies"] + obj["suppressed"]
            num_deps = len(obj["dependencies"])
            thing = list(zip(all_deps, obj["dep_prios"], obj["dep_lines"]))

            def unzip(x: Any) -> Any:
                return zip(*x) if x else ((), (), ())

            obj["dependencies"], prios1, lines1 = unzip(sorted(thing[:num_deps]))
            obj["suppressed"], prios2, lines2 = unzip(sorted(thing[num_deps:]))
            obj["dep_prios"] = prios1 + prios2
            obj["dep_lines"] = lines1 + lines2
    if s.endswith(".deps.json"):
        # For deps files, sort the deps to avoid spurious mismatches.
        for v in obj.values():
            v.sort()
    return obj
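
# Illustrative effect of the normalization above (hypothetical data): for a meta
# file with dependencies=["b", "a"], suppressed=["z"], dep_prios=[5, 10, 20] and
# dep_lines=[2, 1, 3], load() zeroes both mtimes and sorts each section, giving
# dependencies=("a", "b"), suppressed=("z",), dep_prios=(10, 5, 20) and
# dep_lines=(1, 2, 3), so caches that differ only in ordering compare equal.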


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--verbose", action="store_true", default=False, help="Increase verbosity"
    )
    parser.add_argument(
        "--sqlite", action="store_true", default=False, help="Use a sqlite cache"
    )
    parser.add_argument("input_dir1", help="Input directory for the first (old) cache")
    parser.add_argument("input_dir2", help="Input directory for the second (new) cache")
    parser.add_argument("output", help="Output file")
    args = parser.parse_args()

    cache1 = make_cache(args.input_dir1, args.sqlite)
    cache2 = make_cache(args.input_dir2, args.sqlite)

    type_misses: Dict[str, int] = defaultdict(int)
    type_hits: Dict[str, int] = defaultdict(int)

    updates: Dict[str, Optional[str]] = {}

    deps1: Dict[str, Set[str]] = {}
    deps2: Dict[str, Set[str]] = {}

    misses = hits = 0
    cache1_all = list(cache1.list_all())
    for s in cache1_all:
        obj1 = load(cache1, s)
        try:
            obj2 = load(cache2, s)
        except FileNotFoundError:
            obj2 = None

        typ = s.split(".")[-2]
        if obj1 != obj2:
            misses += 1
            type_misses[typ] += 1

            # Collect the dependencies instead of including them directly in the diff
            # so we can produce a much smaller direct diff of them.
            if ".deps." not in s:
                if obj2 is not None:
                    updates[s] = json.dumps(obj2)
                else:
                    updates[s] = None
            elif obj2:
                merge_deps(deps1, obj1)
                merge_deps(deps2, obj2)
        else:
            hits += 1
            type_hits[typ] += 1

    # Files that exist only in the new cache are included wholesale.
    cache1_all_set = set(cache1_all)
    for s in cache2.list_all():
        if s not in cache1_all_set:
            updates[s] = cache2.read(s)

    # Compute what deps have been added and merge them all into the
    # @root deps file. (Added deps are those present in the new cache
    # but not the old one, hence deps2 minus deps1.)
    new_deps = {k: deps2.get(k, set()) - deps1.get(k, set()) for k in deps2}
    new_deps = {k: v for k, v in new_deps.items() if v}
    try:
        root_deps = load(cache1, "@root.deps.json")
    except FileNotFoundError:
        root_deps = {}
    merge_deps(new_deps, root_deps)

    new_deps_json = {k: list(v) for k, v in new_deps.items() if v}
    updates["@root.deps.json"] = json.dumps(new_deps_json)

    # Drop updates to deps.meta.json for size reasons. The diff
    # applier will manually fix it up.
    updates.pop("./@deps.meta.json", None)
    updates.pop("@deps.meta.json", None)

    # Report cache reuse statistics and write out the diff.
    print("Generated incremental cache:", hits, "hits,", misses, "misses")

    if args.verbose:
        print("hits", type_hits)
        print("misses", type_misses)

    with open(args.output, "w") as f:
        json.dump(updates, f)


if __name__ == "__main__":
    main()
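

# The applier mentioned in main() is a separate tool, not part of this script.
# Below is a minimal sketch, assuming only the MetadataStore API imported above
# (read/write/remove/commit); the function name is hypothetical, and a real
# applier would also need to rebuild the @deps.meta.json entry that this
# script deliberately drops from the diff.
def apply_diff_sketch(cache_dir: str, diff_file: str, sqlite: bool = False) -> None:
    cache = make_cache(cache_dir, sqlite)
    with open(diff_file) as f:
        diff = json.load(f)
    for path, contents in diff.items():
        if contents is None:
            # A null value in the diff marks a cache file to delete.
            cache.remove(path)
        else:
            cache.write(path, contents)
    cache.commit()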