forked from arschlochnop/hack_tools_for_me
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.py
135 lines (104 loc) · 4.44 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env python
#
# https://github.com/git/git/blob/master/Documentation/technical/index-format.txt
#
import binascii
import collections
import mmap
import struct
import sys
def check(boolean, message):
if not boolean:
import sys
print "error: " + message
sys.exit(1)
def parse(filename, pretty=True):
with open(filename, "rb") as o:
f = mmap.mmap(o.fileno(), 0, access=mmap.ACCESS_READ)
def read(format):
# "All binary numbers are in network byte order."
# Hence "!" = network order, big endian
format = "! " + format
bytes = f.read(struct.calcsize(format))
return struct.unpack(format, bytes)[0]
index = collections.OrderedDict()
# 4-byte signature, b"DIRC"
index["signature"] = f.read(4).decode("ascii")
check(index["signature"] == "DIRC", "Not a Git index file")
# 4-byte version number
index["version"] = read("I")
check(index["version"] in {2, 3},
"Unsupported version: %s" % index["version"])
# 32-bit number of index entries, i.e. 4-byte
index["entries"] = read("I")
yield index
for n in range(index["entries"]):
entry = collections.OrderedDict()
entry["entry"] = n + 1
entry["ctime_seconds"] = read("I")
entry["ctime_nanoseconds"] = read("I")
if pretty:
entry["ctime"] = entry["ctime_seconds"]
entry["ctime"] += entry["ctime_nanoseconds"] / 1000000000
del entry["ctime_seconds"]
del entry["ctime_nanoseconds"]
entry["mtime_seconds"] = read("I")
entry["mtime_nanoseconds"] = read("I")
if pretty:
entry["mtime"] = entry["mtime_seconds"]
entry["mtime"] += entry["mtime_nanoseconds"] / 1000000000
del entry["mtime_seconds"]
del entry["mtime_nanoseconds"]
entry["dev"] = read("I")
entry["ino"] = read("I")
# 4-bit object type, 3-bit unused, 9-bit unix permission
entry["mode"] = read("I")
if pretty:
entry["mode"] = "%06o" % entry["mode"]
entry["uid"] = read("I")
entry["gid"] = read("I")
entry["size"] = read("I")
entry["sha1"] = binascii.hexlify(f.read(20)).decode("ascii")
entry["flags"] = read("H")
# 1-bit assume-valid
entry["assume-valid"] = bool(entry["flags"] & (0b10000000 << 8))
# 1-bit extended, must be 0 in version 2
entry["extended"] = bool(entry["flags"] & (0b01000000 << 8))
# 2-bit stage (?)
stage_one = bool(entry["flags"] & (0b00100000 << 8))
stage_two = bool(entry["flags"] & (0b00010000 << 8))
entry["stage"] = stage_one, stage_two
# 12-bit name length, if the length is less than 0xFFF (else, 0xFFF)
namelen = entry["flags"] & 0xFFF
# 62 bytes so far
entrylen = 62
if entry["extended"] and (index["version"] == 3):
entry["extra-flags"] = read("H")
# 1-bit reserved
entry["reserved"] = bool(entry["extra-flags"] & (0b10000000 << 8))
# 1-bit skip-worktree
entry["skip-worktree"] = bool(entry["extra-flags"] & (0b01000000 << 8))
# 1-bit intent-to-add
entry["intent-to-add"] = bool(entry["extra-flags"] & (0b00100000 << 8))
# 13-bits unused
# used = entry["extra-flags"] & (0b11100000 << 8)
# check(not used, "Expected unused bits in extra-flags")
entrylen += 2
if namelen < 0xFFF:
entry["name"] = f.read(namelen).decode("utf-8", "replace")
entrylen += namelen
else:
# Do it the hard way
name = []
while True:
byte = f.read(1)
if byte == "\x00":
break
name.append(byte)
entry["name"] = b"".join(name).decode("utf-8", "replace")
entrylen += 1
padlen = (8 - (entrylen % 8)) or 8
nuls = f.read(padlen)
check(set(nuls) == set(['\x00']), "padding contained non-NUL")
yield entry
f.close()