Commit 6a072aa

Refactored for pep8
1 parent 7b10d31 commit 6a072aa

7 files changed, +371 -341 lines changed

lib/cache.py

+1 -2
@@ -1,7 +1,6 @@
-
-import os
 import functools
 import logging
+import os
 
 import redis

lib/layers.py

+250 -0 (new file)
@@ -0,0 +1,250 @@
import tarfile
import tempfile

import backports.lzma as lzma
import simplejson as json

import cache
import rqueue
import storage.local
store = storage.load()

FILE_TYPES = {
    tarfile.REGTYPE: 'f',
    tarfile.DIRTYPE: 'd',
    tarfile.LNKTYPE: 'l',
    tarfile.SYMTYPE: 's',
    tarfile.CHRTYPE: 'c',
    tarfile.BLKTYPE: 'b',
}

# queue for requesting diff calculations from workers
diff_queue = rqueue.CappedCollection(cache.redis_conn, "diff-worker", 1024)


def generate_ancestry(image_id, parent_id=None):
    if not parent_id:
        store.put_content(store.image_ancestry_path(image_id),
                          json.dumps([image_id]))
        return
    data = store.get_content(store.image_ancestry_path(parent_id))
    data = json.loads(data)
    data.insert(0, image_id)
    store.put_content(store.image_ancestry_path(image_id), json.dumps(data))
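
A note on the layout: the ancestry written above is a JSON list ordered from
newest image to oldest ancestor. A minimal sketch of how it grows, using
hypothetical image IDs:

    # generate_ancestry('base')          writes ["base"]
    # generate_ancestry('mid', 'base')   writes ["mid", "base"]
    # generate_ancestry('top', 'mid')    writes ["top", "mid", "base"]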
class layer_archive(object):
    '''Context manager for untarring a possibly xz/lzma compressed archive.'''
    def __init__(self, fobj):
        self.orig_fobj = fobj
        self.lzma_fobj = None
        self.tar_obj = None

    def __enter__(self):
        target_fobj = self.orig_fobj
        try:
            # try to decompress the archive
            self.lzma_fobj = lzma.LZMAFile(filename=target_fobj)
            self.lzma_fobj.read()
            self.lzma_fobj.seek(0)
        except lzma._lzma.LZMAError:
            pass  # it's okay if we can't
        else:
            target_fobj = self.lzma_fobj
        finally:  # reset whichever fp we ended up using
            target_fobj.seek(0)

        # untar the fobj, whether it was the original or the lzma
        self.tar_obj = tarfile.open(mode='r|*', fileobj=target_fobj)
        return self.tar_obj

    def __exit__(self, type, value, traceback):
        # clean up
        self.tar_obj.close()
        self.lzma_fobj.close()
        self.orig_fobj.seek(0)
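
A brief usage sketch for the context manager above, assuming a hypothetical
layer file on disk (plain and xz/lzma-compressed tars are both handled):

    with open('/tmp/layer.tar', 'rb') as f:  # hypothetical path
        with layer_archive(f) as tar:
            names = [m.name for m in tar.getmembers()]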
def serialize_tar_info(tar_info):
    '''serialize a tarfile.TarInfo instance
    Take a single tarfile.TarInfo instance and serialize it to a
    tuple. Consider union whiteouts by filename and mark them as
    deleted in the third element. Don't include union metadata
    files.
    '''
    is_deleted = False
    filename = tar_info.name

    # notice and strip whiteouts
    if filename == ".":
        filename = '/'

    if filename.startswith("./"):
        filename = "/" + filename[2:]

    if filename.startswith("/.wh."):
        filename = "/" + filename[5:]
        is_deleted = True

    if filename.startswith("/.wh."):
        return None

    return (
        filename,
        FILE_TYPES[tar_info.type],
        is_deleted,
        tar_info.size,
        tar_info.mtime,
        tar_info.mode,
        tar_info.uid,
        tar_info.gid,
    )
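
A worked example of the whiteout handling above, using hypothetical member
names (the trailing fields are size, mtime, mode, uid and gid):

    # './etc/passwd'   -> ('/etc/passwd', 'f', False, ...)   ordinary file
    # './.wh.old.log'  -> ('/old.log', 'f', True, ...)       whiteout: deleted
    # './.wh..wh.plnk' -> None                                union metadata, skipped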
def read_tarfile(tar_fobj):
    # iterate over each file in the tar and then serialize it
    return [
        i for i in [serialize_tar_info(m) for m in tar_fobj.getmembers()]
        if i is not None
    ]


def get_image_files_cache(image_id):
    image_files_path = store.image_files_path(image_id)
    if store.exists(image_files_path):
        return store.get_content(image_files_path)


def set_image_files_cache(image_id, files_json):
    image_files_path = store.image_files_path(image_id)
    store.put_content(image_files_path, files_json)


def get_image_files_from_fobj(layer_file):
    '''get files from an open file-object containing a layer

    Process the passed-in file-object containing the layer data and
    determine the file contents.
    '''
    layer_file.seek(0)
    with layer_archive(layer_file) as tar_fobj:
        # read the passed-in tarfile directly
        files = read_tarfile(tar_fobj)

    return files


def get_image_files_json(image_id):
    '''return json file listing for given image id
    Download the specified layer and determine the file contents, or
    return the listing previously cached for this image.
    '''
    files_json = get_image_files_cache(image_id)
    if files_json:
        return files_json

    # download remote layer
    image_path = store.image_layer_path(image_id)
    with tempfile.TemporaryFile() as tmp_fobj:
        for buf in store.stream_read(image_path):
            tmp_fobj.write(buf)
        tmp_fobj.seek(0)
        # decompress and untar layer
        files_json = json.dumps(get_image_files_from_fobj(tmp_fobj))
    set_image_files_cache(image_id, files_json)
    return files_json


def get_file_info_map(file_infos):
    '''convert a list of file info tuples to dictionaries
    Convert a list of layer file info tuples to a dictionary using the
    first element (filename) as the key.
    '''
    return dict((file_info[0], file_info[1:]) for file_info in file_infos)


def get_image_diff_cache(image_id):
    image_diff_path = store.image_diff_path(image_id)
    if store.exists(image_diff_path):
        return store.get_content(image_diff_path)


def set_image_diff_cache(image_id, diff_json):
    image_diff_path = store.image_diff_path(image_id)
    store.put_content(image_diff_path, diff_json)


def get_image_diff_json(image_id):
    '''get json describing file differences in layer
    Calculate the diff information for the files contained within
    the layer. Return a dictionary of dictionaries grouped by whether
    the files were deleted, changed or created in this layer.

    To determine what happened to a file in a layer we walk backwards
    through the ancestry until we see the file in an older layer. Based
    on whether the file was previously deleted or not we know whether
    the file was created or modified. If we do not find the file in any
    ancestor we know the file was just created.

    - File marked as deleted by union fs tar: DELETED
    - Ancestor contains non-deleted file: CHANGED
    - Ancestor contains deleted marked file: CREATED
    - No ancestor contains file: CREATED
    '''

    # check the cache first
    diff_json = get_image_diff_cache(image_id)
    if diff_json:
        return diff_json

    # we need all ancestral layers to calculate the diff
    ancestry_path = store.image_ancestry_path(image_id)
    ancestry = json.loads(store.get_content(ancestry_path))[1:]
    # grab the files from the layer
    files = json.loads(get_image_files_json(image_id))
    # convert to a dictionary by filename
    info_map = get_file_info_map(files)

    deleted = {}
    changed = {}
    created = {}

    # walk backwards in time by iterating the ancestry
    for id in ancestry:
        # get the files from the current ancestor
        ancestor_files = json.loads(get_image_files_json(id))
        # convert to a dictionary of the files mapped by filename
        ancestor_map = get_file_info_map(ancestor_files)
        # iterate over each of the top layer's files
        for filename, info in info_map.items():
            ancestor_info = ancestor_map.get(filename)
            # if the file in the top layer is already marked as deleted
            if info[1]:
                deleted[filename] = info
                del info_map[filename]
            # if the file exists in the current ancestor
            elif ancestor_info:
                # if the file was marked as deleted in the ancestor
                if ancestor_info[1]:
                    # it must have been just created in the top layer
                    created[filename] = info
                else:
                    # otherwise it must have simply changed in the top layer
                    changed[filename] = info
                del info_map[filename]
    created.update(info_map)

    # return dictionary of files grouped by file action
    diff_json = json.dumps({
        'deleted': deleted,
        'changed': changed,
        'created': created,
    })

    # store results in cache
    set_image_diff_cache(image_id, diff_json)

    return diff_json
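
To make the classification above concrete, a hypothetical two-layer example:

    # ancestor layer:  /a.txt, /b.txt
    # top layer:       /a.txt (modified), /b.txt (whiteout), /c.txt (new)
    #
    # get_image_diff_json(top_id) would then return, roughly:
    #     {"deleted": {"/b.txt": [...]},
    #      "changed": {"/a.txt": [...]},
    #      "created": {"/c.txt": [...]}}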

lib/rlock.py

+16 -16
@@ -1,27 +1,27 @@
 # https://gist.github.com/adewes/6103220
 
-from redis import WatchError
+import redis
 import time
-
+
+
 class LockTimeout(BaseException):
     pass
-
+
+
 class Lock(object):
-
-    """
-    Implements a distributed lock using Redis.
-    """
-
+
+    '''Implements a distributed lock using Redis.'''
+
     def __init__(self, redis, lock_type, key, expires=60, timeout=10):
         self.key = key
         self.lock_type = lock_type
         self.redis = redis
         self.timeout = timeout
         self.expires = expires
-
+
     def lock_key(self):
-        return "%s:locks:%s" % (self.lock_type,self.key)
-
+        return "%s:locks:%s" % (self.lock_type, self.key)
+
     def __enter__(self):
         timeout = self.timeout
         while timeout >= 0:
@@ -31,21 +31,21 @@ def __enter__(self):
             pipe.watch(lock_key)
             try:
                 lock_value = float(self.redis.get(lock_key))
-            except (ValueError,TypeError):
+            except (ValueError, TypeError):
                 lock_value = None
             if not lock_value or lock_value < time.time():
                 try:
                     pipe.multi()
-                    pipe.set(lock_key,expires)
-                    pipe.expire(lock_key,self.expires+1)
+                    pipe.set(lock_key, expires)
+                    pipe.expire(lock_key, self.expires + 1)
                     pipe.execute()
                     return expires
-                except WatchError:
+                except redis.WatchError:
                     print "Someone tinkered with the lock!"
                     pass
             timeout -= 0.01
             time.sleep(0.01)
         raise LockTimeout("Timeout whilst waiting for lock")
-
+
     def __exit__(self, exc_type, exc_value, traceback):
         self.redis.delete(self.lock_key())
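
A usage sketch for the Lock class above, assuming a hypothetical local Redis
connection (the lock key here becomes 'diff-worker:locks:image-123'):

    conn = redis.StrictRedis()  # hypothetical connection
    with Lock(conn, 'diff-worker', 'image-123', expires=60, timeout=10):
        pass  # critical section; LockTimeout is raised if the lock
              # cannot be acquired within the timeout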
