| 1 | +import tarfile |
| 2 | +import tempfile |
| 3 | + |
| 4 | +import backports.lzma as lzma |
| 5 | +import simplejson as json |
| 6 | + |
| 7 | +import cache |
| 8 | +import rqueue |
| 9 | +import storage.local |
| 10 | +store = storage.load() |
| 11 | + |
| 12 | +FILE_TYPES = { |
| 13 | + tarfile.REGTYPE: 'f', |
| 14 | + tarfile.DIRTYPE: 'd', |
| 15 | + tarfile.LNKTYPE: 'l', |
| 16 | + tarfile.SYMTYPE: 's', |
| 17 | + tarfile.CHRTYPE: 'c', |
| 18 | + tarfile.BLKTYPE: 'b', |
| 19 | +} |
| 20 | + |
| 21 | +# queue for requesting diff calculations from workers |
| 22 | +diff_queue = rqueue.CappedCollection(cache.redis_conn, "diff-worker", 1024) |
| 23 | + |
| 24 | + |
| 25 | +def generate_ancestry(image_id, parent_id=None): |
| 26 | + if not parent_id: |
| 27 | + store.put_content(store.image_ancestry_path(image_id), |
| 28 | + json.dumps([image_id])) |
| 29 | + return |
| 30 | + data = store.get_content(store.image_ancestry_path(parent_id)) |
| 31 | + data = json.loads(data) |
| 32 | + data.insert(0, image_id) |
| 33 | + store.put_content(store.image_ancestry_path(image_id), json.dumps(data)) |
| 34 | + |
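|  | +# Illustrative note, not part of the change ("img", "p1", "base" are |
|  | +# hypothetical ids): ancestry files are stored newest-first. If the |
|  | +# parent's ancestry is ["p1", "base"], generate_ancestry("img", "p1") |
|  | +# writes ["img", "p1", "base"] to store.image_ancestry_path("img"); |
|  | +# with no parent it writes just ["img"]. |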
| 35 | + |
| 36 | +class layer_archive(object): |
| 37 | +    '''Context manager for untarring a possibly xz/lzma-compressed archive.''' |
| 38 | + def __init__(self, fobj): |
| 39 | + self.orig_fobj = fobj |
| 40 | + self.lzma_fobj = None |
| 41 | + self.tar_obj = None |
| 42 | + |
| 43 | + def __enter__(self): |
| 44 | + target_fobj = self.orig_fobj |
| 45 | + try: |
| 46 | + # try to decompress the archive |
| 47 | + self.lzma_fobj = lzma.LZMAFile(filename=target_fobj) |
| 48 | + self.lzma_fobj.read() |
| 49 | + self.lzma_fobj.seek(0) |
| 50 | + except lzma._lzma.LZMAError: |
| 51 | +            pass  # it's okay if we can't |
| 52 | + else: |
| 53 | + target_fobj = self.lzma_fobj |
| 54 | + finally: # reset whatever fp we ended up using |
| 55 | + target_fobj.seek(0) |
| 56 | + |
| 57 | + # untar the fobj, whether it was the original or the lzma |
| 58 | + self.tar_obj = tarfile.open(mode='r|*', fileobj=target_fobj) |
| 59 | + return self.tar_obj |
| 60 | + |
| 61 | + def __exit__(self, type, value, traceback): |
| 62 | + # clean up |
| 63 | + self.tar_obj.close() |
| 64 | + self.lzma_fobj.close() |
| 65 | + self.orig_fobj.seek(0) |
| 66 | + |
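|  | +# Usage sketch (illustrative; layer_fobj is any seekable binary file |
|  | +# object holding a layer tarball, plain or xz/lzma-compressed): |
|  | +# |
|  | +#     with layer_archive(layer_fobj) as tar: |
|  | +#         members = tar.getmembers() |
|  | +# |
|  | +# On exit the tar and lzma wrappers are closed and the original file |
|  | +# object is rewound to offset 0, not closed. |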
| 67 | + |
| 68 | +def serialize_tar_info(tar_info): |
| 69 | + '''serialize a tarfile.TarInfo instance |
| 70 | + Take a single tarfile.TarInfo instance and serialize it to a |
| 71 | +    tuple. Detect union whiteouts by filename and mark those paths as |
| 72 | +    deleted in the third element. Union metadata files are skipped |
| 73 | +    (None is returned for them). |
| 74 | + ''' |
| 75 | + is_deleted = False |
| 76 | + filename = tar_info.name |
| 77 | + |
| 78 | + # notice and strip whiteouts |
| 79 | + if filename == ".": |
| 80 | + filename = '/' |
| 81 | + |
| 82 | + if filename.startswith("./"): |
| 83 | + filename = "/" + filename[2:] |
| 84 | + |
| 85 | + if filename.startswith("/.wh."): |
| 86 | + filename = "/" + filename[5:] |
| 87 | + is_deleted = True |
| 88 | + |
| 89 | + if filename.startswith("/.wh."): |
| 90 | + return None |
| 91 | + |
| 92 | + return ( |
| 93 | + filename, |
| 94 | + FILE_TYPES[tar_info.type], |
| 95 | + is_deleted, |
| 96 | + tar_info.size, |
| 97 | + tar_info.mtime, |
| 98 | + tar_info.mode, |
| 99 | + tar_info.uid, |
| 100 | + tar_info.gid, |
| 101 | + ) |
| 102 | + |
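|  | +# Examples (illustrative names): a regular member "./etc/hosts" becomes |
|  | +# ("/etc/hosts", 'f', False, size, mtime, mode, uid, gid); a root-level |
|  | +# whiteout "./.wh.foo" becomes ("/foo", 'f', True, ...); aufs metadata |
|  | +# such as "./.wh..wh.plnk" yields None and is dropped by read_tarfile. |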
| 103 | + |
| 104 | +def read_tarfile(tar_fobj): |
| 105 | + # iterate over each file in the tar and then serialize it |
| 106 | + return [ |
| 107 | + i for i in [serialize_tar_info(m) for m in tar_fobj.getmembers()] |
| 108 | + if i is not None |
| 109 | + ] |
| 110 | + |
| 111 | + |
| 112 | +def get_image_files_cache(image_id): |
| 113 | + image_files_path = store.image_files_path(image_id) |
| 114 | + if store.exists(image_files_path): |
| 115 | + return store.get_content(image_files_path) |
| 116 | + |
| 117 | + |
| 118 | +def set_image_files_cache(image_id, files_json): |
| 119 | + image_files_path = store.image_files_path(image_id) |
| 120 | + store.put_content(image_files_path, files_json) |
| 121 | + |
| 122 | + |
| 123 | +def get_image_files_from_fobj(layer_file): |
| 124 | +    '''get files from an open file object containing a layer |
| 125 | + |
| 126 | +    Decompress and untar the passed-in file object and return the |
| 127 | +    serialized file listing for the layer it contains. Downloading |
| 128 | +    the layer is left to the caller. |
| 129 | + |
| 130 | +    ''' |
| 131 | + layer_file.seek(0) |
| 132 | + with layer_archive(layer_file) as tar_fobj: |
| 133 | + # read passed in tarfile directly |
| 134 | + files = read_tarfile(tar_fobj) |
| 135 | + |
| 136 | + return files |
| 137 | + |
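|  | +# Usage sketch (illustrative; layer_fobj is an open, seekable file |
|  | +# object containing the raw layer data): |
|  | +# |
|  | +#     file_infos = get_image_files_from_fobj(layer_fobj) |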
| 138 | + |
| 139 | +def get_image_files_json(image_id): |
| 140 | +    '''return the JSON file listing for the given image id |
| 141 | + |
| 142 | +    Return the cached listing when one exists; otherwise download |
| 143 | +    the layer, determine its file contents, and cache the result. |
| 144 | +    ''' |
| 145 | + files_json = get_image_files_cache(image_id) |
| 146 | + if files_json: |
| 147 | + return files_json |
| 148 | + |
| 149 | + # download remote layer |
| 150 | + image_path = store.image_layer_path(image_id) |
| 151 | + with tempfile.TemporaryFile() as tmp_fobj: |
| 152 | + for buf in store.stream_read(image_path): |
| 153 | + tmp_fobj.write(buf) |
| 154 | + tmp_fobj.seek(0) |
| 155 | + # decompress and untar layer |
| 156 | + files_json = json.dumps(get_image_files_from_fobj(tmp_fobj)) |
| 157 | + set_image_files_cache(image_id, files_json) |
| 158 | + return files_json |
| 159 | + |
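|  | +# Usage sketch (illustrative; "abc123" is a made-up image id): |
|  | +# |
|  | +#     files = json.loads(get_image_files_json("abc123")) |
|  | +# |
|  | +# Each entry follows the (filename, type, is_deleted, size, mtime, |
|  | +# mode, uid, gid) layout from serialize_tar_info above (tuples arrive |
|  | +# back as JSON arrays). |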
| 160 | + |
| 161 | +def get_file_info_map(file_infos): |
| 162 | +    '''convert a list of file info tuples to a dictionary |
| 163 | + Convert a list of layer file info tuples to a dictionary using the |
| 164 | + first element (filename) as the key. |
| 165 | + ''' |
| 166 | + return dict((file_info[0], file_info[1:]) for file_info in file_infos) |
| 167 | + |
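|  | +# Example (illustrative values): [("/etc/hosts", 'f', False, 12, 0, 420, 0, 0)] |
|  | +# maps to {"/etc/hosts": ('f', False, 12, 0, 420, 0, 0)}. |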
| 168 | + |
| 169 | +def get_image_diff_cache(image_id): |
| 170 | + image_diff_path = store.image_diff_path(image_id) |
| 171 | + if store.exists(image_diff_path): |
| 172 | + return store.get_content(image_diff_path) |
| 173 | + |
| 174 | + |
| 175 | +def set_image_diff_cache(image_id, diff_json): |
| 176 | + image_diff_path = store.image_diff_path(image_id) |
| 177 | + store.put_content(image_diff_path, diff_json) |
| 178 | + |
| 179 | + |
| 180 | +def get_image_diff_json(image_id): |
| 181 | + '''get json describing file differences in layer |
| 182 | + Calculate the diff information for the files contained within |
| 183 | + the layer. Return a dictionary of lists grouped by whether they |
| 184 | + were deleted, changed or created in this layer. |
| 185 | + |
| 186 | + To determine what happened to a file in a layer we walk backwards |
| 187 | + through the ancestry until we see the file in an older layer. Based |
| 188 | + on whether the file was previously deleted or not we know whether |
| 189 | + the file was created or modified. If we do not find the file in an |
| 190 | + ancestor we know the file was just created. |
| 191 | + |
| 192 | + - File marked as deleted by union fs tar: DELETED |
| 193 | + - Ancestor contains non-deleted file: CHANGED |
| 194 | + - Ancestor contains deleted marked file: CREATED |
| 195 | + - No ancestor contains file: CREATED |
| 196 | + ''' |
| 197 | + |
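|  | +    # Worked example (illustrative): if this layer holds "/a" and a |
|  | +    # whiteout entry for "/b", while its parent holds "/a" and "/b", |
|  | +    # then "/b" lands in 'deleted', "/a" in 'changed', and any file |
|  | +    # seen in no ancestor at all lands in 'created'. |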
| 198 | + # check the cache first |
| 199 | + diff_json = get_image_diff_cache(image_id) |
| 200 | + if diff_json: |
| 201 | + return diff_json |
| 202 | + |
| 203 | + # we need all ancestral layers to calculate the diff |
| 204 | + ancestry_path = store.image_ancestry_path(image_id) |
| 205 | + ancestry = json.loads(store.get_content(ancestry_path))[1:] |
| 206 | + # grab the files from the layer |
| 207 | + files = json.loads(get_image_files_json(image_id)) |
| 208 | + # convert to a dictionary by filename |
| 209 | + info_map = get_file_info_map(files) |
| 210 | + |
| 211 | + deleted = {} |
| 212 | + changed = {} |
| 213 | + created = {} |
| 214 | + |
| 215 | + # walk backwards in time by iterating the ancestry |
| 216 | + for id in ancestry: |
| 217 | + # get the files from the current ancestor |
| 218 | + ancestor_files = json.loads(get_image_files_json(id)) |
| 219 | + # convert to a dictionary of the files mapped by filename |
| 220 | + ancestor_map = get_file_info_map(ancestor_files) |
| 221 | + # iterate over each of the top layer's files |
| 222 | + for filename, info in info_map.items(): |
| 223 | + ancestor_info = ancestor_map.get(filename) |
| 224 | + # if the file in the top layer is already marked as deleted |
| 225 | + if info[1]: |
| 226 | + deleted[filename] = info |
| 227 | + del info_map[filename] |
| 228 | + # if the file exists in the current ancestor |
| 229 | + elif ancestor_info: |
| 230 | + # if the file was marked as deleted in the ancestor |
| 231 | + if ancestor_info[1]: |
| 232 | +                    # it must have been just created in the top layer |
| 233 | + created[filename] = info |
| 234 | + else: |
| 235 | + # otherwise it must have simply changed in the top layer |
| 236 | + changed[filename] = info |
| 237 | + del info_map[filename] |
| 238 | + created.update(info_map) |
| 239 | + |
| 240 | + # return dictionary of files grouped by file action |
| 241 | + diff_json = json.dumps({ |
| 242 | + 'deleted': deleted, |
| 243 | + 'changed': changed, |
| 244 | + 'created': created, |
| 245 | + }) |
| 246 | + |
| 247 | + # store results in cache |
| 248 | + set_image_diff_cache(image_id, diff_json) |
| 249 | + |
| 250 | + return diff_json |