Merge branch 'dev' into master

GreenWizard2015 · Oct 11, 2024 · 550633b · 550633b
2 parents 1a3c4cd + 039902b
commit 550633b
Show file tree

Hide file tree

Showing 10 changed files with 531 additions and 44 deletions.
diff --git a/Core/CDatasetLoader.py b/Core/CDatasetLoader.py
@@ -4,9 +4,14 @@
 from Core.CDataSampler import CDataSampler
 import numpy as np
 import tensorflow as tf
+from enum import Enum
 
+class ESampling(Enum):
+  AS_IS = 'as_is'
+  UNIFORM = 'uniform'
+
 class CDatasetLoader:
-  def __init__(self, folder, samplerArgs, stats):
+  def __init__(self, folder, samplerArgs, sampling, stats):
     # recursively find all 'train.npz' files
     trainFiles = glob.glob(os.path.join(folder, '**', 'train.npz'), recursive=True)
     if 0 == len(trainFiles):
@@ -40,10 +45,23 @@ def __init__(self, folder, samplerArgs, stats):
     }
     dtype = np.uint8 if len(self._datasets) < 256 else np.uint32
     # create an array of dataset indices to sample from
-    self._indices = np.concatenate([
-      np.full((v, ), k, dtype=dtype) # id of the dataset
-      for k, v in validSamples.items()
-    ])
+    sampling = ESampling(sampling)
+    if ESampling.AS_IS == sampling: # just concatenate the indices
+      self._indices = np.concatenate([
+        np.full((v, ), k, dtype=dtype) # id of the dataset
+        for k, v in validSamples.items()
+      ])
+    if ESampling.UNIFORM == sampling:
+      maxSize = max(validSamples.values())
+      chunks = []
+      for k, size in validSamples.items():
+        # all datasets should have the same number of samples represented in the indices
+        # so that the sampling is uniform
+        chunk = np.full((maxSize, ), k, dtype=dtype)
+        chunks.append(chunk)
+        continue
+      self._indices = np.concatenate(chunks)
+
     self._currentId = 0
 
     self._batchSize = samplerArgs.get('batch_size', 16)

diff --git a/Core/CModelDiffusion.py b/Core/CModelDiffusion.py
@@ -0,0 +1,272 @@
+import os
+import numpy as np
+import NN.networks as networks
+import tensorflow as tf
+import tensorflow_probability as tfp
+import NN.Utils as NNU
+import time
+from tensorflow.keras import layers as L
+
+# TODO: Implement the standard diffusion process (with the prediction of the noise, proper sampling, etc)
+class CModelDiffusion:
+  '''
+  Wrapper for the diffusion model to predict the gaze point
+  Diffusion T is equal to the stddev of the gaussian noise
+  '''
+  def __init__(self, timesteps, model='simple', user=None, stats=None, use_encoders=False, **kwargs):
+    if user is None:
+      user = {
+        'userId': 0,
+        'placeId': 0,
+        'screenId': 0,
+      }
+    else:
+      user = {
+        'userId': stats['userId'].index(user['userId']),
+        'placeId': stats['placeId'].index(user['placeId']),
+        'screenId': stats['screenId'].index(user['screenId']),
+      }
+    self._user = user
+
+    self._modelID = model
+    self._timesteps = timesteps
+    embeddings = {
+      'userId': len(stats['userId']),
+      'placeId': len(stats['placeId']),
+      'screenId': len(stats['screenId']),
+      'size': 64,
+    }
+    self._modelRaw = networks.Face2LatentModel(
+      steps=timesteps, latentSize=64, embeddings=embeddings,
+      diffusion=True
+    )
+    self._model = self._modelRaw['main']
+    self._embeddings = {
+      'userId': L.Embedding(len(stats['userId']), embeddings['size']),
+      'placeId': L.Embedding(len(stats['placeId']), embeddings['size']),
+      'screenId': L.Embedding(len(stats['screenId']), embeddings['size']),
+    }
+    self._intermediateEncoders = {}
+    if use_encoders:
+      shapes = self._modelRaw['intermediate shapes']
+      for name, shape in shapes.items():
+        enc = networks.IntermediatePredictor(name='%s-encoder' % name)
+        enc.build(shape)
+        self._intermediateEncoders[name] = enc
+        continue
+
+    self._maxDiffusionT = 100.0
+    if 'weights' in kwargs:
+      self.load(**kwargs['weights'])
+    self.compile()
+    # add signatures to help tensorflow optimize the graph
+    specification = self._modelRaw['inputs specification']
+    self._trainStep = tf.function(
+      self._trainStep,
+      input_signature=[
+        (
+          { 'clean': specification, 'augmented': specification, },
+          ( tf.TensorSpec(shape=(None, None, None, 2), dtype=tf.float32), )
+        )
+      ]
+    )
+    self._eval = tf.function(
+      self._eval,
+      input_signature=[(
+        specification,
+        ( tf.TensorSpec(shape=(None, None, None, 2), dtype=tf.float32), )
+      )]
+    )
+
+    return
+
+  def _step2mean(self, step):
+    step = tf.cast(step, tf.float32) / self._maxDiffusionT
+    step = tf.cast(step, tf.float32) + 1e-6
+    # step = tf.pow(step, 2.0) # make it decrease faster
+    return tf.clip_by_value(step, 1e-3, 1.0)
+
+  def _replaceByEmbeddings(self, data):
+    data = dict(**data) # copy
+    for name, emb in self._embeddings.items():
+      data[name] = emb(data[name][..., 0])
+      continue
+    return data
+
+  def _makeGaussian(self, mean, stddev):
+    stddev = tf.concat([stddev, stddev], axis=-1)
+    return tfp.distributions.MultivariateNormalDiag(mean, stddev)
+
+  @tf.function
+  def _infer(self, data, training=False):
+    print('Instantiate _infer')
+    data = self._replaceByEmbeddings(data)
+    shp = tf.shape(data['userId'])
+    B, N = shp[0], self.timesteps
+    result = tf.zeros((B, N, 2), dtype=tf.float32)
+    for step in tf.range(self._maxDiffusionT, -1, -5):
+      mean = self._step2mean(
+        tf.fill((B, N, 1), step)
+      )
+      stepData = dict(**data)
+      stepData['diffusionT'] = mean
+      stepData['diffusionPoints'] = tf.random.normal((B, N, 2), mean=result, stddev=mean)
+      result = self._model(stepData, training=training)['result']
+    return result
+
+  def predict(self, data, **kwargs):
+    B = self._timesteps
+    userId = kwargs.get('userId', self._user['userId'])
+    placeId = kwargs.get('placeId', self._user['placeId'])
+    screenId = kwargs.get('screenId', self._user['screenId'])
+    # put them as (1, B, ?)
+    data['userId'] = np.full((1, B, 1), userId, dtype=np.int32)
+    data['placeId'] = np.full((1, B, 1), placeId, dtype=np.int32)
+    data['screenId'] = np.full((1, B, 1), screenId, dtype=np.int32)
+
+    data = self._replaceByEmbeddings(data) # replace embeddings
+
+    result = self._infer(data)
+    return result.numpy()
+
+  def __call__(self, data, startPos=None):
+    predictions = self.predict(data)
+    return {
+      'coords': predictions[0, -1, :],
+    }
+
+  def compile(self):
+    self._optimizer = NNU.createOptimizer()
+    return
+
+  def _modelFilename(self, folder, postfix=''):
+    postfix = '-' + postfix if postfix else ''
+    return os.path.join(folder, '%s-%s%s.h5' % (self._modelID, 'model', postfix))
+
+  def save(self, folder=None, postfix=''):
+    path = self._modelFilename(folder, postfix)
+    self._model.save_weights(path)
+    embeddings = {}
+    for nm in self._embeddings.keys():
+      weights = self._embeddings[nm].get_weights()[0]
+      embeddings[nm] = weights
+      continue
+    np.savez_compressed(path.replace('.h5', '-embeddings.npz'), **embeddings)
+    # save intermediate encoders
+    if self._intermediateEncoders:
+      encoders = {}
+      for nm, encoder in self._intermediateEncoders.items():
+        # save each variable separately
+        for ww in encoder.trainable_variables:
+          encoders['%s-%s' % (nm, ww.name)] = ww.numpy()
+        continue
+      np.savez_compressed(path.replace('.h5', '-intermediate-encoders.npz'), **encoders)
+    return
+
+  def load(self, folder=None, postfix='', embeddings=False):
+    path = self._modelFilename(folder, postfix) if not os.path.isfile(folder) else folder
+    self._model.load_weights(path)
+    if embeddings:
+      embeddings = np.load(path.replace('.h5', '-embeddings.npz'))
+      for nm, emb in self._embeddings.items():
+        w = embeddings[nm]
+        if not emb.built: emb.build((None, w.shape[0]))
+        emb.set_weights([w]) # replace embeddings
+        continue
+
+    if self._intermediateEncoders:
+      encodersName = path.replace('.h5', '-intermediate-encoders.npz')
+      if os.path.isfile(encodersName):
+        encoders = np.load(encodersName)
+        for nm, encoder in self._intermediateEncoders.items():
+          for ww in encoder.trainable_variables:
+            w = encoders['%s-%s' % (nm, ww.name)]
+            ww.assign(w)
+          continue
+    return
+
+  def lock(self, isLocked):
+    self._model.trainable = not isLocked
+    return
+
+  @property
+  def timesteps(self):
+    return self._timesteps
+
+  def trainable_variables(self):
+    parts = list(self._embeddings.values()) + [self._model] + list(self._intermediateEncoders.values())
+    return sum([p.trainable_variables for p in parts], [])  
+
+  def _pointLoss(self, ytrue, ypred):
+    # pseudo huber loss
+    delta = 0.01
+    tf.assert_equal(tf.shape(ytrue), tf.shape(ypred))
+    diff = tf.square(ytrue - ypred)
+    loss = tf.sqrt(diff + delta ** 2) - delta
+    tf.assert_equal(tf.shape(loss), tf.shape(ytrue))
+    return tf.reduce_mean(loss, axis=-1)
+
+  def _trainStep(self, Data):
+    print('Instantiate _trainStep')
+    ###############
+    x, (y, ) = Data
+    y = y[..., 0, :]
+    losses = {}
+    with tf.GradientTape() as tape:
+      data = x['augmented']
+      data = self._replaceByEmbeddings(data)
+      # add sampled T
+      B = tf.shape(y)[0]
+      N = self.timesteps
+      maxT = 100
+      diffusionT = tf.random.uniform((B, 1), minval=0, maxval=maxT, dtype=tf.int32)
+      # (B, 1) -> (B, N, 1)
+      diffusionT = tf.tile(diffusionT, (1, N))[..., None]
+      diffusionT = self._step2mean(diffusionT)
+      tf.assert_equal(tf.shape(diffusionT), (B, N, 1))
+
+      # store the diffusion parameters
+      data['diffusionT'] = diffusionT
+      # sample the points
+      data['diffusionPoints'] = tf.random.normal((B, N, 2), mean=y, stddev=diffusionT)
+      predictions = self._model(data, training=True)
+    #   intermediate = predictions['intermediate']
+    #   assert len(intermediate) == 0, 'Intermediate predictions are not supported'
+
+      predictedMean = predictions['result']
+      gaussian = self._makeGaussian(predictedMean, diffusionT)
+      losses['log_prob'] = tf.reduce_mean(
+        -gaussian.log_prob(y)
+      )
+      losses['points'] = self._pointLoss(y, predictedMean)
+      loss = sum(losses.values())
+      losses['loss'] = loss
+
+    self._optimizer.minimize(loss, tape.watched_variables(), tape=tape)
+    ###############
+    return losses
+
+  def fit(self, data):
+    t = time.time()
+    losses = self._trainStep(data)
+    losses = {k: v.numpy() for k, v in losses.items()}
+    return {'time': int((time.time() - t) * 1000), 'losses': losses}
+
+  def _eval(self, xy):
+    print('Instantiate _eval')
+    x, (y,) = xy
+    y = y[:, :, 0]
+    B, N = tf.shape(y)[0], tf.shape(y)[1]
+
+    predictions = self._infer(x)
+
+    mean = self._step2mean(tf.fill((B, N, 1), 0))
+    gaussian = self._makeGaussian(predictions, mean)
+    loss = tf.nn.sigmoid( -gaussian.log_prob(y) )
+    points = predictions
+    _, dist = NNU.normVec(y - predictions)
+    return loss, points, dist
+
+  def eval(self, data):
+    loss, sampled, dist = self._eval(data)
+    return loss.numpy(), sampled.numpy(), dist.numpy()
diff --git a/Core/CModelTrainer.py b/Core/CModelTrainer.py
@@ -40,26 +40,52 @@ def _pointLoss(self, ytrue, ypred):
     tf.assert_equal(tf.shape(loss), tf.shape(ytrue))
     return tf.reduce_mean(loss, axis=-1)
 
-  def _trainStep(self, Data):
-    print('Instantiate _trainStep')
-    ###############
-    x, (y, ) = Data
-    y = y[..., 0, :]
-    losses = {}
-    with tf.GradientTape() as tape:
-      data = x['augmented']
+  def _trainOn(self, data, y_list):
+      def calculate_loss(predictions):
+        # select the smallest loss from the list of suggested points
+        losses = []
+        for y in y_list:
+          loss = self._pointLoss(y, predictions)[..., None]
+          losses.append(loss)
+          continue
+        losses = tf.concat(losses, axis=-1)
+        shp = tf.shape(y_list[0])
+        tf.assert_equal(tf.shape(losses), tf.concat([shp[:-1], [len(y_list)]], axis=0))
+        losses = tf.reduce_min(losses, axis=-1)
+        tf.assert_equal(tf.shape(losses), shp[:-1])
+        return tf.reduce_mean(losses)
+
       data = self._replaceByEmbeddings(data)
       predictions = self._model(data, training=True)
       intermediate = predictions['intermediate']
-      losses['final'] = tf.reduce_mean(self._pointLoss(y, predictions['result']))
+      finalPredictions = predictions['result']
+      losses = {}
+      losses['final'] = calculate_loss(finalPredictions)
       for name, encoder in self._intermediateEncoders.items():
         latent = intermediate[name]
         pts = encoder(latent, training=True)
-        loss = self._pointLoss(y, pts)
+        loss = calculate_loss(pts)
         losses['loss-%s' % name] = tf.reduce_mean(loss)
         continue
-      loss = sum(losses.values())
-      losses['loss'] = loss
+      return losses, tf.stop_gradient(finalPredictions)
+
+  def _trainStep(self, Data):
+    print('Instantiate _trainStep')
+    ###############
+    x, (y, ) = Data
+    y = y[..., 0, :]
+    losses = {}
+    with tf.GradientTape() as tape:
+      lossesClean, y_clean = self._trainOn(x['clean'], [y])
+      # ensure that the augmentations are not affect predictions
+      lossesAugmented, _ = self._trainOn(x['augmented'], [y, y_clean])
+      assert lossesClean.keys() == lossesAugmented.keys(), 'Losses keys mismatch'
+      # combine losses
+      losses = {k: lossesClean[k] + lossesAugmented[k] for k in lossesClean.keys()}
+      # calculate total loss and final loss
+      losses['total-clean'] = sum(lossesClean.values())
+      losses['total-augmented'] = sum(lossesAugmented.values())
+      losses['loss'] = loss = sum([losses['total-clean'], losses['total-augmented']])
 
     self._optimizer.minimize(loss, tape.watched_variables(), tape=tape)
     ###############