# NN/Nerf2D.py

import tensorflow as tf
from .utils import extractInterpolated, ensure4d, generateSquareGrid
from .CBaseModel import CBaseModel

class CNerf2D(CBaseModel):
  def __init__(self, 
    encoder, renderer,
    trainingLoss=None,
    residual=False,
    extraLatents=None,
    **kwargs
  ):
    """
    Store the encoder/renderer pair together with the optional training configuration.

    Args:
      encoder: object called on the source images; it must also provide `latentAt`.
      renderer: object providing `train_step` and `batched`.
      trainingLoss: optional callable, forwarded to the renderer as `lossFn`.
      residual: when truthy, upscaled source values are passed to the renderer as residuals.
      extraLatents: optional list of dicts, each with a 'name' key.
      **kwargs: forwarded to the base class constructor.
    """
    super().__init__(**kwargs)
    # encoder is assigned before renderer, exactly as in a pair of separate assignments
    self._encoder, self._renderer = encoder, renderer
    # both binders validate their argument (via assert) and store it on the instance
    self._bindTrainingLoss(trainingLoss)
    self._bindExtraLatents(extraLatents)
    self._residual = residual
  
  def _bindTrainingLoss(self, trainingLoss):
    self._lossParams = dict() # use default loss parameters
    if trainingLoss is None: return
    # validate training loss
    assert callable(trainingLoss), "training loss must be callable"
    self._lossParams = dict(lossFn=trainingLoss)
    return
  
  def _bindExtraLatents(self, extraLatents):
    self._extraLatents = extraLatents
    if extraLatents is None: return
    # validate extra latents config structure if it is present
    assert isinstance(extraLatents, list), "extra latents must be a list"
    for latent in extraLatents:
      assert isinstance(latent, dict), "extra latent must be a dict"
      assert 'name' in latent, "extra latent must have 'name' key"
      continue
    return

  def _extractLatents(self, encodedSrc, positions, training=True):
    """
    Ask the encoder for the latent vector at every sampled position.

    `positions` is checked to have shape (B, N, 2); the first dimension of the result
    returned by `self._encoder.latentAt` is checked to be B * N.
    """
    posShape = tf.shape(positions)
    batch, count = posShape[0], posShape[1]
    tf.assert_equal(posShape, (batch, count, 2))
    result = self._encoder.latentAt(encoded=encodedSrc, pos=positions, training=training)
    # one latent per (image, position) pair is expected along the leading dimension
    tf.assert_equal(tf.shape(result)[:1], (batch * count,))
    return result

  def _extractUpscaled(self, img, points):
    """
    Sample `img` at `points` and return the values as a converted (B, N, 3) tensor.

    `points` is reshaped to (B, -1, 2), where B is the first dimension of `img`.
    The sampled values are forced to 3 channels and passed through `self._converter.convert`.
    """
    batch = tf.shape(img)[0]
    coords = tf.reshape(points, (batch, -1, 2))
    count = tf.shape(coords)[1]
    sampled = extractInterpolated(img, coords)
    # the image may be RGB or grayscale: repeat the channels 3 times and keep the first 3
    threeChannels = tf.tile(sampled, [1, 1, 3])[..., :3]
    threeChannels = tf.reshape(threeChannels, (batch, count, 3))
    # convert to the target format
    return self._converter.convert(threeChannels)
  
  def _withResidual(self, img, points):
    """
    Return the residual values for `points`.

    With residuals enabled this is the upscaled source image sampled at `points`,
    otherwise a zero tensor of shape (B, N, 3) with the dtype of `img`.
    """
    shape = tf.shape(points)
    batch, count = shape[0], shape[1]
    if self._residual:
      return self._extractUpscaled(img, points)

    return tf.zeros((batch, count, 3), dtype=img.dtype)

  def _extractExtraLatents(self, config, src, points, latents):
    name = config['name'].lower()
    if ('upscaled' == name) or ('grayscale' == name):
      return self._converter.convert(
        self._extractUpscaled(src, points)
      )
    
    raise NotImplementedError(f"Unknown extra latent ({name})")
  
  def _withExtraLatents(self, latents, src, points):
    if self._extraLatents is None: return latents
    
    extraData = [
      self._extractExtraLatents(latentConfig, src, points, latents)
      for latentConfig in self._extraLatents
    ]

    C = sum([x.shape[-1] for x in extraData])
    extraData = tf.reshape( # ensure proper shape, especially for last dimension
      tf.concat(extraData, axis=-1),
      tf.concat([tf.shape(latents)[:-1], [C]], axis=0)
    )
    return tf.concat([latents, extraData], axis=-1)
  
  def _extractR(self, YData):
    R = None
    if 'blur R' in YData:
      R = YData['blur R']
      tf.assert_equal(R, R[:, 0:1], "R must be the same for all points")
      R = R[:, 0]
    return R
  
  def train_step(self, data):
    """
    Run a single optimization step.

    Args:
      data: pair `(src, YData)`. `YData` is a dict with the 'sampled' target values and
        their 'positions'; every other entry is flattened and forwarded to the renderer
        as an additional parameter.

    Returns:
      The result of `self.metrics_to_dict(self._loss)`.
    """
    src, targets = data
    src = ensure4d(src)
    x0 = targets['sampled']
    positions = targets['positions']
    posShape = tf.shape(positions)
    B, N = posShape[0], posShape[1]
    # everything except these two keys is passed to the renderer
    extras = {
      name: value for name, value in targets.items()
      if name not in ('sampled', 'positions')
    }

    with tf.GradientTape() as tape:
      encodedSrc = self._encoder(src=src, training=True, R=self._extractR(extras))
      latents = self._extractLatents(encodedSrc=encodedSrc, positions=positions, training=True)
      # train the restorator
      residual = self._withResidual(src, points=positions)
      latents = self._withExtraLatents(latents, src=src, points=positions)

      BN = B * N
      def flatten(t):
        # merge the two leading dimensions, keep the last one
        return tf.reshape(t, (BN, tf.shape(t)[-1]))

      latents = flatten(latents)
      positions = tf.reshape(positions, (BN, 2))
      x0 = flatten(x0)
      # additional parameters, flattened the same way as the main tensors
      params = dict(**extras, residual=residual)
      params = {name: flatten(value) for name, value in params.items()}
      # actual training step
      stepResult = self._renderer.train_step(
        x0=self._converter.convert(x0), # convert to the target format
        latents=latents,
        positions=positions,
        params={**self._lossParams, **params},
      )
      loss = stepResult['loss']
      # one loss value per sampled point is expected
      tf.assert_equal(tf.shape(loss), (BN, ))

    self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    self._loss.update_state(loss)
    return self.metrics_to_dict(self._loss)
  
  def test_step(self, images):
    """
    Reconstruct `dest` from `src` and return the test metrics.

    Args:
      images: pair `(src, dest)`.

    The model is called with the size of `dest` (its second dimension) and with the values
    of `dest` sampled on the same square grid, passed as `GT` for debugging purposes.
    """
    src, dest = images
    src, dest = ensure4d(src), ensure4d(dest)
    batch = tf.shape(src)[0]
    # same grid for every image of the batch
    grid = generateSquareGrid(tf.shape(dest)[1], 1.0, 0.0)
    grid = tf.tile(grid[None], [batch, 1, 1])
    groundTruth = extractInterpolated(dest, grid)
    # call the model itself to obtain the reconstructed image in the proper format
    reconstructed = self(src, size=tf.shape(dest)[1], training=False, GT=groundTruth)
    return self._testMetrics(dest, reconstructed)

  def _createAlgorithmInterceptor(self, interceptor, image, pos):
    """
    Wrap `interceptor` into a `CWatcherWithExtras` and return the wrapper's interceptor.

    `image` and `pos` are accepted but not used here.
    """
    from NN.restorators.samplers.CWatcherWithExtras import CWatcherWithExtras
    wrapper = CWatcherWithExtras(
      watcher=interceptor,
      converter=self._converter,
      # no residuals for the watcher: they are applied in the renderer
      residuals=None
    )
    return wrapper.interceptor()
  #####################################################
  def _withBlur(self, reverseArgs, B):
    """
    Build the (B, 1) blur radius tensor passed to the encoder.

    The 'blurRadius' entry of `reverseArgs` is reshaped to (1, 1) and repeated B times;
    zeros are returned when the entry is absent.
    """
    if 'blurRadius' not in reverseArgs:
      # let the encoder decide whether it is needed or not
      return tf.zeros((B, 1), dtype=tf.float32)

    radius = reverseArgs['blurRadius']
    if not tf.is_tensor(radius):
      radius = tf.convert_to_tensor(radius, dtype=tf.float32)
    radius = tf.reshape(radius, [1, 1])
    return tf.tile(radius, [B, 1])
  
  @tf.function
  def _inference(
    self, src, pos, 
    batchSize, reverseArgs, initialValues, encoderParams,
    GT=None
  ):
    """
    Encode `src` once and render the values at `pos`, chunk by chunk.

    Args:
      src: batch of source images; its first dimension is used as the batch size B.
      pos: tensor of shape (N, 2) with the coordinates, shared by all images of the batch.
      batchSize: forwarded to `self._renderer.batched`.
      reverseArgs: dict forwarded to the renderer with every chunk; its optional
        'blurRadius' entry is turned into the `R` argument of the encoder.
      initialValues: optional tensor, reshaped to (B, N, C); when given, the slice matching
        a chunk is converted by `self._converter.convert` and passed as the chunk `value`.
      encoderParams: forwarded as `params` to the encoder and to its `latentAt`.
      GT: optional tensor, sliced along its second axis per chunk and added to the chunk's
        copy of `reverseArgs` under the 'GT' key with shape (B * chunk size, 3).

    Returns:
      The output of `self._renderer.batched` passed through `self._converter.convertBack`;
      its last dimension is asserted to be 3.
    """
    B = tf.shape(src)[0]
    N = tf.shape(pos)[0]
    tf.assert_equal(tf.shape(pos), (N, 2), "pos must be a 2D tensor of shape (N, 2)")

    if initialValues is not None:
      C = tf.shape(initialValues)[-1]
      tf.assert_equal(tf.shape(initialValues)[:1], (B, ))
      initialValues = tf.reshape(initialValues, (B, N, C))

    # the source is encoded only once, every chunk reuses the result
    R = self._withBlur(reverseArgs, B)
    encoded = self._encoder(src, training=False, params=encoderParams, R=R)
    def getChunk(ind, sz):
      # Collects the renderer inputs for `sz` positions starting at index `ind`
      posC = pos[ind:ind+sz]
      sz = tf.shape(posC)[0] # actual size of the slice
      flatB = B * sz

      # same coordinates for all images in the batch
      posC = tf.tile(posC, [B, 1])
      tf.assert_equal(tf.shape(posC), (flatB, 2))
      posCB = tf.reshape(posC, (B, sz, 2))

      latents = self._encoder.latentAt(
        encoded=encoded, pos=posCB, params=encoderParams,
        training=False
      )
      tf.assert_equal(tf.shape(latents)[:1], (flatB,))
      # without initial values only this tuple is passed
      # NOTE(review): presumably the renderer treats it as the shape of the values - confirm
      value = (flatB, )
      if initialValues is not None:
        value = initialValues[:, ind:ind+sz, :]
        value = self._converter.convert(value) # convert initial values to the proper format
        tf.assert_equal(tf.shape(value), (B, sz, C))
        value = tf.reshape(value, (flatB, C))
        pass

      # add extra latents if needed
      latents = self._withExtraLatents(latents=latents, src=src, points=posC)
      # get residuals if needed
      residual = self._withResidual(src, points=posCB)
      residual = tf.reshape(residual, (-1, 3))
      tf.assert_equal(tf.shape(residual), (flatB, 3))

      reverseArgsNew = reverseArgs
      if GT is not None:
        # add ground truth values to copy of reverseArgs
        reverseArgsNew = {**reverseArgs, 'GT': tf.reshape(GT[:, ind:ind+sz], (flatB, 3))}
        pass
      return dict(
        latents=latents, pos=posC, reverseArgs=reverseArgsNew, value=value,
        residual=residual
      )

    probes = self._renderer.batched(ittr=getChunk, B=B, N=N, batchSize=batchSize, training=False)
    # NOTE: `C` is rebound here, after `getChunk` has been handed to the renderer
    C = tf.shape(probes)[-1]
    tf.assert_equal(C, 3, "Expected 3 channels in the output")
    # convert to the proper format
    probes = self._converter.convertBack(probes)
    return probes
  
  @tf.function
  def call(self, 
    src,
    size=32, scale=1.0, shift=0.0, # required be a default arguments for building the model
    pos=None,
    batchSize=None, # renderers batch size
    initialValues=None, # initial values for the restoration process
    reverseArgs=None,
    GT=None, # ground truth values for debugging purposes
  ):
    """
    Render the values for `src` either on a square grid or at explicit positions.

    Args:
      src: source images, passed through `ensure4d`.
      size, scale, shift: arguments of `generateSquareGrid`, used only when `pos` is None.
      pos: optional explicit positions; the result then has shape (B, number of positions, 3)
        instead of (B, size, size, 3).
      batchSize: batch size of the renderer.
      initialValues: initial values for the restoration process.
      reverseArgs: optional dict. Its 'encoder' entry is split off and passed as the encoder
        parameters; an 'algorithmInterceptor' entry is replaced by the wrapped interceptor.
      GT: ground truth values for debugging purposes.

    Returns:
      The rendered values reshaped to the shape described above.
    """
    src = ensure4d(src)
    B = tf.shape(src)[0]
    # precompute the output shape
    if pos is not None:
      sampleShape = [B, tf.shape(pos)[0], 3]
    else:
      pos = generateSquareGrid(size, scale, shift)
      sampleShape = [B, size, size, 3]

    # prepare the reverseArgs and encoderParams
    if reverseArgs is None: reverseArgs = {}
    assert isinstance(reverseArgs, dict), "reverseArgs must be a dict"
    encoderParams = reverseArgs.get("encoder", {})
    # the renderer receives a copy without the encoder parameters
    rendererArgs = {
      name: value for name, value in reverseArgs.items()
      if 'encoder' != name
    }
    # add interceptors if needed
    if 'algorithmInterceptor' in rendererArgs:
      wrapped = self._createAlgorithmInterceptor(
        interceptor=rendererArgs['algorithmInterceptor'],
        image=src, pos=tf.tile(pos[None], [B, 1, 1])
      )
      # fresh dict with the same keys in the same order, only this entry is replaced
      rendererArgs = dict(rendererArgs, algorithmInterceptor=wrapped)

    probes = self._inference(
      src=src, pos=pos,
      batchSize=batchSize,
      reverseArgs=rendererArgs,
      encoderParams=encoderParams,
      initialValues=initialValues,
      GT=GT
    )
    return tf.reshape(probes, sampleShape)
  
  def get_input_shape(self):
    return self._encoder.get_input_shape()