adjust optimizer, increase model size, add LayerNormalization somewhere
GreenWizard2015 committed Jul 10, 2024
1 parent df36145 commit 130d1e5
Showing 2 changed files with 18 additions and 10 deletions.
NN/Utils.py (6 changes: 4 additions & 2 deletions)

@@ -237,6 +237,7 @@ def __init__(self, mlp=None, **kwargs):
     if mlp is None: mlp = lambda x: x
     self._mlp = mlp
     self._norm = L.LayerNormalization()
+    self._norm2 = L.LayerNormalization()
     return
 
   def build(self, input_shapes):
@@ -253,14 +254,15 @@ def call(self, x):
     xhat = self._lastDense(xhat)
     x0 = x[0]
     x = tf.concat([x0, xhat], axis=-1)
-    return self._combiner(x)
+    res = self._combiner(x)
+    return self._norm2(res)
 ####################################
 # Hacky way to provide same optimizer for all models
 def createOptimizer(config=None):
   if config is None:
     config = {
       'learning_rate': 1e-4,
-      'weight_decay': 1e-1,
+      'weight_decay': 1e-4,
       'exclude_from_weight_decay': [
         'batch_normalization', 'bias',
         'CEL_', # exclude CCoordsEncodingLayer from weight decay
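
Note on the call() change above: the block now normalizes the fused output instead of returning it raw. A minimal sketch of the resulting post-norm pattern, using hypothetical stand-ins for the parts this diff does not show (the real class name and the definitions of _combiner and _lastDense live outside the excerpt):

import tensorflow as tf
import tensorflow.keras.layers as L

class CombinerSketch(tf.keras.layers.Layer):  # hypothetical name
  def __init__(self, units, **kwargs):
    super().__init__(**kwargs)
    self._combiner = L.Dense(units)       # stand-in for the real combiner
    self._norm2 = L.LayerNormalization()  # the layer this commit adds

  def call(self, x):
    # Fuse first, then normalize, so downstream layers always
    # see outputs at a stable scale.
    res = self._combiner(x)
    return self._norm2(res)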
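Note on createOptimizer: the default weight decay drops from 1e-1 to 1e-4, and the exclusion list keeps normalization parameters, biases, and CCoordsEncodingLayer weights out of the decay. The diff shows only the config, so here is a minimal sketch of how such a config could drive an optimizer, assuming tf.keras.optimizers.AdamW and its exclude_from_weight_decay(var_names=...) hook (the repo's actual optimizer construction is not visible in this commit):

import tensorflow as tf

def createOptimizer(config=None):
  # Defaults mirror the config in the diff above.
  if config is None:
    config = {
      'learning_rate': 1e-4,
      'weight_decay': 1e-4,
      'exclude_from_weight_decay': [
        'batch_normalization', 'bias',
        'CEL_',  # exclude CCoordsEncodingLayer from weight decay
      ],
    }
  optimizer = tf.keras.optimizers.AdamW(
    learning_rate=config['learning_rate'],
    weight_decay=config['weight_decay'],
  )
  # Variables whose names match these patterns are not decayed.
  optimizer.exclude_from_weight_decay(var_names=config['exclude_from_weight_decay'])
  return optimizer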
NN/networks.py (22 changes: 14 additions & 8 deletions)

@@ -64,8 +64,10 @@ def Face2StepModel(pointsN, eyeSize, latentSize, embeddingsSize):
   for i, EFeat in enumerate(encodedEFList):
     combined = CFusingBlock(name='F2S/ResMul-%d' % i)([
       combined,
-      sMLP(sizes=[latentSize] * 1, activation='relu', name='F2S/MLP-%d' % i)(
-        L.Concatenate(-1)([combined, encodedP, EFeat, embeddings])
+      sMLP(sizes=[latentSize] * 3, activation='relu', name='F2S/MLP-%d' % i)(
+        L.LayerNormalization()(
+          L.Concatenate(-1)([combined, encodedP, EFeat, embeddings])
+        )
       )
     ])
     # save intermediate output
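
Note: this hunk widens the per-step MLP from one layer to three and inserts a LayerNormalization between the feature concatenation and the MLP. A small standalone sketch of that pre-norm fusion step, with a plain Dense stack standing in for the repo's sMLP helper:

import tensorflow.keras.layers as L

def preNormFusion(combined, extras, latentSize):
  # Concatenate all conditioning features, bring them to a common
  # scale, then apply the (now deeper) MLP.
  x = L.Concatenate(-1)([combined] + extras)
  x = L.LayerNormalization()(x)
  for _ in range(3):  # sizes=[latentSize] * 3 in the diff
    x = L.Dense(latentSize, activation='relu')(x)
  return x

Normalizing right after the concatenation helps because the concatenated tensors (step latents, encoded points, eye features, embeddings) can sit on very different scales.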
@@ -74,6 +76,7 @@ def Face2StepModel(pointsN, eyeSize, latentSize, embeddingsSize):
       continue
 
   combined = L.Dense(latentSize, name='F2S/Combine')(combined)
+  combined = L.LayerNormalization()(combined)
   # combined = CQuantizeLayer()(combined)
   return tf.keras.Model(
     inputs={
@@ -92,7 +95,7 @@ def Step2LatentModel(latentSize, embeddingsSize):
   latents = L.Input((None, latentSize))
   embeddingsInput = L.Input((None, embeddingsSize))
   T = L.Input((None, 1))
-  embeddings = embeddingsInput[..., :1] * 0.0
+  embeddings = embeddingsInput
 
   stepsData = latents
   intermediate = {}
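
Note: this one-line change is behaviorally significant. The old expression kept a single channel and multiplied it by 0.0, so Step2LatentModel received an all-zero placeholder and the embeddings were effectively disabled; now they flow through unchanged. A quick illustration:

import tensorflow as tf

embeddingsInput = tf.ones((2, 5, 8))  # (batch, steps, embeddingsSize)

old = embeddingsInput[..., :1] * 0.0  # shape (2, 5, 1), all zeros
new = embeddingsInput                 # shape (2, 5, 8), real embeddings

print(old.shape, float(tf.reduce_sum(old)))  # (2, 5, 1) 0.0
print(new.shape)                             # (2, 5, 8)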
@@ -105,11 +108,11 @@ def Step2LatentModel(latentSize, embeddingsSize):
   intermediate['S2L/enc0'] = temporal
   # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
   for blockId in range(3):
-    temp = L.Concatenate(-1)([temporal, encodedT])
-    for _ in range(1):
+    temp = L.Concatenate(-1)([temporal, encodedT, embeddings])
+    for _ in range(3):
       temp = L.LSTM(latentSize, return_sequences=True)(temp)
-    temp = sMLP(sizes=[latentSize] * 1, activation='relu')(
-      L.Concatenate(-1)([temporal, temp])
+    temp = sMLP(sizes=[latentSize] * 3, activation='relu')(
+      L.Concatenate(-1)([temporal, temp, encodedT, embeddings])
     )
     temporal = CFusingBlock()([temporal, temp])
     intermediate['S2L/ResLSTM-%d' % blockId] = temporal
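
Note: each of the three blocks now stacks three LSTMs instead of one, and the post-LSTM MLP is additionally conditioned on the time encoding and embeddings. A minimal sketch of one such residual block, with a Dense stack standing in for sMLP and element-wise addition standing in for CFusingBlock (both are repo-specific layers not defined in this diff):

import tensorflow.keras.layers as L

def residualLSTMBlock(temporal, encodedT, embeddings, latentSize):
  # temporal: (batch, steps, latentSize)
  temp = L.Concatenate(-1)([temporal, encodedT, embeddings])
  for _ in range(3):  # deeper recurrent stack than before
    temp = L.LSTM(latentSize, return_sequences=True)(temp)
  # Re-inject the block input and the conditioning before the MLP.
  temp = L.Concatenate(-1)([temporal, temp, encodedT, embeddings])
  for _ in range(3):  # sizes=[latentSize] * 3
    temp = L.Dense(latentSize, activation='relu')(temp)
  # Residual fusion; the repo uses CFusingBlock here.
  return L.Add()([temporal, temp])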
@@ -165,6 +168,7 @@ def Face2LatentModel(
   # add diffusion features to the embeddings
   emb = L.Concatenate(-1)([emb, encodedDT, encodedDP])
 
+  emb = L.LayerNormalization()(emb)
   Face2Step = Face2StepModel(pointsN, eyeSize, latentSize, embeddingsSize=emb.shape[-1])
   Step2Latent = Step2LatentModel(latentSize, embeddingsSize=emb.shape[-1])
 
@@ -196,7 +200,9 @@ def Face2LatentModel(
   }
   res['result'] = IntermediatePredictor(
     shift=0.0 if diffusion else 0.5 # shift points to the center, if not using diffusion
-  )(res['latent'])
+  )(
+    L.Concatenate(-1)([res['latent'], T, emb])
+  )
 
   if diffusion:
     inputs['diffusionT'] = diffusionT
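
Note: IntermediatePredictor no longer sees only the latent trajectory; it now receives the latents concatenated with the raw per-step time T and the normalized embeddings. A shape-level illustration with assumed sizes (the 64-d latents and 32-d embeddings are placeholders, not values from the repo):

import tensorflow as tf
import tensorflow.keras.layers as L

latent = tf.zeros((2, 7, 64))  # res['latent']: (batch, steps, latentSize)
T = tf.zeros((2, 7, 1))        # per-step time
emb = tf.zeros((2, 7, 32))     # normalized embeddings

conditioned = L.Concatenate(-1)([latent, T, emb])
print(conditioned.shape)  # (2, 7, 97): time and identity features are
                          # now passed to the predictor explicitly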
