-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnetwork.py
103 lines (83 loc) · 3.02 KB
/
network.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from abc import ABC
from collections import OrderedDict
from typing import List, Union
import numpy as np
from act import BaseActivation
from layer import BaseLayer
class BaseNetwork(ABC):
def __init__(self):
'''
define the variables to store state of network,
and other variables as necessary
'''
self.net = OrderedDict()
self.cache = OrderedDict()
self.grads = OrderedDict()
...
def build(self, layers: list):
'''
build network structure from list of layers
store the structure as an OrderedDict in self.net
'''
...
def forward(self, x):
'''
implement forward pass sequentially through all layers in network
store the intermediate activations in a dictionary
'''
...
def backward(self, dloss):
'''
implement backward pass using chain rule,
given the gradient of loss function with respect to y_pred
store the gradients at each layer & activation in a dictionary
'''
...
def update(self, learning_rate: float):
'''
update the learnable parameters in network,
using the backpropagated gradients and learning rate
'''
class MLP(BaseNetwork):
'''
defines a simple, sequential multi-layer perceptron
an obvious limitation is that we only support strictly sequential operations
to support a more general structure, eg multiple prediction heads from a backbone,
we need to define a directed computation graph,
that tells the input data & the gradients where to go next
'''
def __init__(self, layers: List[Union[BaseActivation, BaseLayer]]):
super().__init__()
self.build(layers)
def build(self, layers: List[Union[BaseActivation, BaseLayer]]):
self.net = OrderedDict()
# a simple and straightforward sequential naming convention
for layer_idx, layer in enumerate(layers):
self.net[f'{str(layer)}_{layer_idx}'] = layer
def forward(self, x: np.ndarray):
'''
args:
x: expecting shape [batch, feature_dim]
'''
# tranpose input from [BSZ, D_in] to [D_in, BSZ]
x_ = x.T
for layer in self.net.values():
x_ = layer.forward(x_)
# tranpose output from [D_out, BSZ] to [BSZ, D_out]
return x_.T
def backward(self, dloss: float):
'''
args:
dloss: gradient of loss function wrt model's output, y_pred
'''
# first gradient comes from loss function
grad = dloss
# run backward on every layer from last layer to input layer
for layer in reversed(self.net.values()):
grad = layer.backward(grad)
def update(self, learning_rate: float):
# the simplest update - SGD
# of course, we can add more sophisticated optimizers - SGD with momentum is a start
for layer in self.net.values():
if layer.learnable:
layer.update(learning_rate)