dense.py (forked from VikParuchuri/zero_to_gpt)
from network import Module
from activation import Relu
import numpy as np
import math


class Dense(Module):
    # A fully connected layer with optional bias and ReLU activation,
    # updated by plain SGD in backward().
    def __init__(self, input_size, output_size, bias=True, activation=True, seed=0):
        self.add_bias = bias
        self.add_activation = activation
        self.hidden = None
        self.prev_hidden = None
        np.random.seed(seed)
        # Uniform init in [-k, k] with k = sqrt(1 / fan_in), as in PyTorch's
        # default nn.Linear initialization.
        k = math.sqrt(1 / input_size)
        self.weights = np.random.rand(input_size, output_size) * (2 * k) - k
        self.bias = np.random.rand(1, output_size) * (2 * k) - k
        self.activation = Relu()
        super().__init__()

    def forward(self, x):
        self.prev_hidden = x.copy()
        x = np.matmul(x, self.weights)
        if self.add_bias:
            x += self.bias
        if self.add_activation:
            x = self.activation.forward(x)
        self.hidden = x.copy()
        return x

    def backward(self, grad, lr):
        if self.add_activation:
            grad = self.activation.backward(grad, lr, self.hidden)
        # The gradient w.r.t. the input must use the weights from the forward
        # pass, so compute it before updating the parameters.
        input_grad = grad @ self.weights.T
        w_grad = self.prev_hidden.T @ grad            # summed over the batch
        b_grad = np.sum(grad, axis=0, keepdims=True)  # summed over the batch
        self.weights -= w_grad * lr
        if self.add_bias:
            self.bias -= b_grad * lr
        return input_grad
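
A minimal usage sketch (assuming this file is saved as dense.py and that the repo's network.Module and activation.Relu are importable; the batch shapes and learning rate here are illustrative only):

import numpy as np
from dense import Dense  # assumed import path for this file

layer = Dense(input_size=3, output_size=2, seed=0)
x = np.random.rand(4, 3)                          # synthetic batch of 4 examples
out = layer.forward(x)                            # shape (4, 2)
upstream = np.ones_like(out)                      # stand-in for a loss gradient
input_grad = layer.backward(upstream, lr=1e-3)    # shape (4, 3); params updated in place
print(out.shape, input_grad.shape)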