# listops_big_lowlr.yaml
# Origin: repository forked from RobertCsordas/ndr
# Sweep definition: which script to launch, how to build its command line,
# the search strategy, and the metric the sweep optimizes.
program: main.py
# Launch command; ${env}, ${program} and ${args} are macros that the sweep
# runner expands before starting each run.
command:
  - ${env}
  - python3
  - ${program}
  - ${args}
# Grid search: every combination of the parameter values listed below is run.
method: grid
# Metric reported for the sweep; `name` and `goal` must be nested under
# `metric` (as top-level keys they would not describe the metric at all).
metric:
  name: validation/valid/accuracy/total
  goal: maximize
# Hyperparameters passed to the program. Each entry is a mapping nested under
# its parameter name: `value` pins a single constant, `values` lists the
# options the grid iterates over. Every parameter here is pinned except
# `sweep_id_for_grid_search`, so the grid consists of five runs.
parameters:
  log:
    value: wandb
  profile:
    value: listops_trafo
  listops.variant:
    value: big
  transformer.variant:
    value: ndr_geometric
  test_batch_size:
    value: 16
  transformer.encoder_n_layers:
    value: 20
  n_test_layers:
    value: 24
  lr:
    value: 0.0002
  state_size:
    value: 512
  dropout:
    value: 0.1
  transformer.attention_dropout:
    value: 0.1
  wd:
    value: 0.09
  transformer.n_heads:
    value: 16
  batch_size:
    value: 512
  optimizer:
    value: adamw
  stop_after:
    value: 100000
  max_length_per_batch:
    # NOTE(review): plain `none` is the string "none" to a YAML parser, not
    # null; presumably the program interprets it itself - confirm.
    value: none
  amp:
    value: 1
  wandb_bug_workaround:
    value: 1
  geometric.pos_encoding:
    value: direction
  ndr.scalar_gate:
    value: 0
  ndr.drop_gate:
    value: 0.05
  grad_clip:
    value: 1
  length_bucketed_sampling:
    value: 1
  transformer.ff_multiplier:
    value: 2
  trafo_classifier.out_mode:
    value: linear
  trafo_classifier.norm_att:
    value: 0
  # The only swept parameter: five categorical values, i.e. five otherwise
  # identical runs. Presumably a repeat/seed index - confirm against main.py.
  sweep_id_for_grid_search:
    distribution: categorical
    values:
      - 1
      - 2
      - 3
      - 4
      - 5