r518: bi-rnn apparently working
example to add later
lh3 committed Mar 17, 2017
1 parent 2e49155 commit f3b0e75
Showing 5 changed files with 15 additions and 15 deletions.
README.md (3 changes: 1 addition & 2 deletions)
@@ -47,8 +47,7 @@ neural networks in C/C++, to deploy no-so-large models without worrying about
 * CPU only. As such, KANN is **not** intended for training huge neural
   networks.
 
-* No out-of-box bidirectional RNNs and seq2seq models. No batch
-  normalization.
+* No seq2seq. No batch normalization.
 
 * Verbose APIs for training RNNs.
kann.c (6 changes: 3 additions & 3 deletions)
@@ -213,13 +213,13 @@ void kann_rnn_end(kann_t *a)

 static int kann_class_error_core(const kann_t *ann, int *base)
 {
-    int i, j, k, n, off, n_err = 0;
+    int i, j, k, m, n, off, n_err = 0;
     for (i = 0, *base = 0; i < ann->n; ++i) {
         kad_node_t *p = ann->v[i];
         if ((p->op == 13 || p->op == 22) && p->n_child == 2 && p->n_d == 0) { // ce_bin or ce_multi
             kad_node_t *x = p->child[0], *t = p->child[1];
-            n = kad_len(t) / t->d[0];
-            for (j = off = 0; j < t->d[0]; ++j, off += n) {
+            n = t->d[t->n_d - 1], m = kad_len(t) / n;
+            for (j = off = 0; j < m; ++j, off += n) {
                 float t_sum = 0.0f, t_min = 1.0f, t_max = 0.0f, x_max = 0.0f, x_min = 1.0f;
                 int x_max_k = -1, t_max_k = -1;
                 for (k = 0; k < n; ++k) {
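
The kann.c hunk generalizes the error counter: the truth tensor's trailing dimension is now taken as the per-sample class width n, and all leading axes (mini-batch, plus whatever axes unrolling introduces) are folded into m rows, instead of trusting t->d[0] alone. A minimal standalone sketch of that folding, using a hypothetical rows_cols() helper that is not part of KANN:

#include <stdio.h>

/* Hypothetical helper, not in KANN: fold all leading axes of an
 * n_d-dimensional tensor into rows, keeping the trailing axis as columns. */
static void rows_cols(int n_d, const int *d, int *m, int *n)
{
    int i, len = 1;
    for (i = 0; i < n_d; ++i) len *= d[i];
    *n = d[n_d - 1]; /* columns: width of the last axis */
    *m = len / *n;   /* rows: product of all leading axes */
}

int main(void)
{
    int d[3] = { 10, 64, 5 }, m, n; /* e.g. (time, batch, n_class) */
    rows_cols(3, d, &m, &n);
    printf("%d rows of %d classes\n", m, n); /* 640 rows of 5 classes */
    return 0;
}
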
kann.h (2 changes: 1 addition & 1 deletion)
@@ -27,7 +27,7 @@
 #ifndef KANN_H
 #define KANN_H
 
-#define KANN_VERSION "r512"
+#define KANN_VERSION "r518"
 
 #define KANN_F_IN 0x1 // input
 #define KANN_F_OUT 0x2 // output
kautodiff.c (16 changes: 8 additions & 8 deletions)
@@ -808,6 +808,9 @@ kad_node_t **kad_unroll(int n_v, kad_node_t **v, int *new_n, int *len)
     }
     free(t);
     for (i = 0; i < n_v; ++i) v[i]->tmp = 0;
+    for (i = 0; i < w.n; ++i) // stack may change the output dimension
+        if (w.v[i]->n_child > 0)
+            kad_op_list[w.v[i]->op](w.v[i], KAD_SYNC_DIM);
     kad_allocate_internal(w.n, w.v);
     *new_n = w.n;
     return w.v;
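
The three added lines give the unrolled graph a second shape-inference pass: every non-leaf node's operator is called once more with KAD_SYNC_DIM before internal buffers are allocated, because stacking the per-step outputs can change a node's final dimension. A toy illustration of that sync-then-allocate idiom; the node type and op here are invented for the example, not KANN's:

#include <stdio.h>
#include <stdlib.h>

/* Toy node: len is the flattened output size, derived from the children. */
typedef struct tnode { int n_child, len; struct tnode *child[2]; float *val; } tnode;

enum { TOY_SYNC_DIM, TOY_ALLOC };

/* Toy "stack" op: its output size is only known once the children's
 * shapes are final, hence the separate sync pass before allocation. */
static void toy_stack(tnode *p, int action)
{
    if (action == TOY_SYNC_DIM) p->len = p->child[0]->len + p->child[1]->len;
    else p->val = (float*)calloc(p->len, sizeof(float));
}

int main(void)
{
    tnode a = { 0, 3 }, b = { 0, 5 }, s = { 2, 0, { &a, &b } };
    toy_stack(&s, TOY_SYNC_DIM); /* pass 1: settle the shape */
    toy_stack(&s, TOY_ALLOC);    /* pass 2: allocate at the final size */
    printf("stacked len = %d\n", s.len); /* 8 */
    free(s.val);
    return 0;
}
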
@@ -1091,11 +1094,8 @@ int kad_op_cmul(kad_node_t *p, int action)
     int n_a_row, n_b_row, n_col, n_a_col, n_b_col;
     kad_node_t *q[2];
 
-    q[0] = p->child[0];
-    q[1] = p->child[1];
-    n_a_col = q[0]->n_d == 1? q[0]->d[0] : kad_len(q[0]) / q[0]->d[0];
-    n_b_col = q[1]->n_d == 1? q[1]->d[0] : kad_len(q[1]) / q[1]->d[0];
-    n_a_row = kad_len(q[0]) / n_a_col, n_b_row = kad_len(q[1]) / n_b_col;
+    q[0] = p->child[0], n_a_col = q[0]->d[q[0]->n_d - 1], n_a_row = kad_len(q[0]) / n_a_col;
+    q[1] = p->child[1], n_b_col = q[1]->d[q[1]->n_d - 1], n_b_row = kad_len(q[1]) / n_b_col;
     n_col = n_a_col;
     if (action == KAD_SYNC_DIM) {
         if (n_a_col != n_b_col) return -1;
@@ -1113,7 +1113,7 @@ int kad_op_cmul(kad_node_t *p, int action)
     return 0;
 }
 
-int kad_op_matmul(kad_node_t *p, int action)
+int kad_op_matmul(kad_node_t *p, int action) // TODO: matmul and cmul have different broadcasting rules
 {
     int n_a_row, n_b_row, n_a_col, n_b_col;
     kad_node_t *q[2];
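
With this change, kad_op_cmul reads each operand's column count off its trailing dimension and folds every leading axis into rows, so cmul (A multiplied by the transpose of B) keeps working when an unrolled RNN puts extra time/batch axes in front of the shared feature axis. A small shape walk-through under that convention; the concrete sizes are illustrative only:

#include <stdio.h>

int main(void)
{
    /* A: 10 time steps x 64 sequences x 16 features; B: 32 x 16 weights. */
    int a_d[3] = { 10, 64, 16 }, b_d[2] = { 32, 16 };
    int n_a_col = a_d[2];   /* 16: trailing axis of A */
    int n_a_row = 10 * 64;  /* 640: leading axes folded into rows */
    int n_b_col = b_d[1];   /* 16: must match n_a_col */
    int n_b_row = b_d[0];   /* 32 */
    if (n_a_col == n_b_col) /* cmul: (640,16) x (32,16)^T -> (640,32) */
        printf("output: %d x %d\n", n_a_row, n_b_row);
    return 0;
}
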
@@ -1425,8 +1425,8 @@ int kad_op_reverse(kad_node_t *p, int action) // TODO: not tested
     int axis, i, j, n, d0, d1;
 
     axis = p->ptr? *(int32_t*)p->ptr : 0;
-    if (axis < 0) axis += p->n_d;
-    assert(axis >= 0 && axis < p->n_d);
+    if (axis < 0) axis += q->n_d;
+    assert(axis >= 0 && axis < q->n_d);
     for (i = 0, d0 = 1; i < axis; ++i) d0 *= q->d[i];
     n = q->d[axis];
     for (i = axis + 1, d1 = 1; i < q->n_d; ++i) d1 *= q->d[i];
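
The kad_op_reverse fix normalizes a negative axis against the child q's rank instead of p's, since p's own dimensions are not yet set while the node is being synced; the body then walks the tensor as a (d0, n, d1) view, with d0 the product of the axes before the reversed one, n its extent, and d1 the product of the axes after it. A self-contained sketch of reversing along one axis with that decomposition, on plain float buffers:

#include <stdio.h>
#include <string.h>

/* Reverse src along one axis into dst, viewing the tensor as (d0, n, d1). */
static void reverse_axis(float *dst, const float *src, int d0, int n, int d1)
{
    int i, j;
    for (i = 0; i < d0; ++i)
        for (j = 0; j < n; ++j)
            memcpy(&dst[(i * n + n - 1 - j) * d1],
                   &src[(i * n + j) * d1], d1 * sizeof(float));
}

int main(void)
{
    float src[2 * 3] = { 1, 2, 3, 4, 5, 6 }, dst[6];
    reverse_axis(dst, src, 1, 2, 3); /* reverse axis 0 of a 2x3 matrix */
    printf("%g %g %g / %g %g %g\n", dst[0], dst[1], dst[2], dst[3], dst[4], dst[5]);
    /* prints: 4 5 6 / 1 2 3 */
    return 0;
}
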
kautodiff.h (3 changes: 2 additions & 1 deletion)
@@ -27,7 +27,7 @@
 #ifndef KANN_AUTODIFF_H
 #define KANN_AUTODIFF_H
 
-#define KAD_VERSION "r517"
+#define KAD_VERSION "r518"
 
 #include <stdio.h>
 #include <stdint.h>
@@ -193,6 +193,7 @@ kad_node_t *kad_slice(kad_node_t *x, int axis, int start, int end); // take a slice on the axis-th dimension
 kad_node_t *kad_concat(int axis, int n, ...); // concatenate on the axis-th dimension
 kad_node_t *kad_concat_array(int axis, int n, kad_node_t **p); // the array version of concat
 kad_node_t *kad_reshape(kad_node_t *x, int n_d, int *d); // reshape; similar behavior to TensorFlow's reshape()
+kad_node_t *kad_reverse(kad_node_t *x, int axis);
 kad_node_t *kad_switch(int n, kad_node_t **p); // manually (as a hyperparameter) choose one input, default to 0
 
 // miscellaneous operations on a compiled graph
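
The newly declared kad_reverse() is what makes the bidirectional RNN of the commit title expressible: run one RNN left to right, a second one over the time-reversed input, reverse that output back into alignment, and concatenate along the feature axis. A hedged sketch of the wiring; the commit message defers the real example, so the layer call and sizes below are assumptions, not the library's own code:

/* Fragment only: assumes an input node `in` set up for RNN training
 * and a hypothetical hidden size n_hidden. */
kad_node_t *fwd, *bwd, *out;
fwd = kann_layer_rnn(in, n_hidden, 0);                 /* left to right */
bwd = kann_layer_rnn(kad_reverse(in, 0), n_hidden, 0); /* right to left */
bwd = kad_reverse(bwd, 0);        /* realign with the forward stream */
out = kad_concat(1, 2, fwd, bwd); /* join features on axis 1 */
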
