r534: support -ansi -pedantic

wenzhu888 · Mar 31, 2017 · 62e646a · 62e646a
1 parent e8f458f
commit 62e646a
Show file tree

Hide file tree

Showing 5 changed files with 288 additions and 269 deletions.
diff --git a/Makefile b/Makefile
@@ -1,5 +1,6 @@
 CC=			gcc
 CFLAGS=		-g -Wall -Wextra -Wc++-compat -O2
+CFLAGS_LIB=	#-ansi -pedantic -Wno-long-long # ANSI C has no inline keyword, which costs a little performance
 CPPFLAGS=	-DHAVE_PTHREAD
 INCLUDES=	-I.
 EXE=		examples/mlp examples/mnist-cnn examples/inspect examples/textgen examples/rnn-bit \
@@ -20,6 +21,12 @@ endif
 
 all:kautodiff.o kann.o kann_extra/kann_data.o $(EXE)
 
+kautodiff.o:kautodiff.c
+		$(CC) -c $(CFLAGS) $(CFLAGS_LIB) $(INCLUDES) $(CPPFLAGS) -o $@ $<
+
+kann.o:kann.c
+		$(CC) -c $(CFLAGS) $(CFLAGS_LIB) $(INCLUDES) $(CPPFLAGS) -o $@ $<
+
 kann_extra/kann_data.o:kann_extra/kann_data.c
 		$(CC) -c $(CFLAGS) -DHAVE_ZLIB $< -o $@
 

diff --git a/kann.c b/kann.c
@@ -80,7 +80,7 @@ kann_t *kann_new(kad_node_t *cost, int n_rest, ...)
 		if (a->v[i]->pre) has_recur = 1;
 		if (kad_is_pivot(a->v[i])) has_pivot = 1;
 	}
-	if (has_recur && !has_pivot) { // an RNN that doesn't have a pivot; then add a pivot on top of cost and recompile
+	if (has_recur && !has_pivot) { /* an RNN that doesn't have a pivot; then add a pivot on top of cost and recompile */
 		cost->ext_flag &= ~KANN_F_COST;
 		roots[n_roots-1] = cost = kad_avg(1, &cost), cost->ext_flag |= KANN_F_COST;
 		free(a->v);
@@ -112,7 +112,7 @@ kann_t *kann_unroll(kann_t *a, ...)
 	for (i = 0; i < n_pivots; ++i) len[i] = va_arg(ap, int);
 	va_end(ap);
 	b = (kann_t*)calloc(1, sizeof(kann_t));
-	b->x = a->x, b->g = a->g, b->c = a->c; // these arrays are shared
+	b->x = a->x, b->g = a->g, b->c = a->c; /* these arrays are shared */
 	b->v = kad_unroll(a->n, a->v, &b->n, len);
 	return b;
 }
@@ -197,7 +197,7 @@ void kann_rnn_start(kann_t *a)
 	kann_set_batch_size(a, 1);
 	for (i = 0; i < a->n; ++i) {
 		kad_node_t *p = a->v[i];
-		if (p->pre) { // NB: BE CAREFUL of the interaction between kann_rnn_start() and kann_set_batch_size()
+		if (p->pre) { /* NB: BE CAREFUL of the interaction between kann_rnn_start() and kann_set_batch_size() */
 			kad_node_t *q = p->pre;
 			if (q->x) memcpy(p->x, q->x, kad_len(p) * sizeof(float));
 			else memset(p->x, 0, kad_len(p) * sizeof(float));
@@ -216,7 +216,7 @@ static int kann_class_error_core(const kann_t *ann, int *base)
 	int i, j, k, m, n, off, n_err = 0;
 	for (i = 0, *base = 0; i < ann->n; ++i) {
 		kad_node_t *p = ann->v[i];
-		if ((p->op == 13 || p->op == 22) && p->n_child == 2 && p->n_d == 0) { // ce_bin or ce_multi
+		if ((p->op == 13 || p->op == 22) && p->n_child == 2 && p->n_d == 0) { /* ce_bin or ce_multi */
 			kad_node_t *x = p->child[0], *t = p->child[1];
 			n = t->d[t->n_d - 1], m = kad_len(t) / n;
 			for (j = off = 0; j < m; ++j, off += n) {
@@ -249,24 +249,24 @@ static int kann_class_error_core(const kann_t *ann, int *base)
 
 struct mtaux_t;
 
-typedef struct { // per-worker data
+typedef struct { /* per-worker data */
 	kann_t *a;
 	float cost;
 	int action;
 	pthread_t tid;
 	struct mtaux_t *g;
 } mtaux1_t;
 
-typedef struct mtaux_t { // cross-worker data
+typedef struct mtaux_t { /* cross-worker data */
 	int n_threads, max_batch_size;
 	int cal_grad, cost_label;
-	volatile int n_idle; // we will be busy waiting on this, so volatile necessary
+	volatile int n_idle; /* we busy-wait on this, so volatile is necessary */
 	pthread_mutex_t mtx;
 	pthread_cond_t cv;
 	mtaux1_t *mt;
 } mtaux_t;
 
-static void *mt_worker(void *data) // pthread worker
+static void *mt_worker(void *data) /* pthread worker */
 {
 	mtaux1_t *mt1 = (mtaux1_t*)data;
 	mtaux_t *mt = mt1->g;
@@ -286,7 +286,7 @@ static void *mt_worker(void *data) // pthread worker
 	pthread_exit(0);
 }
 
-static void mt_destroy(mtaux_t *mt) // de-allocate an entire mtaux_t struct
+static void mt_destroy(mtaux_t *mt) /* de-allocate an entire mtaux_t struct */
 {
 	int i;
 	pthread_mutex_lock(&mt->mtx);
@@ -328,7 +328,7 @@ void kann_mt(kann_t *ann, int n_threads, int max_batch_size)
 	}
 	for (i = 1; i < n_threads; ++i)
 		pthread_create(&mt->mt[i].tid, 0, mt_worker, &mt->mt[i]);
-	while (mt->n_idle < n_threads - 1); // busy waiting until all threads in sync
+	while (mt->n_idle < n_threads - 1); /* busy waiting until all threads in sync */
 	ann->mt = mt;
 }
 
@@ -339,8 +339,8 @@ float kann_cost(kann_t *a, int cost_label, int cal_grad)
 	float cost;
 
 	if (mt == 0) return kann_cost_core(a, cost_label, cal_grad);
-	B = kad_sync_dim(a->n, a->v, -1); // get the current batch size
-	assert(B <= mt->max_batch_size); // TODO: can be relaxed
+	B = kad_sync_dim(a->n, a->v, -1); /* get the current batch size */
+	assert(B <= mt->max_batch_size); /* TODO: can be relaxed */
 	n_var = kann_size_var(a);
 
 	pthread_mutex_lock(&mt->mtx);
@@ -350,7 +350,7 @@ float kann_cost(kann_t *a, int cost_label, int cal_grad)
 		for (j = 0; j < a->n; ++j)
 			if (kad_is_feed(a->v[j]))
 				mt->mt[i].a->v[j]->x = &a->v[j]->x[k * kad_len(a->v[j]) / a->v[j]->d[0]];
-		kad_sync_dim(mt->mt[i].a->n, mt->mt[i].a->v, size); // TODO: we can point ->x to internal nodes, too
+		kad_sync_dim(mt->mt[i].a->n, mt->mt[i].a->v, size); /* TODO: we can point ->x to internal nodes, too */
 		k += size;
 		memcpy(mt->mt[i].a->x, a->x, n_var * sizeof(float));
 		mt->mt[i].action = 1;
@@ -360,7 +360,7 @@ float kann_cost(kann_t *a, int cost_label, int cal_grad)
 	pthread_mutex_unlock(&mt->mtx);
 
 	mt->mt[0].cost = kann_cost_core(mt->mt[0].a, cost_label, cal_grad);
-	while (mt->n_idle < mt->n_threads - 1); // busy waiting until all threads in sync
+	while (mt->n_idle < mt->n_threads - 1); /* busy waiting until all threads in sync */
 
 	memset(a->g, 0, n_var * sizeof(float));
 	for (i = k = 0, cost = 0.0f; i < mt->n_threads; ++i) {
@@ -369,7 +369,7 @@ float kann_cost(kann_t *a, int cost_label, int cal_grad)
 		kad_saxpy(n_var, (float)size / B, mt->mt[i].a->g, a->g);
 		k += size;
 	}
-	for (j = 0; j < a->n; ++j) { // copy values back at recurrent nodes (needed by textgen; TODO: temporary solution)
+	for (j = 0; j < a->n; ++j) { /* copy values back at recurrent nodes (needed by textgen; TODO: temporary solution) */
 		kad_node_t *p = a->v[j];
 		if (p->pre && p->n_d >= 2 && p->d[0] == B) {
 			for (i = k = 0; i < mt->n_threads; ++i) {
@@ -398,7 +398,10 @@ void kann_switch(kann_t *ann, int is_train)
 {
 	mtaux_t *mt = (mtaux_t*)ann->mt;
 	int i;
-	if (mt == 0) return kann_switch_core(ann, is_train);
+	if (mt == 0) {
+		kann_switch_core(ann, is_train);
+		return;
+	}
 	for (i = 0; i < mt->n_threads; ++i)
 		kann_switch_core(mt->mt[i].a, is_train);
 }
@@ -470,7 +473,7 @@ kann_t *kann_load(const char *fn)
  *** @@LAYER: layers and model generation ***
  **********************************************/
 
-/////////// General but more complex APIs ///////////
+/********** General but more complex APIs **********/
 
 kad_node_t *kann_new_leaf_array(int *offset, kad_node_p *par, uint8_t flag, float x0_01, int n_d, int32_t d[KAD_MAX_DIM])
 {
@@ -539,7 +542,7 @@ static inline kad_node_t *cmul_norm2(int *offset, kad_node_t **par, kad_node_t *
 
 kad_node_t *kann_layer_rnn2(int *offset, kad_node_t **par, kad_node_t *in, kad_node_t *h0, int rnn_flag)
 {
-	int n0, n1 = h0->d[h0->n_d-1], use_norm = !!(rnn_flag & KANN_RNN_NORM);;
+	int n0, n1 = h0->d[h0->n_d-1], use_norm = !!(rnn_flag & KANN_RNN_NORM);
 	kad_node_t *t, *w, *u, *b, *out;
 
 	u = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 2, n1, n1);
@@ -561,7 +564,7 @@ kad_node_t *kann_layer_gru2(int *offset, kad_node_t **par, kad_node_t *in, kad_n
 	kad_node_t *t, *r, *z, *w, *u, *b, *s, *out;
 
 	if (in) n0 = in->n_d >= 2? kad_len(in) / in->d[0] : kad_len(in);
-	// z = sigm(x_t * W_z + h_{t-1} * U_z + b_z)
+	/* z = sigm(x_t * W_z + h_{t-1} * U_z + b_z) */
 	u = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 2, n1, n1);
 	b = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 1, n1);
 	t = cmul_norm2(offset, par, h0, u, use_norm);
@@ -570,7 +573,7 @@ kad_node_t *kann_layer_gru2(int *offset, kad_node_t **par, kad_node_t *in, kad_n
 		t = kad_add(cmul_norm2(offset, par, in, w, use_norm), t);
 	}
 	z = kad_sigm(kad_add(t, b));
-	// r = sigm(x_t * W_r + h_{t-1} * U_r + b_r)
+	/* r = sigm(x_t * W_r + h_{t-1} * U_r + b_r) */
 	u = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 2, n1, n1);
 	b = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 1, n1);
 	t = cmul_norm2(offset, par, h0, u, use_norm);
@@ -579,7 +582,7 @@ kad_node_t *kann_layer_gru2(int *offset, kad_node_t **par, kad_node_t *in, kad_n
 		t = kad_add(cmul_norm2(offset, par, in, w, use_norm), t);
 	}
 	r = kad_sigm(kad_add(t, b));
-	// s = tanh(x_t * W_s + (h_{t-1} # r) * U_s + b_s)
+	/* s = tanh(x_t * W_s + (h_{t-1} # r) * U_s + b_s) */
 	u = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 2, n1, n1);
 	b = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 1, n1);
 	t = cmul_norm2(offset, par, kad_mul(r, h0), u, use_norm);
@@ -588,13 +591,13 @@ kad_node_t *kann_layer_gru2(int *offset, kad_node_t **par, kad_node_t *in, kad_n
 		t = kad_add(cmul_norm2(offset, par, in, w, use_norm), t);
 	}
 	s = kad_tanh(kad_add(t, b));
-	// h_t = z # h_{t-1} + (1 - z) # s
+	/* h_t = z # h_{t-1} + (1 - z) # s */
 	out = kad_add(kad_mul(kad_1minus(z), s), kad_mul(z, h0));
 	out->pre = h0;
 	return out;
 }
 
-/////////// APIs without offset & par ///////////
+/********** APIs without offset & par **********/
 
 kad_node_t *kann_new_leaf(uint8_t flag, float x0_01, int n_d, ...)
 {
@@ -655,31 +658,31 @@ kad_node_t *kann_layer_lstm(kad_node_t *in, int n1, int rnn_flag)
 	c0 = (rnn_flag & KANN_RNN_VAR_H0)? kad_var(0, 0, 2, 1, n1) : kad_const(0, 2, 1, n1);
 	c0->x = (float*)calloc(n1, sizeof(float));
 
-	// i = sigm(x_t * W_i + h_{t-1} * U_i + b_i)
+	/* i = sigm(x_t * W_i + h_{t-1} * U_i + b_i) */
 	w = kann_new_weight(n1, n0);
 	u = kann_new_weight(n1, n1);
 	b = kann_new_bias(n1);
 	i = kad_sigm(kad_add(kad_add(cmul(in, w), cmul(h0, u)), b));
-	// f = sigm(x_t * W_f + h_{t-1} * U_f + b_f)
+	/* f = sigm(x_t * W_f + h_{t-1} * U_f + b_f) */
 	w = kann_new_weight(n1, n0);
 	u = kann_new_weight(n1, n1);
-	b = kann_new_vec(n1, 1.0f); // see Jozefowicz et al on using a large bias
+	b = kann_new_vec(n1, 1.0f); /* see Jozefowicz et al on using a large bias */
 	f = kad_sigm(kad_add(kad_add(cmul(in, w), cmul(h0, u)), b));
-	// o = sigm(x_t * W_o + h_{t-1} * U_o + b_o)
+	/* o = sigm(x_t * W_o + h_{t-1} * U_o + b_o) */
 	w = kann_new_weight(n1, n0);
 	u = kann_new_weight(n1, n1);
 	b = kann_new_bias(n1);
 	o = kad_sigm(kad_add(kad_add(cmul(in, w), cmul(h0, u)), b));
-	// g = tanh(x_t * W_g + h_{t-1} * U_g + b_g)
+	/* g = tanh(x_t * W_g + h_{t-1} * U_g + b_g) */
 	w = kann_new_weight(n1, n0);
 	u = kann_new_weight(n1, n1);
 	b = kann_new_bias(n1);
 	g = kad_tanh(kad_add(kad_add(cmul(in, w), cmul(h0, u)), b));
-	// c_t = c_{t-1} # f + g # i
-	c = kad_add(kad_mul(f, c0), kad_mul(g, i)); // can't be kad_mul(c0, f)!!!
+	/* c_t = c_{t-1} # f + g # i */
+	c = kad_add(kad_mul(f, c0), kad_mul(g, i)); /* can't be kad_mul(c0, f)!!! */
 	c->pre = c0;
-	// h_t = tanh(c_t) # o
-	if (rnn_flag & KANN_RNN_NORM) c = kann_layer_layernorm(c); // see Ba et al (2016) about how to apply layer normalization to LSTM
+	/* h_t = tanh(c_t) # o */
+	if (rnn_flag & KANN_RNN_NORM) c = kann_layer_layernorm(c); /* see Ba et al (2016) about how to apply layer normalization to LSTM */
 	out = kad_mul(kad_tanh(c), o);
 	out->pre = h0;
 	return out;

diff --git a/kann.h b/kann.h
@@ -27,28 +27,28 @@
 #ifndef KANN_H
 #define KANN_H
 
-#define KANN_VERSION "r530"
+#define KANN_VERSION "r534"
 
-#define KANN_F_IN       0x1   // input
-#define KANN_F_OUT      0x2   // output
-#define KANN_F_TRUTH    0x4   // truth output
-#define KANN_F_COST     0x8   // final cost
+#define KANN_F_IN       0x1   /* input */
+#define KANN_F_OUT      0x2   /* output */
+#define KANN_F_TRUTH    0x4   /* truth output */
+#define KANN_F_COST     0x8   /* final cost */
 
-#define KANN_C_CEB      1   // binary cross-entropy cost, used with sigmoid
-#define KANN_C_CEM      2   // multi-class cross-entropy cost, used with softmax
-#define KANN_C_CEB_NEG  3   // binary cross-enytopy-like cost, used with tanh
-#define KANN_C_MSE      4   // mean square error
+#define KANN_C_CEB      1   /* binary cross-entropy cost, used with sigmoid */
+#define KANN_C_CEM      2   /* multi-class cross-entropy cost, used with softmax */
+#define KANN_C_CEB_NEG  3   /* binary cross-entropy-like cost, used with tanh */
+#define KANN_C_MSE      4   /* mean square error */
 
-#define KANN_RNN_VAR_H0 0x1 // take the initial hidden values as variables
-#define KANN_RNN_NORM   0x2 // apply layer normalization
+#define KANN_RNN_VAR_H0 0x1 /* take the initial hidden values as variables */
+#define KANN_RNN_NORM   0x2 /* apply layer normalization */
 
 #include "kautodiff.h"
 
 typedef struct {
-	int n;            // number of nodes in the computational graph
-	kad_node_t **v;   // list of nodes
-	float *x, *g, *c; // collated variable values, gradients and constant values
-	void *mt;         // auxiliary data for multi-threading; NULL if multi-threading disabled
+	int n;            /* number of nodes in the computational graph */
+	kad_node_t **v;   /* list of nodes */
+	float *x, *g, *c; /* collated variable values, gradients and constant values */
+	void *mt;         /* auxiliary data for multi-threading; NULL if multi-threading disabled */
 } kann_t;
 
 extern int kann_verbose;
@@ -91,8 +91,8 @@ kann_t *kann_new(kad_node_t *cost, int n_rest, ...);
 kann_t *kann_unroll(kann_t *a, ...);
 
 kann_t *kann_clone(kann_t *a, int batch_size);
-void kann_delete(kann_t *a);          // delete a network generated by kann_new() or kann_layer_final()
-void kann_delete_unrolled(kann_t *a); // delete a network generated by kann_unroll()
+void kann_delete(kann_t *a);          /* delete a network generated by kann_new() or kann_layer_final() */
+void kann_delete_unrolled(kann_t *a); /* delete a network generated by kann_unroll() */
 
 /**
  * Enable/disable multi-threading (requiring pthread)
@@ -188,7 +188,7 @@ void kann_RMSprop(int n, float h0, const float *h, float decay, const float *g,
 void kann_shuffle(int n, int *s);
 float kann_grad_clip(float thres, int n, float *g);
 
-// common layers
+/* common layers */
 kad_node_t *kann_layer_input(int n1);
 kad_node_t *kann_layer_dense(kad_node_t *in, int n1);
 kad_node_t *kann_layer_dropout(kad_node_t *t, float r);
@@ -200,7 +200,7 @@ kad_node_t *kann_layer_conv2d(kad_node_t *in, int n_flt, int k_rows, int k_cols,
 kad_node_t *kann_layer_conv1d(kad_node_t *in, int n_flt, int k_size, int stride, int pad);
 kad_node_t *kann_layer_cost(kad_node_t *t, int n_out, int cost_type);
 
-kad_node_t *kann_new_leaf(uint8_t flag, float x0_01, int n_d, ...); // flag can be KAD_CONST or KAD_VAR
+kad_node_t *kann_new_leaf(uint8_t flag, float x0_01, int n_d, ...); /* flag can be KAD_CONST or KAD_VAR */
 kad_node_t *kann_new_scalar(uint8_t flag, float x);
 kad_node_t *kann_new_weight(int n_row, int n_col);
 kad_node_t *kann_new_bias(int n);
@@ -214,12 +214,12 @@ kad_node_t *kann_layer_layernorm2(int *offset, kad_node_t **par, kad_node_t *in)
 kad_node_t *kann_layer_rnn2(int *offset, kad_node_t **par, kad_node_t *in, kad_node_t *h0, int rnn_flag);
 kad_node_t *kann_layer_gru2(int *offset, kad_node_t **par, kad_node_t *in, kad_node_t *h0, int rnn_flag);
 
-// operations on network with a single input node and a single output node
+/* operations on a network with a single input node and a single output node */
 int kann_train_fnn1(kann_t *ann, float lr, int mini_size, int max_epoch, int max_drop_streak, float frac_val, int n, float **_x, float **_y);
 float kann_cost_fnn1(kann_t *a, int n, float **x, float **y);
 const float *kann_apply1(kann_t *a, float *x);
 
-// model I/O
+/* model I/O */
 void kann_save_fp(FILE *fp, kann_t *ann);
 void kann_save(const char *fn, kann_t *ann);
 kann_t *kann_load_fp(FILE *fp);