Skip to content

Commit

Permalink
r534: support -ansi -pedantic
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Mar 31, 2017
1 parent e8f458f commit 62e646a
Show file tree
Hide file tree
Showing 5 changed files with 288 additions and 269 deletions.
7 changes: 7 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
CC= gcc
CFLAGS= -g -Wall -Wextra -Wc++-compat -O2
CFLAGS_LIB= #-ansi -pedantic -Wno-long-long # ANSI C does not have inline which affects performance a little bit
CPPFLAGS= -DHAVE_PTHREAD
INCLUDES= -I.
EXE= examples/mlp examples/mnist-cnn examples/inspect examples/textgen examples/rnn-bit \
Expand All @@ -20,6 +21,12 @@ endif

all:kautodiff.o kann.o kann_extra/kann_data.o $(EXE)

kautodiff.o:kautodiff.c
$(CC) -c $(CFLAGS) $(CFLAGS_LIB) $(INCLUDES) $(CPPFLAGS) -o $@ $<

kann.o:kann.c
$(CC) -c $(CFLAGS) $(CFLAGS_LIB) $(INCLUDES) $(CPPFLAGS) -o $@ $<

kann_extra/kann_data.o:kann_extra/kann_data.c
$(CC) -c $(CFLAGS) -DHAVE_ZLIB $< -o $@

Expand Down
67 changes: 35 additions & 32 deletions kann.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ kann_t *kann_new(kad_node_t *cost, int n_rest, ...)
if (a->v[i]->pre) has_recur = 1;
if (kad_is_pivot(a->v[i])) has_pivot = 1;
}
if (has_recur && !has_pivot) { // an RNN that doesn't have a pivot; then add a pivot on top of cost and recompile
if (has_recur && !has_pivot) { /* an RNN that doesn't have a pivot; then add a pivot on top of cost and recompile */
cost->ext_flag &= ~KANN_F_COST;
roots[n_roots-1] = cost = kad_avg(1, &cost), cost->ext_flag |= KANN_F_COST;
free(a->v);
Expand Down Expand Up @@ -112,7 +112,7 @@ kann_t *kann_unroll(kann_t *a, ...)
for (i = 0; i < n_pivots; ++i) len[i] = va_arg(ap, int);
va_end(ap);
b = (kann_t*)calloc(1, sizeof(kann_t));
b->x = a->x, b->g = a->g, b->c = a->c; // these arrays are shared
b->x = a->x, b->g = a->g, b->c = a->c; /* these arrays are shared */
b->v = kad_unroll(a->n, a->v, &b->n, len);
return b;
}
Expand Down Expand Up @@ -197,7 +197,7 @@ void kann_rnn_start(kann_t *a)
kann_set_batch_size(a, 1);
for (i = 0; i < a->n; ++i) {
kad_node_t *p = a->v[i];
if (p->pre) { // NB: BE CAREFUL of the interaction between kann_rnn_start() and kann_set_batch_size()
if (p->pre) { /* NB: BE CAREFUL of the interaction between kann_rnn_start() and kann_set_batch_size() */
kad_node_t *q = p->pre;
if (q->x) memcpy(p->x, q->x, kad_len(p) * sizeof(float));
else memset(p->x, 0, kad_len(p) * sizeof(float));
Expand All @@ -216,7 +216,7 @@ static int kann_class_error_core(const kann_t *ann, int *base)
int i, j, k, m, n, off, n_err = 0;
for (i = 0, *base = 0; i < ann->n; ++i) {
kad_node_t *p = ann->v[i];
if ((p->op == 13 || p->op == 22) && p->n_child == 2 && p->n_d == 0) { // ce_bin or ce_multi
if ((p->op == 13 || p->op == 22) && p->n_child == 2 && p->n_d == 0) { /* ce_bin or ce_multi */
kad_node_t *x = p->child[0], *t = p->child[1];
n = t->d[t->n_d - 1], m = kad_len(t) / n;
for (j = off = 0; j < m; ++j, off += n) {
Expand Down Expand Up @@ -249,24 +249,24 @@ static int kann_class_error_core(const kann_t *ann, int *base)

struct mtaux_t;

typedef struct { // per-worker data
typedef struct { /* per-worker data */
kann_t *a;
float cost;
int action;
pthread_t tid;
struct mtaux_t *g;
} mtaux1_t;

typedef struct mtaux_t { // cross-worker data
typedef struct mtaux_t { /* cross-worker data */
int n_threads, max_batch_size;
int cal_grad, cost_label;
volatile int n_idle; // we will be busy waiting on this, so volatile necessary
volatile int n_idle; /* we will be busy waiting on this, so volatile necessary */
pthread_mutex_t mtx;
pthread_cond_t cv;
mtaux1_t *mt;
} mtaux_t;

static void *mt_worker(void *data) // pthread worker
static void *mt_worker(void *data) /* pthread worker */
{
mtaux1_t *mt1 = (mtaux1_t*)data;
mtaux_t *mt = mt1->g;
Expand All @@ -286,7 +286,7 @@ static void *mt_worker(void *data) // pthread worker
pthread_exit(0);
}

static void mt_destroy(mtaux_t *mt) // de-allocate an entire mtaux_t struct
static void mt_destroy(mtaux_t *mt) /* de-allocate an entire mtaux_t struct */
{
int i;
pthread_mutex_lock(&mt->mtx);
Expand Down Expand Up @@ -328,7 +328,7 @@ void kann_mt(kann_t *ann, int n_threads, int max_batch_size)
}
for (i = 1; i < n_threads; ++i)
pthread_create(&mt->mt[i].tid, 0, mt_worker, &mt->mt[i]);
while (mt->n_idle < n_threads - 1); // busy waiting until all threads in sync
while (mt->n_idle < n_threads - 1); /* busy waiting until all threads in sync */
ann->mt = mt;
}

Expand All @@ -339,8 +339,8 @@ float kann_cost(kann_t *a, int cost_label, int cal_grad)
float cost;

if (mt == 0) return kann_cost_core(a, cost_label, cal_grad);
B = kad_sync_dim(a->n, a->v, -1); // get the current batch size
assert(B <= mt->max_batch_size); // TODO: can be relaxed
B = kad_sync_dim(a->n, a->v, -1); /* get the current batch size */
assert(B <= mt->max_batch_size); /* TODO: can be relaxed */
n_var = kann_size_var(a);

pthread_mutex_lock(&mt->mtx);
Expand All @@ -350,7 +350,7 @@ float kann_cost(kann_t *a, int cost_label, int cal_grad)
for (j = 0; j < a->n; ++j)
if (kad_is_feed(a->v[j]))
mt->mt[i].a->v[j]->x = &a->v[j]->x[k * kad_len(a->v[j]) / a->v[j]->d[0]];
kad_sync_dim(mt->mt[i].a->n, mt->mt[i].a->v, size); // TODO: we can point ->x to internal nodes, too
kad_sync_dim(mt->mt[i].a->n, mt->mt[i].a->v, size); /* TODO: we can point ->x to internal nodes, too */
k += size;
memcpy(mt->mt[i].a->x, a->x, n_var * sizeof(float));
mt->mt[i].action = 1;
Expand All @@ -360,7 +360,7 @@ float kann_cost(kann_t *a, int cost_label, int cal_grad)
pthread_mutex_unlock(&mt->mtx);

mt->mt[0].cost = kann_cost_core(mt->mt[0].a, cost_label, cal_grad);
while (mt->n_idle < mt->n_threads - 1); // busy waiting until all threads in sync
while (mt->n_idle < mt->n_threads - 1); /* busy waiting until all threads in sync */

memset(a->g, 0, n_var * sizeof(float));
for (i = k = 0, cost = 0.0f; i < mt->n_threads; ++i) {
Expand All @@ -369,7 +369,7 @@ float kann_cost(kann_t *a, int cost_label, int cal_grad)
kad_saxpy(n_var, (float)size / B, mt->mt[i].a->g, a->g);
k += size;
}
for (j = 0; j < a->n; ++j) { // copy values back at recurrent nodes (needed by textgen; TODO: temporary solution)
for (j = 0; j < a->n; ++j) { /* copy values back at recurrent nodes (needed by textgen; TODO: temporary solution) */
kad_node_t *p = a->v[j];
if (p->pre && p->n_d >= 2 && p->d[0] == B) {
for (i = k = 0; i < mt->n_threads; ++i) {
Expand Down Expand Up @@ -398,7 +398,10 @@ void kann_switch(kann_t *ann, int is_train)
{
mtaux_t *mt = (mtaux_t*)ann->mt;
int i;
if (mt == 0) return kann_switch_core(ann, is_train);
if (mt == 0) {
kann_switch_core(ann, is_train);
return;
}
for (i = 0; i < mt->n_threads; ++i)
kann_switch_core(mt->mt[i].a, is_train);
}
Expand Down Expand Up @@ -470,7 +473,7 @@ kann_t *kann_load(const char *fn)
*** @@LAYER: layers and model generation ***
**********************************************/

/////////// General but more complex APIs ///////////
/********** General but more complex APIs **********/

kad_node_t *kann_new_leaf_array(int *offset, kad_node_p *par, uint8_t flag, float x0_01, int n_d, int32_t d[KAD_MAX_DIM])
{
Expand Down Expand Up @@ -539,7 +542,7 @@ static inline kad_node_t *cmul_norm2(int *offset, kad_node_t **par, kad_node_t *

kad_node_t *kann_layer_rnn2(int *offset, kad_node_t **par, kad_node_t *in, kad_node_t *h0, int rnn_flag)
{
int n0, n1 = h0->d[h0->n_d-1], use_norm = !!(rnn_flag & KANN_RNN_NORM);;
int n0, n1 = h0->d[h0->n_d-1], use_norm = !!(rnn_flag & KANN_RNN_NORM);
kad_node_t *t, *w, *u, *b, *out;

u = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 2, n1, n1);
Expand All @@ -561,7 +564,7 @@ kad_node_t *kann_layer_gru2(int *offset, kad_node_t **par, kad_node_t *in, kad_n
kad_node_t *t, *r, *z, *w, *u, *b, *s, *out;

if (in) n0 = in->n_d >= 2? kad_len(in) / in->d[0] : kad_len(in);
// z = sigm(x_t * W_z + h_{t-1} * U_z + b_z)
/* z = sigm(x_t * W_z + h_{t-1} * U_z + b_z) */
u = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 2, n1, n1);
b = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 1, n1);
t = cmul_norm2(offset, par, h0, u, use_norm);
Expand All @@ -570,7 +573,7 @@ kad_node_t *kann_layer_gru2(int *offset, kad_node_t **par, kad_node_t *in, kad_n
t = kad_add(cmul_norm2(offset, par, in, w, use_norm), t);
}
z = kad_sigm(kad_add(t, b));
// r = sigm(x_t * W_r + h_{t-1} * U_r + b_r)
/* r = sigm(x_t * W_r + h_{t-1} * U_r + b_r) */
u = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 2, n1, n1);
b = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 1, n1);
t = cmul_norm2(offset, par, h0, u, use_norm);
Expand All @@ -579,7 +582,7 @@ kad_node_t *kann_layer_gru2(int *offset, kad_node_t **par, kad_node_t *in, kad_n
t = kad_add(cmul_norm2(offset, par, in, w, use_norm), t);
}
r = kad_sigm(kad_add(t, b));
// s = tanh(x_t * W_s + (h_{t-1} # r) * U_s + b_s)
/* s = tanh(x_t * W_s + (h_{t-1} # r) * U_s + b_s) */
u = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 2, n1, n1);
b = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 1, n1);
t = cmul_norm2(offset, par, kad_mul(r, h0), u, use_norm);
Expand All @@ -588,13 +591,13 @@ kad_node_t *kann_layer_gru2(int *offset, kad_node_t **par, kad_node_t *in, kad_n
t = kad_add(cmul_norm2(offset, par, in, w, use_norm), t);
}
s = kad_tanh(kad_add(t, b));
// h_t = z # h_{t-1} + (1 - z) # s
/* h_t = z # h_{t-1} + (1 - z) # s */
out = kad_add(kad_mul(kad_1minus(z), s), kad_mul(z, h0));
out->pre = h0;
return out;
}

/////////// APIs without offset & par ///////////
/********** APIs without offset & par **********/

kad_node_t *kann_new_leaf(uint8_t flag, float x0_01, int n_d, ...)
{
Expand Down Expand Up @@ -655,31 +658,31 @@ kad_node_t *kann_layer_lstm(kad_node_t *in, int n1, int rnn_flag)
c0 = (rnn_flag & KANN_RNN_VAR_H0)? kad_var(0, 0, 2, 1, n1) : kad_const(0, 2, 1, n1);
c0->x = (float*)calloc(n1, sizeof(float));

// i = sigm(x_t * W_i + h_{t-1} * U_i + b_i)
/* i = sigm(x_t * W_i + h_{t-1} * U_i + b_i) */
w = kann_new_weight(n1, n0);
u = kann_new_weight(n1, n1);
b = kann_new_bias(n1);
i = kad_sigm(kad_add(kad_add(cmul(in, w), cmul(h0, u)), b));
// f = sigm(x_t * W_f + h_{t-1} * U_f + b_f)
/* f = sigm(x_t * W_f + h_{t-1} * U_f + b_f) */
w = kann_new_weight(n1, n0);
u = kann_new_weight(n1, n1);
b = kann_new_vec(n1, 1.0f); // see Jozefowicz et al on using a large bias
b = kann_new_vec(n1, 1.0f); /* see Jozefowicz et al on using a large bias */
f = kad_sigm(kad_add(kad_add(cmul(in, w), cmul(h0, u)), b));
// o = sigm(x_t * W_o + h_{t-1} * U_o + b_o)
/* o = sigm(x_t * W_o + h_{t-1} * U_o + b_o) */
w = kann_new_weight(n1, n0);
u = kann_new_weight(n1, n1);
b = kann_new_bias(n1);
o = kad_sigm(kad_add(kad_add(cmul(in, w), cmul(h0, u)), b));
// g = tanh(x_t * W_g + h_{t-1} * U_g + b_g)
/* g = tanh(x_t * W_g + h_{t-1} * U_g + b_g) */
w = kann_new_weight(n1, n0);
u = kann_new_weight(n1, n1);
b = kann_new_bias(n1);
g = kad_tanh(kad_add(kad_add(cmul(in, w), cmul(h0, u)), b));
// c_t = c_{t-1} # f + g # i
c = kad_add(kad_mul(f, c0), kad_mul(g, i)); // can't be kad_mul(c0, f)!!!
/* c_t = c_{t-1} # f + g # i */
c = kad_add(kad_mul(f, c0), kad_mul(g, i)); /* can't be kad_mul(c0, f)!!! */
c->pre = c0;
// h_t = tanh(c_t) # o
if (rnn_flag & KANN_RNN_NORM) c = kann_layer_layernorm(c); // see Ba et al (2016) about how to apply layer normalization to LSTM
/* h_t = tanh(c_t) # o */
if (rnn_flag & KANN_RNN_NORM) c = kann_layer_layernorm(c); /* see Ba et al (2016) about how to apply layer normalization to LSTM */
out = kad_mul(kad_tanh(c), o);
out->pre = h0;
return out;
Expand Down
42 changes: 21 additions & 21 deletions kann.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,28 @@
#ifndef KANN_H
#define KANN_H

#define KANN_VERSION "r530"
#define KANN_VERSION "r534"

#define KANN_F_IN 0x1 // input
#define KANN_F_OUT 0x2 // output
#define KANN_F_TRUTH 0x4 // truth output
#define KANN_F_COST 0x8 // final cost
#define KANN_F_IN 0x1 /* input */
#define KANN_F_OUT 0x2 /* output */
#define KANN_F_TRUTH 0x4 /* truth output */
#define KANN_F_COST 0x8 /* final cost */

#define KANN_C_CEB 1 // binary cross-entropy cost, used with sigmoid
#define KANN_C_CEM 2 // multi-class cross-entropy cost, used with softmax
#define KANN_C_CEB_NEG 3 // binary cross-entropy-like cost, used with tanh
#define KANN_C_MSE 4 // mean square error
#define KANN_C_CEB 1 /* binary cross-entropy cost, used with sigmoid */
#define KANN_C_CEM 2 /* multi-class cross-entropy cost, used with softmax */
#define KANN_C_CEB_NEG 3 /* binary cross-entropy-like cost, used with tanh */
#define KANN_C_MSE 4 /* mean square error */

#define KANN_RNN_VAR_H0 0x1 // take the initial hidden values as variables
#define KANN_RNN_NORM 0x2 // apply layer normalization
#define KANN_RNN_VAR_H0 0x1 /* take the initial hidden values as variables */
#define KANN_RNN_NORM 0x2 /* apply layer normalization */

#include "kautodiff.h"

typedef struct {
int n; // number of nodes in the computational graph
kad_node_t **v; // list of nodes
float *x, *g, *c; // collated variable values, gradients and constant values
void *mt; // auxiliary data for multi-threading; NULL if multi-threading disabled
int n; /* number of nodes in the computational graph */
kad_node_t **v; /* list of nodes */
float *x, *g, *c; /* collated variable values, gradients and constant values */
void *mt; /* auxiliary data for multi-threading; NULL if multi-threading disabled */
} kann_t;

extern int kann_verbose;
Expand Down Expand Up @@ -91,8 +91,8 @@ kann_t *kann_new(kad_node_t *cost, int n_rest, ...);
kann_t *kann_unroll(kann_t *a, ...);

kann_t *kann_clone(kann_t *a, int batch_size);
void kann_delete(kann_t *a); // delete a network generated by kann_new() or kann_layer_final()
void kann_delete_unrolled(kann_t *a); // delete a network generated by kann_unroll()
void kann_delete(kann_t *a); /* delete a network generated by kann_new() or kann_layer_final() */
void kann_delete_unrolled(kann_t *a); /* delete a network generated by kann_unroll() */

/**
* Enable/disable multi-threading (requiring pthread)
Expand Down Expand Up @@ -188,7 +188,7 @@ void kann_RMSprop(int n, float h0, const float *h, float decay, const float *g,
void kann_shuffle(int n, int *s);
float kann_grad_clip(float thres, int n, float *g);

// common layers
/* common layers */
kad_node_t *kann_layer_input(int n1);
kad_node_t *kann_layer_dense(kad_node_t *in, int n1);
kad_node_t *kann_layer_dropout(kad_node_t *t, float r);
Expand All @@ -200,7 +200,7 @@ kad_node_t *kann_layer_conv2d(kad_node_t *in, int n_flt, int k_rows, int k_cols,
kad_node_t *kann_layer_conv1d(kad_node_t *in, int n_flt, int k_size, int stride, int pad);
kad_node_t *kann_layer_cost(kad_node_t *t, int n_out, int cost_type);

kad_node_t *kann_new_leaf(uint8_t flag, float x0_01, int n_d, ...); // flag can be KAD_CONST or KAD_VAR
kad_node_t *kann_new_leaf(uint8_t flag, float x0_01, int n_d, ...); /* flag can be KAD_CONST or KAD_VAR */
kad_node_t *kann_new_scalar(uint8_t flag, float x);
kad_node_t *kann_new_weight(int n_row, int n_col);
kad_node_t *kann_new_bias(int n);
Expand All @@ -214,12 +214,12 @@ kad_node_t *kann_layer_layernorm2(int *offset, kad_node_t **par, kad_node_t *in)
kad_node_t *kann_layer_rnn2(int *offset, kad_node_t **par, kad_node_t *in, kad_node_t *h0, int rnn_flag);
kad_node_t *kann_layer_gru2(int *offset, kad_node_t **par, kad_node_t *in, kad_node_t *h0, int rnn_flag);

// operations on network with a single input node and a single output node
/* operations on network with a single input node and a single output node */
int kann_train_fnn1(kann_t *ann, float lr, int mini_size, int max_epoch, int max_drop_streak, float frac_val, int n, float **_x, float **_y);
float kann_cost_fnn1(kann_t *a, int n, float **x, float **y);
const float *kann_apply1(kann_t *a, float *x);

// model I/O
/* model I/O */
void kann_save_fp(FILE *fp, kann_t *ann);
void kann_save(const char *fn, kann_t *ann);
kann_t *kann_load_fp(FILE *fp);
Expand Down
Loading

0 comments on commit 62e646a

Please sign in to comment.