r514: softmax/ce_multi/stdnorm act on last dim
This makes more sense and stays closer to tensorflow.
lh3 committed Mar 14, 2017
1 parent 5dce9f2 commit 9c00524
Showing 2 changed files with 12 additions and 11 deletions.
14 changes: 7 additions & 7 deletions kautodiff.c
@@ -1517,10 +1517,10 @@ int kad_op_ce_multi(kad_node_t *p, int action)
     kad_node_t *y0 = p->child[1]; // truth
     int i, j, n1, d0;
 
-    d0 = y0->n_d > 1? y0->d[0] : 1;
-    n1 = kad_len(y0) / d0;
+    n1 = y0->d[y0->n_d - 1];
+    d0 = kad_len(y0) / n1;
     if (action == KAD_SYNC_DIM) {
-        if (kad_len(y0) != kad_len(y1) || y0->d[0] != y1->d[0]) return -1;
+        if (kad_len(y0) != kad_len(y1) || y0->d[y0->n_d - 1] != y1->d[y1->n_d - 1]) return -1;
         p->n_d = 0;
     } else if (action == KAD_FORWARD) {
         double cost = 0.0;
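To make the new n1/d0 split concrete, here is a hedged sketch of plain multi-class cross-entropy computed over the last dimension: each run of n1 consecutive values is treated as one distribution and the cost is averaged over the d0 leading rows. The helper name and the 1e-12 guard are assumptions for illustration; kad_op_ce_multi's actual forward pass may differ in detail.

#include <math.h>

/* Illustrative only, not the library's code: cross-entropy over the last
 * dimension, averaged over the d0 leading rows, mirroring the n1/d0 split
 * introduced in the hunk above. */
static double ce_multi_lastdim(const float *pred, const float *truth, int d0, int n1)
{
    int i, j;
    double cost = 0.0;
    for (j = 0; j < d0; ++j)
        for (i = 0; i < n1; ++i)
            if (truth[j * n1 + i] > 0.0f)
                cost -= truth[j * n1 + i] * log((double)pred[j * n1 + i] + 1e-12);
    return cost / d0;
}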
@@ -1549,8 +1549,8 @@ int kad_op_stdnorm(kad_node_t *p, int action)
     int i, j, n, m;
     kad_node_t *q = p->child[0];
     assert(q->n_d > 0);
-    if (q->n_d == 1) m = 1, n = kad_len(q);
-    else m = q->d[0], n = kad_len(q) / m;
+    n = q->d[q->n_d - 1];
+    m = kad_len(q) / n;
     if (action == KAD_SYNC_DIM) {
         kad_copy_dim1(p, q);
     } else if (action == KAD_ALLOC) {
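For reference, a self-contained sketch of what standardizing over the last dimension means: each of the m rows of n values is shifted to zero mean and scaled to unit standard deviation. The helper name and the 1e-8 epsilon are assumptions; kad_op_stdnorm itself also handles allocation and the backward pass, which are omitted here.

#include <math.h>

/* Sketch (assumed name, not in kautodiff.h): standardize each of m rows of
 * n consecutive values, i.e. the last dimension, independently. */
static void stdnorm_lastdim(const float *x, float *y, int m, int n)
{
    int i, j;
    for (j = 0; j < m; ++j) {
        const float *xr = x + j * n;
        float *yr = y + j * n;
        double mean = 0.0, var = 0.0;
        for (i = 0; i < n; ++i) mean += xr[i];
        mean /= n;
        for (i = 0; i < n; ++i) var += (xr[i] - mean) * (xr[i] - mean);
        var /= n;
        for (i = 0; i < n; ++i)
            yr[i] = (float)((xr[i] - mean) / sqrt(var + 1e-8)); /* epsilon guards constant rows */
    }
}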
@@ -1665,8 +1665,8 @@ int kad_op_softmax(kad_node_t *p, int action)
     int i, j, n1, d0;
     kad_node_t *q = p->child[0];
 
-    d0 = q->n_d > 1? q->d[0] : 1;
-    n1 = kad_len(q) / d0;
+    n1 = q->d[q->n_d - 1];
+    d0 = kad_len(q) / n1;
     if (action == KAD_SYNC_DIM) {
         kad_copy_dim1(p, q);
     } else if (action == KAD_FORWARD) {
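The same row split drives kad_op_softmax above. Below is a minimal sketch of softmax applied to the last dimension, equivalent in spirit to tf.nn.softmax(x, dim=-1); the helper name is an assumption, and the max-subtraction for numerical stability may differ from the library's forward pass.

#include <math.h>

/* Sketch only: softmax over each row of n1 values (the last dimension),
 * applied independently to the d0 leading rows. */
static void softmax_lastdim(const float *x, float *y, int d0, int n1)
{
    int i, j;
    for (j = 0; j < d0; ++j) {
        const float *xr = x + j * n1;
        float *yr = y + j * n1;
        float max = xr[0], sum = 0.0f;
        for (i = 1; i < n1; ++i)           /* find the row max to subtract before exp */
            if (xr[i] > max) max = xr[i];
        for (i = 0; i < n1; ++i) {
            yr[i] = expf(xr[i] - max);
            sum += yr[i];
        }
        for (i = 0; i < n1; ++i) yr[i] /= sum;
    }
}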
9 changes: 5 additions & 4 deletions kautodiff.h
@@ -27,7 +27,7 @@
 #ifndef KANN_AUTODIFF_H
 #define KANN_AUTODIFF_H
 
-#define KAD_VERSION "r513"
+#define KAD_VERSION "r514"
 
 #include <stdio.h>
 #include <stdint.h>
@@ -147,8 +147,9 @@ kad_node_t *kad_mul(kad_node_t *x, kad_node_t *y); // f(x,y) = x * y (generalize
 kad_node_t *kad_matmul(kad_node_t *x, kad_node_t *y); // f(x,y) = x * y (general matrix product)
 kad_node_t *kad_cmul(kad_node_t *x, kad_node_t *y); // f(x,y) = x * y^T (column-wise matrix product; i.e. y is transposed)
 
+// loss functions; output scalar
 kad_node_t *kad_mse(kad_node_t *x, kad_node_t *y); // mean square error
-kad_node_t *kad_ce_multi(kad_node_t *x, kad_node_t *y); // multi-class cross-entropy; output is a scalar; x is the prediction and y is the truth
+kad_node_t *kad_ce_multi(kad_node_t *x, kad_node_t *y); // multi-class cross-entropy; x is the prediction and y is the truth
 kad_node_t *kad_ce_bin(kad_node_t *x, kad_node_t *y); // binary cross-entropy for (0,1)
 kad_node_t *kad_ce_bin_neg(kad_node_t *x, kad_node_t *y); // binary cross-entropy for (-1,1)
 
@@ -169,13 +170,13 @@ kad_node_t *kad_square(kad_node_t *x); // f(x) = x^2 (el
 kad_node_t *kad_sigm(kad_node_t *x); // f(x) = 1/(1+exp(-x)) (element-wise sigmoid)
 kad_node_t *kad_tanh(kad_node_t *x); // f(x) = (1-exp(-2x)) / (1+exp(-2x)) (element-wise tanh)
 kad_node_t *kad_relu(kad_node_t *x); // f(x) = max{0,x} (element-wise rectifier, aka ReLU)
-kad_node_t *kad_softmax(kad_node_t *x); // f_i(x_1,...,x_n) = exp(x_i) / \sum_j exp(x_j) (softmax)
+kad_node_t *kad_softmax(kad_node_t *x); // f_i(x_1,...,x_n) = exp(x_i) / \sum_j exp(x_j) (softmax: tf.nn.softmax(x,dim=-1))
 kad_node_t *kad_1minus(kad_node_t *x); // f(x) = 1 - x
 kad_node_t *kad_exp(kad_node_t *x); // f(x) = exp(x)
 kad_node_t *kad_log(kad_node_t *x); // f(x) = log(x)
 kad_node_t *kad_sin(kad_node_t *x); // f(x) = sin(x)
 
-kad_node_t *kad_stdnorm(kad_node_t *x); // layer normalization
+kad_node_t *kad_stdnorm(kad_node_t *x); // layer normalization; applied to the last dimension
 
 // operators taking an indefinite number of operands (e.g. pooling)
 kad_node_t *kad_avg(int n, kad_node_t **x); // f(x_1,...,x_n) = \sum_i x_i/n (mean pooling)