lib/layers.py

import numpy as np

# Theano
import collections
import theano
import theano.tensor as tensor
from theano.tensor.nnet import conv, conv3d2d, sigmoid, relu
from theano.tensor.signal import pool

trainable_params = []


def get_trainable_params():
    global trainable_params
    return trainable_params


class Weight(object):
    def __init__(self,
                 w_shape,
                 is_bias,
                 mean=0,
                 std=0.01,
                 filler='msra',
                 fan_in=None,
                 fan_out=None,
                 name=None):
        super(Weight, self).__init__()
        assert (is_bias in [True, False])
        rng = np.random.RandomState()

        if isinstance(w_shape, collections.Iterable) and not is_bias:
            if len(w_shape) > 1 and len(w_shape) < 5:
                fan_in = np.prod(w_shape[1:])
                fan_out = np.prod(w_shape) / w_shape[1]
                n = (fan_in + fan_out) / 2.
            elif len(w_shape) == 5:
                # 3D Convolution filter
                fan_in = np.prod(w_shape[1:])
                fan_out = np.prod(w_shape) / w_shape[2]
                n = (fan_in + fan_out) / 2.
            else:
                raise NotImplementedError(
                    'Filter shape with ndim > 5 not supported: len(w_shape) = %d' % len(w_shape))
        else:
            n = 1

        if fan_in and fan_out:
            n = (fan_in + fan_out) / 2.

        if filler == 'gaussian':
            self.np_values = np.asarray(rng.normal(mean, std, w_shape), dtype=theano.config.floatX)
        elif filler == 'msra':
            self.np_values = np.asarray(
                rng.normal(mean, np.sqrt(2. / n), w_shape), dtype=theano.config.floatX)
        elif filler == 'xavier':
            scale = np.sqrt(3. / n)
            self.np_values = np.asarray(
                rng.uniform(
                    low=-scale, high=scale, size=w_shape), dtype=theano.config.floatX)
        elif filler == 'constant':
            self.np_values = np.cast[theano.config.floatX](mean * np.ones(
                w_shape, dtype=theano.config.floatX))
        elif filler == 'orth':
            ndim = np.prod(w_shape)
            W = np.random.randn(ndim, ndim)
            u, s, v = np.linalg.svd(W)
            self.np_values = u.astype(theano.config.floatX).reshape(w_shape)
        else:
            raise NotImplementedError('Filler %s not implemented' % filler)

        self.is_bias = is_bias  # Either the weight is bias or not
        self.val = theano.shared(value=self.np_values)
        self.shape = w_shape
        self.name = name

        global trainable_params
        trainable_params.append(self)


class InputLayer(object):
    def __init__(self, input_shape, tinput=None):
        self._output_shape = input_shape
        self._input = tinput

    @property
    def output(self):
        if self._input is None:
            raise ValueError('Cannot call output for the layer. Initialize' \
                             + ' the layer with an input argument')
        return self._input

    @property
    def output_shape(self):
        return self._output_shape


class Layer(object):
    ''' Layer abstract class. support basic functionalities.
    If you want to set the output shape, either prev_layer or input_shape must
    be defined.

    If you want to use the computation graph, provide either prev_layer or set_input
    '''

    def __init__(self, prev_layer):
        self._output = None
        self._output_shape = None
        self._prev_layer = prev_layer
        self._input_shape = prev_layer.output_shape
        # Define self._output_shape

    def set_output(self):
        '''Override the function'''
        # set self._output using self._input=self._prev_layer.output
        raise NotImplementedError('Layer virtual class')

    @property
    def output_shape(self):
        if self._output_shape is None:
            raise ValueError('Set output shape first')
        return self._output_shape

    @property
    def output(self):
        if self._output is None:
            self.set_output()
        return self._output


class TensorProductLayer(Layer):
    def __init__(self, prev_layer, n_out, params=None, bias=True):
        super().__init__(prev_layer)
        self._bias = bias
        n_in = self._input_shape[-1]

        if params is None:
            self.W = Weight((n_in, n_out), is_bias=False)
            if bias:
                self.b = Weight((n_out,), is_bias=True, mean=0.1, filler='constant')
        else:
            self.W = params[0]
            if bias:
                self.b = params[1]

        # parameters of the model
        self.params = [self.W]
        if bias:
            self.params.append(self.b)

        self._output_shape = [self._input_shape[0]]
        self._output_shape.extend(self._input_shape[1:-1])
        self._output_shape.append(n_out)

    def set_output(self):
        self._output = tensor.dot(self._prev_layer.output, self.W.val)
        if self._bias:
            self._output += self.b.val


class BlockDiagonalLayer(Layer):
    """
    Compute block diagonal matrix multiplication efficiently using broadcasting

    Last dimension will be used for matrix multiplication.

    prev_layer.output_shape = N x D_1 x D_2 x ... x D_{n-1} x D_n
    output_shape            = N x D_1 x D_2 x ... x D_{n-1} x n_out
    """

    def __init__(self, prev_layer, n_out, params=None, bias=True):
        super().__init__(prev_layer)
        self._bias = bias
        self._output_shape = list(self._input_shape)
        self._output_shape[-1] = n_out
        self._output_shape = tuple(self._output_shape)

        if params is None:
            self._W_shape = list(self._input_shape[1:])
            self._W_shape.append(n_out)
            self._W_shape = tuple(self._W_shape)
            self.W = Weight(self._W_shape, is_bias=False)
            if bias:
                self.b = Weight(self._output_shape[1:], is_bias=True, mean=0.1, filler='constant')
        else:
            self.W = params[0]
            if bias:
                self.b = params[1]

        # parameters of the model
        self.params = [self.W]
        if bias:
            self.params.append(self.b)

    def set_output(self):
        self._output = tensor.sum(tensor.shape_padright(self._prev_layer.output) *
                                  tensor.shape_padleft(self.W.val),
                                  axis=-2)
        if self._bias:
            self._output += tensor.shape_padleft(self.b.val)


class AddLayer(Layer):
    def __init__(self, prev_layer, add_layer):
        super().__init__(prev_layer)
        self._output_shape = self._input_shape
        self._add_layer = add_layer

    def set_output(self):
        self._output = self._prev_layer.output + self._add_layer.output


class EltwiseMultiplyLayer(Layer):
    def __init__(self, prev_layer, mult_layer):
        super().__init__(prev_layer)
        self._output_shape = self._input_shape
        self._mult_layer = mult_layer

    def set_output(self):
        self._output = self._prev_layer.output * self._mult_layer.output


class FlattenLayer(Layer):
    def __init__(self, prev_layer):
        super().__init__(prev_layer)
        self._output_shape = [self._input_shape[0], np.prod(self._input_shape[1:])]

    def set_output(self):
        self._output = \
            self._prev_layer.output.flatten(2)  # flatten from the second dim


class DimShuffleLayer(Layer):
    def __init__(self, prev_layer, shuffle_pattern):
        super().__init__(prev_layer)
        self._shuffle_pattern = shuffle_pattern
        self._output_shape = list(self._input_shape)
        for out_dim, in_dim in enumerate(shuffle_pattern):
            self._output_shape[out_dim] = self._input_shape[in_dim]
        self._output_shape = tuple(self._output_shape)

    def set_output(self):
        self._output = self._prev_layer.output.dimshuffle(self._shuffle_pattern)


class ReshapeLayer(Layer):
    def __init__(self, prev_layer, reshape):
        super().__init__(prev_layer)
        self._output_shape = [self._prev_layer.output_shape[0]]
        self._output_shape.extend(reshape)
        self._output_shape = tuple(self._output_shape)
        print('Reshape the prev layer to [%s]' % ','.join(str(x) for x in self._output_shape))

    def set_output(self):
        self._output = tensor.reshape(self._prev_layer.output, self._output_shape)


class ConvLayer(Layer):
    """Conv Layer
    filter_shape: [n_out_channel, n_height, n_width]

    self._input_shape: [batch_size, n_in_channel, n_height, n_width]
    """

    def __init__(self, prev_layer, filter_shape, padding=True, params=None):
        super().__init__(prev_layer)
        self._padding = padding
        self._filter_shape = [filter_shape[0], self._input_shape[1], filter_shape[1],
                              filter_shape[2]]
        if params is None:
            self.W = Weight(self._filter_shape, is_bias=False)
            self.b = Weight((filter_shape[0],), is_bias=True, mean=0.1, filler='constant')
        else:
            for i, s in enumerate(self._filter_shape):
                assert (params[0].shape[i] == s)
            self.W = params[0]
            self.b = params[1]

        self.params = [self.W, self.b]

        # Define self._output_shape
        if padding and filter_shape[1] * filter_shape[2] > 1:
            self._padding = [0, 0, int((filter_shape[1] - 1) / 2), int((filter_shape[2] - 1) / 2)]
            self._output_shape = [self._input_shape[0], filter_shape[0], self._input_shape[2],
                                  self._input_shape[3]]
        else:
            self._padding = [0] * 4
            # TODO: for the 'valid' convolution mode the following is the
            # output shape. Diagnose failure
            self._output_shape = [self._input_shape[0], filter_shape[0],
                                  self._input_shape[2] - filter_shape[1] + 1,
                                  self._input_shape[3] - filter_shape[2] + 1]

    def set_output(self):
        if sum(self._padding) > 0:
            padded_input = tensor.alloc(0.0,  # Value to fill the tensor
                                        self._input_shape[0],
                                        self._input_shape[1],
                                        self._input_shape[2] + 2 * self._padding[2],
                                        self._input_shape[3] + 2 * self._padding[3])

            padded_input = tensor.set_subtensor(
                padded_input[:, :, self._padding[2]:self._padding[2] + self._input_shape[2],
                self._padding[3]:self._padding[3] + self._input_shape[3]],
                self._prev_layer.output)

            padded_input_shape = [self._input_shape[0], self._input_shape[1],
                                  self._input_shape[2] + 2 * self._padding[2],
                                  self._input_shape[3] + 2 * self._padding[3]]
        else:
            padded_input = self._prev_layer.output
            padded_input_shape = self._input_shape

        conv_out = conv.conv2d(
            input=padded_input,
            filters=self.W.val,
            filter_shape=self._filter_shape,
            image_shape=np.asarray(
                padded_input_shape, dtype=np.int16),
            border_mode='valid')

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        self._output = conv_out + self.b.val.dimshuffle('x', 0, 'x', 'x')


class PoolLayer(Layer):
    def __init__(self, prev_layer, pool_size=(2, 2), padding=(1, 1)):
        super().__init__(prev_layer)
        self._pool_size = pool_size
        self._padding = padding
        img_rows = self._input_shape[2] + 2 * padding[0]
        img_cols = self._input_shape[3] + 2 * padding[1]
        out_r = (img_rows - pool_size[0]) // pool_size[0] + 1
        out_c = (img_cols - pool_size[1]) // pool_size[1] + 1
        self._output_shape = [self._input_shape[0], self._input_shape[1], out_r, out_c]

    def set_output(self):
        pooled_out = pool.pool_2d(
            input=self._prev_layer.output,
            ds=self._pool_size,
            ignore_border=True,
            padding=self._padding)
        self._output = pooled_out


class Pool3DLayer(Layer):
    def __init__(self, prev_layer, pool_size=(2, 2, 2), padding=(1, 1, 1)):
        super().__init__(prev_layer)
        self._pool_size = pool_size
        self._padding = padding                           # (B, D, C, H, W)
        img_ds = self._input_shape[1] + 2 * padding[0]
        img_rows = self._input_shape[3] + 2 * padding[1]
        img_cols = self._input_shape[4] + 2 * padding[2]
        out_d = (img_ds - pool_size[0]) // pool_size[0]
        out_r = (img_rows - pool_size[1]) // pool_size[1]
        out_c = (img_cols - pool_size[2]) // pool_size[2]
        self._output_shape = [self._input_shape[0], out_d, self._input_shape[2], out_r, out_c]

    def set_output(self):
        pooled_out = pool.pool_3d(
            input=self._prev_layer.output,
            ds=self._pool_size,
            ignore_border=True,
            padding=self._padding)
        self._output = pooled_out


class Unpool3DLayer(Layer):
    """3D Unpooling layer for a convolutional network """

    def __init__(self, prev_layer, unpool_size=(2, 2, 2), padding=(0, 0, 0)):
        super().__init__(prev_layer)
        self._unpool_size = unpool_size
        self._padding = padding
        output_shape = (self._input_shape[0],  # batch
                        unpool_size[0] * self._input_shape[1] + 2 * padding[0],  # depth
                        self._input_shape[2],  # out channel
                        unpool_size[1] * self._input_shape[3] + 2 * padding[1],  # row
                        unpool_size[2] * self._input_shape[4] + 2 * padding[2])  # col
        self._output_shape = output_shape

    def set_output(self):
        output_shape = self._output_shape
        padding = self._padding
        unpool_size = self._unpool_size
        unpooled_output = tensor.alloc(0.0,  # Value to fill the tensor
                                       output_shape[0],
                                       output_shape[1] + 2 * padding[0],
                                       output_shape[2],
                                       output_shape[3] + 2 * padding[1],
                                       output_shape[4] + 2 * padding[2])

        unpooled_output = tensor.set_subtensor(unpooled_output[
                                               :,
                                               padding[0]:output_shape[1] + padding[0]:unpool_size[0],
                                               :,
                                               padding[1]:output_shape[3] + padding[1]:unpool_size[1],
                                               padding[2]:output_shape[4] + padding[2]:unpool_size[2]
                                               ],
                                               self._prev_layer.output)
        self._output = unpooled_output


class Conv3DLayer(Layer):
    """3D Convolution layer"""

    def __init__(self, prev_layer, filter_shape, padding=None, params=None):
        super().__init__(prev_layer)
        self._filter_shape = [filter_shape[0],  # out channel
                              filter_shape[1],  # time
                              self._input_shape[2],  # in channel
                              filter_shape[2],  # height
                              filter_shape[3]]  # width
        self._padding = padding

        # signals: (batch,       in channel, depth_i, row_i, column_i)
        # filters: (out channel, in channel, depth_f, row_f, column_f)

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        if params is None:
            self.W = Weight(self._filter_shape, is_bias=False)
            self.b = Weight((filter_shape[0],), is_bias=True, mean=0.1, filler='constant')
            params = [self.W, self.b]
        else:
            self.W = params[0]
            self.b = params[1]

        self.params = [self.W, self.b]

        if padding is None:
            self._padding = [0, int((filter_shape[1] - 1) / 2), 0, int((filter_shape[2] - 1) / 2),
                             int((filter_shape[3] - 1) / 2)]

        self._output_shape = [self._input_shape[0], self._input_shape[1], filter_shape[0],
                              self._input_shape[3], self._input_shape[4]]

    def set_output(self):
        padding = self._padding
        input_shape = self._input_shape
        if np.sum(self._padding) > 0:
            padded_input = tensor.alloc(0.0,  # Value to fill the tensor
                                        input_shape[0],
                                        input_shape[1] + 2 * padding[1],
                                        input_shape[2],
                                        input_shape[3] + 2 * padding[3],
                                        input_shape[4] + 2 * padding[4])

            padded_input = tensor.set_subtensor(
                padded_input[:, padding[1]:padding[1] + input_shape[1], :, padding[3]:padding[3] +
                                                                                      input_shape[3],
                padding[4]:padding[4] + input_shape[4]],
                self._prev_layer.output)
        else:
            padded_input = self._prev_layer.output

        self._output = conv3d2d.conv3d(padded_input, self.W.val) + \
                       self.b.val.dimshuffle('x', 'x', 0, 'x', 'x')


class FCConv3DLayer(Layer):
    """3D Convolution layer with FC input and hidden unit"""

    def __init__(self, prev_layer, fc_layer, filter_shape, padding=None, params=None):
        """Prev layer is the 3D hidden layer"""
        super().__init__(prev_layer)
        self._fc_layer = fc_layer
        self._filter_shape = [filter_shape[0],  # out channel
                              filter_shape[2],  # time
                              filter_shape[1],  # in channel
                              filter_shape[3],  # height
                              filter_shape[4]]  # width
        self._padding = padding

        if padding is None:
            self._padding = [0, int((self._filter_shape[1] - 1) / 2), 0, int(
                (self._filter_shape[3] - 1) / 2), int((self._filter_shape[4] - 1) / 2)]

        self._output_shape = [self._input_shape[0], self._input_shape[1], filter_shape[0],
                              self._input_shape[3], self._input_shape[4]]

        if params is None:
            self.Wh = Weight(self._filter_shape, is_bias=False)

            self._Wx_shape = [self._fc_layer._output_shape[1], np.prod(self._output_shape[1:])]

            # Each 3D cell will have independent weights but for computational
            # speed, we expand the cells and compute a matrix multiplication.
            self.Wx = Weight(
                self._Wx_shape,
                is_bias=False,
                fan_in=self._input_shape[1],
                fan_out=self._output_shape[2])

            self.b = Weight((filter_shape[0],), is_bias=True, mean=0.1, filler='constant')
            params = [self.Wh, self.Wx, self.b]
        else:
            self.Wh = params[0]
            self.Wx = params[1]
            self.b = params[2]

        self.params = [self.Wh, self.Wx, self.b]

    def set_output(self):
        padding = self._padding
        input_shape = self._input_shape
        padded_input = tensor.alloc(0.0,  # Value to fill the tensor
                                    input_shape[0],
                                    input_shape[1] + 2 * padding[1],
                                    input_shape[2],
                                    input_shape[3] + 2 * padding[3],
                                    input_shape[4] + 2 * padding[4])

        padded_input = tensor.set_subtensor(padded_input[:, padding[1]:padding[1] + input_shape[
            1], :, padding[3]:padding[3] + input_shape[3], padding[4]:padding[4] + input_shape[4]],
                                            self._prev_layer.output)

        fc_output = tensor.reshape(
            tensor.dot(self._fc_layer.output, self.Wx.val), self._output_shape)
        self._output = conv3d2d.conv3d(padded_input, self.Wh.val) + \
                       fc_output + self.b.val.dimshuffle('x', 'x', 0, 'x', 'x')


class CConv3DLayer(Layer):
    """3D Convolution layer with FC input and hidden unit"""

    def __init__(self, prev_layer, curr_layer, filter_shape, padding=None, params=None):
        """Prev layer is the 3D hidden layer"""
        super().__init__(prev_layer)
        self._curr_layer = curr_layer
        self._filter_shape = [filter_shape[0],  # out channel
                              filter_shape[2],  # time
                              filter_shape[1],  # in channel
                              filter_shape[3],  # height
                              filter_shape[4]]  # width
        self._padding = padding

        if padding is None:
            self._padding = [0, int((self._filter_shape[1] - 1) / 2), 0, int(
                (self._filter_shape[3] - 1) / 2), int((self._filter_shape[4] - 1) / 2)]

        self._output_shape = [self._input_shape[0], self._input_shape[1], filter_shape[0],
                              self._input_shape[3], self._input_shape[4]]

        if params is None:
            self.Wh = Weight(self._filter_shape, is_bias=False)
            self.Wx = Weight(self._filter_shape, is_bias=False)

            # self._Wx_shape = [self._curr_layer._output_shape[1], np.prod(self._output_shape[1:])]

            # Each 3D cell will have independent weights but for computational
            # speed, we expand the cells and compute a matrix multiplication.
            # self.Wx = Weight(
            #     self._Wx_shape,
            #     is_bias=False,
            #     fan_in=self._input_shape[1],
            #     fan_out=self._output_shape[2])
            #
            # self.Wh = Weight(
            #     self._Wx_shape,
            #     is_bias=False,
            #     fan_in=self._input_shape[1],
            #     fan_out=self._output_shape[2])

            self.bh = Weight((filter_shape[0],), is_bias=True, mean=0.1, filler='constant')
            self.bx = Weight((filter_shape[0],), is_bias=True, mean=0.1, filler='constant')
            params = [self.Wh, self.Wx, self.bh, self.bx]
        else:
            self.Wh = params[0]
            self.Wx = params[1]
            self.bh = params[2]
            self.bx = params[3]

        self.params = [self.Wh, self.Wx, self.bx, self.bh]

    def set_output(self):
        padding = self._padding
        input_shape = self._input_shape
        padded_input_prev = tensor.alloc(0.0,  # Value to fill the tensor
                                    input_shape[0],
                                    input_shape[1] + 2 * padding[1],
                                    input_shape[2],
                                    input_shape[3] + 2 * padding[3],
                                    input_shape[4] + 2 * padding[4])

        padded_input_curr = tensor.alloc(0.0,  # Value to fill the tensor
                                    input_shape[0],
                                    input_shape[1] + 2 * padding[1],
                                    input_shape[2],
                                    input_shape[3] + 2 * padding[3],
                                    input_shape[4] + 2 * padding[4])

        padded_input_prev = tensor.set_subtensor(padded_input_prev[:, padding[1]:padding[1] + input_shape[
            1], :, padding[3]:padding[3] + input_shape[3], padding[4]:padding[4] + input_shape[4]],
                                            self._prev_layer.output)

        padded_input_curr = tensor.set_subtensor(padded_input_curr[:, padding[1]:padding[1] + input_shape[
            1], :, padding[3]:padding[3] + input_shape[3], padding[4]:padding[4] + input_shape[4]],
                                            self._curr_layer.output)

        prev_out = conv3d2d.conv3d(padded_input_prev, self.Wh.val) + self.bh.val.dimshuffle('x', 'x', 0, 'x', 'x')
        curr_out = conv3d2d.conv3d(padded_input_curr, self.Wx.val) + self.bx.val.dimshuffle('x', 'x', 0, 'x', 'x')

        self._output = prev_out + curr_out


class Conv3DLSTMLayer(Layer):
    """Convolution 3D LSTM layer

    Unlike a standard LSTM cell witch doesn't have a spatial information,
    Convolutional 3D LSTM has limited connection that respects spatial
    configuration of LSTM cells.

    The filter_shape defines the size of neighbor that the 3D LSTM cells will consider.
    """

    def __init__(self, prev_layer, filter_shape, padding=None, params=None):

        super().__init__(prev_layer)
        prev_layer._input_shape
        n_c = filter_shape[0]
        n_x = self._input_shape[2]
        n_neighbor_d = filter_shape[1]
        n_neighbor_h = filter_shape[2]
        n_neighbor_w = filter_shape[3]

        # Compute all gates in one convolution
        self._gate_filter_shape = [4 * n_c, 1, n_x + n_c, 1, 1]

        self._filter_shape = [filter_shape[0],  # num out hidden representation
                              filter_shape[1],  # time
                              self._input_shape[2],  # in channel
                              filter_shape[2],  # height
                              filter_shape[3]]  # width
        self._padding = padding

        # signals: (batch,       in channel, depth_i, row_i, column_i)
        # filters: (out channel, in channel, depth_f, row_f, column_f)

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        if params is None:
            self.W = Weight(self._filter_shape, is_bias=False)
            self.b = Weight((filter_shape[0],), is_bias=True, mean=0.1, filler='constant')
            params = [self.W, self.b]
        else:
            self.W = params[0]
            self.b = params[1]

        self.params = [self.W, self.b]

        if padding is None:
            self._padding = [0, int((filter_shape[1] - 1) / 2), 0, int((filter_shape[2] - 1) / 2),
                             int((filter_shape[3] - 1) / 2)]

        self._output_shape = [self._input_shape[0], self._input_shape[1], filter_shape[0],
                              self._input_shape[3], self._input_shape[4]]

    def set_output(self):
        padding = self._padding
        input_shape = self._input_shape
        padded_input = tensor.alloc(0.0,  # Value to fill the tensor
                                    input_shape[0],
                                    input_shape[1] + 2 * padding[1],
                                    input_shape[2],
                                    input_shape[3] + 2 * padding[3],
                                    input_shape[4] + 2 * padding[4])

        padded_input = tensor.set_subtensor(padded_input[:, padding[1]:padding[1] + input_shape[
            1], :, padding[3]:padding[3] + input_shape[3], padding[4]:padding[4] + input_shape[4]],
                                            self._prev_layer.output)

        self._output = conv3d2d.conv3d(padded_input, self.W.val) + \
                       self.b.val.dimshuffle('x', 'x', 0, 'x', 'x')


class SoftmaxWithLoss3D(object):
    """
    Softmax with loss (n_batch, n_vox, n_label, n_vox, n_vox)
    """

    def __init__(self, input):
        self.input = input
        self.exp_x = tensor.exp(self.input)
        self.sum_exp_x = tensor.sum(self.exp_x, axis=2, keepdims=True)

    def prediction(self):
        return self.exp_x / self.sum_exp_x

    def error(self, y, threshold=0.5):
        return tensor.mean(tensor.eq(tensor.ge(self.prediction(), threshold), y))

    def focal_loss(self, y, r=2):
        comp = tensor.ones_like(self.exp_x) - self.exp_x
        exp_comp = tensor.power(comp, r)
        focal_input = exp_comp * self.input

        return tensor.mean(
            tensor.sum(-y * focal_input, axis=2, keepdims=True) + tensor.log(self.sum_exp_x))

    def focal_loss2(self, y, r=2):
        x = self.exp_x / self.sum_exp_x
        log_x = tensor.log(x)
        comp = tensor.ones_like(self.exp_x) - self.exp_x
        exp_comp = tensor.power(comp, r)
        focal_input = exp_comp * log_x

        return tensor.mean(
            tensor.sum(-y * focal_input, axis=2, keepdims=True))

    def loss(self, y):
        """
        y must be a tensor that has the same dimensions as the input. For each
        channel, only one element is one indicating the ground truth prediction
        label.
        """
        return tensor.mean(
            tensor.sum(-y * self.input, axis=2, keepdims=True) + tensor.log(self.sum_exp_x))


class ConcatLayer(Layer):
    def __init__(self, prev_layers, axis=1):
        """
        list of prev layers to concatenate
        axis to concatenate

        For tensor5, channel dimension is axis=2 (due to theano conv3d
        convention). For image, axis=1
        """
        assert (len(prev_layers) > 1)
        super().__init__(prev_layers[0])
        self._axis = axis
        self._prev_layers = prev_layers

        self._output_shape = self._input_shape.copy()
        for prev_layer in prev_layers[1:]:
            self._output_shape[axis] += prev_layer._output_shape[axis]
        print('Concat the prev layer to [%s]' % ','.join(str(x) for x in self._output_shape))

    def set_output(self):
        self._output = tensor.concatenate([x.output for x in self._prev_layers], axis=self._axis)


class LeakyReLU(Layer):
    def __init__(self, prev_layer, leakiness=0.01):
        super().__init__(prev_layer)
        self._leakiness = leakiness
        self._output_shape = self._input_shape

    def set_output(self):
        self._input = self._prev_layer.output
        if self._leakiness:
            # The following is faster than T.maximum(leakiness * x, x),
            # and it works with nonsymbolic inputs as well. Also see:
            # http://github.com/benanne/Lasagne/pull/163#issuecomment-81765117
            f1 = 0.5 * (1 + self._leakiness)
            f2 = 0.5 * (1 - self._leakiness)
            self._output = f1 * self._input + f2 * abs(self._input)
            # self.param = [self.leakiness]
        else:
            self._output = 0.5 * (self._input + abs(self._input))


class SigmoidLayer(Layer):
    def __init__(self, prev_layer):
        super().__init__(prev_layer)
        self._output_shape = self._input_shape

    def set_output(self):
        self._output = sigmoid(self._prev_layer.output)


class TanhLayer(Layer):
    def __init__(self, prev_layer):
        super().__init__(prev_layer)

    def set_output(self):
        self._output = tensor.tanh(self._prev_layer.output)


class ComplementLayer(Layer):
    """ Compute 1 - input_layer.output """

    def __init__(self, prev_layer):
        super().__init__(prev_layer)
        self._output_shape = self._input_shape

    def set_output(self):
        self._output = tensor.ones_like(self._prev_layer.output) - self._prev_layer.output


class SELayer(Layer):
    """ SE inception """

    def __init__(self, prev_layer, reduction=16, params=None, bias=True):
        super().__init__(prev_layer)
        self.bias = bias
        in_channel = self._input_shape[1]
        self._output_shape = self._input_shape

        self._pooled_shape = [self._input_shape[0], self._input_shape[1], 1, 1]
        self._pool_size = (self._input_shape[2], self._input_shape[3])
        self._padding = (1, 1)

        if params is None:
            self.W1 = Weight((in_channel, reduction), is_bias=False)
            self.W2 = Weight((reduction, in_channel), is_bias=False)
            if bias:
                self.b1 = Weight((reduction, ), is_bias=True, mean=0.1, filler='constant')
                self.b2 = Weight((in_channel, ), is_bias=True, mean=0.1, filler='constant')
        else:
            self.W1 = params[0]
            self.W2 = params[1]

            if bias:
                self.b1 = params[2]
                self.b2 = params[3]

        self.params = [self.W1, self.W2]
        if bias:
            self.params.append(self.b1)
            self.params.append(self.b2)

    def set_output(self):

        pooled_output = pool.pool_2d(self._prev_layer.output,
                                     ds=self._pool_size,
                                     ignore_border=True,
                                     padding=self._padding,
                                     mode='average_inc_pad')
        # reshape_pooled = tensor.reshape(pooled_output, (self._input_shape[0], self._input_shape[1]))
        reshape_pooled = pooled_output.flatten(2)
        output1 = tensor.dot(reshape_pooled, self.W1.val)
        if self.bias:
            output1 += self.b1.val

        output1 = relu(output1)

        output2 = tensor.dot(output1, self.W2.val)
        if self.bias:
            output2  += self.b2.val
        output2 = sigmoid(output2)

        print(output2.shape)
        se_output = tensor.reshape(output2, [self._input_shape[0], self._input_shape[1], 1, 1])
        self._output = self._prev_layer.output * se_output


class SE3DLayer(Layer):
    """ SE 3D inception """

    def __init__(self, prev_layer, reduction=16, params=None, bias=True):
        super().__init__(prev_layer)
        self.bias = bias
        in_channel = self._input_shape[2]
        self._output_shape = self._input_shape

        self._pooled_shape = [self._input_shape[0], 1, self._input_shape[2], 1, 1]
        self._pool_size = (self._input_shape[1], self._input_shape[3], self._input_shape[4])
        self._padding = (1, 1, 1)

        if params is None:
            self.W1 = Weight((in_channel, reduction), is_bias=False)
            self.W2 = Weight((reduction, in_channel), is_bias=False)
            if bias:
                self.b1 = Weight((reduction, ), is_bias=True, mean=0.1, filler='constant')
                self.b2 = Weight((in_channel, ), is_bias=True, mean=0.1, filler='constant')
        else:
            self.W1 = params[0]
            self.W2 = params[1]

            if bias:
                self.b1 = params[2]
                self.b2 = params[3]

        self.params = [self.W1, self.W2]
        if bias:
            self.params.append(self.b1)
            self.params.append(self.b2)

    def set_output(self):
        shuffled_output = self._prev_layer.output.dimshuffle(0, 2, 1, 3, 4)
        pooled_output = pool.pool_3d(shuffled_output,
                                     ds=self._pool_size,
                                     ignore_border=True,
                                     padding=self._padding,
                                     mode='average_inc_pad')
        reshape_pooled = tensor.reshape(pooled_output, (self._input_shape[0], self._input_shape[2]))
        output1 = tensor.dot(reshape_pooled, self.W1.val)
        if self.bias:
            output1 += self.b1.val

        output1 = relu(output1)

        output2 = tensor.dot(output1, self.W2.val)
        if self.bias:
            output2  += self.b2.val
        output2 = sigmoid(output2)
        se_out = tensor.reshape(output2, (self._input_shape[0], 1, self._input_shape[2], 1, 1))
        self._output = self._prev_layer.output * se_out