dreamcoder/differentiation.py

import math
import random
from dreamcoder.utilities import *


class InvalidLoss(Exception):
    pass


class DN(object):
    '''differentiable node: parent object of every differentiable operation'''

    def __init__(self, arguments):
        self.gradient = None
        if arguments != []:
            self.data = None
        self.arguments = arguments

        # descendents: every variable that takes this variable as input
        # descendents: [(DN,float)]
        # the additional float parameter is d Descendent / d This
        self.descendents = []

        self.recalculate()

    def __str__(self):
        if self.arguments == []:
            return self.name
        return "(%s %s)" % (self.name, " ".join(str(x)
                                                for x in self.arguments))

    def __repr__(self):
        return "DN(op = %s, data = %s, grad = %s, #descendents = %d, args = %s)" % (
            self.name, self.data, self.gradient, len(self.descendents), self.arguments)

    @property
    def derivative(self): return self.differentiate()

    def differentiate(self):
        if self.gradient is None:
            self.gradient = sum(partial * descendent.differentiate()
                                for descendent, partial in self.descendents)
        return self.gradient

    def zeroEverything(self):
        if self.gradient is None and self.descendents == [] and (
                self.data is None or self.arguments == []):
            return

        self.gradient = None
        self.descendents = []
        if self.arguments != []:
            self.data = None

        for x in self.arguments:
            x.zeroEverything()

    def lightweightRecalculate(self):
        return self.forward(*[a.lightweightRecalculate()
                              for a in self.arguments])

    def recalculate(self):
        if self.data is None:
            inputs = [a.recalculate() for a in self.arguments]
            self.data = self.forward(*inputs)
            # if invalid(self.data):
            #     eprint("I am invalid",repr(self))
            #     eprint("Here are my inputs",inputs)
            #     self.zeroEverything()
            #     eprint("Here I am after being zeroed",repr(self))
            #     raise Exception('invalid loss')
            #assert valid(self.data)
            partials = self.backward(*inputs)
            for d, a in zip(partials, self.arguments):
                # if invalid(d):
                #     eprint("I have an invalid derivative",self)
                #     eprint("Inputs",inputs)
                #     eprint("partials",partials)
                #     raise Exception('invalid derivative')
                a.descendents.append((self, d))
        return self.data

    def backPropagation(self):
        self.gradient = 1.
        self.recursivelyDifferentiate()

    def recursivelyDifferentiate(self):
        self.differentiate()
        for x in self.arguments:
            x.recursivelyDifferentiate()

    def updateNetwork(self):
        self.zeroEverything()
        l = self.recalculate()
        self.backPropagation()
        return l

    def log(self): return Logarithm(self)

    def square(self): return Square(self)

    def exp(self): return Exponentiation(self)

    def clamp(self, l, u): return Clamp(self, l, u)

    def __abs__(self): return AbsoluteValue(self)

    def __add__(self, o): return Addition(self, Placeholder.maybe(o))

    def __radd__(self, o): return Addition(self, Placeholder.maybe(o))

    def __sub__(self, o): return Subtraction(self, Placeholder.maybe(o))

    def __rsub__(self, o): return Subtraction(Placeholder.maybe(o), self)

    def __mul__(self, o): return Multiplication(self, Placeholder.maybe(o))

    def __rmul__(self, o): return Multiplication(self, Placeholder.maybe(o))

    def __neg__(self): return Negation(self)

    def __truediv__(self, o): return Division(self, Placeholder.maybe(o))

    def __rtruediv__(self, o): return Division(Placeholder.maybe(o), self)

    def numericallyVerifyGradients(self, parameters):
        calculatedGradients = [p.derivative for p in parameters]
        e = 0.00001
        for j, p in enumerate(parameters):
            p.data -= e
            y1 = self.lightweightRecalculate()
            p.data += 2 * e
            y2 = self.lightweightRecalculate()
            p.data -= e
            d = (y2 - y1) / (2 * e)
            if abs(calculatedGradients[j] - d) > 0.1:
                eprint(
                    "Bad gradient: expected %f, got %f" %
                    (d, calculatedGradients[j]))

    def gradientDescent(
            self,
            parameters,
            _=None,
            lr=0.001,
            steps=10**3,
            update=None):
        for j in range(steps):
            l = self.updateNetwork()
            if update is not None and j % update == 0:
                eprint("LOSS:", l)
                for p in parameters:
                    eprint(p.data, '\t', p.derivative)
            if invalid(l):
                raise InvalidLoss()

            for p in parameters:
                p.data -= lr * p.derivative
        return self.data

    def restartingOptimize(self, parameters, _=None, attempts=1,
                           s=1., decay=0.5, grow=0.1,
                           lr=0.1, steps=10**3, update=None):
        ls = []
        for _ in range(attempts):
            for p in parameters:
                p.data = random.random()*10 - 5
            ls.append(
                self.resilientBackPropagation(
                    parameters, lr=lr, steps=steps,
                    decay=decay, grow=grow))
        return min(ls)

    def resilientBackPropagation(
            self,
            parameters,
            _=None,
            decay=0.5,
            grow=1.2,
            lr=0.1,
            steps=10**3,
            update=None):
        previousSign = [None] * len(parameters)
        lr = [lr] * len(parameters)
        for j in range(steps):
            l = self.updateNetwork()

            if update is not None and j % update == 0:
                eprint("LOSS:", l)
                eprint("\t".join(str(p.derivative) for p in parameters))
            if invalid(l):
                raise InvalidLoss()

            newSigns = [p.derivative > 0 for p in parameters]
            for i, p in enumerate(parameters):
                if p.derivative > 0:
                    p.data -= lr[i]
                elif p.derivative < 0:
                    p.data += lr[i]
                if previousSign[i] is not None:
                    if previousSign[i] == newSigns[i]:
                        lr[i] *= grow
                    else:
                        lr[i] *= decay
            previousSign = newSigns

        return self.data


class Placeholder(DN):
    COUNTER = 0

    def __init__(self, initialValue=0., name=None):
        self.data = initialValue
        super(Placeholder, self).__init__([])
        if name is None:
            name = "p_" + str(Placeholder.COUNTER)
            Placeholder.COUNTER += 1
        self.name = name

    @staticmethod
    def named(namePrefix, initialValue=0.):
        p = Placeholder(initialValue, namePrefix + str(Placeholder.COUNTER))
        Placeholder.COUNTER += 1
        return p

    def __str__(self):
        return "Placeholder(%s = %s)" % (self.name, self.data)

    @staticmethod
    def maybe(x):
        if isinstance(x, DN):
            return x
        return Placeholder(float(x))

    def forward(self): return self.data

    def backward(self): return []


class Clamp(DN):
    def __init__(self, x, l, u):
        assert u > l
        self.l = l
        self.u = u
        super(Clamp, self).__init__([x])
        self.name = "clamp"

    def forward(self, x):
        if x > self.u:
            return self.u
        if x < self.l:
            return self.l
        return x

    def backward(self, x):
        if x > self.u or x < self.l:
            return [0.]
        else:
            return [1.]


class Addition(DN):
    def __init__(self, x, y):
        super(Addition, self).__init__([x, y])
        self.name = '+'

    def forward(self, x, y): return x + y

    def backward(self, x, y): return [1., 1.]


class Subtraction(DN):
    def __init__(self, x, y):
        super(Subtraction, self).__init__([x, y])
        self.name = '-'

    def forward(self, x, y): return x - y

    def backward(self, x, y): return [1., -1.]


class Negation(DN):
    def __init__(self, x):
        super(Negation, self).__init__([x])
        self.name = '-'

    def forward(self, x): return -x

    def backward(self, x): return [-1.]


class AbsoluteValue(DN):
    def __init__(self, x):
        super(AbsoluteValue, self).__init__([x])
        self.name = 'abs'

    def forward(self, x): return abs(x)

    def backward(self, x):
        if x > 0:
            return [1.]
        return [-1.]


class Multiplication(DN):
    def __init__(self, x, y):
        super(Multiplication, self).__init__([x, y])
        self.name = '*'

    def forward(self, x, y): return x * y

    def backward(self, x, y): return [y, x]


class Division(DN):
    def __init__(self, x, y):
        super(Division, self).__init__([x, y])
        self.name = '/'

    def forward(self, x, y): return x / y

    def backward(self, x, y): return [1.0 / y, -x / (y * y)]


class Square(DN):
    def __init__(self, x):
        super(Square, self).__init__([x])
        self.name = 'sq'

    def forward(self, x): return x * x

    def backward(self, x): return [2 * x]


class Exponentiation(DN):
    def __init__(self, x):
        super(Exponentiation, self).__init__([x])
        self.name = 'exp'

    def forward(self, x): return math.exp(x)

    def backward(self, x): return [math.exp(x)]


class Logarithm(DN):
    def __init__(self, x):
        super(Logarithm, self).__init__([x])
        self.name = 'log'

    def forward(self, x): return math.log(x)

    def backward(self, x): return [1. / x]


class LSE(DN):
    def __init__(self, xs):
        super(LSE, self).__init__(xs)
        self.name = 'LSE'

    def forward(self, *xs):
        m = max(xs)
        return m + math.log(sum(math.exp(y - m) for y in xs))

    def backward(self, *xs):
        m = max(xs)
        zm = sum(math.exp(x - m) for x in xs)
        return [math.exp(x - m) / zm for x in xs]


if __name__ == "__main__":
    x = Placeholder(10., "x")
    y = Placeholder(2., "y")
    z = x - LSE([x, y])
    z.updateNetwork()
    eprint("dL/dx = %f\tdL/dy = %f" % (x.derivative, y.derivative))

    x.data = 2.
    y.data = 10.
    z.updateNetwork()
    eprint("dL/dx = %f\tdL/dy = %f" % (x.differentiate(), y.differentiate()))

    x.data = 2.
    y.data = 2.
    z.updateNetwork()
    eprint("z = ", z.data, z)
    eprint("dL/dx = %f\tdL/dy = %f" % (x.differentiate(), y.differentiate()))

    loss = -z
    eprint(loss)

    lr = 0.001
    loss.gradientDescent([x, y], steps=10000, update=1000)