Commit
Improved some of the models (used for ordinal classification)
rpmcruz committed Feb 3, 2018
1 parent 3bbb104 commit a167652
Showing 12 changed files with 392 additions and 166 deletions.
23 changes: 9 additions & 14 deletions ensemble/boosting/adaboost.py
@@ -1,23 +1,18 @@
# -*- coding: utf-8 -*-

# NOTE: I convert y from {0,1} to {-1,+1} and then back again because
# it makes it easier for the learning method :P

from sklearn.base import clone
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.tree import DecisionTreeClassifier
from utils import choose_threshold
import numpy as np
import itertools

# NOTE: I convert y from {0,1} to {-1,+1} and then back again because
# it makes it easier for the learning method :P

class AdaBoost(BaseEstimator, ClassifierMixin):
def __init__(self, T, base_estimator=None, balanced=False):
if base_estimator is None:
base_estimator = DecisionTreeClassifier(max_depth=1)
self.estimator = base_estimator
self.T = T
self.balanced = False
self.balanced = balanced
self.classes_ = (0, 1)

def fit(self, X, y):
@@ -32,7 +27,7 @@ def fit(self, X, y):
self.a = [0]*self.T
epsilon = 1e-6 # to avoid division by zero (Schapire and Singer, 1999)

for t in xrange(self.T):
for t in range(self.T):
self.h[t] = clone(self.estimator).fit(X, y, D)
yp = self.h[t].predict(X)

@@ -42,14 +37,14 @@ def fit(self, X, y):
D = D/np.sum(D) # normalize distribution
return self

def decision_function(self, X):
return self.predict_proba(X)

def predict_proba(self, X):
# thresholds: we overload this function for easy integration with the
# other models.
return np.sum(
[self.a[t]*self.h[t].predict(X) for t in xrange(self.T)], 0)
return np.sum([a*h.predict(X) for a, h in zip(self.a, self.h)], 0)

def predict(self, X):
s = self.predict_proba(X)
y = np.sign(s)
#y[y == 0] = 1 # HACK: sometimes sign is 0
return (y+1)/2 # change domain back to {0,1}
return (np.sign(s)+1)/2 # change domain back to {0,1}
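For orientation, a minimal usage sketch of the AdaBoost class above (illustrative only; the toy data from sklearn's make_classification is an assumption, not part of this commit):

from sklearn.datasets import make_classification

X, y = make_classification(n_samples=200, n_features=10, random_state=0)
clf = AdaBoost(T=20, balanced=True).fit(X, y)   # labels y are in {0, 1}
scores = clf.decision_function(X)               # signed boosting scores
yp = clf.predict(X)                             # mapped back to {0, 1}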
92 changes: 13 additions & 79 deletions ensemble/boosting/rankboost.py
@@ -1,66 +1,16 @@
# -*- coding: utf-8 -*-

# Boosting algorithm which uses another metric for success.
# Algorithm from Wu et al (2008)

# NOTE: I convert y from {0,1} to {-1,+1} and then back again because
# it makes it easier for the learning method :P

from sklearn.base import clone
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.tree import DecisionTreeClassifier
from utils import choose_threshold
import numpy as np
import itertools


class AdaBoost(BaseEstimator, ClassifierMixin):
def __init__(self, T, base_estimator=None, balanced=False):
if base_estimator is None:
base_estimator = DecisionTreeClassifier(max_depth=1)
self.estimator = base_estimator
self.T = T
self.balanced = False
self.classes_ = (0, 1)

def fit(self, X, y):
if self.balanced:
cw = (2./np.sum(y == 0), 2./np.sum(y == 1))
D = np.asarray([cw[_y] for _y in y])
else:
D = np.repeat(1./len(X), len(X))

y = y*2-1 # change domain to {-1,+1}
self.h = [None]*self.T
self.a = [0]*self.T
epsilon = 1e-6 # to avoid division by zero (Schapire and Singer, 1999)

for t in xrange(self.T):
self.h[t] = clone(self.estimator).fit(X, y, D)
yp = self.h[t].predict(X)

err = 1-np.sum((yp*y > 0)*D)
self.a[t] = 0.5*np.log((1-err+epsilon)/(err+epsilon))
D = D*np.exp(-self.a[t]*y*yp)
D = D/np.sum(D) # normalize distribution
return self

def predict_proba(self, X):
# thresholds: we overload this function for easy integration with the
# other models.
return np.sum(
[self.a[t]*self.h[t].predict(X) for t in xrange(self.T)], 0)

def predict(self, X):
s = self.predict_proba(X)
y = np.sign(s)
#y[y == 0] = 1 # HACK: sometimes sign is 0
return (y+1)/2 # change domain back to {0,1}


# Boosting algorithm which uses another metric for success.
# Algorithm from Wu et al (2008)
# Freund (2003)
# https://fr.wikipedia.org/wiki/RankBoost

# NOTE: I convert y from {0,1} to {-1,+1} and then back again because
# it makes it easier for the learning method :P

class RankBoost(BaseEstimator, ClassifierMixin):
def __init__(self, T, base_estimator=None):
if base_estimator is None:
@@ -87,16 +37,16 @@ def fit(self, X, y):
#ys = np.zeros(len_diff)

for i, x0 in enumerate(X[y==0]):
for j in xrange(n1):
Xs[i * n1 + j] = x0
for j in range(n1):
Xs[i*n1 + j] = x0

for i, x1 in enumerate(X[y==1]):
for j in xrange(n0):
Xs[nn + i + n1 * j] = x1
for j in range(n0):
Xs[nn + i + n1*j] = x1

"""
for X1, y1 in itertools.izip(X, y):
for X2, y2 in itertools.izip(X, y):
for X1, y1 in zip(X, y):
for X2, y2 in zip(X, y):
if y1 > y2:
Xs[i] = X2
#ys[i] = 0
@@ -109,7 +59,7 @@ def fit(self, X, y):
self.a = [0]*self.T
epsilon = 1e-6 # to avoid division by zero (Schapire and Singer, 1999)

for t in xrange(self.T):
for t in range(self.T):
# Train weak ranker ft based on distribution Dt
Ds = np.r_[D, D]/2
self.h[t] = clone(self.estimator).fit(Xs, ys, Ds)
@@ -149,23 +99,7 @@ def fit(self, X, y):
# Update D
D = D*np.exp(self.a[t]*df)
D = D/np.sum(D) # normalize distribution

H = self.predict_proba(X)
self.th = choose_threshold(H, y)
return self

def predict_proba(self, X):
return np.sum(
[self.a[t]*self.h[t].predict(X) for t in xrange(self.T)], 0)

def predict(self, X):
s = self.predict_proba(X)
return (s >= self.th).astype(int)

if __name__ == '__main__':
import test
from smote import SMOTE
T = 20
test.test(
('AdaBoost', 'AdaBoost b', 'AdaBoost SMOTE', 'RankBoost'),
(AdaBoost(T), AdaBoost(T, balanced=True), SMOTE(AdaBoost(T)), RankBoost(T)))
return np.sum([a*h.predict(X) for a, h in zip(self.a, self.h)], 0)
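The vectorized loops in RankBoost.fit build one training row per member of every (negative, positive) pair, negatives in the first half of Xs and positives in the second. A hedged illustration of that pairing on a toy set (variable names below are mine, not the commit's):

import itertools
import numpy as np

X = np.arange(10).reshape(5, 2)      # 5 toy examples, 2 features
y = np.array([0, 0, 1, 1, 1])        # n0 = 2 negatives, n1 = 3 positives
pairs = list(itertools.product(X[y == 0], X[y == 1]))
assert len(pairs) == np.sum(y == 0) * np.sum(y == 1)   # n0*n1 = 6 pairs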
@@ -5,7 +5,7 @@
# Multi-class ensemble where classes are only compared to neighbor (ordinal)
# classes

class MultiOrdinal(BaseEstimator, ClassifierMixin):
class PrefixVsSuffix(BaseEstimator, ClassifierMixin):
def __init__(self, estimator, alpha=1):
self.estimator = estimator
self.alpha = alpha
@@ -20,11 +20,13 @@ def fit(self, X, y):
_y = np.r_[np.ones(len(Xpos), int), np.zeros(len(Xneg), int)]

C = np.exp(self.alpha * np.abs(y-k-0.5))
m = clone(self.estimator).fit(_X, _y, sample_weight=C)
m = clone(self.estimator)
try:
m.fit(_X, _y, sample_weight=C)
except:
m.fit(_X, _y)
self.ensemble.append(m)
return self

def predict(self, X):
yps = [m.predict(X) for m in self.ensemble]
r = 1+np.sum(yps, 0)
return r
return np.sum([m.predict(X) for m in self.ensemble], 0)
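With the new predict, the class is the number of binary models that fire, assuming the k-th ensemble member answers "is the class above split k". A small hand check with K = 4 classes and one sample:

import numpy as np

# Three split models predict 1, 1, 0 ("above split 0", "above split 1", not "above split 2").
yps = [np.array([1]), np.array([1]), np.array([0])]
predicted_class = np.sum(yps, 0)   # -> array([2]): the sample lands in class 2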
82 changes: 82 additions & 0 deletions neuralnet/python/keras/neuralnet.py
@@ -0,0 +1,82 @@
from keras.models import Model
from keras.layers import Input, Dense
from keras import regularizers
from keras.callbacks import EarlyStopping
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# Ordinal network encoding:
# http://orca.st.usm.edu/~zwang/files/rank.pdf

def create_model(nfeatures, nhidden, l2, K, is_ordinal):
reg = regularizers.l2(l2) if l2 else None

input_layer = Input([nfeatures])
hidden = Dense(
nhidden, activation='tanh', kernel_regularizer=reg)(input_layer)
if is_ordinal:
act = 'softmax'
else:
act = 'sigmoid'
output = Dense(K, activation=act)(hidden)

model = Model(input_layer, output)
model.compile('adam', 'categorical_crossentropy')
#model.summary()
return model

def class_weight(y):
klasses = np.unique(y)
count = np.bincount(y)[klasses]
return len(y) / (len(klasses)*count)


class MultiClassNet(BaseEstimator, ClassifierMixin):
def __init__(self, nhidden, l2=0, balanced=False):
self.nhidden = nhidden
self.l2 = l2
self.balanced = balanced

def fit(self, X, y):
self.classes_ = np.unique(y)
K = len(self.classes_)
yy = OneHotEncoder(sparse=False).fit_transform(y[:, np.newaxis])
self.model = create_model(X.shape[1], self.nhidden, self.l2, K, False)
cb = EarlyStopping('loss', 0.001, 10)
ww = class_weight(y) if self.balanced else None
self.logs = self.model.fit(
X, yy, 128, 10000, 0, callbacks=[cb], class_weight=ww)
return self

def predict_proba(self, X):
return self.model.predict(X)

def predict(self, X):
return np.argmax(self.model.predict(X), 1)


class OrdinalNet(BaseEstimator, ClassifierMixin):
def __init__(self, nhidden, l2=0, balanced=False):
self.nhidden = nhidden
self.l2 = l2
self.balanced = balanced

def fit(self, X, y):
self.classes_ = np.unique(y)
K = len(self.classes_)
yy = np.zeros((len(y), K), int) # ordinal encoding
for i,_y in enumerate(y):
yy[i, 0:_y+1] = 1
self.model = create_model(X.shape[1], self.nhidden, self.l2, K, True)
cb = EarlyStopping('loss', 0.001, 10)
ww = class_weight(y) if self.balanced else None
self.logs = self.model.fit(
X, yy, 128, 10000, 0, callbacks=[cb], class_weight=ww)
return self

def predict_proba(self, X):
return self.model.predict(X)

def predict(self, X):
return np.argmax(self.model.predict(X), 1)
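The target construction in OrdinalNet.fit is the cumulative ordinal encoding from the linked paper: class k sets the first k+1 outputs to 1. A quick hand check with K = 4 (toy labels of my choosing):

import numpy as np

y = np.array([0, 2, 3])
K = 4
yy = np.zeros((len(y), K), int)
for i, _y in enumerate(y):
    yy[i, 0:_y+1] = 1
# yy == [[1, 0, 0, 0],
#        [1, 1, 1, 0],
#        [1, 1, 1, 1]]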
58 changes: 58 additions & 0 deletions neuralnet/python/keras/ranknet.py
@@ -0,0 +1,58 @@
from keras.models import Model
from keras.layers import Input, Dense, Subtract
from keras import regularizers
from keras.callbacks import EarlyStopping
from sklearn.base import BaseEstimator, RegressorMixin
import numpy as np

def preprocess(X, y):
K = len(np.unique(y))
N = len(X)
Nk = np.bincount(y)

X1 = np.repeat(X, N, 0)
X2 = np.tile(X.T, N).T

y1 = np.repeat(y, N)
y2 = np.tile(y, N)
yy = (y1 > y2) + (y1 == y2)*0.5

pairs = K*(K-1)
ww = len(X1) / (pairs * (Nk[y1]*Nk[y2]))
return X1, X2, yy, ww


def create_model(nfeatures, nhidden, l2):
reg = regularizers.l2(l2) if l2 else None

input1 = Input([nfeatures])
input2 = Input([nfeatures])

hidden = Dense(nhidden, activation='tanh', kernel_regularizer=reg)

output1 = hidden(input1)
output2 = hidden(input2)
diff = Subtract()([output1, output2])
output = Dense(1, activation='sigmoid')(diff)

model = Model([input1, input2], output)
model.compile('adam', 'binary_crossentropy')
#model.summary()
return model


class RankNet(BaseEstimator, RegressorMixin):
def __init__(self, nhidden, l2=0):
self.nhidden = nhidden
self.l2 = l2

def fit(self, X, y):
self.model = create_model(X.shape[1], self.nhidden, self.l2)
X1, X2, yy, ww = preprocess(X, y)
cb = EarlyStopping('loss', 0.001, 10)
self.logs = self.model.fit(
[X1, X2], yy, 128, 10000, 0, callbacks=[cb], sample_weight=ww)
return self

def predict(self, X):
return self.model.predict([X, np.zeros_like(X)])[:, 0]
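preprocess assigns the usual RankNet pairwise target: 1 when the first item outranks the second, 0 when it ranks lower, and 0.5 for ties. A small hand check (toy labels are my own):

import numpy as np

y = np.array([0, 1, 1])
N = len(y)
y1 = np.repeat(y, N)                  # [0,0,0, 1,1,1, 1,1,1]
y2 = np.tile(y, N)                    # [0,1,1, 0,1,1, 0,1,1]
yy = (y1 > y2) + (y1 == y2)*0.5
# yy == [0.5, 0, 0,  1, 0.5, 0.5,  1, 0.5, 0.5]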
File renamed without changes.
File renamed without changes.
26 changes: 26 additions & 0 deletions ordinal/rank2ordinal.py
@@ -0,0 +1,26 @@
from sklearn.base import BaseEstimator, ClassifierMixin
from rank2ordinal.threshold import decide_thresholds
import numpy as np


class Rank2Ordinal(BaseEstimator, ClassifierMixin):
def __init__(self, estimator, threshold_strategy='uniform'):
self.estimator = estimator
self.threshold_strategy = threshold_strategy

def fit(self, X, y):
self.classes_ = np.unique(y)
self.estimator.fit(X, y)

K = len(self.classes_)
scores = self.estimator.predict(X)
self.ths = decide_thresholds(scores, y, K, self.threshold_strategy)
return self

# this function passes the ranking score for use by some metrics
def predict_proba(self, X):
return self.estimator.predict(X)

def predict(self, X):
scores = self.estimator.predict(X)
return np.sum(scores[:, np.newaxis] >= self.ths, 1)
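Rank2Ordinal.predict decodes a ranking score into a class by counting how many thresholds the score meets or exceeds; a hedged illustration with made-up threshold values:

import numpy as np

ths = np.array([0.2, 0.5, 0.8])                    # hypothetical thresholds
scores = np.array([0.1, 0.6, 0.9])
classes = np.sum(scores[:, np.newaxis] >= ths, 1)  # -> array([0, 2, 3])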