Commit
Improved some of the models (used for ordinal classification)
rpmcruz committed Feb 3, 2018
1 parent 3bbb104 commit a167652
Showing 12 changed files with 392 additions and 166 deletions.
23 changes: 9 additions & 14 deletions ensemble/boosting/adaboost.py
@@ -1,23 +1,18 @@
# -*- coding: utf-8 -*-

# NOTE: I convert y from {0,1} to {-1,+1} and then back again because
# it makes it easier for the learning method :P

from sklearn.base import clone
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.tree import DecisionTreeClassifier
from utils import choose_threshold
import numpy as np
import itertools

# NOTE: I convert y from {0,1} to {-1,+1} and then back again because
# it makes it easier for the learning method :P

class AdaBoost(BaseEstimator, ClassifierMixin):
def __init__(self, T, base_estimator=None, balanced=False):
if base_estimator is None:
base_estimator = DecisionTreeClassifier(max_depth=1)
self.estimator = base_estimator
self.T = T
self.balanced = False
self.balanced = balanced
self.classes_ = (0, 1)

def fit(self, X, y):
@@ -32,7 +27,7 @@ def fit(self, X, y):
self.a = [0]*self.T
epsilon = 1e-6 # to avoid division by zero (Schapire and Singer, 1999)

for t in xrange(self.T):
for t in range(self.T):
self.h[t] = clone(self.estimator).fit(X, y, D)
yp = self.h[t].predict(X)

@@ -42,14 +37,14 @@ def fit(self, X, y):
D = D/np.sum(D) # normalize distribution
return self

def decision_function(self, X):
return self.predict_proba(X)

def predict_proba(self, X):
# thresholds: we overload this function for easy integration with the
# other models.
return np.sum(
[self.a[t]*self.h[t].predict(X) for t in xrange(self.T)], 0)
return np.sum([a*h.predict(X) for a, h in zip(self.a, self.h)], 0)

def predict(self, X):
s = self.predict_proba(X)
y = np.sign(s)
#y[y == 0] = 1 # HACK: sometimes sign is 0
return (y+1)/2 # change domain back to {0,1}
return (np.sign(s)+1)/2 # change domain back to {0,1}
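For orientation, a minimal usage sketch of the AdaBoost class above (illustrative only; the toy data from sklearn's make_classification is an assumption, not part of this commit):

from sklearn.datasets import make_classification

X, y = make_classification(n_samples=200, n_features=10, random_state=0)
clf = AdaBoost(T=20, balanced=True).fit(X, y)   # labels y are in {0, 1}
scores = clf.decision_function(X)               # signed boosting scores
yp = clf.predict(X)                             # mapped back to {0, 1}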
92 changes: 13 additions & 79 deletions ensemble/boosting/rankboost.py
@@ -1,66 +1,16 @@
# -*- coding: utf-8 -*-

# Boosting algorithm which uses another metric for success.
# Algorithm from Wu et al (2008)

# NOTE: I convert y from {0,1} to {-1,+1} and then back again because
# it makes it easier for the learning method :P

from sklearn.base import clone
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.tree import DecisionTreeClassifier
from utils import choose_threshold
import numpy as np
import itertools


class AdaBoost(BaseEstimator, ClassifierMixin):
def __init__(self, T, base_estimator=None, balanced=False):
if base_estimator is None:
base_estimator = DecisionTreeClassifier(max_depth=1)
self.estimator = base_estimator
self.T = T
self.balanced = False
self.classes_ = (0, 1)

def fit(self, X, y):
if self.balanced:
cw = (2./np.sum(y == 0), 2./np.sum(y == 1))
D = np.asarray([cw[_y] for _y in y])
else:
D = np.repeat(1./len(X), len(X))

y = y*2-1 # change domain to {-1,+1}
self.h = [None]*self.T
self.a = [0]*self.T
epsilon = 1e-6 # to avoid division by zero (Schapire and Singer, 1999)

for t in xrange(self.T):
self.h[t] = clone(self.estimator).fit(X, y, D)
yp = self.h[t].predict(X)

err = 1-np.sum((yp*y > 0)*D)
self.a[t] = 0.5*np.log((1-err+epsilon)/(err+epsilon))
D = D*np.exp(-self.a[t]*y*yp)
D = D/np.sum(D) # normalize distribution
return self

def predict_proba(self, X):
# thresholds: we overload this function for easy integration with the
# other models.
return np.sum(
[self.a[t]*self.h[t].predict(X) for t in xrange(self.T)], 0)

def predict(self, X):
s = self.predict_proba(X)
y = np.sign(s)
#y[y == 0] = 1 # HACK: sometimes sign is 0
return (y+1)/2 # change domain back to {0,1}


# Boosting algorithm which uses another metric for success.
# Algorithm from Wu et al (2008)
# Freund (2003)
# https://fr.wikipedia.org/wiki/RankBoost

# NOTE: I convert y from {0,1} to {-1,+1} and then back again because
# it makes it easier for the learning method :P

class RankBoost(BaseEstimator, ClassifierMixin):
def __init__(self, T, base_estimator=None):
if base_estimator is None:
@@ -87,16 +37,16 @@ def fit(self, X, y):
#ys = np.zeros(len_diff)

for i, x0 in enumerate(X[y==0]):
for j in xrange(n1):
Xs[i * n1 + j] = x0
for j in range(n1):
Xs[i*n1 + j] = x0

for i, x1 in enumerate(X[y==1]):
for j in xrange(n0):
Xs[nn + i + n1 * j] = x1
for j in range(n0):
Xs[nn + i + n1*j] = x1

"""
for X1, y1 in itertools.izip(X, y):
for X2, y2 in itertools.izip(X, y):
for X1, y1 in zip(X, y):
for X2, y2 in zip(X, y):
if y1 > y2:
Xs[i] = X2
#ys[i] = 0
@@ -109,7 +59,7 @@ def fit(self, X, y):
self.a = [0]*self.T
epsilon = 1e-6 # to avoid division by zero (Schapire and Singer, 1999)

for t in xrange(self.T):
for t in range(self.T):
# Train weak ranker ft based on distribution Dt
Ds = np.r_[D, D]/2
self.h[t] = clone(self.estimator).fit(Xs, ys, Ds)
@@ -149,23 +99,7 @@ def fit(self, X, y):
# Update D
D = D*np.exp(self.a[t]*df)
D = D/np.sum(D) # normalize distribution

H = self.predict_proba(X)
self.th = choose_threshold(H, y)
return self

def predict_proba(self, X):
return np.sum(
[self.a[t]*self.h[t].predict(X) for t in xrange(self.T)], 0)

def predict(self, X):
s = self.predict_proba(X)
return (s >= self.th).astype(int)

if __name__ == '__main__':
import test
from smote import SMOTE
T = 20
test.test(
('AdaBoost', 'AdaBoost b', 'AdaBoost SMOTE', 'RankBoost'),
(AdaBoost(T), AdaBoost(T, balanced=True), SMOTE(AdaBoost(T)), RankBoost(T)))
return np.sum([a*h.predict(X) for a, h in zip(self.a, self.h)], 0)
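The vectorized loops in RankBoost.fit build one training row per member of every (negative, positive) pair, negatives in the first half of Xs and positives in the second. A hedged illustration of that pairing on a toy set (variable names below are mine, not the commit's):

import itertools
import numpy as np

X = np.arange(10).reshape(5, 2)      # 5 toy examples, 2 features
y = np.array([0, 0, 1, 1, 1])        # n0 = 2 negatives, n1 = 3 positives
pairs = list(itertools.product(X[y == 0], X[y == 1]))
assert len(pairs) == np.sum(y == 0) * np.sum(y == 1)   # n0*n1 = 6 pairs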
@@ -5,7 +5,7 @@
# Multi-class ensemble where classes are only compared to neighbor (ordinal)
# classes

class MultiOrdinal(BaseEstimator, ClassifierMixin):
class PrefixVsSuffix(BaseEstimator, ClassifierMixin):
def __init__(self, estimator, alpha=1):
self.estimator = estimator
self.alpha = alpha
@@ -20,11 +20,13 @@ def fit(self, X, y):
_y = np.r_[np.ones(len(Xpos), int), np.zeros(len(Xneg), int)]

C = np.exp(self.alpha * np.abs(y-k-0.5))
m = clone(self.estimator).fit(_X, _y, sample_weight=C)
m = clone(self.estimator)
try:
m.fit(_X, _y, sample_weight=C)
except:
m.fit(_X, _y)
self.ensemble.append(m)
return self

def predict(self, X):
yps = [m.predict(X) for m in self.ensemble]
r = 1+np.sum(yps, 0)
return r
return np.sum([m.predict(X) for m in self.ensemble], 0)
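With the new predict, the class is the number of binary models that fire, assuming the k-th ensemble member answers "is the class above split k". A small hand check with K = 4 classes and one sample:

import numpy as np

# Three split models predict 1, 1, 0 ("above split 0", "above split 1", not "above split 2").
yps = [np.array([1]), np.array([1]), np.array([0])]
predicted_class = np.sum(yps, 0)   # -> array([2]): the sample lands in class 2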
82 changes: 82 additions & 0 deletions neuralnet/python/keras/neuralnet.py
@@ -0,0 +1,82 @@
from keras.models import Model
from keras.layers import Input, Dense
from keras import regularizers
from keras.callbacks import EarlyStopping
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# Ordinal network encoding:
# http://orca.st.usm.edu/~zwang/files/rank.pdf

def create_model(nfeatures, nhidden, l2, K, is_ordinal):
reg = regularizers.l2(l2) if l2 else None

input_layer = Input([nfeatures])
hidden = Dense(
nhidden, activation='tanh', kernel_regularizer=reg)(input_layer)
if is_ordinal:
act = 'softmax'
else:
act = 'sigmoid'
output = Dense(K, activation=act)(hidden)

model = Model(input_layer, output)
model.compile('adam', 'categorical_crossentropy')
#model.summary()
return model

def class_weight(y):
klasses = np.unique(y)
count = np.bincount(y)[klasses]
return len(y) / (len(klasses)*count)


class MultiClassNet(BaseEstimator, ClassifierMixin):
def __init__(self, nhidden, l2=0, balanced=False):
self.nhidden = nhidden
self.l2 = l2
self.balanced = balanced

def fit(self, X, y):
self.classes_ = np.unique(y)
K = len(self.classes_)
yy = OneHotEncoder(sparse=False).fit_transform(y[:, np.newaxis])
self.model = create_model(X.shape[1], self.nhidden, self.l2, K, False)
cb = EarlyStopping('loss', 0.001, 10)
ww = class_weight(y) if self.balanced else None
self.logs = self.model.fit(
X, yy, 128, 10000, 0, callbacks=[cb], class_weight=ww)
return self

def predict_proba(self, X):
return self.model.predict(X)

def predict(self, X):
return np.argmax(self.model.predict(X), 1)


class OrdinalNet(BaseEstimator, ClassifierMixin):
def __init__(self, nhidden, l2=0, balanced=False):
self.nhidden = nhidden
self.l2 = l2
self.balanced = balanced

def fit(self, X, y):
self.classes_ = np.unique(y)
K = len(self.classes_)
yy = np.zeros((len(y), K), int) # ordinal encoding
for i,_y in enumerate(y):
yy[i, 0:_y+1] = 1
self.model = create_model(X.shape[1], self.nhidden, self.l2, K, True)
cb = EarlyStopping('loss', 0.001, 10)
ww = class_weight(y) if self.balanced else None
self.logs = self.model.fit(
X, yy, 128, 10000, 0, callbacks=[cb], class_weight=ww)
return self

def predict_proba(self, X):
return self.model.predict(X)

def predict(self, X):
return np.argmax(self.model.predict(X), 1)
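The target construction in OrdinalNet.fit is the cumulative ordinal encoding from the linked paper: class k sets the first k+1 outputs to 1. A quick hand check with K = 4 (toy labels of my choosing):

import numpy as np

y = np.array([0, 2, 3])
K = 4
yy = np.zeros((len(y), K), int)
for i, _y in enumerate(y):
    yy[i, 0:_y+1] = 1
# yy == [[1, 0, 0, 0],
#        [1, 1, 1, 0],
#        [1, 1, 1, 1]]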
58 changes: 58 additions & 0 deletions neuralnet/python/keras/ranknet.py
@@ -0,0 +1,58 @@
from keras.models import Model
from keras.layers import Input, Dense, Subtract
from keras import regularizers
from keras.callbacks import EarlyStopping
from sklearn.base import BaseEstimator, RegressorMixin
import numpy as np

def preprocess(X, y):
K = len(np.unique(y))
N = len(X)
Nk = np.bincount(y)

X1 = np.repeat(X, N, 0)
X2 = np.tile(X.T, N).T

y1 = np.repeat(y, N)
y2 = np.tile(y, N)
yy = (y1 > y2) + (y1 == y2)*0.5

pairs = K*(K-1)
ww = len(X1) / (pairs * (Nk[y1]*Nk[y2]))
return X1, X2, yy, ww


def create_model(nfeatures, nhidden, l2):
reg = regularizers.l2(l2) if l2 else None

input1 = Input([nfeatures])
input2 = Input([nfeatures])

hidden = Dense(nhidden, activation='tanh', kernel_regularizer=reg)

output1 = hidden(input1)
output2 = hidden(input2)
diff = Subtract()([output1, output2])
output = Dense(1, activation='sigmoid')(diff)

model = Model([input1, input2], output)
model.compile('adam', 'binary_crossentropy')
#model.summary()
return model


class RankNet(BaseEstimator, RegressorMixin):
def __init__(self, nhidden, l2=0):
self.nhidden = nhidden
self.l2 = l2

def fit(self, X, y):
self.model = create_model(X.shape[1], self.nhidden, self.l2)
X1, X2, yy, ww = preprocess(X, y)
cb = EarlyStopping('loss', 0.001, 10)
self.logs = self.model.fit(
[X1, X2], yy, 128, 10000, 0, callbacks=[cb], sample_weight=ww)
return self

def predict(self, X):
return self.model.predict([X, np.zeros_like(X)])[:, 0]
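preprocess assigns the usual RankNet pairwise target: 1 when the first item outranks the second, 0 when it ranks lower, and 0.5 for ties. A small hand check (toy labels are my own):

import numpy as np

y = np.array([0, 1, 1])
N = len(y)
y1 = np.repeat(y, N)                  # [0,0,0, 1,1,1, 1,1,1]
y2 = np.tile(y, N)                    # [0,1,1, 0,1,1, 0,1,1]
yy = (y1 > y2) + (y1 == y2)*0.5
# yy == [0.5, 0, 0,  1, 0.5, 0.5,  1, 0.5, 0.5]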
File renamed without changes.
File renamed without changes.
26 changes: 26 additions & 0 deletions ordinal/rank2ordinal.py
@@ -0,0 +1,26 @@
from sklearn.base import BaseEstimator, ClassifierMixin
from rank2ordinal.threshold import decide_thresholds
import numpy as np


class Rank2Ordinal(BaseEstimator, ClassifierMixin):
def __init__(self, estimator, threshold_strategy='uniform'):
self.estimator = estimator
self.threshold_strategy = threshold_strategy

def fit(self, X, y):
self.classes_ = np.unique(y)
self.estimator.fit(X, y)

K = len(self.classes_)
scores = self.estimator.predict(X)
self.ths = decide_thresholds(scores, y, K, self.threshold_strategy)
return self

# this function passes the ranking score for use by some metrics
def predict_proba(self, X):
return self.estimator.predict(X)

def predict(self, X):
scores = self.estimator.predict(X)
return np.sum(scores[:, np.newaxis] >= self.ths, 1)
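Rank2Ordinal.predict decodes a ranking score into a class by counting how many thresholds the score meets or exceeds; a hedged illustration with made-up threshold values:

import numpy as np

ths = np.array([0.2, 0.5, 0.8])                    # hypothetical thresholds
scores = np.array([0.1, 0.6, 0.9])
classes = np.sum(scores[:, np.newaxis] >= ths, 1)  # -> array([0, 2, 3])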