Waikato · aosojnik · Nov 9, 2015 · Nov 9, 2015 · Nov 9, 2015 · Nov 9, 2015
diff --git a/README.md b/README.md
@@ -1,10 +1,29 @@
-moa
-===
+# MOA (Massive Online Analysis)
 
-This is the next developer version of MOA with important changes:
+![MOA][logo]
 
-    - Use examples instead of Instances 
+[logo]: http://moa.cms.waikato.ac.nz/wp-content/uploads/2014/11/LogoMOA.jpg "Logo MOA"
+
+MOA is the most popular open-source framework for data stream mining, with a very active and growing community ([blog](http://moa.cms.waikato.ac.nz/blog/)). It includes a collection of machine learning algorithms (classification, regression, clustering, outlier detection, concept drift detection and recommender systems) and tools for evaluation. Related to the WEKA project, MOA is also written in Java, while scaling to more demanding problems.
+
+http://moa.cms.waikato.ac.nz/
+
+## Using MOA
+
+* [Getting Started](http://moa.cms.waikato.ac.nz/getting-started/)
+* [Documentation](http://moa.cms.waikato.ac.nz/documentation/)
+* [About MOA](http://moa.cms.waikato.ac.nz/details/)
+
+MOA performs BIG DATA stream mining in real time and large-scale machine learning. MOA can be extended with new mining algorithms, stream generators, and evaluation measures. The goal is to provide a benchmark suite for the stream mining community.
+
+## Mailing lists
+* MOA users: http://groups.google.com/group/moa-users
+* MOA developers: http://groups.google.com/group/moa-development
+
+## Citing MOA
+If you want to refer to MOA in a publication, please cite the following JMLR paper: 
+
+> Albert Bifet, Geoff Holmes, Richard Kirkby, Bernhard Pfahringer (2010);
+> MOA: Massive Online Analysis; Journal of Machine Learning Research 11: 1601-1604 
 
-    - Use samoa instances instead of weka instances 
 
-    - Use lib javacliparser 
diff --git a/moa/pom.xml b/moa/pom.xml
@@ -4,7 +4,7 @@
   <parent>
     <groupId>nz.ac.waikato.cms.moa</groupId>
     <artifactId>moa-pom</artifactId>
-    <version>2014.04</version>
+    <version>2016.03-SNAPSHOT</version>
   </parent>
 
   <artifactId>moa</artifactId>
@@ -53,6 +53,12 @@
       <artifactId>sizeofag</artifactId>
     </dependency>
 
+	<dependency>
+		<groupId>net.sf.meka</groupId>
+		<artifactId>meka</artifactId>
+		<version>1.9.0</version>
+	</dependency> 
+
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
@@ -125,7 +131,7 @@
     								</goals>
     							</pluginExecutionFilter>
     							<action>
-    								<ignore></ignore>
+    								<ignore />
     							</action>
     						</pluginExecution>
     					</pluginExecutions>

diff --git a/moa/src/main/java/com/yahoo/labs/samoa/instances/InstancesHeader.java b/moa/src/main/java/com/yahoo/labs/samoa/instances/InstancesHeader.java
@@ -62,6 +62,16 @@ public static String getAttributeNameString(InstancesHeader context,
                 + context.attribute(instAttIndex).name() + "]";
     }
 
+    /**
+     * Builds a bracketed display label for an input attribute.
+     *
+     * <p>The label always carries the one-based position ({@code attIndex + 1}).
+     * The attribute name is appended after a colon only when a header is
+     * available and {@code attIndex} is below its number of input attributes.
+     * Negative indices are not checked here.</p>
+     *
+     * @param context the header used to look up the attribute name; may be null
+     * @param attIndex zero-based index into the header's input attributes
+     * @return {@code "[att N]"} when the name cannot be looked up, otherwise
+     *         {@code "[att N:name]"}
+     */
+    public static String getInputAttributeNameString(InstancesHeader context,
+                                                int attIndex) {
+        final String label = "[att " + (attIndex + 1);
+        final boolean nameAvailable = (context != null)
+                && (attIndex < context.numInputAttributes());
+        if (!nameAvailable) {
+            return label + "]";
+        }
+        return label + ":" + context.inputAttribute(attIndex).name() + "]";
+    }
+
     // is impervious to class index changes - attIndex is true attribute index
     // regardless of class position
     public static String getNominalValueString(InstancesHeader context,

diff --git a/moa/src/main/java/com/yahoo/labs/samoa/instances/MultiLabelPrediction.java b/moa/src/main/java/com/yahoo/labs/samoa/instances/MultiLabelPrediction.java
@@ -15,8 +15,9 @@
 package com.yahoo.labs.samoa.instances;
 
 import moa.core.DoubleVector;
+import java.io.Serializable;
 
-public class MultiLabelPrediction implements Prediction {
+public class MultiLabelPrediction implements Prediction, Serializable {
 	protected DoubleVector [] prediction;
 
 	public MultiLabelPrediction() {
@@ -40,12 +41,20 @@ public int numOutputAttributes() {
 
 	@Override
 	public int numClasses(int outputAttributeIndex) {
-		return prediction[outputAttributeIndex].numValues();
+		// An output index at or beyond prediction.length reports 0 classes
+		// rather than failing; negative indices are not guarded.
+		final boolean inRange = outputAttributeIndex < prediction.length;
+		return inRange ? prediction[outputAttributeIndex].numValues() : 0;
 	}
 
 	@Override
 	public double[] getVotes(int outputAttributeIndex) {
-		return prediction[outputAttributeIndex].getArrayCopy();
+		// An output index at or beyond prediction.length yields null (callers
+		// must check); otherwise a copy of that output's votes is returned.
+		final boolean inRange = outputAttributeIndex < prediction.length;
+		return inRange ? prediction[outputAttributeIndex].getArrayCopy() : null;
 	}
 
 	@Override
@@ -55,7 +64,11 @@ public double[] getVotes() {
 
 	@Override
 	public double getVote(int outputAttributeIndex, int classIndex) {
-		return prediction[outputAttributeIndex].getValue(classIndex);
+		// An output index at or beyond prediction.length contributes a vote
+		// of 0.0; negative indices are not guarded.
+		final boolean inRange = outputAttributeIndex < prediction.length;
+		return inRange ? prediction[outputAttributeIndex].getValue(classIndex) : 0.0;
 	}
 
 	@Override

diff --git a/moa/src/main/java/com/yahoo/labs/samoa/instances/Prediction.java b/moa/src/main/java/com/yahoo/labs/samoa/instances/Prediction.java
@@ -91,7 +91,14 @@ public interface Prediction {
      *
      * @return the votes for the first output attribute outputAttributeIndex.
      */
-        public int size();
+     public int size();
+
+    /**
+     * Returns the text of the prediction, i.e. a description of the predicted values.
+     *
+     * @return the text
+     */
+     public String toString();
 
 
 }
diff --git a/moa/src/main/java/com/yahoo/labs/samoa/instances/SamoaToWekaInstanceConverter.java b/moa/src/main/java/com/yahoo/labs/samoa/instances/SamoaToWekaInstanceConverter.java
@@ -16,13 +16,14 @@
 package com.yahoo.labs.samoa.instances;
 
 import java.util.ArrayList;
+import java.io.Serializable;
 
 /**
  * The Class SamoaToWekaInstanceConverter.
  *
  * @author abifet
  */
-public class SamoaToWekaInstanceConverter {
+public class SamoaToWekaInstanceConverter implements Serializable {
 
     /** The weka instance information. */
     protected weka.core.Instances wekaInstanceInformation;

diff --git a/moa/src/main/java/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java b/moa/src/main/java/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java
@@ -51,7 +51,7 @@ public int branchForInstance(Instance inst) {
     @Override
     public String describeConditionForBranch(int branch, InstancesHeader context) {
         if ((branch == 0) || (branch == 1)) {
-            return InstancesHeader.getAttributeNameString(context,
+            return InstancesHeader.getInputAttributeNameString(context,
                     this.attIndex)
                     + (branch == 0 ? " = " : " != ")
                     + InstancesHeader.getNominalValueString(context,

diff --git a/moa/src/main/java/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java b/moa/src/main/java/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java
@@ -64,7 +64,7 @@ public String describeConditionForBranch(int branch, InstancesHeader context) {
         if ((branch == 0) || (branch == 1)) {
             char compareChar = branch == 0 ? '<' : '>';
             int equalsBranch = this.equalsPassesTest ? 0 : 1;
-            return InstancesHeader.getAttributeNameString(context,
+            return InstancesHeader.getInputAttributeNameString(context,
                     this.attIndex)
                     + ' '
                     + compareChar

diff --git a/moa/src/main/java/moa/classifiers/meta/PairedLearners.java b/moa/src/main/java/moa/classifiers/meta/PairedLearners.java
@@ -0,0 +1,144 @@
+/*
+ *    PairedLearners.java
+ *    Copyright (C) 2015 Instituto Federal de Pernambuco, Recife, Brazil
+ *    @author Paulo Gonçalves ([email protected])
+ *
+ *    This program is free software; you can redistribute it and/or modify
+ *    it under the terms of the GNU General Public License as published by
+ *    the Free Software Foundation; either version 3 of the License, or
+ *    (at your option) any later version.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package moa.classifiers.meta;
+
+import com.github.javacliparser.FloatOption;
+import com.github.javacliparser.IntOption;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
+
+import moa.classifiers.AbstractClassifier;
+import moa.classifiers.Classifier;
+import moa.core.Measurement;
+import moa.options.ClassOption;
+import com.yahoo.labs.samoa.instances.Instance;
+import moa.core.MiscUtils;
+
+/**
+ * Creates two classifiers: a stable and a reactive. The first represents the
+ * actual stable concept, while the second is trained on the most recent data.
+ * If the accuracy of the reactive is higher than that of the stable, it means
+ * the concept has changed. The stable classifier is then substituted by the
+ * reactive, and the reactive is reset.
+ *
+ * <p>
+ * Stephen H. Bach, Marcus A. Maloof, "Paired Learners for Concept Drift",
+ * Eighth IEEE International Conference on Data Mining (ICDM), 2008,
+ * pp.23-32</p>
+ *
+ * @author Paulo Gonçalves (paulogoncalves at recife.ifpe.edu.br)
+ *
+ */
+
+public class PairedLearners extends AbstractClassifier {
+    private static final long serialVersionUID = 1L;
+
+    /** Learner whose votes are returned by {@link #getVotesForInstance(Instance)}. */
+    public ClassOption stableLearnerOption = new ClassOption("stableLearner",
+            's', "Stable learner", Classifier.class, "bayes.NaiveBayes");
+
+    /** Learner that is rebuilt from the window of buffered instances on every update. */
+    public ClassOption reactiveLearnerOption = new ClassOption("reactiveLearner",
+            'r', "Reactive learner", Classifier.class, "bayes.NaiveBayes");
+
+    /** Number of instances buffered for training the reactive learner. */
+    public IntOption windowSizeOption = new IntOption("windowSize",
+            'w', "Window size for the reactive learner",
+            12, 1, Integer.MAX_VALUE);
+
+    /** Fraction of the window size that is turned into the replacement threshold {@link #theta}. */
+    public FloatOption thresholdOption = new FloatOption("threshold",
+            't', "Threshold for creating a new stable learner",
+            0.2, 0, 1);
+
+    /** Circular buffer of flags: 1 where the stable learner erred and the reactive one was right. */
+    protected int[] c;
+    protected Classifier stableLearner;
+    protected Classifier reactiveLearner;
+    /** Slot of the circular buffers that the next training instance will occupy. */
+    protected int t;
+    /** Window size captured from {@link #windowSizeOption} at reset time. */
+    protected int w;
+    /** Replacement threshold: {@code (int) (w * threshold)}. */
+    protected int theta;
+    /** Circular buffer holding the instances of the current window. */
+    protected Instance[] instances;
+    /** Replacements of the stable learner since the measurements were last read. */
+    protected int changeDetected = 0;
+    /** Running sum of the flags stored in {@link #c}. */
+    protected int numberOfErrors = 0;
+    /** No longer used as a loop counter; kept only so existing subclasses still compile. */
+    protected int i;
+
+    /**
+     * Re-reads the options, allocates empty window buffers and creates fresh
+     * copies of both learners. The counters are cleared as well so that
+     * {@link #numberOfErrors} always matches the (now all-zero) flag buffer.
+     */
+    @Override
+    public void resetLearningImpl() {
+        this.t = 0;
+        this.w = this.windowSizeOption.getValue();
+        this.c = new int[this.w];
+        this.theta = (int) (this.w * this.thresholdOption.getValue());
+        this.instances = new Instance[this.w];
+        // Without these two resets a stale error count would survive the new,
+        // zeroed flag buffer and could trigger spurious replacements.
+        this.numberOfErrors = 0;
+        this.changeDetected = 0;
+
+        this.stableLearner = ((Classifier) getPreparedClassOption(this.stableLearnerOption)).copy();
+        this.stableLearner.resetLearning();
+        this.reactiveLearner = ((Classifier) getPreparedClassOption(this.reactiveLearnerOption)).copy();
+        this.reactiveLearner.resetLearning();
+    }
+
+    /**
+     * Stores the instance in the window, updates the disagreement count,
+     * replaces the stable learner by a copy of the reactive one when the count
+     * exceeds {@link #theta}, then trains the stable learner on the instance
+     * and rebuilds the reactive learner from the buffered instances.
+     *
+     * @param inst the training instance
+     */
+    @Override
+    public void trainOnInstanceImpl(Instance inst) {
+        this.instances[this.t] = inst;
+        int trueClass = (int) inst.classValue();
+        boolean stablePrediction = MiscUtils.maxIndex(this.stableLearner.getVotesForInstance(inst)) == trueClass;
+        boolean reactivePrediction = MiscUtils.maxIndex(this.reactiveLearner.getVotesForInstance(inst)) == trueClass;
+
+        // Slot t is about to be overwritten: remove its previous contribution first.
+        this.numberOfErrors = this.numberOfErrors - this.c[this.t];
+        if (!stablePrediction && reactivePrediction) {
+            this.c[this.t] = 1;
+            this.numberOfErrors++;
+        } else {
+            this.c[this.t] = 0;
+        }
+        if (this.theta < this.numberOfErrors) {
+            this.changeDetected++;
+            this.stableLearner = this.reactiveLearner.copy();
+            Arrays.fill(this.c, 0);   // Resets c
+            this.numberOfErrors = 0;
+        }
+        this.stableLearner.trainOnInstance(inst);
+        // The reactive learner is retrained from scratch on the filled buffer
+        // slots, in slot order, stopping at the first empty slot.
+        this.reactiveLearner.resetLearning();
+        for (int slot = 0; slot < this.instances.length && this.instances[slot] != null; slot++) {
+            this.reactiveLearner.trainOnInstance(this.instances[slot]);
+        }
+        this.t = (this.t + 1) % this.w;
+    }
+
+    /**
+     * @param inst the instance to classify
+     * @return the votes of the stable learner
+     */
+    @Override
+    public double[] getVotesForInstance(Instance inst) {
+        return this.stableLearner.getVotesForInstance(inst);
+    }
+
+    @Override
+    public boolean isRandomizable() {
+        return false;
+    }
+
+    /**
+     * Reports the number of replacements since the previous call, followed by
+     * the stable learner's own measurements. Reading the measurements clears
+     * the replacement counter.
+     *
+     * @return the combined measurements
+     */
+    @Override
+    protected Measurement[] getModelMeasurementsImpl() {
+        List<Measurement> measurementList = new LinkedList<Measurement>();
+        measurementList.add(new Measurement("Change detected", this.changeDetected));
+        Measurement[] modelMeasurements = ((AbstractClassifier) this.stableLearner).getModelMeasurements();
+        if (modelMeasurements != null) {
+            measurementList.addAll(Arrays.asList(modelMeasurements));
+        }
+        this.changeDetected = 0;
+        return measurementList.toArray(new Measurement[measurementList.size()]);
+    }
+
+    /** Intentionally empty: this classifier writes no textual model description. */
+    @Override
+    public void getModelDescription(StringBuilder out, int indent) {
+
+    }
+}
diff --git a/moa/src/main/java/moa/classifiers/meta/WEKAClassifier.java b/moa/src/main/java/moa/classifiers/meta/WEKAClassifier.java
@@ -171,15 +171,15 @@ public double[] getVotesForInstance(Instance samoaInstance) {
             for (int i = 0; i < inst.numClasses(); i++) {
                 votes[i] = 1.0 / inst.numClasses();
             }
-        } else {
-            try {
-                votes = this.classifier.distributionForInstance(inst);
-            } catch (Exception e) {
-                System.err.println(e.getMessage());
-            }
-        }
-        return votes;
-    }
+		} else {
+			try {
+				votes = this.classifier.distributionForInstance(inst);
+			} catch (Exception e) {
+				System.err.println(e.getMessage());
+			}
+		}
+		return votes;
+	}
 
     @Override
     public boolean isRandomizable() {

diff --git a/moa/src/main/java/moa/classifiers/multilabel/HoeffdingTreeClassifLeaves.java b/moa/src/main/java/moa/classifiers/multilabel/HoeffdingTreeClassifLeaves.java
@@ -107,6 +107,7 @@ protected LearningNode newLearningNode(double[] initialClassObservations, Classi
     @Override
     protected void attemptToSplit(ActiveLearningNode node, SplitNode parent,
             int parentIndex) {
+        // System.out.println("Attempt to Split");
         if (!node.observedClassDistributionIsPure()) {
             SplitCriterion splitCriterion = (SplitCriterion) getPreparedClassOption(this.splitCriterionOption);
             AttributeSplitSuggestion[] bestSplitSuggestions = node.getBestSplitSuggestions(splitCriterion, this);
@@ -119,6 +120,8 @@ protected void attemptToSplit(ActiveLearningNode node, SplitNode parent,
                         this.splitConfidenceOption.getValue(), node.getWeightSeen());
                 AttributeSplitSuggestion bestSuggestion = bestSplitSuggestions[bestSplitSuggestions.length - 1];
                 AttributeSplitSuggestion secondBestSuggestion = bestSplitSuggestions[bestSplitSuggestions.length - 2];
+               // System.out.println(bestSuggestion.merit+" - "+secondBestSuggestion.merit+":"+
+               //         (bestSuggestion.merit - secondBestSuggestion.merit)+" > "+hoeffdingBound+ "<"+this.tieThresholdOption.getValue());
                 if ((bestSuggestion.merit - secondBestSuggestion.merit > hoeffdingBound)
                         || (hoeffdingBound < this.tieThresholdOption.getValue())) {
                     shouldSplit = true;