fix bugs of dot in python

Without a `transpose()` on `self.theta`, `predict` raises *ValueError: matrices are not aligned*. The previous test case did not exercise this situation, since it used only two features per point. Author: Xusen Yin <[email protected]> Closes #463 from yinxusen/python-naive-bayes and squashes the following commits: fcbe3bc [Xusen Yin] fix bugs of dot in python
kiszk · Apr 22, 2014 · c919798 · c919798
1 parent 0f87e6a
commit c919798
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 5 deletions.
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
@@ -154,7 +154,7 @@ def __init__(self, labels, pi, theta):
 
     def predict(self, x):
         """Return the most likely class for a data vector x"""
-        return self.labels[numpy.argmax(self.pi + _dot(x, self.theta))]
+        return self.labels[numpy.argmax(self.pi + _dot(x, self.theta.transpose()))]
 
 class NaiveBayes(object):
     @classmethod

diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
@@ -104,10 +104,10 @@ def test_clustering(self):
     def test_classification(self):
         from pyspark.mllib.classification import LogisticRegressionWithSGD, SVMWithSGD, NaiveBayes
         data = [
-            LabeledPoint(0.0, [1, 0]),
-            LabeledPoint(1.0, [0, 1]),
-            LabeledPoint(0.0, [2, 0]),
-            LabeledPoint(1.0, [0, 2])
+            LabeledPoint(0.0, [1, 0, 0]),
+            LabeledPoint(1.0, [0, 1, 1]),
+            LabeledPoint(0.0, [2, 0, 0]),
+            LabeledPoint(1.0, [0, 2, 1])
         ]
         rdd = self.sc.parallelize(data)
         features = [p.features.tolist() for p in data]