Skip to content

Commit

Permalink
Revert "[SPARK-25764][ML][EXAMPLES] Update BisectingKMeans example to…
Browse files Browse the repository at this point in the history
… use ClusteringEvaluator"

This reverts commit d0ecff2.
  • Loading branch information
cloud-fan committed Oct 20, 2018
1 parent ec96d34 commit 4acbda4
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
// $example on$
import org.apache.spark.ml.clustering.BisectingKMeans;
import org.apache.spark.ml.clustering.BisectingKMeansModel;
import org.apache.spark.ml.evaluation.ClusteringEvaluator;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
Expand Down Expand Up @@ -51,14 +50,9 @@ public static void main(String[] args) {
BisectingKMeans bkm = new BisectingKMeans().setK(2).setSeed(1);
BisectingKMeansModel model = bkm.fit(dataset);

// Make predictions
Dataset<Row> predictions = model.transform(dataset);

// Evaluate clustering by computing Silhouette score
ClusteringEvaluator evaluator = new ClusteringEvaluator();

double silhouette = evaluator.evaluate(predictions);
System.out.println("Silhouette with squared euclidean distance = " + silhouette);
// Evaluate clustering.
double cost = model.computeCost(dataset);
System.out.println("Within Set Sum of Squared Errors = " + cost);

// Shows the result.
System.out.println("Cluster Centers: ");
Expand Down
12 changes: 3 additions & 9 deletions examples/src/main/python/ml/bisecting_k_means_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@

# $example on$
from pyspark.ml.clustering import BisectingKMeans
from pyspark.ml.evaluation import ClusteringEvaluator
# $example off$
from pyspark.sql import SparkSession

Expand All @@ -42,14 +41,9 @@
bkm = BisectingKMeans().setK(2).setSeed(1)
model = bkm.fit(dataset)

# Make predictions
predictions = model.transform(dataset)

# Evaluate clustering by computing Silhouette score
evaluator = ClusteringEvaluator()

silhouette = evaluator.evaluate(predictions)
print("Silhouette with squared euclidean distance = " + str(silhouette))
# Evaluate clustering.
cost = model.computeCost(dataset)
print("Within Set Sum of Squared Errors = " + str(cost))

# Shows the result.
print("Cluster Centers: ")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ package org.apache.spark.examples.ml

// $example on$
import org.apache.spark.ml.clustering.BisectingKMeans
import org.apache.spark.ml.evaluation.ClusteringEvaluator
// $example off$
import org.apache.spark.sql.SparkSession

Expand Down Expand Up @@ -49,14 +48,9 @@ object BisectingKMeansExample {
val bkm = new BisectingKMeans().setK(2).setSeed(1)
val model = bkm.fit(dataset)

// Make predictions
val predictions = model.transform(dataset)

// Evaluate clustering by computing Silhouette score
val evaluator = new ClusteringEvaluator()

val silhouette = evaluator.evaluate(predictions)
println(s"Silhouette with squared euclidean distance = $silhouette")
// Evaluate clustering.
val cost = model.computeCost(dataset)
println(s"Within Set Sum of Squared Errors = $cost")

// Shows the result.
println("Cluster Centers: ")
Expand Down

0 comments on commit 4acbda4

Please sign in to comment.