forked from byzer-org/byzer-lang
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
7452baa
commit 24206fa
Showing
5 changed files
with
163 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,9 +3,10 @@ package streaming.common | |
import java.io.{BufferedReader, InputStreamReader} | ||
|
||
import org.apache.hadoop.conf.Configuration | ||
import org.apache.hadoop.fs.{FSDataOutputStream, FileSystem, Path} | ||
import org.apache.hadoop.fs.{FSDataOutputStream, FileStatus, FileSystem, Path} | ||
|
||
import scala.collection.mutable.ArrayBuffer | ||
import scala.collection.JavaConversions._ | ||
|
||
/** | ||
* 5/5/16 WilliamZhu([email protected]) | ||
|
@@ -32,6 +33,12 @@ object HDFSOperator { | |
} | ||
|
||
|
||
/** Lists the immediate children of `path` on the default Hadoop FileSystem,
  * keeping only entries that are directories (each one assumed to hold a model).
  */
def listModelDirectory(path: String): Seq[FileStatus] = {
  val conf = new Configuration()
  val fileSystem = FileSystem.get(conf)
  val children = fileSystem.listStatus(new Path(path))
  children.filter(_.isDirectory)
}
|
||
|
||
def saveFile(path: String, fileName: String, iterator: Iterator[(String, String)]) = { | ||
|
||
var dos: FSDataOutputStream = null | ||
|
24 changes: 24 additions & 0 deletions
24
streamingpro-spark-2.0/src/main/java/org/apache/spark/util/WowXORShiftRandom.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
package org.apache.spark.util | ||
|
||
import org.apache.spark.util.random.XORShiftRandom | ||
|
||
/** | ||
* Created by allwefantasy on 5/2/2018. | ||
*/ | ||
/**
 * Thin public wrapper around Spark's package-private
 * `org.apache.spark.util.random.XORShiftRandom`, exposing it from the
 * `org.apache.spark.util` package so code outside Spark can use it.
 */
class WowXORShiftRandom {

  // Underlying XORShift PRNG; default-seeded by XORShiftRandom's own
  // constructor (seed source not visible here — see Spark source).
  val random = new XORShiftRandom()

  /** Returns the next pseudo-random Double from the underlying generator.
    * Explicit return type added: public API members should not rely on
    * inference. Stateful (advances the PRNG) on every call.
    */
  def nextDouble: Double = random.nextDouble()
}
|
||
/** Ad-hoc demo entry point: prints 1000 values from [[WowXORShiftRandom]]
  * to stdout so the generator's output can be eyeballed.
  */
object WowXORShiftRandom {
  def main(args: Array[String]): Unit = {
    val random = new WowXORShiftRandom()
    // `_` instead of a named-but-unused parameter `f` (idiom fix).
    (0 until 1000).foreach(_ => println(random.nextDouble))
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
45 changes: 33 additions & 12 deletions
45
streamingpro-spark-2.0/src/main/java/streaming/dsl/mmlib/algs/SQLNaiveBayes.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,38 +1,59 @@ | ||
package streaming.dsl.mmlib.algs | ||
|
||
import net.csdn.common.logging.Loggers | ||
import org.apache.spark.Partitioner | ||
import org.apache.spark.ml.classification.{NaiveBayes, NaiveBayesModel} | ||
import org.apache.spark.ml.linalg.Vector | ||
import org.apache.spark.sql.{DataFrame, SparkSession} | ||
import org.apache.spark.sql.expressions.UserDefinedFunction | ||
import streaming.dsl.mmlib.SQLAlg | ||
import org.apache.spark.ml.linalg.SQLDataTypes.VectorType | ||
import streaming.common.HDFSOperator | ||
|
||
import scala.collection.mutable.ArrayBuffer | ||
|
||
/** | ||
* Created by allwefantasy on 13/1/2018. | ||
*/ | ||
class SQLNaiveBayes extends SQLAlg with Functions { | ||
|
||
override def train(df: DataFrame, path: String, params: Map[String, String]): Unit = { | ||
val bayes = new NaiveBayes() | ||
configureModel(bayes, params) | ||
val model = bayes.fit(df) | ||
model.write.overwrite().save(path) | ||
trainModels[NaiveBayesModel](df, path, params, () => { | ||
new NaiveBayes() | ||
}) | ||
} | ||
|
||
override def load(sparkSession: SparkSession, path: String): Any = { | ||
val model = NaiveBayesModel.load(path) | ||
model | ||
loadModels(path, (tempPath) => { | ||
NaiveBayesModel.load(tempPath) | ||
}) | ||
} | ||
|
||
override def predict(sparkSession: SparkSession, _model: Any,name:String): UserDefinedFunction = { | ||
val model = sparkSession.sparkContext.broadcast(_model.asInstanceOf[NaiveBayesModel]) | ||
override def predict(sparkSession: SparkSession, _model: Any, name: String): UserDefinedFunction = { | ||
|
||
val models = sparkSession.sparkContext.broadcast(_model.asInstanceOf[ArrayBuffer[NaiveBayesModel]]) | ||
|
||
val f = (vec: Vector) => { | ||
val predictRaw = model.value.getClass.getMethod("predictRaw", classOf[Vector]).invoke(model.value, vec).asInstanceOf[Vector] | ||
val raw2probability = model.value.getClass.getMethod("raw2probability", classOf[Vector]).invoke(model.value, predictRaw).asInstanceOf[Vector] | ||
//model.getClass.getMethod("probability2prediction", classOf[Vector]).invoke(model, raw2probability).asInstanceOf[Vector] | ||
raw2probability | ||
models.value.map { model => | ||
val predictRaw = model.getClass.getMethod("predictRaw", classOf[Vector]).invoke(model, vec).asInstanceOf[Vector] | ||
val raw2probability = model.getClass.getMethod("raw2probability", classOf[Vector]).invoke(model, predictRaw).asInstanceOf[Vector] | ||
//model.getClass.getMethod("probability2prediction", classOf[Vector]).invoke(model, raw2probability).asInstanceOf[Vector] | ||
//概率,分类 | ||
(raw2probability(raw2probability.argmax), raw2probability) | ||
}.sortBy(f => f._1).reverse.head._2 | ||
} | ||
|
||
val f2 = (vec: Vector) => { | ||
models.value.map { model => | ||
val predictRaw = model.getClass.getMethod("predictRaw", classOf[Vector]).invoke(model, vec).asInstanceOf[Vector] | ||
val raw2probability = model.getClass.getMethod("raw2probability", classOf[Vector]).invoke(model, predictRaw).asInstanceOf[Vector] | ||
//model.getClass.getMethod("probability2prediction", classOf[Vector]).invoke(model, raw2probability).asInstanceOf[Vector] | ||
raw2probability | ||
} | ||
} | ||
|
||
sparkSession.udf.register(name + "_raw", f2) | ||
|
||
UserDefinedFunction(f, VectorType, Some(Seq(VectorType))) | ||
} | ||
} |