forked from byzer-org/byzer-lang
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request byzer-org#981 from allwefantasy/TRY
Try
- Loading branch information
Showing
42 changed files
with
1,346 additions
and
668 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
85 changes: 85 additions & 0 deletions
85
streamingpro-mlsql/src/main/java/streaming/core/datasource/MLSQLBaseStreamSource.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
package streaming.core.datasource | ||
|
||
import java.util.concurrent.TimeUnit | ||
|
||
import org.apache.spark.sql.streaming.{DataStreamWriter, Trigger} | ||
import org.apache.spark.sql.{DataFrameWriter, Row} | ||
import streaming.dsl.{DslTool, ScriptSQLExec} | ||
|
||
/** | ||
* 2019-03-20 WilliamZhu([email protected]) | ||
*/ | ||
/**
  * 2019-03-20 WilliamZhu([email protected])
  *
  * Base class for MLSQL streaming sinks. Subclasses supply the concrete
  * `fullFormat`/`shortFormat`; this class wires the user-supplied options
  * into a Spark `DataStreamWriter` and starts the query.
  */
abstract class MLSQLBaseStreamSource extends MLSQLSource with MLSQLSink with MLSQLSourceInfo with MLSQLRegistry with DslTool {

  // Hook for subclasses to adjust user-supplied options before they reach
  // the underlying writer. Default: pass-through.
  def rewriteConfig(config: Map[String, String]) = {
    config
  }

  /**
    * Starts a streaming write of `config.df` using the options in
    * `config.config`. Requires `checkpointLocation`, `duration` (seconds)
    * and `mode` to be present. Returns the started `StreamingQuery`.
    */
  override def save(batchWriter: DataFrameWriter[Row], config: DataSinkConfig): Any = {
    val oldDF = config.df.get
    var option = config.config
    // fileNum only makes sense for batch writes; drop it for streams.
    if (option.contains("fileNum")) {
      option -= "fileNum"
    }

    val writer: DataStreamWriter[Row] = oldDF.writeStream
    var path = config.path

    // A ref like `connName.table` may carry connection options registered
    // under `connName`; parseRef forwards them to the writer.
    val Array(db, table) = parseRef(aliasFormat, path, (options: Map[String, String]) => {
      writer.options(options)
    })

    path = table

    require(option.contains("checkpointLocation"), "checkpointLocation is required")
    require(option.contains("duration"), "duration is required")
    require(option.contains("mode"), "mode is required")

    if (option.contains("partitionByCol")) {
      val cols = option("partitionByCol").split(",").filterNot(f => f.isEmpty)
      if (cols.nonEmpty) {
        // FIX: partition by the filtered column list. The original passed the
        // raw split result, so a stray/trailing comma would add an empty
        // partition column.
        writer.partitionBy(cols: _*)
      }
      option -= "partitionByCol"
    }

    val duration = option("duration").toInt
    option -= "duration"

    val mode = option("mode")
    option -= "mode"

    // implClass lets the user point at a custom datasource implementation.
    val format = config.config.getOrElse("implClass", fullFormat)

    writer.format(format).outputMode(mode).options(option)

    // "-" (or null) means "no explicit target"; otherwise forward as path.
    val dbtable = if (option.contains("dbtable")) option("dbtable") else path

    if (dbtable != null && dbtable != "-") {
      writer.option("path", dbtable)
    }

    // Name the query after streamName (when set) so it can be looked up and
    // managed later.
    ScriptSQLExec.contextGetOrForTest().execListener.env().get("streamName") match {
      case Some(name) => writer.queryName(name)
      case None =>
    }
    writer.trigger(Trigger.ProcessingTime(duration, TimeUnit.SECONDS)).start()
  }

  // Register under both the full and short format names.
  override def register(): Unit = {
    DataSourceRegistry.register(MLSQLDataSourceKey(fullFormat, MLSQLSparkDataSourceType), this)
    DataSourceRegistry.register(MLSQLDataSourceKey(shortFormat, MLSQLSparkDataSourceType), this)
  }

  override def sourceInfo(config: DataAuthConfig): SourceInfo = {
    // `db.table` -> (db, table); a bare `table` gets an empty db.
    val Array(db, table) = config.path.split("\\.") match {
      case Array(db, table) => Array(db, table)
      case Array(table) => Array("", table)
    }
    SourceInfo(shortFormat, db, table)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,8 +18,8 @@ | |
|
||
package streaming.core.datasource | ||
|
||
import org.apache.spark.sql._ | ||
import _root_.streaming.dsl.MLSQLExecuteContext | ||
import org.apache.spark.sql._ | ||
|
||
/** | ||
* 2018-12-20 WilliamZhu([email protected]) | ||
|
@@ -34,6 +34,10 @@ trait MLSQLDataSource { | |
|
||
def shortFormat: String | ||
|
||
def aliasFormat: String = { | ||
shortFormat | ||
} | ||
|
||
} | ||
|
||
trait MLSQLSource extends MLSQLDataSource with MLSQLSourceInfo { | ||
|
@@ -48,15 +52,15 @@ trait RewriteableSource { | |
} | ||
|
||
trait MLSQLSink extends MLSQLDataSource { | ||
def save(writer: DataFrameWriter[Row], config: DataSinkConfig): Unit | ||
def save(writer: DataFrameWriter[Row], config: DataSinkConfig): Any | ||
} | ||
|
||
trait MLSQLDirectSource extends MLSQLDataSource { | ||
def load(reader: DataFrameReader, config: DataSourceConfig): DataFrame | ||
} | ||
|
||
trait MLSQLDirectSink extends MLSQLDataSource { | ||
def save(writer: DataFrameWriter[Row], config: DataSinkConfig): Unit | ||
def save(writer: DataFrameWriter[Row], config: DataSinkConfig): Any | ||
} | ||
|
||
case class SourceInfo(sourceType: String, db: String, table: String) | ||
|
44 changes: 44 additions & 0 deletions
44
streamingpro-mlsql/src/main/java/streaming/core/datasource/impl/MLSQLCarbondata.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
package streaming.core.datasource.impl | ||
|
||
import org.apache.spark.sql.{DataFrame, DataFrameReader, DataFrameWriter, Row} | ||
import streaming.core.datasource._ | ||
import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} | ||
|
||
/** | ||
* 2019-03-20 WilliamZhu([email protected]) | ||
*/ | ||
/**
  * 2019-03-20 WilliamZhu([email protected])
  *
  * Carbondata datasource: reads/writes Carbon tables addressed as
  * `db.table` (db optional).
  */
class MLSQLCarbondata(override val uid: String) extends MLSQLBaseFileSource with WowParams {
  def this() = this(BaseParams.randomUID())

  override def load(reader: DataFrameReader, config: DataSourceConfig): DataFrame = {
    // implClass may override the datasource implementation class.
    val format = config.config.getOrElse("implClass", fullFormat)
    reader.options(config.config).format(format).table(config.path)
  }

  override def save(writer: DataFrameWriter[Row], config: DataSinkConfig): Unit = {
    // parseRef yields Array(db, table) and forwards any connection options.
    val Array(db, table) = parseRef(shortFormat, config.path, (options: Map[String, String]) => {
      writer.options(options)
    })

    if (db.isEmpty) {
      writer.option("tableName", table)
    } else {
      // FIX: the original swapped the two values, writing the db name as
      // tableName and the table name as dbName.
      writer.option("tableName", table).option("dbName", db)
    }

    val format = config.config.getOrElse("implClass", fullFormat)
    writer.options(rewriteConfig(config.config)).format(format).save()
  }

  override def sourceInfo(config: DataAuthConfig): SourceInfo = {
    // Options are intentionally ignored here; only db/table are needed.
    val Array(db, table) = parseRef(shortFormat, config.path, (options: Map[String, String]) => {
    })
    SourceInfo(shortFormat, db, table)
  }

  override def fullFormat: String = "org.apache.spark.sql.CarbonSource"

  override def shortFormat: String = "carbondata"
}
38 changes: 38 additions & 0 deletions
38
streamingpro-mlsql/src/main/java/streaming/core/datasource/impl/MLSQLConsole.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
package streaming.core.datasource.impl | ||
|
||
import org.apache.spark.sql.{DataFrame, DataFrameReader, DataFrameWriter, Row} | ||
import streaming.core.datasource.{DataSinkConfig, DataSourceConfig, MLSQLBaseStreamSource} | ||
import streaming.dsl.ScriptSQLExec | ||
import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} | ||
|
||
/** | ||
* 2019-03-20 WilliamZhu([email protected]) | ||
*/ | ||
/**
  * 2019-03-20 WilliamZhu([email protected])
  *
  * Console sink: only usable as a streaming target; batch load/save are
  * rejected with a runtime error.
  */
class MLSQLConsole(override val uid: String) extends MLSQLBaseStreamSource with WowParams {
  def this() = this(BaseParams.randomUID())

  // Console is write-only; there is nothing to read back.
  override def load(reader: DataFrameReader, config: DataSourceConfig): DataFrame = {
    throw new RuntimeException(s"load is not support with ${shortFormat} ")
  }

  // True when the current script has registered a stream (streamName in env).
  def isStream = {
    val context = ScriptSQLExec.contextGetOrForTest()
    context.execListener.env().contains("streamName")
  }

  override def save(batchWriter: DataFrameWriter[Row], config: DataSinkConfig): Any = {
    // Guard clause: reject batch mode up front, then delegate to the
    // streaming implementation.
    if (!isStream) {
      throw new RuntimeException(s"save is not support with ${shortFormat} in batch mode")
    }
    super.save(batchWriter, config)
  }

  override def fullFormat: String = "console"

  override def shortFormat: String = "console"
}
27 changes: 27 additions & 0 deletions
27
streamingpro-mlsql/src/main/java/streaming/core/datasource/impl/MLSQLCrawlerSql.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
package streaming.core.datasource.impl | ||
|
||
import org.apache.spark.sql.{DataFrame, DataFrameReader, DataFrameWriter, Row} | ||
import streaming.core.datasource._ | ||
import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} | ||
|
||
/** | ||
* 2019-03-20 WilliamZhu([email protected]) | ||
*/ | ||
/**
  * 2019-03-20 WilliamZhu([email protected])
  *
  * crawlersql datasource: read-only; `save` is not supported.
  */
class MLSQLCrawlerSql(override val uid: String) extends MLSQLBaseFileSource with WowParams {
  def this() = this(BaseParams.randomUID())

  override def load(reader: DataFrameReader, config: DataSourceConfig): DataFrame = {
    // Consistency with the other sources: honor an implClass override.
    // The default is unchanged from the original hard-coded format, so
    // existing scripts behave identically.
    val format = config.config.getOrElse("implClass", "org.apache.spark.sql.execution.datasources.crawlersql")
    reader.option("path", config.path).options(rewriteConfig(config.config)).format(format).load()
  }

  // Write path is intentionally unsupported for this source.
  override def save(writer: DataFrameWriter[Row], config: DataSinkConfig): Unit = {
    throw new RuntimeException(s"save is not supported in ${shortFormat}")
  }

  override def fullFormat: String = "crawlersql"

  override def shortFormat: String = fullFormat
}
|
32 changes: 32 additions & 0 deletions
32
streamingpro-mlsql/src/main/java/streaming/core/datasource/impl/MLSQLCsvStr.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package streaming.core.datasource.impl | ||
|
||
import org.apache.spark.sql.{DataFrame, DataFrameReader, DataFrameWriter, Row} | ||
import streaming.core.datasource._ | ||
import streaming.dsl.ScriptSQLExec | ||
import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} | ||
|
||
/** | ||
* 2019-03-20 WilliamZhu([email protected]) | ||
*/ | ||
/**
  * 2019-03-20 WilliamZhu([email protected])
  *
  * csvStr datasource: parses CSV text held in a script environment variable
  * (named by `config.path`) into a DataFrame. Read-only.
  */
class MLSQLCsvStr(override val uid: String) extends MLSQLBaseFileSource with WowParams {
  def this() = this(BaseParams.randomUID())

  override def load(reader: DataFrameReader, config: DataSourceConfig): DataFrame = {
    val context = ScriptSQLExec.contextGetOrForTest()
    // config.path names an env entry whose value is the raw CSV text.
    val rawText = cleanBlockStr(context.execListener.env()(cleanStr(config.path)))
    val lines = rawText.split("\n")
    val spark = config.df.get.sparkSession
    import spark.implicits._
    val ds = spark.createDataset[String](lines)
    reader.options(rewriteConfig(config.config)).csv(ds)
  }

  // Write path is intentionally unsupported for this source.
  override def save(writer: DataFrameWriter[Row], config: DataSinkConfig): Unit = {
    throw new RuntimeException(s"save is not supported in ${shortFormat}")
  }

  override def fullFormat: String = "csvStr"

  override def shortFormat: String = fullFormat
}
|
Oops, something went wrong.