forked from byzer-org/byzer-lang
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Allow users to register a custom plugin that is executed before `load`, `save`, and `!hdfs`/`!fs` commands run (byzer-org#1802)
* Support hooks before load/save/fs execute
* Rename rewrite_[number] to a more specific name
* Fix pathPrefix in LoadAdaptor & SaveAdaptor
* Fix pathPrefix in LoadAdaptor & SaveAdaptor

Co-authored-by: jiachuan.zhu <[email protected]>
- Loading branch information
1 parent
8f0ebe5
commit 844c0cb
Showing
7 changed files
with
241 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,27 +4,31 @@ import org.apache.hadoop.util.ToolRunner | |
import org.apache.spark.sql.expressions.UserDefinedFunction | ||
import org.apache.spark.sql.mlsql.session.MLSQLException | ||
import org.apache.spark.sql.{DataFrame, SparkSession} | ||
import streaming.core.datasource.{FSConfig, RewritableFSConfig} | ||
import streaming.dsl.mmlib.SQLAlg | ||
import streaming.dsl.mmlib.algs.Functions | ||
import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams} | ||
import streaming.dsl.{MLSQLExecuteContext, ScriptSQLExec} | ||
import tech.mlsql.common.utils.serder.json.JSONTool | ||
import tech.mlsql.ets.hdfs.WowFsShell | ||
import tech.mlsql.runtime.AppRuntimeStore | ||
|
||
/** | ||
* 2019-05-07 WilliamZhu([email protected]) | ||
*/ | ||
* 2019-05-07 WilliamZhu([email protected]) | ||
*/ | ||
class HDFSCommand(override val uid: String) extends SQLAlg with Functions with WowParams { | ||
def this() = this(BaseParams.randomUID()) | ||
|
||
override def batchPredict(df: DataFrame, path: String, params: Map[String, String]): DataFrame = train(df, path, params) | ||
|
||
override def train(df: DataFrame, path: String, params: Map[String, String]): DataFrame = { | ||
val spark = df.sparkSession | ||
val conf = df.sparkSession.sessionState.newHadoopConf() | ||
val fsConf = configRewrite(AppRuntimeStore.FS_BEFORE_CONFIG_KEY, | ||
FSConfig(df.sparkSession.sessionState.newHadoopConf(), path, params), ScriptSQLExec.context()) | ||
val args = JSONTool.parseJson[List[String]](params("parameters")) | ||
conf.setQuietMode(false) | ||
fsConf.conf.setQuietMode(false) | ||
var output = "" | ||
val fsShell = new WowFsShell(conf, path) | ||
val fsShell = new WowFsShell(fsConf.conf, fsConf.path) | ||
try { | ||
ToolRunner.run(fsShell, args.toArray) | ||
output = fsShell.getError | ||
|
@@ -45,6 +49,20 @@ class HDFSCommand(override val uid: String) extends SQLAlg with Functions with W | |
} | ||
} | ||
|
||
def configRewrite(orderKey: String, | ||
config: FSConfig, | ||
context: MLSQLExecuteContext): FSConfig = { | ||
AppRuntimeStore.store.getLoadSave(orderKey) match { | ||
case Some(item) => | ||
item.customClassItems.classNames.map { className => | ||
val instance = Class.forName(className).newInstance().asInstanceOf[RewritableFSConfig] | ||
instance.rewrite(config, context) | ||
}.headOption.getOrElse(config) | ||
case None => | ||
config | ||
} | ||
} | ||
|
||
|
||
override def skipPathPrefix: Boolean = false | ||
|
||
|
4 changes: 2 additions & 2 deletions
4
streamingpro-mlsql/src/main/java/tech/mlsql/plugin/load/DefaultLoaderPlugin.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,13 @@ | ||
package tech.mlsql.plugin.load | ||
|
||
import org.apache.spark.sql.DataFrame | ||
import streaming.core.datasource.{DataSourceConfig, RewriteableSource, SourceInfo} | ||
import streaming.core.datasource.{DataSourceConfig, RewritableSource, SourceInfo} | ||
import streaming.dsl.MLSQLExecuteContext | ||
|
||
/** | ||
* 11/12/2019 WilliamZhu([email protected]) | ||
*/ | ||
class DefaultLoaderPlugin extends RewriteableSource { | ||
class DefaultLoaderPlugin extends RewritableSource { | ||
override def rewrite(df: DataFrame, config: DataSourceConfig, sourceInfo: Option[SourceInfo], context: MLSQLExecuteContext): DataFrame = { | ||
val conf = config.config | ||
var table = df | ||
|
Oops, something went wrong.