forked from byzer-org/byzer-lang
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request byzer-org#815 from allwefantasy/ISSUE-814
Refactor datasource in LoadAdaptor and SaveAdaptor
- Loading branch information
Showing
9 changed files
with
430 additions
and
124 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
69 changes: 69 additions & 0 deletions
69
streamingpro-mlsql/src/main/java/streaming/core/datasource/DataSourceRegistry.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package streaming.core.datasource | ||
|
||
import com.google.common.reflect.ClassPath | ||
import org.apache.spark.sql.SaveMode | ||
import streaming.log.Logging | ||
|
||
import scala.collection.JavaConverters._ | ||
|
||
/** | ||
* 2018-12-20 WilliamZhu([email protected]) | ||
*/ | ||
/**
 * Global registry mapping data-source names (both full and short formats) to their
 * [[MLSQLDataSource]] implementations. Classes in the packages scanned at the bottom
 * of this object are registered automatically when they implement [[MLSQLRegistry]].
 *
 * 2018-12-20 WilliamZhu(allwefantasy@gmail.com)
 */
object DataSourceRegistry extends Logging {
  // ConcurrentHashMap so register/fetch are safe to call from multiple threads.
  private val registry = new java.util.concurrent.ConcurrentHashMap[String, MLSQLDataSource]()

  /** Registers `obj` under `name`, replacing any previous binding for that name. */
  def register(name: String, obj: MLSQLDataSource): Unit = {
    registry.put(name, obj)
  }

  /**
   * Looks up a data source by name.
   *
   * Uses a single `get` wrapped in `Option` instead of the original
   * `containsKey` + `get` pair: the two-step form is not atomic and could observe a
   * concurrent removal between the two calls.
   */
  def fetch(name: String): Option[MLSQLDataSource] = {
    Option(registry.get(name))
  }

  /**
   * Scans the top-level classes of the package `name` and invokes `register()` on
   * every one that implements [[MLSQLRegistry]]; anything else is logged and skipped.
   */
  private def registerFromPackage(name: String): Unit = {
    ClassPath.from(getClass.getClassLoader).getTopLevelClasses(name).asScala.foreach { clzz =>
      // getDeclaredConstructor().newInstance() replaces the deprecated
      // Class.newInstance(), which silently rethrows checked constructor exceptions.
      Class.forName(clzz.getName).getDeclaredConstructor().newInstance() match {
        case registrable: MLSQLRegistry => registrable.register()
        case _ =>
          logWarning(
            s"""
               |${clzz.getName} does not implement MLSQLRegistry,
               |we cannot register it automatically.
            """.stripMargin)
      }
    }
  }

  registerFromPackage("streaming.core.datasource.impl")
  // NOTE(review): "contri" looks like a typo for "contrib", but it is a runtime
  // package path — confirm the intended package name before renaming it.
  registerFromPackage("streaming.contri.datasource.impl")
}
|
||
/**
 * Implemented by data sources that know how to add themselves to
 * [[DataSourceRegistry]] (typically under both their full and short format names).
 */
trait MLSQLRegistry {

  /** Registers this instance with the global [[DataSourceRegistry]]. */
  def register(): Unit
}
|
||
/**
 * Parameters for a load operation.
 *
 * @param path   the table/path expression from the LOAD statement (format-specific)
 * @param config key-value options forwarded to the underlying reader
 */
case class DataSourceConfig(path: String, config: Map[String, String])

/**
 * Parameters for a save operation.
 *
 * @param path   the table/path expression from the SAVE statement (format-specific)
 * @param config key-value options forwarded to the underlying writer
 * @param mode   Spark save mode (append, overwrite, etc.)
 */
case class DataSinkConfig(path: String, config: Map[String, String], mode: SaveMode)
45 changes: 45 additions & 0 deletions
45
streamingpro-mlsql/src/main/java/streaming/core/datasource/MLSQLSource.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package streaming.core.datasource | ||
|
||
import org.apache.spark.sql.{DataFrame, DataFrameReader, DataFrameWriter, Row} | ||
|
||
/** | ||
* 2018-12-20 WilliamZhu([email protected]) | ||
*/ | ||
|
||
/**
 * Base contract for MLSQL data sources: each source exposes a fully-qualified Spark
 * format name plus a short alias, and may override the path splitter.
 */
trait MLSQLDataSource {

  /**
   * Separator between the connection/db name and the table part of a path.
   *
   * NOTE(review): the default "\\." reads like a regex (suitable for `String.split`),
   * while overrides such as Elasticsearch's "/" are also used with `String.contains`
   * as a literal — confirm all callers agree on regex vs. literal semantics.
   */
  def dbSplitter: String = "\\."

  /** Fully-qualified Spark format name, e.g. "org.elasticsearch.spark.sql". */
  def fullFormat: String

  /** Short alias for the format, e.g. "es". */
  def shortFormat: String
}
|
||
/** A data source that can be read through a Spark `DataFrameReader`. */
trait MLSQLSource extends MLSQLDataSource {

  /** Builds a DataFrame from `reader` according to `config`. */
  def load(reader: DataFrameReader, config: DataSourceConfig): DataFrame
}
|
||
/** A data source that can be written through a Spark `DataFrameWriter`. */
trait MLSQLSink extends MLSQLDataSource {

  /** Persists the data behind `writer` according to `config`. */
  def save(writer: DataFrameWriter[Row], config: DataSinkConfig): Unit
}
|
80 changes: 80 additions & 0 deletions
80
streamingpro-mlsql/src/main/java/streaming/core/datasource/impl/MLSQLElasticSearch.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package streaming.core.datasource.impl | ||
|
||
import org.apache.spark.sql.{DataFrame, DataFrameReader, DataFrameWriter, Row} | ||
import streaming.core.datasource._ | ||
import streaming.dsl.ScriptSQLExec | ||
|
||
/**
 * Elasticsearch source/sink. Paths look like "index/type"; when the segment before
 * the first "/" names a connection registered in `ScriptSQLExec.dbMapping`, that
 * connection's options are applied and the prefix is stripped from the path.
 */
class MLSQLElasticSearch extends MLSQLSource with MLSQLSink with MLSQLRegistry {

  override def fullFormat: String = "org.elasticsearch.spark.sql"

  override def shortFormat: String = "es"

  // Elasticsearch addresses resources as index/type, so "/" separates db and table.
  override def dbSplitter: String = "/"

  /**
   * Resolves the effective table name and applies any connect-time options.
   *
   * If the path contains the splitter and its first segment is a known connection
   * name, the connection's options are fed to `setOption` and the remainder becomes
   * the table. Otherwise the path is used unchanged — harmless, because
   * Elasticsearch paths legitimately contain "/" (index/type).
   *
   * Shared by `load` and `save`, which previously duplicated this logic.
   *
   * @param path      raw path from the LOAD/SAVE statement
   * @param setOption callback applying one connect option to the reader/writer
   * @return the table (index/type) to hand to Spark
   */
  private def resolveTable(path: String, setOption: (String, String) => Unit): String = {
    if (path.contains(dbSplitter)) {
      val Array(dbName, table) = path.split(dbSplitter, 2)
      if (ScriptSQLExec.dbMapping.containsKey(dbName)) {
        ScriptSQLExec.dbMapping.get(dbName).foreach { kv => setOption(kv._1, kv._2) }
        table
      } else {
        path
      }
    } else {
      path
    }
  }

  override def load(reader: DataFrameReader, config: DataSourceConfig): DataFrame = {
    val dbtable = resolveTable(config.path, (k, v) => reader.option(k, v))
    // Load configs should overwrite connect configs.
    reader.options(config.config)
    reader.format(config.config.getOrElse("implClass", fullFormat)).load(dbtable)
  }

  override def save(writer: DataFrameWriter[Row], config: DataSinkConfig): Unit = {
    val dbtable = resolveTable(config.path, (k, v) => writer.option(k, v))
    writer.mode(config.mode)
    // Save configs should overwrite connect configs.
    writer.options(config.config)
    // foreach, not map: partitionBy is invoked purely for its side effect.
    config.config.get("partitionByCol").foreach { cols =>
      writer.partitionBy(cols.split(","): _*)
    }
    writer.format(config.config.getOrElse("implClass", fullFormat)).save(dbtable)
  }

  override def register(): Unit = {
    DataSourceRegistry.register(fullFormat, this)
    DataSourceRegistry.register(shortFormat, this)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.