When Hive initialization is involved, there is an extra 20~30 second delay; that delay is the initialization of the HiveClient.
fansy1990 committed Jan 4, 2019
1 parent 2edf971 commit 075b81c
Showing 6 changed files with 84 additions and 9 deletions.
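
For context on the commit message, below is a minimal sketch (not part of this commit) of the timing experiment: build the SparkSession with or without enableHiveSupport() and time the first Hive-backed write, which is where the HiveClient gets initialized and where the extra 20~30 seconds are expected to show up. The object name, app name, and table name are placeholders, not identifiers from this repository.

import org.apache.spark.sql.{SaveMode, SparkSession}

object HiveInitTimingSketch {
  def main(args: Array[String]): Unit = {
    // "true" -> enable Hive support and do one Hive write; anything else -> plain Spark only
    val useHive = args.headOption.contains("true")

    val t0 = System.currentTimeMillis()
    val builder = SparkSession.builder().appName("hive-init-timing-sketch")
    val spark = if (useHive) builder.enableHiveSupport().getOrCreate() else builder.getOrCreate()
    println(s"SparkSession ready after ${System.currentTimeMillis() - t0} ms")

    if (useHive) {
      // The first Hive-backed action triggers HiveClient initialization,
      // which is where the 20~30 s delay is expected to appear.
      val t1 = System.currentTimeMillis()
      spark.createDataFrame(Seq((1, "0")))
        .write.mode(SaveMode.Overwrite)
        .saveAsTable("default.spark_nothing_timing") // placeholder table name
      println(s"first Hive write took ${System.currentTimeMillis() - t1} ms")
    }
    spark.stop()
  }
}

The SparkNothing job added in this commit runs the same kind of check, toggled by its first argument; the sketch above only makes the two timing points explicit.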
36 changes: 36 additions & 0 deletions test_demo/src/main/java/justspark/SparkNothingDemo.java
@@ -0,0 +1,36 @@
package justspark;

import demo.engine.SparkYarnJob;
import demo.engine.engine.type.EngineType;
import demo.engine.model.Args;
import prepare.Prepare;

import java.io.IOException;

/**
* @Author: fansy
* @Time: 2018/12/19 17:22
* @Email: [email protected]
*/
public class SparkNothingDemo {
    static String mainClass = "justspark.SparkNothing";
    public static void main(String[] args) throws IOException {
        Prepare.uploadJar();
        first();
        second();
    }

    public static void first() {
        String appName = "spark nothing test-true";
        String[] arguments = {"true", "default.spark_nothing", appName};
        Args innerArgs = Args.getArgs(appName, mainClass, arguments, EngineType.SPARK);
        SparkYarnJob.runAndMonitor(innerArgs);
    }
    public static void second() {
        String appName = "spark nothing test-false";
        String[] arguments = {"false", "default.spark_nothing", appName};
        Args innerArgs = Args.getArgs(appName, mainClass, arguments, EngineType.SPARK);
        SparkYarnJob.runAndMonitor(innerArgs);
    }

}
10 changes: 5 additions & 5 deletions test_demo/src/main/java/prepare/Prepare.java
@@ -26,10 +26,10 @@
public class Prepare {
private static Logger log = LoggerFactory.getLogger(Prepare.class);
public static void main(String[] args) throws IOException {
// uploadJar();
uploadData();
uploadJar();
// uploadData();
// load2Hive_second();
load2Hive_third();
// load2Hive_third();
// load2Hive_fourth();
}

@@ -89,12 +89,12 @@ public static void load2Hive_third(){
}

/**
* Import 6 million records, partitioned by AGE
* Import 6 million records, without partitioning
*/
public static void load2Hive_fourth(){
// default.demo_15m : 629880 records
String mainClass = "prepare.Load2Hive";
String[] arguments = {"/user/root/data.csv","default.demo_600w","100","fourth load to hive","AGE"};
String[] arguments = {"/user/root/data.csv","default.demo_600w","100","fourth load to hive",""};
Args innerArgs = Args.getArgs("Load data to Hive 600w",mainClass,arguments, EngineType.SPARK);
// SubmitResult submitResult = SparkYarnJob.run(innerArgs);
// SparkYarnJob.monitor(submitResult);
8 changes: 5 additions & 3 deletions test_demo/src/main/java/statics/DescribeStaticsDemo.java
Expand Up @@ -15,8 +15,8 @@ public class DescribeStaticsDemo {
static String mainClass = "statics.DescribeStatics";
public static void main(String[] args) throws IOException {
// Prepare.uploadJar();
first();
// second();
// first();
second();
}

public static void first() {
@@ -27,7 +27,9 @@ public static void first() {
}

/**
* 1 ApplicationMaster; 2 worker-node Executors; took 4.7 mins
* Hive table partitioned by Age: 1 ApplicationMaster; 2 worker-node Executors; took 4.7 mins
* Hive table without partitioning: 1 ApplicationMaster; 2 worker-node Executors; took 1.6 mins
*/
public static void second(){
String appName = "demo_600w statics";
6 changes: 6 additions & 0 deletions test_demo/src/main/java/statics/DescribeStaticsDemo总结
@@ -1,5 +1,11 @@

1. Submitting via the command line:
30m
spark-submit --class statics.DescribeStatics --deploy-mode cluster test_demo-1.0-SNAPSHOT.jar default.demo_30m default.demo_30_statics statics_30m_01


600w (6 million) records
spark-submit --class statics.DescribeStatics --deploy-mode cluster test_demo-1.0-SNAPSHOT.jar default.demo_600w default.demo_600w_statics statics_600w_01


Blog post on the 30-second Spark delay: https://blog.csdn.net/fansy1990/article/details/85266553
28 changes: 28 additions & 0 deletions test_demo/src/main/scala/justspark/SparkNothing.scala
@@ -0,0 +1,28 @@
package justspark

import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql.{SaveMode, SparkSession}

/**
* author : fanzhe
* email : [email protected]
* date : 2019/1/4 PM9:39.
*/
object SparkNothing {
  def main(args: Array[String]): Unit = {
    if (args.length != 3) {
      println("Usage: justspark.SparkNothing <use_hive_or_not> <hive_table> <appName>")
      System.exit(-1)
    }
    //
    val (use_hive, table, appName) = (args(0).toBoolean, args(1), args(2))
    println(new java.util.Date() + ": begin spark init...")
    val spark = SparkSession.builder().appName(appName).enableHiveSupport().getOrCreate()
    println(new java.util.Date() + ": spark init done!")

    if (use_hive) {
      spark.createDataFrame(List((1, "0"))).write.mode(SaveMode.Overwrite).saveAsTable(table)
    }
    spark.stop()
  }
}
5 changes: 4 additions & 1 deletion test_demo/src/main/scala/statics/DescribeStatics.scala
@@ -15,10 +15,13 @@ object DescribeStatics {
}
//
val (input, table, appName) = (args(0),args(1),args(2))

println(new java.util.Date()+": begin spark init...")
val spark = SparkSession.builder().appName(appName).enableHiveSupport().getOrCreate()
println(new java.util.Date()+": spark init done!")

val data = spark.read.table(input)
println(new java.util.Date() + ": data.size" + data.count)

data.describe( data.schema.fieldNames :_*).write.mode(SaveMode.Overwrite).saveAsTable(table)

spark.stop()
