from pyspark import SparkContext, SparkConf
from pyspark.sql import SQLContext, SparkSession

# Fix: the original script never created `spark`, so `spark.read` raised a
# NameError at runtime.  Build (or reuse) the session explicitly.
spark = SparkSession.builder.appName("es-schools").getOrCreate()

# Reader over the Elasticsearch "schools" index via the es-hadoop connector.
# Fix: DataFrameReader options are stored in a dict keyed by option name, so
# the original pair of "es.read.field.as.array.include" calls silently
# dropped "title"; the connector expects one comma-separated field list.
reader = spark.read.format("org.elasticsearch.spark.sql") \
    .option("es.read.metadata", "true") \
    .option("es.read.field.as.array.include", "title,outlinks_unique") \
    .option("es.nodes.wan.only", "true") \
    .option("es.port", "9200") \
    .option("es.net.ssl", "false") \
    .option("es.nodes", "http://localhost")

df = reader.load("schools")

# Display only the rows whose "school" column equals "Harvard".
df.filter(df["school"] == "Harvard").show()
# Bulk-update the `location` field of existing documents by pushing an
# inline painless script through the es-hadoop connector.
esconf = {}
# Fix: the original line was `esconf["es.mapping.id" = 1 ]` — a SyntaxError
# (assignment inside the subscript).  es.mapping.id must name the DataFrame
# column holding the document id so the update targets existing documents.
# TODO(review): confirm the actual id column name in `df`.
esconf["es.mapping.id"] = "id"
esconf["es.nodes"] = "localhost"
esconf["es.port"] = "9200"
esconf["es.update.script.inline"] = "ctx._source.location = params.location"
# es-hadoop params syntax is name:<value>, where the angle brackets are the
# connector's literal notation for a constant (not a placeholder).
esconf["es.update.script.params"] = "location:<Cambridge>"
esconf["es.write.operation"] = "update"

# NOTE(review): mode("overwrite") asks the connector to replace the target
# resource, which conflicts with es.write.operation = "update" (update an
# existing document in place); "append" is the usual mode for script
# updates — confirm intent before changing behavior.
df.write.format("org.elasticsearch.spark.sql").options(**esconf).mode("overwrite").save("backup_/items")