from pyspark import SparkContext, SparkConf
from pyspark.sql import SQLContext, SparkSession

# Fix: the original script never created `spark`, so `spark.read` raised a
# NameError at runtime.  Build (or reuse) the session explicitly.
spark = SparkSession.builder.appName("es-schools").getOrCreate()

# Reader over the Elasticsearch "schools" index via the es-hadoop connector.
# Fix: DataFrameReader options are stored in a dict keyed by option name, so
# the original pair of "es.read.field.as.array.include" calls silently
# dropped "title"; the connector expects one comma-separated field list.
reader = spark.read.format("org.elasticsearch.spark.sql") \
    .option("es.read.metadata", "true") \
    .option("es.read.field.as.array.include", "title,outlinks_unique") \
    .option("es.nodes.wan.only", "true") \
    .option("es.port", "9200") \
    .option("es.net.ssl", "false") \
    .option("es.nodes", "http://localhost")

df = reader.load("schools")

# Display only the rows whose "school" column equals "Harvard".
df.filter(df["school"] == "Harvard").show()
# Bulk-update the `location` field of existing documents by pushing an
# inline painless script through the es-hadoop connector.
esconf = {}
# Fix: the original line was `esconf["es.mapping.id" = 1 ]` — a SyntaxError
# (assignment inside the subscript).  es.mapping.id must name the DataFrame
# column holding the document id so the update targets existing documents.
# TODO(review): confirm the actual id column name in `df`.
esconf["es.mapping.id"] = "id"
esconf["es.nodes"] = "localhost"
esconf["es.port"] = "9200"
esconf["es.update.script.inline"] = "ctx._source.location = params.location"
# es-hadoop params syntax is name:<value>, where the angle brackets are the
# connector's literal notation for a constant (not a placeholder).
esconf["es.update.script.params"] = "location:<Cambridge>"
esconf["es.write.operation"] = "update"

# NOTE(review): mode("overwrite") asks the connector to replace the target
# resource, which conflicts with es.write.operation = "update" (update an
# existing document in place); "append" is the usual mode for script
# updates — confirm intent before changing behavior.
df.write.format("org.elasticsearch.spark.sql").options(**esconf).mode("overwrite").save("backup_/items")