Skip to content

Commit

Permalink
[SPARK-35325][SQL][TESTS] Add nested column ORC encryption test case
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

This PR aims to enrich ORC encryption test coverage for nested columns.

### Why are the changes needed?

This will provide test coverage for this feature.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Pass the CIs with the newly added test case.

Closes apache#32449 from dongjoon-hyun/SPARK-35325.

Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
  • Loading branch information
dongjoon-hyun committed May 6, 2021
1 parent a0c76a8 commit 19661f6
Showing 1 changed file with 50 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,54 @@ class OrcEncryptionSuite extends OrcTest with SharedSparkSession {
}
}
}

// Round-trips a row whose struct column `contact` has two encrypted nested fields
// (`contact.ssn`, `contact.email`) through an ORC table, then re-reads the same files
// through a second table configured with a different key provider and checks that the
// masked values (NULL for ssn, a SHA-256 digest for email) are returned instead.
test("SPARK-35325: Write and read encrypted nested columns") {
  val conf = spark.sessionState.newHadoopConf()
  val provider = HadoopShimsFactory.get.getHadoopKeyProvider(conf, new Random)
  // Cancel (rather than fail) the test when the key provider exposes no keys, since
  // nothing can be encrypted without them.
  assume(!provider.getKeyNames.isEmpty,
    s"$provider doesn't has the test keys. ORC shim is created with old Hadoop libraries")

  // What a reader holding the keys must get back: exactly the inserted values.
  // The e-mail literal must be the same string that is inserted below and must be the
  // input whose SHA-256 digest is hard-coded in `rowNestedDataWithoutKey`.
  val originalNestedData = Row(1, Row("123456789", "dongjoon@apache.org", "Dongjoon"))
  // What a reader without the keys must get back, per the `orc.mask` option below:
  // `nullify` turns ssn into NULL, `sha256` replaces email with its digest, and the
  // unencrypted `name` field is returned unchanged.
  val rowNestedDataWithoutKey =
    Row(1, Row(null, "841626795E7D351555B835A002E3BF10669DE9B81C95A3D59E10865AC37EA7C3",
      "Dongjoon"))

  withTempDir { dir =>
    val path = dir.getAbsolutePath
    // Writer/reader side with keys: the "hadoop" key provider pointing at "test:///"
    // (presumably the test key provider probed above -- confirm against the ORC shims).
    withTable("encrypted") {
      sql(
        s"""
          |CREATE TABLE encrypted (
          |  id INT,
          |  contact struct<ssn:STRING, email:STRING, name:STRING>
          |)
          |USING ORC
          |LOCATION "$path"
          |OPTIONS (
          |  hadoop.security.key.provider.path "test:///",
          |  orc.key.provider "hadoop",
          |  orc.encrypt "pii:contact.ssn,contact.email",
          |  orc.mask "nullify:contact.ssn;sha256:contact.email"
          |)
          |""".stripMargin)
      sql("INSERT INTO encrypted VALUES(1, ('123456789', 'dongjoon@apache.org', 'Dongjoon'))")
      checkAnswer(sql("SELECT * FROM encrypted"), originalNestedData)
    }
    // Reader side without keys: a second table over the same location, using the
    // "memory" key provider, so only the masked values are expected to be visible.
    withTable("normal") {
      sql(
        s"""
          |CREATE TABLE normal (
          |  id INT,
          |  contact struct<ssn:STRING, email:STRING, name:STRING>
          |)
          |USING ORC
          |LOCATION "$path"
          |OPTIONS (
          |  orc.key.provider "memory"
          |)
          |""".stripMargin)
      checkAnswer(sql("SELECT * FROM normal"), rowNestedDataWithoutKey)
    }
  }
}
}

0 comments on commit 19661f6

Please sign in to comment.