Skip to content

Commit

Permalink
[SPARK-15165] [SQL] Codegen can break because toCommentSafeString is …
Browse files Browse the repository at this point in the history
…not actually safe

## What changes were proposed in this pull request?

toCommentSafeString method replaces "\u" with "\\\\u" to avoid codegen breaking.
But if the even number of "\" is put before "u", like "\\\\u", in the string literal in the query, codegen can break.

Following code causes compilation error.

```
val df = Seq(...).toDF
df.select("'\\\\\\\\u002A/'").show
```

The reason of the compilation error is because "\\\\\\\\\\\\\\\\u002A/" is translated into "*/" (the end of comment).

Due to this unsafety, arbitrary code can be injected like as follows.

```
val df = Seq(...).toDF
// Inject "System.exit(1)"
df.select("'\\\\\\\\u002A/{System.exit(1);}/*'").show
```

## How was this patch tested?

Added new test cases.

Author: Kousuke Saruta <[email protected]>
Author: sarutak <[email protected]>

Closes apache#12939 from sarutak/SPARK-15165.
  • Loading branch information
sarutak authored and davies committed May 17, 2016
1 parent bebe5f9 commit c0c3ec3
Show file tree
Hide file tree
Showing 3 changed files with 320 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,18 @@ package object util {
def toCommentSafeString(str: String): String = {
val len = math.min(str.length, 128)
val suffix = if (str.length > len) "..." else ""
str.substring(0, len).replace("*/", "\\*\\/").replace("\\u", "\\\\u") + suffix

// Unicode literals, like \u0022, should be escaped before
// they are put in code comment to avoid codegen breaking.
// To escape them, single "\" should be prepended to a series of "\" just before "u"
// only when the number of "\" is odd.
// For example, \u0022 should become to \\u0022
// but \\u0022 should not become to \\\u0022 because the first backslash escapes the second one,
// and \u0022 will remain, means not escaped.
// Otherwise, the runtime Java compiler will fail to compile or code injection can be allowed.
// For details, see SPARK-15165.
str.substring(0, len).replace("*/", "*\\/")
.replaceAll("(^|[^\\\\])(\\\\(\\\\\\\\)*u)", "$1\\\\$2") + suffix
}

/* FIX ME
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,4 +194,48 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper {
true,
InternalRow(UTF8String.fromString("\\u")))
}

test("check compilation error doesn't occur caused by specific literal") {
// The end of comment (*/) should be escaped.
GenerateUnsafeProjection.generate(
Literal.create("*/Compilation error occurs/*", StringType) :: Nil)

// `\u002A` is `*` and `\u002F` is `/`
// so if the end of comment consists of those characters in queries, we need to escape them.
GenerateUnsafeProjection.generate(
Literal.create("\\u002A/Compilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("\\\\u002A/Compilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("\\u002a/Compilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("\\\\u002a/Compilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("*\\u002FCompilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("*\\\\u002FCompilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("*\\002fCompilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("*\\\\002fCompilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("\\002A\\002FCompilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("\\\\002A\\002FCompilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("\\002A\\\\002FCompilation error occurs/*", StringType) :: Nil)

// \ u002X is an invalid unicode literal so it should be escaped.
GenerateUnsafeProjection.generate(
Literal.create("\\u002X/Compilation error occurs", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("\\\\u002X/Compilation error occurs", StringType) :: Nil)

// \ u001 is an invalid unicode literal so it should be escaped.
GenerateUnsafeProjection.generate(
Literal.create("\\u001/Compilation error occurs", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("\\\\u001/Compilation error occurs", StringType) :: Nil)

}
}
264 changes: 264 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2496,4 +2496,268 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
}
}

test("check code injection is prevented") {
// The end of comment (*/) should be escaped.
var literal =
"""|*/
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
var expected =
"""|*/
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

// `\u002A` is `*` and `\u002F` is `/`
// so if the end of comment consists of those characters in queries, we need to escape them.
literal =
"""|\\u002A/
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
s"""|${"\\u002A/"}
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|\\\\u002A/
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
"""|\\u002A/
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|\\u002a/
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
s"""|${"\\u002a/"}
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|\\\\u002a/
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
"""|\\u002a/
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|*\\u002F
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
s"""|${"*\\u002F"}
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|*\\\\u002F
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
"""|*\\u002F
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|*\\u002f
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
s"""|${"*\\u002f"}
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|*\\\\u002f
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
"""|*\\u002f
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|\\u002A\\u002F
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
s"""|${"\\u002A\\u002F"}
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|\\\\u002A\\u002F
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
s"""|${"\\\\u002A\\u002F"}
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|\\u002A\\\\u002F
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
s"""|${"\\u002A\\\\u002F"}
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|\\\\u002A\\\\u002F
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
"""|\\u002A\\u002F
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)
}
}

0 comments on commit c0c3ec3

Please sign in to comment.