Skip to content

Commit

Permalink
[SPARK-20549] java.io.CharConversionException: Invalid UTF-32' in JsonToStructs
Browse files Browse the repository at this point in the history

## What changes were proposed in this pull request?

A fix for the same problem was made in apache#17693, but it did not cover `JsonToStructs`. This PR applies the same fix to `JsonToStructs`.

## How was this patch tested?

Regression test

Author: Burak Yavuz <[email protected]>

Closes apache#17826 from brkyvz/SPARK-20549.
  • Loading branch information
brkyvz authored and cloud-fan committed May 2, 2017
1 parent afb21bf commit 86174ea
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,7 @@ case class GetJsonObject(json: Expression, path: Expression)
try {
/* We know the bytes are UTF-8 encoded. Pass a Reader to avoid having Jackson
detect character encoding which could fail for some malformed strings */
Utils.tryWithResource(jsonFactory.createParser(new InputStreamReader(
new ByteArrayInputStream(jsonStr.getBytes), "UTF-8"))) { parser =>
Utils.tryWithResource(CreateJacksonParser.utf8String(jsonFactory, jsonStr)) { parser =>
val output = new ByteArrayOutputStream()
val matched = Utils.tryWithResource(
jsonFactory.createGenerator(output, JsonEncoding.UTF8)) { generator =>
Expand Down Expand Up @@ -398,9 +397,8 @@ case class JsonTuple(children: Seq[Expression])
try {
/* We know the bytes are UTF-8 encoded. Pass a Reader to avoid having Jackson
detect character encoding which could fail for some malformed strings */
Utils.tryWithResource(jsonFactory.createParser(new InputStreamReader(
new ByteArrayInputStream(json.getBytes), "UTF-8"))) {
parser => parseRow(parser, input)
Utils.tryWithResource(CreateJacksonParser.utf8String(jsonFactory, json)) { parser =>
parseRow(parser, input)
}
} catch {
case _: JsonProcessingException =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql.catalyst.json

import java.io.InputStream
import java.io.{ByteArrayInputStream, InputStream, InputStreamReader}

import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
import org.apache.hadoop.io.Text
Expand All @@ -33,7 +33,10 @@ private[sql] object CreateJacksonParser extends Serializable {
val bb = record.getByteBuffer
assert(bb.hasArray)

jsonFactory.createParser(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining())
val bain = new ByteArrayInputStream(
bb.array(), bb.arrayOffset() + bb.position(), bb.remaining())

jsonFactory.createParser(new InputStreamReader(bain, "UTF-8"))
}

def text(jsonFactory: JsonFactory, record: Text): JsonParser = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,13 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
)
}

// Regression test for SPARK-20549: evaluating JsonToStructs (from_json) on the
// `badJson` fixture must produce null.
// NOTE(review): `badJson` and `gmtId` are defined outside this hunk; presumably
// `badJson` is the malformed UTF-8 input used by the related tests -- confirm.
test("SPARK-20549: from_json bad UTF-8") {
// Target schema with a single integer field named "a".
val schema = StructType(StructField("a", IntegerType) :: Nil)
checkEvaluation(
// No parser options are passed (empty map).
JsonToStructs(schema, Map.empty, Literal(badJson), gmtId),
// Expected result of the evaluation is null.
null)
}

test("from_json with timestamp") {
val schema = StructType(StructField("t", TimestampType) :: Nil)

Expand Down

0 comments on commit 86174ea

Please sign in to comment.