Skip to content

Commit

Permalink
No value in nested struct causes arrayIndexOutOfBounds
Browse files Browse the repository at this point in the history
This PR is a copy of databricks#118, so the original author is kept as the commit author. I only fixed some nits and made the test code and data smaller.

Author: Jan Scherbaum <[email protected]>
Author: hyukjinkwon <[email protected]>

Closes databricks#121 from HyukjinKwon/ISSUE-117.
  • Loading branch information
lokm01 authored and HyukjinKwon committed May 13, 2016
1 parent ad7abbd commit 19eb277
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,14 @@ private[xml] object StaxXmlParser {
// The fields are sorted so `TreeMap` is used.
val convertedValuesMap = convertValues(valuesMap, schema)
val row = TreeMap((fields ++ convertedValuesMap).toSeq : _*).values.toSeq
Row.fromSeq(row)

// Return null rather than an empty row. For nested structs, an empty row causes
// an ArrayIndexOutOfBoundsException when an action is executed.
if (row.isEmpty) {
null
} else {
Row.fromSeq(row)
}
}

/**
Expand Down
16 changes: 16 additions & 0 deletions src/test/resources/null-nested-struct.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" ?>
<root>
<item>
<b>
<es>
<e>1</e>
</es>
</b>
</item>
<item>
<!-- Issue 117 - This is where an empty Row would be produced instead of null -->
<b>
<es></es>
</b>
</item>
</root>
10 changes: 10 additions & 0 deletions src/test/scala/com/databricks/spark/xml/XmlSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class XmlSuite extends FunSuite with BeforeAndAfterAll {
val topicsFile = "src/test/resources/topics-namespaces.xml"
val gpsEmptyField = "src/test/resources/gps-empty-field.xml"
val agesMixedTypes = "src/test/resources/ages-mixed-types.xml"
val nullNestedStructFile = "src/test/resources/null-nested-struct.xml"

val booksTag = "book"
val booksRootTag = "books"
Expand Down Expand Up @@ -657,4 +658,13 @@ class XmlSuite extends FunSuite with BeforeAndAfterAll {

assert(results.size === numTopics)
}

test("Missing nested struct represented as null instead of empty Row") {
  // Read every `item` element of the fixture and project only the nested `b.es` column.
  val esColumn = sqlContext.xmlFile(nullNestedStructFile, rowTag = "item").select("b.es")
  val collectedRows = esColumn.collect()

  // The second row is the one checked for issue 117: its only value must be
  // null rather than an empty Row.
  val secondItem = collectedRows(1)
  assert(secondItem.toSeq === Seq(null))
}
}

0 comments on commit 19eb277

Please sign in to comment.