Skip to content

Commit

Permalink
Correct some code according to Scala style and remove a test that was unintentionally added
Browse files Browse the repository at this point in the history
  • Loading branch information
HyukjinKwon committed Nov 5, 2015
1 parent 6eb52a2 commit 764b83d
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ private[sql] class StaxXmlParser(parser: XMLEventReader) {
// We need to concatenate values if character token is found again.
// Java StAX XML produces character event sequentially sometimes.
// TODO: Check why it is.
if(eventsInFragment.last.isCharacters){
if (eventsInFragment.last.isCharacters){
val previous = eventsInFragment.last.asCharacters.getData
val current = event.asCharacters.getData
eventsInFragment.last.asInstanceOf[CharacterEvent].setData(previous + current)
Expand Down Expand Up @@ -199,7 +199,8 @@ private[sql] object StaxXmlParser {
xml.mapPartitions { iter =>
iter.flatMap { xml =>
val factory = XMLInputFactory.newInstance()
val parser = new StaxXmlParser(factory.createXMLEventReader(new ByteArrayInputStream(xml.getBytes)))
val parser = new StaxXmlParser(
factory.createXMLEventReader(new ByteArrayInputStream(xml.getBytes)))
// Skip the first event
startConvertObject(parser, schema, rootTag)
}
Expand All @@ -209,7 +210,9 @@ private[sql] object StaxXmlParser {
/**
* Parse the current token (and related children) according to a desired schema
*/
private def startConvertObject(parser: StaxXmlParser, schema: StructType, rootTag: String): Option[Row] = {
private def startConvertObject(parser: StaxXmlParser,
schema: StructType,
rootTag: String): Option[Row] = {
if (parser.readAllEventsInFragment) {
parser.nextEvent
Some(convertObject(parser, schema, rootTag))
Expand Down Expand Up @@ -358,7 +361,8 @@ private[sql] object StaxXmlParser {
val field = event.asStartElement.getName.getLocalPart
schema.getFieldIndex(field) match {
case Some(index) =>
// For XML, it can contains the same keys. So we need to manually merge them to an array.
// For XML, it can contain the same keys.
// So we need to manually merge them to an array.
// TODO: This routine is hacky and should go out of this.
val dataType = schema(index).dataType
dataType match {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ private[sql] object StaxXmlPartialSchemaParser {
schemaData.mapPartitions { iter =>
iter.flatMap { xml =>
val factory = XMLInputFactory.newInstance()
val parser = new StaxXmlParser(factory.createXMLEventReader(new ByteArrayInputStream(xml.getBytes)))
val parser = new StaxXmlParser(
factory.createXMLEventReader(new ByteArrayInputStream(xml.getBytes)))
startInferField(parser, rootTag)
}
}
Expand Down Expand Up @@ -92,7 +93,7 @@ private[sql] object StaxXmlPartialSchemaParser {
inferArray(parser, parentField)

case _ =>
//TODO: Now it skips unsupported types (we might have to treat null values).
// TODO: Now it skips unsupported types (we might have to treat null values).
StringType
}
}
Expand Down
4 changes: 3 additions & 1 deletion src/main/scala/org/apache/spark/sql/xml/util/XmlFile.scala
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ import org.apache.spark.rdd.RDD
private[xml] object XmlFile {
val DEFAULT_CHARSET = Charset.forName("UTF-8")

def withCharset(context: SparkContext, location: String, charset: String, rootTag: String): RDD[String] = {
def withCharset(context: SparkContext, location: String,
charset: String,
rootTag: String): RDD[String] = {
context.hadoopConfiguration.set(XmlInputFormat.START_TAG_KEY, s"<$rootTag>")
context.hadoopConfiguration.set(XmlInputFormat.END_TAG_KEY, s"</$rootTag>")
if (Charset.forName(charset) == DEFAULT_CHARSET) {
Expand Down
22 changes: 0 additions & 22 deletions src/test/scala/SimpleExample.scala

This file was deleted.

0 comments on commit 764b83d

Please sign in to comment.