Skip to content

Commit

Permalink
[PDI-18440][PDI-18442] Fixes issues with null v.s. empty values (pent…
Browse files Browse the repository at this point in the history
…aho#7232)

* [PDI-18440][PDI-18442] Adds new Kettle properties

* [PDI-18440] Causes the 'Get XML Data' step to yield null values on missing elements

* [PDI-18442] Allows the 'Filter rows' step to filter empty values

* [PDI-18440] The 'Data grid' should yield null values for empty values when the metadata doesn't specify 'Set empty string'
  • Loading branch information
sailingscally authored Jan 30, 2020
1 parent 92f55c1 commit 6d0e66d
Show file tree
Hide file tree
Showing 8 changed files with 308 additions and 28 deletions.
18 changes: 18 additions & 0 deletions core/src/main/java/org/pentaho/di/core/Const.java
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,24 @@ public String getMessage() {
*/
public static final String KETTLE_EMPTY_STRING_DIFFERS_FROM_NULL = "KETTLE_EMPTY_STRING_DIFFERS_FROM_NULL";

/**
* This flag will prevent Kettle from converting {@code null} strings to empty strings in {@link org.pentaho.di.core.row.value.ValueMetaBase}
* The default value is {@code false}.
*/
public static final String KETTLE_DO_NOT_NORMALIZE_NULL_STRING_TO_EMPTY = "KETTLE_DO_NOT_NORMALIZE_NULL_STRING_TO_EMPTY";

/**
* This flag will prevent Kettle from yielding {@code null} as the value of an empty XML tag in {@link org.pentaho.di.core.xml.XMLHandler}
* The default value is {@code false} and an empty XML tag will produce a {@code null} value.
*/
public static final String KETTLE_XML_EMPTY_TAG_YIELDS_EMPTY_VALUE = "KETTLE_XML_EMPTY_TAG_YIELDS_EMPTY_VALUE";

/**
* This flag will cause the "Get XML data" step to yield null values on missing elements and empty values on empty elements when set to "Y".
* By default, both empty elements and missing elements will yield empty values.
*/
public static final String KETTLE_XML_MISSING_TAG_YIELDS_NULL_VALUE = "KETTLE_XML_MISSING_TAG_YIELDS_NULL_VALUE";

/**
* System wide flag to allow non-strict string to number conversion for backward compatibility. If this setting is set
* to "Y", an string starting with digits will be converted successfully into a number. (example: 192.168.1.1 will be
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4033,8 +4033,13 @@ public Object convertDataFromString( String pol, ValueMetaInterface convertMeta,
Boolean isEmptyAndNullDiffer = convertStringToBoolean(
Const.NVL( System.getProperty( Const.KETTLE_EMPTY_STRING_DIFFERS_FROM_NULL, "N" ), "N" ) );

if ( pol == null && isStringValue && isEmptyAndNullDiffer ) {
pol = Const.NULL_STRING;
Boolean normalizeNullStringToEmpty = !convertStringToBoolean(
Const.NVL( System.getProperty( Const.KETTLE_DO_NOT_NORMALIZE_NULL_STRING_TO_EMPTY, "N" ), "N" ) );

if ( normalizeNullStringToEmpty ) {
if ( pol == null && isStringValue && isEmptyAndNullDiffer ) {
pol = Const.NULL_STRING;
}
}

if ( pol == null ) {
Expand Down
15 changes: 12 additions & 3 deletions core/src/main/java/org/pentaho/di/core/xml/XMLHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2019 by Hitachi Vantara : http://www.pentaho.com
* Copyright (C) 2002-2020 by Hitachi Vantara : http://www.pentaho.com
*
*******************************************************************************
*
Expand Down Expand Up @@ -70,6 +70,8 @@
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import static org.pentaho.di.core.row.value.ValueMetaBase.convertStringToBoolean;


/**
* This class contains a number of (static) methods to facilitate the retrieval of information from XML Node(s).
Expand Down Expand Up @@ -131,12 +133,19 @@ public static String getTagValue( Node n, String tag ) {
return null;
}

Boolean xmlEmptyTagYieldsEmptyValue = convertStringToBoolean(
Const.NVL( System.getProperty( Const.KETTLE_XML_EMPTY_TAG_YIELDS_EMPTY_VALUE, "N" ), "N" ) );

children = n.getChildNodes();
for ( int i = 0; i < children.getLength(); i++ ) {
childnode = children.item( i );
if ( childnode.getNodeName().equalsIgnoreCase( tag ) ) {
if ( childnode.getFirstChild() != null ) {
return childnode.getFirstChild().getNodeValue();
if ( xmlEmptyTagYieldsEmptyValue ) {
return childnode.getTextContent();
} else {
if ( childnode.getFirstChild() != null ) {
return childnode.getFirstChild().getNodeValue();
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2020 by Hitachi Vantara : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/

package org.pentaho.di.core.row.value;

import org.junit.Test;
import org.pentaho.di.core.Const;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;

public class ValueMetaBaseTest_NullEmpty {

/**
* By default, converting null value to a string value will yield a null value.
* This is the expected behavior in current and past versions.
*/
@Test
public void convertDataFromStringWithDefaults() throws Exception {
System.setProperty( Const.KETTLE_EMPTY_STRING_DIFFERS_FROM_NULL, "N" );
System.setProperty( Const.KETTLE_DO_NOT_NORMALIZE_NULL_STRING_TO_EMPTY, "N" );

ValueMetaBase out = new ValueMetaString();
ValueMetaBase value = new ValueMetaString();

Object data = out.convertDataFromString( null, value, null, null, 0 );
assertNull( data );
}

/**
* When KETTLE_EMPTY_STRING_DIFFERS_FROM_NULL is set to "Y" whe start getting unexpected results, see PDI-18440.
* This flag should have no effect in data conversions, only when comparing values.
*/
@Test
public void convertDataFromStringWithEmptyDiffersFromNull() throws Exception {
System.setProperty( Const.KETTLE_EMPTY_STRING_DIFFERS_FROM_NULL, "Y" );
System.setProperty( Const.KETTLE_DO_NOT_NORMALIZE_NULL_STRING_TO_EMPTY, "N" );

ValueMetaBase out = new ValueMetaString();
ValueMetaBase value = new ValueMetaString();

Object data = out.convertDataFromString( null, value, null, null, 0 );
assertEquals( "", data );
}

/**
* The new KETTLE_DO_NOT_NORMALIZE_NULL_STRING_TO_EMPTY flag fixes PDI-18440 resetting the behavior to what is expected.
*/
@Test
public void convertDataFromStringWithEmptyDiffersFromNullAndDoNotNormalize() throws Exception {
System.setProperty( Const.KETTLE_EMPTY_STRING_DIFFERS_FROM_NULL, "Y" );
System.setProperty( Const.KETTLE_DO_NOT_NORMALIZE_NULL_STRING_TO_EMPTY, "Y" );

ValueMetaBase out = new ValueMetaString();
ValueMetaBase value = new ValueMetaString();

Object data = out.convertDataFromString( null, value, null, null, 0 );
assertNull( data );
}
}
81 changes: 81 additions & 0 deletions core/src/test/java/org/pentaho/di/core/xml/XMLHandlerTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2020 by Hitachi Vantara : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/

package org.pentaho.di.core.xml;

import org.junit.Test;
import org.pentaho.di.core.Const;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;

public class XMLHandlerTest {

@Test
public void getTagValueWithNullNode() {
assertNull( XMLHandler.getTagValue( null, "text" ) );
}

/**
* Default behavior, an empty XML tag in the "Filter rows" step meta will be considered {@code null}.
* This will prevent filtering rows with empty values.
*/
@Test
public void getTagValueEmptyTagYieldsNullValue() {
System.setProperty( Const.KETTLE_XML_EMPTY_TAG_YIELDS_EMPTY_VALUE, "N" );
assertNull( XMLHandler.getTagValue( getNode(), "text" ) );
}

/**
* An empty XML tag in the "Filter rows" step meta will be considered an empty string.
* This will allow filtering rows with empty values.
*/
@Test
public void getTagValueEmptyTagYieldsEmptyValue() {
System.setProperty( Const.KETTLE_XML_EMPTY_TAG_YIELDS_EMPTY_VALUE, "Y" );
assertEquals( "", XMLHandler.getTagValue( getNode(), "text" ) );
}

private Node getNode() {
Element first = mock( Element.class );
doReturn( null ).when( first ).getNodeValue();

Node child = mock( Node.class );
doReturn( "text" ).when( child ).getNodeName();
doReturn( first ).when( child ).getFirstChild();
doReturn( "" ).when( child ).getTextContent();

NodeList children = mock( NodeList.class );
doReturn( 1 ).when( children ).getLength();
doReturn( child ).when( children ).item( 0 );

Node node = mock( Node.class );
doReturn( children ).when( node ).getChildNodes();

return node;
}
}
18 changes: 18 additions & 0 deletions engine/src/main/resources/kettle-variables.xml
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,24 @@
<default-value>N</default-value>
</kettle-variable>

<kettle-variable>
<description>Prevents Kettle from converting null strings to empty strings when KETTLE_EMPTY_STRING_DIFFERS_FROM_NULL is set to "Y".</description>
<variable>KETTLE_DO_NOT_NORMALIZE_NULL_STRING_TO_EMPTY</variable>
<default-value>N</default-value>
</kettle-variable>

<kettle-variable>
<description>Prevent Kettle from yielding null as the value of an empty value.</description>
<variable>KETTLE_XML_EMPTY_TAG_YIELDS_EMPTY_VALUE</variable>
<default-value>N</default-value>
</kettle-variable>

<kettle-variable>
<description>Cause the "Get XML data" step to yield null values on missing elements and empty values on empty elements when set to "Y".</description>
<variable>KETTLE_XML_MISSING_TAG_YIELDS_NULL_VALUE</variable>
<default-value>N</default-value>
</kettle-variable>

<kettle-variable>
<description>The maximum number of log lines that are kept internally by Kettle. Set to 0 to keep all rows
(default)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2019 by Hitachi Vantara : http://www.pentaho.com
* Copyright (C) 2002-2020 by Hitachi Vantara : http://www.pentaho.com
*
*******************************************************************************
*
Expand Down Expand Up @@ -66,6 +66,8 @@
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

import static org.pentaho.di.core.row.value.ValueMetaBase.convertStringToBoolean;

/**
* Read XML files, parse them and convert them to rows and writes these to one or more output streams.
*
Expand Down Expand Up @@ -753,31 +755,40 @@ private Object[] processPutRow( Node node ) throws KettleException {
// Get node value
String nodevalue;

Boolean xmlMissingTagYieldsNullValue = convertStringToBoolean(
Const.NVL( System.getProperty( Const.KETTLE_XML_MISSING_TAG_YIELDS_NULL_VALUE, "N" ), "N" ) );

// Handle namespaces
if ( meta.isNamespaceAware() ) {
XPath xpathField = node.createXPath( addNSPrefix( XPathValue, data.PathValue ) );
xpathField.setNamespaceURIs( data.NAMESPACE );
if ( xmlDataField.getResultType() == GetXMLDataField.RESULT_TYPE_VALUE_OF ) {
nodevalue = xpathField.valueOf( node );
if ( xmlMissingTagYieldsNullValue ) {
nodevalue = xpathField.selectSingleNode( node ) != null ? xpathField.valueOf( node ) : null;
} else {
nodevalue = xpathField.valueOf( node );
}
} else {
// nodevalue=xpathField.selectSingleNode(node).asXML();
Node n = xpathField.selectSingleNode( node );
if ( n != null ) {
nodevalue = n.asXML();
} else {
nodevalue = "";
nodevalue = xmlMissingTagYieldsNullValue ? null : "";
}
}
} else {
if ( xmlDataField.getResultType() == GetXMLDataField.RESULT_TYPE_VALUE_OF ) {
nodevalue = node.valueOf( XPathValue );
if ( xmlMissingTagYieldsNullValue ) {
nodevalue = node.selectSingleNode( XPathValue ) != null ? node.valueOf( XPathValue ) : null;
} else {
nodevalue = node.valueOf( XPathValue );
}
} else {
// nodevalue=node.selectSingleNode(XPathValue).asXML();
Node n = node.selectSingleNode( XPathValue );
if ( n != null ) {
nodevalue = n.asXML();
} else {
nodevalue = "";
nodevalue = xmlMissingTagYieldsNullValue ? null : "";
}
}
}
Expand Down
Loading

0 comments on commit 6d0e66d

Please sign in to comment.