Skip to content

Commit

Permalink
Merge pull request pentaho#4390 from tmcsantos/BACKLOG-18643
Browse files: browse the repository at this point in the history
Backlog 18643
  • Loading branch information
hbfernandes authored Sep 15, 2017
2 parents 71cf24d + 1d7732c commit e9c7e87
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 86 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<extended_description/>
<trans_version/>
<trans_type>Normal</trans_type>
<directory>/</directory>
<directory>&#x2f;</directory>
<parameters>
</parameters>
<log>
Expand Down Expand Up @@ -427,10 +427,10 @@
<clusterschemas>
</clusterschemas>
<created_user/>
<created_date>2014/02/26 14:49:49.010</created_date>
<created_date>2014&#x2f;02&#x2f;26 14&#x3a;49&#x3a;49.010</created_date>
<modified_user>-</modified_user>
<modified_date>2008/09/04 11:49:56.623</modified_date>
<key_for_session_key>H4sIAAAAAAAAAAMAAAAAAAAAAAA=</key_for_session_key>
<modified_date>2008&#x2f;09&#x2f;04 11&#x3a;49&#x3a;56.623</modified_date>
<key_for_session_key>H4sIAAAAAAAAAAMAAAAAAAAAAAA&#x3d;</key_for_session_key>
<is_key_private>N</is_key_private>
</info>
<notepads>
Expand Down Expand Up @@ -530,7 +530,8 @@
<yloc>191</yloc>
<draw>Y</draw>
</GUI>
</step>
</step>

<step>
<name>Data Validator</name>
<type>Validator</type>
Expand All @@ -542,37 +543,35 @@
<method>none</method>
<schema_name/>
</partitioning>
<validate_all>N</validate_all>
<concat_errors>N</concat_errors>
<concat_separator/>
<validator_field>
<name>territory</name>
<validation_name>Check territory</validation_name>
<max_length>-1</max_length>
<min_length>-1</min_length>
<null_allowed>Y</null_allowed>
<only_null_allowed>N</only_null_allowed>
<only_numeric_allowed>N</only_numeric_allowed>
<data_type>String</data_type>
<data_type_verified>N</data_type_verified>
<conversion_mask/>
<decimal_symbol/>
<grouping_symbol/>
<max_value/>
<min_value/>
<start_string/>
<end_string/>
<start_string_not_allowed/>
<end_string_not_allowed/>
<regular_expression/>
<regular_expression_not_allowed/>
<error_code/>
<error_description/>
<is_sourcing_values>Y</is_sourcing_values>
<sourcing_step>Territories.txt</sourcing_step>
<sourcing_field>Territory</sourcing_field>
<allowed_value/>
</validator_field>
<validate_all>N</validate_all>
<concat_errors>N</concat_errors>
<concat_separator/>
<validator_field><name>territory</name>
<validation_name>Check territory</validation_name>
<max_length>-1</max_length>
<min_length>-1</min_length>
<null_allowed>Y</null_allowed>
<only_null_allowed>N</only_null_allowed>
<only_numeric_allowed>N</only_numeric_allowed>
<data_type>String</data_type>
<data_type_verified>N</data_type_verified>
<conversion_mask/>
<decimal_symbol/>
<grouping_symbol/>
<max_value/>
<min_value/>
<start_string/>
<end_string/>
<start_string_not_allowed/>
<end_string_not_allowed/>
<regular_expression/>
<regular_expression_not_allowed/>
<error_code/>
<error_description/>
<is_sourcing_values>Y</is_sourcing_values>
<sourcing_step>Territories.txt</sourcing_step>
<sourcing_field>T</sourcing_field>
<allowed_value></allowed_value></validator_field>
<cluster_schema/>
<remotesteps>
<input>
Expand All @@ -585,7 +584,8 @@
<yloc>124</yloc>
<draw>Y</draw>
</GUI>
</step>
</step>

<step>
<name>EMEA</name>
<type>RowGenerator</type>
Expand Down Expand Up @@ -628,7 +628,8 @@
<yloc>124</yloc>
<draw>Y</draw>
</GUI>
</step>
</step>

<step>
<name>Errors</name>
<type>Dummy</type>
Expand All @@ -652,7 +653,8 @@
<yloc>239</yloc>
<draw>Y</draw>
</GUI>
</step>
</step>

<step>
<name>OK</name>
<type>Dummy</type>
Expand All @@ -676,7 +678,8 @@
<yloc>124</yloc>
<draw>Y</draw>
</GUI>
</step>
</step>

<step>
<name>Territories.txt</name>
<type>CsvInput</type>
Expand All @@ -688,12 +691,12 @@
<method>none</method>
<schema_name/>
</partitioning>
<filename>${Internal.Transformation.Filename.Directory}/files/Territories.txt</filename>
<filename>&#x24;&#x7b;Internal.Transformation.Filename.Directory&#x7d;&#x2f;files&#x2f;Territories.txt</filename>
<filename_field/>
<rownum_field/>
<include_filename>N</include_filename>
<separator>,</separator>
<enclosure>"</enclosure>
<enclosure>&#x22;</enclosure>
<header>Y</header>
<buffer_size>50000</buffer_size>
<lazy_conversion>N</lazy_conversion>
Expand All @@ -703,12 +706,12 @@
<encoding/>
<fields>
<field>
<name>Territory</name>
<name>T</name>
<type>String</type>
<format/>
<currency/>
<decimal/>
<group>,</group>
<group/>
<length>5</length>
<precision>-1</precision>
<trim_type>none</trim_type>
Expand All @@ -726,7 +729,8 @@
<yloc>239</yloc>
<draw>Y</draw>
</GUI>
</step>
</step>

<step>
<name>USA</name>
<type>RowGenerator</type>
Expand Down Expand Up @@ -769,20 +773,21 @@
<yloc>57</yloc>
<draw>Y</draw>
</GUI>
</step>
</step>

<step_error_handling>
<error>
<source_step>Data Validator</source_step>
<target_step>Errors</target_step>
<is_enabled>Y</is_enabled>
<nr_valuename>NrErrors</nr_valuename>
<descriptions_valuename>ErrorDescription</descriptions_valuename>
<fields_valuename>ErrorFields</fields_valuename>
<codes_valuename>ErrorCodes</codes_valuename>
<max_errors>-1</max_errors>
<max_pct_errors>-1</max_pct_errors>
<min_pct_rows>-1</min_pct_rows>
</error>
<error>
<source_step>Data Validator</source_step>
<target_step>Errors</target_step>
<is_enabled>Y</is_enabled>
<nr_valuename>NrErrors</nr_valuename>
<descriptions_valuename>ErrorDescription</descriptions_valuename>
<fields_valuename>ErrorFields</fields_valuename>
<codes_valuename>ErrorCodes</codes_valuename>
<max_errors>-1</max_errors>
<max_pct_errors>-1</max_pct_errors>
<min_pct_rows>-1</min_pct_rows>
</error>
</step_error_handling>
<slave-step-copy-partition-distribution>
</slave-step-copy-partition-distribution>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,47 +22,65 @@

package org.pentaho.di.trans.steps.csvinput;

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.collections4.MultiValuedMap;
import org.apache.commons.collections4.multimap.ArrayListValuedHashMap;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;

public class NamedFieldsMapping implements FieldsMapping {

private final Map<Integer, Integer> actualToMetaFieldMapping;
private final int[] actualToMetaFieldMapping;

public NamedFieldsMapping( Map<Integer, Integer> actualToMetaFieldMapping ) {
public NamedFieldsMapping( int[] actualToMetaFieldMapping ) {
this.actualToMetaFieldMapping = actualToMetaFieldMapping;
}

@Override
public int fieldMetaIndex( int index ) {
Integer metaIndex = actualToMetaFieldMapping.get( index );
return metaIndex == null ? FieldsMapping.FIELD_DOES_NOT_EXIST : metaIndex.intValue();
if ( index >= size() || index < 0 ) {
return FIELD_DOES_NOT_EXIST;
}
return actualToMetaFieldMapping[index];
}

@Override
public int size() {
return actualToMetaFieldMapping.size();
return actualToMetaFieldMapping.length;
}

public static NamedFieldsMapping mapping( String[] actualFieldNames, String[] metaFieldNames ) {
MultiValuedMap<String, Integer> metaNameToIndex = new ArrayListValuedHashMap<String, Integer>();
for ( int j = 0; j < metaFieldNames.length; j++ ) {
metaNameToIndex.put( metaFieldNames[j], Integer.valueOf( j ) );
LinkedHashMap<String, List<Integer>> metaNameToIndex = new LinkedHashMap<>();
List<Integer> unmatchedMetaFields = new ArrayList<>();
int[] actualToMetaFieldMapping = new int[actualFieldNames.length];

for ( int i = 0; i < metaFieldNames.length; i++ ) {
List<Integer> coll = metaNameToIndex.getOrDefault( metaFieldNames[i], new ArrayList<>() );
coll.add( i );
metaNameToIndex.put( metaFieldNames[i], coll );
}
Map<Integer, Integer> actualToMetaFieldMapping = new HashMap<>();

for ( int i = 0; i < actualFieldNames.length; i++ ) {
Collection<Integer> columnIndexes = metaNameToIndex.get( actualFieldNames[i] );
if ( columnIndexes.isEmpty() ) {
List<Integer> columnIndexes = metaNameToIndex.get( actualFieldNames[i] );
if ( columnIndexes == null || columnIndexes.isEmpty() ) {
unmatchedMetaFields.add( i );
actualToMetaFieldMapping[i] = FIELD_DOES_NOT_EXIST;
continue;
}
Integer columnIndex = columnIndexes.iterator().next();
metaNameToIndex.removeMapping( actualFieldNames[i], columnIndex );
actualToMetaFieldMapping.put( i, columnIndex );
actualToMetaFieldMapping[i] = columnIndexes.remove( 0 );
}

Iterator<Integer> remainingMetaIndexes = metaNameToIndex.values().stream()
.flatMap( List::stream )
.sorted()
.iterator();

for ( int idx : unmatchedMetaFields ) {
if ( !remainingMetaIndexes.hasNext() ) {
break;
}
actualToMetaFieldMapping[ idx ] = remainingMetaIndexes.next();
}

return new NamedFieldsMapping( actualToMetaFieldMapping );
}

Expand Down
2 changes: 1 addition & 1 deletion engine/src/main/resources/kettle-steps.xml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@
<step id="SetValueField"> <description>i18n:org.pentaho.di.trans.step:BaseStep.TypeLongDesc.SetValueField</description> <classname>org.pentaho.di.trans.steps.setvaluefield.SetValueFieldMeta</classname> <category>i18n:org.pentaho.di.trans.step:BaseStep.Category.Transform</category> <tooltip>i18n:org.pentaho.di.trans.step:BaseStep.TypeTooltipDesc.SetValueField</tooltip> <iconfile>ui/images/SVF.svg</iconfile> <documentation_url>http://wiki.pentaho.com/display/EAI/Set+field+value</documentation_url> <cases_url/> <forum_url/> </step>
<step id="ExecProcess"> <description>i18n:org.pentaho.di.trans.step:BaseStep.TypeLongDesc.ExecProcess</description> <classname>org.pentaho.di.trans.steps.execprocess.ExecProcessMeta</classname> <category>i18n:org.pentaho.di.trans.step:BaseStep.Category.Utility</category> <tooltip>i18n:org.pentaho.di.trans.step:BaseStep.TypeTooltipDesc.ExecProcess</tooltip> <iconfile>ui/images/RPL.svg</iconfile> <documentation_url>http://wiki.pentaho.com/display/EAI/Execute+a+process</documentation_url> <cases_url/> <forum_url/> </step>
<step id="WebServiceLookup"> <description>i18n:org.pentaho.di.trans.step:BaseStep.TypeLongDesc.WebServiceLookup</description> <classname>org.pentaho.di.trans.steps.webservices.WebServiceMeta</classname> <category>i18n:org.pentaho.di.trans.step:BaseStep.Category.Lookup</category> <tooltip>i18n:org.pentaho.di.trans.step:BaseStep.TypeTooltipDesc.WebServiceLookup</tooltip> <iconfile>ui/images/WSL.svg</iconfile> <documentation_url>http://wiki.pentaho.com/display/EAI/Web+services+lookup</documentation_url> <cases_url/> <forum_url/> </step>
<step id="CsvInput"> <description>i18n:org.pentaho.di.trans.step:BaseStep.TypeLongDesc.CsvInput</description> <classname>org.pentaho.di.trans.steps.csvinput.CsvInputMeta</classname> <category>i18n:org.pentaho.di.trans.step:BaseStep.Category.Input</category> <tooltip>i18n:org.pentaho.di.trans.step:BaseStep.TypeTooltipDesc.CsvInput</tooltip> <iconfile>ui/images/TFI.svg</iconfile> <documentation_url>http://wiki.pentaho.com/display/EAI/CSV+Input</documentation_url> <cases_url/> <forum_url/> </step>
<step id="CsvInput"> <description>i18n:org.pentaho.di.trans.step:BaseStep.TypeLongDesc.CsvInput</description> <classname>org.pentaho.di.trans.steps.csvinput.CsvInputMeta</classname> <category>i18n:org.pentaho.di.trans.step:BaseStep.Category.Input</category> <tooltip>i18n:org.pentaho.di.trans.step:BaseStep.TypeTooltipDesc.CsvInput</tooltip> <iconfile>ui/images/TFI.svg</iconfile> <documentation_url>http://wiki.pentaho.com/display/EAI/CSV+File+Input</documentation_url> <cases_url/> <forum_url/> </step>
<step id="ParallelGzipCsvInput"> <description>i18n:org.pentaho.di.trans.step:BaseStep.TypeLongDesc.ParallelGzipCsvInput</description> <classname>org.pentaho.di.trans.steps.parallelgzipcsv.ParGzipCsvInputMeta</classname> <category>i18n:org.pentaho.di.trans.step:BaseStep.Category.Input</category> <tooltip>i18n:org.pentaho.di.trans.step:BaseStep.TypeTooltipDesc.ParallelGzipCsvInput</tooltip> <iconfile>ui/images/TFI.svg</iconfile> <documentation_url>http://wiki.pentaho.com/display/EAI/GZIP+CSV+Input</documentation_url> <cases_url/> <forum_url/> </step>
<step id="MailInput"> <description>i18n:org.pentaho.di.trans.step:BaseStep.TypeLongDesc.MailInput</description> <classname>org.pentaho.di.trans.steps.mailinput.MailInputMeta</classname> <category>i18n:org.pentaho.di.trans.step:BaseStep.Category.Input</category> <tooltip>i18n:org.pentaho.di.trans.step:BaseStep.TypeTooltipDesc.MailInput</tooltip> <iconfile>ui/images/GETPOP.svg</iconfile> <documentation_url>http://wiki.pentaho.com/display/EAI/Email+Messages+Input</documentation_url> <cases_url/> <forum_url/> </step>
<step id="GetTableNames"> <description>i18n:org.pentaho.di.trans.step:BaseStep.TypeLongDesc.GetTableNames</description> <classname>org.pentaho.di.trans.steps.gettablenames.GetTableNamesMeta</classname> <category>i18n:org.pentaho.di.trans.step:BaseStep.Category.Input</category> <tooltip>i18n:org.pentaho.di.trans.step:BaseStep.TypeTooltipDesc.GetTableNames</tooltip> <iconfile>ui/images/GTN.svg</iconfile> <documentation_url>http://wiki.pentaho.com/display/EAI/Get+table+names</documentation_url> <cases_url/> <forum_url/> </step>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@

import static org.junit.Assert.assertEquals;

import java.util.HashMap;
import java.util.Map;

import org.junit.Before;
import org.junit.Test;

Expand All @@ -36,10 +33,7 @@ public class NamedFieldsMappingTest {

@Before
public void before() {
Map<Integer, Integer> actualToMetaFieldsMap = new HashMap<>();
actualToMetaFieldsMap.put( 0, 3 );
actualToMetaFieldsMap.put( 1, 4 );
fieldsMapping = new NamedFieldsMapping( actualToMetaFieldsMap );
fieldsMapping = new NamedFieldsMapping( new int[] { 3, 4 } );
}

@Test
Expand Down Expand Up @@ -80,4 +74,42 @@ public void fieldMetaIndexWithUnexistingField_nonUniqueColumnNames() {
assertEquals( FieldsMapping.FIELD_DOES_NOT_EXIST, mapping.fieldMetaIndex( 2 ) );
}

/**
 * Checks how {@code NamedFieldsMapping.mapping( actualFieldNames, metaFieldNames )} resolves actual
 * fields whose names have no (remaining) counterpart among the meta field names.
 *
 * The assertions below expect that name matches are resolved first, and that each actual field left
 * without a match then receives, in order, the smallest meta index that was not consumed by a name
 * match. When no meta index is left, {@code FieldsMapping.FIELD_DOES_NOT_EXIST} is expected.
 */
@Test
public void mappingWithNonMatchingColumnNames() {
// Scenario 1: no name matches at all -> the fields are expected to pair up by position.
NamedFieldsMapping mapping =
NamedFieldsMapping.mapping( new String[] {"One", "Two", "Three" }, new String[] { "A", "B", "C" } );
assertEquals( 0, mapping.fieldMetaIndex( 0 ) ); // One -> A
assertEquals( 1, mapping.fieldMetaIndex( 1 ) ); // Two -> B
assertEquals( 2, mapping.fieldMetaIndex( 2 ) ); // Three -> C

// Scenario 2: duplicated meta name "A" is consumed by both "A" actual fields, in order;
// nothing is left over for the unmatched "B".
mapping =
NamedFieldsMapping.mapping( new String[] {"A", "B", "A" }, new String[] { "A", "A" } );
assertEquals( 0, mapping.fieldMetaIndex( 0 ) ); // A -> A
assertEquals( FieldsMapping.FIELD_DOES_NOT_EXIST, mapping.fieldMetaIndex( 1 ) ); // B -> undefined
assertEquals( 1, mapping.fieldMetaIndex( 2 ) ); // A -> A

// Scenario 3: the second "A" has no "A" meta field left, so it takes the smallest unused
// meta index (1, "X"); meta field "Z" (3) stays unused.
mapping =
NamedFieldsMapping.mapping( new String[] {"A", "B", "A" }, new String[] { "A", "X", "B", "Z" } );
assertEquals( 0, mapping.fieldMetaIndex( 0 ) ); // A -> A
assertEquals( 2, mapping.fieldMetaIndex( 1 ) ); // B -> B
assertEquals( 1, mapping.fieldMetaIndex( 2 ) ); // A -> X
assertEquals( FieldsMapping.FIELD_DOES_NOT_EXIST, mapping.fieldMetaIndex( 3 ) ); // index 3 is past the 3 actual fields -> undefined

// Scenario 4: both "A" fields match by name; the unmatched "B" takes the only unused meta index (2, "C").
mapping =
NamedFieldsMapping.mapping( new String[] {"A", "B", "A" }, new String[] { "A", "A", "C" } );
assertEquals( 0, mapping.fieldMetaIndex( 0 ) ); // A -> A
assertEquals( 2, mapping.fieldMetaIndex( 1 ) ); // B -> C
assertEquals( 1, mapping.fieldMetaIndex( 2 ) ); // A -> A

// Scenario 5: more actual fields than meta fields. "B" and "C" match by name; the unmatched
// "A" and "D" take the unused meta indexes 0 and 2 in ascending order; nothing is left for "E".
mapping =
NamedFieldsMapping.mapping(
new String[] {"A", "B", "C", "D", "E" },
new String[] { "X", "C", "Y", "B" } );
assertEquals( 0, mapping.fieldMetaIndex( 0 ) ); // A -> X
assertEquals( 3, mapping.fieldMetaIndex( 1 ) ); // B -> B
assertEquals( 1, mapping.fieldMetaIndex( 2 ) ); // C -> C
assertEquals( 2, mapping.fieldMetaIndex( 3 ) ); // D -> Y
assertEquals( FieldsMapping.FIELD_DOES_NOT_EXIST, mapping.fieldMetaIndex( 4 ) ); // E -> undefined
}

}

0 comments on commit e9c7e87

Please sign in to comment.