Skip to content

Commit

Permalink
ARROW-1237: [JAVA] expose the ability to set lastSet
Browse files Browse the repository at this point in the history
The changes here expose the ability to set "lastSet" on Nullable<var length>Vector.
I believe this is needed only for NullableVarCharVector and NullableVarBinaryVector. Hence the API is exposed through NullableValueVectors.java

Author: siddharth <[email protected]>

Closes apache#868 from siddharthteotia/ARROW-1237 and squashes the following commits:

786dfea [siddharth] ARROW-1237: addressed review comments and added more tests
73b2fc5 [siddharth] ARROW-1237: added some unit tests
f8c7277 [siddharth] ARROW-1237: expose the ability to set lastSet
  • Loading branch information
siddharthteotia authored and Jacques Nadeau committed Jul 24, 2017
1 parent e1b098e commit 457bb07
Show file tree
Hide file tree
Showing 5 changed files with 309 additions and 2 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,5 @@ MANIFEST

cpp/.idea/
python/.eggs/
.vscode
.vscode
.idea/
16 changes: 16 additions & 0 deletions java/vector/src/main/codegen/templates/NullableValueVectors.java
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,22 @@ public void reset(){
setCount = 0;
<#if type.major = "VarLen">lastSet = -1;</#if>
}

public void setLastSet(int value) {
<#if type.major = "VarLen">
lastSet = value;
<#else>
throw new UnsupportedOperationException();
</#if>
}

public int getLastSet() {
<#if type.major != "VarLen">
throw new UnsupportedOperationException();
<#else>
return lastSet;
</#if>
}
}
}
</#list>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,12 @@ public void setValueCount(int valueCount) {
vector.getMutator().setValueCount(childValueCount);
bits.getMutator().setValueCount(valueCount);
}

public void setLastSet(int value) {
lastSet = value;
}

public int getLastSet() { return lastSet; }
}

}
154 changes: 154 additions & 0 deletions java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,26 @@
*/
package org.apache.arrow.vector;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.impl.UnionListWriter;
import org.apache.arrow.vector.complex.impl.UnionListReader;
import org.apache.arrow.vector.complex.reader.FieldReader;
import org.apache.arrow.vector.complex.writer.FieldWriter;
import org.apache.arrow.vector.holders.NullableBigIntHolder;
import org.apache.arrow.vector.types.Types;
import org.apache.arrow.vector.types.Types.*;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.List;

public class TestListVector {

private BufferAllocator allocator;
Expand Down Expand Up @@ -80,4 +91,147 @@ public void testCopyFrom() throws Exception {
Assert.assertTrue("shouldn't be null", reader.isSet());
}
}

@Test
public void testSetLastSetUsage() throws Exception {
try (ListVector listVector = ListVector.empty("input", allocator)) {

/* Explicitly add the dataVector */
MinorType type = MinorType.BIGINT;
listVector.addOrGetVector(FieldType.nullable(type.getType()));

/* allocate memory */
listVector.allocateNew();

/* get inner vectors; bitVector and offsetVector */
List<BufferBacked> innerVectors = listVector.getFieldInnerVectors();
BitVector bitVector = (BitVector)innerVectors.get(0);
UInt4Vector offsetVector = (UInt4Vector)innerVectors.get(1);

/* get the underlying data vector -- NullableBigIntVector */
NullableBigIntVector dataVector = (NullableBigIntVector)listVector.getDataVector();

/* check current lastSet */
assertEquals(Integer.toString(0), Integer.toString(listVector.getMutator().getLastSet()));

int index = 0;
int offset = 0;

/* write [10, 11, 12] to the list vector at index */
bitVector.getMutator().setSafe(index, 1);
dataVector.getMutator().setSafe(0, 1, 10);
dataVector.getMutator().setSafe(1, 1, 11);
dataVector.getMutator().setSafe(2, 1, 12);
offsetVector.getMutator().setSafe(index + 1, 3);

index += 1;

/* write [13, 14] to the list vector at index 1 */
bitVector.getMutator().setSafe(index, 1);
dataVector.getMutator().setSafe(3, 1, 13);
dataVector.getMutator().setSafe(4, 1, 14);
offsetVector.getMutator().setSafe(index + 1, 5);

index += 1;

/* write [15, 16, 17] to the list vector at index 2 */
bitVector.getMutator().setSafe(index, 1);
dataVector.getMutator().setSafe(5, 1, 15);
dataVector.getMutator().setSafe(6, 1, 16);
dataVector.getMutator().setSafe(7, 1, 17);
offsetVector.getMutator().setSafe(index + 1, 8);

/* check current lastSet */
assertEquals(Integer.toString(0), Integer.toString(listVector.getMutator().getLastSet()));

/* set lastset and arbitrary valuecount for list vector.
*
* NOTE: if we don't execute setLastSet() before setLastValueCount(), then
* the latter will corrupt the offsetVector and thus the accessor will not
* retrieve the correct values from underlying dataVector. Run the test
* by commenting out next line and we should see failures from 5th assert
* onwards. This is why doing setLastSet() is important before setValueCount()
* once the vector has been loaded.
*
* Another important thing to remember is the value of lastSet itself.
* Even though the listVector has elements till index 2 only, the lastSet should
* be set as 3. This is because the offsetVector has valid offsets filled till index 3.
* If we do setLastSet(2), the offsetVector at index 3 will contain incorrect value
* after execution of setValueCount().
*
* correct state of the listVector
* bitvector {1, 1, 1, 0, 0.... }
* offsetvector {0, 3, 5, 8, 8, 8.....}
* datavector { [10, 11, 12],
* [13, 14],
* [15, 16, 17]
* }
*
* if we don't do setLastSet() before setValueCount --> incorrect state
* bitvector {1, 1, 1, 0, 0.... }
* offsetvector {0, 0, 0, 0, 0, 0.....}
* datavector { [10, 11, 12],
* [13, 14],
* [15, 16, 17]
* }
*
* if we do setLastSet(2) before setValueCount --> incorrect state
* bitvector {1, 1, 1, 0, 0.... }
* offsetvector {0, 3, 5, 5, 5, 5.....}
* datavector { [10, 11, 12],
* [13, 14],
* [15, 16, 17]
* }
*/
listVector.getMutator().setLastSet(3);
listVector.getMutator().setValueCount(10);

/* check the vector output */
final UInt4Vector.Accessor offsetAccessor = offsetVector.getAccessor();
final ValueVector.Accessor valueAccessor = dataVector.getAccessor();

index = 0;
offset = offsetAccessor.get(index);
assertEquals(Integer.toString(0), Integer.toString(offset));

Object actual = valueAccessor.getObject(offset);
assertEquals(new Long(10), (Long)actual);
offset++;
actual = valueAccessor.getObject(offset);
assertEquals(new Long(11), (Long)actual);
offset++;
actual = valueAccessor.getObject(offset);
assertEquals(new Long(12), (Long)actual);

index++;
offset = offsetAccessor.get(index);
assertEquals(Integer.toString(3), Integer.toString(offset));

actual = valueAccessor.getObject(offset);
assertEquals(new Long(13), (Long)actual);
offset++;
actual = valueAccessor.getObject(offset);
assertEquals(new Long(14), (Long)actual);

index++;
offset = offsetAccessor.get(index);
assertEquals(Integer.toString(5), Integer.toString(offset));

actual = valueAccessor.getObject(offsetAccessor.get(index));
assertEquals(new Long(15), (Long)actual);
offset++;
actual = valueAccessor.getObject(offset);
assertEquals(new Long(16), (Long)actual);
offset++;
actual = valueAccessor.getObject(offset);
assertEquals(new Long(17), (Long)actual);

index++;
offset = offsetAccessor.get(index);
assertEquals(Integer.toString(8), Integer.toString(offset));

actual = valueAccessor.getObject(offsetAccessor.get(index));
assertNull(actual);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,15 @@

import java.nio.charset.Charset;
import java.util.List;
import java.util.ArrayList;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;

import org.apache.arrow.vector.schema.ArrowRecordBatch;
import org.apache.arrow.vector.schema.TypeLayout;
import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.junit.After;
Expand All @@ -56,6 +60,9 @@ public void init() {
private final static byte[] STR1 = "AAAAA1".getBytes(utf8Charset);
private final static byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset);
private final static byte[] STR3 = "CCCC3".getBytes(utf8Charset);
private final static byte[] STR4 = "DDDDDDDD4".getBytes(utf8Charset);
private final static byte[] STR5 = "EEE5".getBytes(utf8Charset);
private final static byte[] STR6 = "FFFFF6".getBytes(utf8Charset);

@After
public void terminate() throws Exception {
Expand Down Expand Up @@ -509,11 +516,134 @@ public void testCopyFromWithNulls() {
} else {
assertEquals(Integer.toString(i), vector2.getAccessor().getObject(i).toString());
}

}
}
}

@Test
public void testSetLastSetUsage() {
try (final NullableVarCharVector vector = new NullableVarCharVector("myvector", allocator)) {

final NullableVarCharVector.Mutator mutator = vector.getMutator();

vector.allocateNew(1024 * 10, 1024);

setBytes(0, STR1, vector);
setBytes(1, STR2, vector);
setBytes(2, STR3, vector);
setBytes(3, STR4, vector);
setBytes(4, STR5, vector);
setBytes(5, STR6, vector);

/* Check current lastSet */
assertEquals(Integer.toString(-1), Integer.toString(mutator.getLastSet()));

/* Check the vector output */
final NullableVarCharVector.Accessor accessor = vector.getAccessor();
assertArrayEquals(STR1, accessor.get(0));
assertArrayEquals(STR2, accessor.get(1));
assertArrayEquals(STR3, accessor.get(2));
assertArrayEquals(STR4, accessor.get(3));
assertArrayEquals(STR5, accessor.get(4));
assertArrayEquals(STR6, accessor.get(5));

/*
* If we don't do setLastSe(5) before setValueCount(), then the latter will corrupt
* the value vector by filling in all positions [0,valuecount-1] will empty byte arrays.
* Run the test by commenting out next line and we should see incorrect vector output.
*/
mutator.setLastSet(5);
mutator.setValueCount(20);

/* Check the vector output again */
assertArrayEquals(STR1, accessor.get(0));
assertArrayEquals(STR2, accessor.get(1));
assertArrayEquals(STR3, accessor.get(2));
assertArrayEquals(STR4, accessor.get(3));
assertArrayEquals(STR5, accessor.get(4));
assertArrayEquals(STR6, accessor.get(5));
}
}

@Test
public void testVectorLoadUnload() {

try (final NullableVarCharVector vector1 = new NullableVarCharVector("myvector", allocator)) {

final NullableVarCharVector.Mutator mutator1 = vector1.getMutator();

vector1.allocateNew(1024 * 10, 1024);

mutator1.set(0, STR1);
mutator1.set(1, STR2);
mutator1.set(2, STR3);
mutator1.set(3, STR4);
mutator1.set(4, STR5);
mutator1.set(5, STR6);
assertEquals(Integer.toString(5), Integer.toString(mutator1.getLastSet()));
mutator1.setValueCount(15);
assertEquals(Integer.toString(14), Integer.toString(mutator1.getLastSet()));

/* Check the vector output */
final NullableVarCharVector.Accessor accessor1 = vector1.getAccessor();
assertArrayEquals(STR1, accessor1.get(0));
assertArrayEquals(STR2, accessor1.get(1));
assertArrayEquals(STR3, accessor1.get(2));
assertArrayEquals(STR4, accessor1.get(3));
assertArrayEquals(STR5, accessor1.get(4));
assertArrayEquals(STR6, accessor1.get(5));

Field field = vector1.getField();
String fieldName = field.getName();

List<Field> fields = new ArrayList<Field>();
List<FieldVector> fieldVectors = new ArrayList<FieldVector>();

fields.add(field);
fieldVectors.add(vector1);

Schema schema = new Schema(fields);

VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, accessor1.getValueCount());
VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1);

try (
ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE);
VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator);
) {

VectorLoader vectorLoader = new VectorLoader(schemaRoot2);
vectorLoader.load(recordBatch);

NullableVarCharVector vector2 = (NullableVarCharVector)schemaRoot2.getVector(fieldName);
NullableVarCharVector.Mutator mutator2 = vector2.getMutator();

/*
* lastSet would have internally been set by VectorLoader.load() when it invokes
* loadFieldBuffers.
*/
assertEquals(Integer.toString(14), Integer.toString(mutator2.getLastSet()));
mutator2.setValueCount(25);
assertEquals(Integer.toString(24), Integer.toString(mutator2.getLastSet()));

/* Check the vector output */
final NullableVarCharVector.Accessor accessor2 = vector2.getAccessor();
assertArrayEquals(STR1, accessor2.get(0));
assertArrayEquals(STR2, accessor2.get(1));
assertArrayEquals(STR3, accessor2.get(2));
assertArrayEquals(STR4, accessor2.get(3));
assertArrayEquals(STR5, accessor2.get(4));
assertArrayEquals(STR6, accessor2.get(5));
}
}
}

public static void setBytes(int index, byte[] bytes, NullableVarCharVector vector) {
final int currentOffset = vector.values.offsetVector.getAccessor().get(index);

vector.bits.getMutator().setToOne(index);
vector.values.offsetVector.getMutator().set(index + 1, currentOffset + bytes.length);
vector.values.data.setBytes(currentOffset, bytes, 0, bytes.length);
}
}

0 comments on commit 457bb07

Please sign in to comment.