Skip to content

Commit

Permalink
ARROW-14718: [Java] loadValidityBuffer should avoid allocating memory…
Browse files Browse the repository at this point in the history
… when input is not null and there are only null or non-null values

Currently, `BitVectorHelper.loadValidityBuffer` always allocates memory when the source vector contains only null or only non-null values. However, since the [format also allows](https://arrow.apache.org/docs/format/Columnar.html#validity-bitmaps) a validity buffer to be present even when all values are null or all are non-null, the method should also check whether the input validity buffer is null, and avoid allocating a new buffer when it is not.

Closes apache#11709 from sunchao/ARROW-14718

Authored-by: Chao Sun <[email protected]>
Signed-off-by: Chao Sun <[email protected]>
  • Loading branch information
sunchao committed Dec 14, 2021
1 parent 36367fe commit 902b541
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -328,8 +328,15 @@ public static ArrowBuf loadValidityBuffer(final ArrowFieldNode fieldNode,
final BufferAllocator allocator) {
final int valueCount = fieldNode.getLength();
ArrowBuf newBuffer = null;
/* either all NULLs or all non-NULLs */
if (fieldNode.getNullCount() == 0 || fieldNode.getNullCount() == valueCount) {

// Create a new validity buffer iff both of the following are true:
// - validity buffer is not present, that is, it is either null or empty (in the case of
// IPC for instance).
// - values are either all NULLs or all non-NULLs
boolean isValidityBufferNull = sourceValidityBuffer == null ||
sourceValidityBuffer.capacity() == 0;
if (isValidityBufferNull &&
(fieldNode.getNullCount() == 0 || fieldNode.getNullCount() == valueCount)) {
newBuffer = allocator.buffer(getValidityBufferSize(valueCount));
newBuffer.setZero(0, newBuffer.capacity());
if (fieldNode.getNullCount() != 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
import org.junit.Test;

import io.netty.util.internal.PlatformDependent;
Expand Down Expand Up @@ -222,6 +223,61 @@ public void testConcatBitsInPlace() {
}
}

/**
 * Checks when {@code BitVectorHelper.loadValidityBuffer} allocates a fresh validity buffer and
 * when it reuses the buffer supplied by the caller.
 *
 * <p>Cases covered:
 * <ul>
 *   <li>no source buffer, all values null: a new zero-filled buffer is allocated;</li>
 *   <li>no source buffer, no values null: a new buffer with every bit set is allocated;</li>
 *   <li>source buffer present, all values null or none null: no additional memory is allocated;</li>
 *   <li>source buffer present, mixed values: the result shares the source memory and content.</li>
 * </ul>
 */
@Test
public void testLoadValidityBuffer() {
  final int valueCount = 1024;
  // One validity bit per value, eight values per byte.
  final int validityBytes = valueCount / 8;

  try (RootAllocator allocator = new RootAllocator(1024)) {
    // No source buffer and every value is null: a new, zero-filled buffer must be allocated.
    ArrowFieldNode fieldNode = new ArrowFieldNode(valueCount, valueCount);
    try (ArrowBuf buf = BitVectorHelper.loadValidityBuffer(fieldNode, null, allocator)) {
      assertEquals(validityBytes, allocator.getAllocatedMemory());
      for (int i = 0; i < validityBytes; i++) {
        assertEquals(0, buf.getByte(i));
      }
    }

    // No source buffer and no value is null: a new buffer with all bits set must be allocated.
    fieldNode = new ArrowFieldNode(valueCount, 0);
    try (ArrowBuf buf = BitVectorHelper.loadValidityBuffer(fieldNode, null, allocator)) {
      assertEquals(validityBytes, allocator.getAllocatedMemory());
      for (int i = 0; i < validityBytes; i++) {
        assertEquals((byte) 0xff, buf.getByte(i));
      }
    }

    // A source buffer is supplied and every value is null: the only memory accounted for is
    // the source buffer itself, and the result must be backed by that same memory.
    fieldNode = new ArrowFieldNode(valueCount, valueCount);
    try (ArrowBuf src = allocator.buffer(validityBytes);
         ArrowBuf dst = BitVectorHelper.loadValidityBuffer(fieldNode, src, allocator)) {
      assertEquals(src.memoryAddress(), dst.memoryAddress());
      assertEquals(validityBytes, allocator.getAllocatedMemory());
    }

    // ... and likewise when no value is null.
    fieldNode = new ArrowFieldNode(valueCount, 0);
    try (ArrowBuf src = allocator.buffer(validityBytes);
         ArrowBuf dst = BitVectorHelper.loadValidityBuffer(fieldNode, src, allocator)) {
      assertEquals(src.memoryAddress(), dst.memoryAddress());
      assertEquals(validityBytes, allocator.getAllocatedMemory());
    }

    // Mixed case: the output must match the input bit for bit.
    final int numNulls = 100;
    fieldNode = new ArrowFieldNode(valueCount, numNulls);
    try (ArrowBuf src = allocator.buffer(validityBytes)) {
      src.setZero(0, src.capacity());
      // A set bit marks a valid (non-null) slot, so leave the first numNulls bits cleared and
      // set the remainder; the bitmap then agrees with the null count declared in fieldNode.
      for (int i = numNulls; i < valueCount; i++) {
        BitVectorHelper.setBit(src, i);
      }
      try (ArrowBuf dst = BitVectorHelper.loadValidityBuffer(fieldNode, src, allocator)) {
        assertEquals(src.memoryAddress(), dst.memoryAddress());
        assertEquals(validityBytes, allocator.getAllocatedMemory());
        for (int i = 0; i < valueCount; i++) {
          assertEquals(BitVectorHelper.get(src, i), BitVectorHelper.get(dst, i));
        }
      }
    }
  }
}

private void concatAndVerify(ArrowBuf buf1, int count1, ArrowBuf buf2, int count2, ArrowBuf output) {
BitVectorHelper.concatBits(buf1, count1, buf2, count2, output);
int outputIdx = 0;
Expand Down

0 comments on commit 902b541

Please sign in to comment.