Skip to content

Commit

Permalink
apacheGH-32605: [C#] Extend validity buffer api (apache#35342)
Browse files Browse the repository at this point in the history
Add an `AppendRange(bool value, int length)` overload to the validity buffer builder (`ArrowBuffer.BitmapBuilder`) that appends the same bool value `length` times without allocating an `Enumerable.Repeat` enumerable.

### Rationale for this change

See more details in the code review comments in apache#13810

* Closes: apache#32605

Authored-by: Aleksei Smirnov <[email protected]>
Signed-off-by: Eric Erhardt <[email protected]>
  • Loading branch information
asmirnov82 authored May 23, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent fbe5f64 commit f38943a
Showing 7 changed files with 245 additions and 33 deletions.
4 changes: 2 additions & 2 deletions csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs
Original file line number Diff line number Diff line change
@@ -142,7 +142,7 @@ public TBuilder Append(ReadOnlySpan<T> span)
int len = ValueBuffer.Length;
ValueBuffer.Append(span);
int additionalBitsCount = ValueBuffer.Length - len;
ValidityBuffer.Reserve(additionalBitsCount).AppendRange(Enumerable.Repeat(true, additionalBitsCount));
ValidityBuffer.AppendRange(true, additionalBitsCount);
return Instance;
}

@@ -151,7 +151,7 @@ public TBuilder AppendRange(IEnumerable<T> values)
int len = ValueBuffer.Length;
ValueBuffer.AppendRange(values);
var additionalBitsCount = ValueBuffer.Length - len;
ValidityBuffer.Reserve(additionalBitsCount).AppendRange(Enumerable.Repeat(true, additionalBitsCount));
ValidityBuffer.AppendRange(true, additionalBitsCount);
return Instance;
}

21 changes: 21 additions & 0 deletions csharp/src/Apache.Arrow/ArrowBuffer.BitmapBuilder.cs
Original file line number Diff line number Diff line change
@@ -138,6 +138,27 @@ public BitmapBuilder AppendRange(IEnumerable<bool> values)
return this;
}

/// <summary>
/// Append the same bit value a given number of times.
/// </summary>
/// <param name="value">Bit value to append.</param>
/// <param name="length">How many copies of <paramref name="value"/> to append.</param>
/// <returns>Returns the builder (for fluent-style composition).</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="length"/> is negative.
/// </exception>
public BitmapBuilder AppendRange(bool value, int length)
{
    if (length < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(length));
    }

    // Make room first, then write the whole run in a single call
    // starting at the current end of the bitmap.
    EnsureAdditionalCapacity(length);
    BitUtility.SetBits(Span, Length, length, value);

    // Only appended 'true' bits contribute to the set-bit tally.
    int newlySetBits = value ? length : 0;
    Length += length;
    SetBitCount += newlySetBits;

    return this;
}

/// <summary>
/// Toggle the bit at a particular index.
/// </summary>
109 changes: 87 additions & 22 deletions csharp/src/Apache.Arrow/BitUtility.cs
Original file line number Diff line number Diff line change
@@ -62,73 +62,138 @@ public static void SetBit(Span<byte> data, int index, bool value)
: (byte)(data[idx] & ~BitMask[mod]);
}

/// <summary>
/// Sets a run of consecutive bits in a span of bytes to the same value,
/// starting at a specific bit index.
/// </summary>
/// <param name="data">Span whose bits are written.</param>
/// <param name="index">Bit index of the first bit to write.</param>
/// <param name="length">Number of consecutive bits to write.</param>
/// <param name="value">Value written to every bit of the run.</param>
internal static void SetBits(Span<byte> data, int index, int length, bool value)
{
    if (length == 0)
    {
        return;
    }

    // Index of the last bit of the run; 'checked' turns an int overflow
    // of index + length - 1 into an exception instead of wrapping.
    int lastBitIndex = checked(index + length - 1);

    // Short runs (fewer than 20 bits) are written one bit at a time.
    if (length < 20)
    {
        int current = index;
        while (current <= lastBitIndex)
        {
            SetBit(data, current, value);
            current++;
        }

        return;
    }

    // Longer runs: split into an optional partial first byte, a block of
    // whole bytes, and an optional partial last byte.
    int firstByte = index / 8;
    int firstBitInByte = index % 8;
    int lastByte = lastBitIndex / 8;
    int lastBitInByte = lastBitIndex % 8;

    bool partialFirstByte = firstBitInByte != 0;
    bool partialLastByte = lastBitInByte != 7;

    // Inclusive range of bytes that are completely covered by the run.
    int wholeBytesFrom = partialFirstByte ? firstByte + 1 : firstByte;
    int wholeBytesTo = partialLastByte ? lastByte - 1 : lastByte;

    if (partialFirstByte)
    {
        // Finish the first byte bit by bit, from the start offset up to bit 7.
        Span<byte> head = data.Slice(firstByte, 1);
        for (int bit = firstBitInByte; bit < 8; bit++)
        {
            SetBit(head, bit, value);
        }
    }

    if (wholeBytesFrom <= wholeBytesTo)
    {
        // Fully covered bytes are filled in one go.
        int wholeByteCount = wholeBytesTo - wholeBytesFrom + 1;
        data.Slice(wholeBytesFrom, wholeByteCount).Fill(value ? (byte)0xFF : (byte)0x00);
    }

    if (partialLastByte)
    {
        // Write bits 0 through the end offset of the last byte.
        Span<byte> tail = data.Slice(lastByte, 1);
        for (int bit = 0; bit <= lastBitInByte; bit++)
        {
            SetBit(tail, bit, value);
        }
    }
}

/// <summary>
/// Toggles the bit at a particular bit index in a span of bytes.
/// </summary>
/// <param name="data">Span containing the bit to toggle.</param>
/// <param name="index">Bit index of the bit to toggle.</param>
public static void ToggleBit(Span<byte> data, int index)
{
    int byteIndex = index / 8;
    int bitOffset = index % 8;

    // XOR the containing byte with the mask entry for this bit offset.
    data[byteIndex] ^= BitMask[bitOffset];
}

/// <summary>
/// Counts the number of set bits in a span of bytes starting
/// at a specific bit offset.
/// at a specific bit index.
/// </summary>
/// <param name="data">Span to count bits</param>
/// <param name="offset">Bit offset to start counting from</param>
/// <returns>Count of set (one) bits</returns>
public static int CountBits(ReadOnlySpan<byte> data, int offset) =>
CountBits(data, offset, data.Length * 8 - offset);
/// <param name="data">Span to count bits.</param>
/// <param name="index">Bit index to start counting from.</param>
/// <returns>Count of set (one) bits.</returns>
public static int CountBits(ReadOnlySpan<byte> data, int index) =>
CountBits(data, index, data.Length * 8 - index);

/// <summary>
/// Counts the number of set bits in a span of bytes starting
/// at a specific bit offset, and limiting to a certain number of bits
/// at a specific bit index, and limiting to a certain number of bits
/// in the span.
/// </summary>
/// <param name="data">Span to count bits.</param>
/// <param name="offset">Bit offset to start counting from.</param>
/// <param name="index">Bit index to start counting from.</param>
/// <param name="length">Maximum of bits in the span to consider.</param>
/// <returns>Count of set (one) bits</returns>
public static int CountBits(ReadOnlySpan<byte> data, int offset, int length)
/// <returns>Count of set (one) bits.</returns>
public static int CountBits(ReadOnlySpan<byte> data, int index, int length)
{
int startByteIndex = offset / 8;
int startBitOffset = offset % 8;
int endByteIndex = (offset + length - 1) / 8;
int endBitOffset = (offset + length - 1) % 8;
int startByteIndex = index / 8;
int startBitOffset = index % 8;

int endBitIndex = index + length - 1;

int endByteIndex = endBitIndex / 8;
int endBitOffset = endBitIndex % 8;

if (startBitOffset < 0)
return 0;

int count = 0;
if (startByteIndex == endByteIndex)
{
// Range starts and ends within the same byte.
var slice = data.Slice(startByteIndex, 1);
ReadOnlySpan<byte> slice = data.Slice(startByteIndex, 1);
for (int i = startBitOffset; i <= endBitOffset; i++)
count += GetBit(slice, i) ? 1 : 0;

return count;
}

// If the starting index and ending index are not byte-aligned,
// we'll need to count bits the slow way. If they are
// we'll need to count bits the slow way. If they are
// byte-aligned, and for all other bytes in the 'middle', we
// can use a faster byte-aligned count.
int fullByteStartIndex = startBitOffset == 0 ? startByteIndex : startByteIndex + 1;
int fullByteEndIndex = endBitOffset == 7 ? endByteIndex : endByteIndex - 1;

if (startBitOffset != 0)
{
var slice = data.Slice(startByteIndex, 1);
ReadOnlySpan<byte> slice = data.Slice(startByteIndex, 1);
for (int i = startBitOffset; i <= 7; i++)
count += GetBit(slice, i) ? 1 : 0;
}

if (fullByteEndIndex >= fullByteStartIndex)
{
var slice = data.Slice(fullByteStartIndex, fullByteEndIndex - fullByteStartIndex + 1);
ReadOnlySpan<byte> slice = data.Slice(fullByteStartIndex, fullByteEndIndex - fullByteStartIndex + 1);
count += CountBits(slice);
}

if (endBitOffset != 7)
{
var slice = data.Slice(endByteIndex, 1);
ReadOnlySpan<byte> slice = data.Slice(endByteIndex, 1);
for (int i = 0; i <= endBitOffset; i++)
count += GetBit(slice, i) ? 1 : 0;
}
@@ -139,7 +204,7 @@ public static int CountBits(ReadOnlySpan<byte> data, int offset, int length)
/// <summary>
/// Counts the number of set bits in a span of bytes.
/// </summary>
/// <param name="data">Span to count bits</param>
/// <param name="data">Span to count bits.</param>
/// <returns>Count of set (one) bits.</returns>
public static int CountBits(ReadOnlySpan<byte> data)
{
@@ -186,8 +251,8 @@ public static long RoundUpToMultiplePowerOfTwo(long n, int factor)
/// <summary>
/// Calculates the number of bytes required to store n bits.
/// </summary>
/// <param name="n">number of bits</param>
/// <returns>number of bytes</returns>
/// <param name="n">Number of bits</param>
/// <returns>Number of bytes</returns>
public static int ByteCount(int n)
{
Debug.Assert(n >= 0);
1 change: 1 addition & 0 deletions csharp/src/Apache.Arrow/Properties/AssembyInfo.cs
Original file line number Diff line number Diff line change
@@ -16,3 +16,4 @@
using System.Runtime.CompilerServices;

[assembly: InternalsVisibleTo("Apache.Arrow.Flight, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")]
[assembly: InternalsVisibleTo("Apache.Arrow.Tests, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")]
2 changes: 1 addition & 1 deletion csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs
Original file line number Diff line number Diff line change
@@ -160,7 +160,7 @@ public void ListArrayBuilderValidityBuffer()
public void NestedListArrayBuilder()
{
var childListType = new ListType(Int64Type.Default);
var parentListBuilder = new ListArray.Builder(childListType);
var parentListBuilder = new ListArray.Builder((IArrowType)childListType);
var childListBuilder = parentListBuilder.ValueBuilder as ListArray.Builder;
Assert.NotNull(childListBuilder);
var valueBuilder = childListBuilder.ValueBuilder as Int64Array.Builder;
43 changes: 36 additions & 7 deletions csharp/test/Apache.Arrow.Tests/ArrowBufferBitmapBuilderTests.cs
Original file line number Diff line number Diff line change
@@ -83,7 +83,7 @@ public void IncreasesCapacityWhenRequired()
// Arrange
var builder = new ArrowBuffer.BitmapBuilder();
int initialCapacity = builder.Capacity;
builder.AppendRange(Enumerable.Repeat(true, initialCapacity)); // Fill to capacity.
builder.AppendRange(true, initialCapacity); // Fill to capacity.

// Act
var actualReturnValue = builder.Append(true);
@@ -136,7 +136,7 @@ public void BitsAreAppendedToBuilderContainingByteAllignedData(byte[] bytesToApp
{
// Arrange
var builder = new ArrowBuffer.BitmapBuilder();
builder.AppendRange(Enumerable.Repeat(true, 8));
builder.AppendRange(true, 8);

// Act
var actualReturnValue = builder.Append(new Span<byte>(bytesToAppend), validBits);
@@ -162,7 +162,7 @@ public void BitsAreAppendedToBuilderContainingNotAllignedData(byte[] bytesToAppe
{
// Arrange
var builder = new ArrowBuffer.BitmapBuilder();
builder.AppendRange(Enumerable.Repeat(true, 9));
builder.AppendRange(true, 9);

// Act
var actualReturnValue = builder.Append(new Span<byte>(bytesToAppend), validBits);
@@ -180,7 +180,7 @@ public void EmptySpanAppendsCorrectNumberOfBits()
{
// Arrange
var builder = new ArrowBuffer.BitmapBuilder();
builder.AppendRange(Enumerable.Repeat(true, 8));
builder.AppendRange(true, 8);

// Act
var actualReturnValue = builder.Append(Span<byte>.Empty, 8);
@@ -198,7 +198,7 @@ public void ThrowsWhenLengthIsTooBig()
{
// Arrange
var builder = new ArrowBuffer.BitmapBuilder();
builder.AppendRange(Enumerable.Repeat(true, 8));
builder.AppendRange(true, 8);

// Act
Assert.Throws<ArgumentException>(() => builder.Append(new byte[] { 0b0010111 }, 9));
@@ -213,7 +213,7 @@ public class AppendRange
[InlineData(new bool[] {}, new[] { true, false }, 2, 1, 1)]
[InlineData(new[] { true, false }, new bool[] {}, 2, 1, 1)]
[InlineData(new[] { true, false }, new[] { true, false }, 4, 2, 2)]
public void IncreasesLength(
public void AppendingEnumerableIncreasesLength(
bool[] initialContents,
bool[] toAppend,
int expectedLength,
@@ -234,6 +234,35 @@ public void IncreasesLength(
Assert.Equal(expectedSetBitCount, builder.SetBitCount);
Assert.Equal(expectedUnsetBitCount, builder.UnsetBitCount);
}

// Verifies that AppendRange(bool, int) grows the builder by the requested
// number of bits and keeps the set/unset bit counts consistent.
[Theory]
[InlineData(new bool[] { }, true, 0, 0, 0, 0)]
[InlineData(new bool[] { }, true, 2, 2, 2, 0)]
[InlineData(new[] { true, false }, false, 0, 2, 1, 1)]
[InlineData(new[] { true, false }, false, 2, 4, 1, 3)]
[InlineData(new[] { true, false }, true, 2, 4, 3, 1)]
public void AppendingValueMultipleTimesIncreasesLength(
    bool[] initialContents,
    bool valueToAppend,
    int numberOfTimes,
    int expectedLength,
    int expectedSetBitCount,
    int expectedUnsetBitCount)
{
    // Arrange: seed the builder with the initial bits.
    ArrowBuffer.BitmapBuilder builder = new ArrowBuffer.BitmapBuilder();
    builder.AppendRange(initialContents);

    // Act: append the repeated value.
    ArrowBuffer.BitmapBuilder returned = builder.AppendRange(valueToAppend, numberOfTimes);

    // Assert: the call returns the builder itself and the counters match.
    Assert.Equal(builder, returned);
    Assert.Equal(expectedLength, builder.Length);
    Assert.True(builder.Capacity >= expectedLength);
    Assert.Equal(expectedSetBitCount, builder.SetBitCount);
    Assert.Equal(expectedUnsetBitCount, builder.UnsetBitCount);
}
}

public class Build
@@ -329,7 +358,7 @@ public void CapacityIncreased(int initialCapacity, int numBitsToAppend, int addi
{
// Arrange
var builder = new ArrowBuffer.BitmapBuilder(initialCapacity);
builder.AppendRange(Enumerable.Repeat(true, numBitsToAppend));
builder.AppendRange(true, numBitsToAppend);

// Act
var actualReturnValue = builder.Reserve(additionalCapacity);
Loading
Oops, something went wrong.

0 comments on commit f38943a

Please sign in to comment.