diff --git a/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java index e7c0d11cb9..d522ffe6f5 100644 --- a/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java +++ b/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java @@ -125,10 +125,15 @@ public ValueVector visit(BaseVariableWidthVector deltaVector, Void value) { targetVector .getOffsetBuffer() .getInt((long) targetVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH); + // The delta vector's offset buffer need not start at zero (e.g. a vector imported through + // the C data interface from a sliced array), so the amount of data to append is the + // distance between its first and last offsets, not the last offset itself. + int deltaDataStart = deltaVector.getOffsetBuffer().getInt(0); int deltaDataSize = deltaVector - .getOffsetBuffer() - .getInt((long) deltaVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH); + .getOffsetBuffer() + .getInt((long) deltaVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH) + - deltaDataStart; int newValueCapacity = targetDataSize + deltaDataSize; // make sure there is enough capacity @@ -149,7 +154,7 @@ public ValueVector visit(BaseVariableWidthVector deltaVector, Void value) { // append data buffer MemoryUtil.copyMemory( - deltaVector.getDataBuffer().memoryAddress(), + deltaVector.getDataBuffer().memoryAddress() + deltaDataStart, targetVector.getDataBuffer().memoryAddress() + targetDataSize, deltaDataSize); @@ -160,7 +165,7 @@ public ValueVector visit(BaseVariableWidthVector deltaVector, Void value) { + (targetVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH, deltaVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH); - // increase each offset from the second buffer + // rebase each appended offset to the target's data, accounting for the delta's start offset for (int i = 0; i < deltaVector.getValueCount(); i++) { int oldOffset = targetVector @@ -172,7 +177,7 @@ public ValueVector visit(BaseVariableWidthVector deltaVector, Void value) { .getOffsetBuffer() .setInt( (long) (targetVector.getValueCount() + 1 + i) * BaseVariableWidthVector.OFFSET_WIDTH, - oldOffset + targetDataSize); + oldOffset - deltaDataStart + targetDataSize); } ((BaseVariableWidthVector) targetVector).setLastSet(newValueCount - 1); targetVector.setValueCount(newValueCount); @@ -196,11 +201,15 @@ public ValueVector visit(BaseLargeVariableWidthVector deltaVector, Void value) { .getOffsetBuffer() .getLong( (long) targetVector.getValueCount() * BaseLargeVariableWidthVector.OFFSET_WIDTH); + // see the corresponding comment in visit(BaseVariableWidthVector, Void): the delta's + // offset buffer need not start at zero + long deltaDataStart = deltaVector.getOffsetBuffer().getLong(0); long deltaDataSize = deltaVector - .getOffsetBuffer() - .getLong( - (long) deltaVector.getValueCount() * BaseLargeVariableWidthVector.OFFSET_WIDTH); + .getOffsetBuffer() + .getLong( + (long) deltaVector.getValueCount() * BaseLargeVariableWidthVector.OFFSET_WIDTH) + - deltaDataStart; long newValueCapacity = targetDataSize + deltaDataSize; // make sure there is enough capacity @@ -221,7 +230,7 @@ public ValueVector visit(BaseLargeVariableWidthVector deltaVector, Void value) { // append data buffer MemoryUtil.copyMemory( - deltaVector.getDataBuffer().memoryAddress(), + deltaVector.getDataBuffer().memoryAddress() + deltaDataStart, targetVector.getDataBuffer().memoryAddress() + targetDataSize, deltaDataSize); @@ -232,7 +241,7 @@ public ValueVector visit(BaseLargeVariableWidthVector deltaVector, Void value) { + (targetVector.getValueCount() + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH, deltaVector.getValueCount() * BaseLargeVariableWidthVector.OFFSET_WIDTH); - // increase each offset from the second buffer + // rebase each appended offset to the target's data, accounting for the delta's start offset for (int i = 0; i < deltaVector.getValueCount(); i++) { long oldOffset = targetVector @@ -245,7 +254,7 @@ public ValueVector visit(BaseLargeVariableWidthVector deltaVector, Void value) { .setLong( (long) (targetVector.getValueCount() + 1 + i) * BaseLargeVariableWidthVector.OFFSET_WIDTH, - oldOffset + targetDataSize); + oldOffset - deltaDataStart + targetDataSize); } ((BaseLargeVariableWidthVector) targetVector).setLastSet(newValueCount - 1); targetVector.setValueCount(newValueCount); diff --git a/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java b/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java index df5521a1ad..dbbe5d3be5 100644 --- a/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java +++ b/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java @@ -26,10 +26,13 @@ import java.util.List; import java.util.stream.IntStream; import java.util.stream.Stream; +import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.memory.util.CommonUtil; +import org.apache.arrow.vector.BaseLargeVariableWidthVector; import org.apache.arrow.vector.BaseValueVector; +import org.apache.arrow.vector.BaseVariableWidthVector; import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; @@ -53,6 +56,7 @@ import org.apache.arrow.vector.holders.NullableBigIntHolder; import org.apache.arrow.vector.holders.NullableFloat4Holder; import org.apache.arrow.vector.holders.NullableIntHolder; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; @@ -178,6 +182,82 @@ public void testAppendVariableWidthVector() { } } + @Test + public void testAppendVariableWidthVectorWithNonZeroStartOffset() { + try (VarCharVector target = new VarCharVector("", allocator); + VarCharVector delta = new VarCharVector("", allocator)) { + + target.allocateNew(64, 4); + ValueVectorDataPopulator.setVector(target, "a0", "a1"); + + // Build a delta vector whose offset buffer does not start at zero, as produced e.g. by + // importing a sliced array through the C data interface. The values are "BBBB" and + // "CCCC"; the data buffer additionally holds 4 bytes of unreferenced prefix ("AAAA"). + try (ArrowBuf validity = allocator.buffer(1); + ArrowBuf offsets = allocator.buffer(12); + ArrowBuf data = allocator.buffer(12)) { + validity.setByte(0, 0b11); + offsets.setInt(0, 4); + offsets.setInt(4, 8); + offsets.setInt(8, 12); + data.setBytes(0, "AAAABBBBCCCC".getBytes(StandardCharsets.UTF_8)); + delta.loadFieldBuffers(new ArrowFieldNode(2, 0), Arrays.asList(validity, offsets, data)); + } + + VectorAppender appender = new VectorAppender(target); + delta.accept(appender, null); + + // the unreferenced prefix must not be appended + assertEquals( + 4 + 8, + target + .getOffsetBuffer() + .getInt((long) target.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH)); + + try (VarCharVector expected = new VarCharVector("expected", allocator)) { + expected.allocateNew(); + ValueVectorDataPopulator.setVector(expected, "a0", "a1", "BBBB", "CCCC"); + assertVectorsEqual(expected, target); + } + } + } + + @Test + public void testAppendLargeVariableWidthVectorWithNonZeroStartOffset() { + try (LargeVarCharVector target = new LargeVarCharVector("", allocator); + LargeVarCharVector delta = new LargeVarCharVector("", allocator)) { + + target.allocateNew(64, 4); + ValueVectorDataPopulator.setVector(target, "a0", "a1"); + + try (ArrowBuf validity = allocator.buffer(1); + ArrowBuf offsets = allocator.buffer(24); + ArrowBuf data = allocator.buffer(12)) { + validity.setByte(0, 0b11); + offsets.setLong(0, 4); + offsets.setLong(8, 8); + offsets.setLong(16, 12); + data.setBytes(0, "AAAABBBBCCCC".getBytes(StandardCharsets.UTF_8)); + delta.loadFieldBuffers(new ArrowFieldNode(2, 0), Arrays.asList(validity, offsets, data)); + } + + VectorAppender appender = new VectorAppender(target); + delta.accept(appender, null); + + assertEquals( + 4 + 8, + target + .getOffsetBuffer() + .getLong((long) target.getValueCount() * BaseLargeVariableWidthVector.OFFSET_WIDTH)); + + try (LargeVarCharVector expected = new LargeVarCharVector("expected", allocator)) { + expected.allocateNew(); + ValueVectorDataPopulator.setVector(expected, "a0", "a1", "BBBB", "CCCC"); + assertVectorsEqual(expected, target); + } + } + } + @Test public void testAppendVariableWidthViewVector() { final int length1 = 10;