Skip to content

Commit 41441ea

Browse files
orlpzeroshade
andauthored
fix(arrow/cdata): Avoid calling unsafe.Slice on zero-length pointers (#513)
### Rationale for this change Slices from FFI may have an arbitrary pointer when the length is zero, but this is not allowed in Go, where the pointer must always be valid. I believe this fixes #28. ### What changes are included in this PR? I changed the instances of `unsafe.Slice` in `arrow/cdata` I could find to be robust when used with length zero. ### Are these changes tested? No, I don't have a Go setup at all. ### Are there any user-facing changes? No. --------- Co-authored-by: Matt Topol <zotthewizard@gmail.com>
1 parent d241a6e commit 41441ea

4 files changed

Lines changed: 61 additions & 10 deletions

File tree

arrow/cdata/cdata.go

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -370,10 +370,10 @@ func (imp *cimporter) importChild(parent *cimporter, src *CArrowArray) error {
370370

371371
// import any child arrays for lists, structs, and so on.
372372
func (imp *cimporter) doImportChildren() error {
373-
children := unsafe.Slice(imp.arr.children, imp.arr.n_children)
374-
375-
if len(children) > 0 {
376-
imp.children = make([]cimporter, len(children))
373+
var children []*CArrowArray
374+
if imp.arr.n_children > 0 {
375+
children = unsafe.Slice(imp.arr.children, imp.arr.n_children)
376+
imp.children = make([]cimporter, imp.arr.n_children)
377377
}
378378

379379
// handle the cases
@@ -711,10 +711,12 @@ func (imp *cimporter) importBinaryViewLike() (err error) {
711711
return
712712
}
713713

714-
dataBufferSizes := unsafe.Slice((*int64)(unsafe.Pointer(imp.cbuffers[len(buffers)])), len(buffers)-2)
715-
for i, size := range dataBufferSizes {
716-
if buffers[i+2], err = imp.importVariableValuesBuffer(i+2, 1, size); err != nil {
717-
return
714+
if len(buffers) > 2 {
715+
dataBufferSizes := unsafe.Slice((*int64)(unsafe.Pointer(imp.cbuffers[len(buffers)])), len(buffers)-2)
716+
for i, size := range dataBufferSizes {
717+
if buffers[i+2], err = imp.importVariableValuesBuffer(i+2, 1, size); err != nil {
718+
return
719+
}
718720
}
719721
}
720722

@@ -866,7 +868,7 @@ func (imp *cimporter) checkNumBuffers(n int64) error {
866868
func (imp *cimporter) importBuffer(bufferID int, sz int64) (*memory.Buffer, error) {
867869
// this is not a copy, we're just having a slice which points at the data
868870
// it's still owned by the C.ArrowArray object and its backing C++ object.
869-
if imp.cbuffers[bufferID] == nil {
871+
if imp.cbuffers[bufferID] == nil || sz == 0 {
870872
if sz != 0 {
871873
return nil, errors.New("invalid buffer")
872874
}

arrow/cdata/cdata_fulltest.c

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,8 +335,13 @@ void export_int32_array(const int32_t*, int64_t, struct ArrowArray*);
335335

336336
static void release_str_array(struct ArrowArray* array) {
337337
assert(array->n_buffers == 3);
338+
if (array->buffers[0] != NULL) {
339+
free((void*) array->buffers[0]);
340+
}
338341
free((void*) array->buffers[1]);
339-
free((void*) array->buffers[2]);
342+
if (array->buffers[2] != NULL && array->buffers[2] != (void*)0x1) {
343+
free((void*) array->buffers[2]);
344+
}
340345
free(array->buffers);
341346
array->release = NULL;
342347
}
@@ -361,6 +366,29 @@ void export_str_array(const char* data, const int32_t* offsets, int64_t nitems,
361366
out->buffers[2] = data;
362367
}
363368

369+
void export_str_array_with_nulls(int64_t nitems, struct ArrowArray* out) {
370+
*out = (struct ArrowArray) {
371+
.length = nitems,
372+
.offset = 0,
373+
.null_count = nitems,
374+
.n_buffers = 3,
375+
.n_children = 0,
376+
.children = NULL,
377+
.dictionary = NULL,
378+
// bookkeeping
379+
.release = &release_str_array
380+
};
381+
382+
out->buffers = (const void**)malloc(sizeof(void*) * out->n_buffers);
383+
assert(out->buffers != NULL);
384+
int64_t bitmap_nbytes = (nitems + 7) / 8;
385+
out->buffers[0] = malloc(bitmap_nbytes);
386+
memset((void*)out->buffers[0], 0, bitmap_nbytes);
387+
out->buffers[1] = malloc((nitems + 1) * sizeof(int32_t));
388+
memset((void*)out->buffers[1], 0, (nitems + 1) * sizeof(int32_t));
389+
out->buffers[2] = (void*)0x1;
390+
}
391+
364392
static int next_record(struct ArrowArrayStream* st, struct ArrowArray* out) {
365393
struct streamcounter* cnter = (struct streamcounter*)(st->private_data);
366394
if (cnter->n == cnter->max) {

arrow/cdata/cdata_test.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,20 @@ func TestNestedArrays(t *testing.T) {
737737
}
738738
}
739739

740+
func TestStrArrayAllNulls(t *testing.T) {
741+
arr := exportStrArrayWithNulls(1000)
742+
carr, err := ImportCArrayWithType(&arr, arrow.BinaryTypes.String)
743+
require.NoError(t, err)
744+
defer carr.Release()
745+
746+
buffer := carr.Data().Buffers()[2]
747+
assert.NotNil(t, buffer)
748+
bs := buffer.Bytes()
749+
assert.Equal(t, 1000, carr.Len())
750+
assert.Equal(t, 1000, carr.NullN())
751+
assert.Empty(t, bs)
752+
}
753+
740754
func TestRecordBatch(t *testing.T) {
741755
mem := mallocator.NewMallocator()
742756
defer mem.AssertSize(t, 0)

arrow/cdata/cdata_test_framework.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ package cdata
5353
// }
5454
// void export_int32_type(struct ArrowSchema* schema);
5555
// void export_int32_array(const int32_t*, int64_t, struct ArrowArray*);
56+
// void export_str_array_with_nulls(int64_t nitems, struct ArrowArray* out);
5657
// int test1_is_released();
5758
// void test_primitive(struct ArrowSchema* schema, const char* fmt);
5859
// void free_malloced_schemas(struct ArrowSchema**);
@@ -98,6 +99,12 @@ func exportInt32TypeSchema() CArrowSchema {
9899
return s
99100
}
100101

102+
func exportStrArrayWithNulls(nitems int64) CArrowArray {
103+
var arr CArrowArray
104+
C.export_str_array_with_nulls(C.int64_t(nitems), &arr)
105+
return arr
106+
}
107+
101108
func schemaIsReleased(s *CArrowSchema) bool {
102109
return C.ArrowSchemaIsReleased(s) == 1
103110
}

0 commit comments

Comments
 (0)