Skip to content

Commit

Permalink
TileDB: FillPrimitiveListArray(): fix data corruption with spatial fi…
Browse files Browse the repository at this point in the history
…lter (Coverity CID 1512587, 1512576)
  • Loading branch information
rouault committed May 28, 2023
1 parent 5dc5eea commit a5e3cf8
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 9 deletions.
68 changes: 62 additions & 6 deletions autotest/ogr/ogr_tiledb.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
###############################################################################


def create_tiledb_dataset(nullable, batch_size, include_bool):
def create_tiledb_dataset(nullable, batch_size, include_bool, extra_feature=False):

ds = ogr.GetDriverByName("TileDB").CreateDataSource("tmp/test.tiledb")
srs = osr.SpatialReference()
Expand Down Expand Up @@ -206,6 +206,34 @@ def create_tiledb_dataset(nullable, batch_size, include_bool):
assert lyr.CreateFeature(f) == ogr.OGRERR_NONE
assert f.GetFID() == 3

if extra_feature:
f = ogr.Feature(lyr.GetLayerDefn())
f["strfield"] = "something"
f["intfield"] = 8765432
f["int16field"] = 32767
if include_bool:
f["boolfield"] = False
f["uint8field"] = 255
f["uint16field"] = 65535
f["int64field"] = 9876543210123456
f["doublefield"] = -1.2345
f["floatfield"] = -1.5
f.SetFieldBinaryFromHexString("binaryfield", "DEAFBEEF")
f["intlistfield"] = [-123456789, -123]
f["int16listfield"] = [32767, -32768]
if include_bool:
f["boollistfield"] = [False, True]
f["doublelistfield"] = [-1.2345, 1.2345]
f["floatlistfield"] = [0.0, -1.5, 1.5]
# Will be transformed to "2023/04/07 10:19:56.789+00"
f["datetimefield"] = "2023-04-07T12:34:56.789+0215"
f["datefield"] = "2023-04-08"
f["timefield"] = "13:34:56.789"
f["intfieldextra"] = 4
f.SetGeometry(ogr.CreateGeometryFromWkt("POINT (-0.9 -0.9)"))
assert lyr.CreateFeature(f) == ogr.OGRERR_NONE
assert f.GetFID() == 4

ds = None
return field_count, srs, options

Expand Down Expand Up @@ -1220,7 +1248,9 @@ def test_ogr_tiledb_arrow_stream_numpy(nullable, batch_size):
shutil.rmtree("tmp/test.tiledb")

include_bool = True
_, _, options = create_tiledb_dataset(nullable, batch_size, include_bool)
_, _, options = create_tiledb_dataset(
nullable, batch_size, include_bool, extra_feature=True
)

ds = gdal.OpenEx("tmp/test.tiledb", open_options=options)
lyr = ds.GetLayer(0)
Expand Down Expand Up @@ -1338,7 +1368,19 @@ def check_batch(batch):
for batch in stream:
fids += batch["FID"].tolist()
check_batch(batch)
assert set(fids) == set([1, 2, 3])
assert set(fids) == set([1, 2, 3, 4])
lyr.SetSpatialFilter(None)

# Test spatial filter that intersects 1st, 2nd and 4th features only, but given how
# spatial filtering works, more intermediate features will be collected
# before being discarded
lyr.SetSpatialFilterRect(-0.99, -2.99, 3, 3)
stream = lyr.GetArrowStreamAsNumPy(options=["USE_MASKED_ARRAYS=NO"])
fids = []
for batch in stream:
fids += batch["FID"].tolist()
check_batch(batch)
assert set(fids) == set([1, 2, 4])
lyr.SetSpatialFilter(None)

# Test spatial filter that intersects 1st and 2nd features only, but given how
Expand All @@ -1354,7 +1396,7 @@ def check_batch(batch):
lyr.SetSpatialFilter(None)

# Test spatial filter that intersects 3rd feature only
lyr.SetSpatialFilterRect(-3, -3, 0, 0)
lyr.SetSpatialFilterRect(-3, -3, -0.95, -0.95)
stream = lyr.GetArrowStreamAsNumPy(options=["USE_MASKED_ARRAYS=NO"])
fids = []
for batch in stream:
Expand All @@ -1363,6 +1405,16 @@ def check_batch(batch):
assert set(fids) == set([3])
lyr.SetSpatialFilter(None)

# Test spatial filter that intersects 4th feature only
lyr.SetSpatialFilterRect(-0.95, -0.95, -0.85, -0.85)
stream = lyr.GetArrowStreamAsNumPy(options=["USE_MASKED_ARRAYS=NO"])
fids = []
for batch in stream:
fids += batch["FID"].tolist()
check_batch(batch)
assert set(fids) == set([4])
lyr.SetSpatialFilter(None)

# Test spatial filter that intersects no feature
lyr.SetSpatialFilterRect(-0.5, -0.5, 0.5, 0.5)
stream = lyr.GetArrowStreamAsNumPy(options=["USE_MASKED_ARRAYS=NO"])
Expand Down Expand Up @@ -1394,7 +1446,9 @@ def check_batch(batch):
batch = batches[0]
assert "strfield" not in batch.keys()
assert "intfield" in batch.keys()
assert set([x for x in batch["intfield"]]) == set([-123456789, 0, 123456789])
assert set([x for x in batch["intfield"]]) == set(
[-123456789, 0, 123456789, 8765432]
)

lyr.SetIgnoredFields(["wkb_geometry"])
stream = lyr.GetArrowStreamAsNumPy(
Expand All @@ -1405,7 +1459,9 @@ def check_batch(batch):
batch = batches[0]
assert "wkb_geometry" not in batch.keys()
assert "intfield" in batch.keys()
assert set([x for x in batch["intfield"]]) == set([-123456789, 0, 123456789])
assert set([x for x in batch["intfield"]]) == set(
[-123456789, 0, 123456789, 8765432]
)

ds = None

Expand Down
6 changes: 3 additions & 3 deletions frmts/tiledb/tiledbsparse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4954,8 +4954,8 @@ void OGRTileDBLayer::FillPrimitiveListArray(
offsets.push_back(nAccLen);
if (nItemLen && nAccLen < nSrcOffset)
{
memmove(v_source->data() + nAccLen * sizeof(T),
v_source->data() + nSrcOffset * sizeof(T),
memmove(v_source->data() + nAccLen,
v_source->data() + nSrcOffset,
nItemLen * sizeof(T));
}
nAccLen += nItemLen;
Expand Down Expand Up @@ -5045,7 +5045,7 @@ void OGRTileDBLayer::FillBoolListArray(
}
else
{
offsetsPtr->resize(offsetsPtr->size() + 1);
CPLAssert(offsetsPtr->size() > static_cast<size_t>(psChild->length));

auto &offsets = *offsetsPtr;
const size_t nSrcVals = offsets.size();
Expand Down

0 comments on commit a5e3cf8

Please sign in to comment.