Skip to content

Commit

Permalink
ARROW-3741: [R] Add support for arrow::compute::Cast to convert Arrow…
Browse files Browse the repository at this point in the history
… arrays from one type to another

``` r
library(arrow)
a <- array(1:10, NA)
a$type()
#> arrow::Int32
#> int32

b <- a$cast(int16())
b$type()
#> arrow::Int16
#> int16
```

<sup>Created on 2018-11-14 by the [reprex package](https://reprex.tidyverse.org) (v0.2.1.9000)</sup>

Author: Romain Francois <[email protected]>

Closes apache#2959 from romainfrancois/ARROW-3741/Cast and squashes the following commits:

053bd35 <Romain Francois> provision test for cast to half float
085886b <Romain Francois> fix similar to @javierluraschi fix on apache#2955
05f8758 <Romain Francois> Table$cast(schema)
f02e744 <Romain Francois> RecordBatch$cast(schema)
7cb78ca <Romain Francois> ChunkedArray$cast()
26ef538 <Romain Francois> expose Schema$names as an active
d41423b <Romain Francois> making STOP_IF_NULL an inline function so that it is only used on pointers.
ffa8c7c <Romain Francois> + tests
a96defa <Romain Francois> testing the right thing in `STOP_IF_NULL`, same as apache@e8a7b23 from apache#2953
70898ed <Romain Francois> Array$cast
  • Loading branch information
romainfrancois authored and wesm committed Nov 24, 2018
1 parent 8c52f4c commit 7281731
Show file tree
Hide file tree
Showing 19 changed files with 428 additions and 6 deletions.
1 change: 1 addition & 0 deletions r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ Collate:
'Table.R'
'array.R'
'buffer.R'
'compute.R'
'dictionary.R'
'feather.R'
'io.R'
Expand Down
1 change: 1 addition & 0 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ export(boolean)
export(buffer)
export(buffer_output_stream)
export(buffer_reader)
export(cast_options)
export(chunked_array)
export(date32)
export(date64)
Expand Down
5 changes: 5 additions & 0 deletions r/R/ChunkedArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@
} else {
shared_ptr(`arrow::ChunkedArray`, ChunkArray__Slice2(self, offset, length))
}
},
cast = function(target_type, safe = TRUE, options = cast_options(safe)) {
  # Cast this chunked array to `target_type`.
  # `options` defaults to cast_options(safe); both arguments are validated
  # before ChunkedArray__cast is called.
  assert_that(inherits(target_type, "arrow::DataType"))
  assert_that(inherits(options, "arrow::compute::CastOptions"))

  casted <- ChunkedArray__cast(self, target_type, options)
  shared_ptr(`arrow::ChunkedArray`, casted)
}
)
)
Expand Down
24 changes: 24 additions & 0 deletions r/R/RcppExports.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 8 additions & 1 deletion r/R/RecordBatch.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,14 @@
}
},

serialize = function(output_stream, ...) write_record_batch(self, output_stream, ...)
serialize = function(output_stream, ...) write_record_batch(self, output_stream, ...),

cast = function(target_schema, safe = TRUE, options = cast_options(safe)) {
  # Cast this record batch to `target_schema`.
  # `options` defaults to cast_options(safe).
  assert_that(inherits(target_schema, "arrow::Schema"))
  assert_that(inherits(options, "arrow::compute::CastOptions"))

  # The target schema must have exactly the same field names, in the same
  # order, as the current schema.
  current_names <- self$schema()$names
  assert_that(identical(current_names, target_schema$names), msg = "incompatible schemas")

  casted <- RecordBatch__cast(self, target_schema, options)
  shared_ptr(`arrow::RecordBatch`, casted)
}
)
)

Expand Down
3 changes: 3 additions & 0 deletions r/R/Schema.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
ToString = function() Schema__ToString(self),
num_fields = function() Schema__num_fields(self),
field = function(i) shared_ptr(`arrow::Field`, Schema__field(self, i))
),
active = list(
names = function() {
  # Active binding: read as `schema$names`, without parentheses.
  Schema__names(self)
}
)
)

Expand Down
9 changes: 8 additions & 1 deletion r/R/Table.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,14 @@
schema = function() shared_ptr(`arrow::Schema`, Table__schema(self)),
column = function(i) shared_ptr(`arrow::Column`, Table__column(self, i)),

serialize = function(output_stream, ...) write_table(self, output_stream, ...)
serialize = function(output_stream, ...) write_table(self, output_stream, ...),

cast = function(target_schema, safe = TRUE, options = cast_options(safe)) {
  # Cast this table to `target_schema`.
  # `options` defaults to cast_options(safe).
  assert_that(inherits(target_schema, "arrow::Schema"))
  assert_that(inherits(options, "arrow::compute::CastOptions"))

  # The target schema must have exactly the same field names, in the same
  # order, as the current schema.
  current_names <- self$schema()$names
  assert_that(identical(current_names, target_schema$names), msg = "incompatible schemas")

  casted <- Table__cast(self, target_schema, options)
  shared_ptr(`arrow::Table`, casted)
}
)
)

Expand Down
5 changes: 5 additions & 0 deletions r/R/array.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@
RangeEquals = function(other, start_idx, end_idx, other_start_idx) {
assert_that(inherits(other, "arrow::Array"))
Array__RangeEquals(self, other, start_idx, end_idx, other_start_idx)
},
cast = function(target_type, safe = TRUE, options = cast_options(safe)) {
  # Cast this array to `target_type`.
  # `options` defaults to cast_options(safe); both arguments are validated
  # before Array__cast is called.
  assert_that(inherits(target_type, "arrow::DataType"))
  assert_that(inherits(options, "arrow::compute::CastOptions"))

  casted <- Array__cast(self, target_type, options)
  `arrow::Array`$dispatch(casted)
}
)
)
Expand Down
39 changes: 39 additions & 0 deletions r/R/compute.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

#' @include array.R

`arrow::compute::CastOptions` <- R6Class("arrow::compute::CastOptions", inherit = `arrow::Object`)

#' Cast options
#'
#' Build the options object expected by the `cast()` methods.
#'
#' @param safe enforce safe conversion; each `allow_*` argument defaults to `!safe`
#' @param allow_int_overflow allow integer overflow, `!safe` by default
#' @param allow_time_truncate allow time truncation, `!safe` by default
#' @param allow_float_truncate allow float truncation, `!safe` by default
#'
#' @export
cast_options <- function(safe = TRUE,
                         allow_int_overflow = !safe,
                         allow_time_truncate = !safe,
                         allow_float_truncate = !safe) {
  # The three flags are forwarded positionally, in this order.
  options_ptr <- compute___CastOptions__initialize(
    allow_int_overflow,
    allow_time_truncate,
    allow_float_truncate
  )
  shared_ptr(`arrow::compute::CastOptions`, options_ptr)
}
21 changes: 21 additions & 0 deletions r/man/cast_options.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

82 changes: 82 additions & 0 deletions r/src/RcppExports.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions r/src/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -727,7 +727,7 @@ struct Converter_Promotion {
std::fill_n(data.begin() + start, n, default_value<RTYPE>());
} else {
auto p_values = GetValuesSafely<value_type>(array->data(), 1, array->offset());
STOP_IF_NULL(start);
STOP_IF_NULL(p_values);

auto value_convert = [](value_type value) {
return static_cast<r_stored_type>(value);
Expand Down Expand Up @@ -945,10 +945,10 @@ SEXP ArrayVector__as_vector(int64_t n, const ArrayVector& arrays) {
return ArrayVector_To_Vector<Converter_Promotion<REALSXP, arrow::UInt32Type>>(
n, arrays);
case Type::HALF_FLOAT:
return ArrayVector_To_Vector<Converter_Promotion<REALSXP, arrow::UInt32Type>>(
return ArrayVector_To_Vector<Converter_Promotion<REALSXP, arrow::HalfFloatType>>(
n, arrays);
case Type::FLOAT:
return ArrayVector_To_Vector<Converter_Promotion<REALSXP, arrow::UInt32Type>>(
return ArrayVector_To_Vector<Converter_Promotion<REALSXP, arrow::FloatType>>(
n, arrays);

// time32 and time64
Expand Down
7 changes: 6 additions & 1 deletion r/src/arrow_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#undef Free
#include <arrow/api.h>
#include <arrow/compute/api.h>
#include <arrow/io/file.h>
#include <arrow/io/memory.h>
#include <arrow/ipc/feather.h>
Expand All @@ -34,7 +35,11 @@
} while (0)

#define STOP_IF_NOT_OK(s) STOP_IF_NOT(s.ok(), s.ToString())
#define STOP_IF_NULL(buf) STOP_IF_NOT(buf, "invalid data")

template <typename T>
inline void STOP_IF_NULL(T* ptr) {
STOP_IF_NOT(ptr, "invalid data");
}

template <typename T>
struct NoDelete {
Expand Down
Loading

0 comments on commit 7281731

Please sign in to comment.