-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
- [x] read_parquet/feather/etc. from S3 (use FileSystem->OpenInputFile(path))
- [x] write_$FORMAT via FileSystem->OpenOutputStream(path)
- [x] write_dataset (done? at least via URI)
- [x] ~~for linux, an argument to install_arrow to help, assuming you've installed aws-sdk-cpp already (turn on ARROW_S3, AWSSDK_SOURCE=SYSTEM)~~ Turns out there are no official deb/rpm packages for aws-sdk-cpp, so there's no value in making this part easier; it would actually be more confusing than helpful
- [x] set up a real test bucket and user for e2e testing (credentials available on request)
- [x] add a few tests that use s3, if credentials are set (which I'll set locally)
- [x] add vignette showing how to use s3 (via URI)
- [x] update docs, news

Out of the current scope:

- [ ] testing with minio on CI
- [ ] download dataset, i.e. copy files/directory recursively (needs ARROW-9867, ARROW-9868)
- [ ] friendlier methods for interacting with/viewing a filesystem (ls, mkdir, etc.) (ARROW-9870)
- [ ] direct construction of S3FileSystem object with S3Options (i.e. not only URI) (ARROW-9869)

Closes apache#8058 from nealrichardson/r-s3

Authored-by: Neal Richardson <[email protected]>
Signed-off-by: Neal Richardson <[email protected]>
- Loading branch information
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
context("S3 integration tests")

# Decide whether the S3 tests below should run. The result is TRUE only when
# all of the following hold:
#   * arrow_with_s3() is TRUE,
#   * the ARROW_R_DEV environment variable is "true" (case-insensitive),
#   * AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are both non-empty,
#   * the file-info request against the test bucket completes without error.
# Any error raised while checking is swallowed and treated as FALSE.
run_these <- tryCatch(
  {
    if (!arrow_with_s3() ||
      tolower(Sys.getenv("ARROW_R_DEV")) != "true" ||
      !nzchar(Sys.getenv("AWS_ACCESS_KEY_ID")) ||
      !nzchar(Sys.getenv("AWS_SECRET_ACCESS_KEY"))) {
      FALSE
    } else {
      # Probe the test bucket. `bucket` is assigned in the enclosing
      # environment on purpose: the cleanup code further down reuses it.
      bucket <- FileSystem$from_uri("s3://ursa-labs-r-test?region=us-west-2")
      bucket$fs$GetFileInfo(bucket$path)
      TRUE
    }
  },
  error = function(e) FALSE
)
|
||
# Build an S3 URI from path segments.
#
# ...    : path segments; they are joined with "/" via paste(), so the usual
#          paste() coercion and recycling rules apply.
# bucket : sprintf() template with a single %s placeholder that receives the
#          joined segments.
#
# Returns the formatted URI string(s).
bucket_uri <- function(...,
                       bucket = "s3://ursa-labs-r-test/%s?region=us-west-2") {
  object_key <- paste(..., sep = "/")
  sprintf(fmt = bucket, object_key)
}
|
||
# Round-trip tests against the real S3 test bucket. They only run when the
# access probe stored in `run_these` succeeded.
if (run_these) {
  # Objects are written under a prefix named after the current timestamp
  # (presumably so that separate runs against the shared bucket do not touch
  # each other's files -- confirm).
  now <- as.numeric(Sys.time())
  # Delete that prefix once the enclosing evaluation finishes. add = TRUE keeps
  # any exit handler that is already registered instead of replacing it.
  on.exit(bucket$fs$DeleteDir(paste0("ursa-labs-r-test/", now)), add = TRUE)

  test_that("read/write Feather on S3", {
    # Build the URI once so the write and the read target the same object.
    feather_uri <- bucket_uri(now, "test.feather")
    write_feather(example_data, feather_uri)
    expect_identical(read_feather(feather_uri), example_data)
  })

  test_that("read/write Parquet on S3", {
    # Build the URI once so the write and the read target the same object.
    parquet_uri <- bucket_uri(now, "test.parquet")
    write_parquet(example_data, parquet_uri)
    expect_identical(read_parquet(parquet_uri), example_data)
  })
}