Skip to content

Commit

Permalink
Expose service account info (r-lib#234)
Browse files Browse the repository at this point in the history
* Make sure compat file is up-to-date

* Export gce_instance_service_accounts()

Refactor a bunch of other stuff

* Add NEWS bullet; update vignette

* Include new function in reference index

* Don't insist on making "computeMetadata/v1/" part of the path

It's not required to determine if the VM is on GCE, so this allows is_gce() to be more minimal and, perhaps, less likely to time out (that is pure speculation).

* This should have always been `||`, not `%||%`

* Hardening
  • Loading branch information
jennybc authored Jan 30, 2023
1 parent 27b701b commit 254d951
Show file tree
Hide file tree
Showing 11 changed files with 141 additions and 80 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ export(gargle_oauth_email)
export(gargle_oauth_sitrep)
export(gargle_oob_default)
export(gargle_verbosity)
export(gce_instance_service_accounts)
export(init_AuthState)
export(local_cred_funs)
export(local_gargle_verbosity)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ As a bridging measure, `gargle_oauth_client` currently inherits from httr's `oau

`credentials_gce()` no longer asks the user about initiating an OAuth cache, which is not relevant to that flow (#221).

`gce_instance_service_accounts()` is a newly exported utility that exposes the service accounts available from the metadata server for the current instance (#234).

`vignette("non-interactive-auth")` has a new section "Workload Identity on Google Kubernetes Engine (GKE)" that explains how gargle supports the use of workload identity for applications running on GKE. This is the recommended method of auth in R code running on GKE that needs to access other Google Cloud services, such as the BigQuery API (#197, #223, @MarkEdmondson1234).

## Credential function registry
Expand Down
14 changes: 10 additions & 4 deletions R/compat-purrr.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
# nocov start - compat-purrr.R
# Latest version: https://github.com/r-lib/rlang/blob/main/R/compat-purrr.R

# ---
# repo: r-lib/rlang
# file: compat-purrr.R
# last-updated: 2022-06-07
# license: https://unlicense.org
# ---
#
# This file provides a minimal shim to provide a purrr-like API on top of
# base R functions. They are not drop-in replacements but allow a similar style
# of programming.
#
# Changelog:
# ## Changelog
#
# 2022-06-07:
# * `transpose()` is now more consistent with purrr when inner names
Expand All @@ -22,6 +26,8 @@
# * Removed `*_cpl()` functions
# * Used `as_function()` to allow use of `~`
# * Used `.` prefix for helpers
#
# nocov start

map <- function(.x, .f, ...) {
.f <- as_function(.f, env = global_env())
Expand Down
67 changes: 47 additions & 20 deletions R/credentials_gce.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
credentials_gce <- function(scopes = "https://www.googleapis.com/auth/cloud-platform",
service_account = "default", ...) {
gargle_debug("trying {.fun credentials_gce}")
if (!detect_gce()) {
if (!is_gce()) {
return(NULL)
}

Expand Down Expand Up @@ -136,22 +136,30 @@ GceToken <- R6::R6Class("GceToken", inherit = httr::Token2.0, list(
revoke = function() {}
))

gce_metadata_url <- function() {
gce_metadata_hostname <- function() {
use_ip <- getOption("gargle.gce.use_ip", FALSE)
root_url <- Sys.getenv("GCE_METADATA_URL", "metadata.google.internal")
if (use_ip) {
root_url <- Sys.getenv("GCE_METADATA_IP", "169.254.169.254")
if (isTRUE(use_ip)) {
Sys.getenv("GCE_METADATA_IP", "169.254.169.254")
} else {
Sys.getenv("GCE_METADATA_URL", "metadata.google.internal")
}
paste0("http://", root_url, "/")
}

gce_metadata_request <- function(path, stop_on_error = TRUE) {
root_url <- gce_metadata_url()
gce_metadata_request <- function(path = "", query = NULL, stop_on_error = TRUE) {
# TODO(craigcitro): Add options to ignore proxies.
if (grepl("^/", path)) {
path <- substring(path, 2)
}
url <- paste0(root_url, "computeMetadata/v1/", path)
url_parts <- structure(
list(
scheme = "http",
hostname = gce_metadata_hostname(),
path = path,
query = query
),
class = "url"
)
url <- httr::build_url(url_parts)
timeout <- getOption("gargle.gce.timeout", default = 0.8)
response <- try(
{
Expand All @@ -178,18 +186,37 @@ gce_metadata_request <- function(path, stop_on_error = TRUE) {
response
}

detect_gce <- function() {
response <- gce_metadata_request("", stop_on_error = FALSE)
!(inherits(response, "try-error") %||% httr::http_error(response))
# https://cloud.google.com/compute/docs/instances/detect-compute-engine
# Reports whether a request to the metadata server succeeds: FALSE when the
# request comes back as a "try-error" or as an HTTP error, TRUE otherwise.
is_gce <- function() {
  resp <- gce_metadata_request(stop_on_error = FALSE)
  # A failed request is represented as a "try-error" object, not a response
  if (inherits(resp, "try-error")) {
    return(FALSE)
  }
  !httr::http_error(resp)
}

# List all service accounts available on this GCE instance.
#
# @return A list of service account names.
list_service_accounts <- function() {
accounts <- gce_metadata_request("instance/service-accounts")
ct <- httr::content(accounts, as = "text", encoding = "UTF-8")
strsplit(ct, split = "/\n", fixed = TRUE)[[1]]
#' List all service accounts available on this GCE instance
#'
#' Sends a recursive query to the GCE metadata server for the instance's
#' service accounts and reshapes the JSON response into one row per account.
#'
#' @returns A data frame, where each row is a service account, with character
#'   columns `name`, `email`, `aliases`, and `scopes`. Multiple aliases or
#'   scopes are collapsed into a single comma-separated string; an account
#'   with none gets an empty string. Due to aliasing, there is no guarantee
#'   that each row represents a distinct service account.
#'
#' @seealso The return value is built from a recursive query of the so-called
#'   "directory" of the instance's service accounts as documented in
#'   <https://cloud.google.com/compute/docs/metadata/default-metadata-values#vm_instance_metadata>.
#'
#' @export
#' @examplesIf gargle:::is_gce()
#' gce_instance_service_accounts()
gce_instance_service_accounts <- function() {
  response <- gce_metadata_request(
    "computeMetadata/v1/instance/service-accounts",
    query = list(recursive = "true")
  )
  # Turn the per-account records into per-field lists (email, aliases, scopes)
  raw <- transpose(response_as_json(response))

  # map_chr() needs exactly one string per account, but glue_collapse() gives
  # a zero-length result for empty input, so handle that case explicitly.
  collapse_csv <- function(x) {
    if (length(x) == 0) {
      return("")
    }
    glue_collapse(x, sep = ",")
  }

  # as.character() turns NULL into character(0), so an instance with no
  # service accounts still yields a zero-row data frame with all four columns
  # (data.frame() silently drops NULL arguments).
  data.frame(
    name = as.character(names(raw$email)),
    email = as.character(unlist(raw$email, use.names = FALSE)),
    aliases = map_chr(raw$aliases, collapse_csv),
    scopes = map_chr(raw$scopes, collapse_csv),
    stringsAsFactors = FALSE,
    row.names = NULL
  )
}

# TODO: why isn't scopes used here at all?
Expand All @@ -198,7 +225,7 @@ list_service_accounts <- function() {
# perhaps there are use cases where it would be helpful if we did the same:
# https://github.com/r-lib/gargle/issues/216
fetch_gce_access_token <- function(scopes, service_account) {
path <- glue("instance/service-accounts/{service_account}/token")
path <- glue("computeMetadata/v1/instance/service-accounts/{service_account}/token")
response <- gce_metadata_request(path)
httr::content(response, as = "parsed", type = "application/json")
}
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ reference:
- starts_with("cred_funs")
- starts_with("gargle_oauth_client")
- starts_with("token")
- gce_instance_service_accounts
- title: "Requests and responses"
desc: >
Helpers for forming HTTP requests and processing the response
Expand Down
25 changes: 25 additions & 0 deletions man/gce_instance_service_accounts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions tests/testthat/_snaps/credential-function-registry.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,21 @@
# We insist on valid credential function (or NULL)

Code
cred_funs_add(a = mean)
Condition
Error in `cred_funs_check()`:
! Not a valid credential function:
x Element 1

---

Code
cred_funs_set(list(a = NULL))
Condition
Error in `cred_funs_check()`:
! Not a valid credential function:
x Element 1

# We insist on uniquely named credential functions

Code
Expand Down
6 changes: 5 additions & 1 deletion tests/testthat/test-assets.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@ test_that("default options", {
gargle_quiet = NULL
))
expect_equal(gargle_oauth_cache(), NA)
expect_false(gargle_oob_default())
if (is_rstudio_server()) {
expect_true(gargle_oob_default())
} else {
expect_false(gargle_oob_default())
}
expect_null(gargle_oauth_email())
expect_equal(gargle_verbosity(), "info")
})
Expand Down
31 changes: 31 additions & 0 deletions tests/testthat/test-credentials_gce.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
test_that("GCE metadata server hostname is correct w.r.t. option and env var", {
  default_hostname <- "metadata.google.internal"
  custom_hostname <- "some.fake.hostname"

  # Option unset, env var unset
  withr::local_options(list(gargle.gce.use_ip = NULL))
  withr::local_envvar(c(GCE_METADATA_URL = NA))
  expect_equal(gce_metadata_hostname(), default_hostname)

  # Option explicitly FALSE, env var still unset
  withr::local_options(list(gargle.gce.use_ip = FALSE))
  expect_equal(gce_metadata_hostname(), default_hostname)

  # Env var set: its value is what comes back
  withr::local_envvar(c(GCE_METADATA_URL = custom_hostname))
  expect_equal(gce_metadata_hostname(), custom_hostname)
})

test_that("GCE metadata server IP address is correct w.r.t. option and env var", {
  default_ip <- "169.254.169.254"
  custom_ip <- "1.2.3.4"

  # IP mode requested, env var unset
  withr::local_options(list(gargle.gce.use_ip = TRUE))
  withr::local_envvar(c(GCE_METADATA_IP = NA))
  expect_equal(gce_metadata_hostname(), default_ip)

  # Env var set: its value is what comes back
  withr::local_envvar(c(GCE_METADATA_IP = custom_ip))
  expect_equal(gce_metadata_hostname(), custom_ip)
})

test_that("GCE metadata detection fails not on GCE", {
  # Pin the option so the hostname env var below is actually consulted; with
  # an ambient `gargle.gce.use_ip = TRUE` the GCE_METADATA_IP path is used
  # instead and this test would depend on the machine it runs on.
  withr::local_options(list(gargle.gce.use_ip = FALSE))
  withr::local_envvar(GCE_METADATA_URL = "some.fake.hostname")
  expect_false(is_gce())
})

test_that("Can list service accounts", {
  # Only meaningful when a metadata server is reachable
  skip_if_not(is_gce(), "Not on GCE")
  expect_s3_class(gce_instance_service_accounts(), class = "data.frame")
})
54 changes: 0 additions & 54 deletions tests/testthat/test-gce-token.R

This file was deleted.

2 changes: 1 addition & 1 deletion vignettes/non-interactive-auth.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ Documentation around `GKEStartPodOperator()` within Cloud Composer can be found
server that's relevant on GKE.
- Specify the target service account, i.e. you can't just passively accept the
default, which is to use the `"default"` service account.
`gargle:::list_service_accounts()` can be helpful, e.g., if you want to
`gce_instance_service_accounts()` can be helpful, e.g., if you want to
know which service accounts your Docker container can see.

Here is example code that you might execute in your Docker container:
Expand Down

0 comments on commit 254d951

Please sign in to comment.