Skip to content

Commit

Permalink
Add support for GOOGLE_CLOUD_PROJECT env var, so users can set that once and not need to update each bigquery.Client() call. (Kaggle#552)
Browse files: browse the repository at this point in the history

* Add support for the GOOGLE_CLOUD_PROJECT env var, so users can set it once and do not need to update each bigquery.Client() call.

* Remove leftover print lines.
  • Loading branch information
vimota authored May 23, 2019
1 parent f94b6c4 commit 3f391a5
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 4 deletions.
13 changes: 10 additions & 3 deletions patches/sitecustomize.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,25 @@

bq_user_jwt = os.getenv("KAGGLE_USER_SECRETS_TOKEN")
if kaggle_proxy_token or bq_user_jwt:
from google.auth import credentials
from google.auth import credentials, environment_vars
from google.cloud import bigquery
from google.cloud.bigquery._http import Connection
# TODO: Update this to the correct kaggle.gcp path once we no longer inject modules
# from the worker.
from kaggle_gcp import PublicBigqueryClient, KaggleKernelCredentials

def monkeypatch_bq(bq_client, *args, **kwargs):
specified_project = kwargs.get('project')
specified_credentials = kwargs.get('credentials')
has_bigquery = get_integrations().has_bigquery()
if specified_project is None and specified_credentials is None and not has_bigquery:
# Prioritize passed in project id, but if it is missing look for env var.
arg_project = kwargs.get('project')
explicit_project_id = arg_project or os.environ.get(environment_vars.PROJECT)
# This is a hack to get around the bug in google-cloud library.
# Remove these two lines once this is resolved:
# https://github.com/googleapis/google-cloud-python/issues/8108
if explicit_project_id:
kwargs['project'] = explicit_project_id
if explicit_project_id is None and specified_credentials is None and not has_bigquery:
print("Using Kaggle's public dataset BigQuery integration.")
return PublicBigqueryClient(*args, **kwargs)

Expand Down
11 changes: 10 additions & 1 deletion tests/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def test_proxy_with_kwargs(self):
env.unset('KAGGLE_USER_SECRETS_TOKEN')
with env:
client = bigquery.Client(
default_query_job_config=bigquery.QueryJobConfig(maximum_bytes_billed=1e9))
default_query_job_config=bigquery.QueryJobConfig(maximum_bytes_billed=int(1e9)))
self._test_proxy(client, should_use_proxy=True)

def test_project_with_connected_account(self):
Expand Down Expand Up @@ -103,6 +103,15 @@ def test_project_with_connected_account_default_credentials(self):
client = bigquery.Client(project='ANOTHER_PROJECT')
self._test_proxy(client, should_use_proxy=False)

def test_project_with_env_var_project_default_credentials(self):
    """Check that a project given only via GOOGLE_CLOUD_PROJECT skips the proxy.

    The client is built with no arguments, so the only project information
    available is the ``GOOGLE_CLOUD_PROJECT`` environment variable. The
    test expects ``_test_proxy`` to be satisfied with
    ``should_use_proxy=False``.
    """
    env = EnvironmentVarGuard()
    # Environment for this scenario: a user secrets token, the BIGQUERY
    # kernel integration, and a project id supplied through the env var
    # rather than through a ``project=`` keyword argument.
    env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar')
    env.set('KAGGLE_KERNEL_INTEGRATIONS', 'BIGQUERY')
    env.set('GOOGLE_CLOUD_PROJECT', 'ANOTHER_PROJECT')
    with env:
        # NOTE(review): the scraped page lost indentation; nesting both
        # statements under ``with env`` mirrors the usual pattern -- confirm
        # against the repository.
        client = bigquery.Client()
        self._test_proxy(client, should_use_proxy=False)

def test_simultaneous_clients(self):
env = EnvironmentVarGuard()
env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar')
Expand Down

0 comments on commit 3f391a5

Please sign in to comment.