From 10c8755e4feb4ed3ab7e93aa169c9989d7dad43a Mon Sep 17 00:00:00 2001 From: Travis Bowen Date: Mon, 24 Feb 2025 09:37:32 -0800 Subject: [PATCH 1/8] Test fix and ability to pass REG_TEST_TOKEN --- regtests/run.sh | 31 ++++++++++--------- .../src/test_spark_sql_s3_with_privileges.py | 17 +++++++--- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/regtests/run.sh b/regtests/run.sh index a73caf70a..c034c2bce 100755 --- a/regtests/run.sh +++ b/regtests/run.sh @@ -67,23 +67,26 @@ NUM_SUCCESSES=0 export AWS_ACCESS_KEY_ID='' export AWS_SECRET_ACCESS_KEY='' -if ! output=$(curl -X POST -H "Polaris-Realm: POLARIS" "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/oauth/tokens" \ - -d "grant_type=client_credentials" \ - -d "client_id=root" \ - -d "client_secret=secret" \ - -d "scope=PRINCIPAL_ROLE:ALL"); then - logred "Error: Failed to retrieve bearer token" - exit 1 -fi +# Allow bearer token to be provided if desired +if [ "REGTEST_ROOT_BEARER_TOKEN" != "" ]; then + if ! output=$(curl -X POST -H "Polaris-Realm: POLARIS" "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/oauth/tokens" \ + -d "grant_type=client_credentials" \ + -d "client_id=root" \ + -d "client_secret=secret" \ + -d "scope=PRINCIPAL_ROLE:ALL"); then + logred "Error: Failed to retrieve bearer token" + exit 1 + fi -token=$(echo "$output" | awk -F\" '{print $4}') + token=$(echo "$output" | awk -F\" '{print $4}') -if [ "$token" == "unauthorized_client" ]; then - logred "Error: Failed to retrieve bearer token" - exit 1 -fi + if [ "$token" == "unauthorized_client" ]; then + logred "Error: Failed to retrieve bearer token" + exit 1 + fi -export REGTEST_ROOT_BEARER_TOKEN=$token + export REGTEST_ROOT_BEARER_TOKEN=$token +fi echo "Root bearer token: ${REGTEST_ROOT_BEARER_TOKEN}" diff --git a/regtests/t_pyspark/src/test_spark_sql_s3_with_privileges.py b/regtests/t_pyspark/src/test_spark_sql_s3_with_privileges.py index ce00ce58c..1af3c2307 100644 --- a/regtests/t_pyspark/src/test_spark_sql_s3_with_privileges.py +++ b/regtests/t_pyspark/src/test_spark_sql_s3_with_privileges.py @@ -490,7 +490,7 @@ def test_spark_credentials_can_delete_after_purge(root_client, snowflake_catalog Prefix=f'polaris_test/snowflake_catalog/db1/schema/{table_name}/data/') assert objects is not None assert 'Contents' in objects - assert len(objects['Contents']) >= 4 # idk, it varies - at least one file for each inser and one for the update + assert len(objects['Contents']) >= 4 # it varies - at least one file for each insert and one for the update print(f"Found {len(objects['Contents'])} data files in S3 before drop") objects = s3.list_objects(Bucket=test_bucket, Delimiter='/', @@ -603,13 +603,18 @@ def test_spark_credentials_can_write_with_random_prefix(root_client, snowflake_c print(f"Found common prefixes in S3 {objects['CommonPrefixes']}") objs_to_delete = [] + for prefix in objects['CommonPrefixes']: - data_objects = s3.list_objects(Bucket=test_bucket, Delimiter='/', - Prefix=f'{prefix["Prefix"]}schema/{table_name}/') + # Don't utilize delimiter so all files (recursive) are returned + data_objects = s3.list_objects(Bucket=test_bucket, + Prefix=f'polaris_test/snowflake_catalog/{table_name}data/') assert data_objects is not None print(data_objects) assert 'Contents' in data_objects - objs_to_delete.extend([{'Key': obj['Key']} for obj in data_objects['Contents']]) + for obj in data_objects['Contents']: + filePathMap = {'Key': obj['Key']} + assert f'/schema/{table_name}/' in filePathMap['Key'] + objs_to_delete.append(filePathMap) objects = 
s3.list_objects(Bucket=test_bucket, Delimiter='/', Prefix=f'polaris_test/snowflake_catalog/db1/schema/{table_name}/metadata/') @@ -702,7 +707,9 @@ def test_spark_object_store_layout_under_table_dir(root_client, snowflake_catalo print(f"Found common prefixes in S3 {objects['CommonPrefixes']}") objs_to_delete = [] for prefix in objects['CommonPrefixes']: - data_objects = s3.list_objects(Bucket=test_bucket, Delimiter='/', + # Files may be under further subdirectories, so omit the + # delimiter so that all files (recursive) are returned. + data_objects = s3.list_objects(Bucket=test_bucket, Prefix=f'{prefix["Prefix"]}') assert data_objects is not None print(data_objects) From 0b9940ed1314afef31f1246aa6bd00a2f847881d Mon Sep 17 00:00:00 2001 From: Travis Bowen Date: Mon, 24 Feb 2025 13:11:50 -0800 Subject: [PATCH 2/8] Ability to configure bucket prefix --- regtests/docker-compose.yml | 1 + regtests/t_pyspark/src/conftest.py | 16 +++++-- .../src/test_spark_sql_s3_with_privileges.py | 48 +++++++++---------- 3 files changed, 38 insertions(+), 27 deletions(-) diff --git a/regtests/docker-compose.yml b/regtests/docker-compose.yml index 94a0f7502..bc7ecdde2 100644 --- a/regtests/docker-compose.yml +++ b/regtests/docker-compose.yml @@ -52,6 +52,7 @@ services: environment: AWS_TEST_ENABLED: $AWS_TEST_ENABLED AWS_STORAGE_BUCKET: $AWS_STORAGE_BUCKET + AWS_BUCKET_BASE_LOCATION_PREFIX: $AWS_BUCKET_BASE_LOCATION_PREFIX AWS_ROLE_ARN: $AWS_ROLE_ARN AWS_TEST_BASE: $AWS_TEST_BASE GCS_TEST_ENABLED: $GCS_TEST_ENABLED diff --git a/regtests/t_pyspark/src/conftest.py b/regtests/t_pyspark/src/conftest.py index db858acdb..69bb83efa 100644 --- a/regtests/t_pyspark/src/conftest.py +++ b/regtests/t_pyspark/src/conftest.py @@ -57,6 +57,16 @@ def test_bucket(): def aws_role_arn(): return os.getenv('AWS_ROLE_ARN') +@pytest.fixture +def aws_bucket_base_location_prefix(): + """ + :return: Base location prefix for tests, excluding leading and trailing '/' + Provides a default if null or empty + """ + default_val = 'polaris_test' + bucket_prefix = os.getenv('AWS_BUCKET_BASE_LOCATION_PREFIX',default_val) + return default_val if bucket_prefix == '' else bucket_prefix + @pytest.fixture def catalog_client(polaris_catalog_url): """ @@ -72,13 +82,13 @@ def catalog_client(polaris_catalog_url): @pytest.fixture -def snowflake_catalog(root_client, catalog_client, test_bucket, aws_role_arn): +def snowflake_catalog(root_client, catalog_client, test_bucket, aws_role_arn, aws_bucket_base_location_prefix): storage_conf = AwsStorageConfigInfo(storage_type="S3", - allowed_locations=[f"s3://{test_bucket}/polaris_test/"], + allowed_locations=[f"s3://{test_bucket}/{aws_bucket_base_location_prefix}/"], role_arn=aws_role_arn) catalog_name = 'snowflake' catalog = Catalog(name=catalog_name, type='INTERNAL', properties={ - "default-base-location": f"s3://{test_bucket}/polaris_test/snowflake_catalog", + "default-base-location": f"s3://{test_bucket}/{aws_bucket_base_location_prefix}/snowflake_catalog", "client.credentials-provider": "software.amazon.awssdk.auth.credentials.SystemPropertyCredentialsProvider" }, storage_config_info=storage_conf) diff --git a/regtests/t_pyspark/src/test_spark_sql_s3_with_privileges.py b/regtests/t_pyspark/src/test_spark_sql_s3_with_privileges.py index 1af3c2307..392db25f6 100644 --- a/regtests/t_pyspark/src/test_spark_sql_s3_with_privileges.py +++ b/regtests/t_pyspark/src/test_spark_sql_s3_with_privileges.py @@ -426,7 +426,7 @@ def test_spark_cannot_create_view_overlapping_table(root_client, snowflake_catal 
@pytest.mark.skipif(os.environ.get('AWS_TEST_ENABLED', 'False').lower() != 'true', reason='AWS_TEST_ENABLED is not set or is false') def test_spark_credentials_can_delete_after_purge(root_client, snowflake_catalog, polaris_catalog_url, snowman, - snowman_catalog_client, test_bucket): + snowman_catalog_client, test_bucket, aws_bucket_base_location_prefix): """ Using snowman, create namespaces and a table. Insert into the table in multiple operations and update existing records to generate multiple metadata.json files and manfiests. Drop the table with purge=true. Poll S3 and validate all of @@ -487,14 +487,14 @@ def test_spark_credentials_can_delete_after_purge(root_client, snowflake_catalog aws_session_token=response.config['s3.session-token']) objects = s3.list_objects(Bucket=test_bucket, Delimiter='/', - Prefix=f'polaris_test/snowflake_catalog/db1/schema/{table_name}/data/') + Prefix=f'{aws_bucket_base_location_prefix}/snowflake_catalog/db1/schema/{table_name}/data/') assert objects is not None assert 'Contents' in objects assert len(objects['Contents']) >= 4 # it varies - at least one file for each insert and one for the update print(f"Found {len(objects['Contents'])} data files in S3 before drop") objects = s3.list_objects(Bucket=test_bucket, Delimiter='/', - Prefix=f'polaris_test/snowflake_catalog/db1/schema/{table_name}/metadata/') + Prefix=f'{aws_bucket_base_location_prefix}/snowflake_catalog/db1/schema/{table_name}/metadata/') assert objects is not None assert 'Contents' in objects assert len(objects['Contents']) == 15 # 5 metadata.json files, 4 manifest lists, and 6 manifests @@ -514,14 +514,14 @@ def test_spark_credentials_can_delete_after_purge(root_client, snowflake_catalog while 'Contents' in objects and len(objects['Contents']) > 0 and attempts < 60: time.sleep(1) # seconds, not milliseconds ;) objects = s3.list_objects(Bucket=test_bucket, Delimiter='/', - Prefix=f'polaris_test/snowflake_catalog/db1/schema/{table_name}/data/') + Prefix=f'{aws_bucket_base_location_prefix}/snowflake_catalog/db1/schema/{table_name}/data/') attempts = attempts + 1 if 'Contents' in objects and len(objects['Contents']) > 0: pytest.fail(f"Expected all data to be deleted, but found metadata files {objects['Contents']}") objects = s3.list_objects(Bucket=test_bucket, Delimiter='/', - Prefix=f'polaris_test/snowflake_catalog/db1/schema/{table_name}/data/') + Prefix=f'{aws_bucket_base_location_prefix}/snowflake_catalog/db1/schema/{table_name}/data/') if 'Contents' in objects and len(objects['Contents']) > 0: pytest.fail(f"Expected all data to be deleted, but found data files {objects['Contents']}") @@ -529,7 +529,7 @@ def test_spark_credentials_can_delete_after_purge(root_client, snowflake_catalog @pytest.mark.skipif(os.environ.get('AWS_TEST_ENABLED', 'False').lower() != 'true', reason='AWS_TEST_ENABLED is not set or is false') def test_spark_credentials_can_write_with_random_prefix(root_client, snowflake_catalog, polaris_catalog_url, snowman, - snowman_catalog_client, test_bucket): + snowman_catalog_client, test_bucket, aws_bucket_base_location_prefix): """ Update the catalog configuration to support unstructured table locations. 
Using snowman, create namespaces and a table configured to use object-store layout in a folder under the catalog root, outside of the default table @@ -559,7 +559,7 @@ def test_spark_credentials_can_write_with_random_prefix(root_client, snowflake_c spark.sql('SHOW NAMESPACES') spark.sql('USE db1.schema') spark.sql( - f"CREATE TABLE {table_name} (col1 int, col2 string) TBLPROPERTIES ('write.object-storage.enabled'='true','write.data.path'='s3://{test_bucket}/polaris_test/snowflake_catalog/{table_name}data')") + f"CREATE TABLE {table_name} (col1 int, col2 string) TBLPROPERTIES ('write.object-storage.enabled'='true','write.data.path'='s3://{test_bucket}/{aws_bucket_base_location_prefix}/snowflake_catalog/{table_name}data')") spark.sql('SHOW TABLES') # several inserts and an update, which should cause earlier files to show up as deleted in the later manifests @@ -597,7 +597,7 @@ def test_spark_credentials_can_write_with_random_prefix(root_client, snowflake_c aws_session_token=response.config['s3.session-token']) objects = s3.list_objects(Bucket=test_bucket, Delimiter='/', - Prefix=f'polaris_test/snowflake_catalog/{table_name}data/') + Prefix=f'{aws_bucket_base_location_prefix}/snowflake_catalog/{table_name}data/') assert objects is not None assert len(objects['CommonPrefixes']) >= 3 @@ -607,7 +607,7 @@ def test_spark_credentials_can_write_with_random_prefix(root_client, snowflake_c for prefix in objects['CommonPrefixes']: # Don't utilize delimiter so all files (recursive) are returned data_objects = s3.list_objects(Bucket=test_bucket, - Prefix=f'polaris_test/snowflake_catalog/{table_name}data/') + Prefix=f'{aws_bucket_base_location_prefix}/snowflake_catalog/{table_name}data/') assert data_objects is not None print(data_objects) assert 'Contents' in data_objects @@ -617,7 +617,7 @@ def test_spark_credentials_can_write_with_random_prefix(root_client, snowflake_c objs_to_delete.append(filePathMap) objects = s3.list_objects(Bucket=test_bucket, Delimiter='/', - Prefix=f'polaris_test/snowflake_catalog/db1/schema/{table_name}/metadata/') + Prefix=f'{aws_bucket_base_location_prefix}/snowflake_catalog/db1/schema/{table_name}/metadata/') assert objects is not None assert 'Contents' in objects assert len(objects['Contents']) == 15 # 5 metadata.json files, 4 manifest lists, and 6 manifests @@ -636,7 +636,7 @@ def test_spark_credentials_can_write_with_random_prefix(root_client, snowflake_c @pytest.mark.skipif(os.environ.get('AWS_TEST_ENABLED', 'False').lower() != 'true', reason='AWS_TEST_ENABLED is not set or is false') def test_spark_object_store_layout_under_table_dir(root_client, snowflake_catalog, polaris_catalog_url, snowman, - snowman_catalog_client, test_bucket): + snowman_catalog_client, test_bucket, aws_bucket_base_location_prefix): """ Using snowman, create namespaces and a table configured to use object-store layout, using a folder under the default table directory structure. 
Insert into the table in multiple operations and update existing records @@ -660,7 +660,7 @@ def test_spark_object_store_layout_under_table_dir(root_client, snowflake_catalo spark.sql('CREATE NAMESPACE db1.schema') spark.sql('SHOW NAMESPACES') spark.sql('USE db1.schema') - table_base_dir = f'polaris_test/snowflake_catalog/db1/schema/{table_name}/obj_layout/' + table_base_dir = f'{aws_bucket_base_location_prefix}/snowflake_catalog/db1/schema/{table_name}/obj_layout/' spark.sql( f"CREATE TABLE {table_name} (col1 int, col2 string) TBLPROPERTIES ('write.object-storage.enabled'='true','write.data.path'='s3://{test_bucket}/{table_base_dir}')") spark.sql('SHOW TABLES') @@ -717,7 +717,7 @@ def test_spark_object_store_layout_under_table_dir(root_client, snowflake_catalo objs_to_delete.extend([{'Key': obj['Key']} for obj in data_objects['Contents']]) objects = s3.list_objects(Bucket=test_bucket, Delimiter='/', - Prefix=f'polaris_test/snowflake_catalog/db1/schema/{table_name}/metadata/') + Prefix=f'{aws_bucket_base_location_prefix}/snowflake_catalog/db1/schema/{table_name}/metadata/') assert objects is not None assert 'Contents' in objects assert len(objects['Contents']) == 15 # 5 metadata.json files, 4 manifest lists, and 6 manifests @@ -794,7 +794,7 @@ def test_spark_credentials_can_create_views(snowflake_catalog, polaris_catalog_u @pytest.mark.skipif(os.environ.get('AWS_TEST_ENABLED', 'False').lower() != 'true', reason='AWS_TEST_ENABLED is not set or is false') def test_spark_credentials_s3_direct_with_write(root_client, snowflake_catalog, polaris_catalog_url, - snowman, snowman_catalog_client, test_bucket): + snowman, snowman_catalog_client, test_bucket, aws_bucket_base_location_prefix): """ Create two tables using Spark. Then call the loadTable api directly with snowman token to fetch the vended credentials for the first table. @@ -833,13 +833,13 @@ def test_spark_credentials_s3_direct_with_write(root_client, snowflake_catalog, aws_session_token=response.config['s3.session-token']) objects = s3.list_objects(Bucket=test_bucket, Delimiter='/', - Prefix='polaris_test/snowflake_catalog/db1/schema/iceberg_table/') + Prefix=f'{aws_bucket_base_location_prefix}/snowflake_catalog/db1/schema/iceberg_table/') assert objects is not None assert 'CommonPrefixes' in objects assert len(objects['CommonPrefixes']) > 0 objects = s3.list_objects(Bucket=test_bucket, Delimiter='/', - Prefix='polaris_test/snowflake_catalog/db1/schema/iceberg_table/metadata/') + Prefix=f'{aws_bucket_base_location_prefix}/snowflake_catalog/db1/schema/iceberg_table/metadata/') assert objects is not None assert 'Contents' in objects assert len(objects['Contents']) > 0 @@ -860,7 +860,7 @@ def test_spark_credentials_s3_direct_with_write(root_client, snowflake_catalog, # list files in the other table's directory. 
The access policy should restrict this try: objects = s3.list_objects(Bucket=test_bucket, Delimiter='/', - Prefix='polaris_test/snowflake_catalog/db1/schema/iceberg_table_2/metadata/') + Prefix=f'{aws_bucket_base_location_prefix}/snowflake_catalog/db1/schema/iceberg_table_2/metadata/') pytest.fail('Expected exception listing file outside of table directory') except botocore.exceptions.ClientError as error: print(error) @@ -886,7 +886,7 @@ def test_spark_credentials_s3_direct_with_write(root_client, snowflake_catalog, @pytest.mark.skipif(os.environ.get('AWS_TEST_ENABLED', 'false').lower() != 'true', reason='AWS_TEST_ENABLED is not set or is false') def test_spark_credentials_s3_direct_without_write(root_client, snowflake_catalog, polaris_catalog_url, - snowman, reader_catalog_client, test_bucket): + snowman, reader_catalog_client, test_bucket, aws_bucket_base_location_prefix): """ Create two tables using Spark. Then call the loadTable api directly with test_reader token to fetch the vended credentials for the first table. @@ -927,7 +927,7 @@ def test_spark_credentials_s3_direct_without_write(root_client, snowflake_catalo aws_session_token=response.config['s3.session-token']) objects = s3.list_objects(Bucket=test_bucket, Delimiter='/', - Prefix='polaris_test/snowflake_catalog/db1/schema/iceberg_table/metadata/') + Prefix=f'{aws_bucket_base_location_prefix}/snowflake_catalog/db1/schema/iceberg_table/metadata/') assert objects is not None assert 'Contents' in objects assert len(objects['Contents']) > 0 @@ -950,7 +950,7 @@ def test_spark_credentials_s3_direct_without_write(root_client, snowflake_catalo # list files in the other table's directory. The access policy should restrict this try: objects = s3.list_objects(Bucket=test_bucket, Delimiter='/', - Prefix='polaris_test/snowflake_catalog/db1/schema/iceberg_table_2/metadata/') + Prefix=f'{aws_bucket_base_location_prefix}/snowflake_catalog/db1/schema/iceberg_table_2/metadata/') pytest.fail('Expected exception listing file outside of table directory') except botocore.exceptions.ClientError as error: print(error) @@ -1039,7 +1039,7 @@ def create_principal(polaris_url, polaris_catalog_url, api, principal_name): @pytest.mark.skipif(os.environ.get('AWS_TEST_ENABLED', 'False').lower() != 'true', reason='AWS_TEST_ENABLED is not set or is false') def test_spark_credentials_s3_scoped_to_metadata_data_locations(root_client, snowflake_catalog, polaris_catalog_url, - snowman, snowman_catalog_client, test_bucket): + snowman, snowman_catalog_client, test_bucket, aws_bucket_base_location_prefix): """ Create a table using Spark. Then call the loadTable api directly with snowman token to fetch the vended credentials for the table. 
@@ -1060,10 +1060,10 @@ def test_spark_credentials_s3_scoped_to_metadata_data_locations(root_client, sno spark.sql('USE db1.schema') spark.sql('CREATE TABLE iceberg_table_scope_loc(col1 int, col2 string)') spark.sql( - f'''CREATE TABLE iceberg_table_scope_loc_slashes (col1 int, col2 string) LOCATION \'s3://{test_bucket}/polaris_test/snowflake_catalog/db1/schema/iceberg_table_scope_loc_slashes/path_with_slashes///////\'''') + f'''CREATE TABLE iceberg_table_scope_loc_slashes (col1 int, col2 string) LOCATION \'s3://{test_bucket}/{aws_bucket_base_location_prefix}/snowflake_catalog/db1/schema/iceberg_table_scope_loc_slashes/path_with_slashes///////\'''') - prefix1 = 'polaris_test/snowflake_catalog/db1/schema/iceberg_table_scope_loc' - prefix2 = 'polaris_test/snowflake_catalog/db1/schema/iceberg_table_scope_loc_slashes/path_with_slashes' + prefix1 = f'{aws_bucket_base_location_prefix}/snowflake_catalog/db1/schema/iceberg_table_scope_loc' + prefix2 = f'{aws_bucket_base_location_prefix}/snowflake_catalog/db1/schema/iceberg_table_scope_loc_slashes/path_with_slashes' response1 = snowman_catalog_client.load_table(snowflake_catalog.name, unquote('db1%1Fschema'), "iceberg_table_scope_loc", "vended-credentials") From 291801f282b3bf4af68eadd7669a7b69c315bbfb Mon Sep 17 00:00:00 2001 From: Travis Bowen Date: Mon, 24 Feb 2025 13:49:14 -0800 Subject: [PATCH 3/8] Add reg_test env var to docker compose --- regtests/docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/regtests/docker-compose.yml b/regtests/docker-compose.yml index bc7ecdde2..70bd13f61 100644 --- a/regtests/docker-compose.yml +++ b/regtests/docker-compose.yml @@ -68,6 +68,7 @@ services: AWS_CROSS_REGION_BUCKET: $AWS_CROSS_REGION_BUCKET AWS_ROLE_FOR_CROSS_REGION_BUCKET: $AWS_ROLE_FOR_CROSS_REGION_BUCKET AWS_REGION_FOR_CROSS_REGION_TEST: $AWS_REGION_FOR_CROSS_REGION_TEST + REGTEST_ROOT_BEARER_TOKEN: $REGTEST_ROOT_BEARER_TOKEN volumes: - ./output:/tmp/polaris-regtests/ - ./credentials:/tmp/credentials/ From 2f41a84caf397de2b4e7882de2ab4d1eb201bb39 Mon Sep 17 00:00:00 2001 From: Travis Bowen Date: Wed, 26 Feb 2025 15:22:21 -0800 Subject: [PATCH 4/8] PR Comments --- regtests/run.sh | 2 +- regtests/t_pyspark/src/conftest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/regtests/run.sh b/regtests/run.sh index c034c2bce..6525bc14b 100755 --- a/regtests/run.sh +++ b/regtests/run.sh @@ -68,7 +68,7 @@ export AWS_ACCESS_KEY_ID='' export AWS_SECRET_ACCESS_KEY='' # Allow bearer token to be provided if desired -if [ "REGTEST_ROOT_BEARER_TOKEN" != "" ]; then +if [[-n "$REGTEST_ROOT_BEARER_TOKEN"]]; then if ! 
output=$(curl -X POST -H "Polaris-Realm: POLARIS" "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/oauth/tokens" \ -d "grant_type=client_credentials" \ -d "client_id=root" \ diff --git a/regtests/t_pyspark/src/conftest.py b/regtests/t_pyspark/src/conftest.py index 69bb83efa..645d21600 100644 --- a/regtests/t_pyspark/src/conftest.py +++ b/regtests/t_pyspark/src/conftest.py @@ -64,7 +64,7 @@ def aws_bucket_base_location_prefix(): Provides a default if null or empty """ default_val = 'polaris_test' - bucket_prefix = os.getenv('AWS_BUCKET_BASE_LOCATION_PREFIX',default_val) + bucket_prefix = os.getenv('AWS_BUCKET_BASE_LOCATION_PREFIX', default_val) return default_val if bucket_prefix == '' else bucket_prefix @pytest.fixture From 05011bb21842f3ef8ca4daecbca56973a5339d48 Mon Sep 17 00:00:00 2001 From: Travis Bowen Date: Wed, 26 Feb 2025 15:34:33 -0800 Subject: [PATCH 5/8] Add path prefix --- regtests/docker-compose.yml | 1 + regtests/t_pyspark/src/conftest.py | 15 +++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/regtests/docker-compose.yml b/regtests/docker-compose.yml index 70bd13f61..aed9e054f 100644 --- a/regtests/docker-compose.yml +++ b/regtests/docker-compose.yml @@ -69,6 +69,7 @@ services: AWS_ROLE_FOR_CROSS_REGION_BUCKET: $AWS_ROLE_FOR_CROSS_REGION_BUCKET AWS_REGION_FOR_CROSS_REGION_TEST: $AWS_REGION_FOR_CROSS_REGION_TEST REGTEST_ROOT_BEARER_TOKEN: $REGTEST_ROOT_BEARER_TOKEN + POLARIS_PATH_PREFIX: $POLARIS_PATH_PREFIX volumes: - ./output:/tmp/polaris-regtests/ - ./credentials:/tmp/credentials/ diff --git a/regtests/t_pyspark/src/conftest.py b/regtests/t_pyspark/src/conftest.py index 645d21600..ca70c456d 100644 --- a/regtests/t_pyspark/src/conftest.py +++ b/regtests/t_pyspark/src/conftest.py @@ -39,15 +39,22 @@ def polaris_host(): def polaris_port(): return int(os.getenv('POLARIS_PORT', '8181')) +@pytest.fixture +def polaris_path_prefix(): + """ + Used to provide a path prefix between the port number and the standard polaris endpoint paths + :return: + """ + return os.getenv('POLARIS_PATH_PREFIX', '') @pytest.fixture -def polaris_url(polaris_host, polaris_port): - return f"http://{polaris_host}:{polaris_port}/api/management/v1" +def polaris_url(polaris_host, polaris_port, polaris_path_prefix): + return f"http://{polaris_host}:{polaris_port}{polaris_path_prefix}/api/management/v1" @pytest.fixture -def polaris_catalog_url(polaris_host, polaris_port): - return f"http://{polaris_host}:{polaris_port}/api/catalog" +def polaris_catalog_url(polaris_host, polaris_port, polaris_path_prefix): + return f"http://{polaris_host}:{polaris_port}{polaris_path_prefix}/api/catalog" @pytest.fixture def test_bucket(): From b082e1646f789aa605f741170dcfb7f626fd89ce Mon Sep 17 00:00:00 2001 From: Travis Bowen Date: Wed, 26 Feb 2025 17:21:26 -0800 Subject: [PATCH 6/8] Nit Updates --- regtests/docker-compose.yml | 3 --- regtests/run.sh | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/regtests/docker-compose.yml b/regtests/docker-compose.yml index aed9e054f..94a0f7502 100644 --- a/regtests/docker-compose.yml +++ b/regtests/docker-compose.yml @@ -52,7 +52,6 @@ services: environment: AWS_TEST_ENABLED: $AWS_TEST_ENABLED AWS_STORAGE_BUCKET: $AWS_STORAGE_BUCKET - AWS_BUCKET_BASE_LOCATION_PREFIX: $AWS_BUCKET_BASE_LOCATION_PREFIX AWS_ROLE_ARN: $AWS_ROLE_ARN AWS_TEST_BASE: $AWS_TEST_BASE GCS_TEST_ENABLED: $GCS_TEST_ENABLED @@ -68,8 +67,6 @@ services: AWS_CROSS_REGION_BUCKET: $AWS_CROSS_REGION_BUCKET AWS_ROLE_FOR_CROSS_REGION_BUCKET: $AWS_ROLE_FOR_CROSS_REGION_BUCKET 
AWS_REGION_FOR_CROSS_REGION_TEST: $AWS_REGION_FOR_CROSS_REGION_TEST - REGTEST_ROOT_BEARER_TOKEN: $REGTEST_ROOT_BEARER_TOKEN - POLARIS_PATH_PREFIX: $POLARIS_PATH_PREFIX volumes: - ./output:/tmp/polaris-regtests/ - ./credentials:/tmp/credentials/ diff --git a/regtests/run.sh b/regtests/run.sh index 6525bc14b..769531ae5 100755 --- a/regtests/run.sh +++ b/regtests/run.sh @@ -68,7 +68,7 @@ export AWS_ACCESS_KEY_ID='' export AWS_SECRET_ACCESS_KEY='' # Allow bearer token to be provided if desired -if [[-n "$REGTEST_ROOT_BEARER_TOKEN"]]; then +if [[ -z "$REGTEST_ROOT_BEARER_TOKEN" ]]; then if ! output=$(curl -X POST -H "Polaris-Realm: POLARIS" "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/oauth/tokens" \ -d "grant_type=client_credentials" \ -d "client_id=root" \ From fc666163c7920ec2a5d5574ef1672b10f81281a7 Mon Sep 17 00:00:00 2001 From: Travis Bowen Date: Wed, 26 Feb 2025 18:42:52 -0800 Subject: [PATCH 7/8] More updates --- regtests/t_pyspark/src/conftest.py | 19 ++++++++++---- .../src/test_spark_sql_s3_with_privileges.py | 26 +++++++------------ 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/regtests/t_pyspark/src/conftest.py b/regtests/t_pyspark/src/conftest.py index ca70c456d..8178d8a15 100644 --- a/regtests/t_pyspark/src/conftest.py +++ b/regtests/t_pyspark/src/conftest.py @@ -19,6 +19,7 @@ import codecs import os +import uuid from typing import List import pytest @@ -48,13 +49,21 @@ def polaris_path_prefix(): return os.getenv('POLARIS_PATH_PREFIX', '') @pytest.fixture -def polaris_url(polaris_host, polaris_port, polaris_path_prefix): - return f"http://{polaris_host}:{polaris_port}{polaris_path_prefix}/api/management/v1" +def polaris_url_scheme(): + """ + Used to provide a path prefix between the port number and the standard polaris endpoint paths + :return: + """ + return os.getenv('POLARIS_URL_SCHEME', 'http://') + +@pytest.fixture +def polaris_url(polaris_url_scheme, polaris_host, polaris_port, polaris_path_prefix): + return f"{polaris_url_scheme}{polaris_host}:{polaris_port}{polaris_path_prefix}/api/management/v1" @pytest.fixture -def polaris_catalog_url(polaris_host, polaris_port, polaris_path_prefix): - return f"http://{polaris_host}:{polaris_port}{polaris_path_prefix}/api/catalog" +def polaris_catalog_url(polaris_url_scheme, polaris_host, polaris_port, polaris_path_prefix): + return f"{polaris_url_scheme}{polaris_host}:{polaris_port}{polaris_path_prefix}/api/catalog" @pytest.fixture def test_bucket(): @@ -93,7 +102,7 @@ def snowflake_catalog(root_client, catalog_client, test_bucket, aws_role_arn, aw storage_conf = AwsStorageConfigInfo(storage_type="S3", allowed_locations=[f"s3://{test_bucket}/{aws_bucket_base_location_prefix}/"], role_arn=aws_role_arn) - catalog_name = 'snowflake' + catalog_name = f'snowflake_{str(uuid.uuid4())[-10:]}' catalog = Catalog(name=catalog_name, type='INTERNAL', properties={ "default-base-location": f"s3://{test_bucket}/{aws_bucket_base_location_prefix}/snowflake_catalog", "client.credentials-provider": "software.amazon.awssdk.auth.credentials.SystemPropertyCredentialsProvider" diff --git a/regtests/t_pyspark/src/test_spark_sql_s3_with_privileges.py b/regtests/t_pyspark/src/test_spark_sql_s3_with_privileges.py index 392db25f6..c121c7009 100644 --- a/regtests/t_pyspark/src/test_spark_sql_s3_with_privileges.py +++ b/regtests/t_pyspark/src/test_spark_sql_s3_with_privileges.py @@ -50,8 +50,8 @@ def snowman(polaris_url, polaris_catalog_url, root_client, snowflake_catalog): :param snowflake_catalog: :return: """ - snowman_name = "snowman" 
- table_writer_rolename = "table_writer" + snowman_name = f"snowman_{str(uuid.uuid4())[-10:]}" + table_writer_rolename = f"table_writer_{str(uuid.uuid4())[-10:]}" snowflake_writer_rolename = "snowflake_writer" try: snowman = create_principal(polaris_url, polaris_catalog_url, root_client, snowman_name) @@ -1012,28 +1012,22 @@ def test_spark_credentials_s3_direct_without_read( def create_principal(polaris_url, polaris_catalog_url, api, principal_name): principal = Principal(name=principal_name, type="SERVICE") - try: - principal_result = api.create_principal(CreatePrincipalRequest(principal=principal)) + principal_result = api.create_principal(CreatePrincipalRequest(principal=principal)) - token_client = CatalogApiClient(Configuration(username=principal_result.principal.client_id, + token_client = CatalogApiClient(Configuration(username=principal_result.principal.client_id, password=principal_result.credentials.client_secret, host=polaris_catalog_url)) - oauth_api = IcebergOAuth2API(token_client) - token = oauth_api.get_token(scope='PRINCIPAL_ROLE:ALL', client_id=principal_result.principal.client_id, + oauth_api = IcebergOAuth2API(token_client) + token = oauth_api.get_token(scope='PRINCIPAL_ROLE:ALL', client_id=principal_result.principal.client_id, client_secret=principal_result.credentials.client_secret, grant_type='client_credentials', _headers={'realm': 'POLARIS'}) - rotate_client = ManagementApiClient(Configuration(access_token=token.access_token, + rotate_client = ManagementApiClient(Configuration(access_token=token.access_token, host=polaris_url)) - rotate_api = PolarisDefaultApi(rotate_client) + rotate_api = PolarisDefaultApi(rotate_client) - rotate_credentials = rotate_api.rotate_credentials(principal_name=principal_name) - return rotate_credentials - except ApiException as e: - if e.status == 409: - return rotate_api.rotate_credentials(principal_name=principal_name) - else: - raise e + rotate_credentials = rotate_api.rotate_credentials(principal_name=principal_name) + return rotate_credentials @pytest.mark.skipif(os.environ.get('AWS_TEST_ENABLED', 'False').lower() != 'true', From 319eca0132c9f7b363fe2f556fe7bd8b68865abb Mon Sep 17 00:00:00 2001 From: Travis Bowen Date: Thu, 27 Feb 2025 16:12:47 -0800 Subject: [PATCH 8/8] PR Feedback --- regtests/t_pyspark/src/conftest.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/regtests/t_pyspark/src/conftest.py b/regtests/t_pyspark/src/conftest.py index 8178d8a15..938040438 100644 --- a/regtests/t_pyspark/src/conftest.py +++ b/regtests/t_pyspark/src/conftest.py @@ -43,7 +43,8 @@ def polaris_port(): @pytest.fixture def polaris_path_prefix(): """ - Used to provide a path prefix between the port number and the standard polaris endpoint paths + Used to provide a path prefix between the port number and the standard polaris endpoint paths. 
+ No leading or trailing / :return: """ return os.getenv('POLARIS_PATH_PREFIX', '') @@ -51,19 +52,21 @@ def polaris_path_prefix(): @pytest.fixture def polaris_url_scheme(): """ - Used to provide a path prefix between the port number and the standard polaris endpoint paths + The URL Schema - either http or https - no : or trailing / :return: """ - return os.getenv('POLARIS_URL_SCHEME', 'http://') + return os.getenv('POLARIS_URL_SCHEME', 'http') @pytest.fixture def polaris_url(polaris_url_scheme, polaris_host, polaris_port, polaris_path_prefix): - return f"{polaris_url_scheme}{polaris_host}:{polaris_port}{polaris_path_prefix}/api/management/v1" + polaris_path_prefix = polaris_path_prefix if len(polaris_path_prefix) == 0 else '/' + polaris_path_prefix + return f"{polaris_url_scheme}://{polaris_host}:{polaris_port}{polaris_path_prefix}/api/management/v1" @pytest.fixture def polaris_catalog_url(polaris_url_scheme, polaris_host, polaris_port, polaris_path_prefix): - return f"{polaris_url_scheme}{polaris_host}:{polaris_port}{polaris_path_prefix}/api/catalog" + polaris_path_prefix = polaris_path_prefix if len(polaris_path_prefix) == 0 else '/' + polaris_path_prefix + return f"{polaris_url_scheme}://{polaris_host}:{polaris_port}{polaris_path_prefix}/api/catalog" @pytest.fixture def test_bucket():
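
A minimal usage sketch for the configuration knobs introduced across this series. The variable names (REGTEST_ROOT_BEARER_TOKEN, AWS_BUCKET_BASE_LOCATION_PREFIX, POLARIS_URL_SCHEME, POLARIS_PATH_PREFIX) come from the patches above; the concrete values and the way the test run is launched are illustrative assumptions only, not part of the change.

    # Hypothetical environment for a regtest run (all values are placeholders).
    export REGTEST_ROOT_BEARER_TOKEN='<pre-provisioned root token>'  # when non-empty, run.sh skips the curl token exchange
    export AWS_BUCKET_BASE_LOCATION_PREFIX='my_prefix'               # S3 base prefix; conftest.py falls back to 'polaris_test' when unset or empty
    export POLARIS_URL_SCHEME='https'                                # scheme only, no '://' (default 'http')
    export POLARIS_PATH_PREFIX='polaris'                             # no leading or trailing '/' (default empty)
    # ...then launch the regression tests as usual (e.g. regtests/run.sh).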