From 3518cb3cf05b72de944d30709a72c7daf7e09f49 Mon Sep 17 00:00:00 2001
From: "D. Ferruzzi"
Date: Fri, 6 Sep 2024 12:16:18 -0700
Subject: [PATCH] Using dedent in the Sagemaker system test makes the code a
 little easier to read. (#42076)

---
 .../providers/amazon/aws/example_sagemaker.py | 42 +++++++++----------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/tests/system/providers/amazon/aws/example_sagemaker.py b/tests/system/providers/amazon/aws/example_sagemaker.py
index 945bb1eca868d..15b756494eac3 100644
--- a/tests/system/providers/amazon/aws/example_sagemaker.py
+++ b/tests/system/providers/amazon/aws/example_sagemaker.py
@@ -21,6 +21,7 @@
 import subprocess
 from datetime import datetime
 from tempfile import NamedTemporaryFile
+from textwrap import dedent
 
 import boto3
 
@@ -75,34 +76,33 @@
 SAMPLE_SIZE = 600
 
 # This script will be the entrypoint for the docker image which will handle preprocessing the raw data
-# NOTE: The following string must remain dedented as it is being written to a file.
-PREPROCESS_SCRIPT_TEMPLATE = """
-import numpy as np
-import pandas as pd
+PREPROCESS_SCRIPT_TEMPLATE = dedent("""
+    import numpy as np
+    import pandas as pd
 
-def main():
-    # Load the dataset from {input_path}/input.csv, split it into train/test
-    # subsets, and write them to {output_path}/ for the Processing Operator.
+    def main():
+        # Load the dataset from {input_path}/input.csv, split it into train/test
+        # subsets, and write them to {output_path}/ for the Processing Operator.
 
-    data = pd.read_csv('{input_path}/input.csv')
+        data = pd.read_csv('{input_path}/input.csv')
 
-    # Split into test and train data
-    data_train, data_test = np.split(
-        data.sample(frac=1, random_state=np.random.RandomState()), [int(0.7 * len(data))]
-    )
+        # Split into test and train data
+        data_train, data_test = np.split(
+            data.sample(frac=1, random_state=np.random.RandomState()), [int(0.7 * len(data))]
+        )
 
-    # Remove the "answers" from the test set
-    data_test.drop(['class'], axis=1, inplace=True)
+        # Remove the "answers" from the test set
+        data_test.drop(['class'], axis=1, inplace=True)
 
-    # Write the splits to disk
-    data_train.to_csv('{output_path}/train.csv', index=False, header=False)
-    data_test.to_csv('{output_path}/test.csv', index=False, header=False)
+        # Write the splits to disk
+        data_train.to_csv('{output_path}/train.csv', index=False, header=False)
+        data_test.to_csv('{output_path}/test.csv', index=False, header=False)
 
-    print('Preprocessing Done.')
+        print('Preprocessing Done.')
 
-if __name__ == "__main__":
-    main()
-"""
+    if __name__ == "__main__":
+        main()
+""")
 
 
 def _create_ecr_repository(repo_name):
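
--
Note for reviewers (not part of the patch): textwrap.dedent removes the largest
whitespace prefix common to all non-blank lines of a string, so the template can
now be indented to match the surrounding module while still producing a
flush-left script when written to disk. A minimal standalone sketch of the
behavior; the TEMPLATE contents and the script.py target path are illustrative,
not taken from the test:

    from textwrap import dedent

    # Indenting the template four spaces keeps it readable in the module;
    # dedent() strips that common prefix from every line of the string.
    TEMPLATE = dedent("""
        import sys

        def main():
            print('reading from {input_path}')

        if __name__ == "__main__":
            main()
    """)

    # Render the placeholder and write the result out, analogous to how the
    # system test writes its preprocessing script before building the image.
    with open("script.py", "w") as f:
        f.write(TEMPLATE.format(input_path="/tmp/input"))

Without dedent, the old template had to stay flush-left (hence the removed NOTE
comment warning not to indent it): any source-level indentation would have been
carried into the generated file, and top-level statements with leading spaces
raise an IndentationError when the script runs.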