Skip to content

Commit

Permalink
Using dedent in the Sagemaker system test makes the code a little easier to read. (apache#42076)
Browse files Browse the repository at this point in the history
  • Loading branch information
ferruzzi authored Sep 6, 2024
1 parent 4bcd184 commit 3518cb3
Showing 1 changed file with 21 additions and 21 deletions.
42 changes: 21 additions & 21 deletions tests/system/providers/amazon/aws/example_sagemaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import subprocess
from datetime import datetime
from tempfile import NamedTemporaryFile
from textwrap import dedent

import boto3

Expand Down Expand Up @@ -75,34 +76,33 @@
SAMPLE_SIZE = 600

# This script will be the entrypoint for the docker image which will handle preprocessing the raw data
# NOTE: The following string must remain dedented as it is being written to a file.
PREPROCESS_SCRIPT_TEMPLATE = """
import numpy as np
import pandas as pd
PREPROCESS_SCRIPT_TEMPLATE = dedent("""
import numpy as np
import pandas as pd
def main():
# Load the dataset from {input_path}/input.csv, split it into train/test
# subsets, and write them to {output_path}/ for the Processing Operator.
def main():
# Load the dataset from {input_path}/input.csv, split it into train/test
# subsets, and write them to {output_path}/ for the Processing Operator.
data = pd.read_csv('{input_path}/input.csv')
data = pd.read_csv('{input_path}/input.csv')
# Split into test and train data
data_train, data_test = np.split(
data.sample(frac=1, random_state=np.random.RandomState()), [int(0.7 * len(data))]
)
# Split into test and train data
data_train, data_test = np.split(
data.sample(frac=1, random_state=np.random.RandomState()), [int(0.7 * len(data))]
)
# Remove the "answers" from the test set
data_test.drop(['class'], axis=1, inplace=True)
# Remove the "answers" from the test set
data_test.drop(['class'], axis=1, inplace=True)
# Write the splits to disk
data_train.to_csv('{output_path}/train.csv', index=False, header=False)
data_test.to_csv('{output_path}/test.csv', index=False, header=False)
# Write the splits to disk
data_train.to_csv('{output_path}/train.csv', index=False, header=False)
data_test.to_csv('{output_path}/test.csv', index=False, header=False)
print('Preprocessing Done.')
print('Preprocessing Done.')
if __name__ == "__main__":
main()
"""
if __name__ == "__main__":
main()
""")


def _create_ecr_repository(repo_name):
Expand Down

0 comments on commit 3518cb3

Please sign in to comment.