
Commit 73b5d9e

Author: Pavel Pscheidl
PUBDEV-6556 - S3 tests failing non-deterministically in hadoop pipeline (h2oai#3579)
* PUBDEV-6556 - S3 test fails due to a file not yet existing in S3
* S3 HDP import/export test uses millisecond precision in exported file name
1 parent ef6f830 commit 73b5d9e
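The first half of the fix is the exported file name. The tests previously formatted the timestamp with second granularity; the commit adds strftime's %f directive, which appends microseconds (this subsumes the "millisecond precision" the commit message mentions). A minimal standard-library sketch of the difference:

```python
from datetime import datetime

now = datetime.utcnow()
# Old: second precision, identical for exports within the same second.
print(now.strftime("%Y%m%d-%H%M%S"))      # e.g. 20190704-101530
# New: %f appends microseconds, giving each export a finer-grained name.
print(now.strftime("%Y%m%d-%H%M%S.%f"))   # e.g. 20190704-101530.123456
```

Together with the uuid4 suffix the tests already append, this makes each exported key effectively unique per run.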

2 files changed: +27 -7 lines changed

h2o-hadoop-2/tests/python/pyunit_s3_import_export.py (+13 -3)
```diff
@@ -12,16 +12,26 @@
 def s3_import_export():
     local_frame = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))
     for scheme in ["s3n", "s3a"]:
-        timestamp = datetime.today().utcnow().strftime("%Y%m%d-%H%M%S")
+        timestamp = datetime.today().utcnow().strftime("%Y%m%d-%H%M%S.%f")
         unique_suffix = str(uuid.uuid4())
         s3_path = scheme + "://test.0xdata.com/h2o-hadoop-tests/test-export/" + scheme + "/exported." + \
                   timestamp + "." + unique_suffix + ".csv.zip"
         h2o.export_file(local_frame, s3_path)
+
+        s3 = boto3.resource('s3')
+        client = boto3.client('s3')
+        # S3 might have a delay in indexing the file (usually milliseconds or hundreds of milliseconds).
+        # Wait for the file to be available; if it is not there at the beginning, retry every 2 seconds, up to 10 times.
+        client.get_waiter('object_exists').wait(Bucket='test.0xdata.com',
+                                                Key="h2o-hadoop-tests/test-export/" + scheme + "/exported." + \
+                                                    timestamp + "." + unique_suffix + ".csv.zip",
+                                                WaiterConfig={
+                                                    'Delay': 2,
+                                                    'MaxAttempts': 10
+                                                })
         s3_frame = h2o.import_file(s3_path)
         assert_frame_equal(local_frame.as_data_frame(), s3_frame.as_data_frame())
 
-        # Delete the file afterwards
-        s3 = boto3.resource('s3')
         s3.Object(bucket_name='test.0xdata.com', key="h2o-hadoop-tests/test-export/" + scheme + "/exported." + \
                   timestamp + "." + unique_suffix + ".csv.zip").delete()
 
```
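The second half of the fix is waiting for the object to become visible. The in-code comment attributes the flakiness to a short delay before S3 indexes the newly exported file, so the test now blocks on boto3's built-in object_exists waiter, which repeatedly issues HeadObject until the key is found. A standalone sketch of the same call, with an illustrative bucket and key (the values here are placeholders, not the test's real output):

```python
import boto3

# Placeholder names for illustration; the test builds these from the scheme,
# the timestamp, and a uuid4 suffix.
bucket = "test.0xdata.com"
key = "h2o-hadoop-tests/test-export/s3a/exported.20190704-101530.123456.csv.zip"

client = boto3.client("s3")
# Polls HeadObject every 2 seconds, at most 10 attempts (up to ~20 s);
# raises botocore.exceptions.WaiterError if the key never appears.
client.get_waiter("object_exists").wait(
    Bucket=bucket,
    Key=key,
    WaiterConfig={"Delay": 2, "MaxAttempts": 10},
)
```

A failed wait surfaces as a WaiterError, a much clearer test failure than the nondeterministic import error it replaces. Note also that the `s3 = boto3.resource('s3')` handle is now created up front and reused by the cleanup `delete()` at the end of the loop, which is why the old "Delete the file afterwards" block disappears.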
h2o-hadoop-3/tests/python/pyunit_s3_import_export.py (+14 -4)
```diff
@@ -11,17 +11,27 @@
 
 def s3_import_export():
     local_frame = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))
-    for scheme in ["s3a"]:  # s3n is deprecated since HDP3/CDH6
-        timestamp = datetime.today().utcnow().strftime("%Y%m%d-%H%M%S")
+    for scheme in ["s3a"]:  # s3n is deprecated since HDP3/CDH6
+        timestamp = datetime.today().utcnow().strftime("%Y%m%d-%H%M%S.%f")
         unique_suffix = str(uuid.uuid4())
         s3_path = scheme + "://test.0xdata.com/h2o-hadoop-tests/test-export/" + scheme + "/exported." + \
                   timestamp + "." + unique_suffix + ".csv.zip"
         h2o.export_file(local_frame, s3_path)
+
+        s3 = boto3.resource('s3')
+        client = boto3.client('s3')
+        # S3 might have a delay in indexing the file (usually milliseconds or hundreds of milliseconds).
+        # Wait for the file to be available; if it is not there at the beginning, retry every 2 seconds, up to 10 times.
+        client.get_waiter('object_exists').wait(Bucket='test.0xdata.com',
+                                                Key="h2o-hadoop-tests/test-export/" + scheme + "/exported." + \
+                                                    timestamp + "." + unique_suffix + ".csv.zip",
+                                                WaiterConfig={
+                                                    'Delay': 2,
+                                                    'MaxAttempts': 10
+                                                })
         s3_frame = h2o.import_file(s3_path)
         assert_frame_equal(local_frame.as_data_frame(), s3_frame.as_data_frame())
 
-        # Delete the file afterwards
-        s3 = boto3.resource('s3')
         s3.Object(bucket_name='test.0xdata.com', key="h2o-hadoop-tests/test-export/" + scheme + "/exported." + \
                   timestamp + "." + unique_suffix + ".csv.zip").delete()
 
```
