Skip to content

Commit

Permalink
Add CTS to validate device reset ext (#43)
Browse files Browse the repository at this point in the history
* Add CTS to validate device reset ext

Related-to: VLCLJ-2115

Signed-off-by: Vishnu Khanth <[email protected]>

* Add CTS to validate device reset ext

Related-to: VLCLJ-2115

Signed-off-by: Vishnu Khanth <[email protected]>

* Add CTS to validate device reset ext

Related-to: VLCLJ-2115

Signed-off-by: Vishnu Khanth <[email protected]>

* Add CTS to validate device reset ext

Related-to: VLCLJ-2115

Signed-off-by: Vishnu Khanth <[email protected]>

---------

Signed-off-by: Vishnu Khanth <[email protected]>
  • Loading branch information
vishnu-khanth authored Jul 4, 2024
1 parent 6fe343d commit c99886b
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 4 deletions.
150 changes: 146 additions & 4 deletions conformance_tests/sysman/test_sysman_device/src/test_sysman_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,17 +282,16 @@ TEST_F(
}
}

TEST_F(
SYSMAN_DEVICE_TEST,
GivenValidDeviceWhenResettingSysmanDeviceThenSysmanDeviceResetIsSucceded) {
// Issues a sysman device reset, through the lzt helper, for every entry in
// `devices`.
TEST_F(SYSMAN_DEVICE_TEST,
       GivenValidDeviceWhenResettingSysmanDeviceThenSysmanDeviceResetSucceeds) {
  for (auto &&sysman_device : devices) {
    // NOTE(review): result checking presumably happens inside the helper; its
    // body is not visible from this file.
    lzt::sysman_device_reset(sysman_device);
  }
}

TEST_F(
SYSMAN_DEVICE_TEST,
GivenValidDeviceWhenResettingSysmanDeviceNnumberOfTimesThenSysmanDeviceResetAlwaysSucceded) {
GivenValidDeviceWhenResettingSysmanDeviceNnumberOfTimesThenSysmanDeviceResetAlwaysSucceeds) {
int number_iterations = 2;
for (int i = 0; i < number_iterations; i++) {
for (auto device : devices) {
Expand Down Expand Up @@ -644,4 +643,147 @@ TEST_F(
}
}

// Requests a non-forced warm reset, via the reset-extension helper, for every
// entry in `devices`.
TEST_F(SYSMAN_DEVICE_TEST,
       GivenValidDeviceWhenWarmResettingDeviceThenDeviceResetExtSucceeds) {
  const bool force_reset = false;
  const auto reset_type = ZES_RESET_TYPE_WARM;

  for (auto &&sysman_device : devices) {
    lzt::sysman_device_reset_ext(sysman_device, force_reset, reset_type);
  }
}

// Requests a non-forced cold reset, via the reset-extension helper, for every
// entry in `devices`.
TEST_F(SYSMAN_DEVICE_TEST,
       GivenValidDeviceWhenColdResettingDeviceThenDeviceResetExtSucceeds) {
  const bool force_reset = false;
  const auto reset_type = ZES_RESET_TYPE_COLD;

  for (auto &&sysman_device : devices) {
    lzt::sysman_device_reset_ext(sysman_device, force_reset, reset_type);
  }
}

// Requests a non-forced FLR-type reset, via the reset-extension helper, for
// every entry in `devices`.
TEST_F(SYSMAN_DEVICE_TEST,
       GivenValidDeviceWhenFlrResettingDeviceThenDeviceResetExtSucceeds) {
  const bool force_reset = false;
  const auto reset_type = ZES_RESET_TYPE_FLR;

  for (auto &&sysman_device : devices) {
    lzt::sysman_device_reset_ext(sysman_device, force_reset, reset_type);
  }
}

// For every entry in `devices`: runs an n x n matrix multiplication on the
// GPU, compares it with a CPU-computed reference, issues a non-forced warm
// reset through the reset-extension helper, then runs and checks the same
// workload again.
TEST_F(
    SYSMAN_DEVICE_TEST,
    GivenWorkingDeviceHandleWhenWarmResettingSysmanDeviceThenWorkloadExecutionAlwaysSucceedsAfterResetExt) {
  uint32_t n = 512;
  std::vector<float> a(n * n, 1);
  std::vector<float> b(n * n, 1);
  std::vector<float> c;
  // The inputs never change, so the CPU reference is computed once and reused
  // for every device, before and after the reset.
  std::vector<float> c_cpu = perform_matrix_multiplication_on_cpu(a, b, n);

  for (auto device : devices) {
    // Perform workload execution before reset
#ifdef USE_ZESINIT
    // With USE_ZESINIT the workload is submitted on the core device handle
    // looked up from the sysman device's core UUID.
    auto sysman_device_properties = lzt::get_sysman_device_properties(device);
    ze_device_handle_t core_device =
        get_core_device_by_uuid(sysman_device_properties.core.uuid.id);
    // Fatal assertion: without a core device handle the workload below would
    // be submitted on a null handle, so stop the test here.
    ASSERT_NE(core_device, nullptr);
    c = submit_workload_for_gpu(a, b, n, core_device);
#else  // USE_ZESINIT
    c = submit_workload_for_gpu(a, b, n, device);
#endif // USE_ZESINIT

    compare_results(c, c_cpu);
    c.clear();
    LOG_INFO << "Initiating device reset...\n";
    // perform device reset
    lzt::sysman_device_reset_ext(device, false, ZES_RESET_TYPE_WARM);
    LOG_INFO << "End of device reset...\n";

    // Perform workload execution after reset
#ifdef USE_ZESINIT
    c = submit_workload_for_gpu(a, b, n, core_device);
#else  // USE_ZESINIT
    c = submit_workload_for_gpu(a, b, n, device);
#endif // USE_ZESINIT

    compare_results(c, c_cpu);
    c.clear();
  }
}

// For every entry in `devices`: runs an n x n matrix multiplication on the
// GPU, compares it with a CPU-computed reference, issues a non-forced cold
// reset through the reset-extension helper, then runs and checks the same
// workload again.
TEST_F(
    SYSMAN_DEVICE_TEST,
    GivenWorkingDeviceHandleWhenColdResettingSysmanDeviceThenWorkloadExecutionAlwaysSucceedsAfterResetExt) {
  uint32_t n = 512;
  std::vector<float> a(n * n, 1);
  std::vector<float> b(n * n, 1);
  std::vector<float> c;
  // The inputs never change, so the CPU reference is computed once and reused
  // for every device, before and after the reset.
  std::vector<float> c_cpu = perform_matrix_multiplication_on_cpu(a, b, n);

  for (auto device : devices) {
    // Perform workload execution before reset
#ifdef USE_ZESINIT
    // With USE_ZESINIT the workload is submitted on the core device handle
    // looked up from the sysman device's core UUID.
    auto sysman_device_properties = lzt::get_sysman_device_properties(device);
    ze_device_handle_t core_device =
        get_core_device_by_uuid(sysman_device_properties.core.uuid.id);
    // Fatal assertion: without a core device handle the workload below would
    // be submitted on a null handle, so stop the test here.
    ASSERT_NE(core_device, nullptr);
    c = submit_workload_for_gpu(a, b, n, core_device);
#else  // USE_ZESINIT
    c = submit_workload_for_gpu(a, b, n, device);
#endif // USE_ZESINIT

    compare_results(c, c_cpu);
    c.clear();
    LOG_INFO << "Initiating device reset...\n";
    // perform device reset
    lzt::sysman_device_reset_ext(device, false, ZES_RESET_TYPE_COLD);
    LOG_INFO << "End of device reset...\n";

    // Perform workload execution after reset
#ifdef USE_ZESINIT
    c = submit_workload_for_gpu(a, b, n, core_device);
#else  // USE_ZESINIT
    c = submit_workload_for_gpu(a, b, n, device);
#endif // USE_ZESINIT

    compare_results(c, c_cpu);
    c.clear();
  }
}

// For every entry in `devices`: runs an n x n matrix multiplication on the
// GPU, compares it with a CPU-computed reference, issues a non-forced
// FLR-type reset through the reset-extension helper, then runs and checks the
// same workload again.
TEST_F(
    SYSMAN_DEVICE_TEST,
    GivenWorkingDeviceHandleWhenFlrResettingSysmanDeviceThenWorkloadExecutionAlwaysSucceedsAfterResetExt) {
  uint32_t n = 512;
  std::vector<float> a(n * n, 1);
  std::vector<float> b(n * n, 1);
  std::vector<float> c;
  // The inputs never change, so the CPU reference is computed once and reused
  // for every device, before and after the reset.
  std::vector<float> c_cpu = perform_matrix_multiplication_on_cpu(a, b, n);

  for (auto device : devices) {
    // Perform workload execution before reset
#ifdef USE_ZESINIT
    // With USE_ZESINIT the workload is submitted on the core device handle
    // looked up from the sysman device's core UUID.
    auto sysman_device_properties = lzt::get_sysman_device_properties(device);
    ze_device_handle_t core_device =
        get_core_device_by_uuid(sysman_device_properties.core.uuid.id);
    // Fatal assertion: without a core device handle the workload below would
    // be submitted on a null handle, so stop the test here.
    ASSERT_NE(core_device, nullptr);
    c = submit_workload_for_gpu(a, b, n, core_device);
#else  // USE_ZESINIT
    c = submit_workload_for_gpu(a, b, n, device);
#endif // USE_ZESINIT

    compare_results(c, c_cpu);
    c.clear();
    LOG_INFO << "Initiating device reset...\n";
    // perform device reset
    lzt::sysman_device_reset_ext(device, false, ZES_RESET_TYPE_FLR);
    LOG_INFO << "End of device reset...\n";

    // Perform workload execution after reset
#ifdef USE_ZESINIT
    c = submit_workload_for_gpu(a, b, n, core_device);
#else  // USE_ZESINIT
    c = submit_workload_for_gpu(a, b, n, device);
#endif // USE_ZESINIT

    compare_results(c, c_cpu);
    c.clear();
  }
}
} // namespace
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ zes_device_state_t get_device_state(zes_device_handle_t device);
std::vector<zes_process_state_t> get_processes_state(zes_device_handle_t device,
uint32_t &count);

// Resets `device` by calling zesDeviceResetExt with reset properties built
// from `force` and `type`. A result other than ZE_RESULT_SUCCESS is reported
// as a non-fatal (EXPECT_EQ) test failure; nothing is returned to the caller.
void sysman_device_reset_ext(zes_device_handle_t device, ze_bool_t force,
                             zes_reset_type_t type);

} // namespace level_zero_tests

#endif
10 changes: 10 additions & 0 deletions utils/test_harness/sysman/src/test_harness_sysman_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,14 @@ zes_device_state_t get_device_state(zes_device_handle_t device) {
return state;
}

// Builds a zes_reset_properties_t from the caller's `force` flag and reset
// `type`, passes it to zesDeviceResetExt for `device`, and expects the call to
// return ZE_RESULT_SUCCESS (non-fatal check).
void sysman_device_reset_ext(zes_device_handle_t device, ze_bool_t force,
                             zes_reset_type_t type) {
  // Zero-initialize first so any field not set below starts out as zero.
  zes_reset_properties_t properties = {};

  // Caller-selected reset behaviour.
  properties.resetType = type;
  properties.force = force;

  // Structure header: type tag, and no extension chain.
  properties.pNext = nullptr;
  properties.stype = ZES_STRUCTURE_TYPE_RESET_PROPERTIES;

  EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceResetExt(device, &properties));
}

} // namespace level_zero_tests

0 comments on commit c99886b

Please sign in to comment.