Skip to content

Commit

Permalink
[cuda] Port over existing semaphore impl (iree-org#14325)
Browse files Browse the repository at this point in the history
The current semaphore implementation actually does nothing.
Still port over it so that we can have a full implementation
passing various end-to-end tests to be based on. This makes
having a proper implementation of semaphores easier later
as we can verify correctness immediately afterwards.

Progress towards iree-org#13245
  • Loading branch information
antiagainst authored Jul 10, 2023
1 parent d01a83c commit 04beef2
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 3 deletions.
3 changes: 3 additions & 0 deletions experimental/cuda2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ iree_cc_library(
"native_executable.h"
"nop_executable_cache.c"
"nop_executable_cache.h"
"nop_semaphore.c"
"nop_semaphore.h"
"nccl_channel.c"
"nccl_channel.h"
"pipeline_layout.c"
Expand All @@ -44,6 +46,7 @@ iree_cc_library(
iree::base::internal::flatcc::parsing
iree::hal
iree::hal::utils::buffer_transfer
iree::hal::utils::semaphore_base
iree::schemas::cuda_executable_def_c_fbs
PUBLIC
)
Expand Down
8 changes: 5 additions & 3 deletions experimental/cuda2/cuda_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "experimental/cuda2/nccl_channel.h"
#include "experimental/cuda2/nccl_dynamic_symbols.h"
#include "experimental/cuda2/nop_executable_cache.h"
#include "experimental/cuda2/nop_semaphore.h"
#include "experimental/cuda2/pipeline_layout.h"
#include "experimental/cuda2/tracing.h"
#include "iree/base/internal/arena.h"
Expand Down Expand Up @@ -484,15 +485,16 @@ static iree_status_t iree_hal_cuda2_device_create_pipeline_layout(
static iree_status_t iree_hal_cuda2_device_create_semaphore(
iree_hal_device_t* base_device, uint64_t initial_value,
iree_hal_semaphore_t** out_semaphore) {
return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
"semaphore not yet implmeneted");
iree_hal_cuda2_device_t* device = iree_hal_cuda2_device_cast(base_device);
return iree_hal_cuda2_semaphore_create(initial_value, device->host_allocator,
out_semaphore);
}

static iree_hal_semaphore_compatibility_t
iree_hal_cuda2_device_query_semaphore_compatibility(
iree_hal_device_t* base_device, iree_hal_semaphore_t* semaphore) {
// TODO: implement CUDA semaphores.
return IREE_HAL_SEMAPHORE_COMPATIBILITY_NONE;
return IREE_HAL_SEMAPHORE_COMPATIBILITY_HOST_ONLY;
}

// TODO: implement multiple streams; today we only have one and queue_affinity
Expand Down
111 changes: 111 additions & 0 deletions experimental/cuda2/nop_semaphore.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// Copyright 2023 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "experimental/cuda2/nop_semaphore.h"

#include <stddef.h>

#include "iree/base/api.h"
#include "iree/hal/utils/semaphore_base.h"

typedef struct iree_hal_cuda2_semaphore_t {
iree_hal_semaphore_t base;
iree_allocator_t host_allocator;
iree_atomic_int64_t value;
} iree_hal_cuda2_semaphore_t;

static const iree_hal_semaphore_vtable_t iree_hal_cuda2_semaphore_vtable;

static iree_hal_cuda2_semaphore_t* iree_hal_cuda2_semaphore_cast(
iree_hal_semaphore_t* base_value) {
IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_cuda2_semaphore_vtable);
return (iree_hal_cuda2_semaphore_t*)base_value;
}

iree_status_t iree_hal_cuda2_semaphore_create(
uint64_t initial_value, iree_allocator_t host_allocator,
iree_hal_semaphore_t** out_semaphore) {
IREE_ASSERT_ARGUMENT(out_semaphore);
IREE_TRACE_ZONE_BEGIN(z0);

iree_hal_cuda2_semaphore_t* semaphore = NULL;
iree_status_t status = iree_allocator_malloc(
host_allocator, sizeof(*semaphore), (void**)&semaphore);
if (iree_status_is_ok(status)) {
iree_hal_semaphore_initialize(&iree_hal_cuda2_semaphore_vtable,
&semaphore->base);
semaphore->host_allocator = host_allocator;
iree_atomic_store_int64(&semaphore->value, initial_value,
iree_memory_order_release);
*out_semaphore = &semaphore->base;
}

IREE_TRACE_ZONE_END(z0);
return status;
}

static void iree_hal_cuda2_semaphore_destroy(
iree_hal_semaphore_t* base_semaphore) {
iree_hal_cuda2_semaphore_t* semaphore =
iree_hal_cuda2_semaphore_cast(base_semaphore);
iree_allocator_t host_allocator = semaphore->host_allocator;
IREE_TRACE_ZONE_BEGIN(z0);

iree_hal_semaphore_deinitialize(&semaphore->base);
iree_allocator_free(host_allocator, semaphore);

IREE_TRACE_ZONE_END(z0);
}

static iree_status_t iree_hal_cuda2_semaphore_query(
iree_hal_semaphore_t* base_semaphore, uint64_t* out_value) {
iree_hal_cuda2_semaphore_t* semaphore =
iree_hal_cuda2_semaphore_cast(base_semaphore);
// TODO: Support semaphores completely.
*out_value =
iree_atomic_load_int64(&semaphore->value, iree_memory_order_acquire);
return iree_ok_status();
}

static iree_status_t iree_hal_cuda2_semaphore_signal(
iree_hal_semaphore_t* base_semaphore, uint64_t new_value) {
iree_hal_cuda2_semaphore_t* semaphore =
iree_hal_cuda2_semaphore_cast(base_semaphore);
// TODO: Support semaphores completely. Return OK currently as everything is
// synchronized for each submit to allow things to run.
iree_atomic_store_int64(&semaphore->value, new_value,
iree_memory_order_release);
iree_hal_semaphore_poll(&semaphore->base);
return iree_ok_status();
}

static void iree_hal_cuda2_semaphore_fail(iree_hal_semaphore_t* base_semaphore,
iree_status_t status) {
iree_hal_cuda2_semaphore_t* semaphore =
iree_hal_cuda2_semaphore_cast(base_semaphore);
// TODO: save status and mark timepoint as failed.
iree_status_ignore(status);
iree_hal_semaphore_poll(&semaphore->base);
}

static iree_status_t iree_hal_cuda2_semaphore_wait(
iree_hal_semaphore_t* base_semaphore, uint64_t value,
iree_timeout_t timeout) {
iree_hal_cuda2_semaphore_t* semaphore =
iree_hal_cuda2_semaphore_cast(base_semaphore);
// TODO: Support semaphores completely. Return OK currently as everything is
// synchronized for each submit to allow things to run.
iree_hal_semaphore_poll(&semaphore->base);
return iree_ok_status();
}

static const iree_hal_semaphore_vtable_t iree_hal_cuda2_semaphore_vtable = {
.destroy = iree_hal_cuda2_semaphore_destroy,
.query = iree_hal_cuda2_semaphore_query,
.signal = iree_hal_cuda2_semaphore_signal,
.fail = iree_hal_cuda2_semaphore_fail,
.wait = iree_hal_cuda2_semaphore_wait,
};
29 changes: 29 additions & 0 deletions experimental/cuda2/nop_semaphore.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright 2023 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#ifndef EXPERIMENTAL_CUDA2_NOP_SEMAPHORE_H_
#define EXPERIMENTAL_CUDA2_NOP_SEMAPHORE_H_

#include <stdint.h>

#include "iree/base/api.h"
#include "iree/hal/api.h"

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

// Creates a HAL semaphore for CUDA that does not perform real synchronization.
// This is expected to work with a command buffer that serializes all commands.
iree_status_t iree_hal_cuda2_semaphore_create(
uint64_t initial_value, iree_allocator_t host_allocator,
iree_hal_semaphore_t** out_semaphore);

#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus

#endif // EXPERIMENTAL_CUDA2_NOP_SEMAPHORE_H_

0 comments on commit 04beef2

Please sign in to comment.