Skip to content

Commit

Permalink
dev-libs/rocm-opencl-runtime: Add segfault fix
Browse files Browse the repository at this point in the history
Signed-off-by: Patrick Lauer <[email protected]>
  • Loading branch information
patricklauer committed Sep 19, 2024
1 parent 94c010d commit e7f0f2b
Show file tree
Hide file tree
Showing 2 changed files with 125 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
From d1fff7cea2231e7980d85790486edb70d093a1dc Mon Sep 17 00:00:00 2001
From: Rakesh Roy <[email protected]>
Date: Wed, 20 Mar 2024 22:50:22 +0530
Subject: [PATCH] SWDEV-445096 - Fix -O0 crash in OpenCL tests

- With https://gerrit-git.amd.com/c/lightning/ec/llvm-project/+/1002628 applied, at -O0 Kernel::dynamicParallelism() returns true but virtual queue isn't created
- This causes segfault inside VirtualGPU::submitKernelInternal() when getVQVirtualAddress() is called

Change-Id: Ia7af042adad2329e870c142caaac3e8fa886f8b8
---
rocclr/device/rocm/rocvirtual.cpp | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/rocclr/device/rocm/rocvirtual.cpp b/rocclr/device/rocm/rocvirtual.cpp
index 51080013b..1610aecfe 100644
--- a/rocclr/device/rocm/rocvirtual.cpp
+++ b/rocclr/device/rocm/rocvirtual.cpp
@@ -3326,8 +3326,11 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes,

if (gpuKernel.dynamicParallelism()) {
dispatchBarrierPacket(kBarrierPacketHeader, true);
- static_cast<KernelBlitManager&>(blitMgr()).runScheduler(
- getVQVirtualAddress(), schedulerParam_, schedulerQueue_, schedulerSignal_, schedulerThreads_);
+ if (virtualQueue_ != nullptr) {
+ static_cast<KernelBlitManager&>(blitMgr()).runScheduler(
+ getVQVirtualAddress(), schedulerParam_, schedulerQueue_,
+ schedulerSignal_, schedulerThreads_);
+ }
}

// Check if image buffer write back is required
94 changes: 94 additions & 0 deletions dev-libs/rocm-opencl-runtime/rocm-opencl-runtime-6.1.2-r1.ebuild
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Copyright 1999-2024 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

# Must be assigned before `inherit` so rocm.eclass can see it.
# NOTE(review): presumably makes rocm.eclass skip its global-scope
# USE/dependency setup -- confirm against the eclass documentation.
ROCM_SKIP_GLOBALS=1
inherit cmake edo flag-o-matic rocm

DESCRIPTION="Radeon Open Compute OpenCL Compatible Runtime"
HOMEPAGE="https://github.com/ROCm-Developer-Tools/clr"

SRC_URI="https://github.com/ROCm-Developer-Tools/clr/archive/refs/tags/rocm-${PV}.tar.gz -> rocm-clr-${PV}.tar.gz"
S="${WORKDIR}/clr-rocm-${PV}/"

LICENSE="Apache-2.0 MIT"
# Sub-slot follows the major.minor part of the version.
SLOT="0/$(ver_cut 1-2)"
KEYWORDS="~amd64"
IUSE="debug test"
# Tests are only allowed to run when USE=test is enabled.
RESTRICT="!test? ( test )"

RDEPEND=">=dev-libs/rocr-runtime-6.0
	>=dev-libs/rocm-comgr-6.0
	>=dev-libs/rocm-device-libs-6.0
	>=virtual/opencl-3
	media-libs/mesa[-opencl]"
DEPEND="${RDEPEND}"
BDEPEND=">=dev-build/rocm-cmake-5.3
	media-libs/glew
	test? ( >=x11-apps/mesa-progs-8.5.0[X] )
	"
# ${FILESDIR} is quoted so the array element survives word splitting and
# globbing even if the repository path contains whitespace.
PATCHES=( "${FILESDIR}"/rocm-opencl-runtime-6.1.2-fix-segfault.patch )

# Adjusts compiler/linker flags for known upstream build problems, then
# configures the CMake build of the OpenCL runtime.
src_configure() {
	# Workaround for -Werror=strict-aliasing failures:
	#   https://bugs.gentoo.org/856088
	#   https://github.com/ROCm/clr/issues/64
	# LTO is not trusted with this code either, so it is filtered out.
	append-flags -fno-strict-aliasing
	filter-lto

	# ld.lld linker error workaround; the flag is only added when the
	# linker driver accepts it:
	#   https://github.com/RadeonOpenCompute/ROCm-OpenCL-Runtime/issues/155
	append-ldflags $(test-flags-CCLD -Wl,--undefined-version)

	# Reported upstream:
	#   https://github.com/RadeonOpenCompute/ROCm-OpenCL-Runtime/issues/120
	append-cflags -fcommon

	# ON when USE=test is enabled, OFF otherwise.
	local tests_switch
	tests_switch=$(usex test ON OFF)

	local mycmakeargs=(
		-Wno-dev
		-DROCM_PATH="${EPREFIX}/usr"
		-DBUILD_TESTS=${tests_switch}
		-DEMU_ENV=ON
		-DBUILD_ICD=ON
		-DFILE_REORG_BACKWARD_COMPATIBILITY=OFF
		-DCLR_BUILD_OCL=on
	)
	cmake_src_configure
}

# Installs the ICD vendor file from the source tree and the two shared
# libraries produced in the CMake build directory.
src_install() {
	# ICD registration file; the relative path is resolved from the
	# directory src_install starts in.
	insinto /etc/OpenCL/vendors
	doins opencl/config/amdocl64.icd

	# The built libraries are picked up relative to ${BUILD_DIR}/opencl.
	cd "${BUILD_DIR}"/opencl || die
	# Use the profile's library directory instead of a hard-coded lib64.
	insinto "/usr/$(get_libdir)"
	doins amdocl/libamdocl64.so
	doins tools/cltrace/libcltrace.so
}

# Runs the ocltst suites (oclgl, oclruntime, oclperf) against the freshly
# built runtime. The oclgl suite requires the caller to export
# OCLGL_DISPLAY (and OCLGL_XAUTHORITY) pointing at an X server whose
# OpenGL vendor string is AMD; the phase dies otherwise.
src_test() {
	check_amdgpu
	cd "${BUILD_DIR}"/tests/ocltst || die
	# NOTE(review): src_install takes libamdocl64.so from
	# "${BUILD_DIR}"/opencl/amdocl, while this points at
	# "${BUILD_DIR}"/amdocl -- confirm which location is correct.
	export OCL_ICD_FILENAMES="${BUILD_DIR}"/amdocl/libamdocl64.so

	local instruction1="Please start an X server using amdgpu driver (not Xvfb!),"
	local instruction2="and export OCLGL_DISPLAY=\${DISPLAY} OCLGL_XAUTHORITY=\${XAUTHORITY} before reruning the test."

	# Guard: the GL interop test cannot run unless OCLGL_DISPLAY is set
	# (an empty value still counts as set).
	if [[ -z ${OCLGL_DISPLAY+x} ]]; then
		ewarn "${instruction1}"
		ewarn "${instruction2}"
		die "\${OCLGL_DISPLAY} not set."
	fi

	export DISPLAY="${OCLGL_DISPLAY}"
	export XAUTHORITY="${OCLGL_XAUTHORITY}"
	ebegin "Running oclgl test under DISPLAY ${OCLGL_DISPLAY}"
	if ! glxinfo | grep "OpenGL vendor string: AMD"; then
		ewarn "${instruction1}"
		ewarn "${instruction2}"
		die "This display does not have AMD OpenGL vendor!"
	fi
	# Command substitutions are quoted so a build directory containing
	# whitespace or glob characters is passed as a single argument.
	./ocltst -m "$(realpath liboclgl.so)" -A ogl.exclude
	eend $? || die "oclgl test failed"

	edob ./ocltst -m "$(realpath liboclruntime.so)" -A oclruntime.exclude
	edob ./ocltst -m "$(realpath liboclperf.so)" -A oclperf.exclude
}

0 comments on commit e7f0f2b

Please sign in to comment.