Merge pull request JuliaGPU#1200 from JuliaGPU/tb/ci
CI improvements.
maleadt authored Oct 15, 2021
2 parents aef1a6f + ca4b3ce commit 27c87a6
Showing 7 changed files with 81 additions and 89 deletions.
73 changes: 35 additions & 38 deletions .buildkite/pipeline.yml
@@ -20,33 +20,30 @@ steps:
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 120

- wait # smoke test: don't kick off a large number of tests if they'd all just fail

# XXX: disabled due to memory requirements; re-enable on the new CI machines
# - label: "Julia 1.6 (debug)"
# plugins:
# - JuliaCI/julia#v1:
# version: 1.6
# - JuliaCI/julia-test#v1:
# julia_args: "-g2"
# test_args: "--sanitize --quickfail --jobs=2"
# - JuliaCI/julia-coverage#v1:
# codecov: true
# dirs:
# - src
# - lib
# - examples
# agents:
# queue: "juliagpu"
# cuda: "11.0"
# cap: "sm_80" # test as much as possible
# env:
# JULIA_CUDA_VERSION: '11.4'
# JULIA_CUDA_DEBUG_INFO: 'false' # NVIDIA bug #3305774: ptxas segfaults without debug info
# JULIA_CUDA_USE_BINARYBUILDER: 'true'
# if: build.message !~ /\[skip tests\]/ &&
# !build.pull_request.draft
# timeout_in_minutes: 120
- label: "Julia 1.6 (debug)"
plugins:
- JuliaCI/julia#v1:
version: 1.6
- JuliaCI/julia-test#v1:
julia_args: "-g2"
test_args: "--sanitize --quickfail --jobs=1"
- JuliaCI/julia-coverage#v1:
codecov: true
dirs:
- src
- lib
- examples
agents:
queue: "juliagpu"
cuda: "11.0"
cap: "sm_80"
env:
JULIA_CUDA_VERSION: '11.4'
JULIA_CUDA_DEBUG_INFO: 'false' # NVIDIA bug #3305774: ptxas segfaults without debug info
JULIA_CUDA_USE_BINARYBUILDER: 'true'
if: build.message !~ /\[skip tests\]/ &&
!build.pull_request.draft
timeout_in_minutes: 180

- label: "Julia 1.7"
plugins:
@@ -244,17 +241,17 @@ steps:

# special tests

- label: "Windows"
plugins:
- JuliaCI/julia#v1:
version: 1.6
- JuliaCI/julia-test#v1: ~
# XXX: no coverage, as no secrets on Windows
agents:
queue: "juliagpu-windows"
cuda: "*"
if: build.message !~ /\[skip tests\]/ && !build.pull_request.draft
timeout_in_minutes: 120
# - label: "Windows"
# plugins:
# - JuliaCI/julia#v1:
# version: 1.6
# - JuliaCI/julia-test#v1: ~
# # XXX: no coverage, as no secrets on Windows
# agents:
# queue: "juliagpu-windows"
# cuda: "*"
# if: build.message !~ /\[skip tests\]/ && !build.pull_request.draft
# timeout_in_minutes: 120

- label: "NNlibCUDA.jl"
plugins:
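For reference, the re-enabled "Julia 1.6 (debug)" job above passes `-g2` to Julia and `--sanitize --quickfail --jobs=1` to the test harness. A minimal sketch of reproducing that configuration locally via Pkg.test, assuming a CUDA-capable machine and that runtests.jl accepts these flags as the pipeline suggests:

```julia
using Pkg

# Hedged sketch: run the CUDA.jl test suite the way the debug CI job does,
# with debug-level codegen for Julia itself plus the harness flags from the
# pipeline above. Requires a local GPU; flag names come from the pipeline,
# not from any additional documentation.
Pkg.test("CUDA";
         julia_args = ["-g2"],
         test_args  = ["--sanitize", "--quickfail", "--jobs=1"])
```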
8 changes: 4 additions & 4 deletions test/cudnn/convolution.jl
@@ -184,10 +184,10 @@ false && @testset "cudnn/convolution" begin

# Test tensor format
cx2,cw2,cb2 = (x->permutedims(x,(3,1,2,4))).((cx,cw,cb))
whcn = cudnnConvolutionForward(cw,cx)
cwhn = cudnnConvolutionForward(cw2,cx2,format=CUDNN_TENSOR_NHWC)
whcn = cudnnConvolutionForward(cw,cx) |> Array
cwhn = cudnnConvolutionForward(cw2,cx2,format=CUDNN_TENSOR_NHWC) |> Array
@test cwhn ≈ permutedims(whcn,(3,1,2,4))
whcn = cudnnConvolutionForward(cw,cx;bias=cb)
cwhn = cudnnConvolutionForward(cw2,cx2;bias=cb2,format=CUDNN_TENSOR_NHWC)
whcn = cudnnConvolutionForward(cw,cx;bias=cb) |> Array
cwhn = cudnnConvolutionForward(cw2,cx2;bias=cb2,format=CUDNN_TENSOR_NHWC) |> Array
@test cwhn ≈ permutedims(whcn,(3,1,2,4))
end
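The convolution change above only moves the results to the host (`|> Array`) before comparing. A small, self-contained sketch of that pattern with the same WHCN/CWHN permutation, using plain CUDA arrays rather than the cuDNN calls (an illustration, not the test itself, and it assumes a CUDA-capable machine):

```julia
using CUDA, Test

# Sketch of the host-side comparison pattern: compute on the GPU, then
# convert both sides to Array so the ≈ check runs on CPU memory.
x_whcn = CUDA.rand(Float32, 8, 8, 3, 2)        # W,H,C,N layout
x_cwhn = permutedims(x_whcn, (3, 1, 2, 4))     # C,W,H,N layout
# permuting back with the inverse permutation must recover the original
@test Array(permutedims(x_cwhn, (2, 3, 1, 4))) ≈ Array(x_whcn)
```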
10 changes: 5 additions & 5 deletions test/cudnn/dropout.jl
@@ -23,10 +23,10 @@ using CUDA.CUDNN:
x = CUDA.rand(N)
d = cudnnDropoutDescriptor(P)
cudnnDropoutSeed[] = 1
y = cudnnDropoutForward(x; dropout = P)
@test isapprox(mean(Array(y).==0), P; atol = 3/sqrt(N))
@test y == cudnnDropoutForward(x, d)
@test y == cudnnDropoutForward!(similar(x), x; dropout = P)
@test y == cudnnDropoutForward!(similar(x), x, d)
y = cudnnDropoutForward(x; dropout = P) |> Array
@test isapprox(mean(y.==0), P; atol = 3/sqrt(N))
@test y == cudnnDropoutForward(x, d) |> Array
@test y == cudnnDropoutForward!(similar(x), x; dropout = P) |> Array
@test y == cudnnDropoutForward!(similar(x), x, d) |> Array
cudnnDropoutSeed[] = -1
end
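The dropout test now compares the zero fraction on the host. A plain-CPU illustration of that statistical check (the `3/sqrt(N)` tolerance on the fraction of zeroed elements), assuming standard inverted dropout; this does not call cuDNN:

```julia
using Test, Statistics

# Plain-Julia sketch of the check above: with dropout probability P,
# roughly P of the N outputs should be exactly zero, within a few
# standard deviations (3/sqrt(N)) of the expected fraction.
N, P = 1000, 0.7
x = rand(Float32, N)
keep = rand(N) .>= P              # keep each element with probability 1 - P
y = x .* keep ./ (1 - P)          # inverted-dropout scaling
@test isapprox(mean(y .== 0), P; atol = 3/sqrt(N))
```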
24 changes: 12 additions & 12 deletions test/cudnn/multiheadattn.jl
@@ -116,18 +116,18 @@ using CUDA.CUDNN:
qProjSize, kProjSize, vProjSize, oProjSize,
qoMaxSeqLength, kvMaxSeqLength, maxBatchSize,
maxBeamSize, residuals, currIdx, loWinIdx, hiWinIdx)
@test y ≈ cudnnMultiHeadAttnForward!(zero(y), weights, queries, keys, values; axes,
seqLengthsQO, seqLengthsKV, attnMode, nHeads,
smScaler, mathType, qProjSize, kProjSize,
vProjSize, oProjSize, qoMaxSeqLength,
kvMaxSeqLength, maxBatchSize, maxBeamSize,
residuals, currIdx, loWinIdx, hiWinIdx)
@test y ≈ cudnnMultiHeadAttnForward(weights, queries, keys, values, attnDesc;
axes, seqLengthsQO, seqLengthsKV, residuals,
currIdx, loWinIdx, hiWinIdx)
@test y ≈ cudnnMultiHeadAttnForward!(zero(y), weights, queries, keys, values, attnDesc;
axes, seqLengthsQO, seqLengthsKV, residuals,
currIdx, loWinIdx, hiWinIdx)
@test Array(y) ≈ cudnnMultiHeadAttnForward!(zero(y), weights, queries, keys, values; axes,
seqLengthsQO, seqLengthsKV, attnMode, nHeads,
smScaler, mathType, qProjSize, kProjSize,
vProjSize, oProjSize, qoMaxSeqLength,
kvMaxSeqLength, maxBatchSize, maxBeamSize,
residuals, currIdx, loWinIdx, hiWinIdx) |> Array
@test Array(y) ≈ cudnnMultiHeadAttnForward(weights, queries, keys, values, attnDesc;
axes, seqLengthsQO, seqLengthsKV, residuals,
currIdx, loWinIdx, hiWinIdx) |> Array
@test Array(y) ≈ cudnnMultiHeadAttnForward!(zero(y), weights, queries, keys, values, attnDesc;
axes, seqLengthsQO, seqLengthsKV, residuals,
currIdx, loWinIdx, hiWinIdx) |> Array
end

Q,K,V,B,T,F = 6,6,5,4,3,Float32
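Each attention check above pairs the allocating call (`cudnnMultiHeadAttnForward`) with its in-place variant writing into `zero(y)`, comparing everything on the host. A toy sketch of that allocating/mutating convention with made-up functions (not the cuDNN API):

```julia
using Test

# Hypothetical f/f! pair: f allocates its output, f! fills a caller-provided
# buffer. The tests above apply the same pattern to the cuDNN wrappers,
# converting both results to Array before comparing.
f(x) = 2 .* x
f!(out, x) = (out .= 2 .* x; out)

x = rand(Float32, 4)
y = f(x)
@test Array(y) ≈ f!(zero(y), x) |> Array
```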
18 changes: 9 additions & 9 deletions test/cudnn/normalization.jl
@@ -84,15 +84,15 @@ using CUDA.CUDNN:
y0 = randn!(similar(x))
y1 = alpha * y
y2 = y1 + beta * y0
@test y1 ≈ cudnnNormalizationForward(x, xmean, xvar, bias, scale; training, z, mode,
normOps, algo, alpha, epsilon, groupCnt,
format, exponentialAverageFactor, savedMean,
savedInvVariance, activationDesc)
@test y2 ≈ cudnnNormalizationForward!(copy(y0), x, xmean, xvar, bias, scale;
training, z, mode, normOps, algo, alpha, beta,
epsilon, groupCnt, format,
exponentialAverageFactor, savedMean,
savedInvVariance, activationDesc)
@test Array(y1) ≈ cudnnNormalizationForward(x, xmean, xvar, bias, scale; training, z, mode,
normOps, algo, alpha, epsilon, groupCnt,
format, exponentialAverageFactor, savedMean,
savedInvVariance, activationDesc) |> Array
@test Array(y2) ≈ cudnnNormalizationForward!(copy(y0), x, xmean, xvar, bias, scale;
training, z, mode, normOps, algo, alpha, beta,
epsilon, groupCnt, format,
exponentialAverageFactor, savedMean,
savedInvVariance, activationDesc) |> Array
end

x, z, s = (CUDA.randn(x...) for x in ((5,4,3,2),(5,4,3,2),(1,1,3,1)))
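The expected values in the normalization test encode cuDNN's usual output blending, where the result is scaled by `alpha` and optionally accumulated into the destination scaled by `beta` (hence `y1 = alpha * y` and `y2 = y1 + beta * y0`). A tiny sketch of that arithmetic, independent of any library call:

```julia
using Test

# alpha/beta blending: y1 is the pure scaled result, y2 blends it into the
# destination's prior contents y0 (values here are arbitrary stand-ins).
alpha, beta = 2.0f0, 0.5f0
y  = rand(Float32, 4)    # stand-in for the raw normalization output
y0 = rand(Float32, 4)    # stand-in for the destination's prior contents
y1 = alpha * y
y2 = y1 + beta * y0
@test y2 ≈ alpha * y + beta * y0
```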
22 changes: 11 additions & 11 deletions test/cudnn/rnn.jl
@@ -96,17 +96,17 @@ using CUDA.CUDNN:
_y = copy(y)
_hy = (hy === nothing ? hy : copy(hy[]))
_cy = (cy === nothing ? cy : copy(cy[]))
@test _y ≈ cudnnRNNForward!(y, w, x; hx, cx, hy, cy, layout, seqLengthArray, fwdMode,
hiddenSize, algo, cellMode, biasMode, dirMode, inputMode,
mathPrec, mathType, inputSize, projSize, numLayers, dropout, auxFlags)
(_hy === hy === nothing || _hy ≈ hy[])
(_cy === cy === nothing || _cy ≈ cy[])
@test _y ≈ cudnnRNNForward(w, x, d; hx, cx, hy, cy, layout, seqLengthArray, fwdMode)
(_hy === hy === nothing || _hy ≈ hy[])
(_cy === cy === nothing || _cy ≈ cy[])
@test _y ≈ cudnnRNNForward!(y, w, x, d; hx, cx, hy, cy, layout, seqLengthArray, fwdMode)
(_hy === hy === nothing || _hy ≈ hy[])
(_cy === cy === nothing || _cy ≈ cy[])
@test Array(_y) ≈ cudnnRNNForward!(y, w, x; hx, cx, hy, cy, layout, seqLengthArray, fwdMode,
hiddenSize, algo, cellMode, biasMode, dirMode, inputMode,
mathPrec, mathType, inputSize, projSize, numLayers, dropout, auxFlags) |> Array
(_hy === hy === nothing || @test Array(_hy) ≈ Array(hy[]))
(_cy === cy === nothing || @test Array(_cy) ≈ Array(cy[]))
@test Array(_y) ≈ cudnnRNNForward(w, x, d; hx, cx, hy, cy, layout, seqLengthArray, fwdMode) |> Array
(_hy === hy === nothing || @test Array(_hy) ≈ Array(hy[]))
(_cy === cy === nothing || @test Array(_cy) ≈ Array(cy[]))
@test Array(_y) ≈ cudnnRNNForward!(y, w, x, d; hx, cx, hy, cy, layout, seqLengthArray, fwdMode) |> Array
(_hy === hy === nothing || @test Array(_hy) ≈ Array(hy[]))
(_cy === cy === nothing || @test Array(_cy) ≈ Array(cy[]))
end

rnntest()
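The updated RNN checks guard the optional hidden/cell-state comparisons with a short-circuit `||`, so the `@test` only runs when that state was actually requested. A minimal sketch of the guard with a stand-in value (not the RNN call):

```julia
using Test

# When the optional output was not requested, `hy === nothing` short-circuits
# and the comparison is skipped; otherwise the @test runs on host copies.
hy  = Ref(rand(Float32, 3))   # stand-in for a returned hidden state
_hy = copy(hy[])
(_hy === hy === nothing || @test Array(_hy) ≈ Array(hy[]))

hy  = nothing                 # when no state is returned ...
_hy = nothing
(_hy === hy === nothing || @test false)   # ... the guard skips the @test
```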
15 changes: 5 additions & 10 deletions test/runtests.jl
@@ -171,10 +171,14 @@ has_cudnn() || push!(skip_tests, "cudnn")
has_cusolvermg() || push!(skip_tests, "cusolvermg")
has_nvml() || push!(skip_tests, "nvml")
if !has_cutensor() || CUDA.version() < v"10.1" || first(picks).cap < v"7.0" || do_sanitize
push!(skip_tests, "cutensor")
end
if do_sanitize
# XXX: some library tests fail under compute-sanitizer
append!(skip_tests, ["cutensor", "cusparse"])
# XXX: others take absurdly long
push!(skip_tests, "cusolver")
end
is_debug = ccall(:jl_is_debugbuild, Cint, ()) != 0
if first(picks).cap < v"7.0"
push!(skip_tests, "device/intrinsics/wmma")
end
@@ -430,15 +434,6 @@ try
p = recycle_worker(p)
else
print_testworker_stats(test, wrkr, resp)

cpu_rss = resp[9]
if CUDA.getenv("CI", false) && cpu_rss > 4*2^30
# XXX: despite resetting the device and collecting garbage
# after each test, we are leaking CPU memory somewhere.
# this is a problem on CI, where we don't have much RAM.
# work around this by periodically recycling the worker.
p = recycle_worker(p)
end
end

# aggregate the snooped compiler invocations
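runtests.jl now extends `skip_tests` under `--sanitize` (cutensor, cusparse, and the slow cusolver suite) and records whether Julia is a debug build. A hypothetical sketch of how such a skip list is typically applied to the collected test names (illustrative only, not the harness's actual code):

```julia
# Hypothetical illustration: names pushed onto skip_tests are filtered out of
# the full test list before workers pick tests up.
skip_tests = String[]
do_sanitize = true
if do_sanitize
    append!(skip_tests, ["cutensor", "cusparse"])   # fail under compute-sanitizer
    push!(skip_tests, "cusolver")                    # takes absurdly long
end

all_tests = ["core", "cutensor", "cusparse", "cusolver", "nvml"]
tests = filter(t -> !(t in skip_tests), all_tests)
@assert tests == ["core", "nvml"]
```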
