forked from JuliaGPU/GPUArrays.jl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconstruction.jl
100 lines (85 loc) · 3.42 KB
/
construction.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# Create an array of container type `X` with shape `dims` in which every
# element is `val`. The element type of the result is the type of `val`.
function Base.fill(X::Type{<: GPUArray}, val::T, dims::NTuple{N, Integer}) where {T, N}
    buffer = similar(X, T, dims)
    return fill!(buffer, val)
end
# Create an array of container type `X` with shape `dims` in which every
# element is `val` converted to `T`, the element type carried by `X`.
function Base.fill(X::Type{<: GPUArray{T}}, val, dims::NTuple{N, Integer}) where {T, N}
    buffer = similar(X, T, dims)
    element = convert(T, val)
    return fill!(buffer, element)
end
# Overwrite the elements of `A` with `x` (converted to the element type `T`)
# by launching a kernel through `gpu_call`, then return `A`.
function Base.fill!(A::GPUArray{T}, x) where T
    element = convert(T, x)
    gpu_call(A, (A, element)) do state, dest, value
        # NOTE(review): `@linearidx` presumably yields this thread's linear
        # index and exits early for surplus threads; confirm in its definition.
        i = @linearidx(dest, state)
        @inbounds dest[i] = value
        return
    end
    return A
end
# Array of container type `T` and shape `dims`, filled with `zero(eltype(T))`.
function Base.zeros(T::Type{<: GPUArray}, dims::NTuple{N, Integer}) where N
    return fill(T, zero(eltype(T)), dims)
end

# Array of container type `T` and shape `dims`, filled with `one(eltype(T))`.
function Base.ones(T::Type{<: GPUArray}, dims::NTuple{N, Integer}) where N
    return fill(T, one(eltype(T)), dims)
end
# Kernel body: thread `i` stores `s.λ` at the `i`-th diagonal entry of `res`.
#
# - `state`:  kernel state handed in by `gpu_call`; used to get the thread index.
# - `res`:    destination array (assumed to be a matrix, as in the caller that
#             passes a `Dims{2}` array).
# - `stride`: distance in linear indices between consecutive columns; the
#             caller passes `size(res, 1)`.
# - `s`:      the `UniformScaling` whose `λ` is written.
function uniformscaling_kernel(state, res::AbstractArray{T}, stride, s::UniformScaling) where T
    i = linear_index(state)
    # A diagonal entry (i, i) exists only when `i` is within BOTH the row count
    # and the column count. Checking the row count alone lets a surplus thread
    # with cols < i <= rows compute a linear index past the end of `res`,
    # which `@inbounds` would then write without any check.
    (i > stride || i > size(res, 2)) && return
    ilin = (stride * (i - 1)) + i
    @inbounds res[ilin] = s.λ
    return
end
# Build a `dims`-shaped array of container type `T` representing the scaling
# `s`: start from zeros, then let `uniformscaling_kernel` write the diagonal.
function (T::Type{<: GPUArray})(s::UniformScaling, dims::Dims{2})
    out = zeros(T, dims)
    nrows = size(out, 1)
    # The diagonal has `minimum(dims)` entries; that count is passed as the
    # launch configuration.
    ndiag = minimum(dims)
    gpu_call(uniformscaling_kernel, out, (out, nrows, s), ndiag)
    return out
end
# Scaling-matrix constructor taking the row and column counts separately;
# forwards to the `Dims{2}` method.
function (T::Type{<: GPUArray})(s::UniformScaling, m::Integer, n::Integer)
    return T(s, Dims((m, n)))
end

# Generic single-argument constructor: defer to `convert`.
function (T::Type{<: GPUArray})(x)
    return convert(T, x)
end

# Dimensions given as separate integers: repackage them as a tuple.
function (T::Type{<: GPUArray})(dims::Integer...)
    return T(dims)
end

# Dimensions given as axes (`Base.OneTo` ranges): use the length of each axis.
function (T::Type{<: GPUArray})(dims::NTuple{N, Base.OneTo{Int}}) where N
    return T(undef, map(length, dims))
end
# Allocate an array of container type `T` with shape `dims`; the element type
# is `eltype(T)` and allocation is delegated to `similar`.
function (T::Type{<: GPUArray{X} where X})(dims::NTuple{N, Integer}) where N
    return similar(T, eltype(T), dims)
end

# Same allocation as above, spelled with an explicit `undef` initializer.
function (T::Type{<: GPUArray{X} where X})(::UndefInitializer, dims::NTuple{N, Integer}) where N
    return similar(T, eltype(T), dims)
end
# `similar` on an array instance: forward to the method that takes the
# array's type `X`, keeping the requested element type and dimensions.
function Base.similar(x::X, ::Type{T}, size::Base.Dims{N}) where {X <: GPUArray, T, N}
    return similar(X, T, size)
end

# `similar` on an array type with axes (`Base.OneTo` ranges) in place of
# integer dimensions: translate each axis to its length and forward.
function Base.similar(::Type{X}, ::Type{T}, size::NTuple{N, Base.OneTo{Int}}) where {X <: GPUArray, T, N}
    return similar(X, T, map(length, size))
end
# Converting a GPU array to a GPU array type with the same element type and
# rank is a no-op: `A` itself is returned, without copying.
function Base.convert(AT::Type{<: GPUArray{T, N}}, A::GPUArray{T, N}) where {T, N}
    return A
end
# Best-effort query of `Base.IndexStyle(x)`.
# Returns the index style when the call succeeds, and `nothing` when it throws
# (for example because no `IndexStyle` method exists for `x`).
function indexstyle(x::T) where T
    try
        return Base.IndexStyle(x)
    catch
        return nothing
    end
end
# Kernel body for sources with Cartesian indexing: each thread copies the
# element of `iter` at its Cartesian position into the same position of `A`.
function collect_kernel(state, A, iter, ::IndexCartesian)
    # NOTE(review): `@cartesianidx` presumably maps this thread to a Cartesian
    # index of `A`; confirm its out-of-range behavior in its definition.
    pos = @cartesianidx(A, state)
    @inbounds A[pos...] = iter[pos...]
    return nothing
end
# Kernel body for sources with linear indexing: each thread copies the
# element of `iter` at its linear index into the same index of `A`.
#
# - `state`: kernel state handed in by `gpu_call`; used to get the thread index.
# - `A`:     destination array.
# - `iter`:  source; must support `iter[idx]` for every linear index of `A`.
function collect_kernel(state, A, iter, ::IndexLinear)
    idx = linear_index(state)
    # The store below is `@inbounds`, so any thread whose index lies past the
    # end of `A` must exit here instead of reading/writing out of bounds.
    idx > length(A) && return
    @inbounds A[idx] = iter[idx]
    return
end
# Element type carried by a GPU array type, or the fallback `or` when the
# type does not fix one.

# No element type parameter available: use the fallback.
function eltype_or(::Type{<: GPUArray}, or)
    return or
end

# Element type is fixed by the array type: use it and ignore the fallback.
function eltype_or(::Type{<: GPUArray{T}}, or) where T
    return T
end

# Element type and rank are both fixed: still use the element type.
function eltype_or(::Type{<: GPUArray{T, N}}, or) where {T, N}
    return T
end
# Convert an arbitrary iterable `iter` into a GPU array of type `AT`.
#
# When `iter` is an isbits value with a known shape, a known element type and
# a usable index style, the elements are written by `collect_kernel` launched
# through `gpu_call`. Otherwise `iter` is first materialized with `collect`
# and the resulting array is converted.
function Base.convert(AT::Type{<: GPUArray}, iter)
    isize = Base.IteratorSize(iter)
    style = indexstyle(iter)
    ettrait = Base.IteratorEltype(iter)
    # `indexstyle` returns `nothing` when no index style is available; test
    # for it by identity (`!==`) so the check never goes through `==`.
    collectable = isbits(iter) && isa(isize, Base.HasShape) &&
        style !== nothing && isa(ettrait, Base.HasEltype)
    if collectable
        # We can collect on the GPU
        A = similar(AT, eltype_or(AT, eltype(iter)), size(iter))
        gpu_call(collect_kernel, A, (A, iter, style))
        return A
    else
        return convert(AT, collect(iter))
    end
end
# Dense array -> GPU array type with matching element type and rank:
# allocate a destination of the same size and copy the data into it.
function Base.convert(AT::Type{<: GPUArray{T, N}}, A::DenseArray{T, N}) where {T, N}
    dest = AT(Base.size(A))
    return copyto!(dest, A)
end
# Dense array -> GPU array type with a (possibly different) element type `T1`:
# the source is converted to an `Array{T1, N}` first and then copied into a
# freshly allocated destination.
function Base.convert(AT::Type{<: GPUArray{T1}}, A::DenseArray{T2, N}) where {T1, T2, N}
    dest = similar(AT, T1, size(A))
    staged = convert(Array{T1, N}, A)
    return copyto!(dest, staged)
end
# Dense array -> GPU array type that does not fix an element type:
# the destination takes the source's element type `T2` and size.
function Base.convert(AT::Type{<: GPUArray}, A::DenseArray{T2, N}) where {T2, N}
    dest = similar(AT, T2, size(A))
    return copyto!(dest, A)
end
# GPU array -> `Array{T, N}`: copy the data into an `Array` with the source's
# own element type and rank, then convert that array to the requested type.
function Base.convert(AT::Type{Array{T, N}}, A::GPUArray{CT, CN}) where {T, N, CT, CN}
    host = Array{CT, CN}(undef, size(A))
    downloaded = copyto!(host, A)
    return convert(AT, downloaded)
end