diff --git a/src/PlaneWaveBasis.jl b/src/PlaneWaveBasis.jl index caa422b406..2636acb6c7 100644 --- a/src/PlaneWaveBasis.jl +++ b/src/PlaneWaveBasis.jl @@ -41,9 +41,10 @@ Normalization conventions: """ struct PlaneWaveBasis{T, VT <: Real, + Arch <: AbstractArchitecture, T_G_vectors <: AbstractArray{Vec3{Int}, 3}, T_r_vectors <: AbstractArray{Vec3{VT}, 3}, - T_kpt_G_vecs <: AbstractVector{Vec3{Int}} + T_kpt_G_vecs <: AbstractVector{Vec3{Int}}, } <: AbstractBasis{T} # T is the default type to express data, VT the corresponding bare value type (i.e. not dual) @@ -100,7 +101,7 @@ struct PlaneWaveBasis{T, # respective rank in comm_kpts ## Information on the hardware and device used for computations. - architecture::AbstractArchitecture + architecture::Arch ## Symmetry operations that leave the discretized model (k and r grids) invariant. # Subset of model.symmetries. @@ -160,7 +161,8 @@ end function PlaneWaveBasis(model::Model{T}, Ecut::Number, fft_size, variational, kcoords, kweights, kgrid, kshift, symmetries_respect_rgrid, comm_kpts, - architecture::AbstractArchitecture) where {T <: Real} + architecture::Arch + ) where {T <: Real, Arch <: AbstractArchitecture} # Validate fft_size if variational max_E = norm2(model.recip_lattice * floor.(Int, Vec3(fft_size) ./ 2)) / 2 @@ -220,7 +222,7 @@ function PlaneWaveBasis(model::Model{T}, Ecut::Number, fft_size, variational, # by the same process n_kpt = length(kcoords_global) n_procs = mpi_nprocs(comm_kpts) - + # Custom reduction operators for MPI are currently not working on aarch64, so # fallbacks are defined in common/mpi.jl. For them to work, there cannot be more # than 1 MPI process. @@ -279,7 +281,7 @@ function PlaneWaveBasis(model::Model{T}, Ecut::Number, fft_size, variational, r_vectors = to_device(architecture, r_vectors) terms = Vector{Any}(undef, length(model.term_types)) # Dummy terms array, filled below - basis = PlaneWaveBasis{T, value_type(T), typeof(Gs), typeof(r_vectors), + basis = PlaneWaveBasis{T, value_type(T), Arch, typeof(Gs), typeof(r_vectors), typeof(kpoints[1].G_vectors)}( model, fft_size, dvol, Ecut, variational, diff --git a/src/densities.jl b/src/densities.jl index dcde8601c8..7fd187b50d 100644 --- a/src/densities.jl +++ b/src/densities.jl @@ -110,7 +110,7 @@ function ρ_from_total(basis, ρtot::AbstractArray{T}) where {T} if basis.model.spin_polarization in (:none, :spinless) ρspin = nothing else - ρspin = zeros(T, basis.fft_size) + ρspin = zeros_like(basis.G_vectors, T, basis.fft_size...) end ρ_from_total_and_spin(ρtot, ρspin) end diff --git a/src/density_methods.jl b/src/density_methods.jl index b8a1fda8fc..1a7d0d1f6c 100644 --- a/src/density_methods.jl +++ b/src/density_methods.jl @@ -18,7 +18,7 @@ end @doc raw""" Build a random charge density normalized to the provided number of electrons. """ -function random_density(basis::PlaneWaveBasis{T}, n_electrons) where {T} +function random_density(basis::PlaneWaveBasis{T}, n_electrons::Integer) where {T} ρtot = rand(T, basis.fft_size) ρtot = ρtot .* n_electrons ./ (sum(ρtot) * basis.dvol) # Integration to n_electrons ρspin = nothing @@ -124,17 +124,17 @@ function atomic_spin_density(basis::PlaneWaveBasis{T}, method::AtomicDensity, end @assert length(magmoms) == length(basis.model.atoms) - coefficients = map(zip(basis.model.atoms, magmoms)) do (atom, magmom) + coefficients = map(basis.model.atoms, magmoms) do atom, magmom iszero(magmom[1:2]) || error("Non-collinear magnetization not yet implemented") magmom[3] ≤ n_elec_valence(atom) || error( "Magnetic moment $(magmom[3]) too large for element $(atomic_symbol(atom)) " * "with only $(n_elec_valence(atom)) valence electrons." ) magmom[3] / n_elec_valence(atom) - end + end::AbstractVector{T} # Needed to ensure type stability in final guess density form_factors = atomic_density_form_factors(basis, method) - atomic_density_superposition(basis, form_factors; coefficients) + atomic_density_superposition(basis, form_factors; coefficients) end # Perform an atomic density superposition. The density is constructed in reciprocal space @@ -143,13 +143,12 @@ end function atomic_density_superposition(basis::PlaneWaveBasis{T}, form_factors::IdDict{Tuple{Int,T},T}; coefficients=ones(T, length(basis.model.atoms)) - )::Array{T,3} where {T} + ) where {T} model = basis.model - G_cart = G_vectors_cart(basis) - - ρ = map(enumerate(G_vectors(basis))) do (iG, G) + G_cart = to_cpu(G_vectors_cart(basis)) + ρ_cpu = map(enumerate(to_cpu(G_vectors(basis)))) do (iG, G) Gnorm = norm(G_cart[iG]) - ρ_iG = sum(enumerate(model.atom_groups); init=zero(Complex{T})) do (igroup, group) + ρ_iG = sum(enumerate(model.atom_groups); init=zero(complex(T))) do (igroup, group) sum(group) do iatom structure_factor = cis2pi(-dot(G, model.positions[iatom])) coefficients[iatom] * form_factors[(igroup, Gnorm)] * structure_factor @@ -157,6 +156,7 @@ function atomic_density_superposition(basis::PlaneWaveBasis{T}, end ρ_iG / sqrt(model.unit_cell_volume) end + ρ = to_device(basis.architecture, ρ_cpu) enforce_real!(basis, ρ) # Symmetrize Fourier coeffs to have real iFFT irfft(basis, ρ) end @@ -166,7 +166,7 @@ function atomic_density_form_factors(basis::PlaneWaveBasis{T}, )::IdDict{Tuple{Int,T},T} where {T<:Real} model = basis.model form_factors = IdDict{Tuple{Int,T},T}() # IdDict for Dual compatability - for G in G_vectors_cart(basis) + for G in to_cpu(G_vectors_cart(basis)) Gnorm = norm(G) for (igroup, group) in enumerate(model.atom_groups) if !haskey(form_factors, (igroup, Gnorm)) diff --git a/src/external/jld2io.jl b/src/external/jld2io.jl index 1a61ea4ed4..bc906368e4 100644 --- a/src/external/jld2io.jl +++ b/src/external/jld2io.jl @@ -73,7 +73,7 @@ load_scfres(file::AbstractString) = JLD2.jldopen(load_scfres, file, "r") # # Custom serialisations # -struct PlaneWaveBasisSerialisation{T <: Real} +struct PlaneWaveBasisSerialisation{T <: Real, Arch <: AbstractArchitecture} model::Model{T,T} Ecut::T variational::Bool @@ -83,17 +83,17 @@ struct PlaneWaveBasisSerialisation{T <: Real} kshift::Union{Nothing,Vec3{T}} symmetries_respect_rgrid::Bool fft_size::Tuple{Int, Int, Int} - architecture::AbstractArchitecture + architecture::Arch end -function JLD2.writeas(::Type{PlaneWaveBasis{T,T,GT,RT,KGT}}) where {T,GT,RT,KGT} +function JLD2.writeas(::Type{PlaneWaveBasis{T,T,Arch,GT,RT,KGT}}) where {T,Arch,GT,RT,KGT} # The GT, GT, KGT are uniquely determined by the architecture, # which is stored in the basis. - PlaneWaveBasisSerialisation{T} + PlaneWaveBasisSerialisation{T,Arch} end -function Base.convert(::Type{PlaneWaveBasisSerialisation{T}}, - basis::PlaneWaveBasis{T,T}) where {T} - PlaneWaveBasisSerialisation{T}( +function Base.convert(::Type{PlaneWaveBasisSerialisation{T,Arch}}, + basis::PlaneWaveBasis{T,T,Arch}) where {T,Arch} + PlaneWaveBasisSerialisation{T,Arch}( basis.model, basis.Ecut, basis.variational, @@ -107,8 +107,8 @@ function Base.convert(::Type{PlaneWaveBasisSerialisation{T}}, ) end -function Base.convert(::Type{PlaneWaveBasis{T,T,GT,RT,KGT}}, - serial::PlaneWaveBasisSerialisation{T}) where {T,GT,RT,KGT} +function Base.convert(::Type{PlaneWaveBasis{T,T,Arch,GT,RT,KGT}}, + serial::PlaneWaveBasisSerialisation{T,Arch}) where {T,Arch,GT,RT,KGT} PlaneWaveBasis(serial.model, serial.Ecut, serial.kcoords, serial.kweights; serial.fft_size, serial.kgrid, diff --git a/src/response/chi0.jl b/src/response/chi0.jl index 6e17de5859..217dc17210 100644 --- a/src/response/chi0.jl +++ b/src/response/chi0.jl @@ -55,7 +55,7 @@ function compute_χ0(ham; temperature=ham.basis.model.temperature) T = eltype(basis) occupation, εF = compute_occupation(basis, Es, fermialg; temperature, tol_n_elec=10eps(T)) - χ0 = zeros(eltype(basis), n_spin * n_fft, n_spin * n_fft) + χ0 = zeros_like(basis.G_vectors, T, n_spin * n_fft, n_spin * n_fft) for (ik, kpt) in enumerate(basis.kpoints) # The sum-over-states terms of χ0 are diagonal in the spin blocks (no αβ / βα terms) # so the spin of the kpt selects the block we are in @@ -109,8 +109,8 @@ precondprep!(P::FunctionPreconditioner, ::Any) = P # included). function sternheimer_solver(Hk, ψk, ε, rhs; callback=identity, cg_callback=identity, - ψk_extra=zeros(size(ψk,1), 0), εk_extra=zeros(0), - Hψk_extra=zeros(size(ψk,1), 0), tol=1e-9) + ψk_extra=zeros_like(ψk, size(ψk, 1), 0), εk_extra=zeros(0), + Hψk_extra=zeros_like(ψk, size(ψk, 1), 0), tol=1e-9) basis = Hk.basis kpoint = Hk.kpoint @@ -291,7 +291,7 @@ Perform in-place computations of the derivatives of the wave functions by solvin a Sternheimer equation for each `k`-points. It is assumed the passed `δψ` are initialised to zero. """ -function compute_δψ!(δψ, basis, H, ψ, εF, ε, δHψ; ψ_extra=[zeros(size(ψk,1), 0) for ψk in ψ], +function compute_δψ!(δψ, basis, H, ψ, εF, ε, δHψ; ψ_extra=[zeros_like(ψk, size(ψk,1), 0) for ψk in ψ], kwargs_sternheimer...) model = basis.model temperature = model.temperature diff --git a/src/terms/operators.jl b/src/terms/operators.jl index 99090db9d0..f552ed333b 100644 --- a/src/terms/operators.jl +++ b/src/terms/operators.jl @@ -41,7 +41,7 @@ end apply!(Hψ, op::NoopOperator, ψ) = nothing function Matrix(op::NoopOperator) n_Gk = length(G_vectors(op.basis, op.kpoint)) - zeros(eltype(op.basis), n_Gk, n_Gk) + zeros_like(op.basis.G_vectors, eltype(op.basis), n_Gk, n_Gk) end """ diff --git a/src/workarounds/cuda_arrays.jl b/src/workarounds/cuda_arrays.jl index d34247433f..8e00990ec4 100644 --- a/src/workarounds/cuda_arrays.jl +++ b/src/workarounds/cuda_arrays.jl @@ -1,16 +1,5 @@ using LinearAlgebra -# https://github.com/JuliaGPU/CUDA.jl/issues/1572 -function LinearAlgebra.eigen(A::Hermitian{<:Complex,<:CUDA.CuArray}) - values, vectors = CUDA.CUSOLVER.heevd!('V', 'U', A.data) - (; values, vectors) -end - -function LinearAlgebra.eigen(A::Hermitian{<:Real,<:CUDA.CuArray}) - values, vectors = CUDA.CUSOLVER.syevd!('V', 'U', A.data) - (; values, vectors) -end - synchronize_device(::GPU{<:CUDA.CuArray}) = CUDA.synchronize() for fun in (:potential_terms, :kernel_terms) diff --git a/test/gpu.jl b/test/gpu.jl index cf878ccdcc..92587208b4 100644 --- a/test/gpu.jl +++ b/test/gpu.jl @@ -10,13 +10,7 @@ include("testcases.jl") function run_problem(; architecture) model = model_PBE(silicon.lattice, silicon.atoms, silicon.positions) basis = PlaneWaveBasis(model; Ecut=10, kgrid=(3, 3, 3), architecture) - - # TODO Right now guess generation on the GPU does not work - basis_cpu = PlaneWaveBasis(model; basis.Ecut, basis.kgrid) - ρ_guess = guess_density(basis_cpu) - ρ = DFTK.to_device(architecture, ρ_guess) - - self_consistent_field(basis; tol=1e-10, solver=scf_damping_solver(1.0), ρ) + self_consistent_field(basis; tol=1e-10, solver=scf_damping_solver(1.0)) end scfres_cpu = run_problem(; architecture=DFTK.CPU()) @@ -31,12 +25,7 @@ end model = model_PBE(iron_bcc.lattice, iron_bcc.atoms, iron_bcc.positions; magnetic_moments, smearing=Smearing.Gaussian(), temperature=1e-3) basis = PlaneWaveBasis(model; Ecut=20, kgrid=(4, 4, 4), architecture) - - # TODO Right now guess generation on the GPU does not work - basis_cpu = PlaneWaveBasis(model; basis.Ecut, basis.kgrid) - ρ_guess = guess_density(basis_cpu, magnetic_moments) - ρ = DFTK.to_device(architecture, ρ_guess) - # ρ = guess_density(basis, magnetic_moments) + ρ = guess_density(basis, magnetic_moments) # TODO Bump tolerance a bit here ... still leads to NaNs unfortunately self_consistent_field(basis; ρ, tol=1e-7, mixing=KerkerMixing(), @@ -50,7 +39,8 @@ end end -# TODO Fix guess density generation +# TODO Test hamiltonian application on GPU +# TODO Direct minimisation # TODO Float32 # TODO meta GGA # TODO Aluminium with LdosMixing diff --git a/test/guess_density.jl b/test/guess_density.jl index b24f0b03dc..caadc4adfb 100644 --- a/test/guess_density.jl +++ b/test/guess_density.jl @@ -10,35 +10,33 @@ include("testcases.jl") end total_charge(basis, ρ) = sum(ρ) * basis.model.unit_cell_volume / prod(basis.fft_size) - Si_upf = ElementPsp(silicon.atnum, psp=load_psp(silicon.psp_upf)) Si_hgh = ElementPsp(silicon.atnum, psp=load_psp(silicon.psp_hgh)) magnetic_moments = [1.0, -1.0] - methods = [ValenceDensityGaussian(), ValenceDensityPseudo(), ValenceDensityAuto()] + methods = [ValenceDensityGaussian(), ValenceDensityPseudo(), ValenceDensityAuto()] elements = [[Si_upf, Si_hgh], [Si_upf, Si_upf], [Si_upf, Si_hgh]] @testset "Random" begin method = RandomDensity() basis = build_basis([Si_upf, Si_hgh], :none) - ρ = guess_density(basis, method) + ρ = @inferred guess_density(basis, method) @test total_charge(basis, ρ) ≈ basis.model.n_electrons - + basis = build_basis([Si_upf, Si_hgh], :collinear) - ρ = guess_density(basis, method) + ρ = @inferred guess_density(basis, method) @test total_charge(basis, ρ) ≈ basis.model.n_electrons end @testset "Atomic $(string(typeof(method)))" for (method, elements) in zip(methods, elements) basis = build_basis(elements, :none) - ρ = guess_density(basis, method) + ρ = @inferred guess_density(basis, method) @test total_charge(basis, ρ) ≈ basis.model.n_electrons - + basis = build_basis(elements, :collinear) - ρ = guess_density(basis, method) + ρ = @inferred guess_density(basis, method) @test total_charge(basis, ρ) ≈ basis.model.n_electrons - - basis = basis - ρ = guess_density(basis, method, magnetic_moments) + + ρ = @inferred guess_density(basis, method, magnetic_moments) @test total_charge(basis, ρ) ≈ basis.model.n_electrons end end diff --git a/test/nlcc.jl b/test/nlcc.jl index 3002c06df5..a56e4ccf3c 100644 --- a/test/nlcc.jl +++ b/test/nlcc.jl @@ -20,7 +20,7 @@ pseudos = Dict( atoms = [ElementPsp(element, psp=psp)] model = model_LDA(lattice, atoms, positions) basis = PlaneWaveBasis(model; Ecut=24, kgrid=[2, 2, 2]) - ρ_core = atomic_total_density(basis, CoreDensity()) + ρ_core = @inferred atomic_total_density(basis, CoreDensity()) ρ_core_neg = abs(sum(ρ_core[ρ_core .< 0])) @test ρ_core_neg * model.unit_cell_volume / prod(basis.fft_size) < 1e-6 end