diff --git a/src/PlaneWaveBasis.jl b/src/PlaneWaveBasis.jl
index caa422b406..2636acb6c7 100644
--- a/src/PlaneWaveBasis.jl
+++ b/src/PlaneWaveBasis.jl
@@ -41,9 +41,10 @@ Normalization conventions:
 """
 struct PlaneWaveBasis{T,
                       VT <: Real,
+                      Arch <: AbstractArchitecture,
                       T_G_vectors  <: AbstractArray{Vec3{Int}, 3},
                       T_r_vectors  <: AbstractArray{Vec3{VT},  3},
-                      T_kpt_G_vecs <: AbstractVector{Vec3{Int}}
+                      T_kpt_G_vecs <: AbstractVector{Vec3{Int}},
                      } <: AbstractBasis{T}
 
     # T is the default type to express data, VT the corresponding bare value type (i.e. not dual)
@@ -100,7 +101,7 @@ struct PlaneWaveBasis{T,
     #                                       respective rank in comm_kpts
 
     ## Information on the hardware and device used for computations.
-    architecture::AbstractArchitecture
+    architecture::Arch
 
     ## Symmetry operations that leave the discretized model (k and r grids) invariant.
     # Subset of model.symmetries.
@@ -160,7 +161,8 @@ end
 function PlaneWaveBasis(model::Model{T}, Ecut::Number, fft_size, variational,
                         kcoords, kweights, kgrid, kshift,
                         symmetries_respect_rgrid, comm_kpts,
-                        architecture::AbstractArchitecture) where {T <: Real}
+                        architecture::Arch
+                       ) where {T <: Real, Arch <: AbstractArchitecture}
     # Validate fft_size
     if variational
         max_E = norm2(model.recip_lattice * floor.(Int, Vec3(fft_size) ./ 2)) / 2
@@ -220,7 +222,7 @@ function PlaneWaveBasis(model::Model{T}, Ecut::Number, fft_size, variational,
     # by the same process
     n_kpt   = length(kcoords_global)
     n_procs = mpi_nprocs(comm_kpts)
-    
+
     # Custom reduction operators for MPI are currently not working on aarch64, so
     # fallbacks are defined in common/mpi.jl. For them to work, there cannot be more
     # than 1 MPI process.
@@ -279,7 +281,7 @@ function PlaneWaveBasis(model::Model{T}, Ecut::Number, fft_size, variational,
     r_vectors = to_device(architecture, r_vectors)
     terms = Vector{Any}(undef, length(model.term_types))  # Dummy terms array, filled below
 
-    basis = PlaneWaveBasis{T, value_type(T), typeof(Gs), typeof(r_vectors),
+    basis = PlaneWaveBasis{T, value_type(T), Arch, typeof(Gs), typeof(r_vectors),
                            typeof(kpoints[1].G_vectors)}(
         model, fft_size, dvol,
         Ecut, variational,
diff --git a/src/densities.jl b/src/densities.jl
index dcde8601c8..7fd187b50d 100644
--- a/src/densities.jl
+++ b/src/densities.jl
@@ -110,7 +110,7 @@ function ρ_from_total(basis, ρtot::AbstractArray{T}) where {T}
     if basis.model.spin_polarization in (:none, :spinless)
         ρspin = nothing
     else
-        ρspin = zeros(T, basis.fft_size)
+        ρspin = zeros_like(basis.G_vectors, T, basis.fft_size...)
     end
     ρ_from_total_and_spin(ρtot, ρspin)
 end
diff --git a/src/density_methods.jl b/src/density_methods.jl
index b8a1fda8fc..1a7d0d1f6c 100644
--- a/src/density_methods.jl
+++ b/src/density_methods.jl
@@ -18,7 +18,7 @@ end
 @doc raw"""
 Build a random charge density normalized to the provided number of electrons.
 """
-function random_density(basis::PlaneWaveBasis{T}, n_electrons) where {T}
+function random_density(basis::PlaneWaveBasis{T}, n_electrons::Integer) where {T}
     ρtot  = rand(T, basis.fft_size)
     ρtot  = ρtot .* n_electrons ./ (sum(ρtot) * basis.dvol)  # Integration to n_electrons
     ρspin = nothing
@@ -124,17 +124,17 @@ function atomic_spin_density(basis::PlaneWaveBasis{T}, method::AtomicDensity,
     end
 
     @assert length(magmoms) == length(basis.model.atoms)
-    coefficients = map(zip(basis.model.atoms, magmoms)) do (atom, magmom)
+    coefficients = map(basis.model.atoms, magmoms) do atom, magmom
         iszero(magmom[1:2]) || error("Non-collinear magnetization not yet implemented")
         magmom[3] ≤ n_elec_valence(atom) || error(
             "Magnetic moment $(magmom[3]) too large for element $(atomic_symbol(atom)) " *
             "with only $(n_elec_valence(atom)) valence electrons."
         )
         magmom[3] / n_elec_valence(atom)
-    end
+    end::AbstractVector{T}  # Needed to ensure type stability in final guess density
 
     form_factors = atomic_density_form_factors(basis, method)
-    atomic_density_superposition(basis, form_factors; coefficients)    
+    atomic_density_superposition(basis, form_factors; coefficients)
 end
 
 # Perform an atomic density superposition. The density is constructed in reciprocal space
@@ -143,13 +143,12 @@ end
 function atomic_density_superposition(basis::PlaneWaveBasis{T},
                                       form_factors::IdDict{Tuple{Int,T},T};
                                       coefficients=ones(T, length(basis.model.atoms))
-                                      )::Array{T,3} where {T}
+                                      ) where {T}
     model = basis.model
-    G_cart = G_vectors_cart(basis)
-
-    ρ = map(enumerate(G_vectors(basis))) do (iG, G)
+    G_cart = to_cpu(G_vectors_cart(basis))
+    ρ_cpu = map(enumerate(to_cpu(G_vectors(basis)))) do (iG, G)
         Gnorm = norm(G_cart[iG])
-        ρ_iG = sum(enumerate(model.atom_groups); init=zero(Complex{T})) do (igroup, group)
+        ρ_iG = sum(enumerate(model.atom_groups); init=zero(complex(T))) do (igroup, group)
             sum(group) do iatom
                 structure_factor = cis2pi(-dot(G, model.positions[iatom]))
                 coefficients[iatom] * form_factors[(igroup, Gnorm)] * structure_factor
@@ -157,6 +156,7 @@ function atomic_density_superposition(basis::PlaneWaveBasis{T},
         end
         ρ_iG / sqrt(model.unit_cell_volume)
     end
+    ρ = to_device(basis.architecture, ρ_cpu)
     enforce_real!(basis, ρ)  # Symmetrize Fourier coeffs to have real iFFT
     irfft(basis, ρ)
 end
@@ -166,7 +166,7 @@ function atomic_density_form_factors(basis::PlaneWaveBasis{T},
                                      )::IdDict{Tuple{Int,T},T} where {T<:Real}
     model = basis.model
     form_factors = IdDict{Tuple{Int,T},T}()  # IdDict for Dual compatability
-    for G in G_vectors_cart(basis)
+    for G in to_cpu(G_vectors_cart(basis))
         Gnorm = norm(G)
         for (igroup, group) in enumerate(model.atom_groups)
             if !haskey(form_factors, (igroup, Gnorm))
diff --git a/src/external/jld2io.jl b/src/external/jld2io.jl
index 1a61ea4ed4..bc906368e4 100644
--- a/src/external/jld2io.jl
+++ b/src/external/jld2io.jl
@@ -73,7 +73,7 @@ load_scfres(file::AbstractString) = JLD2.jldopen(load_scfres, file, "r")
 #
 # Custom serialisations
 #
-struct PlaneWaveBasisSerialisation{T <: Real}
+struct PlaneWaveBasisSerialisation{T <: Real, Arch <: AbstractArchitecture}
     model::Model{T,T}
     Ecut::T
     variational::Bool
@@ -83,17 +83,17 @@ struct PlaneWaveBasisSerialisation{T <: Real}
     kshift::Union{Nothing,Vec3{T}}
     symmetries_respect_rgrid::Bool
     fft_size::Tuple{Int, Int, Int}
-    architecture::AbstractArchitecture
+    architecture::Arch
 end
-function JLD2.writeas(::Type{PlaneWaveBasis{T,T,GT,RT,KGT}}) where {T,GT,RT,KGT}
+function JLD2.writeas(::Type{PlaneWaveBasis{T,T,Arch,GT,RT,KGT}}) where {T,Arch,GT,RT,KGT}
     # The GT, GT, KGT are uniquely determined by the architecture,
     # which is stored in the basis.
-    PlaneWaveBasisSerialisation{T}
+    PlaneWaveBasisSerialisation{T,Arch}
 end
 
-function Base.convert(::Type{PlaneWaveBasisSerialisation{T}},
-                      basis::PlaneWaveBasis{T,T}) where {T}
-    PlaneWaveBasisSerialisation{T}(
+function Base.convert(::Type{PlaneWaveBasisSerialisation{T,Arch}},
+                      basis::PlaneWaveBasis{T,T,Arch}) where {T,Arch}
+    PlaneWaveBasisSerialisation{T,Arch}(
         basis.model,
         basis.Ecut,
         basis.variational,
@@ -107,8 +107,8 @@ function Base.convert(::Type{PlaneWaveBasisSerialisation{T}},
     )
 end
 
-function Base.convert(::Type{PlaneWaveBasis{T,T,GT,RT,KGT}},
-                      serial::PlaneWaveBasisSerialisation{T}) where {T,GT,RT,KGT}
+function Base.convert(::Type{PlaneWaveBasis{T,T,Arch,GT,RT,KGT}},
+                      serial::PlaneWaveBasisSerialisation{T,Arch}) where {T,Arch,GT,RT,KGT}
     PlaneWaveBasis(serial.model, serial.Ecut, serial.kcoords, serial.kweights;
                    serial.fft_size,
                    serial.kgrid,
diff --git a/src/response/chi0.jl b/src/response/chi0.jl
index 6e17de5859..217dc17210 100644
--- a/src/response/chi0.jl
+++ b/src/response/chi0.jl
@@ -55,7 +55,7 @@ function compute_χ0(ham; temperature=ham.basis.model.temperature)
     T  = eltype(basis)
     occupation, εF = compute_occupation(basis, Es, fermialg; temperature, tol_n_elec=10eps(T))
 
-    χ0 = zeros(eltype(basis), n_spin * n_fft, n_spin * n_fft)
+    χ0 = zeros_like(basis.G_vectors, T, n_spin * n_fft, n_spin * n_fft)
     for (ik, kpt) in enumerate(basis.kpoints)
         # The sum-over-states terms of χ0 are diagonal in the spin blocks (no αβ / βα terms)
         # so the spin of the kpt selects the block we are in
@@ -109,8 +109,8 @@ precondprep!(P::FunctionPreconditioner, ::Any) = P
 # included).
 function sternheimer_solver(Hk, ψk, ε, rhs;
                             callback=identity, cg_callback=identity,
-                            ψk_extra=zeros(size(ψk,1), 0), εk_extra=zeros(0),
-                            Hψk_extra=zeros(size(ψk,1), 0), tol=1e-9)
+                            ψk_extra=zeros_like(ψk, size(ψk, 1), 0), εk_extra=zeros(0),
+                            Hψk_extra=zeros_like(ψk, size(ψk, 1), 0), tol=1e-9)
     basis = Hk.basis
     kpoint = Hk.kpoint
 
@@ -291,7 +291,7 @@ Perform in-place computations of the derivatives of the wave functions by solvin
 a Sternheimer equation for each `k`-points. It is assumed the passed `δψ` are initialised
 to zero.
 """
-function compute_δψ!(δψ, basis, H, ψ, εF, ε, δHψ; ψ_extra=[zeros(size(ψk,1), 0) for ψk in ψ],
+function compute_δψ!(δψ, basis, H, ψ, εF, ε, δHψ; ψ_extra=[zeros_like(ψk, size(ψk,1), 0) for ψk in ψ],
                      kwargs_sternheimer...)
     model = basis.model
     temperature = model.temperature
diff --git a/src/terms/operators.jl b/src/terms/operators.jl
index 99090db9d0..f552ed333b 100644
--- a/src/terms/operators.jl
+++ b/src/terms/operators.jl
@@ -41,7 +41,7 @@ end
 apply!(Hψ, op::NoopOperator, ψ) = nothing
 function Matrix(op::NoopOperator)
     n_Gk = length(G_vectors(op.basis, op.kpoint))
-    zeros(eltype(op.basis), n_Gk, n_Gk)
+    zeros_like(op.basis.G_vectors, eltype(op.basis), n_Gk, n_Gk)
 end
 
 """
diff --git a/src/workarounds/cuda_arrays.jl b/src/workarounds/cuda_arrays.jl
index d34247433f..8e00990ec4 100644
--- a/src/workarounds/cuda_arrays.jl
+++ b/src/workarounds/cuda_arrays.jl
@@ -1,16 +1,5 @@
 using LinearAlgebra
 
-# https://github.com/JuliaGPU/CUDA.jl/issues/1572
-function LinearAlgebra.eigen(A::Hermitian{<:Complex,<:CUDA.CuArray})
-    values, vectors = CUDA.CUSOLVER.heevd!('V', 'U', A.data)
-    (; values, vectors)
-end
-
-function LinearAlgebra.eigen(A::Hermitian{<:Real,<:CUDA.CuArray})
-    values, vectors = CUDA.CUSOLVER.syevd!('V', 'U', A.data)
-    (; values, vectors)
-end
-
 synchronize_device(::GPU{<:CUDA.CuArray}) = CUDA.synchronize()
 
 for fun in (:potential_terms, :kernel_terms)
diff --git a/test/gpu.jl b/test/gpu.jl
index cf878ccdcc..92587208b4 100644
--- a/test/gpu.jl
+++ b/test/gpu.jl
@@ -10,13 +10,7 @@ include("testcases.jl")
     function run_problem(; architecture)
         model = model_PBE(silicon.lattice, silicon.atoms, silicon.positions)
         basis = PlaneWaveBasis(model; Ecut=10, kgrid=(3, 3, 3), architecture)
-
-        # TODO Right now guess generation on the GPU does not work
-        basis_cpu = PlaneWaveBasis(model; basis.Ecut, basis.kgrid)
-        ρ_guess = guess_density(basis_cpu)
-        ρ = DFTK.to_device(architecture, ρ_guess)
-
-        self_consistent_field(basis; tol=1e-10, solver=scf_damping_solver(1.0), ρ)
+        self_consistent_field(basis; tol=1e-10, solver=scf_damping_solver(1.0))
     end
 
     scfres_cpu = run_problem(; architecture=DFTK.CPU())
@@ -31,12 +25,7 @@ end
         model = model_PBE(iron_bcc.lattice, iron_bcc.atoms, iron_bcc.positions;
                           magnetic_moments, smearing=Smearing.Gaussian(), temperature=1e-3)
         basis = PlaneWaveBasis(model; Ecut=20, kgrid=(4, 4, 4), architecture)
-
-        # TODO Right now guess generation on the GPU does not work
-        basis_cpu = PlaneWaveBasis(model; basis.Ecut, basis.kgrid)
-        ρ_guess = guess_density(basis_cpu, magnetic_moments)
-        ρ = DFTK.to_device(architecture, ρ_guess)
-        # ρ = guess_density(basis, magnetic_moments)
+        ρ = guess_density(basis, magnetic_moments)
 
         # TODO Bump tolerance a bit here ... still leads to NaNs unfortunately
         self_consistent_field(basis; ρ, tol=1e-7, mixing=KerkerMixing(),
@@ -50,7 +39,8 @@ end
 end
 
 
-# TODO Fix guess density generation
+# TODO Test hamiltonian application on GPU
+# TODO Direct minimisation
 # TODO Float32
 # TODO meta GGA
 # TODO Aluminium with LdosMixing
diff --git a/test/guess_density.jl b/test/guess_density.jl
index b24f0b03dc..caadc4adfb 100644
--- a/test/guess_density.jl
+++ b/test/guess_density.jl
@@ -10,35 +10,33 @@ include("testcases.jl")
     end
     total_charge(basis, ρ) = sum(ρ) * basis.model.unit_cell_volume / prod(basis.fft_size)
 
-
     Si_upf = ElementPsp(silicon.atnum, psp=load_psp(silicon.psp_upf))
     Si_hgh = ElementPsp(silicon.atnum, psp=load_psp(silicon.psp_hgh))
     magnetic_moments = [1.0, -1.0]
-    methods = [ValenceDensityGaussian(), ValenceDensityPseudo(), ValenceDensityAuto()]
+    methods  = [ValenceDensityGaussian(), ValenceDensityPseudo(), ValenceDensityAuto()]
     elements = [[Si_upf, Si_hgh], [Si_upf, Si_upf], [Si_upf, Si_hgh]]
 
     @testset "Random" begin
         method = RandomDensity()
         basis = build_basis([Si_upf, Si_hgh], :none)
-        ρ = guess_density(basis, method)
+        ρ = @inferred guess_density(basis, method)
         @test total_charge(basis, ρ) ≈ basis.model.n_electrons
-    
+
         basis = build_basis([Si_upf, Si_hgh], :collinear)
-        ρ = guess_density(basis, method)
+        ρ = @inferred guess_density(basis, method)
         @test total_charge(basis, ρ) ≈ basis.model.n_electrons
     end
 
     @testset "Atomic $(string(typeof(method)))" for (method, elements) in zip(methods, elements)
         basis = build_basis(elements, :none)
-        ρ = guess_density(basis, method)
+        ρ = @inferred guess_density(basis, method)
         @test total_charge(basis, ρ) ≈ basis.model.n_electrons
-    
+
         basis = build_basis(elements, :collinear)
-        ρ = guess_density(basis, method)
+        ρ = @inferred guess_density(basis, method)
         @test total_charge(basis, ρ) ≈ basis.model.n_electrons
-    
-        basis = basis
-        ρ = guess_density(basis, method, magnetic_moments)
+
+        ρ = @inferred guess_density(basis, method, magnetic_moments)
         @test total_charge(basis, ρ) ≈ basis.model.n_electrons
     end
 end
diff --git a/test/nlcc.jl b/test/nlcc.jl
index 3002c06df5..a56e4ccf3c 100644
--- a/test/nlcc.jl
+++ b/test/nlcc.jl
@@ -20,7 +20,7 @@ pseudos = Dict(
         atoms = [ElementPsp(element, psp=psp)]
         model = model_LDA(lattice, atoms, positions)
         basis = PlaneWaveBasis(model; Ecut=24, kgrid=[2, 2, 2])
-        ρ_core = atomic_total_density(basis, CoreDensity())
+        ρ_core = @inferred atomic_total_density(basis, CoreDensity())
         ρ_core_neg = abs(sum(ρ_core[ρ_core .< 0]))
         @test ρ_core_neg * model.unit_cell_volume / prod(basis.fft_size) < 1e-6
     end