Coverage for gpaw/new/pw/hamiltonian.py: 82%
156 statements
« prev ^ index » next coverage.py v7.7.1, created at 2025-07-12 00:18 +0000
« prev ^ index » next coverage.py v7.7.1, created at 2025-07-12 00:18 +0000
1from __future__ import annotations
2from typing import Callable
4import numpy as np
6from gpaw.core.plane_waves import PWArray
7from gpaw.core.uniform_grid import UGArray
8from gpaw.core.arrays import DistributedArrays as XArray
9from gpaw.gpu import cupy as cp
10from gpaw.new import trace, zips
11from gpaw.new.hamiltonian import Hamiltonian
12from gpaw.new.c import pw_precond, pw_insert_gpu
13from gpaw.utilities import as_complex_dtype
16class PWHamiltonian(Hamiltonian):
17 def __init__(self, grid, pw, xp=np):
18 self.grid_local = grid.new(comm=None, dtype=pw.dtype)
19 self.plan = self.grid_local.fft_plans(xp=xp)
20 # It's a bit too expensive to create all the local PW-descriptors
21 # for all the k-points every time we apply the Hamiltonian, so we
22 # cache them:
23 self.pw_cache = {}
25 @trace
26 def apply_local_potential(self,
27 vt_R: UGArray,
28 psit_nG: XArray,
29 out: XArray) -> None:
30 assert isinstance(psit_nG, PWArray)
31 assert isinstance(out, PWArray)
32 out_nG = out
33 xp = psit_nG.xp
34 pw = psit_nG.desc
35 if xp is not np and pw.comm.size == 1:
36 return apply_local_potential_gpu(vt_R, psit_nG, out_nG)
37 vt_R = vt_R.gather(broadcast=True)
38 tmp_R = self.grid_local.empty(xp=xp)
39 if pw.comm.size == 1:
40 pw_local = pw
41 else:
42 key = tuple(pw.kpt_c)
43 pw_local = self.pw_cache.get(key)
44 if pw_local is None:
45 pw_local = pw.new(comm=None)
46 self.pw_cache[key] = pw_local
47 psit_G = pw_local.empty(xp=xp)
48 e_kin_G = xp.asarray(psit_G.desc.ekin_G)
49 domain_comm = psit_nG.desc.comm
50 mynbands = psit_nG.mydims[0]
51 vtpsit_G = pw_local.empty(xp=xp)
53 for n1 in range(0, mynbands, domain_comm.size):
54 n2 = min(n1 + domain_comm.size, mynbands)
55 psit_nG[n1:n2].gather_all(psit_G)
56 if domain_comm.rank < n2 - n1:
57 psit_G.ifft(out=tmp_R, plan=self.plan)
58 tmp_R.data *= vt_R.data
59 tmp_R.fft(out=vtpsit_G, plan=self.plan)
60 psit_G.data *= e_kin_G
61 vtpsit_G.data += psit_G.data
62 out_nG[n1:n2].scatter_from_all(vtpsit_G)
64 def apply_mgga(self,
65 dedtaut_R: UGArray,
66 psit_nG: XArray,
67 vt_nG: XArray) -> None:
68 pw = psit_nG.desc
69 dpsit_R = dedtaut_R.desc.new(dtype=pw.dtype).empty()
70 Gplusk1_Gv = pw.reciprocal_vectors()
71 tmp_G = pw.empty()
73 for psit_G, vt_G in zips(psit_nG, vt_nG):
74 for v in range(3):
75 tmp_G.data[:] = psit_G.data
76 tmp_G.data *= 1j * Gplusk1_Gv[:, v]
77 tmp_G.ifft(out=dpsit_R)
78 dpsit_R.data *= dedtaut_R.data
79 dpsit_R.fft(out=tmp_G)
80 vt_G.data -= 0.5j * Gplusk1_Gv[:, v] * tmp_G.data
82 def create_preconditioner(self,
83 blocksize: int,
84 xp=np
85 ) -> Callable[[PWArray,
86 PWArray,
87 PWArray], None]:
88 return precondition
90 def calculate_kinetic_energy(self, wfs, skip_sum=False):
91 e_kin = 0.0
92 for f, psit_G in zip(wfs.myocc_n, wfs.psit_nX):
93 if f > 1.0e-10:
94 e_kin += f * psit_G.norm2('kinetic', skip_sum=skip_sum)
95 if not skip_sum:
96 e_kin = psit_G.desc.comm.sum_scalar(e_kin)
97 e_kin = wfs.band_comm.sum_scalar(e_kin)
98 return e_kin * wfs.spin_degeneracy
101@trace
102def precondition(psit_nG: PWArray,
103 residual_nG: PWArray,
104 out: PWArray,
105 ekin_n=None) -> None:
106 """Preconditioner for KS equation.
108 From:
110 Teter, Payne and Allen, Phys. Rev. B 40, 12255 (1989)
112 as modified by:
114 Kresse and Furthmüller, Phys. Rev. B 54, 11169 (1996)
115 """
116 xp = psit_nG.xp
117 G2_G = xp.asarray(psit_nG.desc.ekin_G * 2)
118 if ekin_n is None:
119 ekin_n = psit_nG.norm2('kinetic')
121 if xp is np:
122 for r_G, o_G, ekin in zips(residual_nG.data,
123 out.data,
124 ekin_n):
125 pw_precond(G2_G, r_G, ekin, o_G)
126 else:
127 out.data[:] = gpu_prec(ekin_n[:, np.newaxis],
128 G2_G[np.newaxis],
129 residual_nG.data)
130 return ekin_n
133@trace(gpu=True)
134@cp.fuse()
135def gpu_prec(ekin, G2, residual):
136 x = 1 / ekin / 3 * G2
137 a = 27.0 + x * (18.0 + x * (12.0 + x * 8.0))
138 xx = x * x
139 return -4.0 / 3 / ekin * a / (a + 16.0 * xx * xx) * residual
142def spinor_precondition(psit_nsG, residual_nsG, out):
143 G2_G = psit_nsG.desc.ekin_G * 2
144 for r_sG, o_sG, ekin in zips(residual_nsG.data,
145 out.data,
146 psit_nsG.norm2('kinetic').sum(1)):
147 for r_G, o_G in zips(r_sG, o_sG):
148 pw_precond(G2_G, r_G, ekin, o_G)
151class SpinorPWHamiltonian(Hamiltonian):
152 def __init__(self, qspiral_v):
153 super().__init__()
154 self.qspiral_v = qspiral_v
156 def apply(self,
157 vt_xR: UGArray,
158 dedtaut_xR: UGArray | None,
159 ibzwfs,
160 D_asii,
161 psit_nsG: XArray,
162 out: XArray,
163 spin: int) -> XArray:
164 assert dedtaut_xR is None
165 out_nsG = out
166 pw = psit_nsG.desc
168 if self.qspiral_v is None:
169 np.multiply(pw.ekin_G, psit_nsG.data, out_nsG.data)
170 else:
171 for s, sign in enumerate([1, -1]):
172 ekin_G = 0.5 * ((pw.G_plus_k_Gv +
173 0.5 * sign * self.qspiral_v)**2).sum(1)
174 np.multiply(ekin_G, psit_nsG.data[:, s], out_nsG.data[:, s])
176 grid = vt_xR.desc.new(dtype=complex)
178 v, x, y, z = vt_xR.data
179 iy = y * 1j
181 f_sR = grid.empty(2)
182 g_R = grid.empty()
184 for p_sG, o_sG in zips(psit_nsG, out_nsG):
185 p_sG.ifft(out=f_sR)
186 a, b = f_sR.data
187 g_R.data = a * (v + z) + b * (x - iy)
188 o_sG.data[0] += g_R.fft(pw=pw).data
189 g_R.data = a * (x + iy) + b * (v - z)
190 o_sG.data[1] += g_R.fft(pw=pw).data
192 return out_nsG
194 def create_preconditioner(self, blocksize, xp):
195 return spinor_precondition
198@trace
199def apply_local_potential_gpu(vt_R,
200 psit_nG,
201 out_nG,
202 blocksize=10):
203 from gpaw.gpu import cupyx
204 pw = psit_nG.desc
205 e_kin_G = cp.asarray(pw.ekin_G)
206 mynbands = psit_nG.mydims[0]
207 size_c = vt_R.desc.size_c
208 w = trace(gpu=True)
209 if np.issubdtype(pw.dtype, np.floating):
210 shape = (size_c[0], size_c[1], size_c[2] // 2 + 1)
211 ifftn = w(cupyx.scipy.fft.irfftn)
212 fftn = w(cupyx.scipy.fft.rfftn)
213 else:
214 shape = tuple(size_c)
215 ifftn = w(cupyx.scipy.fft.ifftn)
216 fftn = w(cupyx.scipy.fft.fftn)
217 Q_G = cp.asarray(pw.indices(shape))
218 psit_bQ = None
219 for b1 in range(0, mynbands, blocksize):
220 b2 = min(b1 + blocksize, mynbands)
221 nb = b2 - b1
222 if psit_bQ is None:
223 psit_bQ = cp.empty((nb,) + shape, as_complex_dtype(pw.dtype))
224 elif nb < blocksize:
225 psit_bQ = psit_bQ[:nb]
226 psit_bQ[:] = 0.0
227 pw_insert_gpu(psit_nG.data[b1:b2],
228 Q_G,
229 1.0,
230 psit_bQ.reshape((nb, -1)),
231 *size_c)
232 psit_bR = ifftn(
233 psit_bQ,
234 tuple(size_c),
235 norm='forward',
236 overwrite_x=True)
237 psit_bR *= vt_R.data
238 vtpsit_bQ = fftn(
239 psit_bR,
240 tuple(size_c),
241 norm='forward',
242 overwrite_x=True)
243 out_nG.data[b1:b2] = psit_nG.data[b1:b2]
244 out_nG.data[b1:b2] *= e_kin_G
245 out_nG.data[b1:b2] += vtpsit_bQ.reshape((nb, -1))[:, Q_G]