Coverage for gpaw/new/pwfd/eigensolver.py: 98%
82 statements
« prev ^ index » next coverage.py v7.7.1, created at 2025-07-14 00:18 +0000
« prev ^ index » next coverage.py v7.7.1, created at 2025-07-14 00:18 +0000
1from __future__ import annotations
3from functools import partial
4from typing import Callable
6import numpy as np
8from gpaw.core.arrays import DistributedArrays as XArray
9from gpaw.core.atom_centered_functions import AtomArrays
10from gpaw.mpi import broadcast_exception
11from gpaw.new import trace, zips
12from gpaw.new.c import calculate_residuals_gpu
13from gpaw.new.eigensolver import Eigensolver, calculate_weights
14from gpaw.new.energies import DFTEnergies
15from gpaw.new.hamiltonian import Hamiltonian
16from gpaw.utilities.blas import axpy
17from gpaw.utilities import as_real_dtype
20class PWFDEigensolver(Eigensolver):
21 def __init__(self,
22 hamiltonian,
23 converge_bands: int | str = 'occupied',
24 blocksize: int = 10,
25 max_buffer_mem: int | None = 200 * 1024 ** 2):
26 self.converge_bands = converge_bands
27 self.blocksize = blocksize
28 self.preconditioner: Callable
29 self.preconditioner_factory = hamiltonian.create_preconditioner
30 self.work_arrays: np.ndarray
31 self.data_buffers: np.ndarray
33 # Maximal memory to be used for the eigensolver
34 # should be infinite if hamiltonian is not band-local (hybrids)
35 self.max_buffer_mem = (
36 max_buffer_mem if hamiltonian.band_local else None)
38 def _initialize(self, ibzwfs):
39 # First time: allocate work-arrays
40 self.preconditioner = self.preconditioner_factory(self.blocksize,
41 xp=ibzwfs.xp)
43 def _allocate_buffer_arrays(self, ibzwfs, shape):
44 G_max = np.prod(ibzwfs.get_max_shape())
45 b = max(wfs.n2 - wfs.n1 for wfs in ibzwfs)
46 nbands = ibzwfs.nbands
47 dtype_size = ibzwfs.wfs_qs[0][0].psit_nX.data.dtype.itemsize
48 domain_size = ibzwfs.domain_comm.size
50 if self.max_buffer_mem is not None:
51 # Buffer size needs to ensure that the number of bands
52 # of the buffer is a multiple of domain_size.
53 buffer_size_per_domain = max(self.max_buffer_mem,
54 domain_size * G_max * dtype_size,
55 nbands * dtype_size) \
56 // (domain_size * G_max * dtype_size)
57 buffer_size = min(buffer_size_per_domain * domain_size
58 * G_max * dtype_size,
59 b * G_max * dtype_size)
60 else:
61 buffer_size = max(b * G_max * dtype_size,
62 nbands * dtype_size)
64 self.data_buffers = ibzwfs.xp.empty(shape + (buffer_size,),
65 np.byte)
67 def _allocate_work_arrays(self, ibzwfs, shape):
68 b = max(wfs.n2 - wfs.n1 for wfs in ibzwfs)
69 shape += (b,) + ibzwfs.get_max_shape()
70 dtype = ibzwfs.wfs_qs[0][0].psit_nX.data.dtype
71 self.work_arrays = ibzwfs.xp.empty(shape, dtype)
73 @trace
74 def iterate(self,
75 ibzwfs,
76 density,
77 potential,
78 hamiltonian: Hamiltonian,
79 pot_calc,
80 energies: DFTEnergies) -> tuple[float, float, DFTEnergies]:
81 """Iterate on state given fixed hamiltonian.
83 Returns
84 -------
85 float:
86 Weighted error of residuals:::
88 ~
89 R = (H - ε S)ψ
90 n n n
91 """
93 if not hasattr(self, 'preconditioner'):
94 self._initialize(ibzwfs)
96 wfs = ibzwfs.wfs_qs[0][0]
97 dS_aii = wfs.setups.get_overlap_corrections(wfs.P_ani.layout.atomdist,
98 wfs.xp)
99 apply = partial(hamiltonian.apply,
100 potential.vt_sR,
101 potential.dedtaut_sR,
102 ibzwfs, density.D_asii) # used by hybrids
104 weight_un = calculate_weights(self.converge_bands, ibzwfs)
106 wfs_error = 0.0
107 eig_error = 0.0
108 # Loop over k-points:
109 with broadcast_exception(ibzwfs.kpt_comm):
110 for wfs, weight_n in zips(ibzwfs, weight_un):
111 dH = partial(potential.dH, spin=wfs.spin)
112 Ht = partial(apply, spin=wfs.spin)
113 temp_wfs_error, temp_eig_error = \
114 self.iterate_kpt(wfs, weight_n, self.iterate1,
115 Ht=Ht, dH=dH, dS_aii=dS_aii)
116 wfs_error += wfs.weight * temp_wfs_error
117 if eig_error < temp_eig_error:
118 eig_error = temp_eig_error
120 wfs_error = ibzwfs.kpt_band_comm.sum_scalar(
121 float(wfs_error)) * ibzwfs.spin_degeneracy
122 eig_error = ibzwfs.kpt_band_comm.max_scalar(eig_error)
124 return eig_error, wfs_error, energies
126 def iterate1(self, wfs, Ht, dH, dS_aii, weight_n):
127 raise NotImplementedError
130@trace
131def calculate_residuals(psit_nX,
132 residual_nX: XArray,
133 pt_aiX,
134 P_ani,
135 eig_n,
136 dH: Callable[[AtomArrays, AtomArrays], AtomArrays],
137 dS_aii: AtomArrays,
138 P1_ani: AtomArrays,
139 P2_ani: AtomArrays) -> None:
140 """Complete the calculation of resuduals.
142 Starting from residual_nX having the values:::
144 ^ ~ ~
145 (T + v) ψ
146 n
148 add the following:::
150 -- a a ~a ~ ~a ~
151 > (ΔH - ε ΔS )<p |ψ > p - ε ψ .
152 -- ij n ij j n i n
153 ij
155 (P1_ani and P2_ani are work buffers).
156 """
157 xp = residual_nX.xp
158 if xp is np:
159 for r, e, p in zips(residual_nX.data, eig_n, psit_nX.data):
160 axpy(-e, p, r)
161 else:
162 eig_n = xp.asarray(eig_n, dtype=as_real_dtype(residual_nX.data.dtype))
163 calculate_residuals_gpu(residual_nX.data, eig_n, psit_nX.data)
165 dH(P_ani, P1_ani)
166 P_ani.block_diag_multiply(dS_aii, out_ani=P2_ani)
168 if P_ani.data.ndim == 2:
169 subscripts = 'nI, n -> nI'
170 else:
171 subscripts = 'nsI, n -> nsI'
172 if xp is np:
173 np.einsum(subscripts, P2_ani.data, eig_n, out=P2_ani.data,
174 dtype=P2_ani.data.dtype, casting='same_kind')
175 else:
176 P2_ani.data[:] = xp.einsum(subscripts, P2_ani.data, eig_n)
177 P1_ani.data -= P2_ani.data
178 pt_aiX.add_to(residual_nX, P1_ani)