Coverage for gpaw/new/pwfd/eigensolver.py: 98%

82 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2025-07-14 00:18 +0000

1from __future__ import annotations 

2 

3from functools import partial 

4from typing import Callable 

5 

6import numpy as np 

7 

8from gpaw.core.arrays import DistributedArrays as XArray 

9from gpaw.core.atom_centered_functions import AtomArrays 

10from gpaw.mpi import broadcast_exception 

11from gpaw.new import trace, zips 

12from gpaw.new.c import calculate_residuals_gpu 

13from gpaw.new.eigensolver import Eigensolver, calculate_weights 

14from gpaw.new.energies import DFTEnergies 

15from gpaw.new.hamiltonian import Hamiltonian 

16from gpaw.utilities.blas import axpy 

17from gpaw.utilities import as_real_dtype 

18 

19 

20class PWFDEigensolver(Eigensolver): 

21 def __init__(self, 

22 hamiltonian, 

23 converge_bands: int | str = 'occupied', 

24 blocksize: int = 10, 

25 max_buffer_mem: int | None = 200 * 1024 ** 2): 

26 self.converge_bands = converge_bands 

27 self.blocksize = blocksize 

28 self.preconditioner: Callable 

29 self.preconditioner_factory = hamiltonian.create_preconditioner 

30 self.work_arrays: np.ndarray 

31 self.data_buffers: np.ndarray 

32 

33 # Maximal memory to be used for the eigensolver 

34 # should be infinite if hamiltonian is not band-local (hybrids) 

35 self.max_buffer_mem = ( 

36 max_buffer_mem if hamiltonian.band_local else None) 

37 

38 def _initialize(self, ibzwfs): 

39 # First time: allocate work-arrays 

40 self.preconditioner = self.preconditioner_factory(self.blocksize, 

41 xp=ibzwfs.xp) 

42 

43 def _allocate_buffer_arrays(self, ibzwfs, shape): 

44 G_max = np.prod(ibzwfs.get_max_shape()) 

45 b = max(wfs.n2 - wfs.n1 for wfs in ibzwfs) 

46 nbands = ibzwfs.nbands 

47 dtype_size = ibzwfs.wfs_qs[0][0].psit_nX.data.dtype.itemsize 

48 domain_size = ibzwfs.domain_comm.size 

49 

50 if self.max_buffer_mem is not None: 

51 # Buffer size needs to ensure that the number of bands 

52 # of the buffer is a multiple of domain_size. 

53 buffer_size_per_domain = max(self.max_buffer_mem, 

54 domain_size * G_max * dtype_size, 

55 nbands * dtype_size) \ 

56 // (domain_size * G_max * dtype_size) 

57 buffer_size = min(buffer_size_per_domain * domain_size 

58 * G_max * dtype_size, 

59 b * G_max * dtype_size) 

60 else: 

61 buffer_size = max(b * G_max * dtype_size, 

62 nbands * dtype_size) 

63 

64 self.data_buffers = ibzwfs.xp.empty(shape + (buffer_size,), 

65 np.byte) 

66 

67 def _allocate_work_arrays(self, ibzwfs, shape): 

68 b = max(wfs.n2 - wfs.n1 for wfs in ibzwfs) 

69 shape += (b,) + ibzwfs.get_max_shape() 

70 dtype = ibzwfs.wfs_qs[0][0].psit_nX.data.dtype 

71 self.work_arrays = ibzwfs.xp.empty(shape, dtype) 

72 

73 @trace 

74 def iterate(self, 

75 ibzwfs, 

76 density, 

77 potential, 

78 hamiltonian: Hamiltonian, 

79 pot_calc, 

80 energies: DFTEnergies) -> tuple[float, float, DFTEnergies]: 

81 """Iterate on state given fixed hamiltonian. 

82 

83 Returns 

84 ------- 

85 float: 

86 Weighted error of residuals::: 

87 

88 ~ 

89 R = (H - ε S)ψ 

90 n n n 

91 """ 

92 

93 if not hasattr(self, 'preconditioner'): 

94 self._initialize(ibzwfs) 

95 

96 wfs = ibzwfs.wfs_qs[0][0] 

97 dS_aii = wfs.setups.get_overlap_corrections(wfs.P_ani.layout.atomdist, 

98 wfs.xp) 

99 apply = partial(hamiltonian.apply, 

100 potential.vt_sR, 

101 potential.dedtaut_sR, 

102 ibzwfs, density.D_asii) # used by hybrids 

103 

104 weight_un = calculate_weights(self.converge_bands, ibzwfs) 

105 

106 wfs_error = 0.0 

107 eig_error = 0.0 

108 # Loop over k-points: 

109 with broadcast_exception(ibzwfs.kpt_comm): 

110 for wfs, weight_n in zips(ibzwfs, weight_un): 

111 dH = partial(potential.dH, spin=wfs.spin) 

112 Ht = partial(apply, spin=wfs.spin) 

113 temp_wfs_error, temp_eig_error = \ 

114 self.iterate_kpt(wfs, weight_n, self.iterate1, 

115 Ht=Ht, dH=dH, dS_aii=dS_aii) 

116 wfs_error += wfs.weight * temp_wfs_error 

117 if eig_error < temp_eig_error: 

118 eig_error = temp_eig_error 

119 

120 wfs_error = ibzwfs.kpt_band_comm.sum_scalar( 

121 float(wfs_error)) * ibzwfs.spin_degeneracy 

122 eig_error = ibzwfs.kpt_band_comm.max_scalar(eig_error) 

123 

124 return eig_error, wfs_error, energies 

125 

126 def iterate1(self, wfs, Ht, dH, dS_aii, weight_n): 

127 raise NotImplementedError 

128 

129 

130@trace 

131def calculate_residuals(psit_nX, 

132 residual_nX: XArray, 

133 pt_aiX, 

134 P_ani, 

135 eig_n, 

136 dH: Callable[[AtomArrays, AtomArrays], AtomArrays], 

137 dS_aii: AtomArrays, 

138 P1_ani: AtomArrays, 

139 P2_ani: AtomArrays) -> None: 

140 """Complete the calculation of resuduals. 

141 

142 Starting from residual_nX having the values::: 

143 

144 ^ ~ ~ 

145 (T + v) ψ 

146 n 

147 

148 add the following::: 

149 

150 -- a a ~a ~ ~a ~ 

151 > (ΔH - ε ΔS )<p |ψ > p - ε ψ . 

152 -- ij n ij j n i n 

153 ij 

154 

155 (P1_ani and P2_ani are work buffers). 

156 """ 

157 xp = residual_nX.xp 

158 if xp is np: 

159 for r, e, p in zips(residual_nX.data, eig_n, psit_nX.data): 

160 axpy(-e, p, r) 

161 else: 

162 eig_n = xp.asarray(eig_n, dtype=as_real_dtype(residual_nX.data.dtype)) 

163 calculate_residuals_gpu(residual_nX.data, eig_n, psit_nX.data) 

164 

165 dH(P_ani, P1_ani) 

166 P_ani.block_diag_multiply(dS_aii, out_ani=P2_ani) 

167 

168 if P_ani.data.ndim == 2: 

169 subscripts = 'nI, n -> nI' 

170 else: 

171 subscripts = 'nsI, n -> nsI' 

172 if xp is np: 

173 np.einsum(subscripts, P2_ani.data, eig_n, out=P2_ani.data, 

174 dtype=P2_ani.data.dtype, casting='same_kind') 

175 else: 

176 P2_ani.data[:] = xp.einsum(subscripts, P2_ani.data, eig_n) 

177 P1_ani.data -= P2_ani.data 

178 pt_aiX.add_to(residual_nX, P1_ani)