Coverage for gpaw/wavefunctions/lcao.py: 89%

623 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2025-07-12 00:18 +0000

1import numpy as np 

2from ase.units import Bohr 

3from ase.utils.timing import timer 

4 

5# from gpaw import debug 

6from gpaw.directmin.etdm_lcao import LCAOETDM 

7from gpaw.directmin.tools import loewdin_lcao, gramschmidt_lcao 

8from gpaw.lcao.atomic_correction import (DenseAtomicCorrection, 

9 SparseAtomicCorrection) 

10# from gpaw.lcao.overlap import NewTwoCenterIntegrals as NewTCI 

11from gpaw.lcao.tci import TCIExpansions 

12from gpaw.lfc import BasisFunctions 

13from gpaw.utilities import unpack_hermitian 

14from gpaw.utilities.blas import mmm, gemmdot 

15from gpaw.utilities.tools import tri2full 

16from gpaw.wavefunctions.base import WaveFunctions 

17from gpaw.wavefunctions.mode import Mode 

18 

19 

20class LCAO(Mode): 

21 name = 'lcao' 

22 

23 def __init__(self, atomic_correction=None, interpolation=3, 

24 force_complex_dtype=False): 

25 self.atomic_correction = atomic_correction 

26 self.interpolation = interpolation 

27 Mode.__init__(self, force_complex_dtype) 

28 

29 def __call__(self, *args, **kwargs): 

30 return LCAOWaveFunctions(*args, 

31 atomic_correction=self.atomic_correction, 

32 **kwargs) 

33 

34 def __repr__(self): 

35 return f'LCAO({self.todict()})' 

36 

37 def todict(self): 

38 dct = Mode.todict(self) 

39 dct['interpolation'] = self.interpolation 

40 return dct 

41 

42 

43def update_phases(C_unM, q_u, ibzk_qc, spos_ac, oldspos_ac, setups, Mstart): 

44 """Complex-rotate coefficients compensating discontinuous phase shift. 

45 

46 This changes the coefficients to counteract the phase discontinuity 

47 of overlaps when atoms move across a cell boundary.""" 

48 

49 # We don't want to apply any phase shift unless we crossed a cell 

50 # boundary. So we round the shift to either 0 or 1. 

51 # 

52 # Example: spos_ac goes from 0.01 to 0.99 -- this rounds to 1 and 

53 # we apply the phase. If someone moves an atom by half a cell 

54 # without crossing a boundary, then we are out of luck. But they 

55 # should have reinitialized from LCAO anyway. 

56 phase_qa = np.exp(2j * np.pi * 

57 np.dot(ibzk_qc, (spos_ac - oldspos_ac).T.round())) 

58 

59 for q, C_nM in zip(q_u, C_unM): 

60 if C_nM is None: 

61 continue 

62 for a in range(len(spos_ac)): 

63 M1 = setups.M_a[a] - Mstart 

64 M2 = M1 + setups[a].nao 

65 M1 = max(0, M1) 

66 C_nM[:, M1:M2] *= phase_qa[q, a] # (may truncate M2) 

67 

68 

69# replace by class to make data structure perhaps a bit less confusing 

70def get_r_and_offsets(nl, spos_ac, cell_cv): 

71 r_and_offset_aao = {} 

72 

73 def add(a1, a2, R_c, offset): 

74 if (a1, a2) not in r_and_offset_aao: 

75 r_and_offset_aao[(a1, a2)] = [] 

76 r_and_offset_aao[(a1, a2)].append((R_c, offset)) 

77 

78 for a1, spos1_c in enumerate(spos_ac): 

79 a2_a, offsets = nl.get_neighbors(a1) 

80 for a2, offset in zip(a2_a, offsets): 

81 spos2_c = spos_ac[a2] + offset 

82 

83 R_c = np.dot(spos2_c - spos1_c, cell_cv) 

84 add(a1, a2, R_c, offset) 

85 if a1 != a2 or offset.any(): 

86 add(a2, a1, -R_c, -offset) 

87 

88 return r_and_offset_aao 

89 

90 

91class LCAOWaveFunctions(WaveFunctions): 

92 mode = 'lcao' 

93 

94 def __init__(self, ksl, gd, nvalence, setups, bd, 

95 dtype, world, kd, kptband_comm, timer, 

96 atomic_correction=None, collinear=True): 

97 WaveFunctions.__init__(self, gd, nvalence, setups, bd, 

98 dtype, collinear, world, kd, 

99 kptband_comm, timer) 

100 self.ksl = ksl 

101 self.S_qMM = None 

102 self.T_qMM = None 

103 self.P_aqMi = None 

104 self.debug_tci = False 

105 

106 if atomic_correction is None: 

107 atomic_correction = 'sparse' if ksl.using_blacs else 'dense' 

108 

109 if atomic_correction == 'sparse': 

110 self.atomic_correction_cls = SparseAtomicCorrection 

111 else: 

112 assert atomic_correction == 'dense' 

113 self.atomic_correction_cls = DenseAtomicCorrection 

114 

115 # self.tci = NewTCI(gd.cell_cv, gd.pbc_c, setups, kd.ibzk_qc, kd.gamma) 

116 with self.timer('TCI: Evaluate splines'): 

117 self.tciexpansions = TCIExpansions.new_from_setups(setups) 

118 

119 self.basis_functions = BasisFunctions(gd, 

120 [setup.basis_functions_J 

121 for setup in setups], 

122 kd, 

123 dtype=dtype, 

124 cut=True) 

125 

126 self.coefficients_read_from_file = False 

127 self.set_orthonormalized(False) 

128 

129 def set_orthonormalized(self, flag): 

130 self.orthonormalized = flag 

131 

132 @timer('Orthonormalize') 

133 def orthonormalize(self, kpt=None, type='gramschmidt'): 

134 assert type == 'gramschmidt' or type == 'loewdin' 

135 if kpt is None: 

136 for kpt in self.kpt_u: 

137 self.orthonormalize(kpt) 

138 self.orthonormalized = True 

139 return 

140 if type == 'loewdin': 

141 kpt.C_nM[:] = loewdin_lcao(kpt.C_nM, kpt.S_MM.conj()) 

142 elif type == 'gramschmidt': 

143 kpt.C_nM[:] = gramschmidt_lcao(kpt.C_nM, kpt.S_MM.conj()) 

144 

145 def empty(self, n=(), global_array=False, realspace=False): 

146 if realspace: 

147 return self.gd.empty(n, self.dtype, global_array) 

148 else: 

149 if isinstance(n, int): 

150 n = (n,) 

151 nao = self.setups.nao 

152 return np.empty(n + (nao,), self.dtype) 

153 

154 def __str__(self): 

155 s = 'Wave functions: LCAO\n' 

156 s += ' Diagonalizer: %s\n' % self.ksl.get_description() 

157 s += (' Atomic Correction: %s\n' 

158 % self.atomic_correction_cls.description) 

159 s += ' Data-type: %s\n' % self.dtype.__name__ 

160 return s 

161 

162 def set_eigensolver(self, eigensolver): 

163 WaveFunctions.set_eigensolver(self, eigensolver) 

164 if eigensolver: 

165 if isinstance(eigensolver, LCAOETDM): 

166 eigensolver.initialize(self.gd, self.dtype, self.bd.nbands, 

167 self.kd.nibzkpts, self.setups.nao, 

168 self.ksl.using_blacs, 

169 self.bd.comm.size, self.kpt_u) 

170 else: 

171 eigensolver.initialize(self.gd, self.dtype, self.setups.nao, 

172 self.ksl) 

173 

174 def set_positions(self, spos_ac, atom_partition=None, move_wfs=False): 

175 oldspos_ac = self.spos_ac 

176 with self.timer('Basic WFS set positions'): 

177 WaveFunctions.set_positions(self, spos_ac, atom_partition) 

178 

179 with self.timer('Basis functions set positions'): 

180 self.basis_functions.set_positions(spos_ac) 

181 

182 if self.ksl is not None: 

183 self.basis_functions.set_matrix_distribution(self.ksl.Mstart, 

184 self.ksl.Mstop) 

185 

186 Mstop = self.ksl.Mstop 

187 Mstart = self.ksl.Mstart 

188 

189 # if self.ksl.using_blacs: # XXX 

190 # S and T have been distributed to a layout with blacs, so 

191 # discard them to force reallocation from scratch. 

192 # 

193 # TODO: evaluate S and T when they *are* distributed, thus saving 

194 # memory and avoiding this problem 

195 for kpt in self.kpt_u: 

196 kpt.S_MM = None 

197 kpt.T_MM = None 

198 

199 # Free memory in case of old matrices: 

200 self.S_qMM = self.T_qMM = self.P_aqMi = None 

201 

202 if self.dtype == complex and oldspos_ac is not None: 

203 update_phases([kpt.C_nM for kpt in self.kpt_u], 

204 [kpt.q for kpt in self.kpt_u], 

205 self.kd.ibzk_qc, spos_ac, oldspos_ac, 

206 self.setups, Mstart) 

207 

208 self.timer.start('mktci') 

209 manytci = self.tciexpansions.get_manytci_calculator( 

210 self.setups, self.gd, spos_ac, self.kd.ibzk_qc, self.dtype, 

211 self.timer) 

212 self.timer.stop('mktci') 

213 self.manytci = manytci 

214 self.newtci = manytci.tci 

215 

216 my_atom_indices = self.basis_functions.my_atom_indices 

217 self.timer.start('ST tci') 

218 newS_qMM, newT_qMM = manytci.O_qMM_T_qMM(self.gd.comm, 

219 Mstart, Mstop, 

220 self.ksl.using_blacs) 

221 self.timer.stop('ST tci') 

222 self.timer.start('P tci') 

223 P_qIM = manytci.P_qIM(my_atom_indices) 

224 self.timer.stop('P tci') 

225 self.P_aqMi = manytci.P_aqMi(my_atom_indices) 

226 self.P_qIM = P_qIM # XXX atomic correction 

227 

228 self.atomic_correction = self.atomic_correction_cls.new_from_wfs(self) 

229 

230 # TODO 

231 # OK complex/conj, periodic images 

232 # OK scalapack 

233 # derivatives/forces 

234 # sparse 

235 # use symmetry/conj tricks to reduce calculations 

236 # enable caching of spherical harmonics 

237 

238 self.atomic_correction.add_overlap_correction(newS_qMM) 

239 self.allocate_arrays_for_projections(my_atom_indices) 

240 

241 newS_qMM = self.ksl.distribute_overlap_matrix(newS_qMM, root=-1) 

242 newT_qMM = self.ksl.distribute_overlap_matrix(newT_qMM, root=-1) 

243 

244 self.positions_set = True 

245 

246 for kpt in self.kpt_u: 

247 q = kpt.q 

248 kpt.S_MM = newS_qMM[q] 

249 kpt.T_MM = newT_qMM[q] 

250 self.S_qMM = newS_qMM 

251 self.T_qMM = newT_qMM 

252 

253 # Elpa wants to reuse the decomposed form of S_qMM. 

254 # We need to keep track of the existence of that object here, 

255 # since this is where we change S_qMM. Hence, expect this to 

256 # become arrays after the first diagonalization: 

257 self.decomposed_S_qMM = [None] * len(self.S_qMM) 

258 self.set_orthonormalized(False) 

259 

260 def initialize(self, density, hamiltonian, spos_ac): 

261 # Note: The above line exists also in set_positions. 

262 # This is guaranteed to be correct, but we can probably remove one. 

263 # Of course no human can understand the initialization process, 

264 # so this will be some other day. 

265 self.timer.start('LCAO WFS Initialize') 

266 if density.nt_sG is None: 

267 if self.kpt_u[0].f_n is None or self.kpt_u[0].C_nM is None: 

268 density.initialize_from_atomic_densities(self.basis_functions) 

269 else: 

270 # We have the info we need for a density matrix, so initialize 

271 # from that instead of from scratch. This will be the case 

272 # after set_positions() during a relaxation 

273 density.initialize_from_wavefunctions(self) 

274 # Initialize GLLB-potential from basis function orbitals 

275 if hamiltonian.xc.type == 'GLLB': 

276 hamiltonian.xc.initialize_from_atomic_orbitals( 

277 self.basis_functions) 

278 

279 else: 

280 # After a restart, nt_sg doesn't exist yet, so we'll have to 

281 # make sure it does. Of course, this should have been taken care 

282 # of already by this time, so we should improve the code elsewhere 

283 density.calculate_normalized_charges_and_mix() 

284 

285 hamiltonian.update(density) 

286 self.timer.stop('LCAO WFS Initialize') 

287 

288 return 0, 0 

289 

290 def initialize_wave_functions_from_lcao(self): 

291 """Fill the calc.wfs.kpt_[u].psit_nG arrays with useful data. 

292 

293 Normally psit_nG is NOT used in lcao mode, but some extensions 

294 (like ase.dft.wannier) want to have it. 

295 This code is adapted from fd.py / initialize_from_lcao_coefficients() 

296 and fills psit_nG with data constructed from the current lcao 

297 coefficients (kpt.C_nM). 

298 

299 (This may or may not work in band-parallel case!) 

300 """ 

301 from gpaw.wavefunctions.arrays import UniformGridWaveFunctions 

302 bfs = self.basis_functions 

303 for kpt in self.kpt_u: 

304 kpt.psit = UniformGridWaveFunctions( 

305 self.bd.nbands, self.gd, self.dtype, kpt=kpt.q, dist=None, 

306 spin=kpt.s, collinear=True) 

307 kpt.psit_nG[:] = 0.0 

308 bfs.lcao_to_grid(kpt.C_nM, kpt.psit_nG[:self.bd.mynbands], kpt.q) 

309 

310 def initialize_wave_functions_from_restart_file(self): 

311 """Dummy function to ensure compatibility to fd mode""" 

312 self.initialize_wave_functions_from_lcao() 

313 

314 def add_orbital_density(self, nt_G, kpt, n): 

315 rank, q = self.kd.get_rank_and_index(kpt.k) 

316 u = q * self.nspins + kpt.s 

317 assert rank == self.kd.comm.rank 

318 assert self.kpt_u[u] is kpt 

319 psit_G = self._get_wave_function_array(u, n, realspace=True) 

320 self.add_realspace_orbital_to_density(nt_G, psit_G) 

321 

322 def calculate_density_matrix(self, f_n, C_nM, rho_MM=None): 

323 self.timer.start('Calculate density matrix') 

324 rho_MM = self.ksl.calculate_density_matrix(f_n, C_nM, rho_MM) 

325 self.timer.stop('Calculate density matrix') 

326 return rho_MM 

327 

328 if 1: 

329 # XXX Should not conjugate, but call gemm(..., 'c') 

330 # Although that requires knowing C_Mn and not C_nM. 

331 # that also conforms better to the usual conventions in literature 

332 Cf_Mn = C_nM.T.conj() * f_n 

333 self.timer.start('gemm') 

334 mmm(1.0, Cf_Mn, 'N', C_nM, 'N', 0.0, rho_MM) 

335 self.timer.stop('gemm') 

336 self.timer.start('band comm sum') 

337 self.bd.comm.sum(rho_MM) 

338 self.timer.stop('band comm sum') 

339 else: 

340 # Alternative suggestion. Might be faster. Someone should test this 

341 from gpaw.utilities.blas import r2k 

342 C_Mn = C_nM.T.copy() 

343 r2k(0.5, C_Mn, f_n * C_Mn, 0.0, rho_MM) 

344 tri2full(rho_MM) 

345 

346 def calculate_atomic_density_matrices_with_occupation(self, D_asp, f_un): 

347 # ac = self.atomic_correction 

348 # if ac.implements_distributed_projections(): 

349 # D2_asp = ac.redistribute(self, D_asp, type='asp', op='forth') 

350 # WaveFunctions.calculate_atomic_density_matrices_with_occupation( 

351 # self, D2_asp, f_un) 

352 # D3_asp = ac.redistribute(self, D2_asp, type='asp', op='back') 

353 # for a in D_asp: 

354 # D_asp[a][:] = D3_asp[a] 

355 # else: 

356 WaveFunctions.calculate_atomic_density_matrices_with_occupation( 

357 self, D_asp, f_un) 

358 

359 def calculate_density_matrix_delta(self, d_nn, C_nM, rho_MM=None): 

360 self.timer.start('Calculate density matrix') 

361 rho_MM = self.ksl.calculate_density_matrix_delta(d_nn, C_nM, rho_MM) 

362 self.timer.stop('Calculate density matrix') 

363 return rho_MM 

364 

365 def add_to_density_from_k_point_with_occupation(self, nt_sG, kpt, f_n): 

366 """Add contribution to pseudo electron-density. Do not use the standard 

367 occupation numbers, but ones given with argument f_n.""" 

368 # Custom occupations are used in calculation of response potential 

369 # with GLLB-potential 

370 if kpt.rho_MM is None: 

371 rho_MM = self.calculate_density_matrix(f_n, kpt.C_nM) 

372 if hasattr(kpt, 'c_on'): 

373 assert self.bd.comm.size == 1 

374 d_nn = np.zeros((self.bd.mynbands, self.bd.mynbands), 

375 dtype=kpt.C_nM.dtype) 

376 for ne, c_n in zip(kpt.ne_o, kpt.c_on): 

377 assert abs(c_n.imag).max() < 1e-14 

378 d_nn += ne * np.outer(c_n.conj(), c_n).real 

379 rho_MM += self.calculate_density_matrix_delta(d_nn, kpt.C_nM) 

380 else: 

381 rho_MM = kpt.rho_MM 

382 self.timer.start('Construct density') 

383 self.basis_functions.construct_density(rho_MM, nt_sG[kpt.s], kpt.q) 

384 self.timer.stop('Construct density') 

385 

386 def add_to_kinetic_density_from_k_point(self, taut_G, kpt): 

387 raise NotImplementedError('Kinetic density calculation for LCAO ' 

388 'wavefunctions is not implemented.') 

389 

390 def calculate_forces(self, hamiltonian, F_av): 

391 self.timer.start('LCAO forces') 

392 

393 Fref_av = np.zeros_like(F_av) 

394 self.forcecalc = LCAOforces(self.ksl, self.dtype, self.gd, 

395 self.bd, self.kd, self.kpt_u, self.nspins, 

396 self.basis_functions, self.newtci, 

397 self.P_aqMi, self.setups, 

398 self.manytci, hamiltonian, 

399 self, self.spos_ac, 

400 self.timer, Fref_av) 

401 

402 F_av[:, :] = self.forcecalc.get_forces_sum_GS() 

403 self.timer.stop('LCAO forces') 

404 

405 def _get_wave_function_array(self, u, n, realspace=True, periodic=False): 

406 # XXX Taking kpt is better than taking u 

407 kpt = self.kpt_u[u] 

408 C_M = kpt.C_nM[n] 

409 

410 if realspace: 

411 psit_G = self.gd.zeros(dtype=self.dtype) 

412 self.basis_functions.lcao_to_grid(C_M, psit_G, kpt.q) 

413 if periodic and self.dtype == complex: 

414 k_c = self.kd.ibzk_kc[kpt.k] 

415 return self.gd.plane_wave(-k_c) * psit_G 

416 return psit_G 

417 else: 

418 return C_M 

419 

420 def write(self, writer, write_wave_functions=False): 

421 WaveFunctions.write(self, writer) 

422 if write_wave_functions: 

423 self.write_wave_functions(writer) 

424 

425 def write_wave_functions(self, writer): 

426 writer.add_array( 

427 'coefficients', 

428 (self.nspins, self.kd.nibzkpts, self.bd.nbands, self.setups.nao), 

429 dtype=self.dtype) 

430 for s in range(self.nspins): 

431 for k in range(self.kd.nibzkpts): 

432 C_nM = self.collect_array('C_nM', k, s) 

433 writer.fill(C_nM * Bohr**-1.5) 

434 

435 def read(self, reader): 

436 WaveFunctions.read(self, reader) 

437 r = reader.wave_functions 

438 if 'coefficients' in r: 

439 self.read_wave_functions(r) 

440 

441 def read_wave_functions(self, reader): 

442 c = 1.0 if getattr(reader, 'version', 3) >= 4 else Bohr**1.5 

443 for kpt in self.kpt_u: 

444 C_nM = reader.proxy('coefficients', kpt.s, kpt.k) 

445 kpt.C_nM = self.bd.empty(self.setups.nao, dtype=self.dtype) 

446 for myn, C_M in enumerate(kpt.C_nM): 

447 n = self.bd.global_index(myn) 

448 # XXX number of bands could have been rounded up! 

449 if n >= len(C_nM): 

450 break 

451 C_M[:] = C_nM[n] * c 

452 

453 self.coefficients_read_from_file = True 

454 

455 def estimate_memory(self, mem): 

456 nq = len(self.kd.ibzk_qc) 

457 nao = self.setups.nao 

458 ni_total = sum([setup.ni for setup in self.setups]) 

459 itemsize = mem.itemsize[self.dtype] 

460 mem.subnode('C [qnM]', nq * self.bd.mynbands * nao * itemsize) 

461 nM1, nM2 = self.ksl.get_overlap_matrix_shape() 

462 mem.subnode('S, T [2 x qmm]', 2 * nq * nM1 * nM2 * itemsize) 

463 mem.subnode('P [aqMi]', nq * nao * ni_total // self.gd.comm.size) 

464 # self.tci.estimate_memory(mem.subnode('TCI')) 

465 self.basis_functions.estimate_memory(mem.subnode('BasisFunctions')) 

466 self.eigensolver.estimate_memory(mem.subnode('Eigensolver'), 

467 self.dtype) 

468 

469 

470class LCAOforces: 

471 

472 def __init__(self, ksl, dtype, gd, bd, kd, kpt_u, nspins, bfs, newtci, 

473 P_aqMi, setups, manytci, hamiltonian, wfs, spos_ac, 

474 timer, Fref_av): 

475 """ Object which calculates LCAO forces """ 

476 

477 self.ksl = ksl 

478 self.nao = ksl.nao 

479 self.mynao = ksl.mynao 

480 self.dtype = dtype 

481 self.newtci = newtci 

482 self.manytci = manytci 

483 self.P_aqMi = P_aqMi 

484 self.gd = gd 

485 self.bd = bd 

486 self.kd = kd 

487 self.kpt_u = kpt_u 

488 self.nspins = nspins 

489 self.bfs = bfs 

490 self.spos_ac = spos_ac 

491 self.Mstart = ksl.Mstart 

492 self.Mstop = ksl.Mstop 

493 self.setups = setups 

494 self.hamiltonian = hamiltonian 

495 self.wfs = wfs 

496 self.timer = timer 

497 self.Fref_av = Fref_av 

498 self.my_atom_indices = bfs.my_atom_indices 

499 self.atom_indices = bfs.atom_indices 

500 self.dH_asp = hamiltonian.dH_asp 

501 

502 from gpaw.kohnsham_layouts import BlacsOrbitalLayouts 

503 self.isblacs = isinstance(self.ksl, BlacsOrbitalLayouts) 

504 

505 if not self.isblacs: 

506 self.timer.start('TCI derivative') 

507 self.dThetadR_qvMM, self.dTdR_qvMM = self.manytci.O_qMM_T_qMM( 

508 self.gd.comm, self.Mstart, self.Mstop, False, derivative=True) 

509 self.dPdR_aqvMi = self.manytci.P_aqMi(self.bfs.my_atom_indices, 

510 derivative=True) 

511 

512 self.gd.comm.sum(self.dThetadR_qvMM) 

513 self.gd.comm.sum(self.dTdR_qvMM) 

514 self.timer.stop('TCI derivative') 

515 self.rhoT_uMM, self.ET_uMM = self.get_den_mat_and_E() 

516 

517 def get_forces_sum_GS(self): 

518 """ This function calculates ground state forces in LCAO mode """ 

519 if not self.isblacs: 

520 F_av = np.zeros_like(self.Fref_av) 

521 Fkin_av = self.get_kinetic_term() 

522 Fpot_av = self.get_pot_term() 

523 Ftheta_av = self.get_den_mat_term() 

524 Frho_av = self.get_den_mat_paw_term() 

525 Fatom_av = self.get_atomic_density_term() 

526 F_av += Fkin_av + Fpot_av + Ftheta_av + Frho_av + Fatom_av 

527 else: 

528 F_av = np.zeros_like(self.Fref_av) 

529 Fpot_av = self.get_pot_term_blacs() 

530 Fkin_av, Ftheta_av = self.get_kin_and_den_term_blacs() 

531 Fatom_av, Frho_av = self.get_at_den_and_den_paw_blacs() 

532 

533 F_av += Fkin_av + Fpot_av + Ftheta_av + Frho_av + Fatom_av 

534 

535 self.timer.start('Wait for sum') 

536 self.ksl.orbital_comm.sum(F_av) 

537 if self.bd.comm.rank == 0: 

538 self.kd.comm.sum(F_av, 0) 

539 self.timer.stop('Wait for sum') 

540 

541 return F_av 

542 

543 def _slices(self, indices): 

544 for a in indices: 

545 M1 = self.bfs.M_a[a] - self.Mstart 

546 M2 = M1 + self.setups[a].nao 

547 if M2 > 0: 

548 yield a, max(0, M1), M2 

549 

550 def slices(self): 

551 return self._slices(self.atom_indices) 

552 

553 def my_slices(self): 

554 return self._slices(self.my_atom_indices) 

555 

556 def get_den_mat_and_E(self): 

557 # 

558 # ----- ----- 

559 # \ -1 \ * 

560 # E = ) S H rho = ) c eps f c 

561 # mu nu / mu x x z z nu / n mu n n n nu 

562 # ----- ----- 

563 # x z n 

564 # 

565 # We use the transpose of that matrix. The first form is used 

566 # if rho is given, otherwise the coefficients are used. 

567 self.timer.start('Initial') 

568 if self.kpt_u[0].rho_MM is None: 

569 rhoT_uMM = [] 

570 ET_uMM = [] 

571 self.timer.start('Get density matrix') 

572 for kpt in self.kpt_u: 

573 rhoT_MM = self.ksl.get_transposed_density_matrix(kpt.f_n, 

574 kpt.C_nM) 

575 rhoT_uMM.append(rhoT_MM) 

576 ET_MM = self.ksl.get_transposed_density_matrix(kpt.f_n * 

577 kpt.eps_n, 

578 kpt.C_nM) 

579 ET_uMM.append(ET_MM) 

580 if hasattr(kpt, 'c_on'): 

581 # XXX does this work with BLACS/non-BLACS/etc.? 

582 assert self.bd.comm.size == 1 

583 d_nn = np.zeros((self.bd.mynbands, self.bd.mynbands), 

584 dtype=kpt.C_nM.dtype) 

585 for ne, c_n in zip(kpt.ne_o, kpt.c_on): 

586 d_nn += ne * np.outer(c_n.conj(), c_n) 

587 rhoT_MM += self.ksl.get_transposed_density_matrix_delta( 

588 d_nn, kpt.C_nM) 

589 ET_MM += self.ksl.get_transposed_density_matrix_delta( 

590 d_nn * kpt.eps_n, kpt.C_nM) 

591 self.timer.stop('Get density matrix') 

592 else: 

593 rhoT_uMM = [] 

594 ET_uMM = [] 

595 for kpt in self.kpt_u: 

596 H_MM = self.wfs.eigensolver.calculate_hamiltonian_matrix( 

597 self.hamiltonian, self.wfs, kpt) 

598 tri2full(H_MM) 

599 S_MM = kpt.S_MM.copy() 

600 tri2full(S_MM) 

601 ET_MM = np.linalg.solve(S_MM, gemmdot(H_MM, 

602 kpt.rho_MM)).T.copy() 

603 del S_MM, H_MM 

604 rhoT_MM = kpt.rho_MM.T.copy() 

605 rhoT_uMM.append(rhoT_MM) 

606 ET_uMM.append(ET_MM) 

607 self.timer.stop('Initial') 

608 return rhoT_uMM, ET_uMM 

609 

610 def get_kinetic_term(self): 

611 """Calculate Kinetic energy term in LCAO""" 

612 Fkin_av = np.zeros_like(self.Fref_av) 

613 self.timer.start('TCI derivative') 

614 # Kinetic energy contribution 

615 # 

616 # ----- d T 

617 # a \ mu nu 

618 # F += 2 Re ) -------- rho 

619 # / d R nu mu 

620 # ----- mu nu 

621 # mu in a; nu 

622 # 

623 Fkin_av = np.zeros_like(Fkin_av) 

624 for u, kpt in enumerate(self.kpt_u): 

625 dEdTrhoT_vMM = (self.dTdR_qvMM[kpt.q] * 

626 self.rhoT_uMM[u][np.newaxis]).real 

627 # XXX load distribution! 

628 for a, M1, M2 in self.my_slices(): 

629 Fkin_av[a, :] += \ 

630 2.0 * dEdTrhoT_vMM[:, M1:M2].sum(-1).sum(-1) 

631 self.timer.stop('TCI derivative') 

632 

633 return Fkin_av 

634 

635 def get_den_mat_term(self): 

636 """Calculate density matrix term in LCAO""" 

637 Ftheta_av = np.zeros_like(self.Fref_av) 

638 # Density matrix contribution due to basis overlap 

639 # 

640 # ----- d Theta 

641 # a \ mu nu 

642 # F += -2 Re ) ------------ E 

643 # / d R nu mu 

644 # ----- mu nu 

645 # mu in a; nu 

646 # 

647 Ftheta_av = np.zeros_like(Ftheta_av) 

648 for u, kpt in enumerate(self.kpt_u): 

649 dThetadRE_vMM = (self.dThetadR_qvMM[kpt.q] * 

650 self.ET_uMM[u][np.newaxis]).real 

651 for a, M1, M2 in self.my_slices(): 

652 Ftheta_av[a, :] += \ 

653 -2.0 * dThetadRE_vMM[:, M1:M2].sum(-1).sum(-1) 

654 

655 return Ftheta_av 

656 

657 def get_pot_term(self): 

658 """Calculate potential term""" 

659 Fpot_av = np.zeros_like(self.Fref_av) 

660 # Potential contribution 

661 # 

662 # ----- / d Phi (r) 

663 # a \ | mu ~ 

664 # F += -2 Re ) | ---------- v (r) Phi (r) dr rho 

665 # / | d R nu nu mu 

666 # ----- / a 

667 # mu in a; nu 

668 # 

669 self.timer.start('Potential') 

670 vt_sG = self.hamiltonian.vt_sG 

671 Fpot_av = np.zeros_like(Fpot_av) 

672 for u, kpt in enumerate(self.kpt_u): 

673 vt_G = vt_sG[kpt.s] 

674 Fpot_av += self.bfs.calculate_force_contribution(vt_G, 

675 self.rhoT_uMM[u], 

676 kpt.q) 

677 self.timer.stop('Potential') 

678 

679 return Fpot_av 

680 

681 def get_den_mat_paw_term(self): 

682 """Calcualte PAW correction""" 

683 # TO DO: split this function into 

684 # _get_den_mat_paw_term (which calculate Frho_av) and 

685 # get_paw_correction (which calculate ZE_MM) 

686 # Density matrix contribution from PAW correction 

687 # 

688 # ----- ----- 

689 # a \ a \ b 

690 # F += 2 Re ) Z E - 2 Re ) Z E 

691 # / mu nu nu mu / mu nu nu mu 

692 # ----- ----- 

693 # mu nu b; mu in a; nu 

694 # 

695 # with 

696 # b* 

697 # ----- dP 

698 # b \ i mu b b 

699 # Z = ) -------- dS P 

700 # mu nu / dR ij j nu 

701 # ----- b mu 

702 # ij 

703 # 

704 self.timer.start('Paw correction') 

705 Frho_av = np.zeros_like(self.Fref_av) 

706 for u, kpt in enumerate(self.kpt_u): 

707 work_MM = np.zeros((self.mynao, self.nao), self.dtype) 

708 ZE_MM = None 

709 for b in self.my_atom_indices: 

710 setup = self.setups[b] 

711 dO_ii = np.asarray(setup.dO_ii, self.dtype) 

712 dOP_iM = np.zeros((setup.ni, self.nao), self.dtype) 

713 mmm(1.0, dO_ii, 'N', self.P_aqMi[b][kpt.q], 'C', 0.0, dOP_iM) 

714 for v in range(3): 

715 mmm(1.0, 

716 self.dPdR_aqvMi[b][kpt.q][v][self.Mstart:self.Mstop], 

717 'N', 

718 dOP_iM, 'N', 

719 0.0, work_MM) 

720 ZE_MM = (work_MM * self.ET_uMM[u]).real 

721 for a, M1, M2 in self.slices(): 

722 dE = 2 * ZE_MM[M1:M2].sum() 

723 Frho_av[a, v] -= dE # the "b; mu in a; nu" term 

724 Frho_av[b, v] += dE # the "mu nu" term 

725 self.timer.stop('Paw correction') 

726 return Frho_av 

727 

728 def _get_den_mat_paw_term(self): 

729 # THIS doesn't work in parallel 

730 # Density matrix contribution from PAW correction 

731 # 

732 # ----- ----- 

733 # a \ a \ b 

734 # F += 2 Re ) Z E - 2 Re ) Z E 

735 # / mu nu nu mu / mu nu nu mu 

736 # ----- ----- 

737 # mu nu b; mu in a; nu 

738 # 

739 # with 

740 # b* 

741 # ----- dP 

742 # b \ i mu b b 

743 # Z = ) -------- dS P 

744 # mu nu / dR ij j nu 

745 # ----- b mu 

746 # ij 

747 # 

748 Frho_av = np.zeros_like(self.Fref_av) 

749 self.timer.start('add paw correction') 

750 ZE_MM = self.get_paw_correction() 

751 for u, kpt in enumerate(self.kpt_u): 

752 for b in self.my_atom_indices: 

753 for v in range(3): 

754 for a, M1, M2 in self.slices(): 

755 dE = 2 * ZE_MM[u, b, v, M1:M2].sum() 

756 Frho_av[a, v] -= dE.real # the "b; mu in a; nu" term 

757 Frho_av[b, v] += dE.real # the "mu nu" term 

758 self.timer.stop('add paw correction') 

759 return Frho_av 

760 

761 def get_paw_correction(self): 

762 # THIS doesn't work in parallel 

763 # <Phi_nu|pt_i>O_ii<dPt_i/dR|Phi_mu> 

764 self.timer.start('get paw correction') 

765 ZE_MM = np.zeros((len(self.kpt_u), len(self.my_atom_indices), 3, 

766 self.mynao, self.nao), self.dtype) 

767 for u, kpt in enumerate(self.kpt_u): 

768 work_MM = np.zeros((self.mynao, self.nao), self.dtype) 

769 for b in self.my_atom_indices: 

770 setup = self.setups[b] 

771 dO_ii = np.asarray(setup.dO_ii, self.dtype) 

772 dOP_iM = np.zeros((setup.ni, self.nao), self.dtype) 

773 mmm(1.0, dO_ii, 'N', self.P_aqMi[b][kpt.q], 'C', 0.0, dOP_iM) 

774 for v in range(3): 

775 mmm(1.0, 

776 self.dPdR_aqvMi[b][kpt.q][v][self.Mstart:self.Mstop], 

777 'N', 

778 dOP_iM, 'N', 

779 0.0, work_MM) 

780 ZE_MM[u, b, v, :, :] = (work_MM * self.ET_uMM[u]).real 

781 self.timer.stop('get paw correction') 

782 return ZE_MM 

783 

784 def get_atomic_density_term(self): 

785 Fatom_av = np.zeros_like(self.Fref_av) 

786 # Atomic density contribution 

787 # ----- ----- 

788 # a \ a \ b 

789 # F += -2 Re ) A rho + 2 Re ) A rho 

790 # / mu nu nu mu / mu nu nu mu 

791 # ----- ----- 

792 # mu nu b; mu in a; nu 

793 # 

794 # b* 

795 # ----- d P 

796 # b \ i mu b b 

797 # A = ) ------- dH P 

798 # mu nu / d R ij j nu 

799 # ----- b mu 

800 # ij 

801 # 

802 self.timer.start('Atomic Hamiltonian force') 

803 Fatom_av = np.zeros_like(Fatom_av) 

804 for u, kpt in enumerate(self.kpt_u): 

805 for b in self.my_atom_indices: 

806 H_ii = np.asarray(unpack_hermitian(self.dH_asp[b][kpt.s]), 

807 self.dtype) 

808 if len(H_ii) == 0: 

809 # gemmdot does not like empty matrices! 

810 # (has been fixed in the new code) 

811 continue 

812 HP_iM = gemmdot(H_ii, np.ascontiguousarray( 

813 self.P_aqMi[b][kpt.q].T.conj())) 

814 for v in range(3): 

815 dPdR_Mi = \ 

816 self.dPdR_aqvMi[b][kpt.q][v][self.Mstart:self.Mstop] 

817 ArhoT_MM = \ 

818 (gemmdot(dPdR_Mi, HP_iM) * self.rhoT_uMM[u]).real 

819 for a, M1, M2 in self.slices(): 

820 dE = 2 * ArhoT_MM[M1:M2].sum() 

821 Fatom_av[a, v] += dE # the "b; mu in a; nu" term 

822 Fatom_av[b, v] -= dE # the "mu nu" term 

823 self.timer.stop('Atomic Hamiltonian force') 

824 

825 return Fatom_av 

826 

827 def get_den_mat_block_blacs(self, f_n, C_nM, redistributor): 

828 rho1_mm = self.ksl.calculate_blocked_density_matrix(f_n, 

829 C_nM).conj() 

830 rho_mm = redistributor.redistribute(rho1_mm) 

831 return rho_mm 

832 

833 def get_pot_term_blacs(self): 

834 Fpot_av = np.zeros_like(self.Fref_av) 

835 from gpaw.blacs import BlacsGrid, Redistributor 

836 self.grid = BlacsGrid(self.ksl.block_comm, self.gd.comm.size, 

837 self.bd.comm.size) 

838 self.blocksize1 = -(-self.nao // self.grid.nprow) 

839 self.blocksize2 = -(-self.nao // self.grid.npcol) 

840 desc = self.grid.new_descriptor(self.nao, self.nao, 

841 self.blocksize1, self.blocksize2) 

842 vt_sG = self.hamiltonian.vt_sG 

843 self.rhoT_umm = [] 

844 self.ET_umm = [] 

845 self.redistributor = Redistributor(self.grid.comm, 

846 self.ksl.mmdescriptor, desc) 

847 Fpot_av = np.zeros_like(self.Fref_av) 

848 for u, kpt in enumerate(self.kpt_u): 

849 self.timer.start('Get density matrix') 

850 rhoT_mm = self.get_den_mat_block_blacs(kpt.f_n, kpt.C_nM, 

851 self.redistributor) 

852 self.rhoT_umm.append(rhoT_mm) 

853 self.timer.stop('Get density matrix') 

854 self.timer.start('Potential') 

855 rhoT_mM = self.ksl.distribute_to_columns(rhoT_mm, desc) 

856 vt_G = vt_sG[kpt.s] 

857 Fpot_av += self.bfs.calculate_force_contribution(vt_G, rhoT_mM, 

858 kpt.q) 

859 del rhoT_mM 

860 self.timer.stop('Potential') 

861 

862 return Fpot_av 

863 

864 def get_kin_and_den_term_blacs(self): 

865 Fkin_av_sum = np.zeros_like(self.Fref_av) 

866 Ftheta_av_sum = np.zeros_like(self.Fref_av) 

867 # pcutoff_a = [max([pt.get_cutoff() for pt in setup.pt_j]) 

868 # for setup in self.setups] 

869 # phicutoff_a = [max([phit.get_cutoff() for phit in setup.phit_j]) 

870 # for setup in self.setups] 

871 # XXX should probably use bdsize x gdsize instead 

872 # That would be consistent with some existing grids 

873 # I'm not sure if this is correct 

874 # XXX what are rows and columns actually? 

875 dH_asp = self.hamiltonian.dH_asp 

876 self.timer.start('Get density matrix') 

877 for kpt in self.kpt_u: 

878 ET_mm = self.get_den_mat_block_blacs(kpt.f_n * kpt.eps_n, kpt.C_nM, 

879 self.redistributor) 

880 self.ET_umm.append(ET_mm) 

881 self.timer.stop('Get density matrix') 

882 self.M1start = self.blocksize1 * self.grid.myrow 

883 self.M2start = self.blocksize2 * self.grid.mycol 

884 self.M1stop = min(self.M1start + self.blocksize1, self.nao) 

885 self.M2stop = min(self.M2start + self.blocksize2, self.nao) 

886 self.m1max = self.M1stop - self.M1start 

887 self.m2max = self.M2stop - self.M2start 

888 # from gpaw.lcao.overlap import TwoCenterIntegralCalculator 

889 self.timer.start('Prepare TCI loop') 

890 self.M_a = self.bfs.M_a 

891 Fkin2_av = np.zeros_like(self.Fref_av) 

892 Ftheta2_av = np.zeros_like(self.Fref_av) 

893 self.atompairs = self.newtci.a1a2.get_atompairs() 

894 self.timer.start('broadcast dH') 

895 self.alldH_asp = {} 

896 for a in range(len(self.setups)): 

897 gdrank = self.bfs.sphere_a[a].rank 

898 if gdrank == self.gd.rank: 

899 dH_sp = dH_asp[a] 

900 else: 

901 ni = self.setups[a].ni 

902 dH_sp = np.empty((self.nspins, ni * (ni + 1) // 2)) 

903 self.gd.comm.broadcast(dH_sp, gdrank) 

904 # okay, now everyone gets copies of dH_sp 

905 self.alldH_asp[a] = dH_sp 

906 self.timer.stop('broadcast dH') 

907 # This will get sort of hairy. We need to account for some 

908 # three-center overlaps, such as: 

909 # 

910 # a1 

911 # Phi ~a3 a3 ~a3 a2 a2,a1 

912 # < ---- |p > dH <p |Phi > rho 

913 # dR 

914 # 

915 # To this end we will loop over all pairs of atoms (a1, a3), 

916 # and then a sub-loop over (a3, a2). 

917 self.timer.stop('Prepare TCI loop') 

918 self.timer.start('Not so complicated loop') 

919 for (a1, a2) in self.atompairs: 

920 if a1 >= a2: 

921 # Actually this leads to bad load balance. 

922 # We should take a1 > a2 or a1 < a2 equally many times. 

923 # Maybe decide which of these choices 

924 # depending on whether a2 % 1 == 0 

925 continue 

926 m1start = self.M_a[a1] - self.M1start 

927 m2start = self.M_a[a2] - self.M2start 

928 if m1start >= self.blocksize1 or m2start >= self.blocksize2: 

929 continue # (we have only one block per CPU) 

930 nm1 = self.setups[a1].nao 

931 nm2 = self.setups[a2].nao 

932 m1stop = min(m1start + nm1, self.m1max) 

933 m2stop = min(m2start + nm2, self.m2max) 

934 if m1stop <= 0 or m2stop <= 0: 

935 continue 

936 m1start = max(m1start, 0) 

937 m2start = max(m2start, 0) 

938 J1start = max(0, self.M1start - self.M_a[a1]) 

939 J2start = max(0, self.M2start - self.M_a[a2]) 

940 M1stop = J1start + m1stop - m1start 

941 J2stop = J2start + m2stop - m2start 

942 dThetadR_qvmm, dTdR_qvmm = self.newtci.dOdR_dTdR(a1, a2) 

943 for u, kpt in enumerate(self.kpt_u): 

944 rhoT_mm = self.rhoT_umm[u][m1start:m1stop, m2start:m2stop] 

945 ET_mm = self.ET_umm[u][m1start:m1stop, m2start:m2stop] 

946 Fkin_v = 2.0 * (dTdR_qvmm[kpt.q][:, J1start:M1stop, 

947 J2start:J2stop] * 

948 rhoT_mm[np.newaxis]).real.sum(-1).sum(-1) 

949 Ftheta_v = 2.0 * (dThetadR_qvmm[kpt.q][:, J1start:M1stop, 

950 J2start:J2stop] * 

951 ET_mm[np.newaxis]).real.sum(-1).sum(-1) 

952 Fkin2_av[a1] += Fkin_v 

953 Fkin2_av[a2] -= Fkin_v 

954 Ftheta2_av[a1] -= Ftheta_v 

955 Ftheta2_av[a2] += Ftheta_v 

956 Fkin_av = Fkin2_av 

957 Ftheta_av = Ftheta2_av 

958 self.timer.stop('Not so complicated loop') 

959 

960 Fkin_av_sum += Fkin_av 

961 Ftheta_av_sum += Ftheta_av 

962 

963 return Fkin_av_sum, Ftheta_av_sum 

964 

965 def get_at_den_and_den_paw_blacs(self): 

966 Fatom_av = np.zeros_like(self.Fref_av) 

967 Frho_av = np.zeros_like(self.Fref_av) 

968 Fatom_av_sum = np.zeros_like(self.Fref_av) 

969 Frho_av_sum = np.zeros_like(self.Fref_av) 

970 self.dHP_and_dSP_aauim = {} 

971 self.a2values = {} 

972 for (a2, a3) in self.atompairs: 

973 if a3 not in self.a2values: 

974 self.a2values[a3] = [] 

975 self.a2values[a3].append(a2) 

976 

977 self.timer.start('Complicated loop') 

978 for a1, a3 in self.atompairs: 

979 if a1 == a3: 

980 # Functions reside on same atom, so their overlap 

981 # does not change when atom is displaced 

982 continue 

983 m1start = self.M_a[a1] - self.M1start 

984 if m1start >= self.blocksize1: 

985 continue 

986 nm1 = self.setups[a1].nao 

987 m1stop = min(m1start + nm1, self.m1max) 

988 if m1stop <= 0: 

989 continue 

990 dPdR_qvim = self.newtci.dPdR(a3, a1) 

991 if dPdR_qvim is None: 

992 continue 

993 dPdR_qvmi = -dPdR_qvim.transpose(0, 1, 3, 2).conj() 

994 m1start = max(m1start, 0) 

995 J1start = max(0, self.M1start - self.M_a[a1]) 

996 J1stop = J1start + m1stop - m1start 

997 dPdR_qvmi = dPdR_qvmi[:, :, J1start:J1stop, :].copy() 

998 for a2 in self.a2values[a3]: 

999 m2start = self.M_a[a2] - self.M2start 

1000 if m2start >= self.blocksize2: 

1001 continue 

1002 nm2 = self.setups[a2].nao 

1003 m2stop = min(m2start + nm2, self.m2max) 

1004 if m2stop <= 0: 

1005 continue 

1006 m2start = max(m2start, 0) 

1007 J2start = max(0, self.M2start - self.M_a[a2]) 

1008 J2stop = J2start + m2stop - m2start 

1009 if (a2, a3) in self.dHP_and_dSP_aauim: 

1010 dHP_uim, dSP_uim = self.dHP_and_dSP_aauim[(a2, a3)] 

1011 else: 

1012 P_qim = self.newtci.P(a3, a2) 

1013 if P_qim is None: 

1014 continue 

1015 P_qmi = P_qim.transpose(0, 2, 1).conj() 

1016 P_qmi = P_qmi[:, J2start:J2stop].copy() 

1017 dH_sp = self.alldH_asp[a3] 

1018 dS_ii = self.setups[a3].dO_ii 

1019 dHP_uim = [] 

1020 dSP_uim = [] 

1021 for u, kpt in enumerate(self.kpt_u): 

1022 dH_ii = unpack_hermitian(dH_sp[kpt.s]) 

1023 dHP_im = np.dot(P_qmi[kpt.q], dH_ii).T.conj() 

1024 # XXX only need nq of these, 

1025 # but the looping is over all u 

1026 dSP_im = np.dot(P_qmi[kpt.q], dS_ii).T.conj() 

1027 dHP_uim.append(dHP_im) 

1028 dSP_uim.append(dSP_im) 

1029 self.dHP_and_dSP_aauim[(a2, a3)] = dHP_uim, dSP_uim 

1030 for u, kpt in enumerate(self.kpt_u): 

1031 rhoT_mm = self.rhoT_umm[u][m1start:m1stop, m2start:m2stop] 

1032 ET_mm = self.ET_umm[u][m1start:m1stop, m2start:m2stop] 

1033 dPdRdHP_vmm = np.dot(dPdR_qvmi[kpt.q], dHP_uim[u]) 

1034 dPdRdSP_vmm = np.dot(dPdR_qvmi[kpt.q], dSP_uim[u]) 

1035 Fatom_c = 2.0 * (dPdRdHP_vmm * 

1036 rhoT_mm).real.sum(-1).sum(-1) 

1037 Frho_c = 2.0 * (dPdRdSP_vmm * 

1038 ET_mm).real.sum(-1).sum(-1) 

1039 Fatom_av[a1] += Fatom_c 

1040 Fatom_av[a3] -= Fatom_c 

1041 Frho_av[a1] -= Frho_c 

1042 Frho_av[a3] += Frho_c 

1043 self.timer.stop('Complicated loop') 

1044 

1045 Fatom_av_sum += Fatom_av 

1046 Frho_av_sum += Frho_av 

1047 

1048 return Fatom_av_sum, Frho_av_sum