Coverage for gpaw/wavefunctions/lcao.py: 89%
623 statements
« prev ^ index » next coverage.py v7.7.1, created at 2025-07-12 00:18 +0000
« prev ^ index » next coverage.py v7.7.1, created at 2025-07-12 00:18 +0000
1import numpy as np
2from ase.units import Bohr
3from ase.utils.timing import timer
5# from gpaw import debug
6from gpaw.directmin.etdm_lcao import LCAOETDM
7from gpaw.directmin.tools import loewdin_lcao, gramschmidt_lcao
8from gpaw.lcao.atomic_correction import (DenseAtomicCorrection,
9 SparseAtomicCorrection)
10# from gpaw.lcao.overlap import NewTwoCenterIntegrals as NewTCI
11from gpaw.lcao.tci import TCIExpansions
12from gpaw.lfc import BasisFunctions
13from gpaw.utilities import unpack_hermitian
14from gpaw.utilities.blas import mmm, gemmdot
15from gpaw.utilities.tools import tri2full
16from gpaw.wavefunctions.base import WaveFunctions
17from gpaw.wavefunctions.mode import Mode
20class LCAO(Mode):
21 name = 'lcao'
23 def __init__(self, atomic_correction=None, interpolation=3,
24 force_complex_dtype=False):
25 self.atomic_correction = atomic_correction
26 self.interpolation = interpolation
27 Mode.__init__(self, force_complex_dtype)
29 def __call__(self, *args, **kwargs):
30 return LCAOWaveFunctions(*args,
31 atomic_correction=self.atomic_correction,
32 **kwargs)
34 def __repr__(self):
35 return f'LCAO({self.todict()})'
37 def todict(self):
38 dct = Mode.todict(self)
39 dct['interpolation'] = self.interpolation
40 return dct
43def update_phases(C_unM, q_u, ibzk_qc, spos_ac, oldspos_ac, setups, Mstart):
44 """Complex-rotate coefficients compensating discontinuous phase shift.
46 This changes the coefficients to counteract the phase discontinuity
47 of overlaps when atoms move across a cell boundary."""
49 # We don't want to apply any phase shift unless we crossed a cell
50 # boundary. So we round the shift to either 0 or 1.
51 #
52 # Example: spos_ac goes from 0.01 to 0.99 -- this rounds to 1 and
53 # we apply the phase. If someone moves an atom by half a cell
54 # without crossing a boundary, then we are out of luck. But they
55 # should have reinitialized from LCAO anyway.
56 phase_qa = np.exp(2j * np.pi *
57 np.dot(ibzk_qc, (spos_ac - oldspos_ac).T.round()))
59 for q, C_nM in zip(q_u, C_unM):
60 if C_nM is None:
61 continue
62 for a in range(len(spos_ac)):
63 M1 = setups.M_a[a] - Mstart
64 M2 = M1 + setups[a].nao
65 M1 = max(0, M1)
66 C_nM[:, M1:M2] *= phase_qa[q, a] # (may truncate M2)
69# replace by class to make data structure perhaps a bit less confusing
70def get_r_and_offsets(nl, spos_ac, cell_cv):
71 r_and_offset_aao = {}
73 def add(a1, a2, R_c, offset):
74 if (a1, a2) not in r_and_offset_aao:
75 r_and_offset_aao[(a1, a2)] = []
76 r_and_offset_aao[(a1, a2)].append((R_c, offset))
78 for a1, spos1_c in enumerate(spos_ac):
79 a2_a, offsets = nl.get_neighbors(a1)
80 for a2, offset in zip(a2_a, offsets):
81 spos2_c = spos_ac[a2] + offset
83 R_c = np.dot(spos2_c - spos1_c, cell_cv)
84 add(a1, a2, R_c, offset)
85 if a1 != a2 or offset.any():
86 add(a2, a1, -R_c, -offset)
88 return r_and_offset_aao
91class LCAOWaveFunctions(WaveFunctions):
92 mode = 'lcao'
94 def __init__(self, ksl, gd, nvalence, setups, bd,
95 dtype, world, kd, kptband_comm, timer,
96 atomic_correction=None, collinear=True):
97 WaveFunctions.__init__(self, gd, nvalence, setups, bd,
98 dtype, collinear, world, kd,
99 kptband_comm, timer)
100 self.ksl = ksl
101 self.S_qMM = None
102 self.T_qMM = None
103 self.P_aqMi = None
104 self.debug_tci = False
106 if atomic_correction is None:
107 atomic_correction = 'sparse' if ksl.using_blacs else 'dense'
109 if atomic_correction == 'sparse':
110 self.atomic_correction_cls = SparseAtomicCorrection
111 else:
112 assert atomic_correction == 'dense'
113 self.atomic_correction_cls = DenseAtomicCorrection
115 # self.tci = NewTCI(gd.cell_cv, gd.pbc_c, setups, kd.ibzk_qc, kd.gamma)
116 with self.timer('TCI: Evaluate splines'):
117 self.tciexpansions = TCIExpansions.new_from_setups(setups)
119 self.basis_functions = BasisFunctions(gd,
120 [setup.basis_functions_J
121 for setup in setups],
122 kd,
123 dtype=dtype,
124 cut=True)
126 self.coefficients_read_from_file = False
127 self.set_orthonormalized(False)
129 def set_orthonormalized(self, flag):
130 self.orthonormalized = flag
132 @timer('Orthonormalize')
133 def orthonormalize(self, kpt=None, type='gramschmidt'):
134 assert type == 'gramschmidt' or type == 'loewdin'
135 if kpt is None:
136 for kpt in self.kpt_u:
137 self.orthonormalize(kpt)
138 self.orthonormalized = True
139 return
140 if type == 'loewdin':
141 kpt.C_nM[:] = loewdin_lcao(kpt.C_nM, kpt.S_MM.conj())
142 elif type == 'gramschmidt':
143 kpt.C_nM[:] = gramschmidt_lcao(kpt.C_nM, kpt.S_MM.conj())
145 def empty(self, n=(), global_array=False, realspace=False):
146 if realspace:
147 return self.gd.empty(n, self.dtype, global_array)
148 else:
149 if isinstance(n, int):
150 n = (n,)
151 nao = self.setups.nao
152 return np.empty(n + (nao,), self.dtype)
154 def __str__(self):
155 s = 'Wave functions: LCAO\n'
156 s += ' Diagonalizer: %s\n' % self.ksl.get_description()
157 s += (' Atomic Correction: %s\n'
158 % self.atomic_correction_cls.description)
159 s += ' Data-type: %s\n' % self.dtype.__name__
160 return s
162 def set_eigensolver(self, eigensolver):
163 WaveFunctions.set_eigensolver(self, eigensolver)
164 if eigensolver:
165 if isinstance(eigensolver, LCAOETDM):
166 eigensolver.initialize(self.gd, self.dtype, self.bd.nbands,
167 self.kd.nibzkpts, self.setups.nao,
168 self.ksl.using_blacs,
169 self.bd.comm.size, self.kpt_u)
170 else:
171 eigensolver.initialize(self.gd, self.dtype, self.setups.nao,
172 self.ksl)
174 def set_positions(self, spos_ac, atom_partition=None, move_wfs=False):
175 oldspos_ac = self.spos_ac
176 with self.timer('Basic WFS set positions'):
177 WaveFunctions.set_positions(self, spos_ac, atom_partition)
179 with self.timer('Basis functions set positions'):
180 self.basis_functions.set_positions(spos_ac)
182 if self.ksl is not None:
183 self.basis_functions.set_matrix_distribution(self.ksl.Mstart,
184 self.ksl.Mstop)
186 Mstop = self.ksl.Mstop
187 Mstart = self.ksl.Mstart
189 # if self.ksl.using_blacs: # XXX
190 # S and T have been distributed to a layout with blacs, so
191 # discard them to force reallocation from scratch.
192 #
193 # TODO: evaluate S and T when they *are* distributed, thus saving
194 # memory and avoiding this problem
195 for kpt in self.kpt_u:
196 kpt.S_MM = None
197 kpt.T_MM = None
199 # Free memory in case of old matrices:
200 self.S_qMM = self.T_qMM = self.P_aqMi = None
202 if self.dtype == complex and oldspos_ac is not None:
203 update_phases([kpt.C_nM for kpt in self.kpt_u],
204 [kpt.q for kpt in self.kpt_u],
205 self.kd.ibzk_qc, spos_ac, oldspos_ac,
206 self.setups, Mstart)
208 self.timer.start('mktci')
209 manytci = self.tciexpansions.get_manytci_calculator(
210 self.setups, self.gd, spos_ac, self.kd.ibzk_qc, self.dtype,
211 self.timer)
212 self.timer.stop('mktci')
213 self.manytci = manytci
214 self.newtci = manytci.tci
216 my_atom_indices = self.basis_functions.my_atom_indices
217 self.timer.start('ST tci')
218 newS_qMM, newT_qMM = manytci.O_qMM_T_qMM(self.gd.comm,
219 Mstart, Mstop,
220 self.ksl.using_blacs)
221 self.timer.stop('ST tci')
222 self.timer.start('P tci')
223 P_qIM = manytci.P_qIM(my_atom_indices)
224 self.timer.stop('P tci')
225 self.P_aqMi = manytci.P_aqMi(my_atom_indices)
226 self.P_qIM = P_qIM # XXX atomic correction
228 self.atomic_correction = self.atomic_correction_cls.new_from_wfs(self)
230 # TODO
231 # OK complex/conj, periodic images
232 # OK scalapack
233 # derivatives/forces
234 # sparse
235 # use symmetry/conj tricks to reduce calculations
236 # enable caching of spherical harmonics
238 self.atomic_correction.add_overlap_correction(newS_qMM)
239 self.allocate_arrays_for_projections(my_atom_indices)
241 newS_qMM = self.ksl.distribute_overlap_matrix(newS_qMM, root=-1)
242 newT_qMM = self.ksl.distribute_overlap_matrix(newT_qMM, root=-1)
244 self.positions_set = True
246 for kpt in self.kpt_u:
247 q = kpt.q
248 kpt.S_MM = newS_qMM[q]
249 kpt.T_MM = newT_qMM[q]
250 self.S_qMM = newS_qMM
251 self.T_qMM = newT_qMM
253 # Elpa wants to reuse the decomposed form of S_qMM.
254 # We need to keep track of the existence of that object here,
255 # since this is where we change S_qMM. Hence, expect this to
256 # become arrays after the first diagonalization:
257 self.decomposed_S_qMM = [None] * len(self.S_qMM)
258 self.set_orthonormalized(False)
260 def initialize(self, density, hamiltonian, spos_ac):
261 # Note: The above line exists also in set_positions.
262 # This is guaranteed to be correct, but we can probably remove one.
263 # Of course no human can understand the initialization process,
264 # so this will be some other day.
265 self.timer.start('LCAO WFS Initialize')
266 if density.nt_sG is None:
267 if self.kpt_u[0].f_n is None or self.kpt_u[0].C_nM is None:
268 density.initialize_from_atomic_densities(self.basis_functions)
269 else:
270 # We have the info we need for a density matrix, so initialize
271 # from that instead of from scratch. This will be the case
272 # after set_positions() during a relaxation
273 density.initialize_from_wavefunctions(self)
274 # Initialize GLLB-potential from basis function orbitals
275 if hamiltonian.xc.type == 'GLLB':
276 hamiltonian.xc.initialize_from_atomic_orbitals(
277 self.basis_functions)
279 else:
280 # After a restart, nt_sg doesn't exist yet, so we'll have to
281 # make sure it does. Of course, this should have been taken care
282 # of already by this time, so we should improve the code elsewhere
283 density.calculate_normalized_charges_and_mix()
285 hamiltonian.update(density)
286 self.timer.stop('LCAO WFS Initialize')
288 return 0, 0
290 def initialize_wave_functions_from_lcao(self):
291 """Fill the calc.wfs.kpt_[u].psit_nG arrays with useful data.
293 Normally psit_nG is NOT used in lcao mode, but some extensions
294 (like ase.dft.wannier) want to have it.
295 This code is adapted from fd.py / initialize_from_lcao_coefficients()
296 and fills psit_nG with data constructed from the current lcao
297 coefficients (kpt.C_nM).
299 (This may or may not work in band-parallel case!)
300 """
301 from gpaw.wavefunctions.arrays import UniformGridWaveFunctions
302 bfs = self.basis_functions
303 for kpt in self.kpt_u:
304 kpt.psit = UniformGridWaveFunctions(
305 self.bd.nbands, self.gd, self.dtype, kpt=kpt.q, dist=None,
306 spin=kpt.s, collinear=True)
307 kpt.psit_nG[:] = 0.0
308 bfs.lcao_to_grid(kpt.C_nM, kpt.psit_nG[:self.bd.mynbands], kpt.q)
310 def initialize_wave_functions_from_restart_file(self):
311 """Dummy function to ensure compatibility to fd mode"""
312 self.initialize_wave_functions_from_lcao()
314 def add_orbital_density(self, nt_G, kpt, n):
315 rank, q = self.kd.get_rank_and_index(kpt.k)
316 u = q * self.nspins + kpt.s
317 assert rank == self.kd.comm.rank
318 assert self.kpt_u[u] is kpt
319 psit_G = self._get_wave_function_array(u, n, realspace=True)
320 self.add_realspace_orbital_to_density(nt_G, psit_G)
322 def calculate_density_matrix(self, f_n, C_nM, rho_MM=None):
323 self.timer.start('Calculate density matrix')
324 rho_MM = self.ksl.calculate_density_matrix(f_n, C_nM, rho_MM)
325 self.timer.stop('Calculate density matrix')
326 return rho_MM
328 if 1:
329 # XXX Should not conjugate, but call gemm(..., 'c')
330 # Although that requires knowing C_Mn and not C_nM.
331 # that also conforms better to the usual conventions in literature
332 Cf_Mn = C_nM.T.conj() * f_n
333 self.timer.start('gemm')
334 mmm(1.0, Cf_Mn, 'N', C_nM, 'N', 0.0, rho_MM)
335 self.timer.stop('gemm')
336 self.timer.start('band comm sum')
337 self.bd.comm.sum(rho_MM)
338 self.timer.stop('band comm sum')
339 else:
340 # Alternative suggestion. Might be faster. Someone should test this
341 from gpaw.utilities.blas import r2k
342 C_Mn = C_nM.T.copy()
343 r2k(0.5, C_Mn, f_n * C_Mn, 0.0, rho_MM)
344 tri2full(rho_MM)
346 def calculate_atomic_density_matrices_with_occupation(self, D_asp, f_un):
347 # ac = self.atomic_correction
348 # if ac.implements_distributed_projections():
349 # D2_asp = ac.redistribute(self, D_asp, type='asp', op='forth')
350 # WaveFunctions.calculate_atomic_density_matrices_with_occupation(
351 # self, D2_asp, f_un)
352 # D3_asp = ac.redistribute(self, D2_asp, type='asp', op='back')
353 # for a in D_asp:
354 # D_asp[a][:] = D3_asp[a]
355 # else:
356 WaveFunctions.calculate_atomic_density_matrices_with_occupation(
357 self, D_asp, f_un)
359 def calculate_density_matrix_delta(self, d_nn, C_nM, rho_MM=None):
360 self.timer.start('Calculate density matrix')
361 rho_MM = self.ksl.calculate_density_matrix_delta(d_nn, C_nM, rho_MM)
362 self.timer.stop('Calculate density matrix')
363 return rho_MM
365 def add_to_density_from_k_point_with_occupation(self, nt_sG, kpt, f_n):
366 """Add contribution to pseudo electron-density. Do not use the standard
367 occupation numbers, but ones given with argument f_n."""
368 # Custom occupations are used in calculation of response potential
369 # with GLLB-potential
370 if kpt.rho_MM is None:
371 rho_MM = self.calculate_density_matrix(f_n, kpt.C_nM)
372 if hasattr(kpt, 'c_on'):
373 assert self.bd.comm.size == 1
374 d_nn = np.zeros((self.bd.mynbands, self.bd.mynbands),
375 dtype=kpt.C_nM.dtype)
376 for ne, c_n in zip(kpt.ne_o, kpt.c_on):
377 assert abs(c_n.imag).max() < 1e-14
378 d_nn += ne * np.outer(c_n.conj(), c_n).real
379 rho_MM += self.calculate_density_matrix_delta(d_nn, kpt.C_nM)
380 else:
381 rho_MM = kpt.rho_MM
382 self.timer.start('Construct density')
383 self.basis_functions.construct_density(rho_MM, nt_sG[kpt.s], kpt.q)
384 self.timer.stop('Construct density')
386 def add_to_kinetic_density_from_k_point(self, taut_G, kpt):
387 raise NotImplementedError('Kinetic density calculation for LCAO '
388 'wavefunctions is not implemented.')
390 def calculate_forces(self, hamiltonian, F_av):
391 self.timer.start('LCAO forces')
393 Fref_av = np.zeros_like(F_av)
394 self.forcecalc = LCAOforces(self.ksl, self.dtype, self.gd,
395 self.bd, self.kd, self.kpt_u, self.nspins,
396 self.basis_functions, self.newtci,
397 self.P_aqMi, self.setups,
398 self.manytci, hamiltonian,
399 self, self.spos_ac,
400 self.timer, Fref_av)
402 F_av[:, :] = self.forcecalc.get_forces_sum_GS()
403 self.timer.stop('LCAO forces')
405 def _get_wave_function_array(self, u, n, realspace=True, periodic=False):
406 # XXX Taking kpt is better than taking u
407 kpt = self.kpt_u[u]
408 C_M = kpt.C_nM[n]
410 if realspace:
411 psit_G = self.gd.zeros(dtype=self.dtype)
412 self.basis_functions.lcao_to_grid(C_M, psit_G, kpt.q)
413 if periodic and self.dtype == complex:
414 k_c = self.kd.ibzk_kc[kpt.k]
415 return self.gd.plane_wave(-k_c) * psit_G
416 return psit_G
417 else:
418 return C_M
420 def write(self, writer, write_wave_functions=False):
421 WaveFunctions.write(self, writer)
422 if write_wave_functions:
423 self.write_wave_functions(writer)
425 def write_wave_functions(self, writer):
426 writer.add_array(
427 'coefficients',
428 (self.nspins, self.kd.nibzkpts, self.bd.nbands, self.setups.nao),
429 dtype=self.dtype)
430 for s in range(self.nspins):
431 for k in range(self.kd.nibzkpts):
432 C_nM = self.collect_array('C_nM', k, s)
433 writer.fill(C_nM * Bohr**-1.5)
435 def read(self, reader):
436 WaveFunctions.read(self, reader)
437 r = reader.wave_functions
438 if 'coefficients' in r:
439 self.read_wave_functions(r)
441 def read_wave_functions(self, reader):
442 c = 1.0 if getattr(reader, 'version', 3) >= 4 else Bohr**1.5
443 for kpt in self.kpt_u:
444 C_nM = reader.proxy('coefficients', kpt.s, kpt.k)
445 kpt.C_nM = self.bd.empty(self.setups.nao, dtype=self.dtype)
446 for myn, C_M in enumerate(kpt.C_nM):
447 n = self.bd.global_index(myn)
448 # XXX number of bands could have been rounded up!
449 if n >= len(C_nM):
450 break
451 C_M[:] = C_nM[n] * c
453 self.coefficients_read_from_file = True
455 def estimate_memory(self, mem):
456 nq = len(self.kd.ibzk_qc)
457 nao = self.setups.nao
458 ni_total = sum([setup.ni for setup in self.setups])
459 itemsize = mem.itemsize[self.dtype]
460 mem.subnode('C [qnM]', nq * self.bd.mynbands * nao * itemsize)
461 nM1, nM2 = self.ksl.get_overlap_matrix_shape()
462 mem.subnode('S, T [2 x qmm]', 2 * nq * nM1 * nM2 * itemsize)
463 mem.subnode('P [aqMi]', nq * nao * ni_total // self.gd.comm.size)
464 # self.tci.estimate_memory(mem.subnode('TCI'))
465 self.basis_functions.estimate_memory(mem.subnode('BasisFunctions'))
466 self.eigensolver.estimate_memory(mem.subnode('Eigensolver'),
467 self.dtype)
470class LCAOforces:
472 def __init__(self, ksl, dtype, gd, bd, kd, kpt_u, nspins, bfs, newtci,
473 P_aqMi, setups, manytci, hamiltonian, wfs, spos_ac,
474 timer, Fref_av):
475 """ Object which calculates LCAO forces """
477 self.ksl = ksl
478 self.nao = ksl.nao
479 self.mynao = ksl.mynao
480 self.dtype = dtype
481 self.newtci = newtci
482 self.manytci = manytci
483 self.P_aqMi = P_aqMi
484 self.gd = gd
485 self.bd = bd
486 self.kd = kd
487 self.kpt_u = kpt_u
488 self.nspins = nspins
489 self.bfs = bfs
490 self.spos_ac = spos_ac
491 self.Mstart = ksl.Mstart
492 self.Mstop = ksl.Mstop
493 self.setups = setups
494 self.hamiltonian = hamiltonian
495 self.wfs = wfs
496 self.timer = timer
497 self.Fref_av = Fref_av
498 self.my_atom_indices = bfs.my_atom_indices
499 self.atom_indices = bfs.atom_indices
500 self.dH_asp = hamiltonian.dH_asp
502 from gpaw.kohnsham_layouts import BlacsOrbitalLayouts
503 self.isblacs = isinstance(self.ksl, BlacsOrbitalLayouts)
505 if not self.isblacs:
506 self.timer.start('TCI derivative')
507 self.dThetadR_qvMM, self.dTdR_qvMM = self.manytci.O_qMM_T_qMM(
508 self.gd.comm, self.Mstart, self.Mstop, False, derivative=True)
509 self.dPdR_aqvMi = self.manytci.P_aqMi(self.bfs.my_atom_indices,
510 derivative=True)
512 self.gd.comm.sum(self.dThetadR_qvMM)
513 self.gd.comm.sum(self.dTdR_qvMM)
514 self.timer.stop('TCI derivative')
515 self.rhoT_uMM, self.ET_uMM = self.get_den_mat_and_E()
517 def get_forces_sum_GS(self):
518 """ This function calculates ground state forces in LCAO mode """
519 if not self.isblacs:
520 F_av = np.zeros_like(self.Fref_av)
521 Fkin_av = self.get_kinetic_term()
522 Fpot_av = self.get_pot_term()
523 Ftheta_av = self.get_den_mat_term()
524 Frho_av = self.get_den_mat_paw_term()
525 Fatom_av = self.get_atomic_density_term()
526 F_av += Fkin_av + Fpot_av + Ftheta_av + Frho_av + Fatom_av
527 else:
528 F_av = np.zeros_like(self.Fref_av)
529 Fpot_av = self.get_pot_term_blacs()
530 Fkin_av, Ftheta_av = self.get_kin_and_den_term_blacs()
531 Fatom_av, Frho_av = self.get_at_den_and_den_paw_blacs()
533 F_av += Fkin_av + Fpot_av + Ftheta_av + Frho_av + Fatom_av
535 self.timer.start('Wait for sum')
536 self.ksl.orbital_comm.sum(F_av)
537 if self.bd.comm.rank == 0:
538 self.kd.comm.sum(F_av, 0)
539 self.timer.stop('Wait for sum')
541 return F_av
543 def _slices(self, indices):
544 for a in indices:
545 M1 = self.bfs.M_a[a] - self.Mstart
546 M2 = M1 + self.setups[a].nao
547 if M2 > 0:
548 yield a, max(0, M1), M2
550 def slices(self):
551 return self._slices(self.atom_indices)
553 def my_slices(self):
554 return self._slices(self.my_atom_indices)
556 def get_den_mat_and_E(self):
557 #
558 # ----- -----
559 # \ -1 \ *
560 # E = ) S H rho = ) c eps f c
561 # mu nu / mu x x z z nu / n mu n n n nu
562 # ----- -----
563 # x z n
564 #
565 # We use the transpose of that matrix. The first form is used
566 # if rho is given, otherwise the coefficients are used.
567 self.timer.start('Initial')
568 if self.kpt_u[0].rho_MM is None:
569 rhoT_uMM = []
570 ET_uMM = []
571 self.timer.start('Get density matrix')
572 for kpt in self.kpt_u:
573 rhoT_MM = self.ksl.get_transposed_density_matrix(kpt.f_n,
574 kpt.C_nM)
575 rhoT_uMM.append(rhoT_MM)
576 ET_MM = self.ksl.get_transposed_density_matrix(kpt.f_n *
577 kpt.eps_n,
578 kpt.C_nM)
579 ET_uMM.append(ET_MM)
580 if hasattr(kpt, 'c_on'):
581 # XXX does this work with BLACS/non-BLACS/etc.?
582 assert self.bd.comm.size == 1
583 d_nn = np.zeros((self.bd.mynbands, self.bd.mynbands),
584 dtype=kpt.C_nM.dtype)
585 for ne, c_n in zip(kpt.ne_o, kpt.c_on):
586 d_nn += ne * np.outer(c_n.conj(), c_n)
587 rhoT_MM += self.ksl.get_transposed_density_matrix_delta(
588 d_nn, kpt.C_nM)
589 ET_MM += self.ksl.get_transposed_density_matrix_delta(
590 d_nn * kpt.eps_n, kpt.C_nM)
591 self.timer.stop('Get density matrix')
592 else:
593 rhoT_uMM = []
594 ET_uMM = []
595 for kpt in self.kpt_u:
596 H_MM = self.wfs.eigensolver.calculate_hamiltonian_matrix(
597 self.hamiltonian, self.wfs, kpt)
598 tri2full(H_MM)
599 S_MM = kpt.S_MM.copy()
600 tri2full(S_MM)
601 ET_MM = np.linalg.solve(S_MM, gemmdot(H_MM,
602 kpt.rho_MM)).T.copy()
603 del S_MM, H_MM
604 rhoT_MM = kpt.rho_MM.T.copy()
605 rhoT_uMM.append(rhoT_MM)
606 ET_uMM.append(ET_MM)
607 self.timer.stop('Initial')
608 return rhoT_uMM, ET_uMM
610 def get_kinetic_term(self):
611 """Calculate Kinetic energy term in LCAO"""
612 Fkin_av = np.zeros_like(self.Fref_av)
613 self.timer.start('TCI derivative')
614 # Kinetic energy contribution
615 #
616 # ----- d T
617 # a \ mu nu
618 # F += 2 Re ) -------- rho
619 # / d R nu mu
620 # ----- mu nu
621 # mu in a; nu
622 #
623 Fkin_av = np.zeros_like(Fkin_av)
624 for u, kpt in enumerate(self.kpt_u):
625 dEdTrhoT_vMM = (self.dTdR_qvMM[kpt.q] *
626 self.rhoT_uMM[u][np.newaxis]).real
627 # XXX load distribution!
628 for a, M1, M2 in self.my_slices():
629 Fkin_av[a, :] += \
630 2.0 * dEdTrhoT_vMM[:, M1:M2].sum(-1).sum(-1)
631 self.timer.stop('TCI derivative')
633 return Fkin_av
635 def get_den_mat_term(self):
636 """Calculate density matrix term in LCAO"""
637 Ftheta_av = np.zeros_like(self.Fref_av)
638 # Density matrix contribution due to basis overlap
639 #
640 # ----- d Theta
641 # a \ mu nu
642 # F += -2 Re ) ------------ E
643 # / d R nu mu
644 # ----- mu nu
645 # mu in a; nu
646 #
647 Ftheta_av = np.zeros_like(Ftheta_av)
648 for u, kpt in enumerate(self.kpt_u):
649 dThetadRE_vMM = (self.dThetadR_qvMM[kpt.q] *
650 self.ET_uMM[u][np.newaxis]).real
651 for a, M1, M2 in self.my_slices():
652 Ftheta_av[a, :] += \
653 -2.0 * dThetadRE_vMM[:, M1:M2].sum(-1).sum(-1)
655 return Ftheta_av
657 def get_pot_term(self):
658 """Calculate potential term"""
659 Fpot_av = np.zeros_like(self.Fref_av)
660 # Potential contribution
661 #
662 # ----- / d Phi (r)
663 # a \ | mu ~
664 # F += -2 Re ) | ---------- v (r) Phi (r) dr rho
665 # / | d R nu nu mu
666 # ----- / a
667 # mu in a; nu
668 #
669 self.timer.start('Potential')
670 vt_sG = self.hamiltonian.vt_sG
671 Fpot_av = np.zeros_like(Fpot_av)
672 for u, kpt in enumerate(self.kpt_u):
673 vt_G = vt_sG[kpt.s]
674 Fpot_av += self.bfs.calculate_force_contribution(vt_G,
675 self.rhoT_uMM[u],
676 kpt.q)
677 self.timer.stop('Potential')
679 return Fpot_av
681 def get_den_mat_paw_term(self):
682 """Calcualte PAW correction"""
683 # TO DO: split this function into
684 # _get_den_mat_paw_term (which calculate Frho_av) and
685 # get_paw_correction (which calculate ZE_MM)
686 # Density matrix contribution from PAW correction
687 #
688 # ----- -----
689 # a \ a \ b
690 # F += 2 Re ) Z E - 2 Re ) Z E
691 # / mu nu nu mu / mu nu nu mu
692 # ----- -----
693 # mu nu b; mu in a; nu
694 #
695 # with
696 # b*
697 # ----- dP
698 # b \ i mu b b
699 # Z = ) -------- dS P
700 # mu nu / dR ij j nu
701 # ----- b mu
702 # ij
703 #
704 self.timer.start('Paw correction')
705 Frho_av = np.zeros_like(self.Fref_av)
706 for u, kpt in enumerate(self.kpt_u):
707 work_MM = np.zeros((self.mynao, self.nao), self.dtype)
708 ZE_MM = None
709 for b in self.my_atom_indices:
710 setup = self.setups[b]
711 dO_ii = np.asarray(setup.dO_ii, self.dtype)
712 dOP_iM = np.zeros((setup.ni, self.nao), self.dtype)
713 mmm(1.0, dO_ii, 'N', self.P_aqMi[b][kpt.q], 'C', 0.0, dOP_iM)
714 for v in range(3):
715 mmm(1.0,
716 self.dPdR_aqvMi[b][kpt.q][v][self.Mstart:self.Mstop],
717 'N',
718 dOP_iM, 'N',
719 0.0, work_MM)
720 ZE_MM = (work_MM * self.ET_uMM[u]).real
721 for a, M1, M2 in self.slices():
722 dE = 2 * ZE_MM[M1:M2].sum()
723 Frho_av[a, v] -= dE # the "b; mu in a; nu" term
724 Frho_av[b, v] += dE # the "mu nu" term
725 self.timer.stop('Paw correction')
726 return Frho_av
728 def _get_den_mat_paw_term(self):
729 # THIS doesn't work in parallel
730 # Density matrix contribution from PAW correction
731 #
732 # ----- -----
733 # a \ a \ b
734 # F += 2 Re ) Z E - 2 Re ) Z E
735 # / mu nu nu mu / mu nu nu mu
736 # ----- -----
737 # mu nu b; mu in a; nu
738 #
739 # with
740 # b*
741 # ----- dP
742 # b \ i mu b b
743 # Z = ) -------- dS P
744 # mu nu / dR ij j nu
745 # ----- b mu
746 # ij
747 #
748 Frho_av = np.zeros_like(self.Fref_av)
749 self.timer.start('add paw correction')
750 ZE_MM = self.get_paw_correction()
751 for u, kpt in enumerate(self.kpt_u):
752 for b in self.my_atom_indices:
753 for v in range(3):
754 for a, M1, M2 in self.slices():
755 dE = 2 * ZE_MM[u, b, v, M1:M2].sum()
756 Frho_av[a, v] -= dE.real # the "b; mu in a; nu" term
757 Frho_av[b, v] += dE.real # the "mu nu" term
758 self.timer.stop('add paw correction')
759 return Frho_av
761 def get_paw_correction(self):
762 # THIS doesn't work in parallel
763 # <Phi_nu|pt_i>O_ii<dPt_i/dR|Phi_mu>
764 self.timer.start('get paw correction')
765 ZE_MM = np.zeros((len(self.kpt_u), len(self.my_atom_indices), 3,
766 self.mynao, self.nao), self.dtype)
767 for u, kpt in enumerate(self.kpt_u):
768 work_MM = np.zeros((self.mynao, self.nao), self.dtype)
769 for b in self.my_atom_indices:
770 setup = self.setups[b]
771 dO_ii = np.asarray(setup.dO_ii, self.dtype)
772 dOP_iM = np.zeros((setup.ni, self.nao), self.dtype)
773 mmm(1.0, dO_ii, 'N', self.P_aqMi[b][kpt.q], 'C', 0.0, dOP_iM)
774 for v in range(3):
775 mmm(1.0,
776 self.dPdR_aqvMi[b][kpt.q][v][self.Mstart:self.Mstop],
777 'N',
778 dOP_iM, 'N',
779 0.0, work_MM)
780 ZE_MM[u, b, v, :, :] = (work_MM * self.ET_uMM[u]).real
781 self.timer.stop('get paw correction')
782 return ZE_MM
784 def get_atomic_density_term(self):
785 Fatom_av = np.zeros_like(self.Fref_av)
786 # Atomic density contribution
787 # ----- -----
788 # a \ a \ b
789 # F += -2 Re ) A rho + 2 Re ) A rho
790 # / mu nu nu mu / mu nu nu mu
791 # ----- -----
792 # mu nu b; mu in a; nu
793 #
794 # b*
795 # ----- d P
796 # b \ i mu b b
797 # A = ) ------- dH P
798 # mu nu / d R ij j nu
799 # ----- b mu
800 # ij
801 #
802 self.timer.start('Atomic Hamiltonian force')
803 Fatom_av = np.zeros_like(Fatom_av)
804 for u, kpt in enumerate(self.kpt_u):
805 for b in self.my_atom_indices:
806 H_ii = np.asarray(unpack_hermitian(self.dH_asp[b][kpt.s]),
807 self.dtype)
808 if len(H_ii) == 0:
809 # gemmdot does not like empty matrices!
810 # (has been fixed in the new code)
811 continue
812 HP_iM = gemmdot(H_ii, np.ascontiguousarray(
813 self.P_aqMi[b][kpt.q].T.conj()))
814 for v in range(3):
815 dPdR_Mi = \
816 self.dPdR_aqvMi[b][kpt.q][v][self.Mstart:self.Mstop]
817 ArhoT_MM = \
818 (gemmdot(dPdR_Mi, HP_iM) * self.rhoT_uMM[u]).real
819 for a, M1, M2 in self.slices():
820 dE = 2 * ArhoT_MM[M1:M2].sum()
821 Fatom_av[a, v] += dE # the "b; mu in a; nu" term
822 Fatom_av[b, v] -= dE # the "mu nu" term
823 self.timer.stop('Atomic Hamiltonian force')
825 return Fatom_av
827 def get_den_mat_block_blacs(self, f_n, C_nM, redistributor):
828 rho1_mm = self.ksl.calculate_blocked_density_matrix(f_n,
829 C_nM).conj()
830 rho_mm = redistributor.redistribute(rho1_mm)
831 return rho_mm
833 def get_pot_term_blacs(self):
834 Fpot_av = np.zeros_like(self.Fref_av)
835 from gpaw.blacs import BlacsGrid, Redistributor
836 self.grid = BlacsGrid(self.ksl.block_comm, self.gd.comm.size,
837 self.bd.comm.size)
838 self.blocksize1 = -(-self.nao // self.grid.nprow)
839 self.blocksize2 = -(-self.nao // self.grid.npcol)
840 desc = self.grid.new_descriptor(self.nao, self.nao,
841 self.blocksize1, self.blocksize2)
842 vt_sG = self.hamiltonian.vt_sG
843 self.rhoT_umm = []
844 self.ET_umm = []
845 self.redistributor = Redistributor(self.grid.comm,
846 self.ksl.mmdescriptor, desc)
847 Fpot_av = np.zeros_like(self.Fref_av)
848 for u, kpt in enumerate(self.kpt_u):
849 self.timer.start('Get density matrix')
850 rhoT_mm = self.get_den_mat_block_blacs(kpt.f_n, kpt.C_nM,
851 self.redistributor)
852 self.rhoT_umm.append(rhoT_mm)
853 self.timer.stop('Get density matrix')
854 self.timer.start('Potential')
855 rhoT_mM = self.ksl.distribute_to_columns(rhoT_mm, desc)
856 vt_G = vt_sG[kpt.s]
857 Fpot_av += self.bfs.calculate_force_contribution(vt_G, rhoT_mM,
858 kpt.q)
859 del rhoT_mM
860 self.timer.stop('Potential')
862 return Fpot_av
864 def get_kin_and_den_term_blacs(self):
865 Fkin_av_sum = np.zeros_like(self.Fref_av)
866 Ftheta_av_sum = np.zeros_like(self.Fref_av)
867 # pcutoff_a = [max([pt.get_cutoff() for pt in setup.pt_j])
868 # for setup in self.setups]
869 # phicutoff_a = [max([phit.get_cutoff() for phit in setup.phit_j])
870 # for setup in self.setups]
871 # XXX should probably use bdsize x gdsize instead
872 # That would be consistent with some existing grids
873 # I'm not sure if this is correct
874 # XXX what are rows and columns actually?
875 dH_asp = self.hamiltonian.dH_asp
876 self.timer.start('Get density matrix')
877 for kpt in self.kpt_u:
878 ET_mm = self.get_den_mat_block_blacs(kpt.f_n * kpt.eps_n, kpt.C_nM,
879 self.redistributor)
880 self.ET_umm.append(ET_mm)
881 self.timer.stop('Get density matrix')
882 self.M1start = self.blocksize1 * self.grid.myrow
883 self.M2start = self.blocksize2 * self.grid.mycol
884 self.M1stop = min(self.M1start + self.blocksize1, self.nao)
885 self.M2stop = min(self.M2start + self.blocksize2, self.nao)
886 self.m1max = self.M1stop - self.M1start
887 self.m2max = self.M2stop - self.M2start
888 # from gpaw.lcao.overlap import TwoCenterIntegralCalculator
889 self.timer.start('Prepare TCI loop')
890 self.M_a = self.bfs.M_a
891 Fkin2_av = np.zeros_like(self.Fref_av)
892 Ftheta2_av = np.zeros_like(self.Fref_av)
893 self.atompairs = self.newtci.a1a2.get_atompairs()
894 self.timer.start('broadcast dH')
895 self.alldH_asp = {}
896 for a in range(len(self.setups)):
897 gdrank = self.bfs.sphere_a[a].rank
898 if gdrank == self.gd.rank:
899 dH_sp = dH_asp[a]
900 else:
901 ni = self.setups[a].ni
902 dH_sp = np.empty((self.nspins, ni * (ni + 1) // 2))
903 self.gd.comm.broadcast(dH_sp, gdrank)
904 # okay, now everyone gets copies of dH_sp
905 self.alldH_asp[a] = dH_sp
906 self.timer.stop('broadcast dH')
907 # This will get sort of hairy. We need to account for some
908 # three-center overlaps, such as:
909 #
910 # a1
911 # Phi ~a3 a3 ~a3 a2 a2,a1
912 # < ---- |p > dH <p |Phi > rho
913 # dR
914 #
915 # To this end we will loop over all pairs of atoms (a1, a3),
916 # and then a sub-loop over (a3, a2).
917 self.timer.stop('Prepare TCI loop')
918 self.timer.start('Not so complicated loop')
919 for (a1, a2) in self.atompairs:
920 if a1 >= a2:
921 # Actually this leads to bad load balance.
922 # We should take a1 > a2 or a1 < a2 equally many times.
923 # Maybe decide which of these choices
924 # depending on whether a2 % 1 == 0
925 continue
926 m1start = self.M_a[a1] - self.M1start
927 m2start = self.M_a[a2] - self.M2start
928 if m1start >= self.blocksize1 or m2start >= self.blocksize2:
929 continue # (we have only one block per CPU)
930 nm1 = self.setups[a1].nao
931 nm2 = self.setups[a2].nao
932 m1stop = min(m1start + nm1, self.m1max)
933 m2stop = min(m2start + nm2, self.m2max)
934 if m1stop <= 0 or m2stop <= 0:
935 continue
936 m1start = max(m1start, 0)
937 m2start = max(m2start, 0)
938 J1start = max(0, self.M1start - self.M_a[a1])
939 J2start = max(0, self.M2start - self.M_a[a2])
940 M1stop = J1start + m1stop - m1start
941 J2stop = J2start + m2stop - m2start
942 dThetadR_qvmm, dTdR_qvmm = self.newtci.dOdR_dTdR(a1, a2)
943 for u, kpt in enumerate(self.kpt_u):
944 rhoT_mm = self.rhoT_umm[u][m1start:m1stop, m2start:m2stop]
945 ET_mm = self.ET_umm[u][m1start:m1stop, m2start:m2stop]
946 Fkin_v = 2.0 * (dTdR_qvmm[kpt.q][:, J1start:M1stop,
947 J2start:J2stop] *
948 rhoT_mm[np.newaxis]).real.sum(-1).sum(-1)
949 Ftheta_v = 2.0 * (dThetadR_qvmm[kpt.q][:, J1start:M1stop,
950 J2start:J2stop] *
951 ET_mm[np.newaxis]).real.sum(-1).sum(-1)
952 Fkin2_av[a1] += Fkin_v
953 Fkin2_av[a2] -= Fkin_v
954 Ftheta2_av[a1] -= Ftheta_v
955 Ftheta2_av[a2] += Ftheta_v
956 Fkin_av = Fkin2_av
957 Ftheta_av = Ftheta2_av
958 self.timer.stop('Not so complicated loop')
960 Fkin_av_sum += Fkin_av
961 Ftheta_av_sum += Ftheta_av
963 return Fkin_av_sum, Ftheta_av_sum
965 def get_at_den_and_den_paw_blacs(self):
966 Fatom_av = np.zeros_like(self.Fref_av)
967 Frho_av = np.zeros_like(self.Fref_av)
968 Fatom_av_sum = np.zeros_like(self.Fref_av)
969 Frho_av_sum = np.zeros_like(self.Fref_av)
970 self.dHP_and_dSP_aauim = {}
971 self.a2values = {}
972 for (a2, a3) in self.atompairs:
973 if a3 not in self.a2values:
974 self.a2values[a3] = []
975 self.a2values[a3].append(a2)
977 self.timer.start('Complicated loop')
978 for a1, a3 in self.atompairs:
979 if a1 == a3:
980 # Functions reside on same atom, so their overlap
981 # does not change when atom is displaced
982 continue
983 m1start = self.M_a[a1] - self.M1start
984 if m1start >= self.blocksize1:
985 continue
986 nm1 = self.setups[a1].nao
987 m1stop = min(m1start + nm1, self.m1max)
988 if m1stop <= 0:
989 continue
990 dPdR_qvim = self.newtci.dPdR(a3, a1)
991 if dPdR_qvim is None:
992 continue
993 dPdR_qvmi = -dPdR_qvim.transpose(0, 1, 3, 2).conj()
994 m1start = max(m1start, 0)
995 J1start = max(0, self.M1start - self.M_a[a1])
996 J1stop = J1start + m1stop - m1start
997 dPdR_qvmi = dPdR_qvmi[:, :, J1start:J1stop, :].copy()
998 for a2 in self.a2values[a3]:
999 m2start = self.M_a[a2] - self.M2start
1000 if m2start >= self.blocksize2:
1001 continue
1002 nm2 = self.setups[a2].nao
1003 m2stop = min(m2start + nm2, self.m2max)
1004 if m2stop <= 0:
1005 continue
1006 m2start = max(m2start, 0)
1007 J2start = max(0, self.M2start - self.M_a[a2])
1008 J2stop = J2start + m2stop - m2start
1009 if (a2, a3) in self.dHP_and_dSP_aauim:
1010 dHP_uim, dSP_uim = self.dHP_and_dSP_aauim[(a2, a3)]
1011 else:
1012 P_qim = self.newtci.P(a3, a2)
1013 if P_qim is None:
1014 continue
1015 P_qmi = P_qim.transpose(0, 2, 1).conj()
1016 P_qmi = P_qmi[:, J2start:J2stop].copy()
1017 dH_sp = self.alldH_asp[a3]
1018 dS_ii = self.setups[a3].dO_ii
1019 dHP_uim = []
1020 dSP_uim = []
1021 for u, kpt in enumerate(self.kpt_u):
1022 dH_ii = unpack_hermitian(dH_sp[kpt.s])
1023 dHP_im = np.dot(P_qmi[kpt.q], dH_ii).T.conj()
1024 # XXX only need nq of these,
1025 # but the looping is over all u
1026 dSP_im = np.dot(P_qmi[kpt.q], dS_ii).T.conj()
1027 dHP_uim.append(dHP_im)
1028 dSP_uim.append(dSP_im)
1029 self.dHP_and_dSP_aauim[(a2, a3)] = dHP_uim, dSP_uim
1030 for u, kpt in enumerate(self.kpt_u):
1031 rhoT_mm = self.rhoT_umm[u][m1start:m1stop, m2start:m2stop]
1032 ET_mm = self.ET_umm[u][m1start:m1stop, m2start:m2stop]
1033 dPdRdHP_vmm = np.dot(dPdR_qvmi[kpt.q], dHP_uim[u])
1034 dPdRdSP_vmm = np.dot(dPdR_qvmi[kpt.q], dSP_uim[u])
1035 Fatom_c = 2.0 * (dPdRdHP_vmm *
1036 rhoT_mm).real.sum(-1).sum(-1)
1037 Frho_c = 2.0 * (dPdRdSP_vmm *
1038 ET_mm).real.sum(-1).sum(-1)
1039 Fatom_av[a1] += Fatom_c
1040 Fatom_av[a3] -= Fatom_c
1041 Frho_av[a1] -= Frho_c
1042 Frho_av[a3] += Frho_c
1043 self.timer.stop('Complicated loop')
1045 Fatom_av_sum += Fatom_av
1046 Frho_av_sum += Frho_av
1048 return Fatom_av_sum, Frho_av_sum