Coverage for gpaw/xc/vdw.py: 75%
565 statements
« prev ^ index » next coverage.py v7.7.1, created at 2025-07-20 00:19 +0000
« prev ^ index » next coverage.py v7.7.1, created at 2025-07-20 00:19 +0000
1"""Van der Waals density functional.
3This module implements the Dion-Rydberg–Schröder–Langreth–Lundqvist
4XC-functional. There are two implementations:
61. A simple real-space double sum.
82. A more efficient FFT implementation based on the Román-Pérez–Soler paper.
10"""
12import os
13import sys
14import time
15from math import sin, cos, exp, pi, log, sqrt, ceil
17import numpy as np
18from numpy.fft import fft, rfftn, irfftn
19from ase.utils import seterr
21from gpaw.utilities.timing import nulltimer
22from gpaw.xc.libxc import LibXC
23from gpaw.xc.gga import GGA, gga_vars, add_gradient_correction
24from gpaw.xc.mgga import MGGA
25from gpaw.grid_descriptor import GridDescriptor
26from gpaw.utilities.tools import construct_reciprocal
27from gpaw import setup_paths
28import gpaw.mpi as mpi
29import gpaw.cgpaw as cgpaw
32def T(w, x, y, z):
33 return 0.5 * ((1.0 / (w + x) + 1.0 / (y + z)) *
34 (1.0 / ((w + y) * (x + z)) + 1.0 / ((w + z) * (y + x))))
37def W(a, b):
38 return 2 * ((3 - a**2) * b * cos(b) * sin(a) +
39 (3 - b**2) * a * cos(a) * sin(b) +
40 (a**2 + b**2 - 3) * sin(a) * sin(b) -
41 3 * a * b * cos(a) * cos(b)) / (a * b)**3
44eta = 8 * pi / 9
47def nu(y, d):
48 return 0.5 * y**2 / (1 - exp(-0.5 * eta * (y / d)**2))
51def f(a, b, d, dp):
52 va = nu(a, d)
53 vb = nu(b, d)
54 vpa = nu(a, dp)
55 vpb = nu(b, dp)
56 return 2 * (a * b)**2 * W(a, b) * T(va, vb, vpa, vpb) / pi**2
59def phi(d, dp):
60 """vdW-DF kernel."""
61 from scipy.integrate import quad
62 kwargs = dict(epsabs=1.0e-6, epsrel=1.0e-6, limit=400)
63 cut = 35
64 return quad(lambda y: quad(f, 0, cut, (y, d, dp), **kwargs)[0],
65 0, cut, **kwargs)[0]
68C = 12 * (4 * pi / 9)**3
71def phi_asymptotic(d, dp):
72 """Asymptotic behavior of vdW-DF kernel."""
73 d2 = d**2
74 dp2 = dp**2
75 return -C / (d2 * dp2 * (d2 + dp2))
78def hRPS(x, xc=1.0):
79 """Cutoff function from Román-Pérez–Soler paper."""
80 x1 = x / xc
81 xm = x1 * 1.0
82 y = -x1
83 z = 1.0 + x1
84 for m in range(2, 13):
85 xm *= x1
86 y -= xm / m
87 if m < 12:
88 z += xm
89 np.clip(y, -1e10, 1e10, y)
90 y = np.exp(y)
91 return xc * (1.0 - y), z * y
94def VDWFunctional(name, fft=True, stencil=2, **kwargs):
95 if name == 'vdW-DF':
96 kernel = LibXC('GGA_X_PBE_R+LDA_C_PW')
97 elif name == 'vdW-DF2':
98 kernel = LibXC('GGA_X_RPW86+LDA_C_PW')
99 kwargs['Zab'] = -1.887
100 elif name == 'optPBE-vdW':
101 kernel = LibXC('GGA_X_OPTPBE_VDW+LDA_C_PW')
102 elif name == 'optB88-vdW':
103 kernel = LibXC('GGA_X_OPTB88_VDW+LDA_C_PW')
104 elif name == 'C09-vdW':
105 kernel = LibXC('GGA_X_C09X+LDA_C_PW')
106 elif name == 'BEEF-vdW':
107 from gpaw.xc.bee import BEEVDWKernel
108 kernel = BEEVDWKernel('BEE2', None,
109 0.600166476948828631066,
110 0.399833523051171368934)
111 kwargs['Zab'] = -1.887
112 kwargs['setup_name'] = 'PBE'
113 elif name == 'mBEEF-vdW':
114 from gpaw.xc.bee import BEEVDWKernel
115 kernel = BEEVDWKernel('BEE3', None, 0.405258352, 0.356642240)
116 kwargs['Zab'] = -1.887
117 kwargs['vdwcoef'] = 0.886774972
118 kwargs['Nr'] = 4096
119 kwargs['setup_name'] = 'PBEsol'
120 assert fft
121 return MGGAFFTVDWFunctional(name, kernel, **kwargs)
122 else:
123 2 / 0
124 if fft:
125 return GGAFFTVDWFunctional(name, kernel, stencil, **kwargs)
126 return GGARealSpaceVDWFunctional(name, kernel, stencil, **kwargs)
129class VDWFunctionalBase:
130 """Base class for vdW-DF."""
131 def __init__(self, world=None, Zab=-0.8491, vdwcoef=1.0, q0cut=5.0,
132 phi0=0.5, ds=1.0, Dmax=20.0, nD=201, ndelta=21,
133 soft_correction=False, setup_name='revPBE',
134 verbose=False, energy_only=False):
135 """vdW-DF.
137 parameters:
139 name: str
140 Name of functional.
141 world: MPI communicator
142 Communicator to parallelize over. Defaults to gpaw.mpi.world.
143 q0cut: float
144 Maximum value for q0.
145 phi0: float
146 Smooth value for phi(0,0).
147 ds: float
148 Cutoff for smooth kernel.
149 Dmax: float
150 Maximum value for D.
151 nD: int
152 Number of values for D in kernel-table.
153 ndelta: int
154 Number of values for delta in kernel-table.
155 soft_correction: bool
156 Correct for soft kernel.
157 kernel:
158 Which kernel to use.
159 Zab:
160 parameter in nonlocal kernel.
161 vdwcoef: float
162 Scaling of vdW energy.
163 verbose: bool
164 Print useful information.
165 """
167 if world is None:
168 self.world = mpi.world
169 else:
170 self.world = world
172 self.Zab = Zab
173 self.vdwcoef = vdwcoef
174 self.q0cut = q0cut
175 self.phi0 = phi0
176 self.ds = ds
178 self.delta_i = np.linspace(0, 1.0, ndelta)
179 self.D_j = np.linspace(0, Dmax, nD)
181 self.verbose = verbose
183 self.read_table()
185 self.soft_correction = soft_correction
186 if soft_correction:
187 dD = self.D_j[1]
188 self.C_soft = np.dot(self.D_j**2, self.phi_ij[0]) * 4 * pi * dD
190 self.gd = None
191 self.energy_only = energy_only
192 self.timer = nulltimer
194 self.LDAc = LibXC('LDA_C_PW')
195 self.setup_name = setup_name
197 def get_setup_name(self):
198 return self.setup_name
200 def get_Ecnl(self):
201 return self.Ecnl
203 def stress_tensor_contribution(self, n_sg, skip_sum=False):
204 raise NotImplementedError('Calculation of stress tensor is not ' +
205 f'implemented for {self.name}')
207 def calculate_impl(self, gd, n_sg, v_sg, e_g):
208 sigma_xg, dedsigma_xg, gradn_svg = gga_vars(gd, self.grad_v, n_sg)
209 self.calculate_exchange(e_g, n_sg, v_sg, sigma_xg, dedsigma_xg)
210 self.calculate_correlation(e_g, n_sg, v_sg, sigma_xg, dedsigma_xg)
211 add_gradient_correction(self.grad_v, gradn_svg, sigma_xg,
212 dedsigma_xg, v_sg)
214 def calculate_exchange(self, e_g, n_sg, dedn_sg, sigma_xg, dedsigma_xg):
215 raise NotImplementedError
217 def calculate_correlation(self, e_g, n_sg, dedn_sg, sigma_xg, dedsigma_xg):
218 eLDAc_g = self.gd.empty()
219 vLDAc_sg = self.gd.zeros(1)
221 if self.vdwcoef == 0.0:
222 return
224 if len(n_sg) == 1:
225 self.LDAc.calculate(eLDAc_g, n_sg, vLDAc_sg)
226 e = self.get_non_local_energy(n_sg[0], sigma_xg[0],
227 eLDAc_g,
228 vLDAc_sg[0],
229 dedn_sg[0], dedsigma_xg[0])
230 else:
231 n_sg = n_sg.sum(0)
232 n_sg.shape = (1,) + n_sg.shape
233 self.LDAc.calculate(eLDAc_g, n_sg, vLDAc_sg)
234 v_g = np.zeros_like(e_g)
235 deda2nl_g = np.zeros_like(e_g)
236 a2_g = sigma_xg[0] + 2 * sigma_xg[1] + sigma_xg[2]
237 e = self.get_non_local_energy(n_sg[0], a2_g, eLDAc_g,
238 vLDAc_sg[0],
239 v_g, deda2nl_g)
240 dedsigma_xg[0] += self.vdwcoef * deda2nl_g
241 dedsigma_xg[1] += self.vdwcoef * 2 * deda2nl_g
242 dedsigma_xg[2] += self.vdwcoef * deda2nl_g
243 dedn_sg += self.vdwcoef * v_g
245 if self.gd.comm.rank == 0:
246 e_g[0, 0, 0] += self.vdwcoef * e / self.gd.dv
248 def get_non_local_energy(self, n_g=None, a2_g=None, e_LDAc_g=None,
249 v_LDAc_g=None, v_g=None, deda2_g=None):
250 """Calculate non-local correlation energy.
252 parameters:
254 n_g: ndarray
255 Density.
256 a2_g: ndarray
257 Absolute value of the gradient of the density - squared.
258 e_LDAc_g: ndarray
259 LDA correlation energy density.
260 """
262 gd = self.gd
264 n_g = n_g.clip(1e-7, np.inf)
266 # Calculate q0 and cut it off smoothly at q0cut:
267 kF_g = (3 * pi**2 * n_g)**(1.0 / 3.0)
268 q0_g, dhdx_g = hRPS(kF_g -
269 4 * pi / 3 * e_LDAc_g / n_g -
270 self.Zab / 36 / kF_g * a2_g / n_g**2, self.q0cut)
272 if self.verbose:
273 print('VDW: q0 (min, mean, max): '
274 f'({q0_g.min():f}, {q0_g.mean():f}, {q0_g.max():f})')
276 if self.soft_correction:
277 dEcnl = -gd.integrate(n_g**2 / q0_g**3) * 0.5 * self.C_soft
278 else:
279 dEcnl = 0.0
281 # Distribute density and q0 to all processors:
282 n_g = gd.collect(n_g, broadcast=True)
283 q0_g = gd.collect(q0_g, broadcast=True)
285 if not self.energy_only:
286 self.dhdx_g = gd.collect(dhdx_g, broadcast=True)
288 Ecnl = self.calculate_6d_integral(n_g, q0_g, a2_g, e_LDAc_g, v_LDAc_g,
289 v_g, deda2_g)
290 Ecnl += dEcnl
291 self.Ecnl = Ecnl
292 return Ecnl
294 def read_table(self):
295 name = (f'phi-{self.phi0:.3f}-{self.ds:.3f}-{self.D_j[-1]:.3f}'
296 f'-{len(self.delta_i):d}-{len(self.D_j):d}.txt')
297 dirs = setup_paths + ['.']
299 for dir in dirs:
300 filename = os.path.join(dir, name)
301 if os.path.isfile(filename):
302 self.phi_ij = np.loadtxt(filename)
303 if self.verbose:
304 print('VDW: using', filename)
305 return
307 print('VDW: Could not find table file:', name)
308 self.make_table(name)
310 def make_table(self, name):
311 print('VDW: Generating vdW-DF kernel ...')
312 print('VDW:', end=' ')
313 ndelta = len(self.delta_i)
314 nD = len(self.D_j)
315 self.phi_ij = np.zeros((ndelta, nD))
316 for i in range(self.world.rank, ndelta, self.world.size):
317 print(ndelta - i, end=' ')
318 sys.stdout.flush()
319 delta = self.delta_i[i]
320 for j in range(nD - 1, -1, -1):
321 D = self.D_j[j]
322 d = D * (1.0 + delta)
323 dp = D * (1.0 - delta)
324 if d**2 + dp**2 > self.ds**2:
325 with seterr(divide='ignore'):
326 self.phi_ij[i, j] = phi(d, dp)
327 else:
328 P = np.polyfit([0, self.D_j[j + 1]**2, self.D_j[j + 2]**2],
329 [self.phi0,
330 self.phi_ij[i, j + 1],
331 self.phi_ij[i, j + 2]],
332 2)
333 self.phi_ij[i, :j + 3] = np.polyval(P, self.D_j[:j + 3]**2)
334 break
336 self.world.sum(self.phi_ij)
338 print()
339 print('VDW: Done!')
340 header = (f'phi0={self.phi0:.3f}, ds={self.ds:.3f}, '
341 f'Dmax={self.D_j[-1]:.3f}, nD={len(self.delta_i)}, '
342 f'ndelta={len(self.D_j)}')
343 if self.world.rank == 0:
344 np.savetxt(name, self.phi_ij, header=header)
346 def make_prl_plot(self, multiply_by_4_pi_D_squared=True):
347 import matplotlib.pyplot as plt
348 x = np.linspace(0, 8.0, 100)
349 for delta in [0, 0.5, 0.9]:
350 y = [self.phi(D * (1.0 + delta), D * (1.0 - delta))
351 for D in x]
352 if multiply_by_4_pi_D_squared:
353 y *= 4 * pi * x**2
354 plt.plot(x, y, label=r'$\delta=%.1f$' % delta)
355 plt.legend(loc='best')
356 plt.plot(x, np.zeros(len(x)), 'k-')
357 plt.xlabel('D')
358 plt.ylabel(r'$4\pi D^2 \phi(\rm{Hartree})$')
359 plt.show()
361 def phi(self, d, dp):
362 """Kernel function.
364 Uses bi-linear interpolation and returns zero for D > Dmax.
365 """
367 P = self.phi_ij
368 D = (d + dp) / 2.0
369 if D < 1e-14:
370 return P[0, 0]
371 if D >= self.D_j[-1]:
372 return 0.0
374 delta = abs((d - dp) / (2 * D))
375 ddelta = self.delta_i[1]
376 x = delta / ddelta
377 i = int(x)
378 if i == len(self.delta_i) - 1:
379 i -= 1
380 x = 1.0
381 else:
382 x -= i
384 dD = self.D_j[1]
385 y = D / dD
386 j = int(y)
387 y -= j
388 return (x * (y * P[i + 1, j + 1] +
389 (1 - y) * P[i + 1, j]) +
390 (1 - x) * (y * P[i, j + 1] +
391 (1 - y) * P[i, j]))
394class RealSpaceVDWFunctional(VDWFunctionalBase):
395 """Real-space implementation of vdW-DF."""
396 def __init__(self, repeat=None, ncut=0.0005, **kwargs):
397 """Real-space vdW-DF.
399 parameters:
401 repeat: 3-tuple
402 Repeat the unit cell.
403 ncut: float
404 Density cutoff.
405 """
407 VDWFunctionalBase.__init__(self, **kwargs)
408 self.repeat = repeat
409 self.ncut = ncut
411 def calculate_6d_integral(self, n_g, q0_g,
412 a2_g=None, e_LDAc_g=None, v_LDAc_g=None,
413 v_g=None, deda2_g=None):
414 """Real-space double-sum."""
415 gd = self.gd
416 if not gd.orthogonal:
417 raise NotImplementedError('Real-space vdW calculations require ' +
418 'an orthogonal cell.')
419 n_c = n_g.shape
420 R_gc = np.empty(n_c + (3,))
421 h_c = gd.h_cv.diagonal()
422 R_gc[..., 0] = (np.arange(0, n_c[0]) * h_c[0]).reshape((-1, 1, 1))
423 R_gc[..., 1] = (np.arange(0, n_c[1]) * h_c[1]).reshape((-1, 1))
424 R_gc[..., 2] = np.arange(0, n_c[2]) * h_c[2]
426 mask_g = (n_g.ravel() > self.ncut)
427 R_ic = R_gc.reshape((-1, 3)).compress(mask_g, axis=0)
428 n_i = n_g.ravel().compress(mask_g)
429 q0_i = q0_g.ravel().compress(mask_g)
431 # Number of grid points:
432 ni = len(n_i)
434 if self.verbose:
435 print('VDW: number of points:', ni)
437 # Number of pairs per processor:
438 world = self.world
439 p = ni * (ni - 1) // 2 // world.size
441 # When doing supercell, the pairs are not that important
442 if np.any(self.repeat): # XXX This can be further optimized
443 iA = world.rank * (ni // world.size)
444 iB = (world.rank + 1) * (ni // world.size)
445 else:
446 iA = 0
447 for r in range(world.size):
448 iB = iA + int(0.5 - iA + sqrt((iA - 0.5)**2 + 2 * p))
449 if r == world.rank:
450 break
451 iA = iB
453 assert iA <= iB
455 if world.rank == world.size - 1:
456 iB = ni
458 if self.repeat is None:
459 repeat_c = np.zeros(3, int)
460 else:
461 repeat_c = np.asarray(self.repeat, int)
463 self.rhistogram = np.zeros(200)
464 self.Dhistogram = np.zeros(200)
465 dr = 0.05
466 dD = 0.05
467 if self.verbose:
468 start = time.time()
469 E_vdwnl = cgpaw.vdw(n_i, q0_i, R_ic, gd.cell_cv.diagonal().copy(),
470 gd.pbc_c,
471 repeat_c,
472 self.phi_ij, self.delta_i[1], self.D_j[1],
473 iA, iB,
474 self.rhistogram, dr,
475 self.Dhistogram, dD)
476 end = time.time()
477 if self.verbose:
478 print("vdW in rank ", world.rank, 'took', end - start)
480 self.rhistogram *= gd.dv**2 / dr
481 self.Dhistogram *= gd.dv**2 / dD
482 self.world.sum(self.rhistogram)
483 self.world.sum(self.Dhistogram)
484 E_vdwnl = self.world.sum(E_vdwnl * gd.dv**2)
485 return E_vdwnl
488class FFTVDWFunctional(VDWFunctionalBase):
489 """FFT implementation of vdW-DF."""
490 def __init__(self,
491 Nalpha=20, lambd=1.2, rcut=125.0, Nr=4096, size=None,
492 **kwargs):
493 """FFT vdW-DF.
495 parameters:
497 Nalpha: int
498 Number of interpolating cubic splines.
499 lambd: float
500 Parameter for defining geometric series of interpolation points.
501 rcut: float
502 Cutoff for kernel function.
503 Nr: int
504 Number of real-space points for kernel function.
505 size: 3-tuple
506 Size of FFT-grid. Use only for zero boundary conditions in
507 order to get a grid-size that works well with the FFT
508 algorithm (powers of two are more efficient). The density
509 array will be zero padded to the correct size."""
511 VDWFunctionalBase.__init__(self, **kwargs)
512 self.Nalpha = Nalpha
513 self.lambd = lambd
514 self.rcut = rcut
515 self.Nr = Nr
516 self.size = size
518 self.C_aip = None
519 self.phi_aajp = None
521 self.get_alphas()
523 def initialize(self, density, hamiltonian, wfs):
524 self.timer = wfs.timer
525 self.world = wfs.world
526 self.get_alphas()
528 def get_alphas(self):
529 if self.Nalpha < self.world.size:
530 rstride = self.world.size // self.Nalpha
531 newranks = range(0, self.world.size, rstride)[:self.Nalpha]
532 self.vdwcomm = self.world.new_communicator(newranks)
533 # self.vdwcomm will be None for those ranks not in the communicator
534 else:
535 self.vdwcomm = self.world
537 if self.vdwcomm is not None:
538 self.alphas = [a for a in range(self.Nalpha)
539 if (a * self.vdwcomm.size // self.Nalpha ==
540 self.vdwcomm.rank)]
541 else:
542 self.alphas = []
544 def initialize_more_things(self):
545 if self.alphas:
546 from gpaw.mpi import SerialCommunicator
547 scale_c1 = (self.shape / (1.0 * self.gd.N_c))[:, np.newaxis]
548 gdfft = GridDescriptor(self.shape, self.gd.cell_cv * scale_c1,
549 True, comm=SerialCommunicator())
550 k_k = construct_reciprocal(gdfft)[0][:,
551 :,
552 :self.shape[2] // 2 + 1]**0.5
553 k_k[0, 0, 0] = 0.0
555 self.dj_k = k_k / (2 * pi / self.rcut)
556 self.j_k = self.dj_k.astype(int)
557 self.dj_k -= self.j_k
558 self.dj_k *= 2 * pi / self.rcut
560 if self.verbose:
561 print('VDW: density array size:',
562 self.gd.get_size_of_global_array())
563 print('VDW: zero-padded array size:', self.shape)
564 print('VDW: maximum kinetic energy: '
565 f'{(0.5 * k_k.max()**2):.3f} Hartree')
567 assert self.j_k.max() < self.Nr // 2, \
568 f'Use larger Nr than {self.Nr:d}.'
570 else:
571 self.dj_k = None
572 self.j_k = None
574 def construct_cubic_splines(self):
575 """Construc interpolating splines for q0.
577 The recipe is from
579 https://en.wikipedia.org/wiki/Spline_(mathematics) """
581 n = self.Nalpha
582 lambd = self.lambd
583 q1 = self.q0cut * (lambd - 1) / (lambd**(n - 1) - 1)
584 q = q1 * (lambd**np.arange(n) - 1) / (lambd - 1)
586 if self.verbose:
587 print(f'VDW: using {n:d} cubic splines: 0.00, '
588 f'{q1:.2f}, ..., {q[-2]:.2f}, {q[-1]:.2f}')
590 y = np.eye(n)
591 a = y
592 h = q[1:] - q[:-1]
593 alpha = 3 * ((a[2:] - a[1:-1]) / h[1:, np.newaxis] -
594 (a[1:-1] - a[:-2]) / h[:-1, np.newaxis])
595 l = np.ones((n, n))
596 mu = np.zeros((n, n))
597 z = np.zeros((n, n))
598 for i in range(1, n - 1):
599 l[i] = 2 * (q[i + 1] - q[i - 1]) - h[i - 1] * mu[i - 1]
600 mu[i] = h[i] / l[i]
601 z[i] = (alpha[i - 1] - h[i - 1] * z[i - 1]) / l[i]
602 b = np.zeros((n, n))
603 c = np.zeros((n, n))
604 d = np.zeros((n, n))
605 for i in range(n - 2, -1, -1):
606 c[i] = z[i] - mu[i] * c[i + 1]
607 b[i] = (a[i + 1] - a[i]) / h[i] - h[i] * (c[i + 1] + 2 * c[i]) / 3
608 d[i] = (c[i + 1] - c[i]) / 3 / h[i]
610 self.C_aip = np.zeros((n, n, 4))
611 self.C_aip[:, :-1, 0] = a[:-1].T
612 self.C_aip[:, :-1, 1] = b[:-1].T
613 self.C_aip[:, :-1, 2] = c[:-1].T
614 self.C_aip[:, :-1, 3] = d[:-1].T
615 self.C_aip[-1, -1, 0] = 1.0
616 self.q_a = q
618 def p(self, alpha, q):
619 """Interpolating spline."""
620 i = int(log(q / self.q_a[1] * (self.lambd - 1) + 1) / log(self.lambd))
621 a, b, c, d = self.C_aip[alpha, i]
622 dq = q - self.q_a[i]
623 return a + dq * (b + dq * (c + dq * d))
625 def construct_fourier_transformed_kernels(self):
626 self.phi_aajp = phi_aajp = {}
627 M = self.Nr
628 rcut = self.rcut
629 r_g = np.linspace(0, rcut, M, 0)
630 k_j = np.arange(M // 2) * (2 * pi / rcut)
632 if self.verbose:
633 print(f'VDW: cutoff for fft\'ed kernel: '
634 f'{(0.5 * k_j[-1]**2):.3f} Hartree')
636 for a in range(self.Nalpha):
637 qa = self.q_a[a]
638 for b in range(a, self.Nalpha):
639 qb = self.q_a[b]
640 phi_g = [self.phi(qa * r, qb * r) for r in r_g]
641 phi_j = (fft(r_g * phi_g * 1j).real[:M // 2] *
642 (rcut / M * 4 * pi))
643 phi_j[0] = np.dot(r_g, r_g * phi_g) * (rcut / M * 4 * pi)
644 phi_j[1:] /= k_j[1:]
645 phi_aajp[a, b] = phi_aajp[b, a] = spline(k_j, phi_j)
647 def set_grid_descriptor(self, gd):
648 if self.size is None:
649 self.shape = gd.N_c.copy()
650 for c, n in enumerate(self.shape):
651 if not gd.pbc_c[c]:
652 # self.shape[c] = get_efficient_fft_size(n)
653 self.shape[c] = int(2**ceil(log(n) / log(2)))
654 else:
655 self.shape = np.array(self.size)
656 for c, n in enumerate(self.shape):
657 if gd.pbc_c[c]:
658 assert n == gd.N_c[c]
659 else:
660 assert n >= gd.N_c[c]
662 assert self.shape.shape == (3,)
663 self.gd = gd
665 def calculate_6d_integral(self, n_g, q0_g,
666 a2_g=None, e_LDAc_g=None, v_LDAc_g=None,
667 v_g=None, deda2_g=None):
668 self.timer.start('VdW-DF integral')
669 self.timer.start('splines')
670 if self.C_aip is None:
671 self.initialize_more_things()
672 self.construct_cubic_splines()
673 self.construct_fourier_transformed_kernels()
674 self.timer.stop('splines')
676 gd = self.gd
677 N = self.Nalpha
679 world = self.world
680 vdwcomm = self.vdwcomm
682 if self.alphas:
683 self.timer.start('hmm1')
684 i_g = (np.log(q0_g / self.q_a[1] * (self.lambd - 1) + 1) /
685 log(self.lambd)).astype(int)
686 dq0_g = q0_g - self.q_a[i_g]
687 self.timer.stop('hmm1')
688 else:
689 i_g = None
690 dq0_g = None
692 if self.verbose:
693 print('VDW: fft:', end=' ')
695 theta_ak = {}
696 p_ag = {}
697 for a in self.alphas:
698 self.timer.start('hmm2')
699 C_pg = self.C_aip[a, i_g].transpose((3, 0, 1, 2))
700 pa_g = (C_pg[0] + dq0_g *
701 (C_pg[1] + dq0_g *
702 (C_pg[2] + dq0_g * C_pg[3])))
703 self.timer.stop('hmm2')
704 del C_pg
705 self.timer.start('FFT')
706 theta_ak[a] = rfftn(n_g * pa_g,
707 self.shape,
708 [0, 1, 2]).copy()
709 self.timer.stop()
711 if not self.energy_only:
712 p_ag[a] = pa_g
713 del pa_g
714 if self.verbose:
715 print(a, end=' ')
716 sys.stdout.flush()
718 if self.energy_only:
719 del i_g
720 del dq0_g
722 if self.verbose:
723 print()
724 print('VDW: convolution:', end=' ')
726 F_ak = {}
727 dj_k = self.dj_k
728 energy = 0.0
729 for a in range(N):
730 if vdwcomm is not None:
731 vdw_ranka = a * vdwcomm.size // N
732 F_k = np.zeros((self.shape[0],
733 self.shape[1],
734 self.shape[2] // 2 + 1), complex)
735 self.timer.start('Convolution')
736 for b in self.alphas:
737 cgpaw.vdw2(self.phi_aajp[a, b], self.j_k, dj_k,
738 theta_ak[b], F_k)
739 self.timer.stop()
741 if vdwcomm is not None:
742 self.timer.start('gather')
743 for F in F_k:
744 vdwcomm.sum(F, vdw_ranka)
745 self.timer.stop('gather')
747 if vdwcomm is not None and vdwcomm.rank == vdw_ranka:
748 if not self.energy_only:
749 F_ak[a] = F_k
750 energy += np.vdot(theta_ak[a][:, :, 0], F_k[:, :, 0]).real
751 energy += np.vdot(theta_ak[a][:, :, -1], F_k[:, :, -1]).real
752 energy += 2 * np.vdot(theta_ak[a][:, :, 1:-1],
753 F_k[:, :, 1:-1]).real
755 if self.verbose:
756 print(a, end=' ')
757 sys.stdout.flush()
759 del theta_ak
761 if self.verbose:
762 print()
764 if not self.energy_only:
765 F_ag = {}
766 for a in self.alphas:
767 n1, n2, n3 = gd.get_size_of_global_array()
768 self.timer.start('iFFT')
769 F_ag[a] = irfftn(F_ak[a]).real[:n1, :n2, :n3].copy()
770 self.timer.stop()
771 del F_ak
773 self.timer.start('potential')
774 self.calculate_potential(n_g, a2_g, i_g, dq0_g, p_ag, F_ag,
775 e_LDAc_g, v_LDAc_g,
776 v_g, deda2_g)
777 self.timer.stop()
779 self.timer.stop()
780 return 0.5 * world.sum_scalar(energy) * gd.dv / self.shape.prod()
782 def calculate_potential(self, n_g, a2_g, i_g, dq0_g, p_ag, F_ag,
783 e_LDAc_g, v_LDAc_g, v_g, deda2_g):
784 world = self.world
786 self.timer.start('collect')
787 a2_g = self.gd.collect(a2_g, broadcast=True)
788 e_LDAc_g = self.gd.collect(e_LDAc_g, broadcast=True)
789 v_LDAc_g = self.gd.collect(v_LDAc_g, broadcast=True)
790 self.timer.stop('collect')
791 if self.alphas:
792 self.timer.start('p1')
793 dq0dn_g = ((pi / 3 / n_g)**(2.0 / 3.0) +
794 4 * pi / 3 * (e_LDAc_g / n_g - v_LDAc_g) / n_g +
795 7 * self.Zab / 108 / (3 * pi**2)**(1.0 / 3.0) * a2_g *
796 n_g**(-10.0 / 3.0))
797 dq0da2_g = -(self.Zab / 36 / (3 * pi**2)**(1.0 / 3.0) /
798 n_g**(7.0 / 3.0))
799 self.timer.stop('p1')
801 v0_g = np.zeros_like(n_g)
802 deda20_g = np.zeros_like(n_g)
804 for a in self.alphas:
805 self.timer.start('p2')
806 C_pg = self.C_aip[a, i_g].transpose((3, 0, 1, 2))
807 dpadq0_g = C_pg[1] + dq0_g * (2 * C_pg[2] + 3 * dq0_g * C_pg[3])
808 del C_pg
809 dthetatmp_g = n_g * dpadq0_g * self.dhdx_g
810 dthetaadn_g = p_ag[a] + dq0dn_g * dthetatmp_g
811 v0_g += dthetaadn_g * F_ag[a]
812 del dthetaadn_g
813 dthetaada2_g = dq0da2_g * dthetatmp_g
814 del dthetatmp_g
815 deda20_g += dthetaada2_g * F_ag[a]
816 del dthetaada2_g
817 self.timer.stop('p2')
819 self.timer.start('sum')
820 world.sum(v0_g)
821 world.sum(deda20_g)
822 self.timer.stop('sum')
823 slice = tuple(self.gd.get_slice())
824 v_g += v0_g[slice]
825 deda2_g += deda20_g[slice]
828class GGAFFTVDWFunctional(FFTVDWFunctional, GGA):
829 def __init__(self, name, kernel, stencil, **kwargs):
830 FFTVDWFunctional.__init__(self, **kwargs)
831 GGA.__init__(self, kernel, stencil)
832 self.name = name
834 def calculate_exchange(self, *args):
835 self.kernel.calculate(*args)
837 def set_grid_descriptor(self, gd):
838 GGA.set_grid_descriptor(self, gd)
839 FFTVDWFunctional.set_grid_descriptor(self, gd)
842class GGARealSpaceVDWFunctional(RealSpaceVDWFunctional, GGA):
843 def __init__(self, name, kernel, stencil, **kwargs):
844 RealSpaceVDWFunctional.__init__(self, **kwargs)
845 GGA.__init__(self, kernel)
846 self.name = name
848 def calculate_exchange(self, *args):
849 self.kernel.calculate(*args)
851 def set_grid_descriptor(self, gd):
852 GGA.set_grid_descriptor(self, gd)
855class MGGAFFTVDWFunctional(FFTVDWFunctional, MGGA):
856 def __init__(self, name, kernel, **kwargs):
857 FFTVDWFunctional.__init__(self, **kwargs)
858 MGGA.__init__(self, kernel)
859 self.name = name
861 def calculate_exchange(self, *args):
862 MGGA.process_mgga(self, *args)
864 def initialize(self, *args):
865 MGGA.initialize(self, *args)
866 FFTVDWFunctional.initialize(self, *args)
868 def set_grid_descriptor(self, gd):
869 if (self.gd is not None and
870 (self.gd.N_c == gd.N_c).all() and
871 (self.gd.pbc_c == gd.pbc_c).all() and
872 (self.gd.cell_cv == gd.cell_cv).all()):
873 return
875 MGGA.set_grid_descriptor(self, gd)
876 FFTVDWFunctional.set_grid_descriptor(self, gd)
879def spline(x, y):
880 n = len(y)
881 result = np.zeros((n, 4))
882 a, b, c, d = result.T
883 a[:] = y
884 h = x[1:] - x[:-1]
885 alpha = 3 * ((a[2:] - a[1:-1]) / h[1:] - (a[1:-1] - a[:-2]) / h[:-1])
886 l = np.ones(n)
887 mu = np.zeros(n)
888 z = np.zeros(n)
889 for i in range(1, n - 1):
890 l[i] = 2 * (x[i + 1] - x[i - 1]) - h[i - 1] * mu[i - 1]
891 mu[i] = h[i] / l[i]
892 z[i] = (alpha[i - 1] - h[i - 1] * z[i - 1]) / l[i]
893 for i in range(n - 2, -1, -1):
894 c[i] = z[i] - mu[i] * c[i + 1]
895 b[i] = (a[i + 1] - a[i]) / h[i] - h[i] * (c[i + 1] + 2 * c[i]) / 3
896 d[i] = (c[i + 1] - c[i]) / 3 / h[i]
897 return result