Coverage for gpaw/xc/vdw.py: 75%

565 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2025-07-20 00:19 +0000

1"""Van der Waals density functional. 

2 

3This module implements the Dion-Rydberg–Schröder–Langreth–Lundqvist 

4XC-functional. There are two implementations: 

5 

61. A simple real-space double sum. 

7 

82. A more efficient FFT implementation based on the Román-Pérez–Soler paper. 

9 

10""" 

11 

12import os 

13import sys 

14import time 

15from math import sin, cos, exp, pi, log, sqrt, ceil 

16 

17import numpy as np 

18from numpy.fft import fft, rfftn, irfftn 

19from ase.utils import seterr 

20 

21from gpaw.utilities.timing import nulltimer 

22from gpaw.xc.libxc import LibXC 

23from gpaw.xc.gga import GGA, gga_vars, add_gradient_correction 

24from gpaw.xc.mgga import MGGA 

25from gpaw.grid_descriptor import GridDescriptor 

26from gpaw.utilities.tools import construct_reciprocal 

27from gpaw import setup_paths 

28import gpaw.mpi as mpi 

29import gpaw.cgpaw as cgpaw 

30 

31 

32def T(w, x, y, z): 

33 return 0.5 * ((1.0 / (w + x) + 1.0 / (y + z)) * 

34 (1.0 / ((w + y) * (x + z)) + 1.0 / ((w + z) * (y + x)))) 

35 

36 

37def W(a, b): 

38 return 2 * ((3 - a**2) * b * cos(b) * sin(a) + 

39 (3 - b**2) * a * cos(a) * sin(b) + 

40 (a**2 + b**2 - 3) * sin(a) * sin(b) - 

41 3 * a * b * cos(a) * cos(b)) / (a * b)**3 

42 

43 

44eta = 8 * pi / 9 

45 

46 

47def nu(y, d): 

48 return 0.5 * y**2 / (1 - exp(-0.5 * eta * (y / d)**2)) 

49 

50 

51def f(a, b, d, dp): 

52 va = nu(a, d) 

53 vb = nu(b, d) 

54 vpa = nu(a, dp) 

55 vpb = nu(b, dp) 

56 return 2 * (a * b)**2 * W(a, b) * T(va, vb, vpa, vpb) / pi**2 

57 

58 

59def phi(d, dp): 

60 """vdW-DF kernel.""" 

61 from scipy.integrate import quad 

62 kwargs = dict(epsabs=1.0e-6, epsrel=1.0e-6, limit=400) 

63 cut = 35 

64 return quad(lambda y: quad(f, 0, cut, (y, d, dp), **kwargs)[0], 

65 0, cut, **kwargs)[0] 

66 

67 

68C = 12 * (4 * pi / 9)**3 

69 

70 

71def phi_asymptotic(d, dp): 

72 """Asymptotic behavior of vdW-DF kernel.""" 

73 d2 = d**2 

74 dp2 = dp**2 

75 return -C / (d2 * dp2 * (d2 + dp2)) 

76 

77 

78def hRPS(x, xc=1.0): 

79 """Cutoff function from Román-Pérez–Soler paper.""" 

80 x1 = x / xc 

81 xm = x1 * 1.0 

82 y = -x1 

83 z = 1.0 + x1 

84 for m in range(2, 13): 

85 xm *= x1 

86 y -= xm / m 

87 if m < 12: 

88 z += xm 

89 np.clip(y, -1e10, 1e10, y) 

90 y = np.exp(y) 

91 return xc * (1.0 - y), z * y 

92 

93 

94def VDWFunctional(name, fft=True, stencil=2, **kwargs): 

95 if name == 'vdW-DF': 

96 kernel = LibXC('GGA_X_PBE_R+LDA_C_PW') 

97 elif name == 'vdW-DF2': 

98 kernel = LibXC('GGA_X_RPW86+LDA_C_PW') 

99 kwargs['Zab'] = -1.887 

100 elif name == 'optPBE-vdW': 

101 kernel = LibXC('GGA_X_OPTPBE_VDW+LDA_C_PW') 

102 elif name == 'optB88-vdW': 

103 kernel = LibXC('GGA_X_OPTB88_VDW+LDA_C_PW') 

104 elif name == 'C09-vdW': 

105 kernel = LibXC('GGA_X_C09X+LDA_C_PW') 

106 elif name == 'BEEF-vdW': 

107 from gpaw.xc.bee import BEEVDWKernel 

108 kernel = BEEVDWKernel('BEE2', None, 

109 0.600166476948828631066, 

110 0.399833523051171368934) 

111 kwargs['Zab'] = -1.887 

112 kwargs['setup_name'] = 'PBE' 

113 elif name == 'mBEEF-vdW': 

114 from gpaw.xc.bee import BEEVDWKernel 

115 kernel = BEEVDWKernel('BEE3', None, 0.405258352, 0.356642240) 

116 kwargs['Zab'] = -1.887 

117 kwargs['vdwcoef'] = 0.886774972 

118 kwargs['Nr'] = 4096 

119 kwargs['setup_name'] = 'PBEsol' 

120 assert fft 

121 return MGGAFFTVDWFunctional(name, kernel, **kwargs) 

122 else: 

123 2 / 0 

124 if fft: 

125 return GGAFFTVDWFunctional(name, kernel, stencil, **kwargs) 

126 return GGARealSpaceVDWFunctional(name, kernel, stencil, **kwargs) 

127 

128 

129class VDWFunctionalBase: 

130 """Base class for vdW-DF.""" 

131 def __init__(self, world=None, Zab=-0.8491, vdwcoef=1.0, q0cut=5.0, 

132 phi0=0.5, ds=1.0, Dmax=20.0, nD=201, ndelta=21, 

133 soft_correction=False, setup_name='revPBE', 

134 verbose=False, energy_only=False): 

135 """vdW-DF. 

136 

137 parameters: 

138 

139 name: str 

140 Name of functional. 

141 world: MPI communicator 

142 Communicator to parallelize over. Defaults to gpaw.mpi.world. 

143 q0cut: float 

144 Maximum value for q0. 

145 phi0: float 

146 Smooth value for phi(0,0). 

147 ds: float 

148 Cutoff for smooth kernel. 

149 Dmax: float 

150 Maximum value for D. 

151 nD: int 

152 Number of values for D in kernel-table. 

153 ndelta: int 

154 Number of values for delta in kernel-table. 

155 soft_correction: bool 

156 Correct for soft kernel. 

157 kernel: 

158 Which kernel to use. 

159 Zab: 

160 parameter in nonlocal kernel. 

161 vdwcoef: float 

162 Scaling of vdW energy. 

163 verbose: bool 

164 Print useful information. 

165 """ 

166 

167 if world is None: 

168 self.world = mpi.world 

169 else: 

170 self.world = world 

171 

172 self.Zab = Zab 

173 self.vdwcoef = vdwcoef 

174 self.q0cut = q0cut 

175 self.phi0 = phi0 

176 self.ds = ds 

177 

178 self.delta_i = np.linspace(0, 1.0, ndelta) 

179 self.D_j = np.linspace(0, Dmax, nD) 

180 

181 self.verbose = verbose 

182 

183 self.read_table() 

184 

185 self.soft_correction = soft_correction 

186 if soft_correction: 

187 dD = self.D_j[1] 

188 self.C_soft = np.dot(self.D_j**2, self.phi_ij[0]) * 4 * pi * dD 

189 

190 self.gd = None 

191 self.energy_only = energy_only 

192 self.timer = nulltimer 

193 

194 self.LDAc = LibXC('LDA_C_PW') 

195 self.setup_name = setup_name 

196 

197 def get_setup_name(self): 

198 return self.setup_name 

199 

200 def get_Ecnl(self): 

201 return self.Ecnl 

202 

203 def stress_tensor_contribution(self, n_sg, skip_sum=False): 

204 raise NotImplementedError('Calculation of stress tensor is not ' + 

205 f'implemented for {self.name}') 

206 

207 def calculate_impl(self, gd, n_sg, v_sg, e_g): 

208 sigma_xg, dedsigma_xg, gradn_svg = gga_vars(gd, self.grad_v, n_sg) 

209 self.calculate_exchange(e_g, n_sg, v_sg, sigma_xg, dedsigma_xg) 

210 self.calculate_correlation(e_g, n_sg, v_sg, sigma_xg, dedsigma_xg) 

211 add_gradient_correction(self.grad_v, gradn_svg, sigma_xg, 

212 dedsigma_xg, v_sg) 

213 

214 def calculate_exchange(self, e_g, n_sg, dedn_sg, sigma_xg, dedsigma_xg): 

215 raise NotImplementedError 

216 

217 def calculate_correlation(self, e_g, n_sg, dedn_sg, sigma_xg, dedsigma_xg): 

218 eLDAc_g = self.gd.empty() 

219 vLDAc_sg = self.gd.zeros(1) 

220 

221 if self.vdwcoef == 0.0: 

222 return 

223 

224 if len(n_sg) == 1: 

225 self.LDAc.calculate(eLDAc_g, n_sg, vLDAc_sg) 

226 e = self.get_non_local_energy(n_sg[0], sigma_xg[0], 

227 eLDAc_g, 

228 vLDAc_sg[0], 

229 dedn_sg[0], dedsigma_xg[0]) 

230 else: 

231 n_sg = n_sg.sum(0) 

232 n_sg.shape = (1,) + n_sg.shape 

233 self.LDAc.calculate(eLDAc_g, n_sg, vLDAc_sg) 

234 v_g = np.zeros_like(e_g) 

235 deda2nl_g = np.zeros_like(e_g) 

236 a2_g = sigma_xg[0] + 2 * sigma_xg[1] + sigma_xg[2] 

237 e = self.get_non_local_energy(n_sg[0], a2_g, eLDAc_g, 

238 vLDAc_sg[0], 

239 v_g, deda2nl_g) 

240 dedsigma_xg[0] += self.vdwcoef * deda2nl_g 

241 dedsigma_xg[1] += self.vdwcoef * 2 * deda2nl_g 

242 dedsigma_xg[2] += self.vdwcoef * deda2nl_g 

243 dedn_sg += self.vdwcoef * v_g 

244 

245 if self.gd.comm.rank == 0: 

246 e_g[0, 0, 0] += self.vdwcoef * e / self.gd.dv 

247 

248 def get_non_local_energy(self, n_g=None, a2_g=None, e_LDAc_g=None, 

249 v_LDAc_g=None, v_g=None, deda2_g=None): 

250 """Calculate non-local correlation energy. 

251 

252 parameters: 

253 

254 n_g: ndarray 

255 Density. 

256 a2_g: ndarray 

257 Absolute value of the gradient of the density - squared. 

258 e_LDAc_g: ndarray 

259 LDA correlation energy density. 

260 """ 

261 

262 gd = self.gd 

263 

264 n_g = n_g.clip(1e-7, np.inf) 

265 

266 # Calculate q0 and cut it off smoothly at q0cut: 

267 kF_g = (3 * pi**2 * n_g)**(1.0 / 3.0) 

268 q0_g, dhdx_g = hRPS(kF_g - 

269 4 * pi / 3 * e_LDAc_g / n_g - 

270 self.Zab / 36 / kF_g * a2_g / n_g**2, self.q0cut) 

271 

272 if self.verbose: 

273 print('VDW: q0 (min, mean, max): ' 

274 f'({q0_g.min():f}, {q0_g.mean():f}, {q0_g.max():f})') 

275 

276 if self.soft_correction: 

277 dEcnl = -gd.integrate(n_g**2 / q0_g**3) * 0.5 * self.C_soft 

278 else: 

279 dEcnl = 0.0 

280 

281 # Distribute density and q0 to all processors: 

282 n_g = gd.collect(n_g, broadcast=True) 

283 q0_g = gd.collect(q0_g, broadcast=True) 

284 

285 if not self.energy_only: 

286 self.dhdx_g = gd.collect(dhdx_g, broadcast=True) 

287 

288 Ecnl = self.calculate_6d_integral(n_g, q0_g, a2_g, e_LDAc_g, v_LDAc_g, 

289 v_g, deda2_g) 

290 Ecnl += dEcnl 

291 self.Ecnl = Ecnl 

292 return Ecnl 

293 

294 def read_table(self): 

295 name = (f'phi-{self.phi0:.3f}-{self.ds:.3f}-{self.D_j[-1]:.3f}' 

296 f'-{len(self.delta_i):d}-{len(self.D_j):d}.txt') 

297 dirs = setup_paths + ['.'] 

298 

299 for dir in dirs: 

300 filename = os.path.join(dir, name) 

301 if os.path.isfile(filename): 

302 self.phi_ij = np.loadtxt(filename) 

303 if self.verbose: 

304 print('VDW: using', filename) 

305 return 

306 

307 print('VDW: Could not find table file:', name) 

308 self.make_table(name) 

309 

310 def make_table(self, name): 

311 print('VDW: Generating vdW-DF kernel ...') 

312 print('VDW:', end=' ') 

313 ndelta = len(self.delta_i) 

314 nD = len(self.D_j) 

315 self.phi_ij = np.zeros((ndelta, nD)) 

316 for i in range(self.world.rank, ndelta, self.world.size): 

317 print(ndelta - i, end=' ') 

318 sys.stdout.flush() 

319 delta = self.delta_i[i] 

320 for j in range(nD - 1, -1, -1): 

321 D = self.D_j[j] 

322 d = D * (1.0 + delta) 

323 dp = D * (1.0 - delta) 

324 if d**2 + dp**2 > self.ds**2: 

325 with seterr(divide='ignore'): 

326 self.phi_ij[i, j] = phi(d, dp) 

327 else: 

328 P = np.polyfit([0, self.D_j[j + 1]**2, self.D_j[j + 2]**2], 

329 [self.phi0, 

330 self.phi_ij[i, j + 1], 

331 self.phi_ij[i, j + 2]], 

332 2) 

333 self.phi_ij[i, :j + 3] = np.polyval(P, self.D_j[:j + 3]**2) 

334 break 

335 

336 self.world.sum(self.phi_ij) 

337 

338 print() 

339 print('VDW: Done!') 

340 header = (f'phi0={self.phi0:.3f}, ds={self.ds:.3f}, ' 

341 f'Dmax={self.D_j[-1]:.3f}, nD={len(self.delta_i)}, ' 

342 f'ndelta={len(self.D_j)}') 

343 if self.world.rank == 0: 

344 np.savetxt(name, self.phi_ij, header=header) 

345 

346 def make_prl_plot(self, multiply_by_4_pi_D_squared=True): 

347 import matplotlib.pyplot as plt 

348 x = np.linspace(0, 8.0, 100) 

349 for delta in [0, 0.5, 0.9]: 

350 y = [self.phi(D * (1.0 + delta), D * (1.0 - delta)) 

351 for D in x] 

352 if multiply_by_4_pi_D_squared: 

353 y *= 4 * pi * x**2 

354 plt.plot(x, y, label=r'$\delta=%.1f$' % delta) 

355 plt.legend(loc='best') 

356 plt.plot(x, np.zeros(len(x)), 'k-') 

357 plt.xlabel('D') 

358 plt.ylabel(r'$4\pi D^2 \phi(\rm{Hartree})$') 

359 plt.show() 

360 

361 def phi(self, d, dp): 

362 """Kernel function. 

363 

364 Uses bi-linear interpolation and returns zero for D > Dmax. 

365 """ 

366 

367 P = self.phi_ij 

368 D = (d + dp) / 2.0 

369 if D < 1e-14: 

370 return P[0, 0] 

371 if D >= self.D_j[-1]: 

372 return 0.0 

373 

374 delta = abs((d - dp) / (2 * D)) 

375 ddelta = self.delta_i[1] 

376 x = delta / ddelta 

377 i = int(x) 

378 if i == len(self.delta_i) - 1: 

379 i -= 1 

380 x = 1.0 

381 else: 

382 x -= i 

383 

384 dD = self.D_j[1] 

385 y = D / dD 

386 j = int(y) 

387 y -= j 

388 return (x * (y * P[i + 1, j + 1] + 

389 (1 - y) * P[i + 1, j]) + 

390 (1 - x) * (y * P[i, j + 1] + 

391 (1 - y) * P[i, j])) 

392 

393 

394class RealSpaceVDWFunctional(VDWFunctionalBase): 

395 """Real-space implementation of vdW-DF.""" 

396 def __init__(self, repeat=None, ncut=0.0005, **kwargs): 

397 """Real-space vdW-DF. 

398 

399 parameters: 

400 

401 repeat: 3-tuple 

402 Repeat the unit cell. 

403 ncut: float 

404 Density cutoff. 

405 """ 

406 

407 VDWFunctionalBase.__init__(self, **kwargs) 

408 self.repeat = repeat 

409 self.ncut = ncut 

410 

411 def calculate_6d_integral(self, n_g, q0_g, 

412 a2_g=None, e_LDAc_g=None, v_LDAc_g=None, 

413 v_g=None, deda2_g=None): 

414 """Real-space double-sum.""" 

415 gd = self.gd 

416 if not gd.orthogonal: 

417 raise NotImplementedError('Real-space vdW calculations require ' + 

418 'an orthogonal cell.') 

419 n_c = n_g.shape 

420 R_gc = np.empty(n_c + (3,)) 

421 h_c = gd.h_cv.diagonal() 

422 R_gc[..., 0] = (np.arange(0, n_c[0]) * h_c[0]).reshape((-1, 1, 1)) 

423 R_gc[..., 1] = (np.arange(0, n_c[1]) * h_c[1]).reshape((-1, 1)) 

424 R_gc[..., 2] = np.arange(0, n_c[2]) * h_c[2] 

425 

426 mask_g = (n_g.ravel() > self.ncut) 

427 R_ic = R_gc.reshape((-1, 3)).compress(mask_g, axis=0) 

428 n_i = n_g.ravel().compress(mask_g) 

429 q0_i = q0_g.ravel().compress(mask_g) 

430 

431 # Number of grid points: 

432 ni = len(n_i) 

433 

434 if self.verbose: 

435 print('VDW: number of points:', ni) 

436 

437 # Number of pairs per processor: 

438 world = self.world 

439 p = ni * (ni - 1) // 2 // world.size 

440 

441 # When doing supercell, the pairs are not that important 

442 if np.any(self.repeat): # XXX This can be further optimized 

443 iA = world.rank * (ni // world.size) 

444 iB = (world.rank + 1) * (ni // world.size) 

445 else: 

446 iA = 0 

447 for r in range(world.size): 

448 iB = iA + int(0.5 - iA + sqrt((iA - 0.5)**2 + 2 * p)) 

449 if r == world.rank: 

450 break 

451 iA = iB 

452 

453 assert iA <= iB 

454 

455 if world.rank == world.size - 1: 

456 iB = ni 

457 

458 if self.repeat is None: 

459 repeat_c = np.zeros(3, int) 

460 else: 

461 repeat_c = np.asarray(self.repeat, int) 

462 

463 self.rhistogram = np.zeros(200) 

464 self.Dhistogram = np.zeros(200) 

465 dr = 0.05 

466 dD = 0.05 

467 if self.verbose: 

468 start = time.time() 

469 E_vdwnl = cgpaw.vdw(n_i, q0_i, R_ic, gd.cell_cv.diagonal().copy(), 

470 gd.pbc_c, 

471 repeat_c, 

472 self.phi_ij, self.delta_i[1], self.D_j[1], 

473 iA, iB, 

474 self.rhistogram, dr, 

475 self.Dhistogram, dD) 

476 end = time.time() 

477 if self.verbose: 

478 print("vdW in rank ", world.rank, 'took', end - start) 

479 

480 self.rhistogram *= gd.dv**2 / dr 

481 self.Dhistogram *= gd.dv**2 / dD 

482 self.world.sum(self.rhistogram) 

483 self.world.sum(self.Dhistogram) 

484 E_vdwnl = self.world.sum(E_vdwnl * gd.dv**2) 

485 return E_vdwnl 

486 

487 

488class FFTVDWFunctional(VDWFunctionalBase): 

489 """FFT implementation of vdW-DF.""" 

490 def __init__(self, 

491 Nalpha=20, lambd=1.2, rcut=125.0, Nr=4096, size=None, 

492 **kwargs): 

493 """FFT vdW-DF. 

494 

495 parameters: 

496 

497 Nalpha: int 

498 Number of interpolating cubic splines. 

499 lambd: float 

500 Parameter for defining geometric series of interpolation points. 

501 rcut: float 

502 Cutoff for kernel function. 

503 Nr: int 

504 Number of real-space points for kernel function. 

505 size: 3-tuple 

506 Size of FFT-grid. Use only for zero boundary conditions in 

507 order to get a grid-size that works well with the FFT 

508 algorithm (powers of two are more efficient). The density 

509 array will be zero padded to the correct size.""" 

510 

511 VDWFunctionalBase.__init__(self, **kwargs) 

512 self.Nalpha = Nalpha 

513 self.lambd = lambd 

514 self.rcut = rcut 

515 self.Nr = Nr 

516 self.size = size 

517 

518 self.C_aip = None 

519 self.phi_aajp = None 

520 

521 self.get_alphas() 

522 

523 def initialize(self, density, hamiltonian, wfs): 

524 self.timer = wfs.timer 

525 self.world = wfs.world 

526 self.get_alphas() 

527 

528 def get_alphas(self): 

529 if self.Nalpha < self.world.size: 

530 rstride = self.world.size // self.Nalpha 

531 newranks = range(0, self.world.size, rstride)[:self.Nalpha] 

532 self.vdwcomm = self.world.new_communicator(newranks) 

533 # self.vdwcomm will be None for those ranks not in the communicator 

534 else: 

535 self.vdwcomm = self.world 

536 

537 if self.vdwcomm is not None: 

538 self.alphas = [a for a in range(self.Nalpha) 

539 if (a * self.vdwcomm.size // self.Nalpha == 

540 self.vdwcomm.rank)] 

541 else: 

542 self.alphas = [] 

543 

544 def initialize_more_things(self): 

545 if self.alphas: 

546 from gpaw.mpi import SerialCommunicator 

547 scale_c1 = (self.shape / (1.0 * self.gd.N_c))[:, np.newaxis] 

548 gdfft = GridDescriptor(self.shape, self.gd.cell_cv * scale_c1, 

549 True, comm=SerialCommunicator()) 

550 k_k = construct_reciprocal(gdfft)[0][:, 

551 :, 

552 :self.shape[2] // 2 + 1]**0.5 

553 k_k[0, 0, 0] = 0.0 

554 

555 self.dj_k = k_k / (2 * pi / self.rcut) 

556 self.j_k = self.dj_k.astype(int) 

557 self.dj_k -= self.j_k 

558 self.dj_k *= 2 * pi / self.rcut 

559 

560 if self.verbose: 

561 print('VDW: density array size:', 

562 self.gd.get_size_of_global_array()) 

563 print('VDW: zero-padded array size:', self.shape) 

564 print('VDW: maximum kinetic energy: ' 

565 f'{(0.5 * k_k.max()**2):.3f} Hartree') 

566 

567 assert self.j_k.max() < self.Nr // 2, \ 

568 f'Use larger Nr than {self.Nr:d}.' 

569 

570 else: 

571 self.dj_k = None 

572 self.j_k = None 

573 

574 def construct_cubic_splines(self): 

575 """Construc interpolating splines for q0. 

576 

577 The recipe is from 

578 

579 https://en.wikipedia.org/wiki/Spline_(mathematics) """ 

580 

581 n = self.Nalpha 

582 lambd = self.lambd 

583 q1 = self.q0cut * (lambd - 1) / (lambd**(n - 1) - 1) 

584 q = q1 * (lambd**np.arange(n) - 1) / (lambd - 1) 

585 

586 if self.verbose: 

587 print(f'VDW: using {n:d} cubic splines: 0.00, ' 

588 f'{q1:.2f}, ..., {q[-2]:.2f}, {q[-1]:.2f}') 

589 

590 y = np.eye(n) 

591 a = y 

592 h = q[1:] - q[:-1] 

593 alpha = 3 * ((a[2:] - a[1:-1]) / h[1:, np.newaxis] - 

594 (a[1:-1] - a[:-2]) / h[:-1, np.newaxis]) 

595 l = np.ones((n, n)) 

596 mu = np.zeros((n, n)) 

597 z = np.zeros((n, n)) 

598 for i in range(1, n - 1): 

599 l[i] = 2 * (q[i + 1] - q[i - 1]) - h[i - 1] * mu[i - 1] 

600 mu[i] = h[i] / l[i] 

601 z[i] = (alpha[i - 1] - h[i - 1] * z[i - 1]) / l[i] 

602 b = np.zeros((n, n)) 

603 c = np.zeros((n, n)) 

604 d = np.zeros((n, n)) 

605 for i in range(n - 2, -1, -1): 

606 c[i] = z[i] - mu[i] * c[i + 1] 

607 b[i] = (a[i + 1] - a[i]) / h[i] - h[i] * (c[i + 1] + 2 * c[i]) / 3 

608 d[i] = (c[i + 1] - c[i]) / 3 / h[i] 

609 

610 self.C_aip = np.zeros((n, n, 4)) 

611 self.C_aip[:, :-1, 0] = a[:-1].T 

612 self.C_aip[:, :-1, 1] = b[:-1].T 

613 self.C_aip[:, :-1, 2] = c[:-1].T 

614 self.C_aip[:, :-1, 3] = d[:-1].T 

615 self.C_aip[-1, -1, 0] = 1.0 

616 self.q_a = q 

617 

618 def p(self, alpha, q): 

619 """Interpolating spline.""" 

620 i = int(log(q / self.q_a[1] * (self.lambd - 1) + 1) / log(self.lambd)) 

621 a, b, c, d = self.C_aip[alpha, i] 

622 dq = q - self.q_a[i] 

623 return a + dq * (b + dq * (c + dq * d)) 

624 

625 def construct_fourier_transformed_kernels(self): 

626 self.phi_aajp = phi_aajp = {} 

627 M = self.Nr 

628 rcut = self.rcut 

629 r_g = np.linspace(0, rcut, M, 0) 

630 k_j = np.arange(M // 2) * (2 * pi / rcut) 

631 

632 if self.verbose: 

633 print(f'VDW: cutoff for fft\'ed kernel: ' 

634 f'{(0.5 * k_j[-1]**2):.3f} Hartree') 

635 

636 for a in range(self.Nalpha): 

637 qa = self.q_a[a] 

638 for b in range(a, self.Nalpha): 

639 qb = self.q_a[b] 

640 phi_g = [self.phi(qa * r, qb * r) for r in r_g] 

641 phi_j = (fft(r_g * phi_g * 1j).real[:M // 2] * 

642 (rcut / M * 4 * pi)) 

643 phi_j[0] = np.dot(r_g, r_g * phi_g) * (rcut / M * 4 * pi) 

644 phi_j[1:] /= k_j[1:] 

645 phi_aajp[a, b] = phi_aajp[b, a] = spline(k_j, phi_j) 

646 

647 def set_grid_descriptor(self, gd): 

648 if self.size is None: 

649 self.shape = gd.N_c.copy() 

650 for c, n in enumerate(self.shape): 

651 if not gd.pbc_c[c]: 

652 # self.shape[c] = get_efficient_fft_size(n) 

653 self.shape[c] = int(2**ceil(log(n) / log(2))) 

654 else: 

655 self.shape = np.array(self.size) 

656 for c, n in enumerate(self.shape): 

657 if gd.pbc_c[c]: 

658 assert n == gd.N_c[c] 

659 else: 

660 assert n >= gd.N_c[c] 

661 

662 assert self.shape.shape == (3,) 

663 self.gd = gd 

664 

665 def calculate_6d_integral(self, n_g, q0_g, 

666 a2_g=None, e_LDAc_g=None, v_LDAc_g=None, 

667 v_g=None, deda2_g=None): 

668 self.timer.start('VdW-DF integral') 

669 self.timer.start('splines') 

670 if self.C_aip is None: 

671 self.initialize_more_things() 

672 self.construct_cubic_splines() 

673 self.construct_fourier_transformed_kernels() 

674 self.timer.stop('splines') 

675 

676 gd = self.gd 

677 N = self.Nalpha 

678 

679 world = self.world 

680 vdwcomm = self.vdwcomm 

681 

682 if self.alphas: 

683 self.timer.start('hmm1') 

684 i_g = (np.log(q0_g / self.q_a[1] * (self.lambd - 1) + 1) / 

685 log(self.lambd)).astype(int) 

686 dq0_g = q0_g - self.q_a[i_g] 

687 self.timer.stop('hmm1') 

688 else: 

689 i_g = None 

690 dq0_g = None 

691 

692 if self.verbose: 

693 print('VDW: fft:', end=' ') 

694 

695 theta_ak = {} 

696 p_ag = {} 

697 for a in self.alphas: 

698 self.timer.start('hmm2') 

699 C_pg = self.C_aip[a, i_g].transpose((3, 0, 1, 2)) 

700 pa_g = (C_pg[0] + dq0_g * 

701 (C_pg[1] + dq0_g * 

702 (C_pg[2] + dq0_g * C_pg[3]))) 

703 self.timer.stop('hmm2') 

704 del C_pg 

705 self.timer.start('FFT') 

706 theta_ak[a] = rfftn(n_g * pa_g, 

707 self.shape, 

708 [0, 1, 2]).copy() 

709 self.timer.stop() 

710 

711 if not self.energy_only: 

712 p_ag[a] = pa_g 

713 del pa_g 

714 if self.verbose: 

715 print(a, end=' ') 

716 sys.stdout.flush() 

717 

718 if self.energy_only: 

719 del i_g 

720 del dq0_g 

721 

722 if self.verbose: 

723 print() 

724 print('VDW: convolution:', end=' ') 

725 

726 F_ak = {} 

727 dj_k = self.dj_k 

728 energy = 0.0 

729 for a in range(N): 

730 if vdwcomm is not None: 

731 vdw_ranka = a * vdwcomm.size // N 

732 F_k = np.zeros((self.shape[0], 

733 self.shape[1], 

734 self.shape[2] // 2 + 1), complex) 

735 self.timer.start('Convolution') 

736 for b in self.alphas: 

737 cgpaw.vdw2(self.phi_aajp[a, b], self.j_k, dj_k, 

738 theta_ak[b], F_k) 

739 self.timer.stop() 

740 

741 if vdwcomm is not None: 

742 self.timer.start('gather') 

743 for F in F_k: 

744 vdwcomm.sum(F, vdw_ranka) 

745 self.timer.stop('gather') 

746 

747 if vdwcomm is not None and vdwcomm.rank == vdw_ranka: 

748 if not self.energy_only: 

749 F_ak[a] = F_k 

750 energy += np.vdot(theta_ak[a][:, :, 0], F_k[:, :, 0]).real 

751 energy += np.vdot(theta_ak[a][:, :, -1], F_k[:, :, -1]).real 

752 energy += 2 * np.vdot(theta_ak[a][:, :, 1:-1], 

753 F_k[:, :, 1:-1]).real 

754 

755 if self.verbose: 

756 print(a, end=' ') 

757 sys.stdout.flush() 

758 

759 del theta_ak 

760 

761 if self.verbose: 

762 print() 

763 

764 if not self.energy_only: 

765 F_ag = {} 

766 for a in self.alphas: 

767 n1, n2, n3 = gd.get_size_of_global_array() 

768 self.timer.start('iFFT') 

769 F_ag[a] = irfftn(F_ak[a]).real[:n1, :n2, :n3].copy() 

770 self.timer.stop() 

771 del F_ak 

772 

773 self.timer.start('potential') 

774 self.calculate_potential(n_g, a2_g, i_g, dq0_g, p_ag, F_ag, 

775 e_LDAc_g, v_LDAc_g, 

776 v_g, deda2_g) 

777 self.timer.stop() 

778 

779 self.timer.stop() 

780 return 0.5 * world.sum_scalar(energy) * gd.dv / self.shape.prod() 

781 

782 def calculate_potential(self, n_g, a2_g, i_g, dq0_g, p_ag, F_ag, 

783 e_LDAc_g, v_LDAc_g, v_g, deda2_g): 

784 world = self.world 

785 

786 self.timer.start('collect') 

787 a2_g = self.gd.collect(a2_g, broadcast=True) 

788 e_LDAc_g = self.gd.collect(e_LDAc_g, broadcast=True) 

789 v_LDAc_g = self.gd.collect(v_LDAc_g, broadcast=True) 

790 self.timer.stop('collect') 

791 if self.alphas: 

792 self.timer.start('p1') 

793 dq0dn_g = ((pi / 3 / n_g)**(2.0 / 3.0) + 

794 4 * pi / 3 * (e_LDAc_g / n_g - v_LDAc_g) / n_g + 

795 7 * self.Zab / 108 / (3 * pi**2)**(1.0 / 3.0) * a2_g * 

796 n_g**(-10.0 / 3.0)) 

797 dq0da2_g = -(self.Zab / 36 / (3 * pi**2)**(1.0 / 3.0) / 

798 n_g**(7.0 / 3.0)) 

799 self.timer.stop('p1') 

800 

801 v0_g = np.zeros_like(n_g) 

802 deda20_g = np.zeros_like(n_g) 

803 

804 for a in self.alphas: 

805 self.timer.start('p2') 

806 C_pg = self.C_aip[a, i_g].transpose((3, 0, 1, 2)) 

807 dpadq0_g = C_pg[1] + dq0_g * (2 * C_pg[2] + 3 * dq0_g * C_pg[3]) 

808 del C_pg 

809 dthetatmp_g = n_g * dpadq0_g * self.dhdx_g 

810 dthetaadn_g = p_ag[a] + dq0dn_g * dthetatmp_g 

811 v0_g += dthetaadn_g * F_ag[a] 

812 del dthetaadn_g 

813 dthetaada2_g = dq0da2_g * dthetatmp_g 

814 del dthetatmp_g 

815 deda20_g += dthetaada2_g * F_ag[a] 

816 del dthetaada2_g 

817 self.timer.stop('p2') 

818 

819 self.timer.start('sum') 

820 world.sum(v0_g) 

821 world.sum(deda20_g) 

822 self.timer.stop('sum') 

823 slice = tuple(self.gd.get_slice()) 

824 v_g += v0_g[slice] 

825 deda2_g += deda20_g[slice] 

826 

827 

828class GGAFFTVDWFunctional(FFTVDWFunctional, GGA): 

829 def __init__(self, name, kernel, stencil, **kwargs): 

830 FFTVDWFunctional.__init__(self, **kwargs) 

831 GGA.__init__(self, kernel, stencil) 

832 self.name = name 

833 

834 def calculate_exchange(self, *args): 

835 self.kernel.calculate(*args) 

836 

837 def set_grid_descriptor(self, gd): 

838 GGA.set_grid_descriptor(self, gd) 

839 FFTVDWFunctional.set_grid_descriptor(self, gd) 

840 

841 

842class GGARealSpaceVDWFunctional(RealSpaceVDWFunctional, GGA): 

843 def __init__(self, name, kernel, stencil, **kwargs): 

844 RealSpaceVDWFunctional.__init__(self, **kwargs) 

845 GGA.__init__(self, kernel) 

846 self.name = name 

847 

848 def calculate_exchange(self, *args): 

849 self.kernel.calculate(*args) 

850 

851 def set_grid_descriptor(self, gd): 

852 GGA.set_grid_descriptor(self, gd) 

853 

854 

855class MGGAFFTVDWFunctional(FFTVDWFunctional, MGGA): 

856 def __init__(self, name, kernel, **kwargs): 

857 FFTVDWFunctional.__init__(self, **kwargs) 

858 MGGA.__init__(self, kernel) 

859 self.name = name 

860 

861 def calculate_exchange(self, *args): 

862 MGGA.process_mgga(self, *args) 

863 

864 def initialize(self, *args): 

865 MGGA.initialize(self, *args) 

866 FFTVDWFunctional.initialize(self, *args) 

867 

868 def set_grid_descriptor(self, gd): 

869 if (self.gd is not None and 

870 (self.gd.N_c == gd.N_c).all() and 

871 (self.gd.pbc_c == gd.pbc_c).all() and 

872 (self.gd.cell_cv == gd.cell_cv).all()): 

873 return 

874 

875 MGGA.set_grid_descriptor(self, gd) 

876 FFTVDWFunctional.set_grid_descriptor(self, gd) 

877 

878 

879def spline(x, y): 

880 n = len(y) 

881 result = np.zeros((n, 4)) 

882 a, b, c, d = result.T 

883 a[:] = y 

884 h = x[1:] - x[:-1] 

885 alpha = 3 * ((a[2:] - a[1:-1]) / h[1:] - (a[1:-1] - a[:-2]) / h[:-1]) 

886 l = np.ones(n) 

887 mu = np.zeros(n) 

888 z = np.zeros(n) 

889 for i in range(1, n - 1): 

890 l[i] = 2 * (x[i + 1] - x[i - 1]) - h[i - 1] * mu[i - 1] 

891 mu[i] = h[i] / l[i] 

892 z[i] = (alpha[i - 1] - h[i - 1] * z[i - 1]) / l[i] 

893 for i in range(n - 2, -1, -1): 

894 c[i] = z[i] - mu[i] * c[i + 1] 

895 b[i] = (a[i + 1] - a[i]) / h[i] - h[i] * (c[i + 1] + 2 * c[i]) / 3 

896 d[i] = (c[i + 1] - c[i]) / 3 / h[i] 

897 return result