Coverage for gpaw/upf.py: 68%
411 statements
« prev ^ index » next coverage.py v7.7.1, created at 2025-07-19 00:19 +0000
« prev ^ index » next coverage.py v7.7.1, created at 2025-07-19 00:19 +0000
1"""Unified pseudopotential format (UPF).
3UPF refers to a collection of different formats to store
4pseudopotentials or PAW setups. This file attempts to parse them and
5provide pseudopotential objects for use with GPAW.
6"""
8from optparse import OptionParser
9from xml.etree.ElementTree import parse as xmlparse, fromstring
10from xml.etree.ElementTree import ParseError
12import numpy as np
13from ase.data import atomic_numbers
15from gpaw.atom.atompaw import AtomPAW
16from gpaw.atom.radialgd import EquidistantRadialGridDescriptor
17from gpaw.setup_data import search_for_file
18from gpaw.basis_data import Basis, BasisFunction
19from gpaw.pseudopotential import (PseudoPotential, screen_potential,
20 figure_out_valence_states,
21 get_radial_hartree_energy)
22from gpaw.spline import Spline
23from gpaw.utilities import pack_hermitian, divrl
26class UPFStateSpec:
27 def __init__(self, index, label, l, values, occupation=None, n=None):
28 self.index = index
29 self.label = label
30 self.l = l
31 self.values = values
32 self.occupation = occupation
33 self.n = n
36class UPFCompensationChargeSpec: # Not used right now.......
37 def __init__(self, i, j, l, qint, values, coefs):
38 self.i = i
39 self.j = j
40 self.l = l
41 self.qint = qint
42 self.values = values
43 self.coefs = coefs
45 def __repr__(self):
46 return ('CompensationCharge(i=%d, j=%d, l=%d, qint=%s, '
47 'ccvalues=[...], coefs=[...(%d)...]') % (self.i, self.j,
48 self.l, self.qint,
49 len(self.coefs))
52def trim_outer_zeros(values, threshold=0.0):
53 values = list(values)
54 if abs(values[-1]) <= threshold:
55 while abs(values[-2]) <= threshold:
56 values.pop()
57 assert abs(values[-1]) <= threshold
58 # This will give an error if the array length becomes smaller than 2.
59 # Which would be fine I guess.
60 return np.array(values)
63def parse_upf(fname):
64 """Parse UPF pseudopotential from file descriptor.
66 Return a dictionary with the parsed data.
68 UPF is a custom format with some XML tags in it. The files are
69 not well-formed XML. We will just do some custom parsing rather
70 than involving an xml library.
71 """
73 pp = {}
75 try:
76 root = xmlparse(fname).getroot()
77 except ParseError:
78 # Typical! Non-well-formed file full of probable FORTRAN output.
79 # We'll try to insert our own header and see if things go well.
80 root = fromstring('\n'.join(['<xml>', open(fname).read(), '</xml>']))
82 pp['fname'] = fname
84 header_element = root.find('PP_HEADER')
85 header = header_element.attrib
87 # v201 is the sensible version.
88 # There are other files out there and we will have to deal with their
89 # inconsistencies in the most horrendous way conceivable: by if-statements.
90 v201 = (root.attrib.get('version') == '2.0.1')
92 def toarray(element):
93 attr = element.attrib
94 numbers = [float(n) for n in element.text.split()]
95 if attr:
96 assert attr['type'] == 'real'
97 assert int(attr['size']) == len(numbers)
98 icut = attr.get('cutoff_radius_index')
99 # XXX There's also something called ultrasoft_cutoff_radius
100 if icut is not None:
101 numbers = numbers[:int(icut)]
103 return np.array(numbers)
105 if v201:
106 for key, val in header.items():
107 header[key] = val.strip() # some values have whitespace...
108 for key in ['is_paw', 'is_coulomb', 'has_so', 'has_wfc', 'has_gipaw',
109 'paw_as_gipaw', 'core_correction']:
110 if key in header:
111 header[key] = {'F': False, 'T': True}[header[key]]
112 for key in ['z_valence', 'total_psenergy', 'wfc_cutoff', 'rho_cutoff']:
113 if key not in header:
114 continue
115 header[key] = float(header[key])
116 for key in ['l_max', 'l_max_rho', 'mesh_size', 'number_of_wfc',
117 'number_of_proj']:
118 if key not in header:
119 continue
120 header[key] = int(header[key])
121 else:
122 assert len(header) == 0
123 # This is the crappy format.
124 headerlines = [line for line
125 in header_element.text.strip().splitlines()]
126 # header['version'] = headerlines[0].split()[0]
127 header['element'] = headerlines[1].split()[0]
128 # header['has_nlcc'] = {'T': True, 'F': False} .........
129 # assert not header['has_nlcc']
130 # XC functional?
131 header['z_valence'] = float(headerlines[5].split()[0])
132 header['total_psenergy'] = float(headerlines[6].split()[0])
133 # cutoffs?
134 header['l_max'] = int(headerlines[8].split()[0])
135 header['mesh_size'] = int(headerlines[9].split()[0])
136 nwfs, nprojectors = headerlines[10].split()[:2]
137 header['number_of_wfc'] = int(nwfs)
138 header['number_of_proj'] = int(nprojectors)
140 info_element = root.find('PP_INFO')
141 pp['info'] = info_element.text
143 pp['header'] = header
144 mesh_element = root.find('PP_MESH')
145 pp['r'] = toarray(mesh_element.find('PP_R'))
146 pp['rab'] = toarray(mesh_element.find('PP_RAB'))
148 # Convert to Hartree from Rydberg.
149 pp['vlocal'] = 0.5 * toarray(root.find('PP_LOCAL'))
151 non_local = root.find('PP_NONLOCAL')
153 pp['projectors'] = []
154 for element in non_local:
155 if element.tag.startswith('PP_BETA'):
156 if '.' in element.tag:
157 name, num = element.tag.split('.')
158 assert name == 'PP_BETA'
159 attr = element.attrib
160 proj = UPFStateSpec(int(attr['index']),
161 attr.get('label'),
162 int(attr['angular_momentum']),
163 toarray(element))
164 assert num == attr['index']
165 else:
166 tokens = element.text.split()
167 metadata = tokens[:5]
168 values = map(float, tokens[5:])
170 npts = int(metadata[4])
171 assert npts == len(values), (npts, len(values))
172 while values[-1] == 0.0:
173 values.pop()
175 proj = UPFStateSpec(int(tokens[0]), '??', int(tokens[1]),
176 np.array(values))
178 pp['projectors'].append(proj)
179 else:
180 if v201:
181 assert element.tag == 'PP_DIJ', element.tag
182 # XXX probably measured in Rydberg.
183 pp['DIJ'] = 0.5 * toarray(element)
184 else:
185 lines = element.text.strip().splitlines()
186 nnonzero = int(lines[0].split()[0])
187 nproj = pp['header']['number_of_proj']
188 D_ij = np.zeros((nproj, nproj))
189 for n in range(1, nnonzero + 1):
190 tokens = lines[n].split()
191 i = int(tokens[0]) - 1
192 j = int(tokens[1]) - 1
193 D = float(tokens[2])
194 D_ij[i, j] = D
195 D_ij[j, i] = D
196 assert len(lines) == 1 + nnonzero
197 # XXX probably measured in Rydberg.
198 pp['DIJ'] = 0.5 * D_ij
200 pswfc_element = root.find('PP_PSWFC')
201 pp['states'] = []
202 if v201:
203 for element in pswfc_element:
204 attr = element.attrib
205 name = element.tag
206 state = UPFStateSpec(int(attr['index']),
207 attr['label'],
208 int(attr['l']),
209 toarray(element),
210 float(attr['occupation']),
211 int(attr['n']))
212 pp['states'].append(state)
213 else:
214 state_data = []
215 for line in pswfc_element.text.splitlines()[1:]:
216 if line.endswith('Wavefunction'):
217 values = []
218 state_data.append((line, values))
219 else:
220 values.extend(map(float, line.split()))
221 for header, values in state_data:
222 label, l, occupation, wfstring = header.split()
223 assert wfstring == 'Wavefunction'
224 state = UPFStateSpec(None, label, int(l), np.array(values),
225 occupation=float(occupation))
226 pp['states'].append(state)
227 # print repr(lines[0])
228 # assert len(pp['states']) > 0
230 pp['rhoatom'] = toarray(root.find('PP_RHOATOM'))
231 return pp
234sg15_special_valence_states = {
235 'Re': ([5, 5, 6, 5], [0, 1, 0, 2], [2., 6., 2., 5.]),
236 'Os': ([5, 5, 6, 5], [0, 1, 0, 2], [2., 6., 2., 6.]),
237 'Ir': ([5, 5, 6, 5], [0, 1, 0, 2], [2., 6., 2., 7.])}
240def read_sg15(fname):
241 data = parse_upf(fname)
242 symbol = data['header']['element']
243 valence_states = None
245 states_nlf = sg15_special_valence_states.get(symbol)
246 if states_nlf is not None:
247 valence_states = [UPFStateSpec(index=None, label=None, n=n, l=l,
248 values=None, occupation=f)
249 for n, l, f in zip(*states_nlf)]
251 data = UPFSetupData(data, filename=fname, valence_states=valence_states)
252 return data
255class UPFSetupData:
256 def __init__(self, data, valence_states=None, filename=None):
257 self.phi_jg = []
258 self.phit_jg = []
259 self.xc_correction = None
260 # data can be string (filename)
261 # or dict (that's what we are looking for).
262 # Maybe just a symbol would also be fine if we know the
263 # filename to look for.
264 if isinstance(data, str):
265 filename = data
266 data = parse_upf(data)
267 elif filename is None:
268 filename = '[N/A]'
270 self.filename = filename
272 assert isinstance(data, dict)
273 self.data = data # more or less "raw" data from the file
275 self.name = 'upf'
277 header = data['header']
279 self.symbol = header['element']
280 self.Z = atomic_numbers[self.symbol]
281 self.Nv = header['z_valence']
282 self.Nc = self.Z - self.Nv
283 self.Delta0 = -self.Nv / np.sqrt(4.0 * np.pi) # like hgh
284 self.rcut_j = [data['r'][len(proj.values) - 1]
285 for proj in data['projectors']]
287 # beta = 0.1 # XXX nice parameters?
288 # N = 4 * 450 # XXX
289 # This is "stolen" from hgh. Figure out something reasonable
290 # rgd = AERadialGridDescriptor(beta / N, 1.0 / N, N,
291 # default_spline_points=100)
293 from gpaw.atom.radialgd import EquidistantRadialGridDescriptor
294 rgd = EquidistantRadialGridDescriptor(0.02)
295 self.rgd = rgd
297 # Whyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy???
298 # What abominable part of the code requires the states
299 # to be ordered like this?
300 self.jargs = self.get_jargs()
302 projectors = [data['projectors'][jarg] for jarg in self.jargs]
303 if projectors:
304 self.l_j = [proj.l for proj in projectors]
305 self.pt_jg = []
306 for proj in projectors:
307 val = proj.values.copy()
308 val /= 2.
309 val[1:] /= data['r'][1:len(val)]
310 val[0] = val[1]
311 pt_g = self._interp(val) # * np.sqrt(4.0 * np.pi)
312 # sqrnorm = (pt_g**2 * self.rgd.dr_g).sum()
313 self.pt_jg.append(pt_g)
315 else:
316 self.l_j = [0]
317 gcut = self.rgd.r2g(1.0)
318 self.pt_jg = [np.zeros(gcut)] # XXX yet another "null" function
320 self.rcgauss = 0.0 # XXX .... what is this used for?
321 self.ni = sum([2 * l + 1 for l in self.l_j])
322 self.nabla_iiv = np.zeros((self.ni, self.ni, 3))
324 self.fingerprint = None # XXX hexdigest the file?
325 self.N0_q = None # XXX
327 if valence_states is None:
328 valence_states = data['states']
330 if valence_states:
331 states_lmax = max([state.l for state in valence_states])
332 f_ln = [[] for _ in range(1 + states_lmax)]
333 electroncount = 0.0
334 for state in valence_states:
335 # Where should the electrons be in the inner list??
336 # This is probably wrong and will lead to bad initialization
337 f_ln[state.l].append(state.occupation)
338 electroncount += state.occupation
339 # The Cl.pz-hgh.UPF from quantum espresso has only 6
340 # but should have 7 electrons. Oh well....
341 # err = abs(electroncount - self.Nv)
342 self.f_j = [state.occupation for state in valence_states]
343 self.n_j = [state.n for state in valence_states]
344 self.l_orb_J = [state.l for state in valence_states]
345 self.f_ln = f_ln
346 else:
347 self.n_j, self.l_orb_J, self.f_j, self.f_ln = \
348 figure_out_valence_states(self)
350 vlocal_unscreened = data['vlocal']
352 # The UPF representation of HGH setups should be equal to that
353 # used with setups='hgh'. But the UPF files do not contain
354 # info on the localization of the compensation charges! That
355 # means the screen_potential code will choose the shape of the
356 # compensation charges, resulting in some numerical differences.
357 #
358 # One could hack that it looks up some radii to compare e.g. H2O.
359 # The results of the below comments still have some numerical error.
360 # But it's not huge.
361 # a = None
362 # if self.symbol == 'H':
363 # a = 0.2
364 # elif self.symbol == 'O':
365 # a = .247621
367 vbar_g, ghat_g = screen_potential(data['r'], vlocal_unscreened,
368 self.Nv)
369 self.Eh_compcharge = get_radial_hartree_energy(data['r'][:len(ghat_g)],
370 ghat_g)
371 self.vbar_g = self._interp(vbar_g) * np.sqrt(4.0 * np.pi)
372 self.ghat_lg = [4.0 * np.pi / self.Nv * self._interp(ghat_g)]
374 def get_jargs(self):
375 projectors = list(self.data['projectors'])
376 jargs = []
378 for n in range(4):
379 for l in range(4):
380 for i, proj in enumerate(projectors):
381 if proj.l == l:
382 jargs.append(proj.index - 1)
383 projectors.pop(i)
384 break
385 return jargs
387 def tostring(self):
388 lines = []
389 indent = 0
391 def add(line):
392 lines.append(indent * ' ' + line)
394 add('Norm-conserving UPF setup:')
395 indent = 2
396 add('Element: %4s' % self.symbol)
397 add('Z: %4s' % self.Z)
398 add('Valence: %4s' % self.Nv)
399 indent -= 2
400 add('Projectors:')
401 indent += 2
402 for j, proj in enumerate(self.data['projectors']):
403 add('l=%d rcut=%s' % (proj.l, self.rcut_j[j]))
404 indent -= 2
405 if len(self.data['states']) == 0:
406 add('No states stored on this setup')
407 else:
408 add('States:')
409 indent += 2
410 for j, state in enumerate(self.data['states']):
411 add('l=%d f=%s' % (state.l, state.occupation))
412 indent -= 2
413 add('Local potential cutoff: %s'
414 % self.rgd.r_g[len(self.vbar_g) - 1])
415 add('Comp charge cutoff: %s'
416 % self.rgd.r_g[len(self.ghat_lg[0]) - 1])
417 add('File: %s' % self.filename)
418 add('')
419 return '\n'.join(lines)
421 def print_info(self, txt, setup):
422 txt(self.tostring())
424 # XXX something can perhaps be stolen from HGH
425 def expand_hamiltonian_matrix(self):
426 """Construct K_p from individual h_nn for each l."""
427 ni = sum([2 * l + 1 for l in self.l_j])
428 nj = len(self.l_j)
430 H_ii = np.zeros((ni, ni))
431 if len(self.data['DIJ']) == 0:
432 return pack_hermitian(H_ii)
434 # Multiply by 4.
435 # I think the factor of 4 compensates for the fact that the projectors
436 # all had square norms of 4, but we brought them back down to 1
437 # because that's more sensible.
438 H_jj = 4.0 * self.data['DIJ'].reshape((nj, nj))
439 m1start = 0
440 for j1, l1 in enumerate(self.l_j):
441 j1 = self.jargs[j1]
442 m1stop = m1start + 2 * l1 + 1
443 m2start = 0
444 for j2, l2 in enumerate(self.l_j):
445 j2 = self.jargs[j2]
446 m2stop = m2start + 2 * l2 + 1
447 if l1 == l2:
448 dm = m1stop - m1start
449 H_ii[m1start:m1stop, m2start:m2stop] = \
450 np.eye(dm) * H_jj[j1, j2]
451 else:
452 assert H_jj[j1, j2] == 0.0
453 m2start = m2stop
454 m1start = m1stop
455 return pack_hermitian(H_ii)
457 def get_local_potential(self):
458 vbar = Spline.from_data(
459 0, self.rgd.r_g[len(self.vbar_g) - 1], self.vbar_g,
460 )
461 return vbar
463 # XXXXXXXXXXXXXXXXX stolen from hghsetupdataf
464 def get_projectors(self):
465 # XXX equal-range projectors still required for some reason
466 maxlen = max([len(pt_g) for pt_g in self.pt_jg])
467 pt_j = []
468 for l, pt1_g in zip(self.l_j, self.pt_jg):
469 pt2_g = self.rgd.zeros()[:maxlen]
470 pt2_g[:len(pt1_g)] = divrl(pt1_g, l, self.rgd.r_g[:len(pt1_g)])
471 spline = Spline.from_data(l, self.rgd.r_g[maxlen - 1], pt2_g)
472 pt_j.append(spline)
473 return pt_j
475 def _interp(self, func):
476 r_g = self.rgd.r_g
477 gcut = len(func)
478 rcut = self.data['r'][gcut - 1]
479 gcutnew = np.searchsorted(self.rgd.r_g, rcut)
480 rnew = r_g[:gcutnew]
481 ynew = np.interp(rnew, self.data['r'][:gcut], func, right=0.0)
482 return ynew
484 def get_compensation_charge_functions(self):
485 assert len(self.ghat_lg) == 1
486 ghat_g = self.ghat_lg[0]
487 ng = len(ghat_g)
488 rcutcc = self.rgd.r_g[ng - 1] # correct or not?
489 r = np.linspace(0.0, rcutcc, 50)
490 ghat_g[-1] = 0.0
491 ghatnew_g = Spline.from_data(0, rcutcc, ghat_g).map(r)
492 return r, [0], [ghatnew_g]
494 def create_basis_functions(self):
495 if len(self.data['states']) > 0:
496 return self.get_stored_basis_functions()
497 else:
498 from gpaw.pseudopotential import generate_basis_functions
499 return generate_basis_functions(self)
501 def get_stored_basis_functions(self, ):
502 states = self.data['states']
503 maxlen = max([len(state.values) for state in states])
504 orig_r = self.data['r']
505 rcut = min(orig_r[maxlen - 1], 12.0) # XXX hardcoded 12 max radius
507 d = 0.02
508 ng = int(1 + rcut / d)
509 rgd = EquidistantRadialGridDescriptor(d, ng)
511 bf_j = []
512 for j, state in enumerate(states):
513 val = state.values
514 phit_g = np.interp(rgd.r_g, orig_r, val)
515 phit_g = divrl(phit_g, 1, rgd.r_g)
516 icut = len(phit_g) - 1 # XXX correct or off-by-one?
517 rcut = rgd.r_g[icut]
518 bf = BasisFunction(self.n_j[j], state.l, rcut, phit_g,
519 'pregenerated')
520 bf_j.append(bf)
521 return Basis(self.symbol, 'upf', rgd=rgd, bf_j=bf_j,
522 generatordata='upf-pregenerated')
524 def build(self, xcfunc, lmax, basis, filter=None):
525 # XXX better to create basis functions after filtering?
526 # Although basis functions are not meant for same grid
527 if basis is None:
528 basis = self.create_basis_functions()
529 return PseudoPotential(self, basis, filter)
532def main_plot():
533 p = OptionParser(usage='%prog [OPTION] [FILE...]',
534 description='plot upf pseudopotential from file.')
535 p.add_option('--calculate', action='store_true',
536 help='calculate density and orbitals.')
537 opts, args = p.parse_args()
539 import matplotlib.pyplot as plt
540 for arg in args:
541 if not arg.lower().endswith('.upf'):
542 # This is a bit bug prone. It is subject to files
543 # "sneaking in" unexpectedly due to '*' from
544 # higher-priority search directories. Then again, this
545 # probably will not happen, and the runtime setup loading
546 # mechanism is the same anyway so at least it is honest.
547 fname, source = search_for_file('%s*.[uU][pP][fF]' % arg)
548 if fname is None:
549 p.error('No match within search paths: %s' % arg)
550 else:
551 fname = arg
552 pp = parse_upf(fname)
553 print('--- %s ---' % fname)
554 print(UPFSetupData(pp).tostring())
555 print(pp['info'])
556 upfplot(pp, show=False, calculate=opts.calculate)
557 plt.show()
560def upfplot(setup, show=True, calculate=False):
561 # A version of this, perhaps nicer, is in pseudopotential.py.
562 # Maybe it is not worth keeping this version
563 if isinstance(setup, dict):
564 setup = UPFSetupData(setup)
566 pp = setup.data
567 r0 = pp['r'].copy()
568 r0[0] = 1e-8
570 def rtrunc(array, rdividepower=0):
571 r = r0[:len(array)]
572 arr = divrl(array, rdividepower, r)
573 return r, arr
575 import matplotlib.pyplot as plt
576 fig = plt.figure()
577 fig.canvas.set_window_title('{} - UPF setup for {}'.format(pp['fname'],
578 setup.symbol))
580 vax = fig.add_subplot(221)
581 pax = fig.add_subplot(222)
582 rhoax = fig.add_subplot(223)
583 wfsax = fig.add_subplot(224)
585 r, v = rtrunc(pp['vlocal'])
587 vax.plot(r, v, label='vloc')
589 vscreened, rhocomp = screen_potential(r, v, setup.Nv)
590 icut = len(rhocomp)
591 vcomp = v.copy()
592 vcomp[:icut] -= vscreened
593 vax.axvline(r[icut], ls=':', color='k')
595 vax.plot(r, vcomp, label='vcomp')
596 vax.plot(r[:icut], vscreened, label='vscr')
597 vax.axis(xmin=0.0, xmax=6.0)
598 rhoax.plot(r[:icut], rhocomp[:icut], label='rhocomp')
600 for j, proj in enumerate(pp['projectors']):
601 r, p = rtrunc(proj.values, 0)
602 pax.plot(r, p,
603 label='p%d [l=%d]' % (j + 1, proj.l))
605 for j, st in enumerate(pp['states']):
606 r, psi = rtrunc(st.values, 1)
607 wfsax.plot(r, r * psi, label='wf%d %s' % (j, st.label))
609 r, rho = rtrunc(pp['rhoatom'], 2)
610 wfsax.plot(r, r * rho, label='rho')
612 if calculate:
613 calc = AtomPAW(setup.symbol,
614 [setup.f_ln],
615 # xc='PBE', # XXX does not support GGAs :( :( :(
616 setups={setup.symbol: setup},
617 h=0.08,
618 rcut=10.0)
619 r_g = calc.wfs.gd.r_g
620 basis = calc.extract_basis_functions()
621 wfsax.plot(r_g, r_g * calc.density.nt_sg[0] * 4.0 * np.pi,
622 ls='--', label='rho [calc]', color='r')
624 splines = basis.tosplines()
625 for spline, bf in zip(splines, basis.bf_j):
626 wfsax.plot(r_g, r_g * spline.map(r_g), label=bf.type)
628 vax.legend(loc='best')
629 rhoax.legend(loc='best')
630 pax.legend(loc='best')
631 wfsax.legend(loc='best')
633 for ax in [vax, rhoax, pax, wfsax]:
634 ax.set_xlabel('r [Bohr]')
635 ax.axhline(0.0, ls=':', color='black')
637 vax.set_ylabel('potential')
638 pax.set_ylabel('projectors')
639 wfsax.set_ylabel(r'$r \psi(r), r n(r)$')
640 rhoax.set_ylabel('Comp charges')
642 fig.subplots_adjust(wspace=0.3)
644 if show:
645 plt.show()