Coverage for gpaw/cli/install_data.py: 31%
228 statements
« prev ^ index » next coverage.py v7.7.1, created at 2025-07-20 00:19 +0000
« prev ^ index » next coverage.py v7.7.1, created at 2025-07-20 00:19 +0000
import contextlib
import fnmatch
import itertools
import os
import re
import shlex
import ssl
import tarfile
from io import BytesIO, StringIO
from urllib.request import urlopen
# (flag name, description) of every download source.  Each entry is turned
# into a ``--<name>`` command-line flag by CLICommand.add_arguments().
sources = [
    ('gpaw', 'official GPAW setups releases'),
    ('sg15', 'SG15 pseudopotentials'),
    ('basis', 'basis sets for LCAO mode'),
    ('test', 'small file for testing this script'),
]

# Just the flag names, in the same order as ``sources``.
names = [name for name, _ in sources]


# Page or directory that is consulted for each source.
# (We would like to use https always, but quantum-simulation.org does
# not support that as of 2025-02-03)
baseurls = {
    'gpaw':
        'https://gitlab.com/gpaw/gpaw/-/raw/master/doc/setups/setups.rst',
    'sg15':
        'http://www.quantum-simulation.org/potentials/sg15_oncv/',
    'basis':
        'https://wiki.fysik.dtu.dk/gpaw-files/',
    'test':
        'https://wiki.fysik.dtu.dk/gpaw-files/',
}


# Error text shown when a source yields no files; ``{url}`` is filled in
# with the address that was consulted.
notfound_msg = """\
For some reason the files were not found.

Perhaps this script is out of date, and the data is no longer
available at the expected URL:

 {url}

Or maybe there it is just a temporary problem or timeout. Please try
again, or rummage around the GPAW web page until a solution is found.
Writing e-mails to gpaw-users@listserv.fysik.dtu.dk or reporting
an issue on https://gitlab.com/gpaw/gpaw/issues is also
likely to help."""
def urlopen_nocertcheck(src):
    """Open the URL ``src`` without validating the server certificate.

    Some data is read from a DTU server with a self-signed
    certificate.  That causes trouble on some machines, so both the
    hostname check and the certificate verification are switched off.

    Returns whatever ``urllib.request.urlopen`` returns for ``src``.
    """
    unverified = ssl.create_default_context()
    # Hostname checking is switched off first, then verification itself.
    unverified.check_hostname = False
    unverified.verify_mode = ssl.CERT_NONE
    response = urlopen(src, context=unverified)
    return response
class CLICommand:
    """Install additional PAW datasets, pseudopotential or basis sets.

    Without a directory or a source flag, show available setups and GPAW
    setup paths.

    With a directory and a source flag, download and install gpaw-setups
    into INSTALLDIR/[setups-package-name-and-version].
    """
    # NOTE(review): the docstring above is presumably displayed as the
    # command's help text by the CLI framework -- wording left untouched.

    @staticmethod
    def add_arguments(parser):
        """Declare the positional argument and all flags on ``parser``."""
        parser.add_argument('directory', nargs='?')
        parser.add_argument(
            '--version',
            help='download VERSION of package. '
                 'Run without arguments to display a list of versions. '
                 'VERSION can be the full URL or a part such as '
                 "'0.8' or '0.6.6300'")
        parser.add_argument(
            '--list-all', action='store_true',
            help='list packages from all sources')

        # Source flags: a local tarball or exactly one download source.
        source_flags = parser.add_argument_group('source flags')
        exclusive_sources = source_flags.add_mutually_exclusive_group()
        exclusive_sources.add_argument(
            '--tarball', metavar='FILE',
            help='unpack and install from local tarball FILE '
                 'instead of downloading')
        for flag, description in sources:
            # Every source flag stores its own name in ``args.source``.
            exclusive_sources.add_argument(
                '--' + flag, action='store_const',
                const=flag, dest='source',
                help=description)

        # Registration flags: both write to ``args.register``, which stays
        # None when neither is given.
        registration_flags = parser.add_argument_group(
            'registration flags (script runs '
            'interactively if neither is '
            'supplied)')
        exclusive_registration = (
            registration_flags.add_mutually_exclusive_group())
        exclusive_registration.add_argument(
            '--register', action='store_const', const=True,
            help='run non-interactively and register install path in '
                 'GPAW setup search paths. This is done by adding lines to '
                 '~/.gpaw/rc.py')
        exclusive_registration.add_argument(
            '--no-register', action='store_const',
            const=False, dest='register',
            help='run non-interactively and do not register install path '
                 'in GPAW setup search paths')

    @staticmethod
    def run(args, parser):
        """Execute the command by handing ``args`` and ``parser`` to main()."""
        main(args, parser)
def main(args, parser):
    """List, download, unpack and optionally register GPAW data files.

    ``args`` is the parsed command line.  The attributes read here are
    ``source``, ``tarball``, ``version``, ``directory`` and ``register``;
    ``list_all`` is read as well and forced to True when no source flag
    was given.  ``parser`` is used to report user errors through
    ``parser.error``.

    Steps:

    1. Unless a local tarball is used, collect the URLs of the requested
       source(s), print them, and select one (the first one, or the
       unique one matching ``args.version``).
    2. Without a source flag or without a directory, print the listing
       and usage hints and raise ``SystemExit``.
    3. Open the tarball (local file or download) and extract it below
       ``args.directory``.
    4. Ask (or obey ``args.register``) whether to append the install
       path to the ``.gpaw/rc.py`` file in the user's home directory.
    """
    if args.source is None:
        args.list_all = True

    def print_blurb():
        print_setups_info(parser)
        print()
        print('Run gpaw install-data --SOURCE DIR to install the newest '
              'setups into DIR.')
        print('Run gpaw install-data --SOURCE --version=VERSION DIR to '
              'install VERSION (from above).')
        print('See gpaw install-data --help for more info.')

    # The sg15 file is a tarbomb.  We will later defuse it by untarring
    # into a subdirectory, so we don't leave a ghastly mess on the
    # unsuspecting user's system.

    if not args.tarball:
        if args.list_all:
            urls_dict = {source: get_urls(source) for source in names}
        else:
            urls_dict = {args.source: get_urls(args.source)}

        def print_urls(urls, marked=None, file=None):
            for url in urls:
                marking = ' [*]' if url == marked else ' '
                print(f' {marking} {url}', file=file)

        def print_all_urls(source=None, marked=None, file=None):
            if source:
                displayed_urls = {source: urls_dict[source]}
            else:
                displayed_urls = urls_dict
            for name, url_sublist in displayed_urls.items():
                print(f'Available setups and pseudopotentials (--{name}):',
                      file=file)
                print_urls(url_sublist, marked, file)
                print(file=file)

        if not args.source:
            # Nothing selected: show what is available and stop.
            print_all_urls()
            print_blurb()
            raise SystemExit
        urls = urls_dict[args.source]

        if not urls:
            parser.error(notfound_msg.format(url=baseurls[args.source]))

        if args.version:
            matching_urls = [url for url in urls
                             if match_version(url, args.version)]
            # Collect the complaint in a buffer so that it can be handed
            # to parser.error() as a single message.
            with StringIO() as fobj:
                if len(matching_urls) > 1:
                    print('\nMore than one setup file matches version '
                          '"%s":' % args.version,
                          file=fobj)
                    print_urls(matching_urls, file=fobj)
                elif len(matching_urls) == 0:
                    print('\nNo setup matched the specified version '
                          '"%s".' % args.version,
                          file=fobj)
                    print_all_urls(args.source, file=fobj)
                error_msg = fobj.getvalue()
            if error_msg:
                parser.error(error_msg)
            url, = matching_urls
        else:
            url = urls[0]

        print_all_urls(marked=url)

    if not args.directory:
        print_blurb()
        raise SystemExit

    targetpath = args.directory

    # Only the file name decides the install directory and the sg15
    # special case; a --tarball argument may carry leading directories,
    # which would otherwise make os.path.join() discard ``targetpath``.
    if args.tarball:
        tarfname = os.path.basename(args.tarball)
    else:
        tarfname = url.rsplit('/', 1)[1]

    tar_suffix = '.tar.gz'
    # Explicit check rather than ``assert``: asserts vanish under -O.
    if not tarfname.endswith(tar_suffix):
        parser.error('Expected a file name ending in %s, got %s'
                     % (tar_suffix, tarfname))
    setup_dirname = tarfname[:-len(tar_suffix)]

    with contextlib.ExitStack() as stack:
        push = stack.enter_context
        if args.tarball:
            print('Reading local tarball %s' % args.tarball)
            targzfile = push(tarfile.open(args.tarball))
        else:
            print('Selected %s. Downloading...' % tarfname)
            response = push(urlopen_nocertcheck(url))
            resp_fobj = push(BytesIO(response.read()))
            targzfile = push(tarfile.open(fileobj=resp_fobj))

        os.makedirs(targetpath, exist_ok=True)

        setup_path = os.path.abspath(os.path.join(targetpath,
                                                  setup_dirname))
        if tarfname.startswith('sg15'):
            # Defuse tarbomb
            os.makedirs(setup_path, exist_ok=True)
            targetpath = os.path.join(targetpath, setup_dirname)

        print('Extracting tarball into %s' % targetpath)
        # The archive may come from a server whose certificate was not
        # verified, so use the 'data' extraction filter where the running
        # Python provides it; it rejects members that would be written
        # outside of the target directory.
        if hasattr(tarfile, 'data_filter'):
            targzfile.extractall(targetpath, filter='data')
        else:
            targzfile.extractall(targetpath)
        if not os.path.isdir(setup_path):
            parser.error('Tarball %s did not create the expected '
                         'directory %s' % (tarfname, setup_path))
        print('Setups installed into %s.' % setup_path)

    # Okay, now we have to maybe edit people's rc files.
    # expanduser() does not fail when the HOME variable is unset.
    # NOTE(review): confirm that gpaw.read_rc_file() resolves the same
    # location in that situation.
    rcfiledir = os.path.join(os.path.expanduser('~'), '.gpaw')
    rcfilepath = os.path.join(rcfiledir, 'rc.py')

    # We could do all this by importing the rcfile as well and checking
    # whether things are okay or not.
    rcline = "setup_paths.insert(0, {!r})".format(setup_path)

    # Run interactive mode unless someone specified a flag requiring otherwise
    interactive_mode = args.register is None

    register_path = False

    if interactive_mode:
        answer = input('Register this setup path in %s? [y/n] ' % rcfilepath)
        reply = answer.lower()
        if reply in ('y', 'yes'):
            register_path = True
        elif reply in ('n', 'no'):
            print('As you wish.')
        else:
            print('What do you mean by "%s"? Assuming "n".' % answer)
    else:
        register_path = args.register

    if register_path:
        os.makedirs(rcfiledir, exist_ok=True)
        # A missing rc file simply means that nothing is registered yet;
        # opening it in append mode below creates it.
        try:
            with open(rcfilepath) as fobj:
                already_registered = any(line.startswith(rcline)
                                         for line in fobj)
        except FileNotFoundError:
            already_registered = False

        if already_registered:
            print('It looks like the path is already registered in %s.'
                  % rcfilepath)
            print('File will not be modified at this time.')
        else:
            with open(rcfilepath, 'a') as rcfd:
                print(rcline, file=rcfd)
            print('Setup path registered in %s.' % rcfilepath)

        print_setups_info(parser)
    else:
        print('You can manually register the setups by adding the')
        print('following line to %s:' % rcfilepath)
        print()
        print(rcline)
        print()
        print('Or if you prefer to use environment variables, you can')
        print('set GPAW_SETUP_PATH. For example:')
        print()
        print(f'export GPAW_SETUP_PATH={shlex.quote(setup_path)}')
        print()
    print('Installation complete.')
def get_urls(source):
    """Return the list of tarball URLs offered by ``source``.

    ``source`` must be one of the keys of ``baseurls``:

    * ``'gpaw'``: the page is downloaded and every line that is a
      gpaw-setups tarball URL is returned, in page order.
    * ``'sg15'``: the page is downloaded and searched for links to
      ``sg15_oncv_upf_YYYY-MM-DD.tar.gz`` files; the result is sorted
      so that the latest date comes first.
    * ``'basis'`` and ``'test'``: a fixed list of file names below the
      base URL; nothing is downloaded.

    Raises ValueError if ``source`` is not a known source.
    """
    # Check first: a plain ``baseurls[source]`` would raise KeyError and
    # the intended "Unknown source" error could never be reached.
    if source not in baseurls:
        raise ValueError('Unknown source: %s' % source)
    page = baseurls[source]

    if source == 'gpaw':
        pattern = ('https://wiki.fysik.dtu.dk/gpaw-files/'
                   'gpaw-setups-*.tar.gz')
        with urlopen_nocertcheck(page) as response:
            lines = [line.strip().decode() for line in response]
        urls = [line for line in lines if fnmatch.fnmatch(line, pattern)]

    elif source == 'sg15':
        # We want sg15_oncv_upf_2015-10-07.tar.gz, but they may upload
        # newer files, too.  The dots are escaped so that they only
        # match a literal '.'.
        pattern = (r'<a\s*href=[^>]+>\s*'
                   r'(sg15_oncv_upf_\d\d\d\d-\d\d-\d\d\.tar\.gz)'
                   r'\s*</a>')

        with urlopen_nocertcheck(page) as response:
            txt = response.read().decode('ascii', errors='replace')
        # ISO dates sort lexicographically, so reverse order = newest first.
        files = sorted(re.findall(pattern, txt), reverse=True)
        urls = [page + fname for fname in files]

    elif source == 'basis':
        files = ['gpaw-basis-NAO-sz+coopt-NGTO-0.9.11271.tar.gz',
                 'gpaw-basis-pvalence-0.9.11271.tar.gz',
                 'gpaw-basis-pvalence-0.9.20000.tar.gz']
        urls = [page + fname for fname in files]

    elif source == 'test':
        urls = [page + 'gpaw-dist-test-source.tar.gz']

    else:
        # A key of ``baseurls`` that has no handler above.
        raise ValueError('Unknown source: %s' % source)

    return urls
def print_setups_info(parser):
    """Print GPAW's current setup search paths, highest priority first.

    If the ``gpaw`` package cannot be imported, the problem is reported
    through ``parser.error``.
    """
    try:
        import gpaw
    except ImportError as err:
        parser.error("Cannot import 'gpaw'. GPAW does not appear to be "
                     'installed. %s' % err)

    # The contents of the rc file may have changed.  Thus, we initialize
    # setup_paths again to be sure that everything is as it should be.
    gpaw.setup_paths[:] = gpaw.standard_setup_paths()
    gpaw.read_rc_file()
    gpaw.initialize_data_paths()

    if len(gpaw.setup_paths) == 0:
        print('GPAW currently has no setup search paths')
        return

    print('Current GPAW setup paths in order of search priority:')
    for rank, path in enumerate(gpaw.setup_paths, start=1):
        print('%4d. %s' % (rank, path))
def get_runs(seq, criterion=lambda x: x):
    """
    Split `seq` into runs of consecutive items that share the same
    value of `criterion(item)`.

    Any iterable is accepted; the result is a list of lists that keeps
    the original item order.  An empty input gives an empty list.

    >>> get_runs('aaabacbbcab')  # doctest: +NORMALIZE_WHITESPACE
    [['a', 'a', 'a'], ['b'], ['a'], ['c'], ['b', 'b'], ['c'], ['a'],
     ['b']]
    >>> get_runs('foo,bar,baz', str.isalnum)
    [['f', 'o', 'o'], [','], ['b', 'a', 'r'], [','], ['b', 'a', 'z']]
    >>> get_runs(  # doctest: +NORMALIZE_WHITESPACE
    ...     [1, 2, 5, 3, 4, 7, 10, 3, 5, 2], lambda x: x % 3,
    ... )
    [[1], [2, 5], [3], [4, 7, 10], [3], [5, 2]]
    >>> get_runs('')
    []
    """
    # groupby() starts a new group whenever the key value changes, which
    # is exactly the definition of a run.
    return [list(run) for _, run in itertools.groupby(seq, key=criterion)]
def split_into_chunks(string):
    """
    Cut `string` into alternating alphanumeric and non-alphanumeric
    pieces, in order of appearance.

    >>> split_into_chunks('')
    []
    >>> split_into_chunks('foo')
    ['foo']
    >>> split_into_chunks('version 10.1.rc0')
    ['version', ' ', '10', '.', '1', '.', 'rc0']
    >>> split_into_chunks('https://gpaw.readthedocs.io/')
    ['https', '://', 'gpaw', '.', 'readthedocs', '.', 'io', '/']
    """
    # Each run is a list of characters that agree on str.isalnum().
    character_runs = get_runs(string, str.isalnum)
    return list(map(''.join, character_runs))
def match_version(url, version):
    """
    Tell whether `version` occurs in `url` as a run of whole tokens.

    Both strings are cut into alphanumeric and separator chunks; the
    chunks of `version` must equal a contiguous slice of the chunks of
    `url`.  A match at the very beginning always counts.  Elsewhere, a
    `version` whose first alphanumeric token is numeric is rejected when
    the alphanumeric token just before it in `url` is numeric too (so
    that '0' does not match the '0' in '1.0').  If no such preceding
    token exists, the match counts.

    Raises ValueError if `version` contains no alphanumeric token.

    >>> match_version('0.9.0', '0')
    True
    >>> match_version('1.9.0', '0')
    False
    >>> match_version('foo.0', '.0')
    True
    >>> match_version('1.0', '.0')
    False
    >>> match_version('11', '1')
    False
    >>> match_version('foo.bar.1.0', 'bar.1')
    True
    >>> match_version('foo.bar-1.0', 'bar.1')
    False
    >>> match_version('foobar.1', 'bar.1')
    False
    >>> match_version('foo.bar.11', 'bar.1')
    False
    >>> match_version('-1.0', '1')
    True
    """
    url_chunks = split_into_chunks(url)
    version_chunks = split_into_chunks(version)
    num_chunks = len(version_chunks)
    try:
        # Position of the first alphanumeric token inside `version`
        token_offset = 0 if version_chunks[0].isalnum() else 1
        first_token_is_numeric = version_chunks[token_offset].isnumeric()
    except IndexError:
        raise ValueError(
            f'version = {version!r}: cannot find any alphanumeric token'
        ) from None
    # A match starting from the beginning is always a match
    if version_chunks == url_chunks[:num_chunks]:
        return True
    # A match against the middle of the string may be a false positive
    for rolling_offset in range(1, len(url_chunks) - num_chunks + 1):
        window = url_chunks[rolling_offset:rolling_offset + num_chunks]
        # Non-match
        if version_chunks != window:
            continue
        # 'bar.0' matches 'foo.bar.0' and '1.bar.0'
        if not first_token_is_numeric:
            return True
        # '0' matches 'foo.0' but not '1.0'
        previous_index = rolling_offset + token_offset - 2
        # A negative index means that no alphanumeric token precedes the
        # match (`url` starts with a separator); it must not wrap around
        # to the end of the list.
        if previous_index < 0:
            return True
        if not url_chunks[previous_index].isnumeric():
            return True
    return False