Coverage for gpaw/cli/install_data.py: 31%

228 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2025-07-20 00:19 +0000

1import os 

2import fnmatch 

3from io import BytesIO, StringIO 

4import tarfile 

5import re 

6from urllib.request import urlopen 

7import ssl 

8import contextlib 

9import shlex 

10 

11 

# Supported download sources as (flag name, help text) pairs.
# CLICommand.add_arguments() turns every entry into a ``--<name>``
# source flag that stores ``<name>`` in ``args.source``.
sources = [('gpaw', 'official GPAW setups releases'),
           ('sg15', 'SG15 pseudopotentials'),
           ('basis', 'basis sets for LCAO mode'),
           ('test', 'small file for testing this script')]

# Only the flag names, in the same order as ``sources``.
names = [r for r, d in sources]


# Base URL for each source; get_urls() decides per source whether this
# is a page to scan for file names or a prefix for known file names.
# (We would like to use https always, but quantum-simulation.org does
# not support that as of 2025-02-03)
baseurls = {
    'gpaw': 'https://gitlab.com/gpaw/gpaw/-/raw/master/doc/setups/setups.rst',
    'sg15': 'http://www.quantum-simulation.org/potentials/sg15_oncv/',
    'basis': 'https://wiki.fysik.dtu.dk/gpaw-files/',
    'test': 'https://wiki.fysik.dtu.dk/gpaw-files/'}

27 

28 

29notfound_msg = """\ 

30For some reason the files were not found. 

31 

32Perhaps this script is out of date, and the data is no longer 

33available at the expected URL: 

34 

35 {url} 

36 

37Or maybe there it is just a temporary problem or timeout. Please try 

38again, or rummage around the GPAW web page until a solution is found. 

39Writing e-mails to gpaw-users@listserv.fysik.dtu.dk or reporting 

40an issue on https://gitlab.com/gpaw/gpaw/issues is also 

41likely to help.""" 

42 

43 

def urlopen_nocertcheck(src):
    """Open ``src`` with TLS certificate verification switched off.

    Part of the data lives on a DTU server that uses a self-signed
    certificate, which makes a verifying client fail on some machines.
    The returned object is whatever ``urlopen()`` returns and can be
    used as a context manager.
    """
    insecure_context = ssl.create_default_context()
    # Host name checking has to be disabled first; ``CERT_NONE`` is not
    # accepted while ``check_hostname`` is still enabled.
    insecure_context.check_hostname = False
    insecure_context.verify_mode = ssl.CERT_NONE
    return urlopen(src, context=insecure_context)

55 

56 

class CLICommand:
    """Install additional PAW datasets, pseudopotential or basis sets.

    Without a directory or a source flag, show available setups and GPAW
    setup paths.

    With a directory and a source flag, download and install gpaw-setups
    into INSTALLDIR/[setups-package-name-and-version].
    """
    # NOTE(review): the class docstring above is kept verbatim --
    # presumably the command-line framework displays it as help text;
    # confirm before rewording it.

    @staticmethod
    def add_arguments(parser):
        # Positional install directory plus the general options.
        parser.add_argument('directory', nargs='?')
        parser.add_argument(
            '--version',
            help=("download VERSION of package. "
                  "Run without arguments to display a list of versions. "
                  "VERSION can be the full URL or a part such as "
                  "'0.8' or '0.6.6300'"))
        parser.add_argument('--list-all', action='store_true',
                            help='list packages from all sources')

        # Where the data comes from: a local tarball or exactly one of
        # the download sources (all of them are mutually exclusive).
        source_flags = parser.add_argument_group('source flags')
        exclusive_sources = source_flags.add_mutually_exclusive_group()
        exclusive_sources.add_argument(
            '--tarball', metavar='FILE',
            help='unpack and install from local tarball FILE '
                 'instead of downloading')
        for flag_name, description in sources:
            # Every source flag writes its own name into ``args.source``.
            exclusive_sources.add_argument(
                f'--{flag_name}', action='store_const',
                const=flag_name, dest='source',
                help=description)

        # ``args.register`` ends up True, False or None (neither flag).
        registration_flags = parser.add_argument_group(
            'registration flags (script runs interactively if neither is '
            'supplied)')
        exclusive_registration = (
            registration_flags.add_mutually_exclusive_group())
        exclusive_registration.add_argument(
            '--register', action='store_const', const=True,
            help='run non-interactively and register install path in '
                 'GPAW setup search paths. This is done by adding lines to '
                 '~/.gpaw/rc.py')
        exclusive_registration.add_argument(
            '--no-register', action='store_const',
            const=False, dest='register',
            help='run non-interactively and do not register install path '
                 'in GPAW setup search paths')

    @staticmethod
    def run(args, parser):
        # All the work happens in the module-level main().
        main(args, parser)

107 

108 

def main(args, parser):
    """List the available data packages or install one of them.

    Without a source flag (and without ``--tarball``) the known download
    URLs and the current GPAW setup paths are printed and the program
    exits.  With a source flag but no directory, the URL that would be
    chosen is marked in the listing and the program exits.  Otherwise a
    tarball -- the local file ``args.tarball`` or a download for
    ``args.source`` -- is unpacked below ``args.directory`` and the
    resulting directory is optionally registered in
    ``$HOME/.gpaw/rc.py``.

    Parameters
    ----------
    args
        Parsed command-line namespace.  The attributes ``source``,
        ``tarball``, ``list_all``, ``version``, ``directory`` and
        ``register`` are read; ``list_all`` is forced to True when no
        source was selected.
    parser
        Parser used for error reporting through ``parser.error()``
        (assumed to be argparse-like, i.e. ``error()`` terminates the
        program).

    Raises
    ------
    SystemExit
        When only information was requested (no source or no
        directory).
    """
    if args.source is None:
        args.list_all = True

    def print_blurb():
        print_setups_info(parser)
        print()
        print('Run gpaw install-data --SOURCE DIR to install the newest '
              'setups into DIR.')
        print('Run gpaw install-data --SOURCE --version=VERSION DIR to '
              'install VERSION (from above).')
        print('See gpaw install-data --help for more info.')

    # The sg15 file is a tarbomb.  We will later defuse it by untarring
    # into a subdirectory, so we don't leave a ghastly mess on the
    # unsuspecting user's system.

    if not args.tarball:
        if args.list_all:
            urls_dict = {source: get_urls(source) for source in names}
        else:
            urls_dict = {args.source: get_urls(args.source)}

        def print_urls(urls, marked=None, file=None):
            for url in urls:
                marking = ' [*]' if url == marked else ' '
                print(f' {marking} {url}', file=file)

        def print_all_urls(source=None, marked=None, file=None):
            if source:
                displayed_urls = {source: urls_dict[source]}
            else:
                displayed_urls = urls_dict
            for source, url_sublist in displayed_urls.items():
                print(f'Available setups and pseudopotentials (--{source}):',
                      file=file)
                print_urls(url_sublist, marked, file)
                print(file=file)

        if args.source:
            urls = urls_dict[args.source]
        else:
            print_all_urls()
            print_blurb()
            raise SystemExit

        if not urls:
            parser.error(notfound_msg.format(url=baseurls[args.source]))

        if args.version:
            matching_urls = [candidate for candidate in urls
                             if match_version(candidate, args.version)]
            # Exactly one URL must match; anything else is reported
            # together with a listing that helps picking a version.
            if len(matching_urls) != 1:
                with StringIO() as fobj:
                    if matching_urls:
                        print('\nMore than one setup file matches version '
                              '"%s":' % args.version,
                              file=fobj)
                        print_urls(matching_urls, file=fobj)
                    else:
                        print('\nNo setup matched the specified version '
                              '"%s".' % args.version,
                              file=fobj)
                        print_all_urls(args.source, file=fobj)
                    error_msg = fobj.getvalue()
                parser.error(error_msg)
            url, = matching_urls
        else:
            # No version requested: take the first URL of the list.
            url = urls[0]

        print_all_urls(marked=url)

    # Checked for the download case and for --tarball alike, so that a
    # missing directory never reaches the file system calls below.
    if not args.directory:
        print_blurb()
        raise SystemExit

    targetpath = args.directory

    # Only the file name decides the install directory name and the
    # tarbomb handling; a local tarball may be given with any path.
    if args.tarball:
        tarfname = os.path.basename(args.tarball)
    else:
        tarfname = url.rsplit('/', 1)[1]
    if not tarfname.endswith('.tar.gz'):
        parser.error('Expected a file name ending in .tar.gz, got %s'
                     % tarfname)
    setup_dirname = tarfname[:-len('.tar.gz')]
    setup_path = os.path.abspath(os.path.join(targetpath, setup_dirname))

    with contextlib.ExitStack() as stack:
        push = stack.enter_context
        if args.tarball:
            print('Reading local tarball %s' % args.tarball)
            targzfile = push(tarfile.open(args.tarball))
        else:
            print('Selected %s. Downloading...' % tarfname)
            response = push(urlopen_nocertcheck(url))
            resp_fobj = push(BytesIO(response.read()))
            targzfile = push(tarfile.open(fileobj=resp_fobj))

        os.makedirs(targetpath, exist_ok=True)

        if tarfname.startswith('sg15'):
            # Defuse tarbomb
            os.makedirs(setup_path, exist_ok=True)
            targetpath = os.path.join(targetpath, setup_dirname)

        print('Extracting tarball into %s' % targetpath)
        # The archive may come from the network without certificate
        # checks, so refuse members that would escape the target
        # directory wherever the running Python supports extraction
        # filters.
        if hasattr(tarfile, 'data_filter'):
            targzfile.extractall(targetpath, filter='data')
        else:
            targzfile.extractall(targetpath)

    if not os.path.isdir(setup_path):
        parser.error('Tarball %s did not unpack into the expected '
                     'directory %s' % (tarfname, setup_path))
    print('Setups installed into %s.' % setup_path)

    # Okay, now we have to maybe edit people's rc files.
    # HOME is used whenever it is set; the fallback only avoids a
    # KeyError after a successful installation.
    # NOTE(review): confirm that GPAW reads the rc file from the
    # fallback location when HOME is unset.
    home = os.environ.get('HOME') or os.path.expanduser('~')
    rcfiledir = os.path.join(home, '.gpaw')
    rcfilepath = os.path.join(rcfiledir, 'rc.py')

    # We could do all this by importing the rcfile as well and checking
    # whether things are okay or not.
    rcline = "setup_paths.insert(0, {!r})".format(setup_path)

    # Run interactive mode unless someone specified a flag requiring otherwise
    if args.register is None:
        register_path = False
        answer = input('Register this setup path in %s? [y/n] ' % rcfilepath)
        if answer.lower() in ['y', 'yes']:
            register_path = True
        elif answer.lower() in ['n', 'no']:
            print('As you wish.')
        else:
            print('What do you mean by "%s"? Assuming "n".' % answer)
    else:
        register_path = args.register

    if register_path:
        os.makedirs(rcfiledir, exist_ok=True)
        try:
            with open(rcfilepath) as fobj:
                rc_text = fobj.read()
        except FileNotFoundError:
            # Appending below creates the file.
            rc_text = ''

        if any(line.startswith(rcline) for line in rc_text.split('\n')):
            print('It looks like the path is already registered in %s.'
                  % rcfilepath)
            print('File will not be modified at this time.')
        else:
            with open(rcfilepath, 'a') as rcfd:
                # Do not glue the new statement onto an unterminated
                # last line of an existing file.
                if rc_text and not rc_text.endswith('\n'):
                    print(file=rcfd)
                print(rcline, file=rcfd)
            print('Setup path registered in %s.' % rcfilepath)

            # Show the search paths including the new entry.
            print_setups_info(parser)
    else:
        print('You can manually register the setups by adding the')
        print('following line to %s:' % rcfilepath)
        print()
        print(rcline)
        print()
        print('Or if you prefer to use environment variables, you can')
        print('set GPAW_SETUP_PATH. For example:')
        print()
        print(f'export GPAW_SETUP_PATH={shlex.quote(setup_path)}')
        print()
    print('Installation complete.')

281 

282 

def get_urls(source):
    """Return the list of tarball URLs known for ``source``.

    * ``'gpaw'``: the page at ``baseurls['gpaw']`` is downloaded and
      every line that is a ``gpaw-setups-*.tar.gz`` URL is returned, in
      page order.
    * ``'sg15'``: the directory page is downloaded and the linked
      ``sg15_oncv_upf_YYYY-MM-DD.tar.gz`` files are returned, newest
      date first.
    * ``'basis'`` and ``'test'``: fixed file names appended to the base
      URL; no network access.

    Raises
    ------
    ValueError
        If ``source`` is not one of the known sources.
    """
    try:
        page = baseurls[source]
    except KeyError:
        # Without this the lookup would fail with a bare KeyError and
        # the ValueError at the end could never be reached.
        raise ValueError('Unknown source: %s' % source) from None

    if source == 'gpaw':
        pattern = ('https://wiki.fysik.dtu.dk/gpaw-files/'
                   'gpaw-setups-*.tar.gz')
        with urlopen_nocertcheck(page) as response:
            lines = (line.strip().decode() for line in response)
            urls = [line for line in lines if fnmatch.fnmatch(line, pattern)]

    elif source == 'sg15':
        # We want files such as sg15_oncv_upf_2015-10-07.tar.gz, but
        # they may upload newer files, too.  The dots are escaped so
        # that only a literal ".tar.gz" suffix is accepted.
        pattern = (r'<a\s*href=[^>]+>\s*'
                   r'(sg15_oncv_upf_\d\d\d\d-\d\d-\d\d\.tar\.gz)'
                   r'\s*</a>')

        with urlopen_nocertcheck(page) as response:
            txt = response.read().decode('ascii', errors='replace')
        files = re.compile(pattern).findall(txt)
        # ISO dates sort lexicographically, so this puts the newest
        # file first.
        files.sort(reverse=True)
        urls = [page + fname for fname in files]

    elif source == 'basis':
        files = ['gpaw-basis-NAO-sz+coopt-NGTO-0.9.11271.tar.gz',
                 'gpaw-basis-pvalence-0.9.11271.tar.gz',
                 'gpaw-basis-pvalence-0.9.20000.tar.gz']
        urls = [page + fname for fname in files]

    elif source == 'test':
        urls = [page + 'gpaw-dist-test-source.tar.gz']

    else:
        # A source that has a base URL but no handling above.
        raise ValueError('Unknown source: %s' % source)

    return urls

318 

319 

def print_setups_info(parser):
    """Print GPAW's current setup search paths in priority order.

    The search paths are rebuilt from scratch first (standard paths, rc
    file, data paths) because the rc file may have been modified since
    GPAW was imported.  If ``gpaw`` cannot be imported the problem is
    reported through ``parser.error()``.
    """
    try:
        import gpaw
    except ImportError as e:
        parser.error("Cannot import 'gpaw'. GPAW does not appear to be "
                     f"installed. {e}")

    # The contents of the rc file may have changed.  Thus, we initialize
    # setup_paths again to be sure that everything is as it should be.
    gpaw.setup_paths[:] = gpaw.standard_setup_paths()
    gpaw.read_rc_file()
    gpaw.initialize_data_paths()

    if not gpaw.setup_paths:
        print('GPAW currently has no setup search paths')
        return

    print('Current GPAW setup paths in order of search priority:')
    for rank, path in enumerate(gpaw.setup_paths, start=1):
        print(f'{rank:4d}. {path}')

340 

341 

def get_runs(seq, criterion=lambda x: x):
    """Split ``seq`` into runs of consecutive items sharing a key.

    The key of an item is ``criterion(item)``; a new run starts whenever
    the key differs from the key of the first item of the current run.
    Returns a list of lists (empty for a falsy ``seq``).

    >>> get_runs('aaabacbbcab')  # doctest: +NORMALIZE_WHITESPACE
    [['a', 'a', 'a'], ['b'], ['a'], ['c'], ['b', 'b'], ['c'], ['a'],
     ['b']]
    >>> get_runs('foo,bar,baz', str.isalnum)
    [['f', 'o', 'o'], [','], ['b', 'a', 'r'], [','], ['b', 'a', 'z']]
    >>> get_runs(  # doctest: +NORMALIZE_WHITESPACE
    ...     [1, 2, 5, 3, 4, 7, 10, 3, 5, 2], lambda x: x % 3,
    ... )
    [[1], [2, 5], [3], [4, 7, 10], [3], [5, 2]]
    """
    if not seq:
        return []
    grouped = []
    run_key = None  # Only meaningful once ``grouped`` is non-empty
    for element in seq:
        key = criterion(element)
        if grouped and key == run_key:
            grouped[-1].append(element)
        else:
            run_key = key
            grouped.append([element])
    return grouped

367 

368 

def split_into_chunks(string):
    """Cut ``string`` into alternating alphanumeric and other pieces.

    Consecutive characters with the same ``str.isalnum()`` result are
    kept together, so the pieces joined back give the input string.

    >>> split_into_chunks('')
    []
    >>> split_into_chunks('foo')
    ['foo']
    >>> split_into_chunks('version 10.1.rc0')
    ['version', ' ', '10', '.', '1', '.', 'rc0']
    >>> split_into_chunks('https://gpaw.readthedocs.io/')
    ['https', '://', 'gpaw', '.', 'readthedocs', '.', 'io', '/']
    """
    character_runs = get_runs(string, str.isalnum)
    return list(map(''.join, character_runs))

381 

382 

def match_version(url, version):
    """Tell whether ``version`` occurs in ``url`` as whole tokens.

    Both strings are cut into chunks by ``split_into_chunks()`` and
    ``version`` matches if its chunks appear as a contiguous run of the
    chunks of ``url``.  A run at the very beginning always counts.
    Elsewhere, a version whose first alphanumeric token is numeric only
    counts when the alphanumeric token before it in ``url`` (if there is
    one) is not numeric, so that ``'0'`` does not match the minor
    number in ``'1.0'``.

    Raises ``ValueError`` if ``version`` has no alphanumeric token.

    >>> match_version('0.9.0', '0')
    True
    >>> match_version('1.9.0', '0')
    False
    >>> match_version('foo.0', '.0')
    True
    >>> match_version('1.0', '.0')
    False
    >>> match_version('11', '1')
    False
    >>> match_version('foo.bar.1.0', 'bar.1')
    True
    >>> match_version('foo.bar-1.0', 'bar.1')
    False
    >>> match_version('foobar.1', 'bar.1')
    False
    >>> match_version('foo.bar.11', 'bar.1')
    False
    >>> match_version('/1.0.2', '1')
    True
    """
    url_chunks = split_into_chunks(url)
    version_chunks = split_into_chunks(version)
    num_chunks = len(version_chunks)
    try:
        # Index of the first alphanumeric chunk of ``version``: 0, or 1
        # when the version starts with a separator.
        token_offset = 0 if version_chunks[0].isalnum() else 1
        first_token_is_numeric = version_chunks[token_offset].isnumeric()
    except IndexError:
        raise ValueError(
            f'version = {version!r}: cannot find any alphanumeric token'
        ) from None
    # A match starting from the beginning is always a match
    if version_chunks == url_chunks[:num_chunks]:
        return True
    # A match against the middle of the string may be a false positive
    for rolling_offset in range(1, len(url_chunks) - num_chunks + 1):
        window = url_chunks[rolling_offset:rolling_offset + num_chunks]
        if version_chunks != window:
            continue
        # 'bar.0' matches 'foo.bar.0' and '1.bar.0'
        if not first_token_is_numeric:
            return True
        # '0' matches 'foo.0' but not '1.0'.  Chunks alternate between
        # tokens and separators, so the previous token sits two chunks
        # before the first token of the match.
        previous_index = rolling_offset + token_offset - 2
        # A negative index means there is no previous token at all; it
        # must not wrap around to the end of the URL.
        if previous_index < 0:
            return True
        if not url_chunks[previous_index].isnumeric():
            return True
    return False

431 return False