Coverage for gpaw/benchmark/__init__.py: 33%

267 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2025-07-20 00:19 +0000

1import collections.abc 

2from copy import deepcopy 

3from pprint import pp 

4import numpy as np 

5from gpaw.mpi import world 

6from time import time 

7from json import dumps, loads 

8from pathlib import Path 

9from collections import defaultdict 

10 

11from gpaw.benchmark.systems import parse_system 

12from gpaw.utilities.memory import maxrss 

13 

# Default GPAW keyword fragments and the named refinements ("subsets")
# that can be layered on top of them.  Every *_parameter_subsets table
# maps a subset name to the keyword fragment that the subset contributes.
pw_default_parameters = {'mode': {'name': 'pw', 'ecut': 400}}

pw_parameter_subsets = {
    'high': {'mode': {'ecut': 800}},
    'low': {'mode': {'ecut': 400}},
    'float32': {'mode': {'dtype': np.float32}},
}

lcao_default_parameters = {'mode': {'name': 'lcao'}}

lcao_parameter_subsets = {
    'sz': {'basis': 'sz(dzp)'},
    'dzp': {'basis': 'dzp'},
}

kpts_parameter_subsets = {
    'gamma': {'kpts': (1, 1, 1)},
    'density6': {'kpts': {'density': 6}},
    'density10': {'kpts': {'density': 10}},
    '411': {'kpts': (4, 1, 1)},
}

xc_parameter_subsets = {
    'PBE': {'xc': 'PBE'},
    'LDA': {'xc': 'LDA'},
}

eigensolver_parameter_subsets = {
    'RMMDIIS': {'eigensolver': {'name': 'rmm-diis', 'trial_step': 0.1}},
    'DAV3': {'eigensolver': {'name': 'dav', 'niter': 3}},
}

41 

# Each entry is a 3-tuple:
#   (nickname,
#    '<system>-<parameter sets>' long name,
#    '<core range>:<memory>[:<gpu range>]' requirement string)
benchmarks_list = [
    ('C60_pw', 'C60-pw.high:kpts.gamma', '1-56:4G:1GPU'),
    ('C60_lcao', 'C60-lcao.dzp', '1-56:4G'),
    ('C60_lowpw', 'C60-pw.low:kpts.gamma', '1-56:4G:1GPU'),
    ('C60_lowpw_float', 'C60-pw.low.float32:kpts.gamma', '0:4G:1GPU'),
    ('MoS2_tube',
     'MoS2_tube-pw.high:kpts.411:xc.PBE:parallel.scalapack',
     '56-192:100G:4-16GPU'),
    ('676_graphene',
     'C676-pw:kpts.gamma:xc.PBE:parallel.scalapack',
     '56-192:100G:4-16GPU'),
    ('pw_C6000',
     'C6000-pw.low:kpts.gamma:parallel.domainband.scalapack',
     '576-:5000G:12-GPU'),
    ('pw_C2188',
     'C2188-pw.low:kpts.gamma:parallel.domainband.scalapack',
     '192-:1200G:4-GPU'),
    ('pw_C676',
     'C676-pw.high:kpts.gamma:parallel.scalapack:xc.PBE',
     '56-:500G:4-GPU'),
    ('pw_magbulk', 'magbulk-pw.high:kpts.density6', '1-56:4G:1-4GPU'),
    ('pw_C60_DIIS32',
     'C60-pw.high.float32:kpts.gamma:xc.PBE:eigensolver.RMMDIIS',
     '0:4G:1GPU'),
    ('pw_C676_DIIS32',
     'C676-pw.low.float32:kpts.gamma:xc.PBE:eigensolver.RMMDIIS',
     '0:100G:1-4GPU'),
    ('pw_slab',
     'metalslab-pw.high:kpts.density10:xc.PBE:eigensolver.DAV3',
     '56-:100G:2-GPU'),
]

82 

83 

def get_domainband(size=None):
    """Split a number of ranks into a domain x band factorisation.

    The search starts at the integer part of sqrt(size) and walks
    downwards until a divisor of ``size`` is found, so the two factors
    are as close to each other as possible.

    size: int or None
        Number of ranks to split.  ``None`` means ``world.size``.

    Returns a dictionary ``{'band': ..., 'domain': ...}`` whose values
    multiply to ``size``; ``domain`` is the smaller (or equal) factor.
    """
    if size is None:
        size = world.size

    domain = int(np.sqrt(size))
    while size % domain != 0:
        domain -= 1
        # 1 divides everything, so the search must stop before zero.
        assert domain > 0
    return dict(band=size // domain, domain=domain)

98 

99 

# NOTE: get_domainband() is evaluated right here, when the module is
# imported, so the 'domainband' layout is computed once from the world
# size visible at import time.
parallel_parameter_subsets = {'scalapack': {'parallel': {'sl_auto': True}},
                              'domainband': {'parallel': get_domainband()}}

# A parameter set is a 2-tuple with a dictionary of default gpaw-parameters
# and an additional dictionary of named sub parameter sets.
# The keys are the set names accepted in a parameter descriptor
# (the part before the first '.').
gpaw_parameter_sets = {'pw': (pw_default_parameters, pw_parameter_subsets),
                       'lcao': (lcao_default_parameters,
                                lcao_parameter_subsets),
                       'eigensolver': ({}, eigensolver_parameter_subsets),
                       'kpts': ({}, kpts_parameter_subsets),
                       'xc': ({}, xc_parameter_subsets),
                       'parallel': ({}, parallel_parameter_subsets)}

112 

113 

def parse_range(s):
    """Turn a CPU or GPU range string into ``(lower, upper)`` bounds.

    Any 'GPU' marker is dropped before parsing.  A plain number gives
    identical bounds; with a dash, a missing lower bound becomes 0 and
    a missing upper bound becomes ``np.inf``.

    Examples:
        0-1     -> (0, 1)
        5       -> (5, 5)
        -4GPU   -> (0, 4)
        4-GPU   -> (4, inf)
    """
    text = s.replace('GPU', '')
    if '-' in text:
        lower, upper = text.split('-')
        return (int(lower) if lower else 0,
                int(upper) if upper else np.inf)
    value = int(text)
    return value, value

136 

137 

def parse_mem(memstr):
    """Convert a memory string such as '4G' into a float number of bytes.

    The last character selects the unit (G, M or K, as powers of 1024)
    and everything before it is read as a float.  An unknown unit
    character raises KeyError.
    """
    exponent_by_unit = {'G': 3, 'M': 2, 'K': 1}
    multiplier = 1024 ** exponent_by_unit[memstr[-1]]
    return float(memstr[:-1]) * multiplier

145 

146 

def parse_requirement(req):
    """Parse a requirement string of the form
    ``core_range:memory[:gpu_range]``.

    Returns a dictionary with the keys 'mincores', 'maxcores', 'minmem',
    'mingpus' and 'maxgpus'.  When the GPU field is absent, both GPU
    bounds are 0.
    """
    fields = req.split(':')
    core_bounds = parse_range(fields[0])
    memory = parse_mem(fields[1])
    gpu_bounds = parse_range(fields[2]) if len(fields) == 3 else (0, 0)

    min_cores, max_cores = core_bounds
    min_gpus, max_gpus = gpu_bounds
    return dict(mincores=min_cores,
                maxcores=max_cores,
                minmem=memory,
                mingpus=min_gpus,
                maxgpus=max_gpus)

163 

164 

# Parse the benchmark definitions from benchmarks_list into two
# dictionaries keyed by nickname: `benchmarks` holds the long name
# (system and parameter sets) and `benchmarks_reqs` holds the parsed
# resource requirements.
benchmarks = {}
benchmarks_reqs = {}
for nickname, definition, req in benchmarks_list:
    benchmarks[nickname] = definition
    benchmarks_reqs[nickname] = parse_requirement(req)

172 

173 

def recursive_update(d, u):
    """Merge the mapping ``u`` into the dictionary ``d``, in place.

    Mapping values in ``u`` are merged key by key into the matching
    entry of ``d``; every other value simply overwrites the entry.

    If ``d`` currently holds a non-mapping value under a key for which
    ``u`` supplies a mapping (for example ``'kpts': (1, 1, 1)`` being
    refined by ``'kpts': {'density': 6}``), the old value is replaced by
    the mapping instead of being treated as a merge target.

    Returns ``d`` so calls can be nested.
    """
    for k, v in u.items():
        if isinstance(v, collections.abc.Mapping):
            current = d.get(k)
            if not isinstance(current, collections.abc.MutableMapping):
                # Nothing mergeable stored yet: start from an empty dict
                # rather than trying to assign into e.g. a tuple.
                current = {}
            d[k] = recursive_update(current, v)
        else:
            d[k] = v
    return d

181 

182 

def parse_parameters(parameter_sets):
    """Parse a parameter_sets descriptor into a GPAW keyword dictionary.

    Individual parameter sets are separated by ':'.  Each parameter set
    starts with the name of a main parameter set (a key of
    ``gpaw_parameter_sets``), optionally refined by '.'-separated names
    of parameter subsets.  The defaults of each main set are applied
    first, then the subsets in the order given; later entries override
    earlier ones.

    For example, valid parameter strings are:
        pw.high:kpts.gamma
        pw.high:kpts.gamma:parallel.scalapack
        lcao.dzp:kpts.density6:xc.PBE

    Raises KeyError if a parameter set or subset name is unknown.
    """
    kwargs = {}
    for parameter_set in parameter_sets.split(':'):
        set_name, dot, subset_names = parameter_set.partition('.')
        try:
            default_parameter_set, subsets = gpaw_parameter_sets[set_name]
        except KeyError:
            raise KeyError(
                f'Unknown parameter set {set_name!r}; available: '
                f'{sorted(gpaw_parameter_sets)}') from None
        # Deep copies keep the module-level tables untouched.
        recursive_update(kwargs, deepcopy(default_parameter_set))
        if not dot:
            # No subsets requested for this parameter set.
            continue
        for subsetname in subset_names.split('.'):
            try:
                subset = subsets[subsetname]
            except KeyError:
                raise KeyError(
                    f'Unknown subset {subsetname!r} of parameter set '
                    f'{set_name!r}; available: {sorted(subsets)}') from None
            recursive_update(kwargs, deepcopy(subset))
    return kwargs

211 

212 

def list_benchmarks():
    """Return a text table of all known benchmarks.

    The table has a header line, a rule of dashes exactly as wide as the
    header, and one line per benchmark with its nickname and its
    system-parameter-set definition.  Every line ends with a newline.
    """
    header = '{:20s} | {:35s}'.format('name', 'system-parameter sets')
    # The rule is sized from the header text alone (no newline counted).
    lines = [header, '-' * len(header)]
    lines.extend(f'{benchmark:20s} | {system_and_parameter_set:35s}'
                 for benchmark, system_and_parameter_set
                 in benchmarks.items())
    return '\n'.join(lines) + '\n'

222 

223 

def benchmarks_error(name):
    """Build the error text shown when no benchmark is called ``name``.

    The message names the missing benchmark and appends the table of
    available benchmarks produced by list_benchmarks().
    """
    return (f'Cannot find benchmark with name {name}\n\n'
            'Available benchmarks\n'
            + list_benchmarks())

229 

230 

def shell_command(cmd, cwd=None):
    """Run ``cmd`` through the shell and return its output as text.

    cmd: str
        Command line handed to the shell.
    cwd: path-like or None
        Working directory for the command.

    On success the captured standard output is returned.  If the command
    exits with a non-zero status, the captured standard output and
    standard error are returned instead, joined by a single space.
    """
    import subprocess

    try:
        completed = subprocess.run(cmd,
                                   shell=True,
                                   cwd=cwd,
                                   check=True,
                                   text=True,
                                   capture_output=True)
    except subprocess.CalledProcessError as failure:
        return f'{failure.output} {failure.stderr}'
    return completed.stdout

244 

245 

def gather_system_information():
    """Collect a description of the machine and of the GPAW checkout.

    Returns a dictionary with the raw text output of several shell
    commands (lscpu, lsmem, date, nvidia-smi, rocm-smi, hostname and two
    git commands run in the directory containing the gpaw package) plus
    the number of MPI ranks.
    """
    import gpaw

    source_dir = Path(gpaw.__file__).parent

    info = {'processor': shell_command('lscpu'),
            'memory': shell_command('lsmem')}
    info['mpi-ranks'] = world.size
    # For these entries the dictionary key is the command itself.
    for command in ('date', 'nvidia-smi', 'rocm-smi'):
        info[command] = shell_command(command)
    info['git-hash'] = shell_command('git rev-parse --verify HEAD',
                                     cwd=source_dir)
    info['git-status'] = shell_command('git status', cwd=source_dir)
    info['hostname'] = shell_command('hostname')
    return info

259 

260 

def parse_name(name):
    """Split a benchmark name into (short_name, long_name, calc_info).

    ``name`` is either a nickname or a long name, optionally followed by
    '#new' or '#old' to select the calculator.  Accepted forms:
        C60_pw
        C60_pw#new
        C60-pw.high:kpts.density6
        C60-pw.high:kpts.density6#new

    A name without '-' is treated as a nickname and looked up in
    ``benchmarks``; an unknown nickname raises Exception with the list of
    available benchmarks.  A name containing '-' is taken as the long
    name itself and gets the short name 'N/A'.
    """
    base, *suffixes = name.split('#')
    if suffixes:
        calc_info = suffixes[0]
        assert calc_info in {'new', 'old'}
        assert len(suffixes) == 1
    else:
        # Default behaviour is new calculation
        calc_info = 'new'

    if '-' in base:
        return 'N/A', base, calc_info

    # Replace nickname with long name
    if base not in benchmarks:
        raise Exception(benchmarks_error(base))
    return base, benchmarks[base], calc_info

292 

293 

def benchmark_atoms_and_calc(long_name, calc_info):
    """Create the atoms and calculator objects for a benchmark.

    long_name: str
        '<system>-<parameter sets>' description of the benchmark.
    calc_info: str
        'new' selects GPAW from gpaw.new.ase_interface, 'old' selects
        GPAW from the gpaw package.  Anything else raises Exception.

    The calculator writes its log to '<long_name>-<calc_info>.log'.
    Returns the tuple (atoms, calculator).
    """
    if calc_info not in ('new', 'old'):
        raise Exception(f'Unknown calc info {calc_info}')
    if calc_info == 'new':
        from gpaw.new.ase_interface import GPAW
    else:
        from gpaw import GPAW

    system, parameter_sets = long_name.split('-')
    atoms = parse_system(system)
    parameters = parse_parameters(parameter_sets)
    if world.rank == 0:
        # Show the resolved parameters once, from the first rank only.
        pp(parameters, indent=4, sort_dicts=True)
    log_file = f'{long_name}-{calc_info}.log'
    atoms.calc = GPAW(**parameters, txt=log_file)
    return atoms, atoms.calc

312 

313 

def gs_and_move_atoms(long_name, calc_info):
    """Main GPAW benchmark function

    Calculates energy and forces for the initial geometry, perturbs the
    geometry, and calculates energy and forces again.  The timings of the
    first and second steps are recorded, to simulate a typical geometry
    relaxation step.

    The perturbation is a step of 0.1 * F along the forces.  If the
    largest force component is below 0.0001, the cell is strained using
    the stress tensor instead (scaling the atoms with the cell).

    Returns a dictionary with the first-step energy, the second-step
    forces as nested lists, and the 'First step' / 'Second step' timing
    records produced by Walltime.todict().
    """
    atoms, calc = benchmark_atoms_and_calc(long_name, calc_info)
    with Walltime('First step') as step1:
        E = atoms.get_potential_energy()
        F = atoms.get_forces()
    # NOTE(review): the source this was recovered from had lost its
    # indentation; the geometry update below is assumed to sit outside
    # the timed 'First step' region -- confirm against the repository.
    if abs(F).max() < 0.0001:
        # Forces (nearly) vanish, so moving atoms along F would leave the
        # geometry unchanged; strain the cell via the stress instead.
        S = atoms.get_stress(voigt=False)
        atoms.set_cell(atoms.cell @ (np.eye(3) - 0.02 * S), scale_atoms=True)
    else:
        atoms.positions += 0.1 * F
        atoms.wrap()
    with Walltime('Second step') as step2:
        atoms.get_potential_energy()
        F = atoms.get_forces()

    return {'energy': E,
            'forces': F.tolist(),
            **step1.todict(),
            **step2.todict()}

339 

340 

class Walltime:
    """Context manager that times the enclosed block.

    On exit it records the end time and the value returned by maxrss()
    (presumably the peak resident memory -- see gpaw.utilities.memory).
    If the block raised, the exception triple is stored in ``error``;
    the exception is not suppressed.
    """

    def __init__(self, name):
        # `name` becomes the top-level key of the todict() record.
        self.name = name
        self.max_rss = None
        self.error = None

    def __enter__(self):
        self.start = time()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        raised = exc_type is not None
        if raised:
            self.error = (exc_type, exc_value, exc_traceback)
        self.end = time()
        self.max_rss = maxrss()

    @property
    def walltime(self):
        """Elapsed time between entering and leaving the block."""
        return self.end - self.start

    def todict(self):
        """Return ``{name: {'walltime', 'error', 'max_rss'}}``."""
        record = {'walltime': self.walltime,
                  'error': self.error,
                  'max_rss': self.max_rss}
        return {self.name: record}

365 

366 

class Benchmark(Walltime):
    """Walltime record named 'Benchmark' that also carries the system
    information, the benchmark results and arbitrary extra metadata."""

    def __init__(self, system_info, **kwargs):
        super().__init__('Benchmark')
        self.system_info = system_info
        self.kwargs = kwargs
        # Filled in by the caller once the benchmark has run.
        self.results = None

    def todict(self):
        """Return the timing record extended with 'system_info',
        'results' and the extra keyword metadata."""
        dct = super().todict()
        entry = dct[self.name]
        entry['system_info'] = self.system_info
        entry['results'] = self.results
        entry.update(self.kwargs)
        return dct

    def write_json(self, fname):
        """Serialise todict() as JSON into the file ``fname``."""
        payload = dumps(self.todict())
        Path(fname).write_text(payload)

383 

384 

def benchmark_main(name):
    """Run one benchmark and write its result to '<name>-benchmark.json'.

    name: str
        Short name or long name of the benchmark, optionally with a
        '#new' / '#old' suffix (see parse_name).
    """
    # Run the gs_and_move_atoms benchmark for 'name' where
    # name can be either a short name or a long name.
    short_name, long_name, calc_info = parse_name(name)

    # Only rank 0 gathers the system information; other ranks keep None.
    if world.rank == 0:
        system_info = gather_system_information()
        print('Running benchmark', name)
    else:
        system_info = None

    benchmark_info = {'shortname': short_name,
                      'longname': long_name,
                      'calcinfo': calc_info}

    # Synchronise all ranks before the timed region starts.
    world.barrier()
    with Benchmark(system_info, **benchmark_info) as results:
        results.results = gs_and_move_atoms(long_name, calc_info)
    if world.rank == 0:
        # Finally, write all of the results to a json file
        results.write_json(f'{name}-benchmark.json')

406 

407 

def get_benchmarks(memory='8G', cores=16, gpus=0):
    """Yield the nicknames of the benchmarks that fit the given resources.

    memory: str
        Available memory in the format understood by parse_mem ('8G').
    cores: int
        Number of CPU cores; only checked when ``gpus`` is 0.
    gpus: int
        Number of GPUs; when positive, the GPU range of each benchmark is
        checked instead of its core range.
    """
    for benchmark, long_name in benchmarks.items():
        requirements = benchmarks_reqs[benchmark]
        if gpus > 0:
            if gpus < requirements.get('mingpus', 1):
                continue
            if gpus > requirements.get('maxgpus', np.inf):
                continue
        else:
            if cores < requirements.get('mincores', 1):
                continue
            if cores > requirements.get('maxcores', np.inf):
                continue
        # NOTE(review): indentation was lost in the source this was
        # recovered from; the memory check is assumed to apply to both
        # the CPU and the GPU branch -- confirm against the repository.
        # NOTE(review): '<=' also skips a benchmark when the available
        # memory exactly equals its minimum; confirm this is intended.
        if parse_mem(memory) <= requirements.get('minmem', np.inf):
            continue
        yield benchmark

424 

425 

def sprint(s, summary=False):
    """Print the string ``s``, treating long strings specially.

    Strings of at most 60 characters are printed with trailing
    whitespace removed.  Longer strings are either collapsed to a single
    line, cut to 60 characters and followed by '...' (``summary=True``),
    or printed in full after an empty line (``summary=False``).
    """
    if len(s) <= 60:
        print(s.rstrip())
        return

    if not summary:
        print()
        print(s)
        return

    # split() without arguments already splits on newlines and tabs.
    collapsed = ' '.join(s.split())
    print(collapsed[:60], '...')

438 

439 

def mypp(dct, indent=0, summary=True):
    """Pretty-print a (nested) dictionary, one 'key: value' per line.

    Nested dictionaries are printed below their key, indented by four
    more spaces.  String values go through sprint() with the given
    ``summary`` flag; all other values are printed as they are.
    """
    pad = ' ' * indent
    for key, value in dct.items():
        print(pad + key + ': ', end='')
        if isinstance(value, str):
            sprint(value, summary=summary)
            continue
        if isinstance(value, dict):
            print()
            mypp(value, indent=indent + 4, summary=summary)
            continue
        print(value)

450 

451 

def load_benchmark(fname):
    """Read the JSON file ``fname`` and return the decoded object."""
    text = Path(fname).read_text()
    return loads(text)

455 

456 

def view_benchmark(fname):
    """Load the benchmark JSON file ``fname`` and pretty-print it."""
    contents = load_benchmark(fname)
    mypp(contents)

459 

460 

def parse_git_status(text):
    """Extract the branch name from ``git status`` output.

    Returns the last word of the first line starting with 'On branch',
    or '???' if there is no such line.
    """
    branch_lines = (line for line in text.split('\n')
                    if line.startswith('On branch'))
    first = next(branch_lines, None)
    if first is None:
        return '???'
    return first.split()[-1]

468 

469 

def parse_processor(text):
    """Parse the processor model from lscpu output.

    Returns the text following 'Model name:' on the first line whose
    first non-blank characters are 'Model name:'.  Leading whitespace is
    ignored because lscpu may print the field indented beneath another
    field.  Lines such as 'BIOS Model name:' do not match.

    If no such line exists, the string 'No "Model name:" found' is
    returned.
    """
    marker = 'Model name:'
    for line in text.split('\n'):
        stripped = line.strip()
        if stripped.startswith(marker):
            return stripped.split(marker)[-1].strip()
    return 'No "Model name:" found'

477 

478 

479def parse_nvidia_smi(dct, out): 

480 """Parse output from nvidia-smi command. 

481 

482 Gets the name of the GPU from out, and accumulates to dct 

483 how many there are.""" 

484 if 'command not found' in out: 

485 return 

486 for line in out.split('\n'): 

487 if 'NVIDIA ' in line: 

488 def get_gpu(): 

489 for n in line.split()[3:]: 

490 if n in {'|', 'On', 'Off'}: 

491 break 

492 yield n 

493 dct[' '.join(get_gpu())] += 1 

494 

495 

def parse_rocm_smi(dct, out):
    """Parse output from the rocm-smi command (not implemented yet).

    dct: mapping of GPU name to count
        Left untouched for now.
    out: str
        Text captured from running rocm-smi.

    Returns without doing anything when ``out`` shows that rocm-smi is
    not installed or produced no output.  Both the 'command not found'
    wording and the shorter 'rocm-smi: not found' wording that some
    shells print are recognised.

    Raises NotImplementedError for any other output, since parsing real
    rocm-smi output is not supported.
    """
    if not out.strip() or 'not found' in out:
        return

    raise NotImplementedError('Parsing of rocm-smi output is not implemented')

501 

502 

def parse_gpu(nvidia, rocm):
    """Summarise the GPUs found in nvidia-smi and rocm-smi output.

    Returns a space-separated string with one item per GPU model: the
    bare model name when there is one such GPU, '<count>x (<name>)'
    when there are several.
    """
    counts = defaultdict(int)
    parse_nvidia_smi(counts, nvidia)
    parse_rocm_smi(counts, rocm)

    labels = []
    for name, number in counts.items():
        labels.append(f'{number}x ({name})' if number > 1 else name)
    return ' '.join(labels)

509 

510 

def benchmark_from_dict(dct):
    """Condense the full JSON output of a benchmark into a flat summary.

    ``dct`` is the decoded benchmark file; everything is read from its
    'Benchmark' entry.  Raw command output stored under 'system_info' is
    reduced to short strings (processor model, GPU list, branch name).
    """
    bench = dct['Benchmark']
    results = bench['results']
    system_info = bench['system_info']

    # Filled key by key so the summary keeps a fixed key order.
    summary = {}
    summary['walltime'] = bench['walltime']
    summary['shortname'] = bench['shortname']
    summary['processor'] = parse_processor(system_info['processor'])
    summary['gpu'] = parse_gpu(system_info['nvidia-smi'],
                               system_info['rocm-smi'])
    summary['longname'] = bench['longname']
    summary['hostname'] = system_info['hostname'].strip()
    summary['calcinfo'] = bench['calcinfo']
    summary['mpi-ranks'] = system_info['mpi-ranks']
    summary['First step'] = results['First step']['walltime']
    summary['Second step'] = results['Second step']['walltime']
    summary['max_rss'] = bench['max_rss']
    summary['githash'] = system_info['git-hash'].strip()
    summary['branch'] = parse_git_status(system_info['git-status'])
    return summary

533 

534 

def gather_benchmarks(directories, output_file):
    """Summarise several benchmark result files into one JSON file.

    directories: iterable of path-like
        Despite the name, each item is passed to load_benchmark() and is
        therefore the path of a benchmark JSON file.
    output_file: path-like
        File that receives the list of summaries as indented JSON.

    Files that cannot be loaded or summarised are skipped; the file
    name and the error are printed so the failure can be traced.

    Returns the list of summary dictionaries.
    """
    lst = []
    for fname in directories:
        try:
            lst.append(benchmark_from_dict(load_benchmark(fname)))
        except Exception as e:
            # Best effort: one broken file must not abort the gathering.
            print(f'Skipping {fname}: {type(e).__name__}: {e}')
    Path(output_file).write_text(dumps(lst, indent=4))
    return lst