Coverage for gpaw/benchmark/__init__.py: 33%
267 statements
« prev ^ index » next coverage.py v7.7.1, created at 2025-07-20 00:19 +0000
« prev ^ index » next coverage.py v7.7.1, created at 2025-07-20 00:19 +0000
1import collections.abc
2from copy import deepcopy
3from pprint import pp
4import numpy as np
5from gpaw.mpi import world
6from time import time
7from json import dumps, loads
8from pathlib import Path
9from collections import defaultdict
11from gpaw.benchmark.systems import parse_system
12from gpaw.utilities.memory import maxrss
# Named GPAW parameter fragments.  For each family there is an optional
# dictionary of defaults and a dictionary of named subsets overriding them.

# Plane-wave mode
pw_default_parameters = {'mode': {'name': 'pw', 'ecut': 400}}
pw_parameter_subsets = {
    'high': {'mode': {'ecut': 800}},
    'low': {'mode': {'ecut': 400}},
    'float32': {'mode': {'dtype': np.float32}},
}

# LCAO mode
lcao_default_parameters = {'mode': {'name': 'lcao'}}
lcao_parameter_subsets = {
    'sz': {'basis': 'sz(dzp)'},
    'dzp': {'basis': 'dzp'},
}

# k-point sampling
kpts_parameter_subsets = {
    'gamma': {'kpts': (1, 1, 1)},
    'density6': {'kpts': {'density': 6}},
    'density10': {'kpts': {'density': 10}},
    '411': {'kpts': (4, 1, 1)},
}

# Exchange-correlation functional
xc_parameter_subsets = {
    'PBE': {'xc': 'PBE'},
    'LDA': {'xc': 'LDA'},
}

# Eigensolver
eigensolver_parameter_subsets = {
    'RMMDIIS': {'eigensolver': {'name': 'rmm-diis', 'trial_step': 0.1}},
    'DAV3': {'eigensolver': {'name': 'dav', 'niter': 3}},
}
# Benchmark definitions, one 3-tuple per benchmark:
#   (nickname,
#    long name "<system>-<parameter sets>",
#    requirement string "core_range:memory[:gpu_range]")
benchmarks_list = [
    ('C60_pw',
     'C60-pw.high:kpts.gamma',
     '1-56:4G:1GPU'),
    ('C60_lcao',
     'C60-lcao.dzp',
     '1-56:4G'),
    ('C60_lowpw',
     'C60-pw.low:kpts.gamma',
     '1-56:4G:1GPU'),
    ('C60_lowpw_float',
     'C60-pw.low.float32:kpts.gamma',
     '0:4G:1GPU'),
    ('MoS2_tube',
     'MoS2_tube-pw.high:kpts.411:xc.PBE:parallel.scalapack',
     '56-192:100G:4-16GPU'),
    ('676_graphene',
     'C676-pw:kpts.gamma:xc.PBE:parallel.scalapack',
     '56-192:100G:4-16GPU'),
    ('pw_C6000',
     'C6000-pw.low:kpts.gamma:parallel.domainband.scalapack',
     '576-:5000G:12-GPU'),
    ('pw_C2188',
     'C2188-pw.low:kpts.gamma:parallel.domainband.scalapack',
     '192-:1200G:4-GPU'),
    ('pw_C676',
     'C676-pw.high:kpts.gamma:parallel.scalapack:xc.PBE',
     '56-:500G:4-GPU'),
    ('pw_magbulk',
     'magbulk-pw.high:kpts.density6',
     '1-56:4G:1-4GPU'),
    ('pw_C60_DIIS32',
     'C60-pw.high.float32:kpts.gamma:xc.PBE:eigensolver.RMMDIIS',
     '0:4G:1GPU'),
    ('pw_C676_DIIS32',
     'C676-pw.low.float32:kpts.gamma:xc.PBE:eigensolver.RMMDIIS',
     '0:100G:1-4GPU'),
    ('pw_slab',
     'metalslab-pw.high:kpts.density10:xc.PBE:eigensolver.DAV3',
     '56-:100G:2-GPU'),
]
def get_domainband(size=None):
    """Divide a world size into domain and band counts (as square as possible).

    Parameters
    ----------
    size : int, optional
        Number of ranks to divide.  If None, ``world.size`` is used.

    Returns
    -------
    dict
        ``{'band': b, 'domain': d}`` where ``b * d == size`` and ``d`` is
        the largest divisor of ``size`` that does not exceed its square
        root.

    Raises
    ------
    ValueError
        If ``size`` is smaller than 1.
    """
    from math import isqrt

    if size is None:
        size = world.size
    if size < 1:
        raise ValueError(f'size must be a positive integer, got {size}')

    # Integer square root: exact for any int, no float rounding involved.
    domain = isqrt(size)
    # Walk down to the nearest divisor.  This always terminates, because
    # domain == 1 divides every size.
    while size % domain:
        domain -= 1
    return {'band': size // domain,
            'domain': domain}
# NOTE: get_domainband() is called right here, so the band/domain split is
# computed once, when this module is imported.
parallel_parameter_subsets = {
    'scalapack': {'parallel': {'sl_auto': True}},
    'domainband': {'parallel': get_domainband()},
}

# Every parameter set is a 2-tuple:
#   (dictionary of default gpaw parameters,
#    dictionary of named sub parameter sets)
gpaw_parameter_sets = {
    'pw': (pw_default_parameters, pw_parameter_subsets),
    'lcao': (lcao_default_parameters, lcao_parameter_subsets),
    'eigensolver': ({}, eigensolver_parameter_subsets),
    'kpts': ({}, kpts_parameter_subsets),
    'xc': ({}, xc_parameter_subsets),
    'parallel': ({}, parallel_parameter_subsets),
}
def parse_range(s):
    """Parse a CPU or GPU range string into a (lower, upper) tuple.

    Any ``GPU`` marker is removed first.  A string without ``-`` is a
    single number used for both bounds.  Otherwise the text before the
    ``-`` is the lower bound (0 when empty) and the text after it is the
    upper bound (``np.inf`` when empty).

    Examples:
        0-1
        5
        -4GPU
    """
    spec = s.replace('GPU', '')
    if '-' not in spec:
        value = int(spec)
        return value, value

    lower_text, upper_text = spec.split('-')
    lower = int(lower_text) if lower_text else 0
    upper = int(upper_text) if upper_text else np.inf
    return lower, upper
def parse_mem(memstr):
    """Convert a memory string such as ``4G`` to a number of bytes (float).

    The last character selects the unit and must be one of ``G``, ``M``
    or ``K`` (powers of 1024); everything before it is the numeric value.
    """
    unit_factors = {'G': 1024**3,
                    'M': 1024**2,
                    'K': 1024**1}
    factor = unit_factors[memstr[-1]]
    amount = float(memstr[:-1])
    return amount * factor
def parse_requirement(req):
    """Parse a requirement string of the form core_range:memory[:gpu_range].

    Returns a dictionary with the keys ``mincores``, ``maxcores``,
    ``minmem``, ``mingpus`` and ``maxgpus``.  The GPU bounds are both 0
    unless the string consists of exactly three colon separated fields.
    """
    fields = req.split(':')
    core_low, core_high = parse_range(fields[0])
    memory = parse_mem(fields[1])
    gpu_low, gpu_high = (parse_range(fields[2]) if len(fields) == 3
                         else (0, 0))
    return {'mincores': core_low,
            'maxcores': core_high,
            'minmem': memory,
            'mingpus': gpu_low,
            'maxgpus': gpu_high}
# Build the lookup tables from benchmarks_list:
#   benchmarks:      nickname -> long name
#   benchmarks_reqs: nickname -> parsed requirement dictionary
benchmarks = {}
benchmarks_reqs = {}
for nickname, definition, req in benchmarks_list:
    benchmarks.update({nickname: definition})
    benchmarks_reqs.update({nickname: parse_requirement(req)})
def recursive_update(d, u):
    """Merge mapping ``u`` into ``d`` in place, descending into sub-mappings.

    For every key of ``u`` whose value is a mapping, the value is merged
    recursively into ``d[key]``; any other value simply replaces
    ``d[key]``.  If ``d[key]`` is missing or is not itself a mutable
    mapping (for example a tuple of k-points that is being replaced by a
    ``{'density': ...}`` dictionary), merging starts from an empty dict so
    that the new mapping replaces the old value instead of failing.

    Returns ``d``.
    """
    for key, value in u.items():
        if isinstance(value, collections.abc.Mapping):
            current = d.get(key)
            if not isinstance(current, collections.abc.MutableMapping):
                # Nothing to merge into: start from a fresh dictionary.
                current = {}
            d[key] = recursive_update(current, value)
        else:
            d[key] = value
    return d
def parse_parameters(parameter_sets):
    """Parse a parameter-set descriptor into a dictionary of parameters.

    Individual parameter sets are separated by ``:``.  Each one starts
    with the name of a main parameter set (a key of
    ``gpaw_parameter_sets``), which can be refined further by ``.``
    separated names of its parameter subsets.  Defaults and subsets are
    merged from left to right, so later entries override earlier ones.

    For example, valid parameter strings are:
        pw.high:kpts.gamma
        pw.low.float32:kpts.gamma:parallel.scalapack
        lcao.dzp:kpts.density6:xc.PBE

    Raises
    ------
    KeyError
        If a parameter set name or a subset name is not known.
    """
    kwargs = {}
    for parameter_set in parameter_sets.split(':'):
        set_name, dot, subset_names = parameter_set.partition('.')
        try:
            default_parameter_set, subsets = gpaw_parameter_sets[set_name]
        except KeyError:
            raise KeyError(
                f'Unknown parameter set {set_name!r}; available: '
                f'{", ".join(gpaw_parameter_sets)}') from None
        # deepcopy so that the module-level templates are never modified.
        recursive_update(kwargs, deepcopy(default_parameter_set))
        if not dot:
            # Only the main parameter set was given, no subsets.
            continue
        for subsetname in subset_names.split('.'):
            try:
                subset = subsets[subsetname]
            except KeyError:
                raise KeyError(
                    f'Unknown subset {subsetname!r} of parameter set '
                    f'{set_name!r}; available: '
                    f'{", ".join(subsets)}') from None
            recursive_update(kwargs, deepcopy(subset))
    return kwargs
def list_benchmarks():
    """Return a text table of benchmark names and their long names.

    The table has a header line, a rule of dashes and one line per entry
    of the ``benchmarks`` dictionary.
    """
    header = '{:20s} | {:35s}\n'.format('name', 'system-parameter sets')
    # The rule is as long as the header string, its newline included.
    rule = '-' * len(header) + '\n'
    rows = [f'{benchmark:20s} | {system_and_parameter_set:35s}\n'
            for benchmark, system_and_parameter_set in benchmarks.items()]
    return ''.join([header, rule, *rows])
def benchmarks_error(name):
    """Return the error text for an unknown benchmark ``name``.

    The message names the missing benchmark and is followed by the table
    of available benchmarks produced by ``list_benchmarks``.
    """
    return (f'Cannot find benchmark with name {name}\n\n'
            'Available benchmarks\n'
            + list_benchmarks())
def shell_command(cmd, cwd=None):
    """Run ``cmd`` through the shell and return its output as text.

    On success the captured standard output is returned.  If the command
    exits with a non-zero status, the captured standard output and
    standard error are returned instead, joined by a single space.

    ``cwd`` is the working directory for the command (None: current one).
    """
    import subprocess

    try:
        completed = subprocess.run(cmd,
                                   shell=True,
                                   cwd=cwd,
                                   check=True,
                                   text=True,
                                   capture_output=True)
    except subprocess.CalledProcessError as failure:
        return f'{failure.output} {failure.stderr}'
    return completed.stdout
def gather_system_information():
    """Collect information about the machine and the gpaw source tree.

    Returns a dictionary holding the raw output of several shell commands
    (lscpu, lsmem, date, nvidia-smi, rocm-smi, hostname and two git
    commands run in the directory of the gpaw package) together with the
    number of MPI ranks.
    """
    import gpaw

    def git(arguments):
        # Run git inside the directory of the imported gpaw package.
        return shell_command(f'git {arguments}',
                             cwd=Path(gpaw.__file__).parent)

    return {'processor': shell_command('lscpu'),
            'memory': shell_command('lsmem'),
            'mpi-ranks': world.size,
            'date': shell_command('date'),
            'nvidia-smi': shell_command('nvidia-smi'),
            'rocm-smi': shell_command('rocm-smi'),
            'git-hash': git('rev-parse --verify HEAD'),
            'git-status': git('status'),
            'hostname': shell_command('hostname')}
def parse_name(name):
    """Parse (either long or nickname input) possibly containing calc info.

    The name can be
        C60_pw
        C60_pw#new
        C60-pw.high:kpts.density6
        C60-pw.high:kpts.density6#new

    Returns
    -------
    tuple
        ``(short_name, long_name, calc_info)``.  ``short_name`` is 'N/A'
        when a long name was given, and ``calc_info`` defaults to 'new'.

    Raises
    ------
    ValueError
        If there is more than one ``#`` or the calc info is neither 'new'
        nor 'old'.
    Exception
        If a nickname is given that is not a known benchmark.
    """
    base, *suffixes = name.split('#')
    if len(suffixes) > 1:
        raise ValueError(
            f'At most one "#" is allowed in a benchmark name: {name!r}')
    # Default behaviour is new calculation
    calc_info = suffixes[0] if suffixes else 'new'
    # Explicit check instead of assert: asserts disappear under python -O.
    if calc_info not in {'new', 'old'}:
        raise ValueError(
            f'Calculator info must be "new" or "old", got {calc_info!r}')

    if '-' in base:
        # A long name was given directly.
        return 'N/A', base, calc_info

    # Replace nickname with long name
    if base not in benchmarks:
        raise Exception(benchmarks_error(base))
    return base, benchmarks[base], calc_info
def benchmark_atoms_and_calc(long_name, calc_info):
    """Create the atoms and calculator objects for a benchmark.

    ``long_name`` has the form ``<system>-<parameter sets>`` and
    ``calc_info`` selects the GPAW class to import: 'new' or 'old'.  The
    calculator logs to ``<long_name>-<calc_info>.log``.  Rank 0 prints the
    parsed parameters.

    Returns the tuple ``(atoms, calculator)``.
    """
    if calc_info not in ('new', 'old'):
        raise Exception(f'Unknown calc info {calc_info}')
    if calc_info == 'new':
        from gpaw.new.ase_interface import GPAW
    else:
        from gpaw import GPAW

    system, parameter_sets = long_name.split('-')
    atoms = parse_system(system)
    parameters = parse_parameters(parameter_sets)
    if world.rank == 0:
        pp(parameters, indent=4, sort_dicts=True)

    logfile = f'{long_name}-{calc_info}.log'
    atoms.calc = GPAW(**parameters, txt=logfile)
    return atoms, atoms.calc
def gs_and_move_atoms(long_name, calc_info):
    """Main GPAW benchmark function

    Calculates energy and forces for one geometry, then takes a
    0.1 * F step towards the minimum (if the largest force component is
    below 0.0001 the cell is strained by 0.02 times the stress instead)
    and calculates energy and forces again.  The timings of the first and
    the second calculation are recorded, to simulate a typical geometry
    relaxation step.

    Returns a dictionary with the first energy, the last forces (as
    nested lists) and the records of both timed steps.
    """
    atoms, calc = benchmark_atoms_and_calc(long_name, calc_info)

    with Walltime('First step') as first:
        energy = atoms.get_potential_energy()
        forces = atoms.get_forces()

    # Perturb the structure (not timed).
    forces_vanish = abs(forces).max() < 0.0001
    if not forces_vanish:
        atoms.positions += 0.1 * forces
        atoms.wrap()
    else:
        stress = atoms.get_stress(voigt=False)
        deformation = np.eye(3) - 0.02 * stress
        atoms.set_cell(atoms.cell @ deformation, scale_atoms=True)

    with Walltime('Second step') as second:
        atoms.get_potential_energy()
        forces = atoms.get_forces()

    summary = {'energy': energy, 'forces': forces.tolist()}
    summary.update(first.todict())
    summary.update(second.todict())
    return summary
class Walltime:
    """Context manager recording wall-clock time and maxrss() of a block.

    On exit the end time and the value of ``maxrss()`` are stored.  If the
    block raised, the exception triple is stored in ``error``; the
    exception itself is not suppressed.
    """

    def __init__(self, name):
        self.name = name
        # Both stay None until __exit__ has filled them in.
        self.error, self.max_rss = None, None

    def __enter__(self):
        self.start = time()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        block_raised = exc_type is not None
        if block_raised:
            self.error = (exc_type, exc_value, exc_traceback)
        self.end = time()
        self.max_rss = maxrss()

    @property
    def walltime(self):
        """Seconds elapsed between entering and leaving the block."""
        elapsed = self.end - self.start
        return elapsed

    def todict(self):
        """Return ``{name: {'walltime', 'error', 'max_rss'}}``."""
        record = {'walltime': self.walltime,
                  'error': self.error,
                  'max_rss': self.max_rss}
        return {self.name: record}
class Benchmark(Walltime):
    """Walltime record named 'Benchmark' carrying results and metadata.

    ``system_info`` and the extra keyword arguments are stored as given
    and included in the dictionary returned by ``todict``.
    """

    def __init__(self, system_info, **kwargs):
        super().__init__('Benchmark')
        self.kwargs = kwargs
        self.system_info = system_info
        self.results = None

    def todict(self):
        """Return the Walltime dictionary extended with benchmark data."""
        dct = super().todict()
        record = dct[self.name]
        record['system_info'] = self.system_info
        record['results'] = self.results
        # Keyword arguments go last so they win on key clashes.
        record.update(self.kwargs)
        return dct

    def write_json(self, fname):
        """Write ``todict()`` as JSON text to the file ``fname``."""
        text = dumps(self.todict())
        Path(fname).write_text(text)
def benchmark_main(name):
    """Run the gs_and_move_atoms benchmark for ``name``.

    ``name`` can be either a short name or a long name.  System
    information is gathered on rank 0 only, and rank 0 finally writes all
    of the results to ``<name>-benchmark.json``.
    """
    short_name, long_name, calc_info = parse_name(name)

    system_info = None
    if world.rank == 0:
        system_info = gather_system_information()
        print('Running benchmark', name)

    world.barrier()
    with Benchmark(system_info,
                   shortname=short_name,
                   longname=long_name,
                   calcinfo=calc_info) as results:
        results.results = gs_and_move_atoms(long_name, calc_info)

    # Written after the with block, so that the end time is available.
    if world.rank == 0:
        results.write_json(f'{name}-benchmark.json')
def get_benchmarks(memory='8G', cores=16, gpus=0):
    """Yield the nicknames of the benchmarks that fit the given resources.

    With ``gpus > 0`` the GPU range of a benchmark is checked against
    ``gpus``; otherwise its core range is checked against ``cores``.  In
    both cases the benchmark is skipped unless ``memory`` is larger than
    the memory it requires.
    """
    for benchmark in benchmarks:
        requirements = benchmarks_reqs[benchmark]
        if gpus > 0:
            available = gpus
            lowest = requirements.get('mingpus', 1)
            highest = requirements.get('maxgpus', np.inf)
        else:
            available = cores
            lowest = requirements.get('mincores', 1)
            highest = requirements.get('maxcores', np.inf)
        if available < lowest or available > highest:
            continue
        # NOTE(review): "<=" also skips a benchmark when the memory is
        # exactly equal to its requirement -- confirm this is intended.
        if parse_mem(memory) <= requirements.get('minmem', np.inf):
            continue
        yield benchmark
def sprint(s, summary=False):
    """Print the string ``s``, treating long strings specially.

    Strings of at most 60 characters are printed with trailing whitespace
    removed.  Longer strings are printed in full after an empty line, or,
    if ``summary`` is true, with all whitespace runs collapsed to single
    spaces and cut to 60 characters followed by '...'.
    """
    if len(s) <= 60:
        print(s.rstrip())
        return
    if not summary:
        print()
        print(s)
        return
    # split() without arguments also splits on newlines and tabs.
    one_line = ' '.join(s.split())
    print(one_line[:60], '...')
def mypp(dct, indent=0, summary=True):
    """Print the dictionary ``dct`` as an indented ``key: value`` listing.

    String values are printed with ``sprint``, nested dictionaries are
    printed recursively with four more spaces of indentation, and all
    other values are printed as they are.
    """
    padding = ' ' * indent
    for key, value in dct.items():
        print(padding + key + ': ', end='')
        if isinstance(value, str):
            sprint(value, summary=summary)
            continue
        if isinstance(value, dict):
            print()
            mypp(value, indent=indent + 4, summary=summary)
            continue
        print(value)
def load_benchmark(fname):
    """Read the file ``fname`` and return its decoded JSON content."""
    json_text = Path(fname).read_text()
    return loads(json_text)
def view_benchmark(fname):
    """Load the benchmark JSON file ``fname`` and print it with ``mypp``."""
    content = load_benchmark(fname)
    mypp(content)
def parse_git_status(text):
    """Parse the branch from git status output.

    Returns the last word of the first line starting with 'On branch',
    or '???' if there is no such line.
    """
    branch_lines = (line for line in text.split('\n')
                    if line.startswith('On branch'))
    first = next(branch_lines, None)
    if first is None:
        return '???'
    return first.split()[-1]
def parse_processor(text):
    """Parse the processor model from lscpu output.

    Returns the text following ``Model name:`` on the first line that
    starts with that label, ignoring leading whitespace, because lscpu
    may print the field indented.  Lines such as ``BIOS Model name:`` do
    not match.  If no line matches, 'No "Model name:" found' is returned.
    """
    label = 'Model name:'
    for line in text.split('\n'):
        field = line.strip()
        if field.startswith(label):
            return field[len(label):].strip()
    return 'No "Model name:" found'
def parse_nvidia_smi(dct, out):
    """Parse output from nvidia-smi command.

    For every line of ``out`` containing 'NVIDIA ', the GPU name is taken
    to be the words from the fourth one up to (not including) the first
    '|', 'On' or 'Off'; the counter ``dct[name]`` is increased by one.
    Nothing is done if ``out`` contains 'command not found'.
    """
    if 'command not found' in out:
        return

    stop_words = {'|', 'On', 'Off'}
    for row in out.split('\n'):
        if 'NVIDIA ' not in row:
            continue
        name_words = []
        for word in row.split()[3:]:
            if word in stop_words:
                break
            name_words.append(word)
        dct[' '.join(name_words)] += 1
def parse_rocm_smi(dct, out):
    """Parse output from rocm-smi command (parsing is not implemented).

    Returns without touching ``dct`` when ``out`` is blank or reports that
    the command could not be found.

    Raises
    ------
    NotImplementedError
        For any other output, since real rocm-smi output is not parsed
        yet.
    """
    # Shells word the failure differently: some print "command not found",
    # others (e.g. dash) only "not found".  The shorter text covers both.
    if not out.strip() or 'not found' in out:
        return

    raise NotImplementedError
def parse_gpu(nvidia, rocm):
    """Build a one-line GPU description from nvidia-smi and rocm-smi output.

    GPU names are joined by single spaces; a name that was counted more
    than once is written as ``<count>x (<name>)``.
    """
    counts = defaultdict(int)
    parse_nvidia_smi(counts, nvidia)
    parse_rocm_smi(counts, rocm)

    labels = []
    for name, number in counts.items():
        if number > 1:
            labels.append(f'{number}x ({name})')
        else:
            labels.append(name)
    return ' '.join(labels)
def benchmark_from_dict(dct):
    """Create a summary dictionary from the full json output of the benchmark.

    Reads the 'Benchmark' entry of ``dct`` and returns a flat dictionary
    with the timings, names, maximum rss and the parsed processor, GPU,
    host name, git hash and git branch.
    """
    record = dct['Benchmark']
    step_results = record['results']
    info = record['system_info']

    return {
        'walltime': record['walltime'],
        'shortname': record['shortname'],
        'processor': parse_processor(info['processor']),
        'gpu': parse_gpu(info['nvidia-smi'], info['rocm-smi']),
        'longname': record['longname'],
        'hostname': info['hostname'].strip(),
        'calcinfo': record['calcinfo'],
        'mpi-ranks': info['mpi-ranks'],
        'First step': step_results['First step']['walltime'],
        'Second step': step_results['Second step']['walltime'],
        'max_rss': record['max_rss'],
        'githash': info['git-hash'].strip(),
        'branch': parse_git_status(info['git-status']),
    }
def gather_benchmarks(directories, output_file):
    """Summarize several benchmark JSON files into one JSON file.

    Parameters
    ----------
    directories
        Iterable of paths; each one is passed to ``load_benchmark``.
    output_file
        Path of the JSON file that receives the list of summaries.

    Returns
    -------
    list
        The summaries (see ``benchmark_from_dict``) of all files that
        could be processed.
    """
    lst = []
    for fname in directories:
        try:
            lst.append(benchmark_from_dict(load_benchmark(fname)))
        except Exception as e:
            # Best effort: a broken or incomplete file is skipped, but the
            # message tells which file it was and what went wrong.
            print(f'Skipping {fname}: {type(e).__name__}: {e}')
    Path(output_file).write_text(dumps(lst, indent=4))
    return lst