Coverage for gpaw/utilities/timing.py: 29%
249 statements
« prev ^ index » next coverage.py v7.7.1, created at 2025-07-19 00:19 +0000
« prev ^ index » next coverage.py v7.7.1, created at 2025-07-19 00:19 +0000
1# Copyright (C) 2003 CAMP
2# Please see the accompanying LICENSE file for further information.
4import sys
5import time
6import math
8import numpy as np
9from ase.utils.timing import Timer
11import gpaw.mpi as mpi
14class NullTimer:
15 """Compatible with Timer and StepTimer interfaces. Does nothing."""
16 def __init__(self):
17 pass
19 def print_info(self, calc):
20 pass
22 def start(self, name):
23 pass
25 def stop(self, name=None):
26 pass
28 def get_time(self, name):
29 return 0.0
31 def write(self, out=sys.stdout):
32 pass
34 def write_now(self, mark=''):
35 pass
37 def add(self, timer):
38 pass
40 def __call__(self, name):
41 return self
43 def __enter__(self):
44 pass
46 def __exit__(self, *args):
47 pass
50nulltimer = NullTimer()
53class DebugTimer(Timer):
54 def __init__(self, print_levels=1000, comm=mpi.world, txt=sys.stdout):
55 Timer.__init__(self, print_levels)
56 ndigits = 1 + int(math.log10(comm.size))
57 self.srank = '%0*d' % (ndigits, comm.rank)
58 self.txt = txt
60 def start(self, name):
61 Timer.start(self, name)
62 abstime = time.time()
63 t = self.timers[tuple(self.running)] + abstime
64 self.txt.write('T%s >> %15.8f %s (%7.5fs) started\n'
65 % (self.srank, abstime, name, t))
67 def stop(self, name=None):
68 if name is None:
69 name = self.running[-1]
70 abstime = time.time()
71 t = self.timers[tuple(self.running)] + abstime
72 self.txt.write('T%s << %15.8f %s (%7.5fs) stopped\n'
73 % (self.srank, abstime, name, t))
74 Timer.stop(self, name)
77class GPUEvent:
78 def __init__(self, name):
79 self.name = name
80 import cupy
81 default = dict(block=False,
82 disable_timing=False,
83 interprocess=False)
84 self.stop_event = cupy.cuda.Event(**default)
85 self.start_event = cupy.cuda.Event(**default)
86 self.start_event.record()
88 def stop(self):
89 self.stop_event.record()
91 def get_time(self):
92 import cupy
93 return cupy.cuda.get_elapsed_time(self.start_event,
94 self.stop_event) / 1000
97class GPUTimerBase:
99 def __init__(self, max_stack=10):
100 self.event_queue = []
101 self.event_stack = []
102 self.max_stack = max_stack
103 from collections import defaultdict
104 self.gpu_timers = defaultdict(float)
106 def gpu_start(self, key):
107 self.event_stack.append(GPUEvent(key))
109 def gpu_stop(self):
110 gpu_event = self.event_stack.pop()
111 gpu_event.stop()
112 self.event_queue.append(gpu_event)
113 if len(self.event_queue) > self.max_stack:
114 self.handle_events()
116 def handle_events(self):
117 while len(self.event_queue):
118 event = self.event_queue[0]
119 if not event.stop_event.done:
120 break
121 del self.event_queue[0]
122 time = event.get_time()
123 self.gpu_timers[event.name] += time
124 self.handle_event_hook(event)
126 def handle_event_hook(self, event):
127 pass
129 def gpu_write(self, out=sys.stdout):
130 import cupy
131 event = cupy.cuda.Event(block=True)
132 event.synchronize()
133 self.handle_events()
135 timers, self.timers = self.timers, self.gpu_timers
136 Timer.write(self, out)
137 self.timers = timers
140class GPUTimer(Timer, GPUTimerBase):
141 def __init__(self, *args, **kwargs):
142 Timer.__init__(self, *args, **kwargs)
143 GPUTimerBase.__init__(self)
145 def start(self, name):
146 Timer.start(self, name)
147 GPUTimerBase.gpu_start(self, name)
149 def stop(self, name=None):
150 Timer.stop(self, name)
151 GPUTimerBase.gpu_stop(self)
153 def write(self, out=sys.stdout):
154 print('CPU event timings:', file=out)
155 Timer.write(self, out)
156 print('GPU event timings:', file=out)
157 GPUTimerBase.gpu_write(self, out)
160def ranktxt(comm, rank=None):
161 rank = comm.rank if rank is None else rank
162 ndigits = len(str(comm.size - 1))
163 return '%0*d' % (ndigits, rank)
166class ParallelTimer(DebugTimer):
167 """Like DebugTimer but writes timings from all ranks.
169 Each rank writes to timings.<rank>.txt. Also timings.metadata.txt
170 will contain information about the parallelization layout. The idea
171 is that the output from this timer can be used for plots and to
172 determine bottlenecks in the parallelization.
174 See the tool gpaw-plot-parallel-timings."""
175 def __init__(self, prefix='timings', flush=False):
176 fname = f'{prefix}.{ranktxt(mpi.world)}.txt'
177 txt = open(fname, 'w', buffering=1 if flush else -1)
178 DebugTimer.__init__(self, comm=mpi.world, txt=txt)
179 self.prefix = prefix
181 def print_info(self, calc):
182 """Print information about parallelization into a file."""
183 fd = open('%s.metadata.txt' % self.prefix, 'w')
184 DebugTimer.print_info(self, calc)
185 wfs = calc.wfs
187 # We won't have to type a lot if everyone just sends all their numbers.
188 myranks = np.array([wfs.world.rank, wfs.kd.comm.rank,
189 wfs.bd.comm.rank, wfs.gd.comm.rank])
190 allranks = None
191 if wfs.world.rank == 0:
192 allranks = np.empty(wfs.world.size * 4, dtype=int)
193 wfs.world.gather(myranks, 0, allranks)
194 if wfs.world.rank == 0:
195 for itsranks in allranks.reshape(-1, 4):
196 fd.write('r=%d k=%d b=%d d=%d\n' % tuple(itsranks))
197 fd.close()
200class Profiler(Timer):
201 def __init__(self, prefix, comm=mpi.world):
202 import atexit
204 self.prefix = prefix
205 self.comm = comm
206 self.ranktxt = ranktxt(comm)
207 fname = f'{prefix}.{self.ranktxt}.json'
208 self.txt = open(fname, 'w', buffering=-1)
209 self.pid = 0 # os.getpid() creates more confusing output
210 atexit.register(Profiler.finish_trace, self)
211 # legacy json format for perfetto always assumes microseconds
212 self.u = 1_000_000
214 self.synchronize()
215 Timer.__init__(self, 1000)
217 def synchronize(self):
218 # Synchronize in order to have same time reference
219 ref = np.zeros(1)
220 if self.comm.rank == 0:
221 ref[0] = time.time()
222 self.comm.broadcast(ref, 0)
223 self.ref = ref[0]
225 def finish_trace(self):
226 self.txt.close()
227 self.comm.barrier()
228 if self.comm.rank == 0:
229 out = open(self.prefix + '.json', 'w')
230 out.write("""{
231 "traceEvents":
232 [ """)
233 for i in range(self.comm.size):
234 fname = f'{self.prefix}.{ranktxt(self.comm, rank=i)}.json'
235 print('Processing', fname)
236 with open(fname, 'r') as f:
237 out.writelines(f.readlines())
238 out.write("] }\n")
239 out.close()
240 self.comm.barrier()
242 def start(self, name):
243 Timer.start(self, name)
244 self.txt.write(
245 f"""{{"name": "{name}", "cat": "PERF", "ph": "B","""
246 f""" "pid": {self.pid}, "tid": {self.ranktxt}, """
247 f""""ts": {int((time.time() - self.ref) * self.u)} }},\n""")
249 def stop(self, name=None):
250 if name is None:
251 name = self.running[-1]
252 self.txt.write(
253 f"""{{"name": "{name}", "cat": "PERF", "ph": "E", """
254 f""""pid": {self.pid}, "tid": {self.ranktxt}, """
255 f""""ts": {int((time.time() - self.ref) * self.u)}}},\n""")
256 Timer.stop(self, name)
259class GPUProfiler(Profiler, GPUTimerBase):
260 def __init__(self, prefix, comm=mpi.world):
261 Profiler.__init__(self, prefix, comm=comm)
262 GPUTimerBase.__init__(self)
264 def synchronize(self):
265 from cupy.cuda import Event
266 # Make sure GPU gets here
267 event = Event(block=True)
268 event.record()
269 event.synchronize()
271 # Wait all CPUs
272 self.comm.barrier()
274 # Now all GPUs and CPUs are somewhat simultaneous
275 # So, record the reference event and time
276 event = Event(block=True)
277 event.record()
278 self.ref = time.time()
279 self.ref_event = event
281 # Broadcast CPU time reference (possibly problematic)
282 buf = np.zeros(1)
283 buf[0] = self.ref
284 self.comm.broadcast(buf, 0)
285 self.ref = buf[0]
287 def start(self, name, gpu=False):
288 Profiler.start(self, name)
289 if gpu:
290 GPUTimerBase.gpu_start(self, name)
292 def stop(self, name=None, gpu=False):
293 Profiler.stop(self, name)
294 if gpu:
295 GPUTimerBase.gpu_stop(self)
297 def handle_event_hook(self, event):
298 import cupy
300 def get_time(e):
301 t = cupy.cuda.get_elapsed_time
302 return t(self.ref_event, e)
304 ms_start = get_time(event.start_event)
305 ms_stop = get_time(event.stop_event)
306 self.txt.write(
307 f"""{{"name": "{event.name}", "cat": "PERF", "ph": "B","""
308 f""" "pid": {self.pid}, "tid": "GPU {self.ranktxt}", """
309 f""""ts": {int(ms_start * 1000)} }},\n""")
310 self.txt.write(
311 f"""{{"name": "{event.name}", "cat": "PERF", "ph": "E", """
312 f""""pid": {self.pid}, "tid": "GPU {self.ranktxt}", """
313 f""""ts": {int(ms_stop * 1000)} }},\n""")
316class HPMTimer(Timer):
317 """HPMTimer requires installation of the IBM BlueGene/P HPM
318 middleware interface to the low-level UPC library. This will
319 most likely only work at ANL's BlueGene/P. Must compile
320 with GPAW_HPM macro in customize.py. Note that HPM_Init
321 and HPM_Finalize are called in cgpaw.c and not in the Python
322 interface. Timer must be called on all ranks in node, otherwise
323 HPM will hang. Hence, we only call HPM_start/stop on a list
324 subset of timers."""
326 top_level = 'GPAW.calculator' # HPM needs top level timer
327 compatible = ['Initialization', 'SCF-cycle']
329 def __init__(self):
330 Timer.__init__(self)
331 from gpaw.cgpaw import hpm_start, hpm_stop
332 self.hpm_start = hpm_start
333 self.hpm_stop = hpm_stop
334 hpm_start(self.top_level)
336 def start(self, name):
337 Timer.start(self, name)
338 if name in self.compatible:
339 self.hpm_start(name)
341 def stop(self, name=None):
342 Timer.stop(self, name)
343 if name in self.compatible:
344 self.hpm_stop(name)
346 def write(self, out=sys.stdout):
347 Timer.write(self, out)
348 self.hpm_stop(self.top_level)
351class CrayPAT_timer(Timer):
352 """Interface to CrayPAT API. In addition to regular timers,
353 the corresponding regions are profiled by CrayPAT. The gpaw-python has
354 to be compiled under CrayPAT.
355 """
357 def __init__(self, print_levels=4):
358 Timer.__init__(self, print_levels)
359 from gpaw.cgpaw import craypat_region_begin, craypat_region_end
360 self.craypat_region_begin = craypat_region_begin
361 self.craypat_region_end = craypat_region_end
362 self.regions = {}
363 self.region_id = 5 # leave room for regions in C
365 def start(self, name):
366 Timer.start(self, name)
367 if name in self.regions:
368 id = self.regions[name]
369 else:
370 id = self.region_id
371 self.regions[name] = id
372 self.region_id += 1
373 self.craypat_region_begin(id, name)
375 def stop(self, name=None):
376 Timer.stop(self, name)
377 id = self.regions[name]
378 self.craypat_region_end(id)