Coverage for gpaw/utilities/memory.py: 58%

84 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2025-07-20 00:19 +0000

1"""Utilities to measure and estimate memory""" 

2 

3# The functions _VmB, memory, resident, and stacksize are based on 

4# Python Cookbook, recipe number 286222 

5# https://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/286222 

6 

7import os 

8import numpy as np 

9 

# Path of the /proc status pseudo-file belonging to the current process.
_proc_status = '/proc/%d/status' % os.getpid()

# Multipliers that convert a unit suffix read from the status file into
# bytes.  Both capitalisations of the kilo/mega suffixes are accepted.
_scale = {unit: 1024.0 ** power
          for unit, power in (('kB', 1), ('mB', 2), ('KB', 1), ('MB', 2))}

14 

15 

def _VmB(VmKey):
    """Return one field of the process status pseudo-file, in bytes.

    Private helper.

    Parameters:

    VmKey: str
        Field name including the trailing colon, e.g. ``'VmRSS:'``.

    Returns the field value converted to bytes as a float, or 0.0 when
    the status file cannot be read, the key is absent, or the line
    cannot be parsed (too few fields, non-numeric value, unknown unit).
    """
    try:
        # The context manager closes the file even if read() fails.
        with open(_proc_status) as fd:
            status = fd.read()
        # find the VmKey line, e.g. 'VmRSS: 9999 kB\n ...'
        start = status.index(VmKey)
    except (OSError, ValueError, LookupError):
        return 0.0  # non-Linux?

    # key, number, unit, rest-of-file
    fields = status[start:].split(None, 3)
    if len(fields) < 3:
        return 0.0  # invalid format?

    # convert Vm value to bytes
    try:
        return float(fields[1]) * _scale[fields[2]]
    except (KeyError, ValueError):
        # Unknown unit suffix or non-numeric value: report "unavailable"
        # like every other failure above instead of raising.
        return 0.0

33 

34 

def maxrss():
    """Return maximal resident memory size in bytes.

    The status pseudo-file of the process is consulted first; the first
    field with a positive value wins.  If none is available the value
    reported by ``resource.getrusage`` is used.  Returns 0.0 when no
    source yields a positive number.
    """
    # see https://www.kernel.org/doc/man-pages/online/pages/man5/proc.5.html

    # try to get it from /proc/<pid>/status
    # This will not work on supercomputers like Blue Gene or Cray
    for name in ('VmHWM:',   # Peak resident set size ("high water mark")
                 'VmRSS:',   # Resident set size (key is case-sensitive)
                 'VmPeak:',  # Peak virtual memory size
                 'VmSize:',  # Virtual memory size
                 ):
        mm = _VmB(name)
        if mm > 0:
            return mm

    # try to get it from rusage
    # Python documentation here:
    # https://docs.python.org/library/resource.html
    # says to multiply by the pagesize, but this is incorrect.
    # What is implementation dependent is whether ru_maxrss is in
    # bytes or kilobytes.  We make an attempt to detect this and
    # convert to bytes.
    # Warning: this does not work for systems reporting kilobytes
    # for memory of more than 1 GiB (won't be scaled);
    # similarly, for systems reporting bytes, memory of less than 1 MiB
    # will wrongly be scaled by 1024.
    #
    # the next call seems to return 'VmHWM'
    try:
        import resource
        mm = resource.getrusage(resource.RUSAGE_SELF)[2]  # ru_maxrss
        if mm > 0:
            if mm < (1024)**2:  # 1 MiB
                mm = mm * 1024  # probably in KiB, so convert to bytes
            return mm
    except (TypeError, ImportError):
        pass

    return 0.0  # no more ideas

74 

75 

class MemNode:
    """Tree node holding the estimated memory footprint of an object.

    Each node has a name, an intrinsic size in bytes and a list of child
    nodes describing its components.  Any object implementing
    estimate_memory() can fill in such a tree.  Example::

        from sys import stdout
        from gpaw.utilities.memory import MemNode
        node = MemNode('Root')  # any name will do
        some_object.estimate_memory(node)
        nbytes = node.calculate_size()
        print('Bytes', nbytes)
        node.write(stdout)  # write details

    calculate_size() has to be called before write(); until then the
    total size of a node is NaN.  Some objects must be explicitly
    initialized before they can estimate their memory use.
    """
    # Size in bytes of one float / one complex array element.
    floatsize = np.dtype(float).itemsize
    complexsize = np.dtype(complex).itemsize
    # Lookup from Python scalar type to element size in bytes.
    itemsize = {float: floatsize, complex: complexsize}

    def __init__(self, name, basesize=0):
        """Create a node called *name* whose own size, excluding any
        subcomponents, is *basesize* bytes."""
        self.name = name
        self.nodes = []
        self.indent = ' '
        self.basesize = float(basesize)
        self.totalsize = np.nan  # Size including sub-objects

    def write(self, txt, maxdepth=-1, depth=0):
        """Print this node and, recursively, its subnodes to file *txt*.

        *depth* is the current nesting level and controls indentation.
        Recursion stops once *depth* equals *maxdepth*; the default of
        -1 never matches, i.e. the whole tree is written."""
        line = (depth * self.indent + self.name + ': ' +
                self.memformat(self.totalsize))
        print(line, file=txt)
        if depth != maxdepth:
            for child in self.nodes:
                child.write(txt, maxdepth, depth + 1)

    def memformat(self, bytes):
        """Format a byte count as a string in MiB with two decimals."""
        # One MiB is 1024*1024 bytes, as opposed to one MB which is ambiguous
        mebibytes = bytes / float(2 ** 20)
        return '%.2f MiB' % mebibytes

    def calculate_size(self):
        """Recompute, store and return the total size of this subtree."""
        # Start from the float basesize so the running total is never a
        # fixed-size np integer.
        self.totalsize = sum((child.calculate_size()
                              for child in self.nodes), self.basesize)
        return self.totalsize

    def subnode(self, name, basesize=0):
        """Append and return a new child node with the given name and
        intrinsic size.  This is how the component tree is built."""
        child = MemNode(name, basesize)
        child.indent = self.indent
        self.nodes.append(child)
        return child

    def setsize(self, basesize):
        """Set the intrinsic size of this node in bytes."""
        self.basesize = float(basesize)

140 

141 

def monkey_patch_timer():
    """Patch ase's Timer class so that it logs memory usage.

    After this call every Timer instance opens a file named ``mem.NNNN``
    (NNNN being the MPI rank) and appends one line per start()/stop()
    call containing the time since the timer was created, the value of
    maxrss() and the timer name.

    Call this function before the actual work is done.  Then do::

        $ less mem.????

    to see where the memory is allocated."""

    from ase.utils.timing import Timer

    from gpaw.mpi import rank
    from time import time

    # Keep the unpatched methods so the wrappers can delegate to them.
    original_init = Timer.__init__
    original_start = Timer.start
    original_stop = Timer.stop

    def log(timer, template, label):
        # Write one record and flush immediately so that the file is
        # up to date even if the process dies.
        record = template % (time() - timer.t0, maxrss(), label)
        print(record, file=timer.fd)
        timer.fd.flush()

    def init(self, print_levels=1000):
        original_init(self, print_levels)
        self.fd = open('mem.%04d' % rank, 'w')
        self.mem = []
        self.t0 = time()

    def start(self, name):
        original_start(self, name)
        log(self, 'start %14.6f %12d %s', name)

    def stop(self, name=None):
        names = original_stop(self, name)
        log(self, 'stop %14.6f %12d %s', '.'.join(names))

    Timer.__init__ = init
    Timer.start = start
    Timer.stop = stop

181 Timer.stop = stop