Coverage for gpaw/matrix_descriptor.py: 20% (266 statements)
import numpy as np
from scipy.linalg import eigh

from gpaw import debug


class MatrixDescriptor:
    """Class representing a 2D matrix shape.  Base class for parallel
    matrix descriptor with BLACS."""

    def __init__(self, M, N):
        self.shape = (M, N)

    def __bool__(self):
        return self.shape[0] != 0 and self.shape[1] != 0

    def zeros(self, n=(), dtype=float):
        """Return array of zeros with the correct size on all CPUs.

        The last two dimensions will be equal to the shape of this
        descriptor.  If n is given as a tuple, it specifies any number of
        preceding dimensions."""
        return self._new_array(np.zeros, n, dtype)

    def empty(self, n=(), dtype=float):
        """Return uninitialized array with the correct size on all CPUs.

        See zeros()."""
        return self._new_array(np.empty, n, dtype)

    def _new_array(self, func, n, dtype):
        if isinstance(n, int):
            n = (n,)
        shape = n + self.shape
        return func(shape, dtype)

    def check(self, a_mn):
        """Check that the specified array is compatible with this
        descriptor."""
        return a_mn.shape == self.shape and a_mn.flags.contiguous

    def checkassert(self, a_mn):
        ok = self.check(a_mn)
        if not ok:
            if not a_mn.flags.contiguous:
                msg = f'Matrix with shape {a_mn.shape} is not contiguous'
            else:
                msg = ('%s-descriptor incompatible with %s-matrix' %
                       (self.shape, a_mn.shape))
            raise AssertionError(msg)

    def general_diagonalize_dc(self, H_mm, S_mm, C_mm, eps_M,
                               UL='L', iu=None):
        if iu is None:
            n = len(H_mm)
            eigvals = None  # all
        else:
            n = iu
            eigvals = (0, n - 1)
        eps_M[:n], C_mm.T[:, :n] = eigh(H_mm, S_mm,
                                        subset_by_index=eigvals,
                                        overwrite_a=True,
                                        check_finite=debug)
        if C_mm.dtype == complex:
            # Conjugate in place: negate the imaginary part so the rows of
            # C_mm hold the complex-conjugated eigenvectors.
            np.negative(C_mm.imag, C_mm.imag)
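
    # Illustrative sketch (not taken from the original source): for a small
    # serial descriptor, general_diagonalize_dc solves H v = eps S v with
    # scipy.linalg.eigh and stores the eigenvectors (complex conjugated for
    # complex dtype) as the rows of C_mm.  Note that H_mm may be overwritten:
    #
    #     desc = MatrixDescriptor(3, 3)
    #     H_mm = np.diag([1.0, 2.0, 3.0])
    #     S_mm = np.eye(3)
    #     C_mm = desc.empty(dtype=float)
    #     eps_M = np.empty(3)
    #     desc.general_diagonalize_dc(H_mm, S_mm, C_mm, eps_M)
    #     # eps_M -> [1. 2. 3.]; row i of C_mm is the i'th eigenvector.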

    def my_blocks(self, array_mn):
        yield (0, self.shape[0], 0, self.shape[1], array_mn)

    def estimate_memory(self, mem, dtype):
        """Handled by subclass."""
        pass
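
# Usage sketch (illustrative, not part of the module): a bare MatrixDescriptor
# only records a shape and hands out matching arrays, e.g.
#
#     desc = MatrixDescriptor(4, 4)
#     A_mn = desc.zeros(dtype=complex)    # shape (4, 4)
#     B_xmn = desc.empty(2, dtype=float)  # shape (2, 4, 4)
#     assert desc.check(A_mn)
#     desc.checkassert(A_mn)  # raises AssertionError on shape/stride mismatch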


class BandMatrixDescriptor(MatrixDescriptor):
    """Descriptor-class for square matrices of bands times bands."""

    def __init__(self, bd, gd, ksl):
        MatrixDescriptor.__init__(self, bd.nbands, bd.nbands)
        self.bd = bd
        self.gd = gd  # XXX used?
        self.ksl = ksl  # not really used...

    def assemble_blocks(self, A_qnn, A_NN, hermitian):
        """Assign all distributed sub-blocks from the various ranks to the
        relevant parts of a Hermitian or non-Hermitian matrix A_NN.

        Parameters:

        A_qnn: ndarray
            Sub-blocks belonging to the specified rank.
        A_NN: ndarray
            Full matrix in which to write contributions from sub-blocks.
        hermitian: bool
            Indicates whether A_NN is a Hermitian matrix, in which
            case only the lower triangular part is assigned to.

        Note that the sub-block buffers are used for communicating across the
        band communicator, hence A_qnn will be altered during the assembly.
        """
        if self.bd.comm.size == 1:
            if hermitian:
                self.triangular_blockwise_assign(A_qnn, A_NN, 0)
            else:
                self.full_blockwise_assign(A_qnn, A_NN, 0)
            return

        if self.bd.comm.rank == 0:
            for band_rank in range(self.bd.comm.size):
                if band_rank > 0:
                    self.bd.comm.receive(A_qnn, band_rank, 13)
                if hermitian:
                    self.triangular_blockwise_assign(A_qnn, A_NN, band_rank)
                else:
                    self.full_blockwise_assign(A_qnn, A_NN, band_rank)
        else:
            self.bd.comm.send(A_qnn, 0, 13)

    def triangular_blockwise_assign(self, A_qnn, A_NN, band_rank):
        """Assign the sub-blocks belonging to a given rank to the lower
        triangular part of a Hermitian matrix A_NN.  This subroutine is used
        for matrix assembly.

        Parameters:

        A_qnn: ndarray
            Sub-blocks belonging to the specified rank.
        A_NN: ndarray
            Full matrix in which to write contributions from sub-blocks.
        band_rank: int
            Communicator rank to which the sub-blocks belong.

        Note that a Hermitian matrix requires Q=B//2+1 blocks of M x M
        elements where B is the communicator size and M=N//B for N bands.
        """
        N = self.bd.mynbands
        B = self.bd.comm.size
        assert band_rank in range(B)

        if B == 1:
            # Only fill in the lower part
            mask = np.tri(N).astype(bool)
            A_NN[mask] = A_qnn.reshape((N, N))[mask]
            return

        # A_qnn[q2,myn1,myn2] on rank q1 is the q2'th overlap calculated
        # between <psi_n1| and A|psit_n2> where n1 <-> (q1,myn1) and
        # n2 <-> ((q1+q2)%B,myn2) since we've sent/received q2 times.
        q1 = band_rank
        Q = B // 2 + 1
        if debug:
            assert A_qnn.shape == (Q, N, N)

        # Note that for integer inequalities, these relations are useful (X>0):
        #     A*X > B  <=>  A > B//X    ^    A*X <= B  <=>  A <= B//X

        if self.bd.strided:
            A_nbnb = A_NN.reshape((N, B, N, B))
            mask = np.empty((N, N), dtype=bool)
            for q2 in range(Q):
                # n1 = (q1+q2)%B + myn1*B   ^   n2 = q1 + myn2*B
                #
                # We seek the lower triangular part, i.e. n1 >= n2
                #   <=>  (myn2-myn1)*B <= (q1+q2)%B - q1
                #   <=>  myn2-myn1 <= dq//B
                dq = (q1 + q2) % B - q1  # within ]-B; Q[ so dq//B is -1 or 0

                # Create mask for lower part of current block
                mask[:] = np.tri(N, N, dq // B)
                if debug:
                    m1, m2 = np.indices((N, N))
                    assert dq in range(-B + 1, Q)
                    assert (mask == (m1 >= m2 - dq // B)).all()

                # Copy lower part of A_qnn[q2] to its rightful place
                A_nbnb[:, (q1 + q2) % B, :, q1][mask] = A_qnn[q2][mask]

                # Negate the transposed mask to get complementary mask
                mask = ~mask.T

                # Copy upper part of Hermitian conjugate of A_qnn[q2]
                A_nbnb[:, q1, :,
                       (q1 + q2) % B][mask] = A_qnn[q2].T.conj()[mask]
        else:
            A_bnbn = A_NN.reshape((B, N, B, N))

            # Optimization for the first block
            if q1 == 0:
                A_bnbn[:Q, :, 0] = A_qnn
                return

            for q2 in range(Q):
                # n1 = ((q1+q2)%B)*N + myn1   ^   n2 = q1*N + myn2
                #
                # We seek the lower triangular part, i.e. n1 >= n2
                #   <=>  ((q1+q2)%B - q1)*N >= myn2-myn1
                #   <=>  myn2-myn1 <= dq*N
                #   <=>  entire block if dq > 0,
                #   ...  myn2 <= myn1 if dq == 0,
                #   ...  copy nothing if dq < 0
                if q1 + q2 < B:
                    A_bnbn[q1 + q2, :, q1] = A_qnn[q2]
                else:
                    A_bnbn[q1, :, q1 + q2 - B] = A_qnn[q2].T.conj()
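
    # Worked example (values assumed for illustration): with B = 3 ranks and
    # N = mynbands = 2, band n corresponds to (rank, myn) = (n // N, n % N)
    # in the blocked (non-strided) layout, so A_NN.reshape((B, N, B, N)) has
    # one N x N block per pair of ranks.  On rank q1 = 1 with Q = B//2 + 1 = 2,
    # the loop above writes A_qnn[0] into block (1, 1) and A_qnn[1] into
    # block (2, 1); on rank q1 = 2, q2 = 1 wraps around (q1 + q2 >= B), so
    # A_qnn[1].T.conj() lands in block (2, 0) of the lower triangle instead.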

    def full_blockwise_assign(self, A_qnn, A_NN, band_rank):
        """Assign the sub-blocks belonging to a given rank to the full
        non-Hermitian matrix A_NN.  This subroutine is used for matrix
        assembly.

        Parameters:

        A_qnn: ndarray
            Sub-blocks belonging to the specified rank.
        A_NN: ndarray
            Full matrix, in which to write contributions from sub-blocks.
        band_rank: int
            Communicator rank to which the sub-blocks belong.

        Note that a non-Hermitian matrix requires Q=B blocks of M x M
        elements where B is the communicator size and M=N//B for N bands.
        """
        N = self.bd.mynbands
        B = self.bd.comm.size
        assert band_rank in range(B)

        if B == 1:
            A_NN[:] = A_qnn.reshape((N, N))
            return

        # A_qnn[q2,myn1,myn2] on rank q1 is the q2'th overlap calculated
        # between <psi_n1| and A|psit_n2> where n1 <-> (q1,myn1) and
        # n2 <-> ((q1+q2)%B,myn2) since we've sent/received q2 times.
        q1 = band_rank
        Q = B
        if debug:
            assert A_qnn.shape == (Q, N, N)

        if self.bd.strided:
            A_nbnb = A_NN.reshape((N, B, N, B))
            for q2 in range(Q):
                A_nbnb[:, (q1 + q2) % B, :, q1] = A_qnn[q2]
        else:
            A_bnbn = A_NN.reshape((B, N, B, N))

            # Optimization for the first block
            if q1 == 0:
                A_bnbn[:Q, :, 0] = A_qnn
                return

            for q2 in range(Q):
                A_bnbn[(q1 + q2) % B, :, q1] = A_qnn[q2]

    def extract_block(self, A_NN, q1, q2):
        """Extract the sub-block belonging to a given pair of ranks from the
        full matrix A_NN.  Extraction may result in copies to ensure unit
        stride, thus one should not utilize this routine for altering A_NN.

        Parameters:

        A_NN: ndarray
            Full matrix, from which to read the requested sub-block.
        q1: int
            Communicator rank to which the sub-block belongs (row index).
        q2: int
            Communicator rank the sub-block originated from (column index).

        Note that a Hermitian matrix requires just Q=B//2+1 blocks of M x M
        elements where B is the communicator size and M=N//B for N bands.
        Therefore, care should be taken to only request q1, q2 pairs which
        are connected by Q shifts or fewer if A_NN is lower triangular.
        """
        N = self.bd.mynbands
        B = self.bd.comm.size

        if B == 1:
            return A_NN

        if self.bd.strided:
            A_nbnb = A_NN.reshape((N, B, N, B))
            # last dim must have unit stride
            return A_nbnb[:, q2, :, q1].copy()
        else:
            A_bnbn = A_NN.reshape((B, N, B, N))
            return A_bnbn[q2, :, q1]
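
    # Illustrative sketch (values assumed): with B = 2 ranks and mynbands = 3,
    # extract_block(A_NN, q1=1, q2=0) returns the 3 x 3 block
    # A_NN.reshape((2, 3, 2, 3))[0, :, 1] in the blocked layout, or a
    # contiguous copy of A_NN.reshape((3, 2, 3, 2))[:, 0, :, 1] when the band
    # distribution is strided.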

    def redistribute_input(self, A_NN):  # do nothing
        return A_NN

    def redistribute_output(self, A_NN):  # do nothing
        if debug:
            self.checkassert(A_NN)
        return A_NN

    def estimate_memory(self, mem, dtype):
        # Temporary work arrays included in estimate
        nbands = self.bd.nbands
        itemsize = mem.itemsize[dtype]
        mem.subnode('A_NN', nbands * nbands * itemsize)


class BlacsBandMatrixDescriptor(MatrixDescriptor):
    """Descriptor-class for square BLACS matrices of bands times bands."""

    def __init__(self, bd, gd, ksl):
        # XXX a hack...
        MatrixDescriptor.__init__(self, bd.nbands, bd.mynbands)
        self.bd = bd
        self.gd = gd  # XXX used?
        self.ksl = ksl

    def assemble_blocks(self, A_qnn, A_Nn, hermitian):
        """Assign all distributed sub-blocks from the various ranks to the
        relevant parts of a Hermitian or non-Hermitian matrix A_Nn.

        Parameters:

        A_qnn: ndarray
            Sub-blocks belonging to the specified rank.
        A_Nn: ndarray
            Full column vector in which to write contributions from
            sub-blocks.
        hermitian: bool
            Indicates whether A_Nn represents a Hermitian matrix, in which
            case only the lower triangular part is assigned to.

        Note that the sub-block buffers are used for communicating across the
        band communicator, hence A_qnn will be altered during the assembly.
        """
        band_rank = self.bd.comm.rank
        if hermitian:
            self.triangular_columnwise_assign(A_qnn, A_Nn, band_rank)
        else:
            self.full_columnwise_assign(A_qnn, A_Nn, band_rank)

    def triangular_columnwise_assign(self, A_qnn, A_Nn, band_rank):
        """Assign the sub-blocks belonging to a given rank to the lower
        triangular part of a Hermitian matrix A_NN.  This subroutine is used
        for matrix assembly.

        Parameters:

        A_qnn: ndarray
            Sub-blocks belonging to the specified rank.
        A_Nn: ndarray
            Full column vector in which to write contributions from
            sub-blocks.
        band_rank: int
            Communicator rank to which the sub-blocks belong.

        Note that a Hermitian matrix requires Q=B//2+1 blocks of M x M
        elements where B is the communicator size and M=N//B for N bands.
        """
        N = self.bd.mynbands
        B = self.bd.comm.size
        assert band_rank in range(B)

        if B == 1:
            # Only fill in the lower part
            mask = np.tri(N).astype(bool)
            A_Nn[mask] = A_qnn.reshape((N, N))[mask]
            return

        # A_qnn[q2,myn1,myn2] on rank q1 is the q2'th overlap calculated
        # between <psi_n1| and A|psit_n2> where n1 <-> (q1,myn1) and
        # n2 <-> ((q1+q2)%B,myn2) since we've sent/received q2 times.
        q1 = band_rank
        Q = B // 2 + 1
        if debug:
            assert A_qnn.shape == (Q, N, N)

        # Note that for integer inequalities, these relations are useful (X>0):
        #     A*X > B  <=>  A > B//X    ^    A*X <= B  <=>  A <= B//X

        if self.bd.strided:
            raise NotImplementedError
            # Unreachable draft of the strided case, kept for reference; it
            # would mirror BandMatrixDescriptor.triangular_blockwise_assign.
            """
            A_nbn = A_NN.reshape((N, B, N))
            mask = np.empty((N, N), dtype=bool)
            for q2 in range(Q):
                # n1 = (q1+q2)%B + myn1*B   ^   n2 = q1 + myn2*B
                #
                # We seek the lower triangular part, i.e. n1 >= n2
                #   <=>  (myn2-myn1)*B <= (q1+q2)%B - q1
                #   <=>  myn2-myn1 <= dq//B
                dq = (q1+q2) % B - q1  # within ]-B; Q[ so dq//B is -1 or 0

                # Create mask for lower part of current block
                mask[:] = np.tri(N, N, dq//B)
                if debug:
                    m1, m2 = np.indices((N, N))
                    assert dq in range(-B+1, Q)
                    assert (mask == (m1 >= m2 - dq//B)).all()

                # Copy lower part of A_qnn[q2] to its rightful place
                A_nbn[:, (q1+q2) % B][mask] = A_qnn[q2][mask]

                # Negate the transposed mask to get complementary mask
                mask = ~mask.T

                # Copy upper part of Hermitian conjugate of A_qnn[q2]
                A_nbn[:, q1][mask] = A_qnn[q2].T.conj()[mask]
            """
        else:
            A_bnn = A_Nn.reshape((B, N, N))

            for q2 in range(Q):
                # n1 = ((q1+q2)%B)*N + myn1   ^   n2 = q1*N + myn2
                #
                # We seek the lower triangular part, i.e. n1 >= n2
                #   <=>  ((q1+q2)%B - q1)*N >= myn2-myn1
                #   <=>  myn2-myn1 <= dq*N
                #   <=>  entire block if dq > 0,
                #   ...  myn2 <= myn1 if dq == 0,
                #   ...  copy nothing if dq < 0
                if q1 + q2 < B:
                    A_bnn[q1 + q2] = A_qnn[q2]

            reqs = []
            if q1 < Q - 1:  # receive from ranks >= Q
                if debug:
                    Q2 = np.arange(Q, B - q1)
                    print('q1=%d, q2: %12s | recv from q1+q2:%12s -> A_bnn%s' %
                          (q1, Q2.tolist(), (q1 + Q2).tolist(),
                           (q1 + Q2).tolist()))

                for q2 in range(Q, B - q1):
                    rrank = q1 + q2
                    A_nn = A_bnn[q1 + q2]
                    reqs.append(self.bd.comm.receive(A_nn, rrank, block=False))
            elif q1 >= Q:  # send to ranks < Q-1
                if debug:
                    Q2 = np.arange(B - q1, B - Q + 1)[::-1]
                    print('q1=%d, q2: %12s | '
                          'send to q1+q2-B:%12s <- A_qnn%s.T.conj()' %
                          (q1, Q2.tolist(), (q1 + Q2 - B).tolist(),
                           Q2.tolist()))

                # symmetrize comm
                for q2 in reversed(range(B - q1, B - Q + 1)):
                    srank = q1 + q2 - B
                    sbuf_nn = np.ascontiguousarray(np.conjugate(A_qnn[q2].T))
                    reqs.append(self.bd.comm.send(sbuf_nn, srank, block=False))
            else:
                if debug:
                    print('q1=%d, do nothing...' % q1)

            self.bd.comm.waitall(reqs)
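
        # Communication sketch (illustrative, values assumed): with B = 4 and
        # Q = B//2 + 1 = 3, only rank q1 = 0 posts a receive (one block, from
        # rank 3); rank q1 = 3 sends the Hermitian conjugate of its
        # wrap-around block A_qnn[1] to rank 0; ranks 1 and 2 exchange nothing
        # (rank 1 has no wrap-around block and rank 2's is redundant with a
        # block rank 0 already computed locally).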

    def full_columnwise_assign(self, A_qnn, A_Nn, band_rank):
        """Assign the sub-blocks belonging to a given rank to the columns of
        a non-Hermitian matrix A_Nn.  This subroutine is used for column
        assembly.

        Parameters:

        A_qnn: ndarray
            Sub-blocks belonging to the specified rank.
        A_Nn: ndarray
            Full column vector, in which to write contributions from
            sub-blocks.
        band_rank: int
            Communicator rank to which the sub-blocks belong.

        Note that a non-Hermitian matrix requires Q=B blocks of M x M
        elements where B is the communicator size and M=N//B for N bands.
        """
        N = self.bd.mynbands
        B = self.bd.comm.size
        assert band_rank in range(B)

        if B == 1:
            A_Nn[:] = A_qnn.reshape((N, N))
            return

        # A_qnn[q2,myn1,myn2] on rank q1 is the q2'th overlap calculated
        # between <psi_n1| and A|psit_n2> where n1 <-> (q1,myn1) and
        # n2 <-> ((q1+q2)%B,myn2) since we've sent/received q2 times.
        q1 = band_rank
        Q = B

        if self.bd.strided:
            A_nbn = A_Nn.reshape((N, B, N))
            for q2 in range(Q):
                A_nbn[:, (q1 + q2) % B] = A_qnn[q2]
        else:
            A_bnn = A_Nn.reshape((B, N, N))

            # Optimization for the first block
            if q1 == 0:
                A_bnn[:Q] = A_qnn
                return

            for q2 in range(Q):
                A_bnn[(q1 + q2) % B] = A_qnn[q2]

    def extract_block_from_column(self, A_Nn, q1, q2):
        """Extract the sub-block belonging to a given pair of ranks from the
        full matrix A_NN.  Extraction may result in copies to ensure unit
        stride, thus one should not utilize this routine for altering A_NN.

        Parameters:

        A_Nn: ndarray
            Full column vector, from which to read the requested sub-block.
        q1: int
            Communicator rank to which the sub-block belongs (row index).
        q2: int
            Communicator rank the sub-block originated from (column index).

        Note that a Hermitian matrix requires just Q=B//2+1 blocks of M x M
        elements where B is the communicator size and M=N//B for N bands.
        Therefore, care should be taken to only request q1, q2 pairs which
        are connected by Q shifts or fewer if A_NN is lower triangular.
        """
        N = self.bd.mynbands
        B = self.bd.comm.size

        if B == 1:
            return A_Nn

        if q1 == self.bd.comm.rank:  # XXX must evaluate the same on all ranks!
            if self.bd.strided:
                A_nbn = A_Nn.reshape((N, B, N))
                return A_nbn[:, q2, :].copy()  # block must be contiguous
            else:
                A_bnn = A_Nn.reshape((B, N, N))
                return A_bnn[q2]

        # Time for us to put the cards on the table
        Qs = np.empty((self.bd.comm.size, 2), dtype=int)
        self.bd.comm.all_gather(np.array([q1, q2]), Qs)
        Q1, Q2 = Qs.T

        # Block exchange.  What do we want and who should we be sending to?
        rrank = q1
        srank = np.argwhere(Q1 == self.bd.comm.rank).ravel().item()
        sq2 = Q2[srank]

        if debug:
            S = np.empty(self.bd.comm.size, dtype=int)
            self.bd.comm.all_gather(np.array([srank]), S)
            if self.bd.comm.rank == 0:
                print('recv(Q1): %s, send(Q1==rank): %s' %
                      (Q1.tolist(), S.tolist()))

        if self.bd.strided:
            A_nbn = A_Nn.reshape((N, B, N))
            sbuf_nn = A_nbn[:, sq2, :].copy()  # block must be contiguous
        else:
            A_bnn = A_Nn.reshape((B, N, N))
            sbuf_nn = A_bnn[sq2]

        A_nn = np.empty_like(sbuf_nn)
        self.bd.comm.sendreceive(sbuf_nn, srank, A_nn, rrank)
        return A_nn
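
        # Exchange sketch (illustrative, values assumed): every rank announces
        # its requested (q1, q2) pair via all_gather, then receives its block
        # from rank q1 while sending the block sq2 wanted by whichever rank
        # listed this rank as its q1.  With B = 2, requests (1, 0) on rank 0
        # and (0, 1) on rank 1 make rank 0 send A_bnn[1] to rank 1 and receive
        # its block from rank 1, and vice versa.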

    def extract_block_from_row(self, A_nN, q1, q2):
        """Extract the sub-block belonging to a given pair of ranks from the
        full matrix A_NN.  Extraction may result in copies to ensure unit
        stride, thus one should not utilize this routine for altering A_NN.

        Parameters:

        A_nN: ndarray
            Full row vector, from which to read the requested sub-block.
        q1: int
            Communicator rank to which the sub-block belongs (row index).
        q2: int
            Communicator rank the sub-block originated from (column index).

        Note that a Hermitian matrix requires just Q=B//2+1 blocks of M x M
        elements where B is the communicator size and M=N//B for N bands.
        Therefore, care should be taken to only request q1, q2 pairs which
        are connected by Q shifts or fewer if A_NN is lower triangular.
        """
        N = self.bd.mynbands
        B = self.bd.comm.size

        if B == 1:
            return A_nN

        if q2 == self.bd.comm.rank:  # XXX must evaluate the same on all ranks!
            if self.bd.strided:
                # XXX This case seems to be untested
                A_nnb = A_nN.reshape((N, N, B))
                return A_nnb[..., q1].copy('C')  # block must be contiguous
            else:
                A_nbn = A_nN.reshape((N, B, N))
                return A_nbn[:, q1, :].copy('C')  # block must be contiguous
        else:
            raise NotImplementedError

    extract_block = extract_block_from_row  # XXX ugly but works

    def redistribute_input(self, A_nn, A_nN=None):  # 2D -> 1D row layout
        if A_nN is None:
            A_nN = self.ksl.nNdescriptor.empty(dtype=A_nn.dtype)
        self.ksl.nn2nN.redistribute(A_nn, A_nN)
        if not self.ksl.nNdescriptor.blacsgrid.is_active():
            assert A_nN.shape == (0, 0)
            A_nN = np.empty((self.bd.mynbands, self.bd.nbands),
                            dtype=A_nN.dtype)
        self.gd.comm.broadcast(A_nN, 0)
        return A_nN

    def redistribute_output(self, A_Nn, A_nn=None):  # 1D column -> 2D layout
        if not self.ksl.Nndescriptor.blacsgrid.is_active():
            A_Nn = np.empty((0, 0), dtype=A_Nn.dtype)
        if A_nn is None:
            A_nn = self.ksl.nndescriptor.empty(dtype=A_Nn.dtype)
        self.ksl.Nn2nn.redistribute(A_Nn, A_nn)
        return A_nn

    def estimate_memory(self, mem, dtype):
        # Temporary work arrays included in estimate
        mynbands = self.bd.mynbands
        nbands = self.bd.nbands
        itemsize = mem.itemsize[dtype]
        mem.subnode('2 A_nN', 2 * mynbands * nbands * itemsize)
        mem.subnode('2 A_nn', 2 * nbands * nbands / self.ksl.blockgrid.ncpus *
                    itemsize)
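

# Minimal self-test sketch (not part of the original module): it exercises
# only the serial MatrixDescriptor path; the band-parallel descriptors above
# need a GPAW band communicator and are not touched here.
if __name__ == '__main__':
    rng = np.random.default_rng(42)
    M = 6
    desc = MatrixDescriptor(M, M)

    # Random Hermitian H_mm and a well-conditioned positive-definite S_mm.
    H_mm = rng.standard_normal((M, M)) + 1j * rng.standard_normal((M, M))
    H_mm = 0.5 * (H_mm + H_mm.T.conj())
    S_mm = np.eye(M) + 0.01 * np.ones((M, M))

    C_mm = desc.empty(dtype=complex)
    eps_M = np.empty(M)
    # Pass copies because eigh is allowed to overwrite its inputs.
    desc.general_diagonalize_dc(H_mm.copy(), S_mm.copy(), C_mm, eps_M)

    # Rows of C_mm hold the conjugated generalized eigenvectors:
    # H v = eps S v with v = C_mm[i].conj().
    for i in range(M):
        v = C_mm[i].conj()
        assert np.allclose(H_mm @ v, eps_M[i] * S_mm @ v)
    print('eigenvalues:', eps_M)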