Coverage for gpaw/matrix_descriptor.py: 20%

266 statements  

import numpy as np
from scipy.linalg import eigh

from gpaw import debug


class MatrixDescriptor:
    """Class representing a 2D matrix shape. Base class for parallel
    matrix descriptor with BLACS."""

    def __init__(self, M, N):
        self.shape = (M, N)

    def __bool__(self):
        return self.shape[0] != 0 and self.shape[1] != 0

    def zeros(self, n=(), dtype=float):
        """Return array of zeros with the correct size on all CPUs.

        The last two dimensions will be equal to the shape of this
        descriptor.  Any leading dimensions are given by n, which may
        be an int or a tuple."""
        return self._new_array(np.zeros, n, dtype)

    def empty(self, n=(), dtype=float):
        """Return uninitialized array with the correct size on all CPUs.

        See zeros()."""
        return self._new_array(np.empty, n, dtype)

    def _new_array(self, func, n, dtype):
        if isinstance(n, int):
            n = n,
        shape = n + self.shape
        return func(shape, dtype)
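
    # Illustrative sketch (not part of the original module): how n and the
    # descriptor shape compose in _new_array() -- leading dimensions come
    # from n, the trailing two from self.shape.
    #
    #   >>> md = MatrixDescriptor(4, 6)
    #   >>> md.zeros().shape
    #   (4, 6)
    #   >>> md.zeros(3, dtype=complex).shape
    #   (3, 4, 6)
    #   >>> md.empty((2, 5)).shape
    #   (2, 5, 4, 6)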

    def check(self, a_mn):
        """Check that specified array is compatible with this descriptor."""
        return a_mn.shape == self.shape and a_mn.flags.contiguous

    def checkassert(self, a_mn):
        ok = self.check(a_mn)
        if not ok:
            if not a_mn.flags.contiguous:
                msg = f'Matrix with shape {a_mn.shape} is not contiguous'
            else:
                msg = ('%s-descriptor incompatible with %s-matrix' %
                       (self.shape, a_mn.shape))
            raise AssertionError(msg)
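
    # Illustrative sketch (not part of the original module): check() rejects
    # wrong shapes and non-contiguous views, presumably because downstream
    # BLAS/BLACS routines expect contiguous buffers.
    #
    #   >>> md = MatrixDescriptor(3, 3)
    #   >>> md.check(np.zeros((3, 3)))
    #   True
    #   >>> md.check(np.zeros((6, 3))[::2])   # right shape, strided view
    #   False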

    def general_diagonalize_dc(self, H_mm, S_mm, C_mm, eps_M,
                               UL='L', iu=None):
        if iu is None:
            n = len(H_mm)
            eigvals = None  # all
        else:
            n = iu
            eigvals = (0, n - 1)
        eps_M[:n], C_mm.T[:, :n] = eigh(H_mm, S_mm,
                                        subset_by_index=eigvals,
                                        overwrite_a=True,
                                        check_finite=debug)
        if C_mm.dtype == complex:
            np.negative(C_mm.imag, C_mm.imag)
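
    # Illustrative sketch (not part of the original module): the method above
    # wraps scipy.linalg.eigh for the generalized problem H c = eps S c, and
    # subset_by_index=(0, n - 1) restricts it to the lowest n eigenpairs.
    # Standalone example with hypothetical 2x2 matrices:
    #
    #   >>> H = np.array([[2.0, 0.5], [0.5, 1.0]])
    #   >>> S = np.eye(2)
    #   >>> eps, C = eigh(H, S, subset_by_index=(0, 0))  # lowest pair only
    #   >>> eps.shape, C.shape
    #   ((1,), (2, 1))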

    def my_blocks(self, array_mn):
        yield (0, self.shape[0], 0, self.shape[1], array_mn)

    def estimate_memory(self, mem, dtype):
        """Handled by subclass."""
        pass


class BandMatrixDescriptor(MatrixDescriptor):
    """Descriptor class for square matrices of bands times bands."""

    def __init__(self, bd, gd, ksl):
        MatrixDescriptor.__init__(self, bd.nbands, bd.nbands)
        self.bd = bd
        self.gd = gd  # XXX used?
        self.ksl = ksl  # not really used...

    def assemble_blocks(self, A_qnn, A_NN, hermitian):
        """Assign all distributed sub-blocks received from the various ranks
        to the relevant parts of a Hermitian or non-Hermitian matrix A_NN.

        Parameters:

        A_qnn: ndarray
            Sub-blocks belonging to the specified rank.
        A_NN: ndarray
            Full matrix in which to write contributions from sub-blocks.
        hermitian: bool
            Indicates whether A_NN is a Hermitian matrix, in which
            case only the lower triangular part is assigned to.

        Note that the sub-block buffers are used for communicating across the
        band communicator, hence A_qnn will be altered during the assembly.
        """
        if self.bd.comm.size == 1:
            if hermitian:
                self.triangular_blockwise_assign(A_qnn, A_NN, 0)
            else:
                self.full_blockwise_assign(A_qnn, A_NN, 0)
            return

        if self.bd.comm.rank == 0:
            for band_rank in range(self.bd.comm.size):
                if band_rank > 0:
                    self.bd.comm.receive(A_qnn, band_rank, 13)
                if hermitian:
                    self.triangular_blockwise_assign(A_qnn, A_NN, band_rank)
                else:
                    self.full_blockwise_assign(A_qnn, A_NN, band_rank)
        else:
            self.bd.comm.send(A_qnn, 0, 13)

    def triangular_blockwise_assign(self, A_qnn, A_NN, band_rank):
        """Assign the sub-blocks belonging to a given rank to the lower
        triangular part of a Hermitian matrix A_NN.  This subroutine is used
        for matrix assembly.

        Parameters:

        A_qnn: ndarray
            Sub-blocks belonging to the specified rank.
        A_NN: ndarray
            Full matrix in which to write contributions from sub-blocks.
        band_rank: int
            Communicator rank to which the sub-blocks belong.

        Note that a Hermitian matrix requires Q=B//2+1 blocks of M x M
        elements where B is the communicator size and M=N//B for N bands.
        """
        N = self.bd.mynbands
        B = self.bd.comm.size
        assert band_rank in range(B)

        if B == 1:
            # Only fill in the lower part
            mask = np.tri(N).astype(bool)
            A_NN[mask] = A_qnn.reshape((N, N))[mask]
            return

        # A_qnn[q2,myn1,myn2] on rank q1 is the q2'th overlap calculated
        # between <psi_n1| and A|psit_n2> where n1 <-> (q1,myn1) and
        # n2 <-> ((q1+q2)%B,myn2) since we've sent/received q2 times.
        q1 = band_rank
        Q = B // 2 + 1
        if debug:
            assert A_qnn.shape == (Q, N, N)

        # Note that for integer inequalities, these relations are useful (X>0):
        # A*X > B <=> A > B//X  ^  A*X <= B <=> A <= B//X

        if self.bd.strided:
            A_nbnb = A_NN.reshape((N, B, N, B))
            mask = np.empty((N, N), dtype=bool)
            for q2 in range(Q):
                # n1 = (q1+q2)%B + myn1*B  ^  n2 = q1 + myn2*B
                #
                # We seek the lower triangular part i.e. n1 >= n2
                # <=> (myn2-myn1)*B <= (q1+q2)%B-q1
                # <=> myn2-myn1 <= dq//B
                dq = (q1 + q2) % B - q1  # within ]-B; Q[ so dq//B is -1 or 0

                # Create mask for lower part of current block
                mask[:] = np.tri(N, N, dq // B)
                if debug:
                    m1, m2 = np.indices((N, N))
                    assert dq in range(-B + 1, Q)
                    assert (mask == (m1 >= m2 - dq // B)).all()

                # Copy lower part of A_qnn[q2] to its rightful place
                A_nbnb[:, (q1 + q2) % B, :, q1][mask] = A_qnn[q2][mask]

                # Negate the transposed mask to get complementary mask
                mask = ~mask.T

                # Copy upper part of Hermitian conjugate of A_qnn[q2]
                A_nbnb[:, q1, :,
                       (q1 + q2) % B][mask] = A_qnn[q2].T.conj()[mask]
        else:
            A_bnbn = A_NN.reshape((B, N, B, N))

            # Optimization for the first block
            if q1 == 0:
                A_bnbn[:Q, :, 0] = A_qnn
                return

            for q2 in range(Q):
                # n1 = ((q1+q2)%B)*N + myn1  ^  n2 = q1*N + myn2
                #
                # We seek the lower triangular part i.e. n1 >= n2
                # <=> ((q1+q2)%B-q1)*N >= myn2-myn1
                # <=> myn2-myn1 <= dq*N
                # <=> entire block if dq > 0,
                # ... myn2 <= myn1 if dq == 0,
                # ... copy nothing if dq < 0
                if q1 + q2 < B:
                    A_bnbn[q1 + q2, :, q1] = A_qnn[q2]
                else:
                    A_bnbn[q1, :, q1 + q2 - B] = A_qnn[q2].T.conj()
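
    # Illustrative sketch (not part of the original module): the dq // B
    # offset used above selects either the full lower triangle (k=0) or the
    # strictly lower triangle (k=-1) of an N x N block:
    #
    #   >>> np.tri(3, 3, 0).astype(bool)[1].tolist()
    #   [True, True, False]
    #   >>> np.tri(3, 3, -1).astype(bool)[1].tolist()
    #   [True, False, False]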

    def full_blockwise_assign(self, A_qnn, A_NN, band_rank):
        """Assign the sub-blocks belonging to a given rank to the full
        non-Hermitian matrix A_NN.  This subroutine is used for matrix
        assembly.

        Parameters:

        A_qnn: ndarray
            Sub-blocks belonging to the specified rank.
        A_NN: ndarray
            Full matrix, in which to write contributions from sub-blocks.
        band_rank: int
            Communicator rank to which the sub-blocks belong.

        Note that a non-Hermitian matrix requires Q=B blocks of M x M
        elements where B is the communicator size and M=N//B for N bands.
        """
        N = self.bd.mynbands
        B = self.bd.comm.size
        assert band_rank in range(B)

        if B == 1:
            A_NN[:] = A_qnn.reshape((N, N))
            return

        # A_qnn[q2,myn1,myn2] on rank q1 is the q2'th overlap calculated
        # between <psi_n1| and A|psit_n2> where n1 <-> (q1,myn1) and
        # n2 <-> ((q1+q2)%B,myn2) since we've sent/received q2 times.
        q1 = band_rank
        Q = B
        if debug:
            assert A_qnn.shape == (Q, N, N)

        if self.bd.strided:
            A_nbnb = A_NN.reshape((N, B, N, B))
            for q2 in range(Q):
                A_nbnb[:, (q1 + q2) % B, :, q1] = A_qnn[q2]
        else:
            A_bnbn = A_NN.reshape((B, N, B, N))

            # Optimization for the first block
            if q1 == 0:
                A_bnbn[:Q, :, 0] = A_qnn
                return

            for q2 in range(Q):
                A_bnbn[(q1 + q2) % B, :, q1] = A_qnn[q2]
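
    # Illustrative sketch (not part of the original module): in the
    # non-strided layout the global band index is n = q*M + myn, so
    # reshaping the full matrix to (B, M, B, M) exposes the M x M blocks
    # directly, which is what the *_blockwise_assign methods exploit
    # (hypothetical B=2, M=2):
    #
    #   >>> NB, B = 4, 2
    #   >>> M = NB // B
    #   >>> A_NN = np.arange(NB * NB, dtype=float).reshape(NB, NB)
    #   >>> A_bnbn = A_NN.reshape(B, M, B, M)
    #   >>> np.array_equal(A_bnbn[1, :, 0], A_NN[2:4, 0:2])
    #   True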

    def extract_block(self, A_NN, q1, q2):
        """Extract the sub-block belonging to a given pair of ranks within
        the full matrix A_NN.  Extraction may result in copies to ensure unit
        stride, so this routine should not be used for altering A_NN.

        Parameters:

        A_NN: ndarray
            Full matrix, from which to read the requested sub-block.
        q1: int
            Communicator rank to which the sub-block belongs (row index).
        q2: int
            Communicator rank the sub-block originated from (column index).

        Note that a Hermitian matrix requires just Q=B//2+1 blocks of M x M
        elements where B is the communicator size and M=N//B for N bands.
        Therefore, care should be taken to only request q1,q2 pairs which
        are connected by Q shifts or less if A_NN is lower triangular.
        """
        N = self.bd.mynbands
        B = self.bd.comm.size

        if B == 1:
            return A_NN

        if self.bd.strided:
            A_nbnb = A_NN.reshape((N, B, N, B))
            # last dim must have unit stride
            return A_nbnb[:, q2, :, q1].copy()
        else:
            A_bnbn = A_NN.reshape((B, N, B, N))
            return A_bnbn[q2, :, q1]
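
    # Illustrative sketch (not part of the original module): in the strided
    # layout the global band index is n = myn*B + q, so each M x M block of
    # the full matrix is a strided subgrid A_NN[r::B, c::B], which the
    # (M, B, M, B) reshape used above picks out (hypothetical B=2, M=2):
    #
    #   >>> NB, B = 4, 2
    #   >>> M = NB // B
    #   >>> A_NN = np.arange(NB * NB, dtype=float).reshape(NB, NB)
    #   >>> A_nbnb = A_NN.reshape(M, B, M, B)
    #   >>> np.array_equal(A_nbnb[:, 1, :, 0], A_NN[1::B, 0::B])
    #   True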

    def redistribute_input(self, A_NN):  # do nothing
        return A_NN

    def redistribute_output(self, A_NN):  # do nothing
        if debug:
            self.checkassert(A_NN)
        return A_NN

    def estimate_memory(self, mem, dtype):
        # Temporary work arrays included in estimate #
        nbands = self.bd.nbands
        itemsize = mem.itemsize[dtype]
        mem.subnode('A_NN', nbands * nbands * itemsize)
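        # Illustrative note (not part of the original module): for example,
        # nbands = 1000 with float64 (itemsize 8) gives 1000 * 1000 * 8
        # = 8.0e6 bytes (about 7.6 MiB) for the A_NN work array.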


class BlacsBandMatrixDescriptor(MatrixDescriptor):
    """Descriptor class for square BLACS matrices of bands times bands."""

    def __init__(self, bd, gd, ksl):
        # XXX a hack...
        MatrixDescriptor.__init__(self, bd.nbands, bd.mynbands)
        self.bd = bd
        self.gd = gd  # XXX used?
        self.ksl = ksl

    def assemble_blocks(self, A_qnn, A_Nn, hermitian):
        """Assign all distributed sub-blocks received from the various ranks
        to the relevant parts of a Hermitian or non-Hermitian matrix A_Nn.

        Parameters:

        A_qnn: ndarray
            Sub-blocks belonging to the specified rank.
        A_Nn: ndarray
            Full column vector in which to write contributions from
            sub-blocks.
        hermitian: bool
            Indicates whether A_Nn represents a Hermitian matrix, in which
            case only the lower triangular part is assigned to.

        Note that the sub-block buffers are used for communicating across the
        band communicator, hence A_qnn will be altered during the assembly.
        """

        band_rank = self.bd.comm.rank
        if hermitian:
            self.triangular_columnwise_assign(A_qnn, A_Nn, band_rank)
        else:
            self.full_columnwise_assign(A_qnn, A_Nn, band_rank)

    def triangular_columnwise_assign(self, A_qnn, A_Nn, band_rank):
        """Assign the sub-blocks belonging to a given rank to the lower
        triangular part of a Hermitian matrix A_Nn.  This subroutine is used
        for matrix assembly.

        Parameters:

        A_qnn: ndarray
            Sub-blocks belonging to the specified rank.
        A_Nn: ndarray
            Full column vector in which to write contributions from
            sub-blocks.
        band_rank: int
            Communicator rank to which the sub-blocks belong.

        Note that a Hermitian matrix requires Q=B//2+1 blocks of M x M
        elements where B is the communicator size and M=N//B for N bands.
        """
        N = self.bd.mynbands
        B = self.bd.comm.size
        assert band_rank in range(B)

        if B == 1:
            # Only fill in the lower part
            mask = np.tri(N).astype(bool)
            A_Nn[mask] = A_qnn.reshape((N, N))[mask]
            return

        # A_qnn[q2,myn1,myn2] on rank q1 is the q2'th overlap calculated
        # between <psi_n1| and A|psit_n2> where n1 <-> (q1,myn1) and
        # n2 <-> ((q1+q2)%B,myn2) since we've sent/received q2 times.
        q1 = band_rank
        Q = B // 2 + 1
        if debug:
            assert A_qnn.shape == (Q, N, N)

        # Note that for integer inequalities, these relations are useful (X>0):
        # A*X > B <=> A > B//X  ^  A*X <= B <=> A <= B//X

        if self.bd.strided:
            raise NotImplementedError

            """
            A_nbn = A_NN.reshape((N, B, N))
            mask = np.empty((N, N), dtype=bool)
            for q2 in range(Q):
                # n1 = (q1+q2)%B + myn1*B  ^  n2 = q1 + myn2*B
                #
                # We seek the lower triangular part i.e. n1 >= n2
                # <=> (myn2-myn1)*B <= (q1+q2)%B-q1
                # <=> myn2-myn1 <= dq//B
                dq = (q1+q2)%B-q1  # within ]-B; Q[ so dq//B is -1 or 0

                # Create mask for lower part of current block
                mask[:] = np.tri(N, N, dq//B)
                if debug:
                    m1, m2 = np.indices((N, N))
                    assert dq in range(-B+1, Q)
                    assert (mask == (m1 >= m2 - dq//B)).all()

                # Copy lower part of A_qnn[q2] to its rightful place
                A_nbn[:, (q1+q2)%B][mask] = A_qnn[q2][mask]

                # Negate the transposed mask to get complementary mask
                mask = ~mask.T

                # Copy upper part of Hermitian conjugate of A_qnn[q2]
                A_nbn[:, q1][mask] = A_qnn[q2].T.conj()[mask]
            """
        else:
            A_bnn = A_Nn.reshape((B, N, N))

            for q2 in range(Q):
                # n1 = ((q1+q2)%B)*N + myn1  ^  n2 = q1*N + myn2
                #
                # We seek the lower triangular part i.e. n1 >= n2
                # <=> ((q1+q2)%B-q1)*N >= myn2-myn1
                # <=> myn2-myn1 <= dq*N
                # <=> entire block if dq > 0,
                # ... myn2 <= myn1 if dq == 0,
                # ... copy nothing if dq < 0

                if q1 + q2 < B:
                    A_bnn[q1 + q2] = A_qnn[q2]

            reqs = []
            if q1 < Q - 1:  # receive from ranks >= Q
                if debug:
                    Q2 = np.arange(Q, B - q1)
                    print('q1=%d, q2: %12s | recv from q1+q2:%12s -> A_bnn%s' %
                          (q1, Q2.tolist(), (q1 + Q2).tolist(),
                           (q1 + Q2).tolist()))

                for q2 in range(Q, B - q1):
                    rrank = q1 + q2
                    A_nn = A_bnn[q1 + q2]
                    reqs.append(self.bd.comm.receive(A_nn, rrank, block=False))
            elif q1 >= Q:  # send to ranks < Q-1
                if debug:
                    Q2 = np.arange(B - q1, B - Q + 1)[::-1]
                    print('q1=%d, q2: %12s | '
                          'send to q1+q2-B:%12s <- A_qnn%s.T.conj()' %
                          (q1, Q2.tolist(), (q1 + Q2 - B).tolist(),
                           Q2.tolist()))

                # symmetrize comm
                for q2 in reversed(range(B - q1, B - Q + 1)):
                    srank = q1 + q2 - B
                    sbuf_nn = np.ascontiguousarray(np.conjugate(A_qnn[q2].T))
                    reqs.append(self.bd.comm.send(sbuf_nn, srank, block=False))
            else:
                if debug:
                    print('q1=%d, do nothing...' % q1)

            self.bd.comm.waitall(reqs)

    def full_columnwise_assign(self, A_qnn, A_Nn, band_rank):
        """Assign the sub-blocks belonging to a given rank to the columns of
        a non-Hermitian matrix A_Nn.  This subroutine is used for column
        assembly.

        Parameters:

        A_qnn: ndarray
            Sub-blocks belonging to the specified rank.
        A_Nn: ndarray
            Full column vector, in which to write contributions from
            sub-blocks.
        band_rank: int
            Communicator rank to which the sub-blocks belong.

        Note that a non-Hermitian matrix requires Q=B blocks of M x M
        elements where B is the communicator size and M=N//B for N bands.
        """
        N = self.bd.mynbands
        B = self.bd.comm.size
        assert band_rank in range(B)

        if B == 1:
            A_Nn[:] = A_qnn.reshape((N, N))
            return

        # A_qnn[q2,myn1,myn2] on rank q1 is the q2'th overlap calculated
        # between <psi_n1| and A|psit_n2> where n1 <-> (q1,myn1) and
        # n2 <-> ((q1+q2)%B,myn2) since we've sent/received q2 times.
        q1 = band_rank
        Q = B

        if self.bd.strided:
            A_nbn = A_Nn.reshape((N, B, N))
            for q2 in range(Q):
                A_nbn[:, (q1 + q2) % B] = A_qnn[q2]
        else:
            A_bnn = A_Nn.reshape((B, N, N))

            # Optimization for the first block
            if q1 == 0:
                A_bnn[:Q] = A_qnn
                return

            for q2 in range(Q):
                A_bnn[(q1 + q2) % B] = A_qnn[q2]
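
    # Illustrative sketch (not part of the original module): A_Nn holds all
    # bands along the rows but only this rank's columns, so the non-strided
    # (B, M, M) reshape stacks the B row-blocks (hypothetical B=2, M=2):
    #
    #   >>> NB, B = 4, 2
    #   >>> M = NB // B
    #   >>> A_Nn = np.arange(NB * M, dtype=float).reshape(NB, M)
    #   >>> A_bnn = A_Nn.reshape(B, M, M)
    #   >>> np.array_equal(A_bnn[1], A_Nn[M:2 * M])
    #   True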

    def extract_block_from_column(self, A_Nn, q1, q2):
        """Extract the sub-block belonging to a given pair of ranks from the
        full column vector A_Nn.  Extraction may result in copies to ensure
        unit stride, so this routine should not be used for altering A_Nn.

        Parameters:

        A_Nn: ndarray
            Full column vector, from which to read the requested sub-block.
        q1: int
            Communicator rank to which the sub-block belongs (row index).
        q2: int
            Communicator rank the sub-block originated from (column index).

        Note that a Hermitian matrix requires just Q=B//2+1 blocks of M x M
        elements where B is the communicator size and M=N//B for N bands.
        Therefore, care should be taken to only request q1,q2 pairs which
        are connected by Q shifts or less if A_Nn is lower triangular.
        """
        N = self.bd.mynbands
        B = self.bd.comm.size

        if B == 1:
            return A_Nn

        if q1 == self.bd.comm.rank:  # XXX must evaluate the same on all ranks!
            if self.bd.strided:
                A_nbn = A_Nn.reshape((N, B, N))
                return A_nbn[:, q2, :].copy()  # block must be contiguous
            else:
                A_bnn = A_Nn.reshape((B, N, N))
                return A_bnn[q2]

        # Time for us to put the cards on the table
        Qs = np.empty((self.bd.comm.size, 2), dtype=int)
        self.bd.comm.all_gather(np.array([q1, q2]), Qs)
        Q1, Q2 = Qs.T

        # Block exchange. What do we want and who should we be sending to?
        rrank = q1
        srank = np.argwhere(Q1 == self.bd.comm.rank).ravel().item()
        sq2 = Q2[srank]

        if debug:
            S = np.empty(self.bd.comm.size, dtype=int)
            self.bd.comm.all_gather(np.array([srank]), S)
            if self.bd.comm.rank == 0:
                print('recv(Q1): %s, send(Q1==rank): %s' %
                      (Q1.tolist(), S.tolist()))

        if self.bd.strided:
            A_nbn = A_Nn.reshape((N, B, N))
            sbuf_nn = A_nbn[:, sq2, :].copy()  # block must be contiguous
        else:
            A_bnn = A_Nn.reshape((B, N, N))
            sbuf_nn = A_bnn[sq2]

        A_nn = np.empty_like(sbuf_nn)
        self.bd.comm.sendreceive(sbuf_nn, srank, A_nn, rrank)
        return A_nn
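
    # Illustrative sketch (not part of the original module): the argwhere
    # lookup above finds the one rank whose requested q1 equals our rank,
    # i.e. the rank we must send our own block to.  With hypothetical
    # gathered requests Q1 = [2, 0, 1], rank 0 sends to rank 1:
    #
    #   >>> Q1 = np.array([2, 0, 1])
    #   >>> np.argwhere(Q1 == 0).ravel().item()
    #   1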

    def extract_block_from_row(self, A_nN, q1, q2):
        """Extract the sub-block belonging to a given pair of ranks from the
        full row vector A_nN.  Extraction may result in copies to ensure
        unit stride, so this routine should not be used for altering A_nN.

        Parameters:

        A_nN: ndarray
            Full row vector, from which to read the requested sub-block.
        q1: int
            Communicator rank to which the sub-block belongs (row index).
        q2: int
            Communicator rank the sub-block originated from (column index).

        Note that a Hermitian matrix requires just Q=B//2+1 blocks of M x M
        elements where B is the communicator size and M=N//B for N bands.
        Therefore, care should be taken to only request q1,q2 pairs which
        are connected by Q shifts or less if A_nN is lower triangular.
        """
        N = self.bd.mynbands
        B = self.bd.comm.size

        if B == 1:
            return A_nN

        if q2 == self.bd.comm.rank:  # XXX must evaluate the same on all ranks!
            if self.bd.strided:
                # XXX This case seems to be untested
                A_nnb = A_nN.reshape((N, N, B))
                return A_nnb[..., q1].copy('C')  # block must be contiguous
            else:
                A_nbn = A_nN.reshape((N, B, N))
                return A_nbn[:, q1, :].copy('C')  # block must be contiguous
        else:
            raise NotImplementedError

    extract_block = extract_block_from_row  # XXX ugly but works

    def redistribute_input(self, A_nn, A_nN=None):  # 2D -> 1D row layout
        if A_nN is None:
            A_nN = self.ksl.nNdescriptor.empty(dtype=A_nn.dtype)
        self.ksl.nn2nN.redistribute(A_nn, A_nN)
        if not self.ksl.nNdescriptor.blacsgrid.is_active():
            assert A_nN.shape == (0, 0)
            A_nN = np.empty((self.bd.mynbands, self.bd.nbands),
                            dtype=A_nN.dtype)
        self.gd.comm.broadcast(A_nN, 0)
        return A_nN

    def redistribute_output(self, A_Nn, A_nn=None):  # 1D column -> 2D layout
        if not self.ksl.Nndescriptor.blacsgrid.is_active():
            A_Nn = np.empty((0, 0), dtype=A_Nn.dtype)
        if A_nn is None:
            A_nn = self.ksl.nndescriptor.empty(dtype=A_Nn.dtype)
        self.ksl.Nn2nn.redistribute(A_Nn, A_nn)
        return A_nn

    def estimate_memory(self, mem, dtype):
        # Temporary work arrays included in estimate #
        mynbands = self.bd.mynbands
        nbands = self.bd.nbands
        itemsize = mem.itemsize[dtype]
        mem.subnode('2 A_nN', 2 * mynbands * nbands * itemsize)
        mem.subnode('2 A_nn', 2 * nbands * nbands / self.ksl.blockgrid.ncpus *
                    itemsize)