Source code for yt.utilities.decompose

import numpy as np


[docs] def SIEVE_PRIMES(x): return x and x[:1] + SIEVE_PRIMES([n for n in x if n % x[0]])
[docs] def decompose_to_primes(max_prime): """Decompose number into the primes""" for prime in SIEVE_PRIMES(list(range(2, max_prime))): if prime * prime > max_prime: break while max_prime % prime == 0: yield prime max_prime //= prime if max_prime > 1: yield max_prime
[docs] def decompose_array(shape, psize, bbox, *, cell_widths=None): """Calculate list of product(psize) subarrays of arr, along with their left and right edges """ return split_array(bbox[:, 0], bbox[:, 1], shape, psize, cell_widths=cell_widths)
[docs] def evaluate_domain_decomposition(n_d, pieces, ldom): """Evaluate longest to shortest edge ratio BEWARE: lot's of magic here""" eff_dim = (n_d > 1).sum() exp = float(eff_dim - 1) / float(eff_dim) ideal_bsize = eff_dim * pieces ** (1.0 / eff_dim) * np.prod(n_d) ** exp mask = np.where(n_d > 1) nd_arr = np.array(n_d, dtype=np.float64)[mask] bsize = int(np.sum(ldom[mask] / nd_arr * np.prod(nd_arr))) load_balance = float(np.prod(n_d)) / ( float(pieces) * np.prod((n_d - 1) // ldom + 1) ) # 0.25 is magic number quality = load_balance / (1 + 0.25 * (bsize / ideal_bsize - 1.0)) # \todo add a factor that estimates lower cost when x-direction is # not chopped too much # \deprecated estimate these magic numbers quality *= 1.0 - (0.001 * ldom[0] + 0.0001 * ldom[1]) / pieces if np.any(ldom > n_d): quality = 0 return quality
[docs] def factorize_number(pieces): """Return array consisting of prime, its power and number of different decompositions in three dimensions for this prime """ factors = list(decompose_to_primes(pieces)) temp = np.bincount(factors) return np.array( [ (prime, temp[prime], (temp[prime] + 1) * (temp[prime] + 2) // 2) for prime in np.unique(factors) ], dtype="int64", )
[docs] def get_psize(n_d, pieces): """Calculate the best division of array into px*py*pz subarrays. The goal is to minimize the ratio of longest to shortest edge to minimize the amount of inter-process communication. """ fac = factorize_number(pieces) nfactors = len(fac[:, 2]) best = 0.0 p_size = np.ones(3, dtype=np.int64) if pieces == 1: return p_size while np.all(fac[:, 2] > 0): ldom = np.ones(3, dtype=np.int64) for nfac in range(nfactors): i = int(np.sqrt(0.25 + 2 * (fac[nfac, 2] - 1)) - 0.5) k = fac[nfac, 2] - (1 + i * (i + 1) // 2) i = fac[nfac, 1] - i j = fac[nfac, 1] - (i + k) ldom *= fac[nfac, 0] ** np.array([i, j, k]) quality = evaluate_domain_decomposition(n_d, pieces, ldom) if quality > best: best = quality p_size = ldom # search for next unique combination for j in range(nfactors): if fac[j, 2] > 1: fac[j, 2] -= 1 break else: if j < nfactors - 1: fac[j, 2] = int((fac[j, 1] + 1) * (fac[j, 1] + 2) / 2) else: fac[:, 2] = 0 # no more combinations to try return p_size
[docs] def split_array(gle, gre, shape, psize, *, cell_widths=None): """Split array into px*py*pz subarrays.""" n_d = np.array(shape, dtype=np.int64) dds = (gre - gle) / shape left_edges = [] right_edges = [] shapes = [] slices = [] if cell_widths is None: cell_widths_by_grid = None else: cell_widths_by_grid = [] for i in range(psize[0]): for j in range(psize[1]): for k in range(psize[2]): piece = np.array((i, j, k), dtype=np.int64) lei = n_d * piece // psize rei = n_d * (piece + np.ones(3, dtype=np.int64)) // psize if cell_widths is not None: cws = [] offset_le = [] offset_re = [] for idim in range(3): cws.append(cell_widths[idim][lei[idim] : rei[idim]]) offset_le.append(np.sum(cell_widths[idim][0 : lei[idim]])) offset_re.append(offset_le[idim] + np.sum(cws[idim])) cell_widths_by_grid.append(cws) offset_re = np.array(offset_re) offset_le = np.array(offset_le) else: offset_le = lei * dds offset_re = rei * dds lle = gle + offset_le lre = gle + offset_re left_edges.append(lle) right_edges.append(lre) shapes.append(rei - lei) slices.append(np.s_[lei[0] : rei[0], lei[1] : rei[1], lei[2] : rei[2]]) return left_edges, right_edges, shapes, slices, cell_widths_by_grid