Source code for yt.data_objects.level_sets.clump_handling

import uuid

import numpy as np

from yt.fields.derived_field import ValidateSpatial
from yt.frontends.ytdata.utilities import save_as_dataset
from yt.funcs import get_output_filename, mylog
from yt.utilities.tree_container import TreeContainer

from .clump_info_items import clump_info_registry
from .clump_validators import clump_validator_registry
from .contour_finder import identify_contours


def add_contour_field(ds, contour_key):
    def _contours(field, data):
        fd = data.get_field_parameter(f"contour_slices_{contour_key}")
        vals = data["index", "ones"] * -1
        if fd is None or fd == 0.0:
            return vals

        for sl, v in fd.get(data.id, []):
            vals[sl] = v
        return vals

    ds.add_field(
        ("index", f"contours_{contour_key}"),
        function=_contours,
        validators=[ValidateSpatial(0)],
        take_log=False,
        display_field=False,
        sampling_type="cell",
        units="",
    )
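
# Illustrative usage sketch (not part of this module): add_contour_field
# attaches a transient ("index", "contours_<key>") field whose values are
# filled in from the "contour_slices_<key>" field parameter; with no such
# parameter set, every cell evaluates to -1. The dataset path and key below
# are hypothetical.
#
#   import yt
#   ds = yt.load("IsolatedGalaxy/galaxy0030/galaxy0030")
#   add_contour_field(ds, contour_key="example")
#   ad = ds.all_data()
#   print(ad["index", "contours_example"])  # all -1 without contour slices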


class Clump(TreeContainer):
    def __init__(
        self,
        data,
        field,
        parent=None,
        clump_info=None,
        validators=None,
        base=None,
        contour_key=None,
        contour_id=None,
    ):
        self.data = data
        self.field = field
        self.parent = parent
        self.quantities = data.quantities
        self.min_val = self.data[field].min()
        self.max_val = self.data[field].max()
        self.info = {}
        self.children = []

        # is this the parent clump?
        if base is None:
            base = self
            self.total_clumps = 0

        if clump_info is None:
            self.set_default_clump_info()
        else:
            self.clump_info = clump_info

        for ci in self.clump_info:
            ci(self)

        self.base = base
        self.clump_id = self.base.total_clumps
        self.base.total_clumps += 1
        self.contour_key = contour_key
        self.contour_id = contour_id

        if parent is not None:
            self.data.parent = self.parent.data

        if validators is None:
            validators = []
        self.validators = validators

        # Return value of validity function.
        self.valid = None

    _leaves = None

    @property
    def leaves(self):
        if self._leaves is not None:
            return self._leaves

        self._leaves = []
        for clump in self:
            if not clump.children:
                self._leaves.append(clump)
        return self._leaves

    def add_validator(self, validator, *args, **kwargs):
        """
        Add a validating function to determine whether the clump should
        be kept.
        """
        callback = clump_validator_registry.find(validator, *args, **kwargs)
        self.validators.append(callback)
        for child in self.children:
            child.add_validator(validator)

    def add_info_item(self, info_item, *args, **kwargs):
        "Adds an entry to clump_info list and tells children to do the same."
        callback = clump_info_registry.find(info_item, *args, **kwargs)
        callback(self)
        self.clump_info.append(callback)
        for child in self.children:
            child.add_info_item(info_item)
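
    # Illustrative sketch (not part of this module): info items and validators
    # are looked up by name in clump_info_registry / clump_validator_registry,
    # so both are attached by string. The names below appear elsewhere in this
    # module; "master_clump" is a hypothetical Clump instance.
    #
    #   master_clump.add_info_item("center_of_mass")
    #   master_clump.add_validator("min_cells", 20)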

    def set_default_clump_info(self):
        "Defines default entries in the clump_info array."

        # add_info_item is recursive so this function does not need to be.
        self.clump_info = []

        self.add_info_item("total_cells")
        self.add_info_item("cell_mass")

        if any("jeans" in f for f in self.data.ds.field_list):
            self.add_info_item("mass_weighted_jeans_mass")
            self.add_info_item("volume_weighted_jeans_mass")

        self.add_info_item("max_grid_level")

        if any("number_density" in f for f in self.data.ds.field_list):
            self.add_info_item("min_number_density")
            self.add_info_item("max_number_density")

    def clear_clump_info(self):
        """
        Clears the clump_info array and passes the instruction to its
        children.
        """
        self.clump_info = []
        for child in self.children:
            child.clear_clump_info()

    def find_children(self, min_val, max_val=None):
        if self.children:
            mylog.info("Wiping out existing children clumps: %d.", len(self.children))
        self.children = []
        if max_val is None:
            max_val = self.max_val
        nj, cids = identify_contours(self.data, self.field, min_val, max_val)
        # Here, cids is the set of slices and values, keyed by the
        # parent_grid_id, that defines the contours.  So we can figure out all
        # the unique values of the contours by examining the list here.
        unique_contours = set()
        for sl_list in cids.values():
            for _sl, ff in sl_list:
                unique_contours.update(np.unique(ff))
        contour_key = uuid.uuid4().hex
        base_object = getattr(self.data, "base_object", self.data)
        add_contour_field(base_object.ds, contour_key)
        for cid in sorted(unique_contours):
            if cid == -1:
                continue
            new_clump = base_object.cut_region(
                [f"obj['contours_{contour_key}'] == {cid}"],
                {(f"contour_slices_{contour_key}"): cids},
            )
            if new_clump[("index", "ones")].size == 0:
                # This is to skip possibly duplicate clumps.
                # Using "ones" here will speed things up.
                continue
            self.children.append(
                Clump(
                    new_clump,
                    self.field,
                    parent=self,
                    validators=self.validators,
                    base=self.base,
                    clump_info=self.clump_info,
                    contour_key=contour_key,
                    contour_id=cid,
                )
            )

    def __iter__(self):
        yield self
        for child in self.children:
            yield from child
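
    # Illustrative sketch (not part of this module): __iter__ yields this
    # clump followed by all of its descendants, so a whole tree (or any
    # subtree) can be walked directly; "master_clump" is a hypothetical
    # Clump instance.
    #
    #   for clump in master_clump:
    #       print(clump.clump_id, clump.min_val, clump.max_val)
    #   leaf_clumps = master_clump.leaves  # childless clumps only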

    def save_as_dataset(self, filename=None, fields=None):
        r"""Export clump tree to a reloadable yt dataset.

        This function will take a clump object and output a dataset
        containing the fields given in the ``fields`` list and all info
        items.  The resulting dataset can be reloaded as a yt dataset.

        Parameters
        ----------
        filename : str, optional
            The name of the file to be written.  If None, the name will be a
            combination of the original dataset and the clump index.
        fields : list of strings or tuples, optional
            If this is supplied, it is the list of fields to be saved to
            disk.

        Returns
        -------
        filename : str
            The name of the file that has been created.

        Examples
        --------

        >>> import numpy as np
        >>> import yt
        >>> from yt.data_objects.level_sets.api import Clump, find_clumps
        >>> ds = yt.load("IsolatedGalaxy/galaxy0030/galaxy0030")
        >>> data_source = ds.disk(
        ...     [0.5, 0.5, 0.5], [0.0, 0.0, 1.0], (8, "kpc"), (1, "kpc")
        ... )
        >>> field = ("gas", "density")
        >>> step = 2.0
        >>> c_min = 10 ** np.floor(np.log10(data_source[field]).min())
        >>> c_max = 10 ** np.floor(np.log10(data_source[field]).max() + 1)
        >>> master_clump = Clump(data_source, field)
        >>> master_clump.add_info_item("center_of_mass")
        >>> master_clump.add_validator("min_cells", 20)
        >>> find_clumps(master_clump, c_min, c_max, step)
        >>> fn = master_clump.save_as_dataset(
        ...     fields=[("gas", "density"), ("all", "particle_mass")]
        ... )
        >>> new_ds = yt.load(fn)
        >>> print(new_ds.tree["clump", "cell_mass"])
        1296926163.91 Msun
        >>> print(new_ds.tree["grid", "density"])
        [ 2.54398434e-26 2.46620353e-26 2.25120154e-26 ..., 1.12879234e-25
          1.59561490e-25 1.09824903e-24] g/cm**3
        >>> print(new_ds.tree["all", "particle_mass"])
        [ 4.25472446e+38 4.25472446e+38 4.25472446e+38 ..., 2.04238266e+38
          2.04523901e+38 2.04770938e+38] g
        >>> print(new_ds.tree.children[0]["clump", "cell_mass"])
        909636495.312 Msun
        >>> print(new_ds.leaves[0]["clump", "cell_mass"])
        3756566.99809 Msun
        >>> print(new_ds.leaves[0]["grid", "density"])
        [ 6.97820274e-24 6.58117370e-24 7.32046082e-24 6.76202430e-24
          7.41184837e-24 6.76981480e-24 6.94287213e-24 6.56149658e-24
          6.76584569e-24 6.94073710e-24 7.06713082e-24 7.22556526e-24
          7.08338898e-24 6.78684331e-24 7.40647040e-24 7.03050456e-24
          7.12438678e-24 6.56310217e-24 7.23201662e-24 7.17314333e-24] g/cm**3

        """

        ds = self.data.ds
        keyword = "%s_clump_%d" % (str(ds), self.clump_id)
        filename = get_output_filename(filename, keyword, ".h5")

        # collect clump info fields
        clump_info = {ci.name: [] for ci in self.base.clump_info}
        clump_info.update(
            {
                field: []
                for field in ["clump_id", "parent_id", "contour_key", "contour_id"]
            }
        )
        for clump in self:
            clump_info["clump_id"].append(clump.clump_id)
            if clump.parent is None:
                parent_id = -1
            else:
                parent_id = clump.parent.clump_id
            clump_info["parent_id"].append(parent_id)

            contour_key = clump.contour_key
            if contour_key is None:
                contour_key = -1
            clump_info["contour_key"].append(contour_key)
            contour_id = clump.contour_id
            if contour_id is None:
                contour_id = -1
            clump_info["contour_id"].append(contour_id)

            for ci in self.base.clump_info:
                clump_info[ci.name].append(clump.info[ci.name][1])

        for ci in clump_info:
            if hasattr(clump_info[ci][0], "units"):
                clump_info[ci] = ds.arr(clump_info[ci])
            else:
                clump_info[ci] = np.array(clump_info[ci])

        ftypes = {ci: "clump" for ci in clump_info}

        # collect data fields
        if fields is not None:
            contour_fields = [
                ("index", f"contours_{ckey}")
                for ckey in np.unique(clump_info["contour_key"])
                if str(ckey) != "-1"
            ]

            ptypes = []
            field_data = {}
            need_grid_positions = False
            for f in self.base.data._determine_fields(fields) + contour_fields:
                if ds.field_info[f].sampling_type == "particle":
                    if f[0] not in ptypes:
                        ptypes.append(f[0])
                    ftypes[f] = f[0]
                else:
                    need_grid_positions = True
                    if f[1] in ("x", "y", "z", "dx", "dy", "dz"):
                        # skip 'xyz' if a user passes that in because they
                        # will be added to ftypes below
                        continue
                    ftypes[f] = "grid"
                field_data[f] = self.base[f]

            if len(ptypes) > 0:
                for ax in "xyz":
                    for ptype in ptypes:
                        p_field = (ptype, f"particle_position_{ax}")
                        if p_field in ds.field_info and p_field not in field_data:
                            ftypes[p_field] = p_field[0]
                            field_data[p_field] = self.base[p_field]

                for clump in self:
                    if clump.contour_key is None:
                        continue
                    for ptype in ptypes:
                        cfield = (ptype, f"contours_{clump.contour_key}")
                        if cfield not in field_data:
                            field_data[cfield] = clump.data._part_ind(ptype).astype(
                                np.int64
                            )
                            ftypes[cfield] = ptype
                        field_data[cfield][
                            clump.data._part_ind(ptype)
                        ] = clump.contour_id

            if need_grid_positions:
                for ax in "xyz":
                    g_field = ("index", ax)
                    if g_field in ds.field_info and g_field not in field_data:
                        field_data[g_field] = self.base[g_field]
                        ftypes[g_field] = "grid"
                    g_field = ("index", "d" + ax)
                    if g_field in ds.field_info and g_field not in field_data:
                        ftypes[g_field] = "grid"
                        field_data[g_field] = self.base[g_field]

            if self.contour_key is not None:
                cfilters = {}
                for field in field_data:
                    if ftypes[field] == "grid":
                        ftype = "index"
                    else:
                        ftype = field[0]
                    cfield = (ftype, f"contours_{self.contour_key}")
                    if cfield not in cfilters:
                        cfilters[cfield] = field_data[cfield] == self.contour_id
                    field_data[field] = field_data[field][cfilters[cfield]]

            clump_info.update(field_data)

        extra_attrs = {"data_type": "yt_clump_tree", "container_type": "yt_clump_tree"}
        save_as_dataset(
            ds, filename, clump_info, field_types=ftypes, extra_attrs=extra_attrs
        )
        return filename

    def pass_down(self, operation):
        """
        Performs an operation on a clump with an exec and passes the
        instruction down to clump children.
        """

        # Call if callable, otherwise do an exec.
        if callable(operation):
            operation()
        else:
            exec(operation)

        for child in self.children:
            child.pass_down(operation)

    def _validate(self):
        "Apply all user specified validator functions."

        # Only call functions if not done already.
        if self.valid is not None:
            return self.valid

        self.valid = True
        for validator in self.validators:
            self.valid &= validator(self)
            if not self.valid:
                break

        return self.valid

    def __reduce__(self):
        raise RuntimeError(
            "Pickling Clump instances is not supported. Please use "
            "Clump.save_as_dataset instead"
        )

    def __getitem__(self, request):
        return self.data[request]


def find_clumps(clump, min_val, max_val, d_clump):
    mylog.info("Finding clumps: min: %e, max: %e, step: %f", min_val, max_val, d_clump)
    if min_val >= max_val:
        return
    clump.find_children(min_val, max_val=max_val)

    if len(clump.children) == 1:
        find_clumps(clump, min_val * d_clump, max_val, d_clump)
    elif len(clump.children) > 0:
        these_children = []
        mylog.info("Investigating %d children.", len(clump.children))
        for child in clump.children:
            find_clumps(child, min_val * d_clump, max_val, d_clump)
            if len(child.children) > 0:
                these_children.append(child)
            elif child._validate():
                these_children.append(child)
            else:
                mylog.info(
                    "Eliminating invalid, childless clump with %d cells.",
                    len(child.data[("index", "ones")]),
                )
        if len(these_children) > 1:
            mylog.info(
                "%d of %d children survived.", len(these_children), len(clump.children)
            )
            clump.children = these_children
        elif len(these_children) == 1:
            mylog.info(
                "%d of %d children survived, linking its children to parent.",
                len(these_children),
                len(clump.children),
            )
            clump.children = these_children[0].children
            for child in clump.children:
                child.parent = clump
                child.data.parent = clump.data
        else:
            mylog.info(
                "%d of %d children survived, erasing children.",
                len(these_children),
                len(clump.children),
            )
            clump.children = []
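
# Illustrative end-to-end sketch (not part of this module), mirroring the
# example in Clump.save_as_dataset: contour a disk of gas density between
# c_min and c_max in multiplicative steps of d_clump, prune invalid clumps,
# and save the surviving tree. The dataset path and parameters come from that
# docstring and may need adjusting for other data.
#
#   import numpy as np
#   import yt
#   from yt.data_objects.level_sets.api import Clump, find_clumps
#
#   ds = yt.load("IsolatedGalaxy/galaxy0030/galaxy0030")
#   data_source = ds.disk([0.5, 0.5, 0.5], [0.0, 0.0, 1.0], (8, "kpc"), (1, "kpc"))
#   field = ("gas", "density")
#   c_min = 10 ** np.floor(np.log10(data_source[field]).min())
#   c_max = 10 ** np.floor(np.log10(data_source[field]).max() + 1)
#   master_clump = Clump(data_source, field)
#   master_clump.add_validator("min_cells", 20)
#   find_clumps(master_clump, c_min, c_max, 2.0)
#   fn = master_clump.save_as_dataset(fields=[("gas", "density")])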