import uuid
import numpy as np
from yt.fields.derived_field import ValidateSpatial
from yt.frontends.ytdata.utilities import save_as_dataset
from yt.funcs import get_output_filename, mylog
from yt.utilities.tree_container import TreeContainer
from .clump_info_items import clump_info_registry
from .clump_validators import clump_validator_registry
from .contour_finder import identify_contours
def add_contour_field(ds, contour_key):
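    """
    Add a derived ("index", "contours_<contour_key>") field that fills each
    cell with the contour id recorded in the "contour_slices_<contour_key>"
    field parameter, and with -1 wherever no contour is present.
    """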
def _contours(field, data):
fd = data.get_field_parameter(f"contour_slices_{contour_key}")
vals = data["index", "ones"] * -1
if fd is None or fd == 0.0:
return vals
for sl, v in fd.get(data.id, []):
vals[sl] = v
return vals
ds.add_field(
("index", f"contours_{contour_key}"),
function=_contours,
validators=[ValidateSpatial(0)],
take_log=False,
display_field=False,
sampling_type="cell",
units="",
)
class Clump(TreeContainer):
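    """
    A node in a tree of contour-based clumps.

    A Clump wraps a data container and the field used for contouring,
    keeps references to its parent and children, and records the info
    items and validators attached to it. The hierarchy is built with
    find_clumps and can be exported with save_as_dataset.
    """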
def __init__(
self,
data,
field,
parent=None,
clump_info=None,
validators=None,
base=None,
contour_key=None,
contour_id=None,
):
self.data = data
self.field = field
self.parent = parent
self.quantities = data.quantities
self.min_val = self.data[field].min()
self.max_val = self.data[field].max()
self.info = {}
self.children = []
        # Is this the base (top-level) clump?
if base is None:
base = self
self.total_clumps = 0
if clump_info is None:
self.set_default_clump_info()
else:
self.clump_info = clump_info
for ci in self.clump_info:
ci(self)
self.base = base
self.clump_id = self.base.total_clumps
self.base.total_clumps += 1
self.contour_key = contour_key
self.contour_id = contour_id
if parent is not None:
self.data.parent = self.parent.data
if validators is None:
validators = []
self.validators = validators
# Return value of validity function.
self.valid = None
_leaves = None
@property
def leaves(self):
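        "A list of all clumps at or below this one that have no children."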
if self._leaves is not None:
return self._leaves
self._leaves = []
for clump in self:
if not clump.children:
self._leaves.append(clump)
return self._leaves
def add_validator(self, validator, *args, **kwargs):
"""
Add a validating function to determine whether the clump should
be kept.
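
        Example (illustrative; ``master_clump`` is a Clump built as in the
        ``save_as_dataset`` docstring):

        >>> master_clump.add_validator("min_cells", 20)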
"""
callback = clump_validator_registry.find(validator, *args, **kwargs)
self.validators.append(callback)
for child in self.children:
            child.add_validator(validator, *args, **kwargs)
    def add_info_item(self, info_item, *args, **kwargs):
        "Add an entry to the clump_info list and tell children to do the same."
callback = clump_info_registry.find(info_item, *args, **kwargs)
callback(self)
self.clump_info.append(callback)
for child in self.children:
child.add_info_item(info_item)
    def set_default_clump_info(self):
        "Define the default entries in the clump_info list."
# add_info_item is recursive so this function does not need to be.
self.clump_info = []
self.add_info_item("total_cells")
self.add_info_item("cell_mass")
        if any("jeans" in f for f in self.data.ds.field_list):
self.add_info_item("mass_weighted_jeans_mass")
self.add_info_item("volume_weighted_jeans_mass")
self.add_info_item("max_grid_level")
        if any("number_density" in f for f in self.data.ds.field_list):
self.add_info_item("min_number_density")
self.add_info_item("max_number_density")
def clear_clump_info(self):
"""
        Clears the clump_info list and passes the instruction to its
        children.
"""
self.clump_info = []
for child in self.children:
child.clear_clump_info()
def find_children(self, min_val, max_val=None):
if self.children:
mylog.info("Wiping out existing children clumps: %d.", len(self.children))
self.children = []
if max_val is None:
max_val = self.max_val
nj, cids = identify_contours(self.data, self.field, min_val, max_val)
# Here, cids is the set of slices and values, keyed by the
# parent_grid_id, that defines the contours. So we can figure out all
# the unique values of the contours by examining the list here.
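        # For example (illustrative), cids might look like
        #     {0: [(slice_0, ids_0), (slice_1, ids_1)], 1: [...], ...}
        # where each ids array holds the contour id of every cell covered by
        # the corresponding slice and -1 marks cells outside any contour.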
unique_contours = set()
for sl_list in cids.values():
for _sl, ff in sl_list:
unique_contours.update(np.unique(ff))
contour_key = uuid.uuid4().hex
base_object = getattr(self.data, "base_object", self.data)
add_contour_field(base_object.ds, contour_key)
for cid in sorted(unique_contours):
if cid == -1:
continue
new_clump = base_object.cut_region(
[f"obj['contours_{contour_key}'] == {cid}"],
{(f"contour_slices_{contour_key}"): cids},
)
if new_clump["index", "ones"].size == 0:
# This is to skip possibly duplicate clumps.
# Using "ones" here will speed things up.
continue
self.children.append(
Clump(
new_clump,
self.field,
parent=self,
validators=self.validators,
base=self.base,
clump_info=self.clump_info,
contour_key=contour_key,
contour_id=cid,
)
)
def __iter__(self):
yield self
for child in self.children:
yield from child
def save_as_dataset(self, filename=None, fields=None):
r"""Export clump tree to a reloadable yt dataset.
This function will take a clump object and output a dataset
containing the fields given in the ``fields`` list and all info
items. The resulting dataset can be reloaded as a yt dataset.
Parameters
----------
filename : str, optional
The name of the file to be written. If None, the name
will be a combination of the original dataset and the clump
index.
fields : list of strings or tuples, optional
If this is supplied, it is the list of fields to be saved to
disk.
Returns
-------
filename : str
The name of the file that has been created.
Examples
--------
        >>> import numpy as np
        >>> import yt
>>> from yt.data_objects.level_sets.api import Clump, find_clumps
>>> ds = yt.load("IsolatedGalaxy/galaxy0030/galaxy0030")
>>> data_source = ds.disk(
... [0.5, 0.5, 0.5], [0.0, 0.0, 1.0], (8, "kpc"), (1, "kpc")
... )
>>> field = ("gas", "density")
>>> step = 2.0
>>> c_min = 10 ** np.floor(np.log10(data_source[field]).min())
>>> c_max = 10 ** np.floor(np.log10(data_source[field]).max() + 1)
>>> master_clump = Clump(data_source, field)
>>> master_clump.add_info_item("center_of_mass")
>>> master_clump.add_validator("min_cells", 20)
>>> find_clumps(master_clump, c_min, c_max, step)
>>> fn = master_clump.save_as_dataset(
... fields=[("gas", "density"), ("all", "particle_mass")]
... )
>>> new_ds = yt.load(fn)
        >>> print(new_ds.tree["clump", "cell_mass"])
1296926163.91 Msun
        >>> print(new_ds.tree["grid", "density"])
[ 2.54398434e-26 2.46620353e-26 2.25120154e-26 ..., 1.12879234e-25
1.59561490e-25 1.09824903e-24] g/cm**3
        >>> print(new_ds.tree["all", "particle_mass"])
[ 4.25472446e+38 4.25472446e+38 4.25472446e+38 ..., 2.04238266e+38
2.04523901e+38 2.04770938e+38] g
        >>> print(new_ds.tree.children[0]["clump", "cell_mass"])
909636495.312 Msun
        >>> print(new_ds.leaves[0]["clump", "cell_mass"])
3756566.99809 Msun
        >>> print(new_ds.leaves[0]["grid", "density"])
[ 6.97820274e-24 6.58117370e-24 7.32046082e-24 6.76202430e-24
7.41184837e-24 6.76981480e-24 6.94287213e-24 6.56149658e-24
6.76584569e-24 6.94073710e-24 7.06713082e-24 7.22556526e-24
7.08338898e-24 6.78684331e-24 7.40647040e-24 7.03050456e-24
7.12438678e-24 6.56310217e-24 7.23201662e-24 7.17314333e-24] g/cm**3
"""
ds = self.data.ds
        keyword = f"{ds}_clump_{self.clump_id}"
filename = get_output_filename(filename, keyword, ".h5")
# collect clump info fields
clump_info = {ci.name: [] for ci in self.base.clump_info}
clump_info.update(
{
field: []
for field in ["clump_id", "parent_id", "contour_key", "contour_id"]
}
)
for clump in self:
clump_info["clump_id"].append(clump.clump_id)
if clump.parent is None:
parent_id = -1
else:
parent_id = clump.parent.clump_id
clump_info["parent_id"].append(parent_id)
contour_key = clump.contour_key
if contour_key is None:
contour_key = -1
clump_info["contour_key"].append(contour_key)
contour_id = clump.contour_id
if contour_id is None:
contour_id = -1
clump_info["contour_id"].append(contour_id)
for ci in self.base.clump_info:
clump_info[ci.name].append(clump.info[ci.name][1])
for ci in clump_info:
if hasattr(clump_info[ci][0], "units"):
clump_info[ci] = ds.arr(clump_info[ci])
else:
clump_info[ci] = np.array(clump_info[ci])
ftypes = {ci: "clump" for ci in clump_info}
# collect data fields
if fields is not None:
contour_fields = [
("index", f"contours_{ckey}")
for ckey in np.unique(clump_info["contour_key"])
if str(ckey) != "-1"
]
ptypes = []
field_data = {}
need_grid_positions = False
for f in self.base.data._determine_fields(fields) + contour_fields:
if ds.field_info[f].sampling_type == "particle":
if f[0] not in ptypes:
ptypes.append(f[0])
ftypes[f] = f[0]
else:
need_grid_positions = True
if f[1] in ("x", "y", "z", "dx", "dy", "dz"):
# skip 'xyz' if a user passes that in because they
# will be added to ftypes below
continue
ftypes[f] = "grid"
field_data[f] = self.base[f]
if len(ptypes) > 0:
for ax in "xyz":
for ptype in ptypes:
p_field = (ptype, f"particle_position_{ax}")
if p_field in ds.field_info and p_field not in field_data:
ftypes[p_field] = p_field[0]
field_data[p_field] = self.base[p_field]
for clump in self:
if clump.contour_key is None:
continue
for ptype in ptypes:
cfield = (ptype, f"contours_{clump.contour_key}")
if cfield not in field_data:
field_data[cfield] = clump.data._part_ind(ptype).astype(
np.int64
)
ftypes[cfield] = ptype
field_data[cfield][clump.data._part_ind(ptype)] = (
clump.contour_id
)
if need_grid_positions:
for ax in "xyz":
g_field = ("index", ax)
if g_field in ds.field_info and g_field not in field_data:
field_data[g_field] = self.base[g_field]
ftypes[g_field] = "grid"
g_field = ("index", "d" + ax)
if g_field in ds.field_info and g_field not in field_data:
ftypes[g_field] = "grid"
field_data[g_field] = self.base[g_field]
if self.contour_key is not None:
cfilters = {}
for field in field_data:
if ftypes[field] == "grid":
ftype = "index"
else:
ftype = field[0]
cfield = (ftype, f"contours_{self.contour_key}")
if cfield not in cfilters:
cfilters[cfield] = field_data[cfield] == self.contour_id
field_data[field] = field_data[field][cfilters[cfield]]
clump_info.update(field_data)
extra_attrs = {"data_type": "yt_clump_tree", "container_type": "yt_clump_tree"}
save_as_dataset(
ds, filename, clump_info, field_types=ftypes, extra_attrs=extra_attrs
)
return filename
def pass_down(self, operation):
"""
        Perform an operation on this clump and pass the instruction down
        to its children. The operation is called directly if it is
        callable; otherwise it is run with ``exec``.
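
        Example (illustrative; ``master_clump`` is a Clump built as in
        ``save_as_dataset``, and the wrapped data object is assumed to
        support ``clear_data``):

        >>> master_clump.pass_down("self.data.clear_data()")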
"""
# Call if callable, otherwise do an exec.
if callable(operation):
operation()
else:
exec(operation)
for child in self.children:
child.pass_down(operation)
    def _validate(self):
        "Apply all user-specified validator functions."
# Only call functions if not done already.
if self.valid is not None:
return self.valid
self.valid = True
for validator in self.validators:
self.valid &= validator(self)
if not self.valid:
break
return self.valid
def __reduce__(self):
raise RuntimeError(
"Pickling Clump instances is not supported. Please use "
"Clump.save_as_dataset instead"
)
def __getitem__(self, request):
return self.data[request]
def find_clumps(clump, min_val, max_val, d_clump):
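    """
    Recursively build the hierarchy of child clumps below ``clump``.

    Contours are identified between ``min_val`` and ``max_val``; after each
    pass the lower threshold is multiplied by ``d_clump`` and the search is
    repeated on the surviving children until the lower threshold reaches
    ``max_val``. Childless clumps that fail their validators are discarded.
    """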
mylog.info("Finding clumps: min: %e, max: %e, step: %f", min_val, max_val, d_clump)
if min_val >= max_val:
return
clump.find_children(min_val, max_val=max_val)
if len(clump.children) == 1:
find_clumps(clump, min_val * d_clump, max_val, d_clump)
elif len(clump.children) > 0:
these_children = []
mylog.info("Investigating %d children.", len(clump.children))
for child in clump.children:
find_clumps(child, min_val * d_clump, max_val, d_clump)
if len(child.children) > 0:
these_children.append(child)
elif child._validate():
these_children.append(child)
else:
mylog.info(
"Eliminating invalid, childless clump with %d cells.",
len(child.data["index", "ones"]),
)
if len(these_children) > 1:
mylog.info(
"%d of %d children survived.", len(these_children), len(clump.children)
)
clump.children = these_children
elif len(these_children) == 1:
mylog.info(
"%d of %d children survived, linking its children to parent.",
len(these_children),
len(clump.children),
)
clump.children = these_children[0].children
for child in clump.children:
child.parent = clump
child.data.parent = clump.data
else:
mylog.info(
"%d of %d children survived, erasing children.",
len(these_children),
len(clump.children),
)
clump.children = []