import os
import weakref
from collections import defaultdict
from functools import cached_property, partial
import numpy as np
from yt.data_objects.selection_objects.data_selection_objects import (
YTSelectionContainer,
)
from yt.data_objects.static_output import ParticleDataset
from yt.frontends.gadget.data_structures import _fix_unit_ordering
from yt.frontends.gadget_fof.fields import GadgetFOFFieldInfo, GadgetFOFHaloFieldInfo
from yt.frontends.halo_catalog.data_structures import HaloCatalogFile, HaloDataset
from yt.funcs import only_on_root, setdefaultattr
from yt.geometry.particle_geometry_handler import ParticleIndex
from yt.utilities.cosmology import Cosmology
from yt.utilities.logger import ytLogger as mylog
from yt.utilities.on_demand_imports import _h5py as h5py
class GadgetFOFParticleIndex(ParticleIndex):
def _calculate_particle_count(self):
"""
Calculate the total number of each type of particle.
"""
self.particle_count = {
ptype: sum(d.total_particles[ptype] for d in self.data_files)
for ptype in self.ds.particle_types_raw
}
def _calculate_particle_index_starts(self):
# Halo indices are not saved in the file, so we must count by hand.
# File 0 has halos 0 to N_0 - 1, file 1 has halos N_0 to N_0 + N_1 - 1, etc.
particle_count = defaultdict(int)
offset_count = 0
for data_file in self.data_files:
data_file.index_start = {
ptype: particle_count[ptype] for ptype in data_file.total_particles
}
data_file.offset_start = offset_count
for ptype in data_file.total_particles:
particle_count[ptype] += data_file.total_particles[ptype]
offset_count += data_file.total_offset
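        # Record, per particle type, the global index of the first halo in
        # each file; _get_halo_file_indices later bins halo ids against these
        # with np.digitize to find the file holding a halo's scalar data.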
self._halo_index_start = {
ptype: np.array(
[data_file.index_start[ptype] for data_file in self.data_files]
)
for ptype in self.ds.particle_types_raw
}
def _calculate_file_offset_map(self):
# After the FOF is performed, a load-balancing step redistributes halos
# and then writes more fields. Here, for each file, we create a list of
# files which contain the rest of the redistributed particles.
ifof = np.array(
[data_file.total_particles["Group"] for data_file in self.data_files]
)
isub = np.array([data_file.total_offset for data_file in self.data_files])
subend = isub.cumsum()
fofend = ifof.cumsum()
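        # The cumulative sums give each file's span in the global group and
        # offset orderings; np.digitize maps each file's group range onto the
        # range of files holding the corresponding redistributed data.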
istart = np.digitize(fofend - ifof, subend - isub) - 1
iend = np.clip(np.digitize(fofend, subend), 0, ifof.size - 2)
for i, data_file in enumerate(self.data_files):
data_file.offset_files = self.data_files[istart[i] : iend[i] + 1]
def _detect_output_fields(self):
field_list = []
units = {}
found_fields = {
ptype: False for ptype, pnum in self.particle_count.items() if pnum > 0
}
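        # Stop scanning once every particle type with a nonzero count has
        # contributed fields; remaining files are assumed to add nothing new.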
for data_file in self.data_files:
fl, _units = self.io._identify_fields(data_file)
units.update(_units)
field_list.extend([f for f in fl if f not in field_list])
for ptype in found_fields:
                found_fields[ptype] |= data_file.total_particles[ptype] > 0
if all(found_fields.values()):
break
self.field_list = field_list
ds = self.dataset
        ds.particle_types = tuple({pt for pt, field in field_list})
ds.field_units.update(units)
ds.particle_types_raw = ds.particle_types
def _setup_filenames(self):
template = self.ds.filename_template
ndoms = self.ds.file_count
cls = self.ds._file_class
self.data_files = [
cls(self.ds, self.io, template % {"num": i}, i, frange=None)
for i in range(ndoms)
]
def _setup_data_io(self):
super()._setup_data_io()
self._calculate_particle_count()
self._calculate_particle_index_starts()
self._calculate_file_offset_map()
class GadgetFOFHDF5File(HaloCatalogFile):
def __init__(self, ds, io, filename, file_id, frange):
with h5py.File(filename, mode="r") as f:
self.header = {str(field): val for field, val in f["Header"].attrs.items()}
self.group_length_sum = (
f["Group/GroupLen"][()].sum() if "Group/GroupLen" in f else 0
)
self.group_subs_sum = (
f["Group/GroupNsubs"][()].sum() if "Group/GroupNsubs" in f else 0
)
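        # Number of member-particle ids stored in this file; ids are
        # redistributed across files independently of the halo scalar data.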
self.total_ids = self.header["Nids_ThisFile"]
self.total_offset = 0
super().__init__(ds, io, filename, file_id, frange)
def _read_particle_positions(self, ptype, f=None):
"""
Read all particle positions in this file.
"""
if f is None:
close = True
f = h5py.File(self.filename, mode="r")
else:
close = False
pos = f[ptype][f"{ptype}Pos"][()].astype("float64")
if close:
f.close()
return pos
class GadgetFOFDataset(ParticleDataset):
_load_requirements = ["h5py"]
_index_class = GadgetFOFParticleIndex
_file_class = GadgetFOFHDF5File
_field_info_class = GadgetFOFFieldInfo
def __init__(
self,
filename,
dataset_type="gadget_fof_hdf5",
index_order=None,
index_filename=None,
unit_base=None,
units_override=None,
unit_system="cgs",
):
if unit_base is not None and "UnitLength_in_cm" in unit_base:
# We assume this is comoving, because in the absence of comoving
# integration the redshift will be zero.
unit_base["cmcm"] = 1.0 / unit_base["UnitLength_in_cm"]
self._unit_base = unit_base
if units_override is not None:
raise RuntimeError(
"units_override is not supported for GadgetFOFDataset. "
+ "Use unit_base instead."
)
super().__init__(
filename,
dataset_type,
units_override=units_override,
index_order=index_order,
index_filename=index_filename,
unit_system=unit_system,
)
def add_field(self, *args, **kwargs):
super().add_field(*args, **kwargs)
self._halos_ds.add_field(*args, **kwargs)
@property
def halos_field_list(self):
return self._halos_ds.field_list
@property
def halos_derived_field_list(self):
return self._halos_ds.derived_field_list
@cached_property
def _halos_ds(self):
return GadgetFOFHaloDataset(self)
def _setup_classes(self):
super()._setup_classes()
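        # Expose ds.halo(ptype, identifier) as a factory bound to the
        # companion halo dataset.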
self.halo = partial(GadgetFOFHaloContainer, ds=self._halos_ds)
def _parse_parameter_file(self):
with h5py.File(self.parameter_filename, mode="r") as f:
self.parameters = {
str(field): val for field, val in f["Header"].attrs.items()
}
self.dimensionality = 3
self.refine_by = 2
# Set standard values
self.domain_left_edge = np.zeros(3, "float64")
self.domain_right_edge = np.ones(3, "float64") * self.parameters["BoxSize"]
self.domain_dimensions = np.ones(3, "int32")
self.cosmological_simulation = 1
self._periodicity = (True, True, True)
self.current_redshift = self.parameters["Redshift"]
self.omega_lambda = self.parameters["OmegaLambda"]
self.omega_matter = self.parameters["Omega0"]
self.hubble_constant = self.parameters["HubbleParam"]
cosmology = Cosmology(
hubble_constant=self.hubble_constant,
omega_matter=self.omega_matter,
omega_lambda=self.omega_lambda,
)
self.current_time = cosmology.t_from_z(self.current_redshift)
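        # Derive the sibling-file template from the loaded filename, e.g.
        # "fof_subhalo_tab_298.0.hdf5" -> "fof_subhalo_tab_298.%(num)i.hdf5".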
prefix = os.path.abspath(
os.path.join(
os.path.dirname(self.parameter_filename),
os.path.basename(self.parameter_filename).split(".", 1)[0],
)
)
suffix = self.parameter_filename.rsplit(".", 1)[-1]
self.filename_template = f"{prefix}.%(num)i.{suffix}"
self.file_count = self.parameters["NumFiles"]
self.particle_types = ("Group", "Subhalo")
self.particle_types_raw = ("Group", "Subhalo")
def _set_code_unit_attributes(self):
# Set a sane default for cosmological simulations.
if self._unit_base is None and self.cosmological_simulation == 1:
only_on_root(mylog.info, "Assuming length units are in Mpc/h (comoving)")
self._unit_base = {"length": (1.0, "Mpccm/h")}
        # The remaining defaults we take from the standard Gadget defaults.
unit_base = self._unit_base or {}
if "length" in unit_base:
length_unit = unit_base["length"]
elif "UnitLength_in_cm" in unit_base:
if self.cosmological_simulation == 0:
length_unit = (unit_base["UnitLength_in_cm"], "cm")
else:
length_unit = (unit_base["UnitLength_in_cm"], "cmcm/h")
else:
            raise RuntimeError(
                "Could not determine length units. Supply unit_base with a "
                '"length" or "UnitLength_in_cm" entry.'
            )
length_unit = _fix_unit_ordering(length_unit)
setdefaultattr(self, "length_unit", self.quan(length_unit[0], length_unit[1]))
if "velocity" in unit_base:
velocity_unit = unit_base["velocity"]
elif "UnitVelocity_in_cm_per_s" in unit_base:
velocity_unit = (unit_base["UnitVelocity_in_cm_per_s"], "cm/s")
else:
if self.cosmological_simulation == 0:
velocity_unit = (1e5, "cm/s")
else:
velocity_unit = (1e5, "cm/s * sqrt(a)")
velocity_unit = _fix_unit_ordering(velocity_unit)
setdefaultattr(
self, "velocity_unit", self.quan(velocity_unit[0], velocity_unit[1])
)
# We set hubble_constant = 1.0 for non-cosmology, so this is safe.
# Default to 1e10 Msun/h if mass is not specified.
if "mass" in unit_base:
mass_unit = unit_base["mass"]
elif "UnitMass_in_g" in unit_base:
if self.cosmological_simulation == 0:
mass_unit = (unit_base["UnitMass_in_g"], "g")
else:
mass_unit = (unit_base["UnitMass_in_g"], "g/h")
else:
# Sane default
mass_unit = (1.0, "1e10*Msun/h")
mass_unit = _fix_unit_ordering(mass_unit)
setdefaultattr(self, "mass_unit", self.quan(mass_unit[0], mass_unit[1]))
if "time" in unit_base:
time_unit = unit_base["time"]
elif "UnitTime_in_s" in unit_base:
time_unit = (unit_base["UnitTime_in_s"], "s")
else:
tu = (self.length_unit / self.velocity_unit).to("yr/h")
time_unit = (tu.d, tu.units)
setdefaultattr(self, "time_unit", self.quan(time_unit[0], time_unit[1]))
def __str__(self):
return self.basename.split(".", 1)[0]
@classmethod
def _is_valid(cls, filename: str, *args, **kwargs) -> bool:
if cls._missing_load_requirements():
return False
need_groups = ["Group", "Header", "Subhalo"]
veto_groups = ["FOF"]
        valid = True
        try:
            with h5py.File(filename, mode="r") as fh:
                valid = all(ng in fh["/"] for ng in need_groups) and not any(
                    vg in fh["/"] for vg in veto_groups
                )
        except Exception:
            valid = False
        return valid
class GadgetFOFHaloParticleIndex(GadgetFOFParticleIndex):
def __init__(self, ds, dataset_type):
self.real_ds = weakref.proxy(ds.real_ds)
super().__init__(ds, dataset_type)
def _create_halo_id_table(self):
"""
Create a list of halo start ids so we know which file
        contains particles for a given halo. Note that the halo ids
are distributed over all files and so the ids for a given
halo are likely stored in a different file than the halo
itself.
"""
self._halo_id_number = np.array(
[data_file.total_ids for data_file in self.data_files]
)
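        # Per-file global id ranges: file i stores member-particle ids
        # [_halo_id_start[i], _halo_id_end[i]).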
self._halo_id_end = self._halo_id_number.cumsum()
self._halo_id_start = self._halo_id_end - self._halo_id_number
self._group_length_sum = np.array(
[data_file.group_length_sum for data_file in self.data_files]
)
def _detect_output_fields(self):
field_list = []
scalar_field_list = []
units = {}
found_fields = {
ptype: False for ptype, pnum in self.particle_count.items() if pnum > 0
}
has_ids = False
for data_file in self.data_files:
fl, sl, idl, _units = self.io._identify_fields(data_file)
units.update(_units)
field_list.extend([f for f in fl if f not in field_list])
scalar_field_list.extend([f for f in sl if f not in scalar_field_list])
for ptype in found_fields:
                found_fields[ptype] |= data_file.total_particles[ptype] > 0
has_ids |= len(idl) > 0
if all(found_fields.values()) and has_ids:
break
self.field_list = field_list
self.scalar_field_list = scalar_field_list
ds = self.dataset
ds.scalar_field_list = scalar_field_list
        ds.particle_types = tuple({pt for pt, field in field_list})
ds.field_units.update(units)
ds.particle_types_raw = ds.particle_types
def _identify_base_chunk(self, dobj):
pass
def _read_particle_fields(self, fields, dobj, chunk=None):
if len(fields) == 0:
return {}, []
fields_to_read, fields_to_generate = self._split_fields(fields)
if len(fields_to_read) == 0:
return {}, fields_to_generate
fields_to_return = self.io._read_particle_selection(dobj, fields_to_read)
return fields_to_return, fields_to_generate
def _get_halo_file_indices(self, ptype, identifiers):
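        # digitize bins each halo id by the per-file starting indices; the
        # bin to the left is the file holding that halo's scalar data.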
return np.digitize(identifiers, self._halo_index_start[ptype], right=False) - 1
def _get_halo_scalar_index(self, ptype, identifier):
i_scalar = self._get_halo_file_indices(ptype, [identifier])[0]
scalar_index = identifier - self._halo_index_start[ptype][i_scalar]
return scalar_index
def _get_halo_values(self, ptype, identifiers, fields, f=None):
"""
Get field values for halos. IDs are likely to be
sequential (or at least monotonic), but not necessarily
all within the same file.
This does not do much to minimize file i/o, but with
halos randomly distributed across files, there's not
much more we can do.
"""
# if a file is already open, don't open it again
filename = None if f is None else f.filename
data = defaultdict(lambda: np.empty(identifiers.size))
i_scalars = self._get_halo_file_indices(ptype, identifiers)
for i_scalar in np.unique(i_scalars):
target = i_scalars == i_scalar
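            # per-halo indices relative to this file's first halo; only the
            # entries selected by target are valid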
scalar_indices = identifiers - self._halo_index_start[ptype][i_scalar]
# only open file if it's not already open
my_f = (
f
if self.data_files[i_scalar].filename == filename
else h5py.File(self.data_files[i_scalar].filename, mode="r")
)
for field in fields:
                data[field][target] = my_f[f"{ptype}/{field}"][()][
                    scalar_indices[target]
                ]
if self.data_files[i_scalar].filename != filename:
my_f.close()
return data
def _setup_data_io(self):
super()._setup_data_io()
self._create_halo_id_table()
class GadgetFOFHaloDataset(HaloDataset):
_index_class = GadgetFOFHaloParticleIndex
_file_class = GadgetFOFHDF5File
_field_info_class = GadgetFOFHaloFieldInfo
def __init__(self, ds, dataset_type="gadget_fof_halo_hdf5"):
super().__init__(ds, dataset_type)
@classmethod
def _is_valid(cls, filename: str, *args, **kwargs) -> bool:
# This class is not meant to be instantiated by yt.load()
return False
class GadgetFOFHaloContainer(YTSelectionContainer):
"""
Create a data container to get member particles and individual
values from halos and subhalos. Halo mass, position, and
    velocity are set as attributes. Member particle ids are
    accessible through the "member_ids" field. Other fields with
    one value per halo are accessible as normal. The full field
    list for halo objects can be seen in `ds.halos_field_list`.
Parameters
----------
ptype : string
The type of halo, either "Group" for the main halo or
"Subhalo" for subhalos.
particle_identifier : int or tuple of ints
The halo or subhalo id. If requesting a subhalo, the id
can also be given as a tuple of the main halo id and
subgroup id, such as (1, 4) for subgroup 4 of halo 1.
Attributes
----------
particle_identifier : int
The id of the halo or subhalo.
group_identifier : int
For subhalos, the id of the enclosing halo.
subgroup_identifier : int
For subhalos, the relative id of the subhalo within
the enclosing halo.
particle_number : int
Number of particles in the halo.
mass : float
Halo mass.
position : array of floats
Halo position.
velocity : array of floats
Halo velocity.
    Notes
    -----
Relevant Fields:
* particle_number - number of particles
* subhalo_number - number of subhalos
* group_identifier - id of parent group for subhalos
Examples
--------
>>> import yt
>>> ds = yt.load("gadget_halos/data/groups_298/fof_subhalo_tab_298.0.hdf5")
>>> halo = ds.halo("Group", 0)
>>> print(halo.mass)
13256.5517578 code_mass
>>> print(halo.position)
[ 16.18603706 6.95965052 12.52694607] code_length
>>> print(halo.velocity)
[ 6943694.22793569 -762788.90647454 -794749.63819757] cm/s
>>> print(halo["Group_R_Crit200"])
[ 0.79668683] code_length
>>> # particle ids for this halo
>>> print(halo["member_ids"])
[ 723631. 690744. 854212. ..., 608589. 905551. 1147449.] dimensionless
>>> # get the first subhalo of this halo
>>> subhalo = ds.halo("Subhalo", (0, 0))
>>> print(subhalo["member_ids"])
[ 723631. 690744. 854212. ..., 808362. 956359. 1248821.] dimensionless
"""
_type_name = "halo"
_con_args = ("ptype", "particle_identifier")
_spatial = False
# Do not register it to prevent .halo from being attached to all datasets
_skip_add = True
def __init__(self, ptype, particle_identifier, ds=None):
if ptype not in ds.particle_types_raw:
raise RuntimeError(
f'Possible halo types are {ds.particle_types_raw}, supplied "{ptype}".'
)
self.ptype = ptype
self._current_particle_type = ptype
super().__init__(ds, {})
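        # A (group, subgroup) tuple identifies a subhalo by its parent group
        # id and its rank within that group; resolve it to the absolute
        # subhalo id using the parent's GroupFirstSub value.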
if ptype == "Subhalo" and isinstance(particle_identifier, tuple):
self.group_identifier, self.subgroup_identifier = particle_identifier
my_data = self.index._get_halo_values(
"Group", np.array([self.group_identifier]), ["GroupFirstSub"]
)
self.particle_identifier = np.int64(
my_data["GroupFirstSub"][0] + self.subgroup_identifier
)
else:
self.particle_identifier = particle_identifier
if self.particle_identifier >= self.index.particle_count[ptype]:
raise RuntimeError(
"%s %d requested, but only %d %s objects exist."
% (ptype, particle_identifier, self.index.particle_count[ptype], ptype)
)
# Find the file that has the scalar values for this halo.
i_scalar = self.index._get_halo_file_indices(ptype, [self.particle_identifier])[
0
]
self.scalar_data_file = self.index.data_files[i_scalar]
# index within halo arrays that corresponds to this halo
self.scalar_index = self.index._get_halo_scalar_index(
ptype, self.particle_identifier
)
halo_fields = [f"{ptype}Len"]
if ptype == "Subhalo":
halo_fields.append("SubhaloGrNr")
my_data = self.index._get_halo_values(
ptype, np.array([self.particle_identifier]), halo_fields
)
self.particle_number = np.int64(my_data[f"{ptype}Len"][0])
if ptype == "Group":
self.group_identifier = self.particle_identifier
id_offset = 0
# index of file that has scalar values for the group
g_scalar = i_scalar
group_index = self.scalar_index
# If a subhalo, find the index of the parent.
elif ptype == "Subhalo":
self.group_identifier = np.int64(my_data["SubhaloGrNr"][0])
# Find the file that has the scalar values for the parent group.
g_scalar = self.index._get_halo_file_indices(
"Group", [self.group_identifier]
)[0]
            # index within halo arrays that corresponds to the parent group
group_index = self.index._get_halo_scalar_index(
"Group", self.group_identifier
)
my_data = self.index._get_halo_values(
"Group",
np.array([self.group_identifier]),
["GroupNsubs", "GroupFirstSub"],
)
self.subgroup_identifier = self.particle_identifier - np.int64(
my_data["GroupFirstSub"][0]
)
parent_subhalos = my_data["GroupNsubs"][0]
mylog.debug(
"Subhalo %d is subgroup %s of %d in group %d.",
self.particle_identifier,
self.subgroup_identifier,
parent_subhalos,
self.group_identifier,
)
# ids of the sibling subhalos that come before this one
if self.subgroup_identifier > 0:
sub_ids = np.arange(
self.particle_identifier - self.subgroup_identifier,
self.particle_identifier,
)
my_data = self.index._get_halo_values(
"Subhalo", sub_ids, ["SubhaloLen"]
)
id_offset = my_data["SubhaloLen"].sum(dtype=np.int64)
else:
id_offset = 0
# Calculate the starting index for the member particles.
# First, add up all the particles in the earlier files.
all_id_start = self.index._group_length_sum[:g_scalar].sum(dtype=np.int64)
# Now add the halos in this file that come before.
with h5py.File(self.index.data_files[g_scalar].filename, mode="r") as f:
all_id_start += f["Group"]["GroupLen"][:group_index].sum(dtype=np.int64)
# Add the subhalo offset.
all_id_start += id_offset
# indices of first and last files containing member particles
i_start = (
np.digitize([all_id_start], self.index._halo_id_start, right=False)[0] - 1
)
i_end = np.digitize(
[all_id_start + self.particle_number], self.index._halo_id_end, right=True
)[0]
self.field_data_files = self.index.data_files[i_start : i_end + 1]
# starting and ending indices for each file containing particles
self.field_data_start = (
all_id_start - self.index._halo_id_start[i_start : i_end + 1]
).clip(min=0)
self.field_data_start = self.field_data_start.astype(np.int64)
self.field_data_end = (
all_id_start
+ self.particle_number
- self.index._halo_id_start[i_start : i_end + 1]
).clip(max=self.index._halo_id_number[i_start : i_end + 1])
self.field_data_end = self.field_data_end.astype(np.int64)
for attr in ["mass", "position", "velocity"]:
setattr(self, attr, self[self.ptype, f"particle_{attr}"][0])
def __repr__(self):
return "%s_%s_%09d" % (self.ds, self.ptype, self.particle_identifier)