Source code for yt.frontends.tipsy.io

import glob
import os
import struct

import numpy as np

from yt.frontends.sph.io import IOHandlerSPH
from yt.frontends.tipsy.definitions import npart_mapping
from yt.utilities.lib.particle_kdtree_tools import generate_smoothing_length
from yt.utilities.logger import ytLogger as mylog


class IOHandlerTipsyBinary(IOHandlerSPH):
    _dataset_type = "tipsy"
    _vector_fields = {"Coordinates": 3, "Velocity": 3, "Velocities": 3}

    _pdtypes = None  # dtypes, to be filled in later
    _aux_pdtypes = None  # auxiliary files' dtypes

    _ptypes = ("Gas", "DarkMatter", "Stars")

    _aux_fields = None
    _fields = (
        ("Gas", "Mass"),
        ("Gas", "Coordinates"),
        ("Gas", "Velocities"),
        ("Gas", "Density"),
        ("Gas", "Temperature"),
        ("Gas", "Epsilon"),
        ("Gas", "Metals"),
        ("Gas", "Phi"),
        ("DarkMatter", "Mass"),
        ("DarkMatter", "Coordinates"),
        ("DarkMatter", "Velocities"),
        ("DarkMatter", "Epsilon"),
        ("DarkMatter", "Phi"),
        ("Stars", "Mass"),
        ("Stars", "Coordinates"),
        ("Stars", "Velocities"),
        ("Stars", "Metals"),
        ("Stars", "FormationTime"),
        ("Stars", "Epsilon"),
        ("Stars", "Phi"),
    )

    def __init__(self, *args, **kwargs):
        self._aux_fields = []
        super().__init__(*args, **kwargs)

    def _read_fluid_selection(self, chunks, selector, fields, size):
        raise NotImplementedError

    def _fill_fields(self, fields, vals, hsml, mask, data_file):
        if mask is None:
            size = 0
        elif isinstance(mask, slice):
            if fields[0] == "smoothing_length":
                size = hsml.size
            else:
                size = vals[fields[0]].size
        else:
            size = mask.sum()
        rv = {}
        for field in fields:
            mylog.debug("Allocating %s values for %s", size, field)
            if field in self._vector_fields:
                rv[field] = np.empty((size, 3), dtype="float64")
                if size == 0:
                    continue
                rv[field][:, 0] = vals[field]["x"][mask]
                rv[field][:, 1] = vals[field]["y"][mask]
                rv[field][:, 2] = vals[field]["z"][mask]
            elif field == "smoothing_length":
                rv[field] = hsml[mask]
            else:
                rv[field] = np.empty(size, dtype="float64")
                if size == 0:
                    continue
                rv[field][:] = vals[field][mask]
            if field == "Coordinates":
                eps = np.finfo(rv[field].dtype).eps
                for i in range(3):
                    rv[field][:, i] = np.clip(
                        rv[field][:, i],
                        self.ds.domain_left_edge[i].v + eps,
                        self.ds.domain_right_edge[i].v - eps,
                    )
        return rv

    def _read_particle_coords(self, chunks, ptf):
        chunksize = self.ds.index.chunksize
        for data_file in self._sorted_chunk_iterator(chunks):
            poff = data_file.field_offsets
            tp = data_file.total_particles
            f = open(data_file.filename, "rb")
            for ptype in sorted(ptf, key=lambda a, poff=poff: poff.get(a, -1)):
                if data_file.total_particles[ptype] == 0:
                    continue
                f.seek(poff[ptype])
                total = 0
                while total < tp[ptype]:
                    count = min(chunksize, tp[ptype] - total)
                    p = np.fromfile(f, self._pdtypes[ptype], count=count)
                    total += p.size
                    d = [p["Coordinates"][ax].astype("float64") for ax in "xyz"]
                    del p
                    if ptype == self.ds._sph_ptypes[0]:
                        hsml = self._read_smoothing_length(data_file, count)
                    else:
                        hsml = 0.0
                    yield ptype, d, hsml

    @property
    def hsml_filename(self):
        return f"{self.ds.parameter_filename}-{'hsml'}"

    def _generate_smoothing_length(self, index):
        if os.path.exists(self.hsml_filename):
            with open(self.hsml_filename, "rb") as f:
                file_hash = struct.unpack("q", f.read(struct.calcsize("q")))[0]
            if file_hash != self.ds._file_hash:
                os.remove(self.hsml_filename)
            else:
                return
        positions = []
        for data_file in index.data_files:
            for _, ppos in self._yield_coordinates(
                data_file, needed_ptype=self.ds._sph_ptypes[0]
            ):
                positions.append(ppos)
        if positions == []:
            return
        kdtree = index.kdtree
        positions = np.concatenate(positions)[kdtree.idx]
        hsml = generate_smoothing_length(positions, kdtree, self.ds._num_neighbors)
        hsml = hsml[np.argsort(kdtree.idx)]
        dtype = self._pdtypes["Gas"]["Coordinates"][0]
        with open(self.hsml_filename, "wb") as f:
            f.write(struct.pack("q", self.ds._file_hash))
            f.write(hsml.astype(dtype).tobytes())
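
    # Note: the cached smoothing lengths live in a small sidecar file (see
    # hsml_filename above) consisting of an 8-byte dataset hash followed by
    # the raw values, stored in the same precision as the "Coordinates" field.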

    def _read_smoothing_length(self, data_file, count):
        dtype = self._pdtypes["Gas"]["Coordinates"][0]
        with open(self.hsml_filename, "rb") as f:
            f.seek(struct.calcsize("q") + data_file.start * dtype.itemsize)
            hsmls = np.fromfile(f, dtype, count=count)
        return hsmls.astype("float64")

    def _get_smoothing_length(self, data_file, dtype, shape):
        return self._read_smoothing_length(data_file, shape[0])

    def _read_particle_data_file(self, data_file, ptf, selector=None):
        from numpy.lib.recfunctions import append_fields

        return_data = {}
        poff = data_file.field_offsets
        aux_fields_offsets = self._calculate_particle_offsets_aux(data_file)
        tp = data_file.total_particles
        f = open(data_file.filename, "rb")

        # we need to open all aux files for chunking to work
        _aux_fh = {}

        def aux_fh(afield):
            if afield not in _aux_fh:
                _aux_fh[afield] = open(data_file.filename + "." + afield, "rb")
            return _aux_fh[afield]

        for ptype, field_list in sorted(ptf.items(), key=lambda a: poff.get(a[0], -1)):
            if data_file.total_particles[ptype] == 0:
                continue
            f.seek(poff[ptype])
            afields = list(set(field_list).intersection(self._aux_fields))
            count = min(self.ds.index.chunksize, tp[ptype])
            p = np.fromfile(f, self._pdtypes[ptype], count=count)
            auxdata = []
            for afield in afields:
                aux_fh(afield).seek(aux_fields_offsets[afield][ptype])
                if isinstance(self._aux_pdtypes[afield], np.dtype):
                    auxdata.append(
                        np.fromfile(
                            aux_fh(afield), self._aux_pdtypes[afield], count=count
                        )
                    )
                else:
                    aux_fh(afield).seek(0)
                    sh = aux_fields_offsets[afield][ptype]
                    if tp[ptype] > 0:
                        aux = np.genfromtxt(
                            aux_fh(afield), skip_header=sh, max_rows=count
                        )
                        if aux.ndim < 1:
                            aux = np.array([aux])
                        auxdata.append(aux)
            if afields:
                p = append_fields(p, afields, auxdata)
            if ptype == "Gas":
                hsml = self._read_smoothing_length(data_file, count)
            else:
                hsml = 0.0
            if selector is None or getattr(selector, "is_all_data", False):
                mask = slice(None, None, None)
            else:
                x = p["Coordinates"]["x"].astype("float64")
                y = p["Coordinates"]["y"].astype("float64")
                z = p["Coordinates"]["z"].astype("float64")
                mask = selector.select_points(x, y, z, hsml)
                del x, y, z
            if mask is None:
                continue
            tf = self._fill_fields(field_list, p, hsml, mask, data_file)
            for field in field_list:
                return_data[ptype, field] = tf.pop(field)

        # close all file handles
        f.close()
        for fh in _aux_fh.values():
            fh.close()

        return return_data
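
    # _read_particle_data_file returns a dict keyed by (ptype, field) tuples,
    # e.g. {("Gas", "Temperature"): <ndarray>, ...}, with the selector mask
    # already applied by _fill_fields.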

    def _update_domain(self, data_file):
        """
        This method is used to determine the size needed for a box that will
        bound the particles.  It simply finds the largest position of the
        whole set of particles, and sets the domain to +/- that value.
        """
        ds = data_file.ds
        ind = 0
        # NOTE:
        #  We hardcode this value here because otherwise we get into a
        #  situation where we require the existence of index before we
        #  can successfully instantiate it, or where we are calling it
        #  from within its instantiation.
        #
        #  Because this value is not propagated later on, and does not
        #  impact the construction of the bitmap indices, it should be
        #  acceptable to just use a reasonable number here.
        chunksize = 64**3
        # Check to make sure that the domain hasn't already been set
        # by the parameter file
        if np.all(np.isfinite(ds.domain_left_edge)) and np.all(
            np.isfinite(ds.domain_right_edge)
        ):
            return
        with open(data_file.filename, "rb") as f:
            ds.domain_left_edge = 0
            ds.domain_right_edge = 0
            f.seek(ds._header_offset)
            mi = np.array([1e30, 1e30, 1e30], dtype="float64")
            ma = -np.array([1e30, 1e30, 1e30], dtype="float64")
            for ptype in self._ptypes:
                # We'll just add the individual types separately
                count = data_file.total_particles[ptype]
                if count == 0:
                    continue
                stop = ind + count
                while ind < stop:
                    c = min(chunksize, stop - ind)
                    pp = np.fromfile(f, dtype=self._pdtypes[ptype], count=c)
                    np.minimum(
                        mi,
                        [
                            pp["Coordinates"]["x"].min(),
                            pp["Coordinates"]["y"].min(),
                            pp["Coordinates"]["z"].min(),
                        ],
                        mi,
                    )
                    np.maximum(
                        ma,
                        [
                            pp["Coordinates"]["x"].max(),
                            pp["Coordinates"]["y"].max(),
                            pp["Coordinates"]["z"].max(),
                        ],
                        ma,
                    )
                    ind += c
        # We extend by 1%.
        DW = ma - mi
        mi -= 0.01 * DW
        ma += 0.01 * DW
        ds.domain_left_edge = ds.arr(mi, "code_length")
        ds.domain_right_edge = ds.arr(ma, "code_length")
        ds.domain_width = DW = ds.domain_right_edge - ds.domain_left_edge
        ds.unit_registry.add(
            "unitary", float(DW.max() * DW.units.base_value), DW.units.dimensions
        )

    def _yield_coordinates(self, data_file, needed_ptype=None):
        with open(data_file.filename, "rb") as f:
            poff = data_file.field_offsets
            for ptype in self._ptypes:
                if ptype not in poff:
                    continue
                f.seek(poff[ptype])
                if needed_ptype is not None and ptype != needed_ptype:
                    continue
                # We'll just add the individual types separately
                count = data_file.total_particles[ptype]
                if count == 0:
                    continue
                pp = np.fromfile(f, dtype=self._pdtypes[ptype], count=count)
                mis = np.empty(3, dtype="float64")
                mas = np.empty(3, dtype="float64")
                for axi, ax in enumerate("xyz"):
                    mi = pp["Coordinates"][ax].min()
                    ma = pp["Coordinates"][ax].max()
                    mylog.debug("Spanning: %0.3e .. %0.3e in %s", mi, ma, ax)
                    mis[axi] = mi
                    mas[axi] = ma
                pos = np.empty((pp.size, 3), dtype="float64")
                for i, ax in enumerate("xyz"):
                    pos[:, i] = pp["Coordinates"][ax]
                yield ptype, pos

    def _count_particles(self, data_file):
        pcount = np.array(
            [
                data_file.ds.parameters["nsph"],
                data_file.ds.parameters["nstar"],
                data_file.ds.parameters["ndark"],
            ]
        )
        si, ei = data_file.start, data_file.end
        if None not in (si, ei):
            np.clip(pcount - si, 0, ei - si, out=pcount)
        ptypes = ["Gas", "Stars", "DarkMatter"]
        npart = dict(zip(ptypes, pcount, strict=True))
        return npart

    @classmethod
    def _compute_dtypes(cls, field_dtypes, endian="<"):
        pds = {}
        for ptype, field in cls._fields:
            dtbase = field_dtypes.get(field, "f")
            ff = f"{endian}{dtbase}"
            if field in cls._vector_fields:
                dt = (field, [("x", ff), ("y", ff), ("z", ff)])
            else:
                dt = (field, ff)
            pds.setdefault(ptype, []).append(dt)
        pdtypes = {}
        for ptype in pds:
            pdtypes[ptype] = np.dtype(pds[ptype])
        return pdtypes
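
    # For reference, with the default "f" base type and little-endian data,
    # _compute_dtypes produces one compound dtype per particle type, roughly
    #   np.dtype([("Mass", "<f"),
    #             ("Coordinates", [("x", "<f"), ("y", "<f"), ("z", "<f")]),
    #             ...])
    # with one entry per (ptype, field) pair listed in _fields.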

    def _create_dtypes(self, data_file):
        # We can just look at the particle counts.
        self._header_offset = data_file.ds._header_offset
        self._pdtypes = self._compute_dtypes(
            data_file.ds._field_dtypes, data_file.ds.endian
        )
        self._field_list = []
        for ptype, field in self._fields:
            if data_file.total_particles[ptype] == 0:
                # We do not want our _pdtypes to have empty particles.
                self._pdtypes.pop(ptype, None)
                continue
            self._field_list.append((ptype, field))
        if "Gas" in self._pdtypes.keys():
            self._field_list.append(("Gas", "smoothing_length"))

        # Find out which auxiliaries we have and what is their format
        tot_parts = np.sum(
            [
                data_file.ds.parameters["nsph"],
                data_file.ds.parameters["nstar"],
                data_file.ds.parameters["ndark"],
            ]
        )
        endian = data_file.ds.endian
        self._aux_pdtypes = {}
        self._aux_fields = []
        for f in glob.glob(data_file.filename + ".*"):
            afield = f.rsplit(".")[-1]
            filename = data_file.filename + "." + afield
            if not os.path.exists(filename):
                continue
            if afield in ["log", "parameter", "kdtree"]:
                # The Amiga halo finder makes files like this; we need to ignore them
                continue
            self._aux_fields.append(afield)
        skip_afields = []
        for afield in self._aux_fields:
            filename = data_file.filename + "." + afield
            # We need to do some fairly ugly detection to see what format the
            # auxiliary files are in.  They can be either ascii or binary, and
            # the binary files can be either floats, ints, or doubles.  We're
            # going to use a try-catch cascade to determine the format.
            filesize = os.stat(filename).st_size
            dtype = np.dtype(endian + "i4")
            tot_parts_from_file = np.fromfile(filename, dtype, count=1)
            if tot_parts_from_file != tot_parts:
                with open(filename, "rb") as f:
                    header_nparts = f.readline()
                    try:
                        header_nparts = int(header_nparts)
                    except ValueError:
                        skip_afields.append(afield)
                        continue
                    if int(header_nparts) != tot_parts:
                        raise RuntimeError
                self._aux_pdtypes[afield] = "ascii"
            elif (filesize - 4) / 8 == tot_parts:
                self._aux_pdtypes[afield] = np.dtype([("aux", endian + "d")])
            elif (filesize - 4) / 4 == tot_parts:
                if afield.startswith("i"):
                    self._aux_pdtypes[afield] = np.dtype([("aux", endian + "i")])
                else:
                    self._aux_pdtypes[afield] = np.dtype([("aux", endian + "f")])
            else:
                skip_afields.append(afield)
        for afield in skip_afields:
            self._aux_fields.remove(afield)
        # Add the auxiliary fields to each ptype we have
        for ptype in self._ptypes:
            if any(ptype == field[0] for field in self._field_list):
                self._field_list += [(ptype, afield) for afield in self._aux_fields]
        return self._field_list

    def _identify_fields(self, data_file):
        return self._field_list, {}
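
    # Tipsy stores each particle type as one contiguous block, in the order
    # given by _ptypes, so a type's byte offset is the header offset plus the
    # sizes of all preceding blocks; the chunk offsets computed below
    # additionally skip data_file.start particles into the file.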

    def _calculate_particle_offsets(self, data_file, pcounts):
        # This computes the offsets for each particle type into a "data_file."
        # Note that the term "data_file" here is a bit overloaded, and also
        # refers to a "chunk" of particles inside a data file.
        # data_file.start represents the *particle count* that we should start at.
        #
        # At this point, data_file will have the total number of particles
        # that this chunk represents located in the property total_particles.
        # Because in tipsy files the particles are stored sequentially, we can
        # figure out where each one starts.
        # We first figure out the global offsets, then offset them by the count
        # and size of each individual particle type.
        field_offsets = {}
        # Initialize pos to the point the first particle type would start
        pos = data_file.ds._header_offset
        global_offsets = {}
        field_offsets = {}
        for ptype in self._ptypes:
            if ptype not in self._pdtypes:
                # This means we don't have any, I think, and so we shouldn't
                # stick it in the offsets.
                continue
            # Note that much of this will be computed redundantly; future
            # refactorings could fix this.
            global_offsets[ptype] = pos
            size = self._pdtypes[ptype].itemsize
            npart = self.ds.parameters[npart_mapping[ptype]]
            # Get the offset into just this particle type, and start at
            # data_file.start
            if npart > data_file.start:
                field_offsets[ptype] = pos + size * data_file.start
            pos += npart * size
        return field_offsets

    def _calculate_particle_offsets_aux(self, data_file):
        aux_fields_offsets = {}
        params = self.ds.parameters
        for afield in self._aux_fields:
            aux_fields_offsets[afield] = {}
            if isinstance(self._aux_pdtypes[afield], np.dtype):
                pos = 4  # i4
                size = np.dtype(self._aux_pdtypes[afield]).itemsize
            else:
                pos = 1
                size = 1
            for i, ptype in enumerate(self._ptypes):
                if data_file.total_particles[ptype] == 0:
                    continue
                elif params[npart_mapping[ptype]] > self.ds.index.chunksize:
                    for j in range(i):
                        npart = params[npart_mapping[self._ptypes[j]]]
                        if npart > self.ds.index.chunksize:
                            pos += npart * size
                    pos += data_file.start * size
                aux_fields_offsets[afield][ptype] = pos
                pos += data_file.total_particles[ptype] * size
        return aux_fields_offsets