-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #25 from CMB-S4/data_convert
Data conversion tools
- Loading branch information
Showing
3 changed files
with
461 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
#!/usr/bin/env python3 | ||
|
||
""" | ||
This script loads v0 uncompressed TOAST observations and writes v1 compressed files. | ||
""" | ||
|
||
import os | ||
import sys | ||
import shutil | ||
import re | ||
import glob | ||
|
||
import datetime | ||
|
||
import argparse | ||
|
||
import numpy as np | ||
|
||
from astropy import units as u | ||
|
||
import toast | ||
import toast.ops | ||
|
||
from toast.timing import gather_timers, dump, Timer | ||
|
||
from toast.observation import default_values as defaults | ||
|
||
|
||
def parse_arguments():
    """Build the command-line interface and hand it to TOAST for parsing.

    Two script-specific options are declared: ``--verify`` (a flag) and
    ``--obs`` (one or more observation file paths).  No operators are exposed
    for configuration.

    Returns:
        tuple: ``(config, args, jobargs)`` as produced by
        ``toast.parse_config``.
    """
    cli = argparse.ArgumentParser(description="Compress CMB-S4 simulation data")

    cli.add_argument(
        "--verify",
        action="store_true",
        default=False,
        required=False,
        help="Re-load the converted data and verify consistency",
    )
    cli.add_argument(
        "--obs",
        nargs="+",
        type=str,
        required=False,
        help="One or more observation files",
    )

    # This script does not configure any operators from the command line or a
    # parameter file, so the list handed to TOAST is empty.
    configurable_ops = []

    # Let TOAST merge its own options with the ones declared above.
    parsed = toast.parse_config(cli, operators=configurable_ops)
    config, run_args, job_args = parsed
    return config, run_args, job_args
|
||
|
||
def main():
    """Rewrite each requested observation file as a compressed HDF5 file.

    For every path passed with ``--obs`` the observation is loaded, the
    original file is renamed to ``<root>_uncompressed.h5`` by world rank 0,
    and the observation is saved back into the same directory with the
    detector data stored as ``flac`` and the detector flags as ``gzip``.
    With ``--verify`` the new file is re-loaded and compared against the
    in-memory observation.  At the end the gathered timers are dumped to
    ``./timing`` by world rank 0.

    Raises:
        RuntimeError: if no observation files were given, if the backup file
            for an observation already exists, if the file written by
            ``save_hdf5`` is not at the original path, or if verification
            fails.
    """
    env = toast.utils.Environment.get()
    log = toast.utils.Logger.get()
    env.enable_function_timers()
    global_timer = toast.timing.GlobalTimers.get()
    global_timer.start("compress HDF5 (total)")

    _config, args, _jobargs = parse_arguments()

    # --obs is optional on the command line, so args.obs may be None.  Fail
    # with a clear message instead of a TypeError from iterating over None.
    if not args.obs:
        raise RuntimeError(
            "No observation files given; use --obs to specify at least one"
        )

    # Default group size
    comm = toast.Comm()
    is_root = comm.world_rank == 0

    def world_barrier():
        # Synchronize all processes; there is nothing to do without MPI.
        if comm.comm_world is not None:
            comm.comm_world.barrier()

    # Process each observation
    for obs_path in args.obs:
        obs_dir = os.path.dirname(obs_path)
        file_root = os.path.splitext(obs_path)[0]
        backup = f"{file_root}_uncompressed.h5"
        if is_root:
            print(f"Working on {obs_path}:")

        # os.rename() would silently replace an existing backup, which may be
        # the only uncompressed copy left over from an earlier run.  Refuse
        # to continue before anything on disk is touched.
        if os.path.exists(backup):
            msg = f"Backup file ({backup}) already exists; "
            msg += f"refusing to overwrite it while converting {obs_path}"
            raise RuntimeError(msg)

        # A fresh Timer is used for every phase so that each report covers
        # only that phase.
        # NOTE(review): toast's Timer appears to accumulate elapsed time over
        # repeated start/stop cycles -- confirm against the TOAST docs.
        load_timer = Timer()
        load_timer.start()
        obs = toast.io.load_hdf5(
            obs_path,
            comm,
            process_rows=comm.group_size,
            meta=None,
            detdata=None,
            shared=None,
            intervals=None,
            detectors=None,
            force_serial=False,
        )
        world_barrier()
        load_timer.stop()
        if is_root:
            print(f"  Load {obs_path} in {load_timer.seconds()} s", flush=True)

        # Only one process moves the original file out of the way; everyone
        # waits until that has happened before writing the new file.
        if is_root:
            os.rename(obs_path, backup)
        world_barrier()

        save_timer = Timer()
        save_timer.start()
        obf = toast.io.save_hdf5(
            obs,
            obs_dir,
            meta=None,
            detdata=[
                (defaults.det_data, {"type": "flac"}),
                (defaults.det_flags, {"type": "gzip"}),
            ],
            shared=None,
            intervals=None,
            config=None,
            times=defaults.times,
            force_serial=False,
        )
        world_barrier()
        save_timer.stop()
        if is_root:
            print(f"  Save {obs_path} in {save_timer.seconds()} s", flush=True)

        # The new file must take the place of the original one.
        if obf != obs_path:
            msg = f"Generated HDF5 ({obf}) does not match original "
            msg += f"file name ({obs_path})"
            raise RuntimeError(msg)

        if not args.verify:
            if is_root:
                print(f"  Skipping verification", flush=True)
            continue

        verify_timer = Timer()
        verify_timer.start()
        compare = toast.io.load_hdf5(
            obs_path,
            comm,
            process_rows=comm.group_size,
        )
        world_barrier()
        verify_timer.stop()
        if is_root:
            print(
                f"  Re-load {obs_path} for verification in {verify_timer.seconds()} s",
                flush=True,
            )

        if compare != obs:
            msg = f"Observation HDF5 verify failed:\n"
            msg += f"Input = {obs}\n"
            msg += f"Loaded = {compare}"
            log.error(msg)
            raise RuntimeError(msg)
        if is_root:
            print(f"  Verification PASS", flush=True)

    # Dump all the timing information to the output dir
    global_timer.stop("compress HDF5 (total)")
    alltimers = gather_timers(comm=comm.comm_world)
    if is_root:
        out = os.path.join(".", "timing")
        dump(alltimers, out)
|
||
|
||
if __name__ == "__main__":
    # Only the world communicator is used here; the process count and rank
    # returned alongside it are not needed.
    world_comm, _nprocs, _rank = toast.mpi.get_world()
    # NOTE(review): exception_guard presumably makes an exception on one
    # process bring down the whole job instead of hanging -- confirm.
    with toast.mpi.exception_guard(comm=world_comm):
        main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#!/bin/bash
#SBATCH --qos=regular
#SBATCH --time=05:00:00
#SBATCH --nodes=1
#SBATCH --job-name=CMBS4_todcompress
#SBATCH --licenses=SCRATCH
#SBATCH --constraint=cpu
#SBATCH --account=mp107

# Compress a range of observations of one frequency band with
# compress_hdf5.py.  Each observation is a sub-directory of PARENT_DIRECTORY;
# its RISING*/SETTING* files are converted in one srun call, and the
# "*uncompressed.h5" backups are deleted only if that call succeeds.
# Paths are kept in arrays and always quoted so that names containing spaces
# or glob characters are passed through unchanged.

# set parent directory corresponding to one frequency band
PARENT_DIRECTORY="/global/cfs/cdirs/cmbs4/dc/dc0/staging/noise_sim/outputs_rk/LAT0_CHLAT/f090"

# set range of observations to compress by this script (inclusive)
START_INDEX=1
END_INDEX=30

echo "Listing all observations in $PARENT_DIRECTORY"

# list all observations in parent directory and save to an array
mapfile -t SUBDIR_LIST < <(find "$PARENT_DIRECTORY" -mindepth 1 -maxdepth 1 -type d | sort)
if [ "${#SUBDIR_LIST[@]}" -eq 0 ]; then
    echo "No observations found in $PARENT_DIRECTORY. Nothing to do."
    exit 1
fi

# print observation names to console
echo "Observations found: "
for subdir in "${SUBDIR_LIST[@]}"; do
    basename "$subdir"
done

echo "Proceeding to compress observations indexed in range: $START_INDEX-$END_INDEX"
# select subset of observations (subdirectories) based on range and save to an array
mapfile -t SELECTED_SUBDIRS < <(printf '%s\n' "${SUBDIR_LIST[@]}" | sed -n "${START_INDEX},${END_INDEX}p")

# print selected observation names to console
echo "Selected observations: "
for subdir in "${SELECTED_SUBDIRS[@]}"; do
    basename "$subdir"
done

# loop through selected subdirectories and process each one
for subdir in "${SELECTED_SUBDIRS[@]}"; do
    obs_name=$(basename "$subdir")
    echo "Processing observation: $obs_name"

    # search for files with the expected starting keywords : 'RISING' or 'SETTING'
    mapfile -t FILE_LIST < <(find "$subdir" -type f \( -name "RISING*" -o -name "SETTING*" \))

    # without any file, "--obs" would be passed an empty list and the
    # compression script would fail; skip this observation instead
    if [ "${#FILE_LIST[@]}" -eq 0 ]; then
        echo "No RISING*/SETTING* files found in $obs_name. Skipping."
        continue
    fi

    # print file names to console
    echo "Files to compress: "
    for filename in "${FILE_LIST[@]}"; do
        basename "$filename"
    done

    # call compression script on the list of files
    date
    echo "Calling flac compression script ..."
    if srun -n 128 python compress_hdf5.py --verify --obs "${FILE_LIST[@]}" > "log_${obs_name}.txt" 2>&1; then
        # the python script ran without error: delete backup files
        echo "FLAC compression script ran successfully. Deleting backup files..."
        if find "$subdir" -type f -name "*uncompressed.h5" -delete; then
            echo "Backup files deleted successfully."
            date
        else
            # If deleting the backup files fails for some reason, stop
            # everything to avoid any risk of running out of disk space.
            echo "Error deleting backup files. Exiting loop over observations."
            date
            break
        fi
    else
        echo "FLAC compression script encountered an error. Not deleting any files."
        date
    fi
done

echo "Observation batch $START_INDEX-$END_INDEX processing in $(basename "$PARENT_DIRECTORY") band done. Please verify log files."
Oops, something went wrong.