-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add files needed for figures in paper
- Loading branch information
1 parent
188e55b
commit dab4be1
Showing
28 changed files
with
4,262 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,27 @@ | ||
# multi_epoch_2 | ||
# multi_epoch_2 | ||
|
||
To ignore/unignore config file | ||
`$ git update-index --skip-worktree config.yaml` | ||
`$ git update-index --no-skip-worktree config.yaml` | ||
|
||
Python environment | ||
`$ conda create --name btml python=3.9.2` | ||
`$ conda activate btml` | ||
`$ conda install pip` | ||
`$ pip install -r requirements` | ||
|
||
Place virgo credentials in virgo_credentials.yaml, set permissions | ||
`$ chmod 600 virgo_credentials.yaml` | ||
Ignore virgo_credentials file to avoid committing credentials | ||
`$ git update-index --skip-worktree virgo_credentials.yaml` | ||
|
||
| Property | Units | | ||
|---------------------------|----------------------------------| | ||
| Ages | Gyr | | ||
| Mass | M<sub>⊙</sub> | | ||
| Black hole accretion, SFR | M<sub>⊙</sub> / yr | | ||
| Lengths | Mpc | | ||
| Magnitudes | mag | | ||
| Metallicity | M<sub>z</sub> / M<sub>tot</sub> | | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
import os | ||
import sys | ||
|
||
import numpy as np | ||
import pandas as pd | ||
import sklearn.ensemble | ||
import yaml | ||
|
||
helpers_path = os.path.abspath(sys.path[0]+'/..') | ||
sys.path.append(helpers_path) | ||
import helpers | ||
from helpers import log | ||
|
||
base_dir = helpers.Config.get_base_dir() | ||
|
||
snapshots = np.arange(6, 34, 3) | ||
|
||
for sim in ['IllustrisTNG', 'SIMBA']: | ||
log(f'Running for {sim}') | ||
data_dir = f'{helpers.Config.get_base_dir()}generated/baryon_tree_ml/camels/{sim}/' | ||
run_names = sorted(os.listdir(data_dir)) | ||
for run_name in run_names: | ||
log(f'Calculating feature importance for {run_name}') | ||
# TODO: Cut data so only tracked halos are used | ||
histories = pd.read_pickle(f'{data_dir}{run_name}/histories.pickle') | ||
histories = histories[histories[str(min(snapshots))+'stellar_mass'] != 0] | ||
|
||
input_properties = ['bh_mass', 'dm_sub_mass', 'gas_mass', 'stellar_mass'] | ||
output_features = ['bh_mass', 'gas_mass', 'sfr', 'stellar_mass', 'stellar_metallicity'] | ||
|
||
hubble_constant = 0.6711 | ||
box_length = 25 / hubble_constant | ||
|
||
# TODO: Set value for stellar mass | ||
mask = histories['stellar_mass'] > 10**9 | ||
histories = histories[mask] | ||
log(f'There are {histories.shape[0]} halos with stellar mass > 10**9') | ||
min_snap = np.min(snapshots) | ||
n_trackable = np.sum(histories[str(min_snap)+'dm_sub_mass'] != 0) | ||
log(f'There are {n_trackable} halos which can be traced to snapshot {min_snap}') | ||
|
||
for output_feature in ['bh_mass', 'gas_mass', 'sfr', 'stellar_metallicity']: | ||
arr = histories[output_feature] | ||
log(f'Fraction {output_feature} equal to zero: {np.sum(arr==0)/arr.shape[0]:.3g}') | ||
min_nonzero = np.min(arr[arr != 0]) | ||
histories[output_feature] = np.maximum(arr, min_nonzero*np.ones_like(arr)) | ||
|
||
for output_feature in output_features: | ||
histories['regr_'+output_feature] = np.log10(histories[output_feature]) | ||
|
||
input_columns = [str(snap)+prop for snap in snapshots for prop in input_properties] | ||
regr_columns = ['regr_'+feat for feat in output_features] | ||
output_columns = regr_columns + ['x', 'y', 'z'] | ||
data = histories[input_columns + output_columns] | ||
|
||
output_features = { | ||
'bh_mass': ['gas_mass', 'dm_sub_mass', 'stellar_mass'], | ||
'gas_mass': ['bh_mass', 'dm_sub_mass', 'stellar_mass'], | ||
'sfr': ['bh_mass', 'dm_sub_mass', 'gas_mass', 'stellar_mass'], | ||
'stellar_mass': ['bh_mass', 'dm_sub_mass', 'gas_mass'], | ||
'stellar_metallicity': ['bh_mass', 'dm_sub_mass', 'gas_mass', 'stellar_mass'] | ||
} | ||
feature_importance_dir = f'{data_dir}{run_name}/feature_importance/' | ||
if not os.path.exists(feature_importance_dir): | ||
os.makedirs(feature_importance_dir) | ||
with open(feature_importance_dir+'output_features.yaml', 'w') as yaml_file: | ||
yaml.dump(output_features, yaml_file) | ||
np.save(feature_importance_dir+'snapshots.npy', snapshots) | ||
|
||
for output_feature, input_properties in output_features.items(): | ||
input_features = [str(snap) + prop for snap in snapshots for prop in input_properties] | ||
|
||
n_dataset = 10 | ||
n_estimators = 100 | ||
n_process = 5 | ||
max_depth = 12 | ||
frac = 0.7 | ||
|
||
importances = np.zeros((n_dataset, len(input_features))) | ||
for i_dataset in range(n_dataset): | ||
|
||
train_box_length = np.cbrt(frac) * box_length | ||
|
||
shift = np.random.uniform(low=0, high=box_length, size=3) | ||
pos = np.array(data[['x', 'y', 'z']]) | ||
pos += shift | ||
pos %= box_length | ||
|
||
train_mask = np.all(pos < train_box_length, axis=1) | ||
train_data = data[train_mask] | ||
|
||
X_train = train_data[input_features] | ||
y_train = train_data['regr_'+output_feature] | ||
|
||
regr = sklearn.ensemble.ExtraTreesRegressor(n_estimators=n_estimators, | ||
n_jobs=n_process, | ||
max_depth=max_depth) | ||
regr.fit(X_train, y_train) | ||
|
||
importances[i_dataset] = regr.feature_importances_ | ||
|
||
mean_importance = np.mean(importances, axis=0) | ||
sem_importance = 1.96 * np.std(importances, axis=0) / np.sqrt(n_dataset) | ||
|
||
np.save(f'{feature_importance_dir}{output_feature}_mean_importance.npy', mean_importance) | ||
np.save(f'{feature_importance_dir}{output_feature}_sem_importance.npy', sem_importance) | ||
|
||
log('Script finished') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
import os | ||
import sys | ||
|
||
import joblib | ||
import matplotlib.cm | ||
import matplotlib.colors | ||
import matplotlib.pyplot as plt | ||
import numpy as np | ||
import sklearn.ensemble | ||
import yaml | ||
|
||
helpers_path = os.path.abspath(sys.path[0]+'/..') | ||
sys.path.append(helpers_path) | ||
import helpers | ||
from helpers import log | ||
|
||
base_dir = helpers.Config.get_base_dir() | ||
|
||
snapshots = np.arange(6, 34, 3) | ||
output_features = { | ||
# 'bh_mass': ['gas_mass', 'dm_sub_mass', 'stellar_mass'], | ||
# 'gas_mass': ['bh_mass', 'dm_sub_mass', 'stellar_mass'], | ||
# 'sfr': ['bh_mass', 'dm_sub_mass', 'gas_mass', 'stellar_mass'], | ||
'stellar_mass': ['bh_mass', 'dm_sub_mass', 'gas_mass'], | ||
# 'stellar_metallicity': ['bh_mass', 'dm_sub_mass', 'gas_mass', 'stellar_mass'] | ||
} | ||
|
||
for output_feature, input_properties in output_features.items(): | ||
pca_model_filepath = f'/home/rmcg/camels_pca/IllustrisTNG/{output_feature}/pca_model.joblib' | ||
pca = joblib.load(pca_model_filepath) | ||
|
||
input_features = [str(snap) + prop for snap in snapshots for prop in input_properties] | ||
fig, axs = plt.subplots(nrows=4, figsize=(5, 12), sharex=True) | ||
fig.subplots_adjust(hspace=0) | ||
for i_component, (component_name, component_values, label, yticks) in enumerate([ | ||
('mean', pca.mean_ / np.max(pca.mean_), 'Mean', [0, 0.2, 0.4, 0.6, 0.8, 1]), # Note that mean is normalised | ||
('component_1', pca.components_[0], 'Component 1', [-0.2, 0, 0.2, 0.4]), | ||
('component_2', pca.components_[1], 'Component 2', [-0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3]), | ||
('component_3', pca.components_[2], 'Component 3', [-0.2, 0, 0.2, 0.4, 0.6]), | ||
]): | ||
ax = axs[i_component] | ||
for i_prop, input_property in enumerate(input_properties): | ||
# TODO: Could estimate std by bootstrapping, running PCA on 90% of camels simulations | ||
mean_values, sem_values = [], [] | ||
for snap in snapshots: | ||
idx = input_features.index(str(snap)+input_property) | ||
mean_values.append(component_values[idx]) | ||
mean_values, sem_values = np.array(mean_values), np.array(sem_values) | ||
|
||
p = ax.plot(snapshots, mean_values, '-o', | ||
markersize=2, color=helpers.Config.get_color(input_property))[0] | ||
if i_component == i_prop: | ||
ax.legend(handles=[p], labels=[helpers.Config.get_proper_name(input_property, False)], | ||
loc='upper right', fontsize=12) | ||
|
||
padding = 0.015 * (np.max(snapshots) - np.min(snapshots)) | ||
ax.set_xlim([np.min(snapshots)-padding, np.max(snapshots)+padding]) | ||
xticks = np.linspace(np.min(snapshots), np.max(snapshots), 6, dtype=int) | ||
xticks = np.round(xticks, 1) | ||
ax.set_xticks(xticks) | ||
|
||
ax.set_ylim(ax.get_ylim()[0], ax.get_ylim()[1]*1.25) | ||
ax.set_yticks(yticks) | ||
|
||
ax.text(0.05, 0.88, label, fontsize=12, transform=ax.transAxes) | ||
|
||
axs[0].set_ylabel('Feature importance') | ||
for ax in axs[1:]: | ||
ax.set_ylabel('Difference in\nfeature importance') | ||
|
||
ax2 = axs[0].twiny() | ||
ax2.set_xlim(axs[0].get_xlim()) | ||
ax2.set_xticks(axs[0].get_xticks()) | ||
ax2.set_xticklabels([2.63, 1.86, 1.25, 0.69, 0.33, 0]) | ||
ax2.set_xlabel('z') | ||
|
||
axs[-1].set_xlabel('Snapshot') | ||
|
||
plt.savefig('/home/rmcg/test.pdf', dpi=450, bbox_inches='tight') | ||
plt.close() | ||
|
||
log('Script finished') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#!/bin/bash | ||
|
||
base_dir="/disk01/rmcg/" | ||
#base_dir="/home/rmcg/data/" | ||
for sim in "IllustrisTNG" "SIMBA"; do | ||
for i in {0..999}; do | ||
mkdir -p "${base_dir}downloaded/camels/${sim}/LH_${i}" | ||
cd "${base_dir}downloaded/camels/${sim}/LH_${i}" | ||
wget -N "https://users.flatironinstitute.org/~camels/Rockstar/${sim}/LH_${i}/trees/tree_0_0_0.dat" | ||
for j in {000..033}; do | ||
wget -N "https://users.flatironinstitute.org/~camels/FOF_Subfind/${sim}/LH_${i}/fof_subhalo_tab_${j}.hdf5" | ||
done | ||
mkdir -p "${base_dir}downloaded/camels/${sim}_DM/LH_${i}" | ||
cd "${base_dir}downloaded/camels/${sim}_DM/LH_${i}" | ||
wget -N "https://users.flatironinstitute.org/~camels/Rockstar/${sim}_DM/LH_${i}/trees/tree_0_0_0.dat" | ||
done | ||
for i in {1..6}; do | ||
# 1P_{param}_0 is not available for IllustrisTNG with param != 1. I need to delete the folders | ||
for run in "n5" "n4" "n3" "n2" "n1" "0" "1" "2" "3" "4" "5"; do | ||
mkdir -p "${base_dir}downloaded/camels/${sim}/1P_${i}_${run}" | ||
cd "${base_dir}downloaded/camels/${sim}/1P_${i}_${run}" | ||
wget -N "https://users.flatironinstitute.org/~camels/Rockstar/${sim}/1P_${i}_${run}/trees/tree_0_0_0.dat" | ||
for j in {000..033}; do | ||
wget -N "https://users.flatironinstitute.org/~camels/FOF_Subfind/${sim}/1P_${i}_${run}/fof_subhalo_tab_${j}.hdf5" | ||
done | ||
mkdir -p "${base_dir}downloaded/camels/${sim}_DM/1P_${i}_${run}" | ||
cd "${base_dir}downloaded/camels/${sim}_DM/1P_${i}_${run}" | ||
wget -N "https://users.flatironinstitute.org/~camels/Rockstar/${sim}_DM/1P_${i}_${run}/trees/tree_0_0_0.dat" | ||
done | ||
done | ||
for i in {0..26}; do | ||
mkdir -p "${base_dir}downloaded/camels/${sim}/CV_${i}" | ||
cd "${base_dir}downloaded/camels/${sim}/CV_${i}" | ||
wget -N "https://users.flatironinstitute.org/~camels/Rockstar/${sim}/CV_${i}/trees/tree_0_0_0.dat" | ||
for j in {000..033}; do | ||
wget -N "https://users.flatironinstitute.org/~camels/FOF_Subfind/${sim}/CV_${i}/fof_subhalo_tab_${j}.hdf5" | ||
done | ||
mkdir -p "${base_dir}downloaded/camels/${sim}_DM/CV_${i}" | ||
cd "${base_dir}downloaded/camels/${sim}_DM/CV_${i}" | ||
wget -N "https://users.flatironinstitute.org/~camels/Rockstar/${sim}_DM/CV_${i}/trees/tree_0_0_0.dat" | ||
done | ||
cd "${base_dir}downloaded/camels" | ||
wget "https://users.flatironinstitute.org/~camels/Sims/${sim}/CosmoAstroSeed_params.txt" | ||
j=0 | ||
for i in {1..6}; do | ||
for run in "n5" "n4" "n3" "n2" "n1" "0" "1" "2" "3" "4" "5"; do | ||
sed -i "s/1P_${j} /1P_${i}_${run} /" CosmoAstroSeed_params.txt | ||
j=$((j + 1)) | ||
done | ||
done | ||
grep -v 'EX' CosmoAstroSeed_params.txt > "${sim}_params.txt" | ||
rm CosmoAstroSeed_params.txt | ||
done | ||
# SIMBA run LH_196 has zero gas mass after I extract it. I think it may be corrupted. I've deleted it |
Oops, something went wrong.