feat: Add MoMEMta Drell-Yan hypothesis example (#8)

* Add momemta directory for Drell-Yan hypothesis using MoMEMta-MaGMEE plugin - c.f. configs/momemta/drell-yan.mg5 for MoMEMta-MaGMEE MadGraph5 config - Add CLI arguments to momemta/drell-yan/drell-yan_example.cxx to set paths and options - Add lep1_PID branch to preprocessing/scripts/HistCollections.py for use in MoMEMta example * Add Blue Waters example and PBS submission scripts * Update .gitignore for ROOT files and generated MatrixElements/ directory
scailfin · Aug 4, 2021 · 9ec7c5c · 9ec7c5c
1 parent bee6346
commit 9ec7c5c
Show file tree

Hide file tree

Showing 11 changed files with 499 additions and 40 deletions.
diff --git a/.gitignore b/.gitignore
@@ -137,3 +137,10 @@ dmypy.json
 # MadGraph5
 py.py
 drell-yan_output/
+MatrixElements/
+
+# ROOT
+*.root
+
+# Failure
+core
diff --git a/bluewaters/README.md b/bluewaters/README.md
@@ -36,6 +36,12 @@ bash run_delphes.sh drell-yan
 bash run_preprocessing.sh drell-yan
 ```
 
+* To then finally run MoMEMta for the hypothesis described with the [MoMEMta-MaGMEE](https://github.com/MoMEMta/MoMEMta-MaGMEE) MadGraph5 plugin run
+
+```console
+bash run_momemta.sh drell-yan
+```
+
 ## Interactive Session
 
 If you need to run an interactive session (which will be slower) you can first allocate resources on the `shifter` queue from Torque with `qsub`

diff --git a/bluewaters/drell-yan/momemta.pbs b/bluewaters/drell-yan/momemta.pbs
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Set the number of processing elements (PEs) or cores
+# Set the number of PEs per node
+#PBS -l nodes=1:ppn=8:xk
+
+# Set the wallclock time
+#PBS -l walltime=48:00:00
+
+# Use shifter queue
+#PBS -l gres=shifter
+
+# Set the PBS_JOBNAME
+#PBS -N momemta
+
+# Set the job stdout and stderr
+#PBS -e "${PBS_JOBNAME}.${PBS_JOBID}.err"
+#PBS -o "${PBS_JOBNAME}.${PBS_JOBID}.out"
+
+# Set email notification on termination or abort
+#PBS -m ea
+#PBS -M [email protected]
+
+# Set allocation to charge
+#PBS -A bbdz
+
+# Ensure shifter enabled
+module load shifter
+
+PHYSICS_PROCESS="drell-yan"
+OUTPUT_BASE_PATH="/mnt/c/scratch/sciteam/${USER}/${PHYSICS_PROCESS}/${PBS_JOBNAME}"
+OUTPUT_PATH="${OUTPUT_BASE_PATH}/${PBS_JOBID}"
+mkdir -p "${OUTPUT_PATH}"
+
+# $HOME is /u/sciteam/${USER}
+SHIFTER_IMAGE="neubauergroup/bluewaters-momemta:1.0.1"
+shifterimg pull "${SHIFTER_IMAGE}"
+
+INPUT_PATH="/mnt/c/scratch/sciteam/${USER}/${PHYSICS_PROCESS}/preprocessing/12498179.bw/preprocessing_output_10e4.root"
+OUTPUT_FILE="${OUTPUT_PATH}/momemta_weights.root"
+
+aprun \
+  --bypass-app-transfer \
+  --pes-per-node 1 \
+  --cpu-binding none \
+  -- shifter \
+    --clearenv \
+    --image="${SHIFTER_IMAGE}" \
+    --volume="${OUTPUT_BASE_PATH}":/root/data \
+    --volume=/mnt/a/"${HOME}":/mnt/a/"${HOME}" \
+    --workdir=/root/data \
+    -- /bin/bash -c 'source scl_source enable devtoolset-8 && \
+        export PATH="/usr/local/venv/bin:${PATH}" && \
+        printf "\n# printenv:\n" && printenv && printf "\n\n" && \
+        cd /mnt/a/'"${HOME}"'/MadGraph5-simulation-configs/momemta/'"${PHYSICS_PROCESS}"' && \
+        bash run_momemta.sh '"${INPUT_PATH}"' '"${OUTPUT_FILE}"
diff --git a/bluewaters/run_momemta.sh b/bluewaters/run_momemta.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+PROCESS_DIRECTORY="${1:-drell-yan}"
+qsub "${PROCESS_DIRECTORY}/momemta.pbs"
diff --git a/configs/momemta/drell-yan.mg5 b/configs/momemta/drell-yan.mg5
@@ -0,0 +1,2 @@
+generate p p > l+ l-
+output MoMEMta pp_drell_yan
diff --git a/momemta/drell-yan/CMakeLists.txt b/momemta/drell-yan/CMakeLists.txt
@@ -0,0 +1,39 @@
+cmake_minimum_required(VERSION 3.7...3.20)
+
+if(${CMAKE_VERSION} VERSION_LESS 3.12)
+    cmake_policy(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION})
+endif()
+
+project(Drell-Yan-Examples)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -pedantic -Wextra")
+
+# Require c++11 *at least*, use default compiler standard if possible
+if (CMAKE_CXX_STANDARD_COMPUTED_DEFAULT STRLESS "11" OR
+        CMAKE_CXX_STANDARD_COMPUTED_DEFAULT STREQUAL "98")
+    set(CMAKE_CXX_STANDARD 11)
+    set(CMAKE_CXX_STANDARD_REQUIRED ON)
+endif()
+
+# Stick to the standard
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+# Find dependencices
+
+# CMake will automagically also link to MoMEMta's dependencies, ie LHAPDF and ROOT
+find_package(MoMEMta CONFIG REQUIRED)
+
+# But MoMEMta doesn't use TreePlayer: we have to add it ourselves
+find_library(ROOT_TREEPLAYER_LIBRARY TreePlayer HINTS ${ROOT_LIBRARY_DIR} REQUIRED)
+
+# Figure out what do do here and how to simplify things. Above is bolierplate and below is the code
+
+add_executable(drell-yan_example "drell-yan_example.cxx")
+
+target_link_libraries(drell-yan_example momemta::momemta)
+# FIXME: The TTbar example uses TreePlayer so copy this here FOR NOW
+target_link_libraries(drell-yan_example ${ROOT_TREEPLAYER_LIBRARY})
+
+set_target_properties(drell-yan_example
+      PROPERTIES
+      OUTPUT_NAME "drell-yan_example")
diff --git a/momemta/drell-yan/drell-yan_example.cxx b/momemta/drell-yan/drell-yan_example.cxx
@@ -0,0 +1,172 @@
+#include <momemta/ConfigurationReader.h>
+#include <momemta/Logging.h>
+#include <momemta/MoMEMta.h>
+#include <momemta/Unused.h>
+
+#include <TTree.h>
+#include <TChain.h>
+#include <TTreeReader.h>
+#include <TTreeReaderValue.h>
+#include <Math/PtEtaPhiM4D.h>
+#include <Math/LorentzVector.h>
+
+#include <chrono>
+#include <memory>
+#include <iostream>
+
+using LorentzVectorM = ROOT::Math::LorentzVector<ROOT::Math::PtEtaPhiM4D<float>>;
+
+/*
+ * Example executable file loading an input sample of events,
+ * computing weights using MoMEMta in the Drell-Yan hypothesis,
+ * and saving these weights along with a copy of the event content in an output file.
+ */
+
+void normalizeInput(LorentzVector& p4) {
+    if (p4.M() > 0)
+        return;
+
+    // Increase the energy until M is positive
+    p4.SetE(p4.P());
+    while (p4.M2() < 0) {
+        double delta = p4.E() * 1e-5;
+        p4.SetE(p4.E() + delta);
+    };
+}
+
+int main(int argc, char** argv) {
+
+    std::string inputPath;  // required input
+    std::string outputPath;  // required input
+    std::string configPath {"drell-yan_example.lua"};  // default value
+    std::string chainName {"event_selection/hftree"};  // default value
+    std::vector <std::string> unusedCLIArguments;
+
+    for (int idx = 1; idx < argc; ++idx) {
+        // --input
+        if (std::string(argv[idx]) == "--input") {
+            if (idx + 1 < argc) { // Make sure not at the end of argv
+                inputPath = argv[++idx]; // value is argv entry after flag
+            } else {
+                std::cerr << "--input option requires one argument." << std::endl;
+                return 1;
+            }
+        }
+        // --output
+        else if (std::string(argv[idx]) == "--output") {
+            if (idx + 1 < argc) {
+                outputPath = argv[++idx];
+            } else {
+                std::cerr << "--output option requires one argument." << std::endl;
+                return 1;
+            }
+        }
+        // --chain
+        else if (std::string(argv[idx]) == "--chain") {
+            if (idx + 1 < argc) {
+                chainName = argv[++idx];
+            }
+        }
+        // --luaconfig
+        else if (std::string(argv[idx]) == "--luaconfig") {
+            if (idx + 1 < argc) {
+                configPath = argv[++idx];
+            }
+        }
+        else {
+            unusedCLIArguments.push_back(argv[idx]);
+        }
+    }
+
+    using std::swap;
+
+    // Load events from input file, retrieve reconstructed particles and MET
+    TChain chain(chainName.c_str());
+    // Path needs to be findable inside of Docker container
+    chain.Add(inputPath.c_str());
+    TTreeReader myReader(&chain);
+
+    // TODO: serialize the 4-momentum into the TTree over just using branches
+    // TTreeReaderValue<LorentzVectorM> lep_plus_p4M(myReader, "lep1_p4");
+    // TTreeReaderValue<LorentzVectorM> lep_minus_p4M(myReader, "lep2_p4");
+
+    TTreeReaderValue<int> leading_lep_PID(myReader, "lep1_PID");
+
+    TTreeReaderValue<float> lep_plus_Px(myReader, "lep1_Px");
+    TTreeReaderValue<float> lep_plus_Py(myReader, "lep1_Py");
+    TTreeReaderValue<float> lep_plus_Pz(myReader, "lep1_Pz");
+    TTreeReaderValue<float> lep_plus_E(myReader, "lep1_E");
+
+    TTreeReaderValue<float> lep_minus_Px(myReader, "lep2_Px");
+    TTreeReaderValue<float> lep_minus_Py(myReader, "lep2_Py");
+    TTreeReaderValue<float> lep_minus_Pz(myReader, "lep2_Pz");
+    TTreeReaderValue<float> lep_minus_E(myReader, "lep2_E");
+
+    // Define output TTree, which will contain the weights we're computing (including uncertainty and computation time)
+    std::unique_ptr<TTree> out_tree = std::make_unique<TTree>("momemta", "momemta");
+    double weight_DY, weight_DY_err, weight_DY_time;
+    out_tree->Branch("weight_DY", &weight_DY);
+    out_tree->Branch("weight_DY_err", &weight_DY_err);
+    out_tree->Branch("weight_DY_time", &weight_DY_time);
+
+    // Prepare MoMEMta to compute the weights
+
+    // logging::set_level(logging::level::debug);
+    logging::set_level(logging::level::error);
+
+    // Construct the ConfigurationReader from the Lua file
+    ConfigurationReader configuration(configPath);
+
+    // Instantiate MoMEMta using a **frozen** configuration
+    MoMEMta weight(configuration.freeze());
+
+    int counter = 0;
+    while (myReader.Next()) {
+        /*
+         * Prepare the LorentzVectors passed to MoMEMta:
+         * In the input file they are written in the PtEtaPhiM<float> basis,
+         * while MoMEMta expects PxPyPzE<double>, so we have to perform this change of basis:
+         *
+         * We define here Particles, allowing MoMEMta to correctly map the inputs to the configuration file.
+         * The string identifier used here must be the same as used to declare the inputs in the config file
+         */
+        // momemta::Particle lep_plus("lepton1",  LorentzVector { lep_plus_p4M->Px(), lep_plus_p4M->Py(), lep_plus_p4M->Pz(), lep_plus_p4M->E() });
+        // momemta::Particle lep_minus("lepton2", LorentzVector { lep_minus_p4M->Px(), lep_minus_p4M->Py(), lep_minus_p4M->Pz(), lep_minus_p4M->E() });
+        momemta::Particle lep_plus("lepton1",  LorentzVector { *lep_plus_Px, *lep_plus_Py, *lep_plus_Pz, *lep_plus_E });
+        momemta::Particle lep_minus("lepton2", LorentzVector { *lep_minus_Px, *lep_minus_Py, *lep_minus_Pz, *lep_minus_E });
+
+        // Due to numerical instability, the mass can sometimes be negative. If it's the case, change the energy in order to be mass-positive
+        normalizeInput(lep_plus.p4);
+        normalizeInput(lep_minus.p4);
+
+        // Ensure the leptons are given in the correct order w.r.t their charge
+        if (*leading_lep_PID < 0)
+            swap(lep_plus, lep_minus);
+
+        auto start_time = std::chrono::system_clock::now();
+        // Compute the weights!
+        std::vector<std::pair<double, double>> weights = weight.computeWeights({lep_minus, lep_plus});
+        auto end_time = std::chrono::system_clock::now();
+
+        // Retrieve the weight and uncertainty
+        weight_DY = weights.back().first;
+        weight_DY_err = weights.back().second;
+        weight_DY_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time).count();
+
+        LOG(debug) << "Event " << myReader.GetCurrentEntry() << " result: " << weight_DY << " +- " << weight_DY_err;
+        LOG(info) << "Weight computed in " << weight_DY_time << "ms";
+
+        out_tree->Fill();
+
+        ++counter;
+        if (counter % 1000 == 0)
+            std::cout << "calculated weights for " << counter << " events\n";
+
+    }
+    std::cout << "calculated weights for " << counter << " events\n";
+
+    // Save output to TTree
+    out_tree->SaveAs(outputPath.c_str());
+
+    return 0;
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		generate p p > l+ l-
		output MoMEMta pp_drell_yan