From ed64b56e420cc7a06386c7ce51ea4c427ec61d91 Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Tue, 10 Aug 2021 00:38:42 -0500
Subject: [PATCH] feat: Add MoMEMta scripts for llbb topology and Drell-Yan
 hypothesis (#11)

* Add a directory and C++ script for the llbb event topology for MoMEMta
   - The idea is to have one C++ script for an event topology and then switch out different physics hypotheses for that one script
* Add a Drell-Yan hypothesis MoMEMta-MaGMEE config and MoMEMta Lua config
   - The momemta/llbb/drell-yan.lua currently causes and error for unknown reasons (c.f. Issue #3)
   - Lua config is altered from a version provided by Florian Bury
* Add a run Bash script
---
 configs/momemta/drell-yan_llbb.mg5 |   2 +
 momemta/llbb/CMakeLists.txt        |  39 ++++++
 momemta/llbb/drell-yan.lua         | 185 ++++++++++++++++++++++++++++
 momemta/llbb/run_momemta.sh        |  53 ++++++++
 momemta/llbb/topology_llbb.cxx     | 190 +++++++++++++++++++++++++++++
 5 files changed, 469 insertions(+)
 create mode 100644 configs/momemta/drell-yan_llbb.mg5
 create mode 100644 momemta/llbb/CMakeLists.txt
 create mode 100644 momemta/llbb/drell-yan.lua
 create mode 100644 momemta/llbb/run_momemta.sh
 create mode 100644 momemta/llbb/topology_llbb.cxx

diff --git a/configs/momemta/drell-yan_llbb.mg5 b/configs/momemta/drell-yan_llbb.mg5
new file mode 100644
index 0000000..835b8f1
--- /dev/null
+++ b/configs/momemta/drell-yan_llbb.mg5
@@ -0,0 +1,2 @@
+generate p p > l+ l- b b~
+output MoMEMta pp_drell_yan_llbb
diff --git a/momemta/llbb/CMakeLists.txt b/momemta/llbb/CMakeLists.txt
new file mode 100644
index 0000000..fb84951
--- /dev/null
+++ b/momemta/llbb/CMakeLists.txt
@@ -0,0 +1,39 @@
+cmake_minimum_required(VERSION 3.7...3.20)
+
+if(${CMAKE_VERSION} VERSION_LESS 3.12)
+    cmake_policy(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION})
+endif()
+
+project(Drell-Yan-Examples)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -pedantic -Wextra")
+
+# Require c++11 *at least*, use default compiler standard if possible
+if (CMAKE_CXX_STANDARD_COMPUTED_DEFAULT STRLESS "11" OR
+        CMAKE_CXX_STANDARD_COMPUTED_DEFAULT STREQUAL "98")
+    set(CMAKE_CXX_STANDARD 11)
+    set(CMAKE_CXX_STANDARD_REQUIRED ON)
+endif()
+
+# Stick to the standard
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+# Find dependencices
+
+# CMake will automagically also link to MoMEMta's dependencies, ie LHAPDF and ROOT
+find_package(MoMEMta CONFIG REQUIRED)
+
+# But MoMEMta doesn't use TreePlayer: we have to add it ourselves
+find_library(ROOT_TREEPLAYER_LIBRARY TreePlayer HINTS ${ROOT_LIBRARY_DIR} REQUIRED)
+
+# Figure out what do do here and how to simplify things. Above is bolierplate and below is the code
+
+add_executable(topology_llbb "topology_llbb.cxx")
+
+target_link_libraries(topology_llbb momemta::momemta)
+# FIXME: The TTbar example uses TreePlayer so copy this here FOR NOW
+target_link_libraries(topology_llbb ${ROOT_TREEPLAYER_LIBRARY})
+
+set_target_properties(topology_llbb
+      PROPERTIES
+      OUTPUT_NAME "topology_llbb")
diff --git a/momemta/llbb/drell-yan.lua b/momemta/llbb/drell-yan.lua
new file mode 100644
index 0000000..b00f86c
--- /dev/null
+++ b/momemta/llbb/drell-yan.lua
@@ -0,0 +1,185 @@
+-- Lua config originally provided by Florian Bury
+function append(t1, t2)
+    for i = 1, #t2 do
+        t1[#t1 + 1] = t2[i]
+    end
+
+    return t1
+end
+
+-- Order convention : e+ e- b bbar
+-- Reco particles : input::particles/1
+-- Gen particles, depends on what you do
+
+-- Available parameters : https://github.com/MoMEMta/MoMEMta/blob/master/core/src/MoMEMta.cc#L141
+-- If "relative accuracy" is reached stop launching points unless "min_eval" point have been launched,
+-- if after "max_eval" points have been launched stop launching even if we do not have reahced accuracy.
+-- "n_start" : Vegas works by iterations will launch n_start point by n_start point and refine the grid at each step
+-- n_increase allow to launch more points at each iteration
+-- NB : all the point that have been launched are used to compute the intergal
+cuba = {
+    seed = random,
+    relative_accuracy = 0.01,
+    verbosity = 3,
+    max_eval = max_eval,
+    n_start = n_start
+}
+
+-- NB: to be defined in the .cxx is matrix_element_prefix
+
+--inputs as will be feed to the blocks and ME
+local neg_lepton = declare_input("lepton1")
+local pos_lepton = declare_input("lepton2")
+local bjet1 = declare_input("bjet1")
+local bjet2 = declare_input("bjet2")
+
+
+USE_TF = true
+USE_PERM = true -- carefull if you use TF binned in eta and the permutations, jet1 tf is applied to jet2
+
+parameters = {
+    energy = 13000.,
+    Z_mass = 91.1876,
+    Z_width = 2.49,
+    lep1_me_index = 1,
+    lep2_me_index = 2,
+    matrix_element = 'pp_drell_yan_llbb_sm_P1_Sigma_sm_gg_epembbx',
+    matrix_element_parameters = {
+      card = 'MatrixElements/pp_drell_yan_llbb/Cards/param_card.dat'
+    },
+    export_graph_as = "drell-yan_example_computing_graph.dot"
+}
+load_modules('MatrixElements/pp_drell_yan_llbb/build/libme_pp_drell_yan_llbb.so')
+
+if USE_PERM then
+    add_reco_permutations(bjet1, bjet2)
+end
+
+if USE_TF then
+    -- Fix gen energy of the two leptons according to their transfer function
+    GaussianTransferFunctionOnEnergy.tf_neg_lepton = {
+        ps_point = add_dimension(),
+        reco_particle = neg_lepton.reco_p4,
+        sigma = 0.10,
+        sigma_range = 3.,
+    }
+    neg_lepton.set_gen_p4("tf_neg_lepton::output")
+
+    GaussianTransferFunctionOnEnergy.tf_pos_lepton = {
+        ps_point = add_dimension(),
+        reco_particle = pos_lepton.reco_p4,
+        sigma = 0.10,
+        sigma_range = 3.,
+    }
+    pos_lepton.set_gen_p4("tf_pos_lepton::output")
+
+    -- Compute the phase space density for observed particles not concerned by the change of variable:
+    -- here lepton on which we evaluate the TF
+    -- The TF jacobian just account for dp/dE to go from |p| to E, not the phase space volume, the
+    -- other blocks give the whole product of jacobian and phase space volume to go from one param to another
+    StandardPhaseSpace.phaseSpaceOut = {
+        particles = {'tf_neg_lepton::output', 'tf_pos_lepton::output'}
+    }
+    -- Obtain the energy of the b's from momentum conservation (first two input are RECO b's, other inputs are
+    -- used to fix PT to be balanced, all at gen level)
+    BlockA.blocka = {
+        p1 = bjet1.reco_p4,
+        p2 = bjet2.reco_p4,
+        branches = {'tf_neg_lepton::output', 'tf_pos_lepton::output'},
+    }
+    Looper.looper = {
+        solutions = "blocka::solutions",
+        path = Path("tfEval_bjet1", "tfEval_bjet2", "boost", "drellyan", "integrand") -- everything that will depend on the different solutions
+    }
+    bjet1.set_gen_p4("looper::particles/1")
+    bjet2.set_gen_p4("looper::particles/2")
+
+    GaussianTransferFunctionOnEnergyEvaluator.tfEval_bjet1 = {
+        ps_point = add_dimension(),
+        reco_particle = bjet1.reco_p4,
+        sigma = 0.30,
+        sigma_range = 3.,
+    }
+    GaussianTransferFunctionOnEnergyEvaluator.tfEval_bjet2 = {
+        ps_point = add_dimension(),
+        reco_particle = bjet2.reco_p4,
+        sigma = 0.30,
+        sigma_range = 3.,
+    }
+
+    genParticles = {
+        pos_lepton.gen_p4,
+        neg_lepton.gen_p4,
+        bjet1.gen_p4,
+        bjet2.gen_p4,
+        }
+
+else
+    genParticles = {
+        pos_lepton.reco_p4,
+        neg_lepton.reco_p4,
+        bjet1.reco_p4,
+        bjet2.reco_p4,
+        }
+    -- Compute the phase space density for observed particles not concerned by the change of variable:
+    -- here lepton on which we evaluate the TF
+    -- The TF jacobian just account for dp/dE to go from |p| to E, not the phase space volume,
+    -- the other blocks give the whole product of jacobian and phase space volume to go from one param to another
+    StandardPhaseSpace.phaseSpaceOut = {
+        particles = genParticles
+    }
+end
+
+
+BuildInitialState.boost = {
+    do_transverse_boost = true,
+    particles = genParticles
+}
+
+jacobians = {'phaseSpaceOut::phase_space'}
+
+if USE_TF then
+    append(jacobians, {'tf_neg_lepton::TF_times_jacobian', 'looper::jacobian', 'tf_pos_lepton::TF_times_jacobian', 'tfEval_bjet1::TF', 'tfEval_bjet2::TF'})
+end
+
+MatrixElement.drellyan = {
+  pdf = 'CT10nlo',
+  pdf_scale = parameter('Z_mass'),
+
+  matrix_element = parameter('matrix_element'),
+  matrix_element_parameters = {
+      card = parameter('matrix_element_parameters'),
+  },
+
+  initialState = 'boost::partons',
+
+  particles = {
+    inputs = genParticles,
+    ids = {
+      {
+        pdg_id =  -11,
+        me_index = 1,
+      },
+      {
+        pdg_id = 11,
+        me_index = 2,
+      },
+      {
+        pdg_id = 5,
+        me_index = 3,
+      },
+      {
+        pdg_id = -5,
+        me_index = 4,
+      }
+    }
+  },
+
+  jacobians = jacobians
+}
+
+DoubleLooperSummer.integrand = {
+    input = "drellyan::output"
+}
+
+integrand("integrand::sum")
diff --git a/momemta/llbb/run_momemta.sh b/momemta/llbb/run_momemta.sh
new file mode 100644
index 0000000..2383ec1
--- /dev/null
+++ b/momemta/llbb/run_momemta.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# Run this inside of the directory for the hypothesis
+
+# Ensure lhadpdf set exists
+lhapdf get CT10nlo
+
+if [[ -d MatrixElements ]];then
+  rm -rf MatrixElements
+fi
+mkdir MatrixElements
+pushd MatrixElements
+# Generate the matrix Element with MadGraph5
+mg5_aMC ../../../configs/momemta/drell-yan_llbb.mg5
+rm py.py
+popd
+
+# Build Matrix Element
+# c.f. https://github.com/MoMEMta/MoMEMta-MaGMEE#usage
+# Matrix Element namespace name defined in ../../../configs/momemta/drell-yan_llbb.mg5
+cmake \
+    -DCMAKE_INSTALL_PREFIX=/usr/local/venv \
+    -S MatrixElements/pp_drell_yan_llbb \
+    -B MatrixElements/pp_drell_yan_llbb/build
+cmake MatrixElements/pp_drell_yan_llbb/build -L
+cmake --build MatrixElements/pp_drell_yan_llbb/build \
+    --clean-first \
+    --parallel $(($(nproc) - 1))
+
+# Example level build
+if [ -d build ];then
+  rm -rf build
+fi
+# Cleanup if any failed runs
+if [ -f core ]; then
+  rm core
+fi
+cmake \
+    -DCMAKE_INSTALL_PREFIX=/usr/local/venv \
+    -S . \
+    -B build
+cmake build -L
+cmake --build build \
+    --clean-first \
+    --parallel $(($(nproc) - 1))
+
+INPUT_PATH="${1:-/home/feickert/Code/GitHub/SCAILFIN/MadGraph5-simulation-configs/preprocessing/preprocessing_output.root}"
+OUTPUT_PATH="${2:-momemta_weights.root}"
+
+# Current configuration in topology_llbb.cxx requires running from top level of example dir
+time ./build/topology_llbb \
+  --input "${INPUT_PATH}" \
+  --output "${OUTPUT_PATH}"
diff --git a/momemta/llbb/topology_llbb.cxx b/momemta/llbb/topology_llbb.cxx
new file mode 100644
index 0000000..67b38ae
--- /dev/null
+++ b/momemta/llbb/topology_llbb.cxx
@@ -0,0 +1,190 @@
+#include <momemta/ConfigurationReader.h>
+#include <momemta/Logging.h>
+#include <momemta/MoMEMta.h>
+#include <momemta/Unused.h>
+
+#include <TTree.h>
+#include <TChain.h>
+#include <TTreeReader.h>
+#include <TTreeReaderValue.h>
+#include <Math/PtEtaPhiM4D.h>
+#include <Math/LorentzVector.h>
+
+#include <chrono>
+#include <memory>
+#include <iostream>
+
+using LorentzVectorM = ROOT::Math::LorentzVector<ROOT::Math::PtEtaPhiM4D<float>>;
+
+/*
+ * Example executable file loading an input sample of events,
+ * computing weights using MoMEMta in the Drell-Yan hypothesis,
+ * and saving these weights along with a copy of the event content in an output file.
+ */
+
+void normalizeInput(LorentzVector& p4) {
+    if (p4.M() > 0)
+        return;
+
+    // Increase the energy until M is positive
+    p4.SetE(p4.P());
+    while (p4.M2() < 0) {
+        double delta = p4.E() * 1e-5;
+        p4.SetE(p4.E() + delta);
+    };
+}
+
+int main(int argc, char** argv) {
+
+    std::string inputPath;  // required input
+    std::string outputPath;  // required input
+    std::string configPath {"drell-yan_example.lua"};  // default value
+    std::string chainName {"event_selection/hftree"};  // default value
+    std::vector <std::string> unusedCLIArguments;
+
+    for (int idx = 1; idx < argc; ++idx) {
+        // --input
+        if (std::string(argv[idx]) == "--input") {
+            if (idx + 1 < argc) { // Make sure not at the end of argv
+                inputPath = argv[++idx]; // value is argv entry after flag
+            } else {
+                std::cerr << "--input option requires one argument." << std::endl;
+                return 1;
+            }
+        }
+        // --output
+        else if (std::string(argv[idx]) == "--output") {
+            if (idx + 1 < argc) {
+                outputPath = argv[++idx];
+            } else {
+                std::cerr << "--output option requires one argument." << std::endl;
+                return 1;
+            }
+        }
+        // --chain
+        else if (std::string(argv[idx]) == "--chain") {
+            if (idx + 1 < argc) {
+                chainName = argv[++idx];
+            }
+        }
+        // --luaconfig
+        else if (std::string(argv[idx]) == "--luaconfig") {
+            if (idx + 1 < argc) {
+                configPath = argv[++idx];
+            }
+        }
+        else {
+            unusedCLIArguments.push_back(argv[idx]);
+        }
+    }
+
+    using std::swap;
+
+    // Load events from input file, retrieve reconstructed particles and MET
+    TChain chain(chainName.c_str());
+    // Path needs to be findable inside of Docker container
+    chain.Add(inputPath.c_str());
+    TTreeReader myReader(&chain);
+
+    // TODO: serialize the 4-momentum into the TTree over just using branches
+    // TTreeReaderValue<LorentzVectorM> lep_plus_p4M(myReader, "lep1_p4");
+    // TTreeReaderValue<LorentzVectorM> lep_minus_p4M(myReader, "lep2_p4");
+
+    TTreeReaderValue<int> leading_lep_PID(myReader, "lep1_PID");
+
+    TTreeReaderValue<float> lep_plus_Px(myReader, "lep1_Px");
+    TTreeReaderValue<float> lep_plus_Py(myReader, "lep1_Py");
+    TTreeReaderValue<float> lep_plus_Pz(myReader, "lep1_Pz");
+    TTreeReaderValue<float> lep_plus_E(myReader, "lep1_E");
+
+    TTreeReaderValue<float> lep_minus_Px(myReader, "lep2_Px");
+    TTreeReaderValue<float> lep_minus_Py(myReader, "lep2_Py");
+    TTreeReaderValue<float> lep_minus_Pz(myReader, "lep2_Pz");
+    TTreeReaderValue<float> lep_minus_E(myReader, "lep2_E");
+
+    TTreeReaderValue<float> bjet1_Px(myReader, "bjet1_Px");
+    TTreeReaderValue<float> bjet1_Py(myReader, "bjet1_Py");
+    TTreeReaderValue<float> bjet1_Pz(myReader, "bjet1_Pz");
+    TTreeReaderValue<float> bjet1_E(myReader, "bjet1_E");
+
+    TTreeReaderValue<float> bjet2_Px(myReader, "bjet2_Px");
+    TTreeReaderValue<float> bjet2_Py(myReader, "bjet2_Py");
+    TTreeReaderValue<float> bjet2_Pz(myReader, "bjet2_Pz");
+    TTreeReaderValue<float> bjet2_E(myReader, "bjet2_E");
+
+    // Define output TTree, which will contain the weights we're computing (including uncertainty and computation time)
+    std::unique_ptr<TTree> out_tree = std::make_unique<TTree>("momemta", "momemta");
+    double weight_DY, weight_DY_err, weight_DY_time;
+    out_tree->Branch("weight_DY", &weight_DY);
+    out_tree->Branch("weight_DY_err", &weight_DY_err);
+    out_tree->Branch("weight_DY_time", &weight_DY_time);
+
+    // Prepare MoMEMta to compute the weights
+
+    // logging::set_level(logging::level::debug);
+    logging::set_level(logging::level::error);
+
+    // Construct the ConfigurationReader from the Lua file
+    ConfigurationReader configuration(configPath);
+
+    std::cout << "\n# Loaded MoMEMta Lua configuration\n" << std::endl;
+
+    // Instantiate MoMEMta using a **frozen** configuration
+    MoMEMta weight(configuration.freeze());
+
+    int counter = 0;
+    while (myReader.Next()) {
+        /*
+         * Prepare the LorentzVectors passed to MoMEMta:
+         * In the input file they are written in the PtEtaPhiM<float> basis,
+         * while MoMEMta expects PxPyPzE<double>, so we have to perform this change of basis:
+         *
+         * We define here Particles, allowing MoMEMta to correctly map the inputs to the configuration file.
+         * The string identifier used here must be the same as used to declare the inputs in the config file
+         */
+        // momemta::Particle lep_plus("lepton1",  LorentzVector { lep_plus_p4M->Px(), lep_plus_p4M->Py(), lep_plus_p4M->Pz(), lep_plus_p4M->E() });
+        // momemta::Particle lep_minus("lepton2", LorentzVector { lep_minus_p4M->Px(), lep_minus_p4M->Py(), lep_minus_p4M->Pz(), lep_minus_p4M->E() });
+        momemta::Particle lep_plus("lepton1",  LorentzVector { *lep_plus_Px, *lep_plus_Py, *lep_plus_Pz, *lep_plus_E });
+        momemta::Particle lep_minus("lepton2", LorentzVector { *lep_minus_Px, *lep_minus_Py, *lep_minus_Pz, *lep_minus_E });
+        momemta::Particle bjet1("bjet1", LorentzVector { *bjet1_Px, *bjet1_Py, *bjet1_Pz, *bjet1_E });
+        momemta::Particle bjet2("bjet2", LorentzVector { *bjet2_Px, *bjet2_Py, *bjet2_Pz, *bjet2_E });
+
+        // Due to numerical instability, the mass can sometimes be negative. If it's the case, change the energy in order to be mass-positive
+        normalizeInput(lep_plus.p4);
+        normalizeInput(lep_minus.p4);
+        normalizeInput(bjet1.p4);
+        normalizeInput(bjet2.p4);
+
+        // Ensure the leptons are given in the correct order w.r.t their charge
+        if (*leading_lep_PID < 0)
+            swap(lep_plus, lep_minus);
+
+        auto start_time = std::chrono::system_clock::now();
+        // Compute the weights!
+        // MoMEMta::computeWeights({vector of particles}, met)
+        // The MET is an optional argument (defaults to a null vector)
+        std::vector<std::pair<double, double>> weights = weight.computeWeights({lep_minus, lep_plus, bjet1, bjet2});
+        auto end_time = std::chrono::system_clock::now();
+
+        // Retrieve the weight and uncertainty
+        weight_DY = weights.back().first;
+        weight_DY_err = weights.back().second;
+        weight_DY_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time).count();
+
+        LOG(debug) << "Event " << myReader.GetCurrentEntry() << " result: " << weight_DY << " +- " << weight_DY_err;
+        LOG(info) << "Weight computed in " << weight_DY_time << "ms";
+
+        out_tree->Fill();
+
+        ++counter;
+        if (counter % 1000 == 0)
+            std::cout << "calculated weights for " << counter << " events\n";
+
+    }
+    std::cout << "calculated weights for " << counter << " events\n";
+
+    // Save output to TTree
+    out_tree->SaveAs(outputPath.c_str());
+
+    return 0;
+}