diff --git a/.github/workflows/build-with-kokkos.yml b/.github/workflows/build-with-kokkos.yml index ad56034b1..14c0dd76d 100644 --- a/.github/workflows/build-with-kokkos.yml +++ b/.github/workflows/build-with-kokkos.yml @@ -29,6 +29,7 @@ jobs: image: ${{ matrix.image }} env: Kokkos_ROOT: /opt/kokkos + VARIORUM_ROOT: /opt/variorum steps: - name: Checkout Kokkos Tools uses: actions/checkout@v4 @@ -38,22 +39,29 @@ jobs: repository: kokkos/kokkos path: kokkos ref: develop + - name: Checkout Varioum + uses: actions/checkout@v4 + with: + repository: LLNL/variorum + path: variorum + ref: 249a39bf41972afe2213d3dc904e47647364f466 - name: Install compilers run : | apt update if [ ${{ matrix.compiler }} != 'default' ];then - apt --yes --no-install-recommends install ${{ matrix.compiler.c }} ${{ matrix.compiler.cpp }} + apt --yes --no-install-recommends install ${{ matrix.compiler.c }} ${{ matrix.compiler.cpp }} gfortran export CC=${{ matrix.compiler.c }} export CXX=${{ matrix.compiler.cpp }} else - apt --yes --no-install-recommends install gcc g++ + apt --yes --no-install-recommends install gcc g++ gfortran export CC=gcc export CXX=g++ fi echo "CC=$CC" >> $GITHUB_ENV echo "CXX=$CXX" >> $GITHUB_ENV + echo "FC=gfortran" >> $GITHUB_ENV case ${{ matrix.preset }} in *OpenMP* ) @@ -71,14 +79,22 @@ jobs: exit -1 esac - - name: Install git, CMake, OpenMPI, PAPI and dtrace + - name: Install git, CMake, Jansson, OpenMPI, PAPI and dtrace run: | apt --yes --no-install-recommends install \ git ca-certificates \ cmake make \ libopenmpi-dev \ systemtap-sdt-dev \ + libjansson-dev \ libpapi-dev + - name: Compile and install Variorum + working-directory: variorum + run: | + cp ${GITHUB_WORKSPACE}/kokkos.presets.json CMakePresets.json + cmake -S src -B build -DCMAKE_INSTALL_PREFIX=${VARIORUM_ROOT} + cd build + make install - name: Compile and install Kokkos working-directory: kokkos run: | @@ -92,7 +108,8 @@ jobs: cmake --build --preset=${{ matrix.preset }} cmake --install build-with-${{ matrix.preset }} --prefix=/opt/kokkos-tools # For now, GitHub runners are used. These runner don't have GPUs. Therefore, testing can only be done for OpenMP. + # Skip variorum test since the architecture detected by variorum is not supported - name: Run tests if: ${{ matrix.preset == 'OpenMP' }} run: | - ctest --preset=${{ matrix.preset }} + ctest -E "test_kokkos_tools_variorum" --preset=${{ matrix.preset }} diff --git a/profiling/variorum-connector/variorum-connector.cpp b/profiling/variorum-connector/variorum-connector.cpp index 14c44314c..c7c0fa999 100644 --- a/profiling/variorum-connector/variorum-connector.cpp +++ b/profiling/variorum-connector/variorum-connector.cpp @@ -14,425 +14,218 @@ // //@HEADER -// Modified by Zach Frye at LLNL -// Contact: frye7@llnl.gov -// Organization: CASC at LLNL - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "kp_core.hpp" -// variorum trial run extern "C" { #include #include } -#include "kp_core.hpp" - -#if USE_MPI -#include -#endif +#include +#include namespace KokkosTools { namespace VariorumConnector { -bool filterKernels; -uint64_t nextKernelID; -std::vector kernelNames; -std::unordered_set activeKernels; - -typedef void (*initFunction)(const int, const uint64_t, const uint32_t, void*); -typedef void (*finalizeFunction)(); -typedef void (*beginFunction)(const char*, const uint32_t, uint64_t*); -typedef void (*endFunction)(uint64_t); - -static initFunction initProfileLibrary = NULL; -static finalizeFunction finalizeProfileLibrary = NULL; -static beginFunction beginForCallee = NULL; -static beginFunction beginScanCallee = NULL; -static beginFunction beginReduceCallee = NULL; -static endFunction endForCallee = NULL; -static endFunction endScanCallee = NULL; -static endFunction endReduceCallee = NULL; - -// the output path for ranekd output files -std::string printpath = "./"; - -// variables for simple timer -time_t start_time; +// Initial and final power values for a kernel +double global_power[2] = {0, 0}; +// Initial and final time value for a kernel +long long global_time[2] = {0, 0}; +uint32_t global_device_id = -1; +uint32_t global_instance_id = -1; +std::string global_filename; +std::string global_kernel_name; +std::string global_device_type; + +using initFunction = void (*)(const int, const uint64_t, const uint32_t, void*); +using finalizeFunction = void (*)(); +using beginFunction = void (*)(const char*, const uint32_t, uint64_t*); +using endFunction = void (*)(uint64_t); + +static initFunction initProfileLibrary = nullptr; +static finalizeFunction finalizeProfileLibrary = nullptr; +static beginFunction beginForCallee = nullptr; +static beginFunction beginScanCallee = nullptr; +static beginFunction beginReduceCallee = nullptr; +static endFunction endForCallee = nullptr; +static endFunction endScanCallee = nullptr; +static endFunction endReduceCallee = nullptr; + +inline std::string get_file_name(const char* env_var_name) { + char* parsed_output_file = getenv(env_var_name); + if (!parsed_output_file) { + std::cerr << "Couldn't parse " << env_var_name + << " environment " + "variable! Printing to variorumoutput.txt\n"; + return "variorumoutput.txt"; + } + return parsed_output_file; +} -int type_of_profiling = - 0; // 0 is for both print power & json, 1 is for print power, 2 is for json -bool usingMPI = false; -bool verbosePrint = false; -bool mpiOutPut = false; +void create_file() { + global_filename = get_file_name("KOKKOS_TOOLS_VARIORUM_OUTPUT_FILE"); -// Function: variorum_print_power_call -// Description: Prints out power data in two ways: Verbose and non verbose. -// verbose will print out each component of the systems power draw -// in the sierra architecture. non-verbose will print the node -// power. -// Pre: None -// Post: Will print an error message if variorum print power fails. No return -// value. -std::string variorum_print_power_call() { - std::string outputString; - json_t* power_obj = json_object(); - double power_node, power_sock0, power_mem0, power_gpu0; - double power_sock1, power_mem1, power_gpu1; - int ret; - ret = variorum_get_node_power_json(power_obj); - if (ret != 0) { - return "Print power failed!\n"; - } - // total node measurment - power_node = json_real_value(json_object_get(power_obj, "power_node")); - const char* hostnameChar = - json_string_value(json_object_get(power_obj, "hostname")); - std::string hostname(hostnameChar); - // print informatin to screen - if (verbosePrint) { - // socket 1 measurements - power_sock0 = - json_real_value(json_object_get(power_obj, "power_cpu_socket_0")); - power_mem0 = - json_real_value(json_object_get(power_obj, "power_mem_socket_0")); - power_gpu0 = - json_real_value(json_object_get(power_obj, "power_gpu_socket_0")); - // socket 2 measurements - power_sock1 = - json_real_value(json_object_get(power_obj, "power_cpu_socket_1")); - power_mem1 = - json_real_value(json_object_get(power_obj, "power_mem_socket_1")); - power_gpu1 = - json_real_value(json_object_get(power_obj, "power_gpu_socket_1")); - - outputString += "HostName " + hostname + "\n"; - outputString += "Total Node Power: " + std::to_string(power_node); - outputString += "\n Socket 1 Power"; - outputString += "\n CPU Socket 1: " + std::to_string(power_sock0); - outputString += "\n Mem Socket 1: " + std::to_string(power_mem0); - outputString += "\n GPU Socket 1: " + std::to_string(power_gpu0); - outputString += "\n Socket 2 Power"; - outputString += "\n CPU Socket 2: " + std::to_string(power_sock1); - outputString += "\n Mem Socket 2: " + std::to_string(power_mem1); - outputString += "\n GPU Socket 2: " + std::to_string(power_gpu1) + "\n"; + std::ifstream infile(global_filename); + if (infile.good()) { + infile.close(); + std::ofstream file(global_filename, std::ios::trunc); } else { - outputString += hostname + ": " + std::to_string(power_node) + "\n"; - } - - return outputString; -} - -// Function: variorum_json_call() -// Description: function that will call variorum print json and handle the -// execution errors Pre: None Post: Will print an error message if variorum -// print json fails. No return value. -char* variorum_json_call() { - int ret; - json_t* my_power_obj = NULL; - my_power_obj = json_object(); - ret = variorum_get_node_power_json(my_power_obj); - if (ret != 0) { - printf("First run: JSON get node power failed!\n"); + std::ofstream file(global_filename, std::ios::app); } - char* s = json_dumps(my_power_obj, 0); - return s; } -// Function: variorum_call_mpi -// Description: This function will call the variourm helper functions and either -// write them to -// output files or to std::cout depending on what options are -// selected -// Pre: None -// Post: An output message if variourum returned an error or if it functioned -// correctly -void variorum_call_mpi() { -#if USE_MPI - if (usingMPI == true) { - int rank; - std::string output; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - std::ofstream file; - std::ofstream pfile; - if (type_of_profiling == - 0) { // if both print power and json options selected - if (mpiOutPut) { - std::string filenamejson = printpath + "variorum-output-mpi-rank-" + - std::to_string(rank) + "-json.txt"; - std::string filenameprintp = printpath + "variorum-output-mpi-rank-" + - std::to_string(rank) + ".txt"; - - file.open(filenamejson, std::ios_base::app); - std::ofstream pfile; - pfile.open(filenameprintp, std::ios_base::app); - char* s = variorum_json_call(); - file << s; - pfile << variorum_print_power_call(); - } else { - std::cout << "MPI Rank " << rank << "\n"; - output = variorum_print_power_call(); - char* s = variorum_json_call(); - puts(s); - std::cout << s << std::endl; - } - - } else if (type_of_profiling == 1) { // if only print power is selected - if (mpiOutPut) { - std::string filenameprintp = printpath + "variorum-output-mpi-rank-" + - std::to_string(rank) + ".txt"; - std::ofstream pfile; - pfile.open(filenameprintp, std::ios_base::app); - pfile << variorum_print_power_call(); - } else { - std::cout << "MPI Rank " << rank << "\n"; - output = variorum_print_power_call(); - std::cout << output << std::endl; - } - } else if (type_of_profiling == 2) { // if only json is selecte - if (mpiOutPut) { - std::string filenamejson = printpath + "variorum-output-mpi-rank-" + - std::to_string(rank) + "-json.txt"; - std::ofstream file; - file.open(filenamejson, std::ios_base::app); - char* s = variorum_json_call(); - file << s; - } else { - std::cout << "MPI Rank " << rank << "\n"; - char* s = variorum_json_call(); - puts(s); - std::cout << s << std::endl; +void variorum_call() { + char* s = nullptr; + int variorum_error = variorum_get_power_json(&s); + if (variorum_error != 0) { + std::cerr << "JSON get node power failed!\n"; + abort(); + } + + json_error_t error; + json_t* root = nullptr; + json_t* socket_0 = nullptr; + json_t* timestamp_value = nullptr; + json_t* power_gpu_watts = nullptr; + json_t* gpu_0_value = nullptr; + + // Parse JSON string into a json_t object + root = json_loads(s, 0, &error); + if (!root) { + std::cerr << "Error parsing JSON: " << error.text << '\n'; + } + + const char* key; + json_t* value; + json_object_foreach(root, key, value) { + if (json_is_object(value)) { + json_t* socket_object = json_object_get(value, "socket_0"); + if (socket_object && json_is_object(socket_object)) { + socket_0 = socket_object; + timestamp_value = json_object_get(value, "timestamp"); + if (global_time[0] == 0) { + global_time[0] = (long long)json_integer_value(timestamp_value); + } else { + global_time[1] = (long long)json_integer_value(timestamp_value); + } + break; } } - file.close(); } -#endif -} - -// Function: variorum_call -// Description: The function determines what profiling options are selected and -// prints the profoiling data out to std out Pre: None Post: An output message -// if variourum returned an error or if it functioned correctly - -void variorum_call() { - std::string output; - if (type_of_profiling == 0) { - output = variorum_print_power_call(); - char* s = variorum_json_call(); - std::cout << s << "\n"; - std::cout << output << std::endl; - } else if (type_of_profiling == 1) { - output = variorum_print_power_call(); - std::cout << output << std::endl; - } else if (type_of_profiling == 2) { - char* s = variorum_json_call(); - std::cout << s << std::endl; + // FIXME We assume that all GPUs are on socket 0 for now. + if (!socket_0 || !timestamp_value) { + std::cerr << "Failed to find 'socket_0' object or 'timestamp'.\n"; } -} -void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, - const uint32_t devInfoCount, - Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { - char* outputPathChar; - try { - outputPathChar = getenv("VARIORUM_OUTPUT_PATH"); - if (outputPathChar == NULL) { - throw 10; - } - std::string outputPathStr(outputPathChar); - printpath = outputPathStr; - std::cout << "Output Path set to" << outputPathChar << "\n"; - } catch (int e) { - if (e == 10) { - printpath = "./"; - std::cout << "No output path provided, the application will output to " - "the default path \n"; - } + // Access power_gpu_watts within socket_0 + power_gpu_watts = json_object_get(socket_0, "power_gpu_watts"); + if (!json_is_object(power_gpu_watts)) { + std::cerr << "Expected 'power_gpu_watts' to be an object.\n"; } - // Profiling options are read in from their enviornment variables and the - // options are set - char* profiling_type; - try { - profiling_type = getenv("KOKKOS_VARIORUM_FUNC_TYPE"); - if (profiling_type == NULL) { - throw 10; + std::string gpu_key = "GPU_" + std::to_string(global_device_id); + json_t* power_value = json_object_get(power_gpu_watts, gpu_key.c_str()); + if (json_is_number(power_value)) { + double power_val = json_number_value(power_value); + if (global_power[0] == 0) { + global_power[0] = power_val; + } else { + global_power[1] = power_val; } - if (strcmp(profiling_type, "ppower") == 0) { - type_of_profiling = 1; - std::cout << "Variorum print power will be called\n"; - if (verbosePrint == true) { - std::cout - << "Power Format: \n Hostname: total node power, cpuSocket1, " - "memScoket1, gpuSocket1, cpuSocket2, memScoket2, gpuSocket2 \n"; - } - } else if (strcmp(profiling_type, "json") == 0) { - type_of_profiling = 2; - } else if (strcmp(profiling_type, "both") == 0) { - type_of_profiling = 0; - } - } catch (int e) { - if (e == 10) { - type_of_profiling = 0; - std::cout << "No profiling options provided, profiling tool will call " - "variorum print power and json \n"; - } - } - try { - char* verbosePrintStr = getenv("VERBOSE"); - if (verbosePrintStr == NULL) { - throw 10; - } - if (strcmp(verbosePrintStr, "false") == 0 || - strcmp(verbosePrintStr, "") == 0) { - throw 20; - } else if (strcmp(verbosePrintStr, "true") == 0) { - std::cout << "Verbose power option set" - << "\n"; - verbosePrint = true; - } - } catch (int e) { - verbosePrint = false; - std::cout << "No verbose options provided, power information outut will " - "not be verbose \n Format - Hosntame : Node power value\n"; + } else { + std::cerr << "Error: GPU key " << gpu_key << " not found or not a number" + << std::endl; } - try { - char* usingMPIstr = getenv("VARIORUM_USING_MPI"); - if (usingMPIstr == NULL) { - throw 10; - } - if (strcmp(usingMPIstr, "false") == 0 || strcmp(usingMPIstr, "") == 0) { - throw 20; - } - if (strcmp(usingMPIstr, "true") == 0) { -#if USE_MPI - usingMPI = true; - try { - char* perRankOutput = getenv("RANKED_OUTPUT"); - if (strcmp(perRankOutput, "false") == 0 || - strcmp(perRankOutput, "") == 0) { - mpiOutPut = false; - } else if (strcmp(perRankOutput, "true") == 0) { - mpiOutPut = true; - } else { - mpiOutPut = false; - } - } catch (int f) { - std::cout << "Ranked output will no be used, error setting paramters" - << std::endl; - mpiOutPut = false; - } -#else - usingMPI = false; - std::cout << "Ignoring MPI enabled in Variorum: the connector was built " - "without MPI support" - << std::endl; -#endif - } - } catch (int e) { - std::cout << "No MPI Option provided, not using per rank output" - << std::endl; - usingMPI = false; + if (global_power[0] != 0 && global_power[1] != 0) { + int average_power = global_power[1] + global_power[0]; + double energy = + ((average_power / 2) * ((global_time[1] - global_time[0]) * .001)); + std::ofstream file(global_filename, std::ios::app); + file << "name: \"" << global_kernel_name + << "\", Device ID: " << global_device_id + << ", Instance ID: " << global_instance_id + << ", DeviceType: " << global_device_type + << ", Energy Estimate: " << energy << " J\n"; + global_time[0] = 0; + global_power[0] = 0; + global_power[1] = 0; + global_time[1] = 0; } - // Simple timer code to keep track of the general amount of time the - // application ran for. - time(&start_time); - std::cout << "Start Time: " << start_time << "\n"; +} - // variorum_call(); +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { + create_file(); } -void kokkosp_finalize_library() { - if (usingMPI) { - variorum_call_mpi(); - } else { - variorum_call(); +std::string device_type_to_string( + const Kokkos::Tools::Experimental::DeviceType& deviceType) { + switch (deviceType) { + case Kokkos::Tools::Experimental::DeviceType::Serial: + return "SERIAL"; + break; + case Kokkos::Tools::Experimental::DeviceType::OpenMP: + return "OPENMP"; + break; + case Kokkos::Tools::Experimental::DeviceType::Cuda: return "CUDA"; break; + case Kokkos::Tools::Experimental::DeviceType::HIP: return "HIP"; break; + case Kokkos::Tools::Experimental::DeviceType::OpenMPTarget: + return "OPENMPTARGET"; + break; + case Kokkos::Tools::Experimental::DeviceType::HPX: return "HPX"; break; + case Kokkos::Tools::Experimental::DeviceType::Threads: + return "THREADS"; + break; + case Kokkos::Tools::Experimental::DeviceType::SYCL: return "SYCL"; break; + case Kokkos::Tools::Experimental::DeviceType::OpenACC: + return "OPENACC"; + break; + default: return "UNKOWN"; } - time_t total_time; - time_t end_time; - time(&end_time); - std::cout << "End Time: " << end_time << "\nStart Time: " << start_time - << "\n"; - total_time = end_time - start_time; - - std::cout << "The kokkos library was alive for " << total_time << " seconds." - << std::endl; } void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { - std::cout << "Device ID: " << devID << "\n"; - if (usingMPI) { - variorum_call_mpi(); - } else { - variorum_call(); - } + auto result = Kokkos::Tools::Experimental::identifier_from_devid(devID); + global_kernel_name = name; + global_instance_id = result.instance_id; + global_device_type = device_type_to_string(result.type); + global_device_id = result.device_id; + variorum_call(); } -void kokkosp_end_parallel_for(const uint64_t kID) { - if (usingMPI) { - variorum_call_mpi(); - } else { - variorum_call(); - } -} +void kokkosp_end_parallel_for(const uint64_t kID) { variorum_call(); } void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { - std::cout << "Device ID: " << devID << "\n"; - if (usingMPI) { - variorum_call_mpi(); - } else { - variorum_call(); - } + auto result = Kokkos::Tools::Experimental::identifier_from_devid(devID); + global_kernel_name = name; + global_instance_id = result.instance_id; + global_device_type = device_type_to_string(result.type); + global_device_id = result.device_id; + variorum_call(); } -void kokkosp_end_parallel_scan(const uint64_t kID) { - if (usingMPI) { - variorum_call_mpi(); - } else { - variorum_call(); - } -} +void kokkosp_end_parallel_scan(const uint64_t kID) { variorum_call(); } void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { - std::cout << "Device ID: " << devID << "\n"; - if (usingMPI) { - variorum_call_mpi(); - } else { - variorum_call(); - } + auto result = Kokkos::Tools::Experimental::identifier_from_devid(devID); + global_kernel_name = name; + global_instance_id = result.instance_id; + global_device_type = device_type_to_string(result.type); + global_device_id = result.device_id; + variorum_call(); } -void kokkosp_end_parallel_reduce(const uint64_t kID) { - if (usingMPI) { - variorum_call_mpi(); - } else { - variorum_call(); - } -} +void kokkosp_end_parallel_reduce(const uint64_t kID) { variorum_call(); } Kokkos::Tools::Experimental::EventSet get_event_set() { Kokkos::Tools::Experimental::EventSet my_event_set; memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here - my_event_set.init = kokkosp_init_library; - my_event_set.finalize = kokkosp_finalize_library; my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; @@ -450,7 +243,6 @@ extern "C" { namespace impl = KokkosTools::VariorumConnector; EXPOSE_INIT(impl::kokkosp_init_library) -EXPOSE_FINALIZE(impl::kokkosp_finalize_library) EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan)