Skip to content

Commit

Permalink
Support for heterogeneous gpu configurations final.
Browse files Browse the repository at this point in the history
  • Loading branch information
Treece Burgess committed Feb 5, 2025
1 parent 5eecff3 commit 7e1a2a3
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 45 deletions.
37 changes: 17 additions & 20 deletions src/components/cuda/cupti_profiler.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include "cupti_config.h"
#include "lcuda_debug.h"
#include "htable.h"
#include <threads.h>

/**
* Event identifier encoding format:
Expand Down Expand Up @@ -90,9 +89,9 @@ static void *dl_nvpw;
static int num_gpus;
static gpu_record_t *avail_gpu_info;

/* main event table to store metrics */
static cuptiu_event_table_t *cuptiu_table_p;


/* load and unload cuda function pointers */
static int load_cupti_perf_sym(void);
static int unload_cupti_perf_sym(void);
Expand Down Expand Up @@ -154,7 +153,6 @@ static int get_event_collection_method(const char *evt_name);
static int get_added_events_rmr(cuptip_gpu_state_t *gpu_ctl);
static int get_counter_availability(cuptip_gpu_state_t *gpu_ctl);
static int get_measured_values(cuptip_gpu_state_t *gpu_ctl, long long *counts);
static int num_unique_devs(int num_gpus);

/* nvperf function pointers */
NVPA_Status ( *NVPW_GetSupportedChipNamesPtr ) (NVPW_GetSupportedChipNames_Params* params);
Expand Down Expand Up @@ -899,10 +897,12 @@ static int metric_get_config_image(cuptip_gpu_state_t *gpu_ctl)
static int metric_get_counter_data_prefix_image(cuptip_gpu_state_t *gpu_ctl)
{
COMPDBG("Entering.\n");
int gpu_id = gpu_ctl->gpu_id;

NVPW_CounterDataBuilder_Create_Params counterDataBuilderCreateParams = {
.structSize = NVPW_CounterDataBuilder_Create_Params_STRUCT_SIZE,
.pPriv = NULL,
.pChipName = cuptiu_table_p->avail_gpu_info[gpu_ctl->gpu_id].chip_name,
.pChipName = cuptiu_table_p->avail_gpu_info[gpu_id].chip_name,
};
nvpwCheckErrors( NVPW_CounterDataBuilder_CreatePtr(&counterDataBuilderCreateParams), goto fn_fail );

Expand Down Expand Up @@ -1106,7 +1106,8 @@ static int begin_profiling(cuptip_gpu_state_t *gpu_ctl)
cuptiCheckErrors( cuptiProfilerEnableProfilingPtr(&enableProfilingParams), goto fn_fail );

char rangeName[PAPI_MIN_STR_LEN];
sprintf(rangeName, "PAPI_Range_%d", gpu_ctl->gpu_id);
int gpu_id = gpu_ctl->gpu_id;
sprintf(rangeName, "PAPI_Range_%d", gpu_id);
CUpti_Profiler_PushRange_Params pushRangeParams = {
.structSize = CUpti_Profiler_PushRange_Params_STRUCT_SIZE,
.pPriv = NULL,
Expand Down Expand Up @@ -1361,12 +1362,13 @@ static void init_main_htable(void)
{
int i, val = 1, base = 2;

/* allocate 2 ^ 21 metric names, this matches the number of bits for the event encoding format */
/* allocate (2 ^ NAMEID_WIDTH) metric names; this matches the
number of bits reserved for the name id in the event encoding format */
for (i = 0; i < NAMEID_WIDTH; i++) {
val *= base;
}

/* allocate memory */
/* initialize struct */
cuptiu_table_p = papi_malloc(sizeof(cuptiu_event_table_t));
cuptiu_table_p->capacity = val;
cuptiu_table_p->count = 0;
Expand Down Expand Up @@ -1410,14 +1412,14 @@ int cuptip_init(void)
goto fn_fail;
}

/* initialize the main event table for metric collection */
init_main_htable();

papi_errno = init_all_metrics();
if (papi_errno != PAPI_OK) {
goto fn_fail;
}
/* initialize hash table with cuda native events */

/* collect metrics */
init_event_table();

papi_errno = cuInitPtr(0);
Expand Down Expand Up @@ -1466,7 +1468,7 @@ int verify_events(uint64_t *events_id, int num_events,
return papi_errno;
}

/* for the current device table get the next event index */
/* for a specific device table, get the current event index */
idx = state->gpu_ctl[info.device].added_events->count;

metricName = state->gpu_ctl[info.device].added_events->cuda_evts[idx];
Expand Down Expand Up @@ -1517,7 +1519,6 @@ int cuptip_ctx_create(cuptic_info_t thr_info, cuptip_control_t *pstate, uint64_t
return PAPI_ENOMEM;
}

/* for each unique gpu store the gpu id for that gpu index */
for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
state->gpu_ctl[gpu_id].gpu_id = gpu_id;
}
Expand All @@ -1528,13 +1529,13 @@ int cuptip_ctx_create(cuptic_info_t thr_info, cuptip_control_t *pstate, uint64_t
goto fn_exit;
}

/* creates a MetricsContext */
/* create a MetricsContext */
papi_errno = nvpw_cuda_metricscontext_create(state);
if (papi_errno != PAPI_OK) {
goto fn_exit;
}

/* verify user added events are available on the system */
/* verify user added events are available on the machine */
papi_errno = verify_events(events_id, num_events, state);
if (papi_errno != PAPI_OK) {
goto fn_exit;
Expand Down Expand Up @@ -1641,8 +1642,7 @@ int cuptip_ctx_read(cuptip_control_t state, long long **counters)
{
COMPDBG("Entering.\n");
int papi_errno, gpu_id, i, j = 0, method, evt_pos;
long long counts[30];
long long *counter_vals = state->counters;
long long counts[30], *counter_vals = state->counters;
cuptip_gpu_state_t *gpu_ctl = NULL;
CUcontext userCtx = NULL, ctx = NULL;

Expand Down Expand Up @@ -1693,14 +1693,11 @@ int cuptip_ctx_read(cuptip_control_t state, long long **counters)
}

for (i = 0; i < gpu_ctl->added_events->count; i++) {
printf("counts value: %d and gpu_id: %d\n", counts[i], gpu_id);
printf("evt_pos: %d\n", gpu_ctl->added_events->evt_pos[i]);
evt_pos = gpu_ctl->added_events->evt_pos[i];
if (state->read_count == 0) {
counter_vals[evt_pos] = counts[i];
}
else {
printf("WE ENTER ELSE STATEMENT.\n");
/* determine collection method such as max, min, sum, and avg for an added Cuda native event */
method = get_event_collection_method(gpu_ctl->added_events->cuda_evts[i]);
switch (method) {
Expand Down Expand Up @@ -2026,7 +2023,7 @@ static int get_ntv_events(cuptiu_event_table_t *evt_table, const char *evt_name,
int papi_errno;
char description[256];
int *count = &evt_table->count;
cuptiu_event_t *events = evt_table->events;
cuptiu_event_t *events = evt_table->events;

/* check to see if evt_name argument has been provided */
if (evt_name == NULL) {
Expand All @@ -2053,7 +2050,7 @@ static int get_ntv_events(cuptiu_event_table_t *evt_table, const char *evt_name,
return PAPI_ESYS;
}
}

cuptiu_dev_set(&event->device_map, gpu_id);

return PAPI_OK;
Expand Down
18 changes: 3 additions & 15 deletions src/components/cuda/linux-cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,9 @@ static int cuda_init_component(int cidx)
_cuda_vector.cmp_info.num_native_events = -1;
_cuda_lock = PAPI_NUM_LOCK + NUM_INNER_LOCK + cidx;

//_cuda_vector.cmp_info.initialized = 1;
_cuda_vector.cmp_info.disabled = PAPI_EDELAY_INIT;
sprintf(_cuda_vector.cmp_info.disabled_reason,
"Not initialized. Access component events to initialize it.");
"Not initialized. Access component events to initialize it.");
return PAPI_EDELAY_INIT;
}

Expand All @@ -172,7 +171,6 @@ static int cuda_init_private(void)
_papi_hwi_lock(COMPONENT_LOCK);
SUBDBG("ENTER\n");
if (_cuda_vector.cmp_info.initialized) goto fn_exit;
SUBDBG("Proceeding\n");

papi_errno = cuptid_init();
if (papi_errno != PAPI_OK) {
Expand Down Expand Up @@ -203,12 +201,9 @@ static int cuda_init_private(void)

/**
 * Make sure the component has been initialized before it is used.
 *
 * When the component's `initialized` flag is already set, the current
 * `disabled` status code is returned unchanged. Otherwise initialization is
 * delegated to cuda_init_private() and its result is returned.
 *
 * No lock is taken here; cuda_init_private() acquires COMPONENT_LOCK itself
 * and re-checks the flag before doing any work.
 *
 * @return the component's `disabled` status, or the value returned by
 *         cuda_init_private() when initialization had not happened yet.
 */
static int check_n_initialize(void)
{
    /* fast path: already initialized, just report the recorded status */
    if (_cuda_vector.cmp_info.initialized) {
        return _cuda_vector.cmp_info.disabled;
    }

    return cuda_init_private();
}

Expand All @@ -221,9 +216,7 @@ static int cuda_ntv_enum_events(unsigned int *event_code, int modifier)
}

uint64_t code = *(uint64_t *) event_code;
//_papi_hwi_lock(COMPONENT_LOCK);
papi_errno = cuptid_evt_enum(&code, modifier);
//_papi_hwi_unlock(COMPONENT_LOCK);
*event_code = (unsigned int) code;

fn_exit:
Expand All @@ -241,9 +234,7 @@ static int cuda_ntv_name_to_code(const char *name, unsigned int *event_code)
}

uint64_t code;
//_papi_hwi_lock(COMPONENT_LOCK);
papi_errno = cuptid_evt_name_to_code(name, &code);
//_papi_hwi_unlock(COMPONENT_LOCK);
*event_code = (unsigned int) code;

fn_exit:
Expand Down Expand Up @@ -277,9 +268,7 @@ static int cuda_ntv_code_to_descr(unsigned int event_code, char *descr, int len)
goto fn_fail;
}

//_papi_hwi_lock(COMPONENT_LOCK);
papi_errno = cuptid_evt_code_to_descr((uint64_t) event_code, descr, len);
//_papi_hwi_unlock(COMPONENT_LOCK);

fn_exit:
SUBDBG("EXIT: %s\n", PAPI_strerror(papi_errno));
Expand Down Expand Up @@ -371,9 +360,10 @@ static int cuda_update_control_state(hwd_control_state_t *ctl, NativeInfo_t *ntv
if (papi_errno != PAPI_OK) {
goto fn_exit;
}
printf("ntv_count: %d\n", ntv_count);

/* needed to make sure multipass events are caught with proper error code (PAPI_EMULPASS)*/
papi_errno = cuptid_ctx_create(cuda_ctl->info, &(cuda_ctl->cuptid_ctx), cuda_ctl->events_id, cuda_ctl->num_events);

fn_exit:
SUBDBG("EXIT: %s\n", PAPI_strerror(papi_errno));
return papi_errno;
Expand Down Expand Up @@ -414,8 +404,6 @@ int update_native_events(cuda_control_t *ctl, NativeInfo_t *ntv_info,
sorted_events[i].frontend_idx = i;
}

//qsort(sorted_events, ntv_count, sizeof(struct event_map_item), compare);

for (i = 0; i < ntv_count; ++i) {
ctl->events_id[i] = sorted_events[i].event_id;
ntv_info[sorted_events[i].frontend_idx].ni_position = i;
Expand Down
10 changes: 0 additions & 10 deletions src/components/cuda/papi_cupti_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -619,8 +619,6 @@ int cuptic_ctxarr_update_current(cuptic_info_t info)
return PAPI_EMISC;
}

printf("gpu_id inside update_current is: %d\n", gpu_id);

/* return cuda context bound to the calling CPU thread */
cuda_err = cuCtxGetCurrentPtr(&pctx);
if (cuda_err != cudaSuccess) {
Expand All @@ -638,7 +636,6 @@ int cuptic_ctxarr_update_current(cuptic_info_t info)
}
/* cuda context not found for calling CPU thread */
else {
printf("We create a cuda context.\n");
cudaArtCheckErrors(cudaFreePtr(NULL), return PAPI_EMISC);
cudaCheckErrors(cuCtxGetCurrentPtr(&info[gpu_id].ctx), return PAPI_EMISC);
LOGDBG("Using primary device context %p for device %d.\n", info[gpu_id].ctx, gpu_id);
Expand All @@ -657,13 +654,6 @@ int cuptic_ctxarr_update_current(cuptic_info_t info)
/**
 * Fetch the CUDA context stored for one device slot of a context array.
 *
 * @param info     context array, indexed by device
 * @param gpu_idx  index of the device whose context is requested
 * @param ctx      output; receives info[gpu_idx].ctx exactly as stored
 *
 * @return PAPI_OK always.
 *
 * The stored value is handed back verbatim, so *ctx may be NULL when nothing
 * has been recorded for that slot. There is deliberately no fallback to
 * device 0: substituting another device's context would make the caller
 * operate on the wrong GPU. No diagnostics are written to stdout.
 *
 * NOTE(review): callers presumably need to handle a NULL context themselves;
 * verify against the call sites.
 */
int cuptic_ctxarr_get_ctx(cuptic_info_t info, int gpu_idx, CUcontext *ctx)
{
    *ctx = info[gpu_idx].ctx;

    return PAPI_OK;
}

Expand Down

0 comments on commit 7e1a2a3

Please sign in to comment.