diff --git a/src/components/cuda/cupti_profiler.c b/src/components/cuda/cupti_profiler.c
index 21100f8b0..5257fcc3b 100644
--- a/src/components/cuda/cupti_profiler.c
+++ b/src/components/cuda/cupti_profiler.c
@@ -1523,8 +1523,16 @@ int cuptip_ctx_create(cuptic_info_t thr_info, cuptip_control_t *pstate, uint64_t
         state->gpu_ctl[gpu_id].gpu_id = gpu_id;
     }
 
+    /* look up the info of the last event in events_id; its device field is handed
+       to cuptic_ctxarr_update_current for the case where no cuda context is current */
+    event_info_t info;
+    papi_errno = evt_id_to_info(events_id[num_events - 1], &info);
+    if (papi_errno != PAPI_OK) {
+        goto fn_exit;
+    }
+
     /* register the user created cuda context for the current gpu if not already known */
-    papi_errno = cuptic_ctxarr_update_current(thr_info);
+    papi_errno = cuptic_ctxarr_update_current(thr_info, info.device);
     if (papi_errno != PAPI_OK) {
         goto fn_exit;
     }
diff --git a/src/components/cuda/papi_cupti_common.c b/src/components/cuda/papi_cupti_common.c
index 492eb260f..71d001bdd 100644
--- a/src/components/cuda/papi_cupti_common.c
+++ b/src/components/cuda/papi_cupti_common.c
@@ -607,46 +607,47 @@ int cuptic_ctxarr_create(cuptic_info_t *pinfo)
  * Struct that contains a Cuda context, that can be indexed into based
  * on device id.
  */
-int cuptic_ctxarr_update_current(cuptic_info_t info)
+int cuptic_ctxarr_update_current(cuptic_info_t info, int evt_dev_id)
 {
-    int gpu_id;
     CUcontext pctx;
     CUresult cuda_err;
+    CUdevice dev_id;
 
-    /* get device currently being used */
-    cuda_err = cudaGetDevicePtr(&gpu_id);
-    if (cuda_err != cudaSuccess) {
-        return PAPI_EMISC;
-    }
-
-    /* return cuda context bound to the calling CPU thread */
+    // evt_dev_id is only used when no CUDA context is current on the calling
+    // thread; it selects the device whose context gets stored in info.
+    // See if a user created a CUDA context on the
+    // calling cpu thread.
     cuda_err = cuCtxGetCurrentPtr(&pctx);
-    if (cuda_err != cudaSuccess) {
-        return PAPI_EMISC;
-    }
-    /* check to see if Cuda context exists for device */
-    if (info[gpu_id].ctx == NULL) {
-        /* cuda context found for the calling CPU thread */
-        if (pctx != NULL) {
-            LOGDBG("Registering device = %d with ctx = %p.\n", gpu_id, pctx);
-            /* store current context into struct */
-            cuda_err = cuCtxGetCurrentPtr(&info[gpu_id].ctx);
-            if (cuda_err != cudaSuccess)
+    if (cuda_err == CUDA_SUCCESS && pctx != NULL) {
+        // Get the device id associated with the user created CUDA context
+        cuda_err = cuCtxGetDevicePtr(&dev_id);
+        if (cuda_err != CUDA_SUCCESS) {
+            return PAPI_EMISC;
+        }
+
+        if (info[dev_id].ctx == NULL) {
+            // Store current user created CUDA context
+            cuda_err = cuCtxGetCurrentPtr(&info[dev_id].ctx);
+            if (cuda_err != CUDA_SUCCESS) {
                 return PAPI_EMISC;
+            }
         }
-        /* cuda context not found for calling CPU thread */
-        else {
-            cudaArtCheckErrors(cudaFreePtr(NULL), return PAPI_EMISC);
-            cudaCheckErrors(cuCtxGetCurrentPtr(&info[gpu_id].ctx), return PAPI_EMISC);
-            LOGDBG("Using primary device context %p for device %d.\n", info[gpu_id].ctx, gpu_id);
+        // A different context was stored earlier for this device; keep the
+        // first one seen and only emit a warning.
+        else if (info[dev_id].ctx != pctx) {
+            ERRDBG("Warning: cuda context for gpu %d has changed from %p to %p\n", dev_id, info[dev_id].ctx, pctx);
         }
-    }
-
-    /* if context exists then see if it has changed; if it has then keep the first
-       seen one, but show warning */
-    else if (info[gpu_id].ctx != pctx) {
-        ERRDBG("Warning: cuda context for gpu %d has changed from %p to %p\n", gpu_id, info[gpu_id].ctx, pctx);
-    }
+    }
+    // If a user did not create a CUDA context, then we will create one
+    // for them. Note that on machines with multiple devices we need to
+    // call cudaSetDevice first.
+    else {
+        cudaArtCheckErrors(cudaSetDevicePtr(evt_dev_id), return PAPI_EMISC);
+        cudaArtCheckErrors(cudaFreePtr(NULL), return PAPI_EMISC);
+
+        cudaCheckErrors(cuCtxGetCurrentPtr(&info[evt_dev_id].ctx), return PAPI_EMISC);
+        cudaCheckErrors(cuCtxPopCurrentPtr(&pctx), return PAPI_EMISC);
+    }
 
     return PAPI_OK;
 }
diff --git a/src/components/cuda/papi_cupti_common.h b/src/components/cuda/papi_cupti_common.h
index 13a30828f..398d75267 100644
--- a/src/components/cuda/papi_cupti_common.h
+++ b/src/components/cuda/papi_cupti_common.h
@@ -65,7 +65,7 @@ int cuptic_shutdown(void);
 
 /* context management interfaces */
 int cuptic_ctxarr_create(cuptic_info_t *pinfo);
-int cuptic_ctxarr_update_current(cuptic_info_t info);
+int cuptic_ctxarr_update_current(cuptic_info_t info, int evt_dev_id);
 int cuptic_ctxarr_get_ctx(cuptic_info_t info, int gpu_idx, CUcontext *ctx);
 int cuptic_ctxarr_destroy(cuptic_info_t *pinfo);
 