Skip to content

Commit

Permalink
rocm_smi: Initial event count and event table initialization event count upper bound mismatch & handling unsupported events
Browse files Browse the repository at this point in the history
  • Loading branch information
Dong Jun Woun authored and Dong Jun Woun committed Feb 14, 2025
1 parent 2a7e09c commit f0099c5
Showing 1 changed file with 11 additions and 7 deletions.
18 changes: 11 additions & 7 deletions src/components/rocm_smi/rocs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1022,7 +1022,12 @@ init_device_table(void)

for (i = 0; i < device_count; ++i) {
status = rsmi_dev_pci_bandwidth_get_p(i, &pcie_table[i]);
if (status != RSMI_STATUS_SUCCESS && status != RSMI_STATUS_NOT_YET_IMPLEMENTED) {
/*
The call above retrieves the available PCIe bandwidths. The underlying
function is deprecated but remains supported on some hardware, so a status
indicating lack of support or unimplemented functionality is ignored
rather than treated as a failure.
*/
if (status != RSMI_STATUS_SUCCESS && status != RSMI_STATUS_NOT_YET_IMPLEMENTED && status != RSMI_STATUS_NOT_SUPPORTED) {
papi_errno = PAPI_EMISC;
goto fn_fail;
}
Expand Down Expand Up @@ -1377,7 +1382,7 @@ handle_derived_events_count(const char *v_name, int32_t dev, int64_t v_variant,
(*events_count) += ROCS_PCI_BW_VARIANT__CURRENT + 1;
}
int i;
for (i = 0; i < ROCS_PCI_BW_VARIANT__LANE_IDX - ROCS_PCI_BW_VARIANT__CURRENT + 1; ++i) {
for (i = 0; i < ROCS_PCI_BW_VARIANT__LANE_IDX - ROCS_PCI_BW_VARIANT__CURRENT; ++i) {
(*events_count) += pcie_table[dev].transfer_rate.num_supported;
}

Expand Down Expand Up @@ -1585,8 +1590,7 @@ handle_derived_events(const char *v_name, int32_t dev, int64_t v_variant, int64_
return ROCS_EVENT_TYPE__DERIVED;
}

int64_t i;
for (i = 0; i <= ROCS_PCI_BW_VARIANT__CURRENT; ++i) {
for (int64_t i = 0; i <= ROCS_PCI_BW_VARIANT__CURRENT; ++i) {
events[*events_count].id = *events_count;
events[*events_count].name = get_event_name(v_name, dev, i, -1);
events[*events_count].descr = get_event_descr(v_name, i, -1);
Expand All @@ -1604,7 +1608,7 @@ handle_derived_events(const char *v_name, int32_t dev, int64_t v_variant, int64_
}

int64_t j;
for (; i <= ROCS_PCI_BW_VARIANT__LANE_IDX; ++i) {
for (int64_t i = ROCS_PCI_BW_VARIANT__CURRENT + 1; i <= ROCS_PCI_BW_VARIANT__LANE_IDX; ++i) {
for (j = 0; j < pcie_table[dev].transfer_rate.num_supported; ++j) {
events[*events_count].id = *events_count;
events[*events_count].name = get_event_name(v_name, dev, i, j);
Expand Down Expand Up @@ -1774,7 +1778,7 @@ handle_xgmi_events(int32_t dev, int *events_count, ntv_event_t *events)

status = rsmi_dev_counter_group_supported_p(dev, RSMI_EVNT_GRP_XGMI);
if (status == RSMI_STATUS_SUCCESS) {
for (i = RSMI_EVNT_XGMI_FIRST; i <= RSMI_EVNT_XGMI_LAST; ++i) {
for (i = RSMI_EVNT_XGMI_FIRST; i < RSMI_EVNT_XGMI_LAST; ++i) {
events[*events_count].id = *events_count;
events[*events_count].name = get_event_name("rsmi_dev_xgmi_evt_get", dev, i, -1);
events[*events_count].descr = get_event_descr("rsmi_dev_xgmi_evt_get", i, -1);
Expand All @@ -1794,7 +1798,7 @@ handle_xgmi_events(int32_t dev, int *events_count, ntv_event_t *events)

status = rsmi_dev_counter_group_supported_p(dev, RSMI_EVNT_GRP_XGMI_DATA_OUT);
if (status == RSMI_STATUS_SUCCESS) {
for (i = RSMI_EVNT_XGMI_DATA_OUT_FIRST; i <= RSMI_EVNT_XGMI_DATA_OUT_LAST; ++i) {
for (i = RSMI_EVNT_XGMI_DATA_OUT_FIRST; i < RSMI_EVNT_XGMI_DATA_OUT_LAST; ++i) {
events[*events_count].id = *events_count;
events[*events_count].name = get_event_name("rsmi_dev_xgmi_evt_get", dev, i, -1);
events[*events_count].descr = get_event_descr("rsmi_dev_xgmi_evt_get", i, -1);
Expand Down

0 comments on commit f0099c5

Please sign in to comment.