-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathncu_metrics.py
89 lines (77 loc) · 4.27 KB
/
ncu_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
def METRICS_COMPUTE():
    """Return the throughput/duration metric table as ``{metric_id: label}``.

    A new dict is built on every call, so callers may mutate the result
    freely. Entries keep the order in which they are listed below.
    """
    metric_label_pairs = [
        ("sm__throughput.avg.pct_of_peak_sustained_elapsed", "Compute (SM) Throughput"),
        ("gpu__time_duration.sum", "Duration"),
        ("gpu__compute_memory_throughput.avg.pct_of_peak_sustained_elapsed", "Memory Throughput"),
        ("gpc__cycles_elapsed.max", "Elapsed Cycles"),
        ("l1tex__throughput.avg.pct_of_peak_sustained_active", "L1/TEX Cache Throughput"),
        ("lts__throughput.avg.pct_of_peak_sustained_elapsed", "L2 Cache Throughput"),
        ("gpu__dram_throughput.avg.pct_of_peak_sustained_elapsed", "DRAM Throughput"),
        ("launch__waves_per_multiprocessor", "Waves Per SM"),
    ]
    return dict(metric_label_pairs)
def METRICS_ROOFLINE():
    """Return the roofline-related metric table as ``{metric_id: label}``.

    The table holds a byte count, an instruction count and a peak integer
    instruction rate. A new dict is created on each call.
    """
    metric_label_pairs = (
        ("dram__bytes.sum", "# of Bytes"),
        ("smsp__inst_executed.sum", "# of Insts"),
        (
            "sm__sass_thread_inst_executed_op_integer_pred_on.sum.peak_sustained",
            "Peak Inst/Cycle (INT)",
        ),
    )
    return dict(metric_label_pairs)
def METRICS_MEMORY():
    """Return the memory metric table as ``{metric_id: label}``.

    The table is assembled from three sections -- general memory metrics,
    L1 cache counters and L2 cache counters -- concatenated in that order.
    No key appears in more than one section, so merging never overwrites an
    entry. A new dict is created on each call.
    """
    # Hit rates, memory-pipe utilisation and memory instructions by width.
    general = {
        "l1tex__t_sector_hit_rate.pct": "L1/TEX Hit Rate",
        "lts__t_sector_hit_rate.pct": "L2 Hit Rate",
        "sm__memory_throughput.avg.pct_of_peak_sustained_elapsed": "Mem Pipes Busy",
        "smsp__sass_inst_executed_op_memory_8b.sum": "8-bit Mem Inst",
        "smsp__sass_inst_executed_op_memory_16b.sum": "16-bit Mem Inst",
        "smsp__sass_inst_executed_op_memory_32b.sum": "32-bit Mem Inst",
        "smsp__sass_inst_executed_op_memory_64b.sum": "64-bit Mem Inst",
        "smsp__sass_inst_executed_op_memory_128b.sum": "128-bit Mem Inst",
    }

    # L1 cache: sectors first, then requests, for global/local loads/stores.
    l1_cache = {
        "l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum": "# Sec L1 (Global Load)",
        "l1tex__t_sectors_pipe_lsu_mem_global_op_st.sum": "# Sec L1 (Global Store)",
        "l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum": "# Sec L1 (Local Load)",
        "l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum": "# Sec L1 (Local Store)",
        "l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum": "# Req L1 (Global Load)",
        "l1tex__t_requests_pipe_lsu_mem_global_op_st.sum": "# Req L1 (Global Store)",
        "l1tex__t_requests_pipe_lsu_mem_local_op_ld.sum": "# Req L1 (Local Load)",
        "l1tex__t_requests_pipe_lsu_mem_local_op_st.sum": "# Req L1 (Local Store)",
    }

    # L2 cache: sectors, requests, then load hit/miss lookups.
    l2_cache = {
        "lts__t_sectors_srcunit_tex_op_read.sum": "# Sec L2 Load",
        "lts__t_sectors_srcunit_tex_op_write.sum": "# Sec L2 Store",
        "lts__t_requests_srcunit_tex_op_read.sum": "# Req L2 Load",
        "lts__t_requests_srcunit_tex_op_write.sum": "# Req L2 Store",
        "lts__t_sectors_srcunit_tex_op_read_lookup_hit.sum": "# L2 Load Hit",
        "lts__t_sectors_srcunit_tex_op_read_lookup_miss.sum": "# L2 Load Misses",
    }

    return {**general, **l1_cache, **l2_cache}
def METRICS_OCCUPANCY():
    """Return the occupancy metric table as ``{metric_id: label}``.

    Covers theoretical and achieved occupancy plus the four block-limit
    entries. A new dict is created on each call.
    """
    metric_label_pairs = [
        ("sm__maximum_warps_per_active_cycle_pct", "Theoretical Occupancy"),
        ("sm__maximum_warps_avg_per_active_cycle", "Theoretical Active Warps Per SM"),
        ("sm__warps_active.avg.pct_of_peak_sustained_active", "Achieved Occupancy"),
        ("sm__warps_active.avg.per_cycle_active", "Achieved Active Warps Per SM"),
        ("launch__occupancy_limit_registers", "Block Limit Registers"),
        ("launch__occupancy_limit_shared_mem", "Block Limit Shared Mem"),
        ("launch__occupancy_limit_warps", "Block Limit Warp"),
        ("launch__occupancy_limit_blocks", "Block Limit SM"),
    ]
    return dict(metric_label_pairs)
def METRICS_INSTRUCTION():
    """Return the instruction-count metric table as ``{metric_id: label}``.

    Lists executed and issued instruction totals followed by their
    per-scheduler averages. A new dict is created on each call.
    """
    metric_label_pairs = (
        ("smsp__inst_executed.sum", "Executed Insts"),
        ("smsp__inst_issued.sum", "Issued Insts"),
        ("smsp__inst_executed.avg", "Avg. Executed Insts Per Scheduler"),
        ("smsp__inst_issued.avg", "Avg. Issued Insts Per Scheduler"),
    )
    return dict(metric_label_pairs)
def METRICS_SCHEDULER():
    """Return the warp-scheduler metric table as ``{metric_id: label}``.

    Covers active/eligible/issued warps per scheduler, the eligible vs.
    not-eligible percentages and the two maximum-warps entries. A new dict
    is created on each call.
    """
    metric_label_pairs = [
        ("smsp__warps_active.avg.per_cycle_active", "Active Warps Per Scheduler"),
        ("smsp__warps_eligible.avg.per_cycle_active", "Eligible Warps Per Scheduler"),
        ("smsp__issue_active.avg.per_cycle_active", "Issued Warps Per Scheduler"),
        ("smsp__issue_inst0.avg.pct_of_peak_sustained_active", "No Eligible"),
        ("smsp__issue_active.avg.pct_of_peak_sustained_active", "One or More Eligible"),
        ("smsp__warps_active.avg.peak_sustained", "GPU Maximum Warps Per Scheduler"),
        ("smsp__maximum_warps_avg_per_active_cycle", "Theoretical Warps Per Scheduler"),
    ]
    return dict(metric_label_pairs)
def METRICS_BRANCH():
    """Return the branch metric table as ``{metric_id: label}``.

    Contains the branch-instruction count and the branch-efficiency
    percentage. A new dict is created on each call.
    """
    metric_label_pairs = (
        # NOTE(review): this label starts with a space, unlike the other
        # "# ..." labels in this file. It is kept verbatim because callers
        # may match on the exact string -- confirm whether it is intended.
        ("smsp__inst_executed_op_branch.sum", " # Branch Insts"),
        ("smsp__sass_average_branch_targets_threads_uniform.pct", "Branch Efficiency"),
    )
    return dict(metric_label_pairs)