Skip to content

Commit

Permalink
Default machine params depends on has_gpu_feature
Browse files Browse the repository at this point in the history
  • Loading branch information
antonysigma committed Aug 24, 2023
1 parent cdc64c8 commit 65a793c
Showing 1 changed file with 21 additions and 4 deletions.
25 changes: 21 additions & 4 deletions src/autoschedulers/mullapudi2016/AutoSchedule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,27 @@ namespace {

/** Machine parameters held by the auto-scheduler: available parallelism,
 * last-level cache size, and the load-vs-arithmetic cost balance.
 *
 * NOTE(review): this text is a rendered diff without +/- markers, so each
 * data member below appears twice: the removed declaration (with a hard-coded
 * default) immediately followed by the added declaration (value-initialized
 * and then set by the constructor). */
struct ArchParams {
/** Maximum level of parallelism available. */
int parallelism = 16;
int parallelism{};

/** Size of the last-level cache (in bytes). */
uint64_t last_level_cache_size = 16 * 1024 * 1024;
uint64_t last_level_cache_size{};

/** Indicates how much more expensive is the cost of a load compared to
* the cost of an arithmetic operation at last level cache. */
float balance = 40;
float balance{};

/** If GPU target is detected, but machine parameters are not specified,
* make a realistic estimate based on consumer-grade GPUs (Nvidia GTX
* 1660/Turing), or low-cost scientific-grade GPUs (Nvidia K40/Tesla).
*
* Section 5.4 of the Mullapudi2016 article: We configure the auto-scheduler
* to target the GPU by setting the PARALLELISM_THRESHOLD to 128, ..., and
* CACHE_SIZE to 48 KB.
*
* @param has_gpu_feature  Selects which set of defaults is used:
*   - true:  parallelism = 128, last_level_cache_size = 48 * 1024 bytes,
*            balance = 20.
*   - false: parallelism = 16, last_level_cache_size = 16 * 1024 * 1024
*            bytes, balance = 40 (the same values as the removed in-class
*            defaults).
*/
constexpr ArchParams(bool has_gpu_feature)
: parallelism(has_gpu_feature ? 128 : 16), last_level_cache_size(has_gpu_feature ? 48 * 1024 : 16 * 1024 * 1024),
balance(has_gpu_feature ? 20 : 40) {
}
};

// Substitute parameter estimates into the exprs describing the box bounds.
Expand Down Expand Up @@ -2823,6 +2836,10 @@ void Partitioner::vectorize_stage(const Group &g, Stage f_handle, int stage_num,
// values produced by the function.
const auto vec_len = [&]() -> int {
if (t.has_gpu_feature()) {
/** Section 5.4 of the Mullapudi2016 article: We configure the
* auto-scheduler to target the GPU by setting the ...,
* VECTOR_WIDTH to 32.
*/
return GPUTilingDedup::min_n_threads;
}

Expand Down Expand Up @@ -3851,7 +3868,7 @@ struct Mullapudi2016 {
pipeline_outputs.push_back(f.function());
}

ArchParams arch_params;
ArchParams arch_params{target.has_gpu_feature()};
{
ParamParser parser(params_in.extra);
parser.parse("parallelism", &arch_params.parallelism);
Expand Down

0 comments on commit 65a793c

Please sign in to comment.