diff --git a/loopy/__init__.py b/loopy/__init__.py index a14bf09d5..54c06c680 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -69,7 +69,8 @@ from loopy.version import VERSION, MOST_RECENT_LANGUAGE_VERSION from loopy.transform.iname import ( - set_loop_priority, prioritize_loops, untag_inames, + set_loop_priority, prioritize_loops, constrain_loop_nesting, + untag_inames, split_iname, chunk_iname, join_inames, tag_inames, duplicate_inames, rename_iname, remove_unused_inames, split_reduction_inward, split_reduction_outward, @@ -197,7 +198,8 @@ # {{{ transforms - "set_loop_priority", "prioritize_loops", "untag_inames", + "set_loop_priority", "prioritize_loops", "constrain_loop_nesting", + "untag_inames", "split_iname", "chunk_iname", "join_inames", "tag_inames", "duplicate_inames", "rename_iname", "remove_unused_inames", diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 2f39614b8..43d97453d 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -249,6 +249,7 @@ class LoopKernel(ImmutableRecordWithoutPickling, Taggable): .. automethod:: tagged .. 
automethod:: without_tags """ + # TODO document loop_nest_constraints attribute # {{{ constructor @@ -268,6 +269,7 @@ def __init__(self, domains, instructions, args=None, iname_slab_increments=None, loop_priority=frozenset(), + loop_nest_constraints=None, silenced_warnings=None, applied_iname_rewrites=None, @@ -380,6 +382,7 @@ def __init__(self, domains, instructions, args=None, assumptions=assumptions, iname_slab_increments=iname_slab_increments, loop_priority=loop_priority, + loop_nest_constraints=loop_nest_constraints, silenced_warnings=silenced_warnings, temporary_variables=temporary_variables, local_sizes=local_sizes, @@ -1543,6 +1546,7 @@ def __setstate__(self, state): "substitutions", "iname_slab_increments", "loop_priority", + "loop_nest_constraints", "silenced_warnings", "options", "state", diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 4ded4e330..7aecacfd6 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -364,6 +364,7 @@ def find_loop_insn_dep_map( # If at least one of the three cases above succeeds for every # dep_insn_iname, we can add dep_insn to iname's set of insns # in result dict. 
+ # (means dep_insn must be scheduled before entering iname loop) iname_dep.add(dep_insn_id) return result @@ -688,7 +689,7 @@ class SchedulerState(ImmutableRecord): # TODO document simplified_depends_on_graph @property - def last_entered_loop(self): + def deepest_active_iname(self): if self.active_inames: return self.active_inames[-1] else: @@ -1021,19 +1022,22 @@ def insn_sort_key(insn_id): if not is_ready: continue - want = insn.within_inames - sched_state.parallel_inames - have = active_inames_set - sched_state.parallel_inames + nonconc_insn_inames_wanted = insn.within_inames - sched_state.parallel_inames + nonconc_active_inames = active_inames_set - sched_state.parallel_inames - if want != have: + if nonconc_insn_inames_wanted != nonconc_active_inames: + # We don't have the inames we need, may need to open more loops is_ready = False if debug_mode: - if want-have: + if nonconc_insn_inames_wanted-nonconc_active_inames: print("instruction '%s' is missing inames '%s'" - % (format_insn(kernel, insn.id), ",".join(want-have))) - if have-want: + % (format_insn(kernel, insn.id), ",".join( + nonconc_insn_inames_wanted-nonconc_active_inames))) + if nonconc_active_inames-nonconc_insn_inames_wanted: print("instruction '%s' won't work under inames '%s'" - % (format_insn(kernel, insn.id), ",".join(have-want))) + % (format_insn(kernel, insn.id), ",".join( + nonconc_active_inames-nonconc_insn_inames_wanted))) # {{{ check if scheduling this insn is compatible with preschedule @@ -1085,9 +1089,10 @@ def insn_sort_key(insn_id): # }}} - # {{{ determine reachability + # {{{ determine reachability (no active inames conflict w/insn, but + # may need more inames) - if (not is_ready and have <= want): + if (not is_ready and nonconc_active_inames <= nonconc_insn_inames_wanted): reachable_insn_ids.add(insn_id) # }}} @@ -1095,7 +1100,13 @@ def insn_sort_key(insn_id): if is_ready and debug_mode: print("ready to schedule '%s'" % format_insn(kernel, insn.id)) + # (if we wanted, we could 
check to see whether adding insn would + # violate dependencies_v2 here, as done in old in-progress branch: + # https://gitlab.tiker.net/jdsteve2/loopy/-/merge_requests/15/diffs) + if is_ready and not debug_mode: + # schedule this instruction and recurse + iid_set = frozenset([insn.id]) # {{{ update active group counts for added instruction @@ -1165,44 +1176,49 @@ def insn_sort_key(insn_id): # }}} + # No insns are ready to be scheduled now, but some may be reachable + # reachable_insn_ids = no active inames conflict w/insn, but may need more inames + # {{{ see if we're ready to leave the innermost loop - last_entered_loop = sched_state.last_entered_loop + deepest_active_iname = sched_state.deepest_active_iname - if last_entered_loop is not None: + if deepest_active_iname is not None: can_leave = True if ( - last_entered_loop in sched_state.prescheduled_inames + deepest_active_iname in sched_state.prescheduled_inames and not ( isinstance(next_preschedule_item, LeaveLoop) - and next_preschedule_item.iname == last_entered_loop)): + and next_preschedule_item.iname == deepest_active_iname)): # A prescheduled loop can only be left if the preschedule agrees. if debug_mode: print("cannot leave '%s' because of preschedule constraints" - % last_entered_loop) + % deepest_active_iname) can_leave = False - elif last_entered_loop not in sched_state.breakable_inames: + elif deepest_active_iname not in sched_state.breakable_inames: # If the iname is not breakable, then check that we've # scheduled all the instructions that require it. 
for insn_id in sched_state.unscheduled_insn_ids: insn = kernel.id_to_insn[insn_id] - if last_entered_loop in insn.within_inames: + if deepest_active_iname in insn.within_inames: + # cannot leave deepest_active_iname; insn still depends on it if debug_mode: print("cannot leave '%s' because '%s' still depends on it" - % (last_entered_loop, format_insn(kernel, insn.id))) + % (deepest_active_iname, format_insn(kernel, insn.id))) # check if there's a dependency of insn that needs to be - # outside of last_entered_loop. + # outside of deepest_active_iname. for subdep_id in gen_dependencies_except( kernel, insn_id, sched_state.scheduled_insn_ids, sched_state.simplified_depends_on_graph): - want = (kernel.insn_inames(subdep_id) + nonconc_subdep_insn_inames_wanted = ( + kernel.insn_inames(subdep_id) - sched_state.parallel_inames) - if ( - last_entered_loop not in want): + if (deepest_active_iname + not in nonconc_subdep_insn_inames_wanted): print( "%(warn)swarning:%(reset_all)s '%(iname)s', " "which the schedule is " @@ -1216,7 +1232,7 @@ def insn_sort_key(insn_id): % { "warn": Fore.RED + Style.BRIGHT, "reset_all": Style.RESET_ALL, - "iname": last_entered_loop, + "iname": deepest_active_iname, "subdep": format_insn_id(kernel, subdep_id), "dep": format_insn_id(kernel, insn_id), "subdep_i": format_insn(kernel, subdep_id), @@ -1243,23 +1259,57 @@ def insn_sort_key(insn_id): if ignore_count: ignore_count -= 1 else: - assert sched_item.iname == last_entered_loop + assert sched_item.iname == deepest_active_iname if seen_an_insn: can_leave = True break + # {{{ don't leave if doing so would violate must_nest constraints + + # don't leave if must_nest constraints require that + # additional inames be nested inside the current iname + if can_leave: + must_nest_graph = ( + sched_state.kernel.loop_nest_constraints.must_nest_graph + if sched_state.kernel.loop_nest_constraints else None) + + if must_nest_graph: + # get inames that must nest inside the current iname + must_nest_inside = 
must_nest_graph[deepest_active_iname] + + if must_nest_inside: + # get scheduled inames that are nested inside current iname + within_deepest_active_iname = False + actually_nested_inside = set() + for sched_item in sched_state.schedule: + if isinstance(sched_item, EnterLoop): + if within_deepest_active_iname: + actually_nested_inside.add(sched_item.iname) + elif sched_item.iname == deepest_active_iname: + within_deepest_active_iname = True + elif (isinstance(sched_item, LeaveLoop) and + sched_item.iname == deepest_active_iname): + break + + # don't leave if must_nest constraints require that + # additional inames be nested inside the current iname + if not must_nest_inside.issubset(actually_nested_inside): + can_leave = False + + # }}} + if can_leave and not debug_mode: for sub_sched in generate_loop_schedules_internal( sched_state.copy( schedule=( sched_state.schedule - + (LeaveLoop(iname=last_entered_loop),)), + + (LeaveLoop(iname=deepest_active_iname),)), active_inames=sched_state.active_inames[:-1], insn_ids_to_try=insn_ids_to_try, preschedule=( sched_state.preschedule - if last_entered_loop + if deepest_active_iname not in sched_state.prescheduled_inames else sched_state.preschedule[1:]), ), @@ -1273,11 +1323,11 @@ def insn_sort_key(insn_id): # {{{ see if any loop can be entered now # Find inames that are being referenced by as yet unscheduled instructions. 
- needed_inames = set() + unscheduled_nonconc_insn_inames_needed = set() for insn_id in sched_state.unscheduled_insn_ids: - needed_inames.update(kernel.insn_inames(insn_id)) + unscheduled_nonconc_insn_inames_needed.update(kernel.insn_inames(insn_id)) - needed_inames = (needed_inames + unscheduled_nonconc_insn_inames_needed = (unscheduled_nonconc_insn_inames_needed # There's no notion of 'entering' a parallel loop - sched_state.parallel_inames @@ -1286,7 +1336,8 @@ def insn_sort_key(insn_id): if debug_mode: print(75*"-") - print("inames still needed :", ",".join(needed_inames)) + print("inames still needed :", ",".join( + unscheduled_nonconc_insn_inames_needed)) print("active inames :", ",".join(sched_state.active_inames)) print("inames entered so far :", ",".join(sched_state.entered_inames)) print("reachable insns:", ",".join(reachable_insn_ids)) @@ -1295,12 +1346,15 @@ def insn_sort_key(insn_id): for grp, c in sched_state.active_group_counts.items())) print(75*"-") - if needed_inames: + if unscheduled_nonconc_insn_inames_needed: iname_to_usefulness = {} - for iname in needed_inames: + for iname in unscheduled_nonconc_insn_inames_needed: # {{{ check if scheduling this iname now is allowed/plausible + # based on preschedule constraints, loop_nest_around_map, + # loop_insn_dep_map, and data dependencies; + # if not, continue if ( iname in sched_state.prescheduled_inames @@ -1314,6 +1368,9 @@ def insn_sort_key(insn_id): currently_accessible_inames = ( active_inames_set | sched_state.parallel_inames) + + # check loop_nest_around_map to determine whether inames that must + # nest around iname are available if ( not sched_state.loop_nest_around_map[iname] <= currently_accessible_inames): @@ -1321,6 +1378,9 @@ def insn_sort_key(insn_id): print("scheduling %s prohibited by loop nest-around map" % iname) continue + # loop_insn_dep_map: dict mapping inames to other insn ids that need to + # be scheduled before the iname should be eligible for scheduling. 
+ # If loop dependency map prohibits scheduling of iname, continue if ( not sched_state.loop_insn_dep_map.get(iname, set()) <= sched_state.scheduled_insn_ids): @@ -1370,23 +1430,31 @@ def insn_sort_key(insn_id): # }}} + # so far, scheduling of iname is allowed/plausible + # {{{ determine if that gets us closer to being able to schedule an insn usefulness = None # highest insn priority enabled by iname + # suppose we were to activate this iname... + # would that get us closer to scheduling an insn? + hypothetically_active_loops = active_inames_set | {iname} + # loop over reachable_insn_ids (reachable insn: no active inames + # conflict w/insn, but may need more inames) for insn_id in reachable_insn_ids: insn = kernel.id_to_insn[insn_id] - want = insn.within_inames + wanted_insn_inames = insn.within_inames - if hypothetically_active_loops <= want: + if hypothetically_active_loops <= wanted_insn_inames: if usefulness is None: usefulness = insn.priority else: usefulness = max(usefulness, insn.priority) if usefulness is None: + # iname won't get us closer to scheduling insn if debug_mode: print("iname '%s' deemed not useful" % iname) continue @@ -1395,67 +1463,128 @@ def insn_sort_key(insn_id): # }}} - # {{{ tier building - - # Build priority tiers. If a schedule is found in the first tier, then - # loops in the second are not even tried (and so on). - loop_priority_set = set().union(*[set(prio) - for prio in - sched_state.kernel.loop_priority]) - useful_loops_set = set(iname_to_usefulness.keys()) - useful_and_desired = useful_loops_set & loop_priority_set - - if useful_and_desired: - wanted = ( - useful_and_desired - - sched_state.ilp_inames - - sched_state.vec_inames - ) - priority_tiers = [t for t in - get_priority_tiers(wanted, - sched_state.kernel.loop_priority - ) - ] - - # Update the loop priority set, because some constraints may have - # have been contradictary. 
- loop_priority_set = set().union(*[set(t) for t in priority_tiers]) - - priority_tiers.append( + # keys of iname_to_usefulness are now inames that get us closer to + # scheduling an insn + + if sched_state.kernel.loop_nest_constraints: + # {{{ use loop_nest_constraints in determining next_iname_candidates + + # inames not yet entered that would get us closer to scheduling an insn: + useful_loops_set = set(iname_to_usefulness.keys()) + + from loopy.transform.iname import ( + check_all_must_not_nests, + get_graph_sources, + ) + from pytools.graph import compute_induced_subgraph + + # since vec_inames must be innermost, + # they are not valid candidates unless only vec_inames remain + if useful_loops_set - sched_state.vec_inames: + useful_loops_set -= sched_state.vec_inames + + # to enter an iname without violating must_nest constraints, + # iname must be a source in the induced subgraph of must_nest_graph + # containing inames in useful_loops_set + must_nest_graph_full = ( + sched_state.kernel.loop_nest_constraints.must_nest_graph + if sched_state.kernel.loop_nest_constraints else None) + if must_nest_graph_full: + must_nest_graph_useful = compute_induced_subgraph( + must_nest_graph_full, + useful_loops_set - - loop_priority_set - - sched_state.ilp_inames - - sched_state.vec_inames ) + source_inames = get_graph_sources(must_nest_graph_useful) + else: + source_inames = useful_loops_set + + # since graph has a key for every iname, + # sources should be the only valid iname candidates + + # check whether entering any source_inames violates + # must-not-nest constraints, given the currently active inames + must_not_nest_constraints = ( + sched_state.kernel.loop_nest_constraints.must_not_nest + if sched_state.kernel.loop_nest_constraints else None) + if must_not_nest_constraints: + next_iname_candidates = set() + for next_iname in source_inames: + iname_orders_to_check = [ + (active_iname, next_iname) + for active_iname in active_inames_set] + + if check_all_must_not_nests( 
+ iname_orders_to_check, must_not_nest_constraints): + next_iname_candidates.add(next_iname) + else: + next_iname_candidates = source_inames + + # }}} + else: - priority_tiers = [ - useful_loops_set + # {{{ old tier building + + # Build priority tiers. If a schedule is found in the first tier, then + # loops in the second are not even tried (and so on). + loop_priority_set = set().union(*[set(prio) + for prio in + sched_state.kernel.loop_priority]) + useful_loops_set = set(iname_to_usefulness.keys()) + useful_and_desired = useful_loops_set & loop_priority_set + + if useful_and_desired: + wanted = ( + useful_and_desired - sched_state.ilp_inames - sched_state.vec_inames - ] - - # vectorization must be the absolute innermost loop - priority_tiers.extend([ - [iname] - for iname in sched_state.ilp_inames - if iname in useful_loops_set - ]) + ) + priority_tiers = [t for t in + get_priority_tiers(wanted, + sched_state.kernel.loop_priority + ) + ] + + # Update the loop priority set, because some constraints may have + # been contradictory. 
+ loop_priority_set = set().union(*[set(t) for t in priority_tiers]) + + priority_tiers.append( + useful_loops_set + - loop_priority_set + - sched_state.ilp_inames + - sched_state.vec_inames + ) + else: + priority_tiers = [ + useful_loops_set + - sched_state.ilp_inames + - sched_state.vec_inames + ] + + # vectorization must be the absolute innermost loop + priority_tiers.extend([ + [iname] + for iname in sched_state.ilp_inames + if iname in useful_loops_set + ]) + + priority_tiers.extend([ + [iname] + for iname in sched_state.vec_inames + if iname in useful_loops_set + ]) - priority_tiers.extend([ - [iname] - for iname in sched_state.vec_inames - if iname in useful_loops_set - ]) + # }}} - # }}} + if sched_state.kernel.loop_nest_constraints: + # {{{ loop over next_iname_candidates generated w/ loop_nest_constraints - if debug_mode: - print("useful inames: %s" % ",".join(useful_loops_set)) - else: - for tier in priority_tiers: + if debug_mode: + print("useful inames: %s" % ",".join(useful_loops_set)) + else: found_viable_schedule = False - for iname in sorted(tier, + # loop over iname candidates; enter inames and recurse: + for iname in sorted(next_iname_candidates, key=lambda iname: ( iname_to_usefulness.get(iname, 0), # Sort by iname to achieve deterministic @@ -1463,6 +1592,7 @@ def insn_sort_key(insn_id): iname), reverse=True): + # enter the loop and recurse for sub_sched in generate_loop_schedules_internal( sched_state.copy( schedule=( @@ -1476,16 +1606,63 @@ def insn_sort_key(insn_id): insn_ids_to_try=insn_ids_to_try, preschedule=( sched_state.preschedule - if iname not in sched_state.prescheduled_inames + if iname not in + sched_state.prescheduled_inames else sched_state.preschedule[1:]), ), debug=debug): + found_viable_schedule = True yield sub_sched + # TODO what happened if found_viable_schedule is false? 
if found_viable_schedule: return + # }}} + else: + # {{{ old looping over tiers + + if debug_mode: + print("useful inames: %s" % ",".join(useful_loops_set)) + else: + for tier in priority_tiers: + found_viable_schedule = False + + for iname in sorted(tier, + key=lambda iname: ( + iname_to_usefulness.get(iname, 0), + # Sort by iname to achieve deterministic + # ordering of generated schedules. + iname), + reverse=True): + + for sub_sched in generate_loop_schedules_internal( + sched_state.copy( + schedule=( + sched_state.schedule + + (EnterLoop(iname=iname),)), + active_inames=( + sched_state.active_inames + (iname,)), + entered_inames=( + sched_state.entered_inames + | frozenset((iname,))), + insn_ids_to_try=insn_ids_to_try, + preschedule=( + sched_state.preschedule + if iname not in + sched_state.prescheduled_inames + else sched_state.preschedule[1:]), + ), + debug=debug): + found_viable_schedule = True + yield sub_sched + + if found_viable_schedule: + return + + # }}} + # }}} if debug_mode: @@ -1496,10 +1673,32 @@ def insn_sort_key(insn_id): if inp: raise ScheduleDebugInputError(inp) + # {{{ make sure ALL must_nest_constraints are satisfied + + # (the check above avoids contradicting some must_nest constraints, + # but we don't know if all required nestings are present) + # TODO is this the only place we need to check all must_nest constraints? 
+ must_constraints_satisfied = True + if sched_state.kernel.loop_nest_constraints: + from loopy.transform.iname import ( + get_iname_nestings, + loop_nest_constraints_satisfied, + ) + must_nest_constraints = sched_state.kernel.loop_nest_constraints.must_nest + if must_nest_constraints: + sched_tiers = get_iname_nestings(sched_state.schedule) + must_constraints_satisfied = loop_nest_constraints_satisfied( + sched_tiers, must_nest_constraints, + must_not_nest_constraints=None, # (checked upon loop creation) + all_inames=kernel.all_inames()) + + # }}} + if ( not sched_state.active_inames and not sched_state.unscheduled_insn_ids - and not sched_state.preschedule): + and not sched_state.preschedule + and must_constraints_satisfied): # if done, yield result debug.log_success(sched_state.schedule) @@ -2244,7 +2443,7 @@ def print_longest_dead_end(): key_builder=LoopyKeyBuilder()) -def _get_one_scheduled_kernel_inner(kernel, callables_table): +def _get_one_scheduled_kernel_inner(kernel, callables_table, debug_args={}): # This helper function exists to ensure that the generator chain is fully # out of scope after the function returns. This allows it to be # garbage-collected in the exit handler of the @@ -2254,7 +2453,8 @@ def _get_one_scheduled_kernel_inner(kernel, callables_table): # # See https://gitlab.tiker.net/inducer/sumpy/issues/31 for context. 
- return next(iter(generate_loop_schedules(kernel, callables_table))) + return next(iter(generate_loop_schedules( + kernel, callables_table, debug_args=debug_args))) def get_one_scheduled_kernel(kernel, callables_table): @@ -2266,7 +2466,7 @@ def get_one_scheduled_kernel(kernel, callables_table): return get_one_linearized_kernel(kernel, callables_table) -def get_one_linearized_kernel(kernel, callables_table): +def get_one_linearized_kernel(kernel, callables_table, debug_args={}): from loopy import CACHING_ENABLED # must include *callables_table* within the cache key as the preschedule @@ -2287,7 +2487,7 @@ def get_one_linearized_kernel(kernel, callables_table): with ProcessLogger(logger, "%s: schedule" % kernel.name): with MinRecursionLimitForScheduling(kernel): result = _get_one_scheduled_kernel_inner(kernel, - callables_table) + callables_table, debug_args) if CACHING_ENABLED and not from_cache: schedule_cache.store_if_not_present(sched_cache_key, result) diff --git a/loopy/statistics.py b/loopy/statistics.py index 88e930ce4..41bcbb181 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1316,13 +1316,23 @@ def map_subscript(self, expr): except AttributeError: var_tags = frozenset() + is_temp = False if name in self.knl.arg_dict: array = self.knl.arg_dict[name] + elif name in self.knl.temporary_variables: + # this a temporary variable, but might have global address space + from loopy.kernel.data import AddressSpace + array = self.knl.temporary_variables[name] + if array.address_space != AddressSpace.GLOBAL: + # this is a temporary variable + return self.rec(expr.index) + # this is a temporary variable with global address space + is_temp = True else: # this is a temporary variable return self.rec(expr.index) - if not isinstance(array, lp.ArrayArg): + if (not is_temp) and not isinstance(array, lp.ArrayArg): # this array is not in global memory return self.rec(expr.index) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 
688a12d23..16847a3b6 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -28,6 +28,7 @@ RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, SubstitutionRuleMappingContext) from loopy.diagnostic import LoopyError +from pytools import Record from loopy.translation_unit import (TranslationUnit, for_each_kernel) @@ -123,6 +124,997 @@ def prioritize_loops(kernel, loop_priority): # }}} +# {{{ Handle loop nest constraints + +# {{{ Classes to house loop nest constraints + +# {{{ UnexpandedInameSet + +class UnexpandedInameSet(Record): + def __init__(self, inames, complement=False): + Record.__init__( + self, + inames=inames, + complement=complement, + ) + + def contains(self, inames): + if isinstance(inames, set): + return (not (inames & self.inames) if self.complement + else inames.issubset(self.inames)) + else: + return (inames not in self.inames if self.complement + else inames in self.inames) + + def get_inames_represented(self, iname_universe=None): + """Return the set of inames represented by the UnexpandedInameSet + """ + if self.complement: + if not iname_universe: + raise ValueError( + "Cannot expand UnexpandedInameSet %s without " + "iname_universe." % (self)) + return iname_universe-self.inames + else: + return self.inames.copy() + + def __lt__(self, other): + # FIXME is this function really necessary? If so, what should it return? + return self.__hash__() < other.__hash__() + + def __hash__(self): + return hash(repr(self)) + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. 
+ """ + + key_builder.rec(key_hash, self.inames) + key_builder.rec(key_hash, self.complement) + + def __str__(self): + return "%s{%s}" % ("~" if self.complement else "", + ",".join(i for i in sorted(self.inames))) + +# }}} + + +# {{{ LoopNestConstraints + +class LoopNestConstraints(Record): + def __init__(self, must_nest=None, must_not_nest=None, + must_nest_graph=None): + Record.__init__( + self, + must_nest=must_nest, + must_not_nest=must_not_nest, + must_nest_graph=must_nest_graph, + ) + + def __hash__(self): + return hash(repr(self)) + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. + """ + + key_builder.rec(key_hash, self.must_nest) + key_builder.rec(key_hash, self.must_not_nest) + key_builder.rec(key_hash, self.must_nest_graph) + + def __str__(self): + return "LoopNestConstraints(\n" \ + " must_nest = " + str(self.must_nest) + "\n" \ + " must_not_nest = " + str(self.must_not_nest) + "\n" \ + " must_nest_graph = " + str(self.must_nest_graph) + "\n" \ + ")" + +# }}} + +# }}} + + +# {{{ Initial loop nest constraint creation + +# {{{ process_loop_nest_specification + +def process_loop_nest_specification( + nesting, + max_tuple_size=None, + complement_sets_allowed=True, + ): + + # Ensure that user-supplied nesting conforms to syntax rules, and + # convert string representations of nestings to tuple of UnexpandedInameSets + + import re + + def _raise_loop_nest_input_error(msg): + valid_prio_rules = ( + "Valid `must_nest` description formats: " # noqa + "\"iname, iname, ...\" or (str, str, str, ...), " # noqa + "where str can be of form " # noqa + "\"iname\" or \"{iname, iname, ...}\". 
" # noqa + "No set complements allowed.\n" # noqa + "Valid `must_not_nest` description tuples must have length 2: " # noqa + "\"iname, iname\", \"iname, ~iname\", or " # noqa + "(str, str), where str can be of form " # noqa + "\"iname\", \"~iname\", \"{iname, iname, ...}\", or " # noqa + "\"~{iname, iname, ...}\"." # noqa + ) + raise ValueError( + "Invalid loop nest prioritization: %s\n" + "Loop nest prioritization formatting rules:\n%s" + % (msg, valid_prio_rules)) + + def _error_on_regex_match(match_str, target_str): + if re.findall(match_str, target_str): + _raise_loop_nest_input_error( + "Unrecognized character(s) %s in nest string %s" + % (re.findall(match_str, target_str), target_str)) + + def _process_iname_set_str(iname_set_str): + # Convert something like ~{i,j} or ~i or "i,j" to an UnexpandedInameSet + + # Remove leading/trailing whitespace + iname_set_str_stripped = iname_set_str.strip() + + if not iname_set_str_stripped: + _raise_loop_nest_input_error( + "Found 0 inames in string %s." + % (iname_set_str)) + + # Process complement sets + if iname_set_str_stripped[0] == "~": + # Make sure compelement is allowed + if not complement_sets_allowed: + _raise_loop_nest_input_error( + "Complement (~) not allowed in this loop nest string %s. " + "If you have a use-case where allowing a currently " + "disallowed set complement would be helpful, and the " + "desired nesting constraint cannot easily be expressed " + "another way, " + "please contact the Loo.py maintainers." 
+ % (iname_set_str)) + + # Remove tilde + iname_set_str_stripped = iname_set_str_stripped[1:] + if "~" in iname_set_str_stripped: + _raise_loop_nest_input_error( + "Multiple complement symbols found in iname set string %s" + % (iname_set_str)) + + # Make sure that braces are included if multiple inames present + if "," in iname_set_str_stripped and not ( + iname_set_str_stripped.startswith("{") and + iname_set_str_stripped.endswith("}")): + _raise_loop_nest_input_error( + "Complements of sets containing multiple inames must " + "enclose inames in braces: %s is not valid." + % (iname_set_str)) + + complement = True + else: + complement = False + + # Remove leading/trailing spaces + iname_set_str_stripped = iname_set_str_stripped.strip(" ") + + # Make sure braces are valid and strip them + if iname_set_str_stripped[0] == "{": + if not iname_set_str_stripped[-1] == "}": + _raise_loop_nest_input_error( + "Invalid braces: %s" % (iname_set_str)) + else: + # Remove enclosing braces + iname_set_str_stripped = iname_set_str_stripped[1:-1] + # (If there are dangling braces around, they will be caught next) + + # Remove any more spaces + iname_set_str_stripped = iname_set_str_stripped.strip() + + # Should be no remaining special characters besides comma and space + _error_on_regex_match(r"([^,\w ])", iname_set_str_stripped) + + # Split by commas or spaces to get inames + inames = re.findall(r"([\w]+)(?:[ |,]*|$)", iname_set_str_stripped) + + # Make sure iname count matches what we expect from comma count + if len(inames) != iname_set_str_stripped.count(",") + 1: + _raise_loop_nest_input_error( + "Found %d inames but expected %d in string %s." + % (len(inames), iname_set_str_stripped.count(",") + 1, + iname_set_str)) + + if len(inames) == 0: + _raise_loop_nest_input_error( + "Found empty set in string %s." 
+ % (iname_set_str)) + + # NOTE this won't catch certain cases of bad syntax, e.g., ("{h i j,,}", "k") + + return UnexpandedInameSet( + set([s.strip() for s in iname_set_str_stripped.split(",")]), + complement=complement) + + if isinstance(nesting, str): + # Enforce that constraints involving iname sets be passed as tuple. + # Iname sets defined negatively with a *single* iname are allowed here. + + # Check for any special characters besides comma, space, and tilde. + # E.g., curly braces would indicate that an iname set was NOT + # passed as a tuple, which is not allowed. + _error_on_regex_match(r"([^,\w~ ])", nesting) + + # Split by comma and process each tier + nesting_as_tuple = tuple( + _process_iname_set_str(set_str) for set_str in nesting.split(",")) + else: + assert isinstance(nesting, (tuple, list)) + # Process each tier + nesting_as_tuple = tuple( + _process_iname_set_str(set_str) for set_str in nesting) + + # Check max_tuple_size + if max_tuple_size and len(nesting_as_tuple) > max_tuple_size: + _raise_loop_nest_input_error( + "Loop nest prioritization tuple %s exceeds max tuple size %d." + % (nesting_as_tuple)) + + # Make sure nesting has len > 1 + if len(nesting_as_tuple) <= 1: + _raise_loop_nest_input_error( + "Loop nest prioritization tuple %s must have length > 1." + % (nesting_as_tuple)) + + # Return tuple of UnexpandedInameSets + return nesting_as_tuple + +# }}} + + +# {{{ constrain_loop_nesting + +@for_each_kernel +def constrain_loop_nesting( + kernel, must_nest=None, must_not_nest=None): + r"""Add the provided constraints to the kernel. + + :arg must_nest: A tuple or comma-separated string representing + an ordering of loop nesting tiers that must appear in the + linearized kernel. Each item in the tuple represents a + :class:`UnexpandedInameSet`\ s. + + :arg must_not_nest: A two-tuple or comma-separated string representing + an ordering of loop nesting tiers that must not appear in the + linearized kernel. 
Each item in the tuple represents a + :class:`UnexpandedInameSet`\ s. + + """ + + # {{{ Get any current constraints, if they exist + if kernel.loop_nest_constraints: + if kernel.loop_nest_constraints.must_nest: + must_nest_constraints_old = kernel.loop_nest_constraints.must_nest + else: + must_nest_constraints_old = set() + + if kernel.loop_nest_constraints.must_not_nest: + must_not_nest_constraints_old = \ + kernel.loop_nest_constraints.must_not_nest + else: + must_not_nest_constraints_old = set() + + if kernel.loop_nest_constraints.must_nest_graph: + must_nest_graph_old = kernel.loop_nest_constraints.must_nest_graph + else: + must_nest_graph_old = {} + else: + must_nest_constraints_old = set() + must_not_nest_constraints_old = set() + must_nest_graph_old = {} + + # }}} + + # {{{ Process must_nest + + if must_nest: + # {{{ Parse must_nest, check for conflicts, combine with old constraints + + # {{{ Parse must_nest (no complements allowed) + must_nest_tuple = process_loop_nest_specification( + must_nest, complement_sets_allowed=False) + # }}} + + # {{{ Error if someone prioritizes concurrent iname + + from loopy.kernel.data import ConcurrentTag + for iname_set in must_nest_tuple: + for iname in iname_set.inames: + if kernel.iname_tags_of_type(iname, ConcurrentTag): + raise ValueError( + "iname %s tagged with ConcurrentTag, " + "cannot use iname in must-nest constraint %s." 
+ % (iname, must_nest_tuple)) + + # }}} + + # {{{ Update must_nest graph (and check for cycles) + + must_nest_graph_new = update_must_nest_graph( + must_nest_graph_old, must_nest_tuple, kernel.all_inames()) + + # }}} + + # {{{ Make sure must_nest constraints don't violate must_not_nest + # (this may not catch all problems) + check_must_not_nest_against_must_nest_graph( + must_not_nest_constraints_old, must_nest_graph_new) + # }}} + + # {{{ Check for conflicts with inames tagged 'vec' (must be innermost) + + from loopy.kernel.data import VectorizeTag + for iname in kernel.all_inames(): + if kernel.iname_tags_of_type(iname, VectorizeTag) and ( + must_nest_graph_new.get(iname, set())): + # Must-nest graph doesn't allow iname to be a leaf, error + raise ValueError( + "Iname %s tagged as 'vec', but loop nest constraints " + "%s require that iname %s nest outside of inames %s. " + "Vectorized inames must nest innermost; cannot " + "impose loop nest specification." + % (iname, must_nest, iname, + must_nest_graph_new.get(iname, set()))) + + # }}} + + # {{{ Add new must_nest constraints to existing must_nest constraints + must_nest_constraints_new = must_nest_constraints_old | set( + [must_nest_tuple, ]) + # }}} + + # }}} + else: + # {{{ No new must_nest constraints, just keep the old ones + + must_nest_constraints_new = must_nest_constraints_old + must_nest_graph_new = must_nest_graph_old + + # }}} + + # }}} + + # {{{ Process must_not_nest + + if must_not_nest: + # {{{ Parse must_not_nest, check for conflicts, combine with old constraints + + # {{{ Parse must_not_nest; complements allowed; max_tuple_size=2 + + must_not_nest_tuple = process_loop_nest_specification( + must_not_nest, max_tuple_size=2) + + # }}} + + # {{{ Make sure must_not_nest constraints don't violate must_nest + + # (cycles are allowed in must_not_nest constraints) + import itertools + must_pairs = [] + for iname_before, inames_after in must_nest_graph_new.items(): + 
must_pairs.extend(list(itertools.product([iname_before], inames_after)))
+
+        if not check_must_not_nest(must_pairs, must_not_nest_tuple):
+            raise ValueError(
+                "constrain_loop_nesting: nest constraint conflict detected. "
+                "must_not_nest constraints %s inconsistent with "
+                "must_nest constraints %s."
+                % (must_not_nest_tuple, must_nest_constraints_new))
+
+        # }}}
+
+        # {{{ Add new must_not_nest constraints to existing must_not_nest constraints
+        must_not_nest_constraints_new = must_not_nest_constraints_old | set([
+            must_not_nest_tuple, ])
+        # }}}
+
+        # }}}
+    else:
+        # {{{ No new must_not_nest constraints, just keep the old ones
+
+        must_not_nest_constraints_new = must_not_nest_constraints_old
+
+        # }}}
+
+    # }}}
+
+    nest_constraints = LoopNestConstraints(
+        must_nest=must_nest_constraints_new,
+        must_not_nest=must_not_nest_constraints_new,
+        must_nest_graph=must_nest_graph_new,
+    )
+
+    return kernel.copy(loop_nest_constraints=nest_constraints)
+
+# }}}
+
+
+# {{{ update_must_nest_graph
+
+def update_must_nest_graph(must_nest_graph, must_nest, all_inames):
+    # Note: there should *not* be any complements in the must_nest tuples
+
+    from copy import deepcopy
+    new_graph = deepcopy(must_nest_graph)
+
+    # First, each iname must be a node in the graph
+    for missing_iname in all_inames - new_graph.keys():
+        new_graph[missing_iname] = set()
+
+    # Expand must_nest into (before, after) pairs
+    must_nest_expanded = _expand_iname_sets_in_tuple(must_nest, all_inames)
+
+    # Update must_nest_graph with new pairs
+    for before, after in must_nest_expanded:
+        new_graph[before].add(after)
+
+    # Compute transitive closure
+    from pytools.graph import compute_transitive_closure, contains_cycle
+    new_graph_closure = compute_transitive_closure(new_graph)
+    # Note: compute_transitive_closure now allows cycles, will not error
+
+    # Check for inconsistent must_nest constraints by checking for cycle:
+    if contains_cycle(new_graph_closure):
+        raise ValueError(
+            "update_must_nest_graph: 
Nest constraint cycle detected. " + "must_nest constraints %s inconsistent with existing " + "must_nest constraints %s." + % (must_nest, must_nest_graph)) + + return new_graph_closure + +# }}} + + +# {{{ _expand_iname_sets_in_tuple + +def _expand_iname_sets_in_tuple( + iname_sets_tuple, + iname_universe=None, + ): + + # First convert UnexpandedInameSets to sets. + # Note that must_nest constraints cannot be negatively defined. + positively_defined_iname_sets = [ + iname_set.get_inames_represented(iname_universe) + for iname_set in iname_sets_tuple] + + # Now expand all priority tuples into (before, after) pairs using + # Cartesian product of all pairs of sets + # (Assumes prio_sets length > 1) + import itertools + loop_priority_pairs = set() + for i, before_set in enumerate(positively_defined_iname_sets[:-1]): + for after_set in positively_defined_iname_sets[i+1:]: + loop_priority_pairs.update( + list(itertools.product(before_set, after_set))) + + # Make sure no priority tuple contains an iname twice + for prio_tuple in loop_priority_pairs: + if len(set(prio_tuple)) != len(prio_tuple): + raise ValueError( + "Loop nesting %s contains cycle: %s. " + % (iname_sets_tuple, prio_tuple)) + + return loop_priority_pairs + +# }}} + +# }}} + + +# {{{ Checking constraints + +# {{{ check_must_nest + +def check_must_nest(all_loop_nests, must_nest, all_inames): + r"""Determine whether must_nest constraint is satisfied by + all_loop_nests + + :arg all_loop_nests: A list of lists of inames, each representing + the nesting order of nested loops. + + :arg must_nest: A tuple of :class:`UnexpandedInameSet`\ s describing + nestings that must appear in all_loop_nests. + + :returns: A :class:`bool` indicating whether the must nest constraints + are satisfied by the provided loop nesting. 
+ + """ + + # In order to make sure must_nest is satisfied, we + # need to expand all must_nest tiers + + # FIXME instead of expanding tiers into all pairs up front, + # create these pairs one at a time so that we can stop as soon as we fail + + must_nest_expanded = _expand_iname_sets_in_tuple(must_nest) + + # must_nest_expanded contains pairs + for before, after in must_nest_expanded: + found = False + for nesting in all_loop_nests: + if before in nesting and after in nesting and ( + nesting.index(before) < nesting.index(after)): + found = True + break + if not found: + return False + return True + +# }}} + + +# {{{ check_must_not_nest + +def check_must_not_nest(all_loop_nests, must_not_nest): + r"""Determine whether must_not_nest constraint is satisfied by + all_loop_nests + + :arg all_loop_nests: A list of lists of inames, each representing + the nesting order of nested loops. + + :arg must_not_nest: A two-tuple of :class:`UnexpandedInameSet`\ s + describing nestings that must not appear in all_loop_nests. + + :returns: A :class:`bool` indicating whether the must_not_nest constraints + are satisfied by the provided loop nesting. + + """ + + # Note that must_not_nest may only contain two tiers + + for nesting in all_loop_nests: + + # Go through each pair in all_loop_nests + for i, iname_before in enumerate(nesting): + for iname_after in nesting[i+1:]: + + # Check whether it violates must not nest + if (must_not_nest[0].contains(iname_before) + and must_not_nest[1].contains(iname_after)): + # Stop as soon as we fail + return False + return True + +# }}} + + +# {{{ check_all_must_not_nests + +def check_all_must_not_nests(all_loop_nests, must_not_nests): + r"""Determine whether all must_not_nest constraints are satisfied by + all_loop_nests + + :arg all_loop_nests: A list of lists of inames, each representing + the nesting order of nested loops. 
+ + :arg must_not_nests: A set of two-tuples of :class:`UnexpandedInameSet`\ s + describing nestings that must not appear in all_loop_nests. + + :returns: A :class:`bool` indicating whether the must_not_nest constraints + are satisfied by the provided loop nesting. + + """ + + for must_not_nest in must_not_nests: + if not check_must_not_nest(all_loop_nests, must_not_nest): + return False + return True + +# }}} + + +# {{{ loop_nest_constraints_satisfied + +def loop_nest_constraints_satisfied( + all_loop_nests, + must_nest_constraints=None, + must_not_nest_constraints=None, + all_inames=None): + r"""Determine whether must_not_nest constraint is satisfied by + all_loop_nests + + :arg all_loop_nests: A set of lists of inames, each representing + the nesting order of loops. + + :arg must_nest_constraints: An iterable of tuples of + :class:`UnexpandedInameSet`\ s, each describing nestings that must + appear in all_loop_nests. + + :arg must_not_nest_constraints: An iterable of two-tuples of + :class:`UnexpandedInameSet`\ s, each describing nestings that must not + appear in all_loop_nests. + + :returns: A :class:`bool` indicating whether the constraints + are satisfied by the provided loop nesting. 
+ + """ + + # Check must-nest constraints + if must_nest_constraints: + for must_nest in must_nest_constraints: + if not check_must_nest( + all_loop_nests, must_nest, all_inames): + return False + + # Check must-not-nest constraints + if must_not_nest_constraints: + for must_not_nest in must_not_nest_constraints: + if not check_must_not_nest( + all_loop_nests, must_not_nest): + return False + + return True + +# }}} + + +# {{{ check_must_not_nest_against_must_nest_graph + +def check_must_not_nest_against_must_nest_graph( + must_not_nest_constraints, must_nest_graph): + r"""Ensure none of the must_not_nest constraints are violated by + nestings represented in the must_nest_graph + + :arg must_not_nest_constraints: A set of two-tuples of + :class:`UnexpandedInameSet`\ s describing nestings that must not appear + in loop nestings. + + :arg must_nest_graph: A :class:`dict` mapping each iname to other inames + that must be nested inside it. + + """ + + if must_not_nest_constraints and must_nest_graph: + import itertools + must_pairs = [] + for iname_before, inames_after in must_nest_graph.items(): + must_pairs.extend( + list(itertools.product([iname_before], inames_after))) + if any(not check_must_not_nest(must_pairs, must_not_nest_tuple) + for must_not_nest_tuple in must_not_nest_constraints): + raise ValueError( + "Nest constraint conflict detected. " + "must_not_nest constraints %s inconsistent with " + "must_nest relationships (must_nest graph: %s)." + % (must_not_nest_constraints, must_nest_graph)) + +# }}} + + +# {{{ get_iname_nestings + +def get_iname_nestings(linearization): + """Return a list of iname tuples representing the deepest loop nestings + in a kernel linearization. 
+ """ + from loopy.schedule import EnterLoop, LeaveLoop + nestings = [] + current_tiers = [] + already_exiting_loops = False + for lin_item in linearization: + if isinstance(lin_item, EnterLoop): + already_exiting_loops = False + current_tiers.append(lin_item.iname) + elif isinstance(lin_item, LeaveLoop): + if not already_exiting_loops: + nestings.append(tuple(current_tiers)) + already_exiting_loops = True + del current_tiers[-1] + return nestings + +# }}} + + +# {{{ get_graph_sources + +def get_graph_sources(graph): + sources = set(graph.keys()) + for non_sources in graph.values(): + sources -= non_sources + return sources + +# }}} + +# }}} + + +# {{{ updating constraints during transformation + +# {{{ replace_inames_in_nest_constraints + +def replace_inames_in_nest_constraints( + inames_to_replace, replacement_inames, old_constraints, + coalesce_new_iname_duplicates=False, + ): + """ + :arg inames_to_replace: A set of inames that may exist in + `old_constraints`, each of which is to be replaced with all inames + in `replacement_inames`. + + :arg replacement_inames: A set of inames, all of which will repalce each + iname in `inames_to_replace` in `old_constraints`. + + :arg old_constraints: An iterable of tuples containing one or more + :class:`UnexpandedInameSet` objects. 
+ """ + + # replace each iname in inames_to_replace + # with *all* inames in replacement_inames + + # loop through old_constraints and handle each nesting independently + new_constraints = set() + for old_nesting in old_constraints: + # loop through each iname_set in this nesting and perform replacement + new_nesting = [] + for iname_set in old_nesting: + + # find inames to be replaced + inames_found = inames_to_replace & iname_set.inames + + # create the new set of inames with the replacements + if inames_found: + new_inames = iname_set.inames - inames_found + new_inames.update(replacement_inames) + else: + new_inames = iname_set.inames.copy() + + new_nesting.append( + UnexpandedInameSet(new_inames, iname_set.complement)) + + # if we've removed things, new_nesting might only contain 1 item, + # in which case it's meaningless and we should just remove it + if len(new_nesting) > 1: + new_constraints.add(tuple(new_nesting)) + + # When joining inames, we may need to coalesce: + # e.g., if we join `i` and `j` into `ij`, and old_nesting was + # [{i, k}, {j, h}], at this point we have [{ij, k}, {ij, h}] + # which contains a cycle. If coalescing is enabled, change this + # to [{k}, ij, {h}] to remove the cycle. 
+ if coalesce_new_iname_duplicates: + + def coalesce_duplicate_inames_in_nesting(nesting, coalesce_candidates): + # TODO would like this to be fully generic, but for now, assumes + # all UnexpandedInameSets have complement=False, which works if + # we're only using this for must_nest constraints since they cannot + # have complements + for iname_set in nesting: + assert not iname_set.complement + + import copy + # copy and convert nesting to list so we can modify + coalesced_nesting = list(copy.deepcopy(nesting)) + + # repeat coalescing step until we don't find any adjacent pairs + # containing duplicates (among coalesce_candidates) + found_duplicates = True + while found_duplicates: + found_duplicates = False + # loop through each iname_set in nesting and coalesce + # (assume new_nesting has at least 2 items) + i = 0 + while i < len(coalesced_nesting)-1: + iname_set_before = coalesced_nesting[i] + iname_set_after = coalesced_nesting[i+1] + # coalesce for each iname candidate + for iname in coalesce_candidates: + if (iname_set_before.inames == set([iname, ]) and + iname_set_after.inames == set([iname, ])): + # before/after contain single iname to be coalesced, + # -> remove iname_set_after + del coalesced_nesting[i+1] + found_duplicates = True + elif (iname_set_before.inames == set([iname, ]) and + iname in iname_set_after.inames): + # before contains single iname to be coalesced, + # after contains iname along with others, + # -> remove iname from iname_set_after.inames + coalesced_nesting[i+1] = UnexpandedInameSet( + inames=iname_set_after.inames - set([iname, ]), + complement=iname_set_after.complement, + ) + found_duplicates = True + elif (iname in iname_set_before.inames and + iname_set_after.inames == set([iname, ])): + # after contains single iname to be coalesced, + # before contains iname along with others, + # -> remove iname from iname_set_before.inames + coalesced_nesting[i] = UnexpandedInameSet( + inames=iname_set_before.inames - set([iname, ]), + 
complement=iname_set_before.complement, + ) + found_duplicates = True + elif (iname in iname_set_before.inames and + iname in iname_set_after.inames): + # before and after contain iname along with others, + # -> remove iname from iname_set_{before,after}.inames + # and insert it in between them + coalesced_nesting[i] = UnexpandedInameSet( + inames=iname_set_before.inames - set([iname, ]), + complement=iname_set_before.complement, + ) + coalesced_nesting[i+1] = UnexpandedInameSet( + inames=iname_set_after.inames - set([iname, ]), + complement=iname_set_after.complement, + ) + coalesced_nesting.insert(i+1, UnexpandedInameSet( + inames=set([iname, ]), + complement=False, + )) + found_duplicates = True + # else, iname was not found in both sets, so do nothing + i = i + 1 + + return tuple(coalesced_nesting) + + # loop through new_constraints; handle each nesting independently + coalesced_constraints = set() + for new_nesting in new_constraints: + coalesced_constraints.add( + coalesce_duplicate_inames_in_nesting( + new_nesting, replacement_inames)) + + return coalesced_constraints + else: + return new_constraints + +# }}} + + +# {{{ replace_inames_in_graph + +def replace_inames_in_graph( + inames_to_replace, replacement_inames, old_graph): + # replace each iname in inames_to_replace with all inames in replacement_inames + + new_graph = {} + iname_to_replace_found_as_key = False + union_of_inames_after_for_replaced_keys = set() + for iname, inames_after in old_graph.items(): + # create new inames_after + new_inames_after = inames_after.copy() + inames_found = inames_to_replace & new_inames_after + + if inames_found: + new_inames_after -= inames_found + new_inames_after.update(replacement_inames) + + # update dict + if iname in inames_to_replace: + iname_to_replace_found_as_key = True + union_of_inames_after_for_replaced_keys = \ + union_of_inames_after_for_replaced_keys | new_inames_after + # don't add this iname as a key in new graph, + # its replacements will be added 
below + else: + new_graph[iname] = new_inames_after + + # add replacement iname keys + if iname_to_replace_found_as_key: + for new_key in replacement_inames: + new_graph[new_key] = union_of_inames_after_for_replaced_keys.copy() + + # check for cycle + from pytools.graph import contains_cycle + if contains_cycle(new_graph): + raise ValueError( + "replace_inames_in_graph: Loop priority cycle detected. " + "Cannot replace inames %s with inames %s." + % (inames_to_replace, replacement_inames)) + + return new_graph + +# }}} + + +# {{{ replace_inames_in_all_nest_constraints + +def replace_inames_in_all_nest_constraints( + kernel, old_inames, new_inames, + coalesce_new_iname_duplicates=False, + pairs_that_must_not_voilate_constraints=set(), + ): + # replace each iname in old_inames with all inames in new_inames + + # get old must_nest and must_not_nest + # (must_nest_graph will be rebuilt) + if kernel.loop_nest_constraints: + old_must_nest = kernel.loop_nest_constraints.must_nest + old_must_not_nest = kernel.loop_nest_constraints.must_not_nest + # (these could still be None) + else: + old_must_nest = None + old_must_not_nest = None + + if old_must_nest: + # check to make sure special pairs don't conflict with constraints + for iname_before, iname_after in pairs_that_must_not_voilate_constraints: + if iname_before in kernel.loop_nest_constraints.must_nest_graph[ + iname_after]: + raise ValueError( + "Implied nestings violate existing must-nest constraints." 
+ "\nimplied nestings: %s\nmust-nest constraints: %s" + % (pairs_that_must_not_voilate_constraints, old_must_nest)) + + new_must_nest = replace_inames_in_nest_constraints( + old_inames, new_inames, old_must_nest, + coalesce_new_iname_duplicates=coalesce_new_iname_duplicates, + ) + else: + new_must_nest = None + + if old_must_not_nest: + # check to make sure special pairs don't conflict with constraints + if not check_all_must_not_nests( + pairs_that_must_not_voilate_constraints, old_must_not_nest): + raise ValueError( + "Implied nestings violate existing must-not-nest constraints." + "\nimplied nestings: %s\nmust-not-nest constraints: %s" + % (pairs_that_must_not_voilate_constraints, old_must_not_nest)) + + new_must_not_nest = replace_inames_in_nest_constraints( + old_inames, new_inames, old_must_not_nest, + coalesce_new_iname_duplicates=False, + # (for now, never coalesce must-not-nest constraints) + ) + # each must not nest constraint may only contain two tiers + # TODO coalesce_new_iname_duplicates? + else: + new_must_not_nest = None + + # Rebuild must_nest graph + if new_must_nest: + new_must_nest_graph = {} + new_all_inames = ( + kernel.all_inames() - set(old_inames)) | set(new_inames) + from pytools.graph import CycleError + for must_nest_tuple in new_must_nest: + try: + new_must_nest_graph = update_must_nest_graph( + new_must_nest_graph, must_nest_tuple, new_all_inames) + except CycleError: + raise ValueError( + "Loop priority cycle detected when replacing inames %s " + "with inames %s. 
Previous must_nest constraints: %s" + % (old_inames, new_inames, old_must_nest)) + + # make sure none of the must_nest constraints violate must_not_nest + # this may not catch all problems + check_must_not_nest_against_must_nest_graph( + new_must_not_nest, new_must_nest_graph) + else: + new_must_nest_graph = None + + return kernel.copy( + loop_nest_constraints=LoopNestConstraints( + must_nest=new_must_nest, + must_not_nest=new_must_not_nest, + must_nest_graph=new_must_nest_graph, + ) + ) + +# }}} + +# }}} + +# }}} + + # {{{ split/chunk inames # {{{ backend @@ -389,6 +1381,20 @@ def _split_iname_in_dependee(dep): new_prio = new_prio + (prio_iname,) new_priorities.append(new_prio) + # {{{ update nest constraints + + # Add {inner,outer} wherever iname_to_split is found in constraints, while + # still keeping the original around. Then let remove_unused_inames handle + # removal of the old iname if necessary + + # update must_nest, must_not_nest, and must_nest_graph + kernel = replace_inames_in_all_nest_constraints( + kernel, + set([iname_to_split, ]), [iname_to_split, inner_iname, outer_iname], + ) + + # }}} + kernel = kernel.copy( domains=new_domains, iname_slab_increments=iname_slab_increments, @@ -626,7 +1632,7 @@ def join_inames(kernel, inames, new_iname=None, tag=None, within=None): from loopy.match import parse_match within = parse_match(within) - # {{{ return the same kernel if no kernel matches + # {{{ return the same kernel if no insn matches if not any(within(kernel, insn) for insn in kernel.instructions): return kernel @@ -721,6 +1727,37 @@ def subst_within_inames(fid): applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_dict] )) + # {{{ update must_nest, must_not_nest, and must_nest_graph + + if kernel.loop_nest_constraints and ( + kernel.loop_nest_constraints.must_nest or + kernel.loop_nest_constraints.must_not_nest or + kernel.loop_nest_constraints.must_nest_graph): + + if within != parse_match(None): + raise NotImplementedError( + 
"join_inames() does not yet handle new loop nest " + "constraints when within is not None.") + + # When joining inames, we create several implied loop nestings. + # make sure that these implied nestings don't violate existing + # constraints. + + # (will fail if cycle is created in must-nest graph) + implied_nestings = set() + inames_orig_order = inames[::-1] # this was reversed above + for i, iname_before in enumerate(inames_orig_order[:-1]): + for iname_after in inames_orig_order[i+1:]: + implied_nestings.add((iname_before, iname_after)) + + kernel = replace_inames_in_all_nest_constraints( + kernel, set(inames), [new_iname], + coalesce_new_iname_duplicates=True, + pairs_that_must_not_voilate_constraints=implied_nestings, + ) + + # }}} + from loopy.match import parse_stack_match within = parse_stack_match(within) @@ -872,6 +1909,7 @@ def parse_tag(tag): # }}} + from loopy.kernel.data import ConcurrentTag, VectorizeTag knl_inames = kernel.inames.copy() for name, new_tag in iname_to_tag.items(): if not new_tag: @@ -882,6 +1920,36 @@ def parse_tag(tag): knl_inames[name] = knl_inames[name].tagged(new_tag) + # {{{ loop nest constraint handling + + if isinstance(new_tag, VectorizeTag): + # {{{ vec_inames will be nested innermost, check whether this + # conflicts with must-nest constraints + must_nest_graph = (kernel.loop_nest_constraints.must_nest_graph + if kernel.loop_nest_constraints else None) + if must_nest_graph and must_nest_graph.get(iname, set()): + # iname is not a leaf + raise ValueError( + "Loop priorities provided specify that iname %s nest " + "outside of inames %s, but vectorized inames " + "must nest innermost. Cannot tag %s with 'vec' tag." 
+ % (iname, must_nest_graph.get(iname, set()), iname)) + # }}} + + elif isinstance(new_tag, ConcurrentTag) and kernel.loop_nest_constraints: + # {{{ Don't allow tagging of must_nest iname as concurrent + must_nest = kernel.loop_nest_constraints.must_nest + if must_nest: + for nesting in must_nest: + for iname_set in nesting: + if iname in iname_set.inames: + raise ValueError("cannot tag '%s' as concurrent--" + "iname involved in must-nest constraint %s." + % (iname, nesting)) + # }}} + + # }}} + return kernel.copy(inames=knl_inames) # }}} @@ -999,6 +2067,14 @@ def duplicate_inames(kernel, inames, within, new_inames=None, suffix=None, from loopy.kernel.tools import DomainChanger domch = DomainChanger(kernel, frozenset([old_iname])) + # # {{{ update nest constraints + + # (don't remove any unused inames yet, that happens later) + kernel = replace_inames_in_all_nest_constraints( + kernel, set([old_iname, ]), [old_iname, new_iname]) + + # }}} + from loopy.isl_helpers import duplicate_axes kernel = kernel.copy( domains=domch.get_domains_with( @@ -1063,6 +2139,18 @@ def _rename_iname_in_dim_in(dep): # }}} + # TODO why isn't remove_unused_inames called on kernel here? 
+ + # {{{ if there are any now unused inames, remove from nest constraints + + now_unused_inames = (set(inames) - get_used_inames(kernel)) & set(inames) + kernel = replace_inames_in_all_nest_constraints( + kernel, old_inames=now_unused_inames, new_inames=[], + coalesce_new_iname_duplicates=False, + ) + + # }}} + return kernel # }}} @@ -1256,6 +2344,16 @@ def rename_iname(kernel, old_iname, new_iname, existing_ok=False, within=None): "--cannot rename" % new_iname) if does_exist: + + # TODO implement this + if kernel.loop_nest_constraints and ( + kernel.loop_nest_constraints.must_nest or + kernel.loop_nest_constraints.must_not_nest or + kernel.loop_nest_constraints.must_nest_graph): + raise NotImplementedError( + "rename_iname() does not yet handle new loop nest " + "constraints when does_exist=True.") + # {{{ check that the domains match up dom = kernel.get_inames_domain(frozenset((old_iname, new_iname))) @@ -1443,6 +2541,15 @@ def _remove_iname_from_dep(dep): # }}} + # {{{ Remove inames from loop nest constraints + + kernel = replace_inames_in_all_nest_constraints( + kernel, old_inames=unused_inames, new_inames=[], + coalesce_new_iname_duplicates=False, + ) + + # }}} + return kernel diff --git a/test/test_nest_constraints.py b/test/test_nest_constraints.py new file mode 100644 index 000000000..a931e9e72 --- /dev/null +++ b/test/test_nest_constraints.py @@ -0,0 +1,1160 @@ +__copyright__ = "Copyright (C) 2021 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or 
substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import sys +import loopy as lp +import numpy as np +import pyopencl as cl +from loopy import preprocess_kernel, get_one_linearized_kernel + +import logging +logger = logging.getLogger(__name__) + +try: + import faulthandler +except ImportError: + pass +else: + faulthandler.enable() + +from pyopencl.tools import pytest_generate_tests_for_pyopencl \ + as pytest_generate_tests + +__all__ = [ + "pytest_generate_tests", + "cl" # "cl.create_some_context" + ] + + +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa + + +# {{{ Helper functions + +def _process_and_linearize(prog, knl_name="loopy_kernel"): + # Return linearized kernel + proc_prog = preprocess_kernel(prog) + lin_prog = get_one_linearized_kernel( + proc_prog[knl_name], proc_prog.callables_table) + return lin_prog + + +def _linearize_and_get_nestings(prog, knl_name="loopy_kernel"): + from loopy.transform.iname import get_iname_nestings + lin_knl = _process_and_linearize(prog, knl_name) + return get_iname_nestings(lin_knl.linearization) + +# }}} + + +# {{{ test_loop_constraint_string_parsing + +def test_loop_constraint_string_parsing(): + ref_knl = lp.make_kernel( + "{ [g,h,i,j,k,xx]: 0<=g,h,i,j,k,xx 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: foldmethod=marker