Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hoist reduction loops #104

Merged
merged 31 commits into from
Nov 30, 2016
Merged
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
5031f78
Drop precompute routine
FabioLuporini Nov 2, 2016
19fb417
Fix ast_update_rank utility function
FabioLuporini Nov 2, 2016
88373a5
Fix loop fusion in presence of empty loops
FabioLuporini Oct 26, 2016
875b4a0
Fix substitution after loop fusion
FabioLuporini Oct 26, 2016
a7a8ac0
Add block property to For nodes
FabioLuporini Oct 28, 2016
678cade
Make Symbol's rank an actual class (Rank)
FabioLuporini Nov 5, 2016
d110454
Add is_number property to Symbols
FabioLuporini Nov 11, 2016
61757ec
Add more properties to expressions
FabioLuporini Oct 28, 2016
893a4a7
Use lists, not sets, in loops_analysis if possible
FabioLuporini Nov 8, 2016
0e6144f
Add utility to get urepr of (iterable of) nodes
FabioLuporini Oct 31, 2016
ccb798c
Add utils summands function
FabioLuporini Nov 7, 2016
702ab2a
Capture more COFFEE expressions
FabioLuporini Oct 28, 2016
5060a67
Fix operands() for Decl
FabioLuporini Nov 17, 2016
1984750
Generalise CSE's cost model
FabioLuporini Oct 28, 2016
244a7fe
Drop expression graph wherever possible
FabioLuporini Nov 3, 2016
7dd9b23
Generalize and simplify code hoister routines
FabioLuporini Oct 28, 2016
9f62168
Generalize sharing-graph-based expr rewriting
FabioLuporini Nov 3, 2016
ce9ca5d
Add function to hoist reducible loops
FabioLuporini Nov 5, 2016
36d0445
Simplify hoister routines and interface
FabioLuporini Nov 10, 2016
74804a6
Improve sharing-graph-based rewriting
FabioLuporini Nov 14, 2016
8cbcb6f
Introduce ast_reconstructor to avoid deep_copy
FabioLuporini Nov 17, 2016
064ab50
Add utility in_read routine
FabioLuporini Nov 17, 2016
fec3baa
fix expression's reduction_loops property
FabioLuporini Nov 17, 2016
b3b99ff
Simplify ast_replace and update invocations
FabioLuporini Nov 23, 2016
3337830
Speed COFFEE up.
FabioLuporini Nov 23, 2016
6a344e8
minor fixes: flake8, test, cyclic imports
FabioLuporini Nov 25, 2016
c49166b
Merge branch 'master' into hoist-reduction-loops
FabioLuporini Nov 28, 2016
b92f96c
More Python3 compatibility
FabioLuporini Nov 28, 2016
c1cdaae
Avoid assigning utility functions to self
FabioLuporini Nov 28, 2016
2fdef6b
Fix insertion of reducible loops
FabioLuporini Nov 29, 2016
d8f2f0e
Fix conditional handling in rewriter
FabioLuporini Nov 29, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Drop precompute routine
FabioLuporini committed Nov 2, 2016
commit 5031f78c377c0782f99a585b1909e13b0df86e69
2 changes: 0 additions & 2 deletions coffee/__init__.py
Original file line number Diff line number Diff line change
@@ -169,8 +169,6 @@ def set_opt_level(optlevel):
O1 = OptimizationLevel('O1', rewrite=1)
O2 = OptimizationLevel('O2', rewrite=2, dead_ops_elimination=True)
O3 = OptimizationLevel('O3', align_pad=True, **O2)
Ofast = OptimizationLevel('Ofast', vectorize=(VectStrategy.SPEC_UAJ_PADD, 2),
precompute='noloops', **O3)

initialized = False

116 changes: 0 additions & 116 deletions coffee/optimizer.py
Original file line number Diff line number Diff line change
@@ -169,122 +169,6 @@ def eliminate_zeros(self):
zls = ZeroRemover(self.exprs, self.decls, self.hoisted, self.expr_graph)
self.nz_syms = zls.reschedule(self.header)

def precompute(self, mode='perfect'):
    """Precompute statements out of ``self.loop``. This is achieved through
    scalar code hoisting.

    The statements of ``self.loop.body`` are visited in order, stopping at
    the first one that belongs to ``self.expr_linear_loops``. Each visited
    statement (unless excluded by ``mode``) is removed from ``self.loop``,
    wrapped in a new loop built with ``ast_make_for`` and handed to
    ``insert_at_elem`` together with ``self.header.children`` and
    ``self.loop``. Symbols written by the extracted statements gain an extra
    leading dimension (``self.loop.dim``).

    Nothing is done if ``is_perfect_loop(self.loop)`` holds.

    :arg mode: drives the precomputation. Two values are possible: ['perfect',
        'noloops']. The 'perfect' mode attempts to hoist everything, making the
        loop nest perfect. The 'noloops' mode excludes inner loops from the
        precomputation: for every entry of ``self.hoisted`` that has a loop,
        both its declaration and its loop are left in place.
    :raises RuntimeError: if a node of an unsupported type is encountered
        while visiting the statements to precompute.

    Example: ::

        for i
          for r
            A[r] += f(i, ...)
          for j
            for k
              B[j][k] += g(A[r], ...)

    with mode='perfect', becomes: ::

        for i
          for r
            A[i][r] += f(...)
        for i
          for j
            for k
              B[j][k] += g(A[i][r], ...)
    """

    precomputed_block = []
    # Maps the name of each precomputed symbol to the rank prefix it gains
    precomputed_syms = {}

    def _precompute(node, outer_block):
        """Visit ``node``, appending to ``outer_block`` the statements that
        have to be precomputed and scalar-expanding the symbols involved."""

        if isinstance(node, Symbol):
            # A read/write of an already precomputed symbol: prepend the
            # extra dimension to its rank
            if node.symbol in precomputed_syms:
                node.rank = precomputed_syms[node.symbol] + node.rank

        elif isinstance(node, FlatBlock):
            outer_block.append(node)

        elif isinstance(node, Expr):
            for n in node.children:
                _precompute(n, outer_block)

        elif isinstance(node, Writer):
            sym, expr = node.children
            # Register the written symbol /before/ visiting it, so that the
            # visit below expands its rank too
            precomputed_syms[sym.symbol] = (self.loop.dim,)
            _precompute(sym, outer_block)
            _precompute(expr, outer_block)
            outer_block.append(node)

        elif isinstance(node, Decl):
            outer_block.append(node)
            if isinstance(node.init, Symbol):
                node.init.symbol = "{%s}" % node.init.symbol
            elif isinstance(node.init, Expr):
                # Split the initialisation off the declaration, turning it
                # into a separate assignment
                _precompute(Assign(dcopy(node.sym), node.init), outer_block)
                node.init = EmptyStatement()
            # The declared symbol gets an extra leading dimension
            node.sym.rank = (self.loop.size,) + node.sym.rank

        elif isinstance(node, For):
            new_children = []
            for n in node.body:
                _precompute(n, new_children)
            node.body = new_children
            outer_block.append(node)

        else:
            raise RuntimeError("Precompute error: unexpected node: %s" % str(node))

    # If the outermost loop is already perfect, there is nothing to precompute
    if is_perfect_loop(self.loop):
        return

    # Get the nodes that should not be precomputed
    no_precompute = set()
    if mode == 'noloops':
        for info in self.hoisted.values():
            if info.loop:
                no_precompute.add(info.decl)
                no_precompute.add(info.loop)

    # Visit the AST and perform the precomputation
    # NOTE(review): ``flatten(self.expr_linear_loops)`` is deliberately
    # re-evaluated at each iteration, since ``_precompute`` mutates the AST
    to_remove = []
    for n in self.loop.body:
        if n in flatten(self.expr_linear_loops):
            break
        elif n not in no_precompute:
            _precompute(n, precomputed_block)
            to_remove.append(n)

    # Clean up
    for n in to_remove:
        self.loop.body.remove(n)

    # Wrap precomputed statements within a loop. Consecutive Writers are
    # accumulated in ``searching`` so that they end up in the same loop
    searching, outer_block = [], []
    for n in precomputed_block:
        if searching and not isinstance(n, Writer):
            outer_block.append(ast_make_for(searching, self.loop))
            searching = []
        if isinstance(n, For):
            outer_block.append(ast_make_for([n], self.loop))
        elif isinstance(n, Writer):
            searching.append(n)
        else:
            outer_block.append(n)
    if searching:
        outer_block.append(ast_make_for(searching, self.loop))

    # Update the AST ...
    # ... adding the newly precomputed blocks
    insert_at_elem(self.header.children, self.loop, outer_block)
    # ... scalar-expanding the precomputed symbols
    ast_update_rank(self.loop, precomputed_syms)

def _unpick_cse(self):
"""Search for factorization opportunities across temporaries created by
common sub-expression elimination. If a gain in operation count is detected,
3 changes: 0 additions & 3 deletions coffee/plan.py
Original file line number Diff line number Diff line change
@@ -85,7 +85,6 @@ def plan_cpu(self, opts):
vectorize = opts.get('vectorize', (None, None))
align_pad = opts.get('align_pad')
split = opts.get('split')
precompute = opts.get('precompute')
dead_ops_elimination = opts.get('dead_ops_elimination')

info = visit(kernel)
@@ -126,8 +125,6 @@ def plan_cpu(self, opts):
# 2) Code specialization
if split:
loop_opt.split(split)
if precompute:
loop_opt.precompute(precompute)
if coffee.initialized and flatten(loop_opt.expr_linear_loops):
vect = LoopVectorizer(loop_opt, kernel)
if align_pad: