From 36b084e4c48712f1c27e9738cb91e590d416bad5 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 14 Jul 2022 11:32:26 -0500 Subject: [PATCH 1/3] require targets to provide whether they are executable --- loopy/target/__init__.py | 8 ++++++++ loopy/target/c/__init__.py | 12 +++++++++++- loopy/target/cuda.py | 4 ++++ loopy/target/ispc.py | 10 ++++++++-- loopy/target/opencl.py | 4 ++++ loopy/target/pyopencl.py | 4 ++++ 6 files changed, 39 insertions(+), 3 deletions(-) diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 5bb1043b9..572c0e95f 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -49,6 +49,7 @@ from typing import (Any, Tuple, Generic, TypeVar, Sequence, ClassVar, Optional, TYPE_CHECKING) +import abc if TYPE_CHECKING: from loopy.typing import ExpressionT @@ -159,6 +160,13 @@ def get_kernel_executor(self, kernel, *args, **kwargs): """ raise NotImplementedError() + @abc.abstractproperty + def is_executable(self) -> bool: + """ + Returns *True* only if the target allows executing loopy + translation units through :attr:`loopy.TranslationUnit.__call__`. + """ + class ASTBuilderBase(Generic[ASTType]): """An interface for generating (host or device) ASTs. 
diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index b83b89bc5..16ee8e59d 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -850,7 +850,9 @@ def get_function_declaration( # subkernel launches occur only as part of entrypoint kernels for now from loopy.schedule.tools import get_subkernel_arg_info skai = get_subkernel_arg_info(kernel, subkernel_name) - passed_names = skai.passed_names + passed_names = (skai.passed_names + if self.target.is_executable + else [arg.name for arg in kernel.args]) written_names = skai.written_names else: name = Value("static void", name) @@ -1342,6 +1344,10 @@ def get_dtype_registry(self): fill_registry_with_c99_complex_types(result) return DTypeRegistryWrapper(result) + @property + def is_executable(self) -> bool: + return False + class CASTBuilder(CFamilyASTBuilder): def preamble_generators(self): @@ -1385,6 +1391,10 @@ def get_host_ast_builder(self): # enable host code generation return CFamilyASTBuilder(self) + @property + def is_executable(self) -> bool: + return True + # }}} diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index e97c84b00..def2f30de 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -255,6 +255,10 @@ def vector_dtype(self, base, count): # }}} + @property + def is_executable(self) -> bool: + return False + # }}} diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 2fbd6bcf8..385fcf9f8 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -198,6 +198,10 @@ def get_dtype_registry(self): # }}} + @property + def is_executable(self) -> bool: + return False + class ISPCASTBuilder(CFamilyASTBuilder): # {{{ top-level codegen @@ -222,7 +226,9 @@ def get_function_declaration( # subkernel launches occur only as part of entrypoint kernels for now from loopy.schedule.tools import get_subkernel_arg_info skai = get_subkernel_arg_info(codegen_state.kernel, subkernel_name) - passed_names = skai.passed_names + passed_names = (skai.passed_names + if 
self.target.is_executable + else [arg.name for arg in kernel.args]) written_names = skai.written_names else: passed_names = [arg.name for arg in kernel.args] @@ -263,7 +269,7 @@ def get_kernel_call(self, codegen_state: CodeGenerationState, "assert(programCount == (%s))" % ecm(lsize[0], PREC_NONE))) - if codegen_state.is_entrypoint: + if codegen_state.is_entrypoint and self.target.is_executable: # subkernel launches occur only as part of entrypoint kernels for now from loopy.schedule.tools import get_subkernel_arg_info skai = get_subkernel_arg_info(codegen_state.kernel, subkernel_name) diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index d548cf71d..078660664 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -598,6 +598,10 @@ def is_vector_dtype(self, dtype): def vector_dtype(self, base, count): return NumpyType(vec.types[base.numpy_dtype, count]) + @property + def is_executable(self) -> bool: + return False + # }}} diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 92f4bbd96..81a977530 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -645,6 +645,10 @@ def with_device(self, device): "stop working in 2022.", DeprecationWarning, stacklevel=2) return self + @property + def is_executable(self) -> bool: + return True + # }}} From 8dd1c9164c8ea216d1538049c33c050ba0962476 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 14 Jul 2022 11:34:50 -0500 Subject: [PATCH 2/3] test that argument invocation is preserved for non-executable targets --- test/test_target.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/test_target.py b/test/test_target.py index 13b81502b..7e9e7eba8 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -777,6 +777,24 @@ def test_passing_bajillions_of_svm_args(ctx_factory, with_gbarrier): assert (res[f"c{iargset}"].get() == iargset * multiplier + iargset).all() +def test_non_executable_targets_respect_args(): + # See 
https://github.com/inducer/loopy/issues/648 + t_unit = lp.make_kernel( + "{ : }", + """ + a[0] = 1729 + """, + [lp.GlobalArg("a,b,c,d,e", + shape=(10,), + dtype="float64")], + target=lp.CTarget() + ) + code_str = lp.generate_code_v2(t_unit).device_code() + + for var in ["b", "c", "d", "e"]: + assert code_str.find(f"double const *__restrict__ {var}") != -1 + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) From f97e873db930e105f6e19d0fec03c7481a1e1404 Mon Sep 17 00:00:00 2001 From: Sophia Vorderwuelbecke Date: Tue, 8 Nov 2022 18:07:36 +0000 Subject: [PATCH 3/3] Change naming of is_executable on Target class to single_subkernel_is_entrypoint, add docs and error when there is more than one subkernel. Update how a subkernel is detected. --- loopy/target/__init__.py | 12 ++++++++---- loopy/target/c/__init__.py | 15 ++++++++++----- loopy/target/cuda.py | 2 +- loopy/target/ispc.py | 13 +++++++------ loopy/target/opencl.py | 2 +- loopy/target/pyopencl.py | 4 ++-- 6 files changed, 29 insertions(+), 19 deletions(-) diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 572c0e95f..c078ae9a2 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -161,10 +161,14 @@ def get_kernel_executor(self, kernel, *args, **kwargs): raise NotImplementedError() @abc.abstractproperty - def is_executable(self) -> bool: - """ - Returns *True* only if the target allows executing loopy - translation units through :attr:`loopy.TranslationUnit.__call__`. + def single_subkernel_is_entrypoint(self) -> bool: + r""" + Returns *True* if *self* does NOT support generating code for + linearized kernels with more than one + :class:`~loopy.schedule.CallKernel`\ s. This guarantees the + :class:`~loopy.schedule.CallKernel` for which we generate code is the + entrypoint kernel. This also allows the target to skip the invoker + level code. 
""" diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 16ee8e59d..b0fc8fdac 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -849,9 +849,14 @@ def get_function_declaration( # subkernel launches occur only as part of entrypoint kernels for now from loopy.schedule.tools import get_subkernel_arg_info + from loopy.kernel.tools import get_subkernels skai = get_subkernel_arg_info(kernel, subkernel_name) + if (self.target.single_subkernel_is_entrypoint + and len(get_subkernels(kernel)) > 1): + raise LoopyError(f"Kernel '{kernel.name}' has more than one" + f" subkernel, not allowed in {self.target}.") passed_names = (skai.passed_names - if self.target.is_executable + if not self.target.single_subkernel_is_entrypoint else [arg.name for arg in kernel.args]) written_names = skai.written_names else: @@ -1345,8 +1350,8 @@ def get_dtype_registry(self): return DTypeRegistryWrapper(result) @property - def is_executable(self) -> bool: - return False + def single_subkernel_is_entrypoint(self) -> bool: + return True class CASTBuilder(CFamilyASTBuilder): @@ -1392,8 +1397,8 @@ def get_host_ast_builder(self): return CFamilyASTBuilder(self) @property - def is_executable(self) -> bool: - return True + def single_subkernel_is_entrypoint(self) -> bool: + return False # }}} diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index def2f30de..fba6b3007 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -256,7 +256,7 @@ def vector_dtype(self, base, count): # }}} @property - def is_executable(self) -> bool: + def single_subkernel_is_entrypoint(self) -> bool: return False # }}} diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 385fcf9f8..ec390085a 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -199,8 +199,8 @@ def get_dtype_registry(self): # }}} @property - def is_executable(self) -> bool: - return False + def single_subkernel_is_entrypoint(self) -> bool: + return True class 
ISPCASTBuilder(CFamilyASTBuilder): @@ -226,9 +226,9 @@ def get_function_declaration( # subkernel launches occur only as part of entrypoint kernels for now from loopy.schedule.tools import get_subkernel_arg_info skai = get_subkernel_arg_info(codegen_state.kernel, subkernel_name) - passed_names = (skai.passed_names - if self.target.is_executable - else [arg.name for arg in kernel.args]) + passed_names = ([arg.name for arg in kernel.args] + if self.target.single_subkernel_is_entrypoint + else skai.passed_names) written_names = skai.written_names else: passed_names = [arg.name for arg in kernel.args] @@ -269,7 +269,8 @@ def get_kernel_call(self, codegen_state: CodeGenerationState, "assert(programCount == (%s))" % ecm(lsize[0], PREC_NONE))) - if codegen_state.is_entrypoint and self.target.is_executable: + if (codegen_state.is_entrypoint and + self.target.single_subkernel_is_entrypoint): # subkernel launches occur only as part of entrypoint kernels for now from loopy.schedule.tools import get_subkernel_arg_info skai = get_subkernel_arg_info(codegen_state.kernel, subkernel_name) diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 078660664..5112abdcd 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -599,7 +599,7 @@ def vector_dtype(self, base, count): return NumpyType(vec.types[base.numpy_dtype, count]) @property - def is_executable(self) -> bool: + def single_subkernel_is_entrypoint(self) -> bool: return False # }}} diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 81a977530..8d7154f23 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -646,8 +646,8 @@ def with_device(self, device): return self @property - def is_executable(self) -> bool: - return True + def single_subkernel_is_entrypoint(self) -> bool: + return False # }}}