From a6ca4614640a8618d114900ec7d4db3c53013000 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Thu, 5 Oct 2023 11:24:43 -0400 Subject: [PATCH 1/5] Add ComputeDerivativeGroup*NV capabilities to trim capabilities pass. --- source/opt/trim_capabilities_pass.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/opt/trim_capabilities_pass.h b/source/opt/trim_capabilities_pass.h index 9202b2e9af..92777a229d 100644 --- a/source/opt/trim_capabilities_pass.h +++ b/source/opt/trim_capabilities_pass.h @@ -91,7 +91,9 @@ class TrimCapabilitiesPass : public Pass { spv::Capability::StoragePushConstant16, spv::Capability::StorageUniform16, spv::Capability::StorageUniformBufferBlock16, - spv::Capability::ImageMSArray + spv::Capability::ImageMSArray, + spv::Capability::ComputeDerivativeGroupQuadsNV, + spv::Capability::ComputeDerivativeGroupLinearNV // clang-format on }; From 3f588c62009b4664601bcc58ea7bad2b9d71e1e3 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Fri, 6 Oct 2023 12:58:57 -0400 Subject: [PATCH 2/5] Add SPV_NV_compute_shader_derivatives to allow lists No tests needed for this. The code path is well tested. Just adding new data. 
--- source/opt/aggressive_dead_code_elim_pass.cpp | 95 +++++++------------ .../opt/local_access_chain_convert_pass.cpp | 3 +- source/opt/local_single_block_elim_pass.cpp | 3 +- source/opt/local_single_store_elim_pass.cpp | 3 +- 4 files changed, 40 insertions(+), 64 deletions(-) diff --git a/source/opt/aggressive_dead_code_elim_pass.cpp b/source/opt/aggressive_dead_code_elim_pass.cpp index 55feca811e..6542de6b58 100644 --- a/source/opt/aggressive_dead_code_elim_pass.cpp +++ b/source/opt/aggressive_dead_code_elim_pass.cpp @@ -941,67 +941,40 @@ Pass::Status AggressiveDCEPass::Process() { void AggressiveDCEPass::InitExtensions() { extensions_allowlist_.clear(); - extensions_allowlist_.insert({ - "SPV_AMD_shader_explicit_vertex_parameter", - "SPV_AMD_shader_trinary_minmax", - "SPV_AMD_gcn_shader", - "SPV_KHR_shader_ballot", - "SPV_AMD_shader_ballot", - "SPV_AMD_gpu_shader_half_float", - "SPV_KHR_shader_draw_parameters", - "SPV_KHR_subgroup_vote", - "SPV_KHR_8bit_storage", - "SPV_KHR_16bit_storage", - "SPV_KHR_device_group", - "SPV_KHR_multiview", - "SPV_NVX_multiview_per_view_attributes", - "SPV_NV_viewport_array2", - "SPV_NV_stereo_view_rendering", - "SPV_NV_sample_mask_override_coverage", - "SPV_NV_geometry_shader_passthrough", - "SPV_AMD_texture_gather_bias_lod", - "SPV_KHR_storage_buffer_storage_class", - // SPV_KHR_variable_pointers - // Currently do not support extended pointer expressions - "SPV_AMD_gpu_shader_int16", - "SPV_KHR_post_depth_coverage", - "SPV_KHR_shader_atomic_counter_ops", - "SPV_EXT_shader_stencil_export", - "SPV_EXT_shader_viewport_index_layer", - "SPV_AMD_shader_image_load_store_lod", - "SPV_AMD_shader_fragment_mask", - "SPV_EXT_fragment_fully_covered", - "SPV_AMD_gpu_shader_half_float_fetch", - "SPV_GOOGLE_decorate_string", - "SPV_GOOGLE_hlsl_functionality1", - "SPV_GOOGLE_user_type", - "SPV_NV_shader_subgroup_partitioned", - "SPV_EXT_demote_to_helper_invocation", - "SPV_EXT_descriptor_indexing", - "SPV_NV_fragment_shader_barycentric", - 
"SPV_NV_compute_shader_derivatives", - "SPV_NV_shader_image_footprint", - "SPV_NV_shading_rate", - "SPV_NV_mesh_shader", - "SPV_NV_ray_tracing", - "SPV_KHR_ray_tracing", - "SPV_KHR_ray_query", - "SPV_EXT_fragment_invocation_density", - "SPV_EXT_physical_storage_buffer", - "SPV_KHR_physical_storage_buffer", - "SPV_KHR_terminate_invocation", - "SPV_KHR_shader_clock", - "SPV_KHR_vulkan_memory_model", - "SPV_KHR_subgroup_uniform_control_flow", - "SPV_KHR_integer_dot_product", - "SPV_EXT_shader_image_int64", - "SPV_KHR_non_semantic_info", - "SPV_KHR_uniform_group_instructions", - "SPV_KHR_fragment_shader_barycentric", - "SPV_NV_bindless_texture", - "SPV_EXT_shader_atomic_float_add", - "SPV_EXT_fragment_shader_interlock", - }); + extensions_allowlist_.insert( + {"SPV_AMD_shader_explicit_vertex_parameter", + "SPV_AMD_shader_trinary_minmax", "SPV_AMD_gcn_shader", + "SPV_KHR_shader_ballot", "SPV_AMD_shader_ballot", + "SPV_AMD_gpu_shader_half_float", "SPV_KHR_shader_draw_parameters", + "SPV_KHR_subgroup_vote", "SPV_KHR_8bit_storage", "SPV_KHR_16bit_storage", + "SPV_KHR_device_group", "SPV_KHR_multiview", + "SPV_NVX_multiview_per_view_attributes", "SPV_NV_viewport_array2", + "SPV_NV_stereo_view_rendering", "SPV_NV_sample_mask_override_coverage", + "SPV_NV_geometry_shader_passthrough", "SPV_AMD_texture_gather_bias_lod", + "SPV_KHR_storage_buffer_storage_class", + // SPV_KHR_variable_pointers + // Currently do not support extended pointer expressions + "SPV_AMD_gpu_shader_int16", "SPV_KHR_post_depth_coverage", + "SPV_KHR_shader_atomic_counter_ops", "SPV_EXT_shader_stencil_export", + "SPV_EXT_shader_viewport_index_layer", + "SPV_AMD_shader_image_load_store_lod", "SPV_AMD_shader_fragment_mask", + "SPV_EXT_fragment_fully_covered", "SPV_AMD_gpu_shader_half_float_fetch", + "SPV_GOOGLE_decorate_string", "SPV_GOOGLE_hlsl_functionality1", + "SPV_GOOGLE_user_type", "SPV_NV_shader_subgroup_partitioned", + "SPV_EXT_demote_to_helper_invocation", "SPV_EXT_descriptor_indexing", + 
"SPV_NV_fragment_shader_barycentric", + "SPV_NV_compute_shader_derivatives", "SPV_NV_shader_image_footprint", + "SPV_NV_shading_rate", "SPV_NV_mesh_shader", "SPV_NV_ray_tracing", + "SPV_KHR_ray_tracing", "SPV_KHR_ray_query", + "SPV_EXT_fragment_invocation_density", "SPV_EXT_physical_storage_buffer", + "SPV_KHR_physical_storage_buffer", "SPV_KHR_terminate_invocation", + "SPV_KHR_shader_clock", "SPV_KHR_vulkan_memory_model", + "SPV_KHR_subgroup_uniform_control_flow", "SPV_KHR_integer_dot_product", + "SPV_EXT_shader_image_int64", "SPV_KHR_non_semantic_info", + "SPV_KHR_uniform_group_instructions", + "SPV_KHR_fragment_shader_barycentric", "SPV_NV_bindless_texture", + "SPV_EXT_shader_atomic_float_add", "SPV_EXT_fragment_shader_interlock", + "SPV_NV_compute_shader_derivatives"}); } Instruction* AggressiveDCEPass::GetHeaderBranch(BasicBlock* blk) { diff --git a/source/opt/local_access_chain_convert_pass.cpp b/source/opt/local_access_chain_convert_pass.cpp index fac4cea64f..ea1bdeeb3b 100644 --- a/source/opt/local_access_chain_convert_pass.cpp +++ b/source/opt/local_access_chain_convert_pass.cpp @@ -428,7 +428,8 @@ void LocalAccessChainConvertPass::InitExtensions() { "SPV_KHR_uniform_group_instructions", "SPV_KHR_fragment_shader_barycentric", "SPV_KHR_vulkan_memory_model", "SPV_NV_bindless_texture", "SPV_EXT_shader_atomic_float_add", - "SPV_EXT_fragment_shader_interlock"}); + "SPV_EXT_fragment_shader_interlock", + "SPV_NV_compute_shader_derivatives"}); } bool LocalAccessChainConvertPass::AnyIndexIsOutOfBounds( diff --git a/source/opt/local_single_block_elim_pass.cpp b/source/opt/local_single_block_elim_pass.cpp index 0acffda335..7502d0497e 100644 --- a/source/opt/local_single_block_elim_pass.cpp +++ b/source/opt/local_single_block_elim_pass.cpp @@ -289,7 +289,8 @@ void LocalSingleBlockLoadStoreElimPass::InitExtensions() { "SPV_KHR_vulkan_memory_model", "SPV_NV_bindless_texture", "SPV_EXT_shader_atomic_float_add", - "SPV_EXT_fragment_shader_interlock"}); + 
"SPV_EXT_fragment_shader_interlock", + "SPV_NV_compute_shader_derivatives"}); } } // namespace opt diff --git a/source/opt/local_single_store_elim_pass.cpp b/source/opt/local_single_store_elim_pass.cpp index 77b3420ce9..f6fc2760e0 100644 --- a/source/opt/local_single_store_elim_pass.cpp +++ b/source/opt/local_single_store_elim_pass.cpp @@ -139,7 +139,8 @@ void LocalSingleStoreElimPass::InitExtensionAllowList() { "SPV_KHR_vulkan_memory_model", "SPV_NV_bindless_texture", "SPV_EXT_shader_atomic_float_add", - "SPV_EXT_fragment_shader_interlock"}); + "SPV_EXT_fragment_shader_interlock", + "SPV_NV_compute_shader_derivatives"}); } bool LocalSingleStoreElimPass::ProcessVariable(Instruction* var_inst) { std::vector users; From 2ce1c1f7f31e9c2a82479a015d1e31c63f45ad3a Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Fri, 6 Oct 2023 13:23:01 -0400 Subject: [PATCH 3/5] Add tests for trimming pass. --- test/opt/trim_capabilities_pass_test.cpp | 54 ++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/test/opt/trim_capabilities_pass_test.cpp b/test/opt/trim_capabilities_pass_test.cpp index 8aaf860dc9..c94319a93e 100644 --- a/test/opt/trim_capabilities_pass_test.cpp +++ b/test/opt/trim_capabilities_pass_test.cpp @@ -63,6 +63,8 @@ TEST_F(TrimCapabilitiesPassTest, CheckKnownAliasTransformations) { OpCapability DotProductInput4x8BitKHR OpCapability DotProductInput4x8BitPackedKHR OpCapability DotProductKHR + OpCapability ComputeDerivativeGroupQuadsNV + OpCapability ComputeDerivativeGroupLinearNV ; CHECK: OpCapability Linkage ; CHECK-NOT: OpCapability StorageUniform16 ; CHECK-NOT: OpCapability StorageUniformBufferBlock16 @@ -89,6 +91,8 @@ TEST_F(TrimCapabilitiesPassTest, CheckKnownAliasTransformations) { ; CHECK-NOT: OpCapability DotProductInput4x8BitKHR ; CHECK-NOT: OpCapability DotProductInput4x8BitPackedKHR ; CHECK-NOT: OpCapability DotProductKHR +; CHECK-NOT: OpCapability ComputeDerivativeGroupQuadsNV +; CHECK-NOT: OpCapability ComputeDerivativeGroupLinearNV ; 
CHECK: OpCapability UniformAndStorageBuffer16BitAccess ; CHECK: OpCapability StorageBuffer16BitAccess ; CHECK: OpCapability ShaderViewportIndexLayerEXT @@ -2129,6 +2133,56 @@ TEST_F(TrimCapabilitiesPassTest, Float64_RemainsWhenUsed) { EXPECT_EQ(std::get<1>(result), Pass::Status::SuccessWithoutChange); } +TEST_F(TrimCapabilitiesPassTest, + ComputeDerivativeGroupQuads_RemainsWithExecMode) { + const std::string kTest = R"( + OpCapability ComputeDerivativeGroupQuadsNV + OpCapability ComputeDerivativeGroupLinearNV +; CHECK-NOT: OpCapability ComputeDerivativeGroupLinearNV +; CHECK: OpCapability ComputeDerivativeGroupQuadsNV +; CHECK-NOT: OpCapability ComputeDerivativeGroupLinearNV + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" + OpExecutionMode %1 DerivativeGroupQuadsNV + %void = OpTypeVoid + %float = OpTypeFloat 64 + %3 = OpTypeFunction %void + %1 = OpFunction %void None %3 + %6 = OpLabel + OpReturn + OpFunctionEnd + )"; + const auto result = + SinglePassRunAndMatch(kTest, /* skip_nop= */ false); + EXPECT_EQ(std::get<1>(result), Pass::Status::SuccessWithChange); +} + +TEST_F(TrimCapabilitiesPassTest, + ComputeDerivativeGroupLinear_RemainsWithExecMode) { + const std::string kTest = R"( + OpCapability ComputeDerivativeGroupLinearNV + OpCapability ComputeDerivativeGroupQuadsNV +; CHECK-NOT: OpCapability ComputeDerivativeGroupQuadsNV +; CHECK: OpCapability ComputeDerivativeGroupLinearNV +; CHECK-NOT: OpCapability ComputeDerivativeGroupQuadsNV + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" + OpExecutionMode %1 DerivativeGroupLinearNV + %void = OpTypeVoid + %float = OpTypeFloat 64 + %3 = OpTypeFunction %void + %1 = OpFunction %void None %3 + %6 = OpLabel + OpReturn + OpFunctionEnd + )"; + const auto result = + SinglePassRunAndMatch(kTest, /* skip_nop= */ false); + EXPECT_EQ(std::get<1>(result), Pass::Status::SuccessWithChange); +} + } // namespace } // namespace opt } // namespace spvtools 
From c0bdace5a3a9e9fcc627a098cfd63b7a00d77fe6 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Tue, 10 Oct 2023 11:05:13 -0400 Subject: [PATCH 4/5] Fix tests --- test/opt/trim_capabilities_pass_test.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/opt/trim_capabilities_pass_test.cpp b/test/opt/trim_capabilities_pass_test.cpp index c94319a93e..8f49c55cbe 100644 --- a/test/opt/trim_capabilities_pass_test.cpp +++ b/test/opt/trim_capabilities_pass_test.cpp @@ -2142,11 +2142,12 @@ TEST_F(TrimCapabilitiesPassTest, ; CHECK: OpCapability ComputeDerivativeGroupQuadsNV ; CHECK-NOT: OpCapability ComputeDerivativeGroupLinearNV OpCapability Shader +; CHECK: OpExtension "SPV_NV_compute_shader_derivatives" + OpExtension "SPV_NV_compute_shader_derivatives" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %1 "main" OpExecutionMode %1 DerivativeGroupQuadsNV %void = OpTypeVoid - %float = OpTypeFloat 64 %3 = OpTypeFunction %void %1 = OpFunction %void None %3 %6 = OpLabel @@ -2167,6 +2168,8 @@ TEST_F(TrimCapabilitiesPassTest, ; CHECK: OpCapability ComputeDerivativeGroupLinearNV ; CHECK-NOT: OpCapability ComputeDerivativeGroupQuadsNV OpCapability Shader +; CHECK: OpExtension "SPV_NV_compute_shader_derivatives" + OpExtension "SPV_NV_compute_shader_derivatives" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %1 "main" OpExecutionMode %1 DerivativeGroupLinearNV From bb9118e74a67625b1076f93933876d37f7257400 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Mon, 16 Oct 2023 11:34:01 -0400 Subject: [PATCH 5/5] Undo clang format changes. 
--- source/opt/aggressive_dead_code_elim_pass.cpp | 99 ++++++++++++------- 1 file changed, 65 insertions(+), 34 deletions(-) diff --git a/source/opt/aggressive_dead_code_elim_pass.cpp b/source/opt/aggressive_dead_code_elim_pass.cpp index 6542de6b58..b372571f51 100644 --- a/source/opt/aggressive_dead_code_elim_pass.cpp +++ b/source/opt/aggressive_dead_code_elim_pass.cpp @@ -941,40 +941,71 @@ Pass::Status AggressiveDCEPass::Process() { void AggressiveDCEPass::InitExtensions() { extensions_allowlist_.clear(); - extensions_allowlist_.insert( - {"SPV_AMD_shader_explicit_vertex_parameter", - "SPV_AMD_shader_trinary_minmax", "SPV_AMD_gcn_shader", - "SPV_KHR_shader_ballot", "SPV_AMD_shader_ballot", - "SPV_AMD_gpu_shader_half_float", "SPV_KHR_shader_draw_parameters", - "SPV_KHR_subgroup_vote", "SPV_KHR_8bit_storage", "SPV_KHR_16bit_storage", - "SPV_KHR_device_group", "SPV_KHR_multiview", - "SPV_NVX_multiview_per_view_attributes", "SPV_NV_viewport_array2", - "SPV_NV_stereo_view_rendering", "SPV_NV_sample_mask_override_coverage", - "SPV_NV_geometry_shader_passthrough", "SPV_AMD_texture_gather_bias_lod", - "SPV_KHR_storage_buffer_storage_class", - // SPV_KHR_variable_pointers - // Currently do not support extended pointer expressions - "SPV_AMD_gpu_shader_int16", "SPV_KHR_post_depth_coverage", - "SPV_KHR_shader_atomic_counter_ops", "SPV_EXT_shader_stencil_export", - "SPV_EXT_shader_viewport_index_layer", - "SPV_AMD_shader_image_load_store_lod", "SPV_AMD_shader_fragment_mask", - "SPV_EXT_fragment_fully_covered", "SPV_AMD_gpu_shader_half_float_fetch", - "SPV_GOOGLE_decorate_string", "SPV_GOOGLE_hlsl_functionality1", - "SPV_GOOGLE_user_type", "SPV_NV_shader_subgroup_partitioned", - "SPV_EXT_demote_to_helper_invocation", "SPV_EXT_descriptor_indexing", - "SPV_NV_fragment_shader_barycentric", - "SPV_NV_compute_shader_derivatives", "SPV_NV_shader_image_footprint", - "SPV_NV_shading_rate", "SPV_NV_mesh_shader", "SPV_NV_ray_tracing", - "SPV_KHR_ray_tracing", "SPV_KHR_ray_query", - 
"SPV_EXT_fragment_invocation_density", "SPV_EXT_physical_storage_buffer", - "SPV_KHR_physical_storage_buffer", "SPV_KHR_terminate_invocation", - "SPV_KHR_shader_clock", "SPV_KHR_vulkan_memory_model", - "SPV_KHR_subgroup_uniform_control_flow", "SPV_KHR_integer_dot_product", - "SPV_EXT_shader_image_int64", "SPV_KHR_non_semantic_info", - "SPV_KHR_uniform_group_instructions", - "SPV_KHR_fragment_shader_barycentric", "SPV_NV_bindless_texture", - "SPV_EXT_shader_atomic_float_add", "SPV_EXT_fragment_shader_interlock", - "SPV_NV_compute_shader_derivatives"}); + + // clang-format off + extensions_allowlist_.insert({ + "SPV_AMD_shader_explicit_vertex_parameter", + "SPV_AMD_shader_trinary_minmax", + "SPV_AMD_gcn_shader", + "SPV_KHR_shader_ballot", + "SPV_AMD_shader_ballot", + "SPV_AMD_gpu_shader_half_float", + "SPV_KHR_shader_draw_parameters", + "SPV_KHR_subgroup_vote", + "SPV_KHR_8bit_storage", + "SPV_KHR_16bit_storage", + "SPV_KHR_device_group", + "SPV_KHR_multiview", + "SPV_NVX_multiview_per_view_attributes", + "SPV_NV_viewport_array2", + "SPV_NV_stereo_view_rendering", + "SPV_NV_sample_mask_override_coverage", + "SPV_NV_geometry_shader_passthrough", + "SPV_AMD_texture_gather_bias_lod", + "SPV_KHR_storage_buffer_storage_class", + // SPV_KHR_variable_pointers + // Currently do not support extended pointer expressions + "SPV_AMD_gpu_shader_int16", + "SPV_KHR_post_depth_coverage", + "SPV_KHR_shader_atomic_counter_ops", + "SPV_EXT_shader_stencil_export", + "SPV_EXT_shader_viewport_index_layer", + "SPV_AMD_shader_image_load_store_lod", + "SPV_AMD_shader_fragment_mask", + "SPV_EXT_fragment_fully_covered", + "SPV_AMD_gpu_shader_half_float_fetch", + "SPV_GOOGLE_decorate_string", + "SPV_GOOGLE_hlsl_functionality1", + "SPV_GOOGLE_user_type", + "SPV_NV_shader_subgroup_partitioned", + "SPV_EXT_demote_to_helper_invocation", + "SPV_EXT_descriptor_indexing", + "SPV_NV_fragment_shader_barycentric", + "SPV_NV_compute_shader_derivatives", + "SPV_NV_shader_image_footprint", + 
"SPV_NV_shading_rate", + "SPV_NV_mesh_shader", + "SPV_NV_ray_tracing", + "SPV_KHR_ray_tracing", + "SPV_KHR_ray_query", + "SPV_EXT_fragment_invocation_density", + "SPV_EXT_physical_storage_buffer", + "SPV_KHR_physical_storage_buffer", + "SPV_KHR_terminate_invocation", + "SPV_KHR_shader_clock", + "SPV_KHR_vulkan_memory_model", + "SPV_KHR_subgroup_uniform_control_flow", + "SPV_KHR_integer_dot_product", + "SPV_EXT_shader_image_int64", + "SPV_KHR_non_semantic_info", + "SPV_KHR_uniform_group_instructions", + "SPV_KHR_fragment_shader_barycentric", + "SPV_NV_bindless_texture", + "SPV_EXT_shader_atomic_float_add", + "SPV_EXT_fragment_shader_interlock", + "SPV_NV_compute_shader_derivatives" + }); + // clang-format on } Instruction* AggressiveDCEPass::GetHeaderBranch(BasicBlock* blk) {