From 863516dd63c9d6b327223feec9c7780960a1f385 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 30 Nov 2022 01:00:38 +0200 Subject: [PATCH] radv: enable lower shader call vectorizing Totals from 5 (71.43% of 7) affected shaders: MaxWaves: 48 -> 50 (+4.17%) Instrs: 32012 -> 32070 (+0.18%) CodeSize: 172672 -> 172932 (+0.15%) VGPRs: 512 -> 496 (-3.12%) Latency: 715333 -> 715279 (-0.01%); split: -0.03%, +0.02% InvThroughput: 149540 -> 146150 (-2.27%); split: -2.29%, +0.02% VClause: 900 -> 897 (-0.33%) Copies: 4173 -> 4199 (+0.62%); split: -0.12%, +0.74% Branches: 1512 -> 1511 (-0.07%) PreVGPRs: 475 -> 469 (-1.26%) Signed-off-by: Lionel Landwerlin Reviewed-by: Konstantin Seurer Part-of: --- src/amd/vulkan/radv_pipeline.c | 10 +++++----- src/amd/vulkan/radv_private.h | 4 ++++ src/amd/vulkan/radv_rt_shader.c | 4 +++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 6300ecd0992..5f2df7201ef 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -2981,10 +2981,10 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stag } } -static bool -mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, - unsigned num_components, nir_intrinsic_instr *low, nir_intrinsic_instr *high, - void *data) +bool +radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, + unsigned num_components, nir_intrinsic_instr *low, nir_intrinsic_instr *high, + void *data) { if (num_components > 4) return false; @@ -3805,7 +3805,7 @@ radv_postprocess_nir(struct radv_pipeline *pipeline, nir_load_store_vectorize_options vectorize_opts = { .modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const | nir_var_mem_shared | nir_var_mem_global, - .callback = mem_vectorize_callback, + .callback = radv_mem_vectorize_callback, .robust_modes = 0, /* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if * the final offset is not. diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 2eb76295ab0..1117cf84911 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -2238,6 +2238,10 @@ void radv_pipeline_emit_hw_cs(const struct radv_physical_device *pdevice, struct void radv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, const struct radv_shader *shader); +bool radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, + unsigned num_components, nir_intrinsic_instr *low, nir_intrinsic_instr *high, + void *data); + void radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline, const struct radv_pipeline_layout *layout); diff --git a/src/amd/vulkan/radv_rt_shader.c b/src/amd/vulkan/radv_rt_shader.c index 64b33c1c8cd..acf529eca86 100644 --- a/src/amd/vulkan/radv_rt_shader.c +++ b/src/amd/vulkan/radv_rt_shader.c @@ -1462,7 +1462,9 @@ create_rt_shader(struct radv_device *device, const VkRayTracingPipelineCreateInf const nir_lower_shader_calls_options opts = { .address_format = nir_address_format_32bit_offset, .stack_alignment = 16, - .localized_loads = true}; + .localized_loads = true, + .vectorizer_callback = radv_mem_vectorize_callback, + }; uint32_t num_resume_shaders = 0; nir_shader **resume_shaders = NULL; nir_lower_shader_calls(nir_stage, &opts, &resume_shaders, &num_resume_shaders, nir_stage);