radv: enable lower shader call vectorizing

Totals from 5 (71.43% of 7) affected shaders:
MaxWaves: 48 -> 50 (+4.17%)
Instrs: 32012 -> 32070 (+0.18%)
CodeSize: 172672 -> 172932 (+0.15%)
VGPRs: 512 -> 496 (-3.12%)
Latency: 715333 -> 715279 (-0.01%); split: -0.03%, +0.02%
InvThroughput: 149540 -> 146150 (-2.27%); split: -2.29%, +0.02%
VClause: 900 -> 897 (-0.33%)
Copies: 4173 -> 4199 (+0.62%); split: -0.12%, +0.74%
Branches: 1512 -> 1511 (-0.07%)
PreVGPRs: 475 -> 469 (-1.26%)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20075>
This commit is contained in:
Lionel Landwerlin 2022-11-30 01:00:38 +02:00 committed by Marge Bot
parent 96332b3433
commit 863516dd63
3 changed files with 12 additions and 6 deletions

View file

@@ -2981,10 +2981,10 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stag
}
}
static bool
mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size,
unsigned num_components, nir_intrinsic_instr *low, nir_intrinsic_instr *high,
void *data)
bool
radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size,
unsigned num_components, nir_intrinsic_instr *low, nir_intrinsic_instr *high,
void *data)
{
if (num_components > 4)
return false;
@@ -3805,7 +3805,7 @@ radv_postprocess_nir(struct radv_pipeline *pipeline,
nir_load_store_vectorize_options vectorize_opts = {
.modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const |
nir_var_mem_shared | nir_var_mem_global,
.callback = mem_vectorize_callback,
.callback = radv_mem_vectorize_callback,
.robust_modes = 0,
/* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
* the final offset is not.

View file

@@ -2238,6 +2238,10 @@ void radv_pipeline_emit_hw_cs(const struct radv_physical_device *pdevice, struct
void radv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice,
struct radeon_cmdbuf *cs, const struct radv_shader *shader);
bool radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size,
unsigned num_components, nir_intrinsic_instr *low, nir_intrinsic_instr *high,
void *data);
void radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline,
const struct radv_pipeline_layout *layout);

View file

@@ -1462,7 +1462,9 @@ create_rt_shader(struct radv_device *device, const VkRayTracingPipelineCreateInf
const nir_lower_shader_calls_options opts = {
.address_format = nir_address_format_32bit_offset,
.stack_alignment = 16,
.localized_loads = true};
.localized_loads = true,
.vectorizer_callback = radv_mem_vectorize_callback,
};
uint32_t num_resume_shaders = 0;
nir_shader **resume_shaders = NULL;
nir_lower_shader_calls(nir_stage, &opts, &resume_shaders, &num_resume_shaders, nir_stage);