mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
radv: enable lower shader call vectorizing
Totals from 5 (71.43% of 7) affected shaders:
MaxWaves: 48 -> 50 (+4.17%)
Instrs: 32012 -> 32070 (+0.18%)
CodeSize: 172672 -> 172932 (+0.15%)
VGPRs: 512 -> 496 (-3.12%)
Latency: 715333 -> 715279 (-0.01%); split: -0.03%, +0.02%
InvThroughput: 149540 -> 146150 (-2.27%); split: -2.29%, +0.02%
VClause: 900 -> 897 (-0.33%)
Copies: 4173 -> 4199 (+0.62%); split: -0.12%, +0.74%
Branches: 1512 -> 1511 (-0.07%)
PreVGPRs: 475 -> 469 (-1.26%)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20075>
This commit is contained in:
parent
96332b3433
commit
863516dd63
3 changed files with 12 additions and 6 deletions
|
|
@@ -2981,10 +2981,10 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stag
|
|||
}
|
||||
}
|
||||
|
||||
-static bool
|
||||
-mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size,
|
||||
-unsigned num_components, nir_intrinsic_instr *low, nir_intrinsic_instr *high,
|
||||
-void *data)
|
||||
+bool
|
||||
+radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size,
|
||||
+unsigned num_components, nir_intrinsic_instr *low, nir_intrinsic_instr *high,
|
||||
+void *data)
|
||||
{
|
||||
if (num_components > 4)
|
||||
return false;
|
||||
|
|
@@ -3805,7 +3805,7 @@ radv_postprocess_nir(struct radv_pipeline *pipeline,
|
|||
nir_load_store_vectorize_options vectorize_opts = {
|
||||
.modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const |
|
||||
nir_var_mem_shared | nir_var_mem_global,
|
||||
-.callback = mem_vectorize_callback,
|
||||
+.callback = radv_mem_vectorize_callback,
|
||||
.robust_modes = 0,
|
||||
/* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
|
||||
* the final offset is not.
|
||||
|
|
|
|||
|
|
@@ -2238,6 +2238,10 @@ void radv_pipeline_emit_hw_cs(const struct radv_physical_device *pdevice, struct
|
|||
void radv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice,
|
||||
struct radeon_cmdbuf *cs, const struct radv_shader *shader);
|
||||
|
||||
+bool radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size,
|
||||
+unsigned num_components, nir_intrinsic_instr *low, nir_intrinsic_instr *high,
|
||||
+void *data);
|
||||
|
||||
void radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline,
|
||||
const struct radv_pipeline_layout *layout);
|
||||
|
||||
|
|
|
|||
|
|
@@ -1462,7 +1462,9 @@ create_rt_shader(struct radv_device *device, const VkRayTracingPipelineCreateInf
|
|||
const nir_lower_shader_calls_options opts = {
|
||||
.address_format = nir_address_format_32bit_offset,
|
||||
.stack_alignment = 16,
|
||||
-.localized_loads = true};
|
||||
+.localized_loads = true,
|
||||
+.vectorizer_callback = radv_mem_vectorize_callback,
|
||||
+};
|
||||
uint32_t num_resume_shaders = 0;
|
||||
nir_shader **resume_shaders = NULL;
|
||||
nir_lower_shader_calls(nir_stage, &opts, &resume_shaders, &num_resume_shaders, nir_stage);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue