/*
 * Reviewer summary of the patch below (RADV compute register precomputation).
 *
 * radv_cmd_buffer.c:
 *   radv_emit_compute_shader() stops calling radv_get_compute_resource_limits()
 *   and stops applying S_00B81C_NUM_THREAD_FULL() to info.cs.block_size[] at
 *   emit time. It now emits shader->info.regs.cs.compute_resource_limits and
 *   shader->info.regs.cs.compute_num_thread_{x,y,z} directly.
 *
 * radv_pipeline_compute.c / radv_pipeline_compute.h:
 *   radv_get_compute_resource_limits() now takes a
 *   `const struct radv_shader_info *` instead of a `const struct radv_shader *`.
 *   The header gains a forward declaration of struct radv_shader_info, and
 *   radv_get_compute_pipeline_metadata() is updated to pass &cs->info.
 *
 * radv_shader.c:
 *   Adds static radv_precompute_registers_hw_cs(), which writes the four
 *   regs.cs fields of binary->info, and static radv_precompute_registers(),
 *   which calls it when info->stage is MESA_SHADER_COMPUTE or MESA_SHADER_TASK
 *   and does nothing for any other stage. radv_postprocess_binary_config()
 *   calls radv_precompute_registers() right before its final `return true`.
 *
 * radv_shader_info.h:
 *   struct radv_shader_info gains a nested `regs.cs` struct holding four
 *   uint32_t values: compute_num_thread_x/y/z and compute_resource_limits.
 *
 * NOTE(review): radv_get_compute_pipeline_metadata() still recomputes the
 * resource limits through radv_get_compute_resource_limits() rather than
 * reading info.regs.cs.compute_resource_limits; confirm that is intentional.
 * NOTE(review): the hunk headers declare multi-line hunks (e.g. 12 -> 11
 * lines), but this copy of the patch has its line breaks collapsed; restore
 * the original line structure before trying to apply it.
 */
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index e6ca331d76b..3270a05db69 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1916,12 +1916,11 @@ radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_ radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3); } - radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, radv_get_compute_resource_limits(pdev, shader)); - + radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, shader->info.regs.cs.compute_resource_limits); radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); - radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0])); - radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1])); - radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2])); + radeon_emit(cs, shader->info.regs.cs.compute_num_thread_x); + radeon_emit(cs, shader->info.regs.cs.compute_num_thread_y); + radeon_emit(cs, shader->info.regs.cs.compute_num_thread_z); } static void diff --git a/src/amd/vulkan/radv_pipeline_compute.c b/src/amd/vulkan/radv_pipeline_compute.c index 6cbef4e5e31..ef89fa2129e 100644 --- a/src/amd/vulkan/radv_pipeline_compute.c +++ b/src/amd/vulkan/radv_pipeline_compute.c @@ -38,7 +38,7 @@ #include "vk_format.h" uint32_t -radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader *cs) +radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader_info *info) { unsigned threads_per_threadgroup; unsigned threadgroups_per_cu = 1; @@ -46,8 +46,8 @@ radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const unsigned max_waves_per_sh = 0; /* Calculate best compute resource limits. 
 */ - threads_per_threadgroup = cs->info.cs.block_size[0] * cs->info.cs.block_size[1] * cs->info.cs.block_size[2]; - waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup, cs->info.wave_size); + threads_per_threadgroup = info->cs.block_size[0] * info->cs.block_size[1] * info->cs.block_size[2]; + waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup, info->wave_size); if (pdev->info.gfx_level >= GFX10 && waves_per_threadgroup == 1) threadgroups_per_cu = 2; @@ -69,7 +69,7 @@ radv_get_compute_pipeline_metadata(const struct radv_device *device, const struc metadata->rsrc1 = cs->config.rsrc1; metadata->rsrc2 = cs->config.rsrc2; metadata->rsrc3 = cs->config.rsrc3; - metadata->compute_resource_limits = radv_get_compute_resource_limits(pdev, cs); + metadata->compute_resource_limits = radv_get_compute_resource_limits(pdev, &cs->info); metadata->block_size_x = cs->info.cs.block_size[0]; metadata->block_size_y = cs->info.cs.block_size[1]; metadata->block_size_z = cs->info.cs.block_size[2]; diff --git a/src/amd/vulkan/radv_pipeline_compute.h b/src/amd/vulkan/radv_pipeline_compute.h index ccdc78b9bb9..bfd0e5c9604 100644 --- a/src/amd/vulkan/radv_pipeline_compute.h +++ b/src/amd/vulkan/radv_pipeline_compute.h @@ -15,6 +15,7 @@ struct radv_physical_device; struct radv_shader_binary; +struct radv_shader_info; struct radv_compute_pipeline { struct radv_pipeline base; @@ -42,7 +43,7 @@ struct radv_compute_pipeline_metadata { uint64_t inline_push_const_mask; }; -uint32_t radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader *cs); +uint32_t radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader_info *info); void radv_get_compute_pipeline_metadata(const struct radv_device *device, const struct radv_compute_pipeline *pipeline, struct radv_compute_pipeline_metadata *metadata); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index c840953f56d..35b111cb9ab 100644 
--- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1461,6 +1461,33 @@ radv_open_rtld_binary(struct radv_device *device, const struct radv_shader_binar } #endif +static void +radv_precompute_registers_hw_cs(struct radv_device *device, struct radv_shader_binary *binary) +{ + const struct radv_physical_device *pdev = radv_device_physical(device); + struct radv_shader_info *info = &binary->info; + + info->regs.cs.compute_resource_limits = radv_get_compute_resource_limits(pdev, info); + info->regs.cs.compute_num_thread_x = S_00B81C_NUM_THREAD_FULL(info->cs.block_size[0]); + info->regs.cs.compute_num_thread_y = S_00B81C_NUM_THREAD_FULL(info->cs.block_size[1]); + info->regs.cs.compute_num_thread_z = S_00B81C_NUM_THREAD_FULL(info->cs.block_size[2]); +} + +static void +radv_precompute_registers(struct radv_device *device, struct radv_shader_binary *binary) +{ + const struct radv_shader_info *info = &binary->info; + + switch (info->stage) { + case MESA_SHADER_COMPUTE: + case MESA_SHADER_TASK: + radv_precompute_registers_hw_cs(device, binary); + break; + default: + break; + } +} + static bool radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_binary *binary, const struct radv_shader_args *args) @@ -1767,6 +1794,9 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi config->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt); } + /* Precompute register values for faster emission. */ + radv_precompute_registers(device, binary); + return true; } diff --git a/src/amd/vulkan/radv_shader_info.h b/src/amd/vulkan/radv_shader_info.h index db295d0442a..9073e74de1d 100644 --- a/src/amd/vulkan/radv_shader_info.h +++ b/src/amd/vulkan/radv_shader_info.h @@ -248,6 +248,16 @@ struct radv_shader_info { struct radv_legacy_gs_info gs_ring_info; struct gfx10_ngg_info ngg_info; + + /* Precomputed register values. 
 */ + struct { + struct { + uint32_t compute_num_thread_x; + uint32_t compute_num_thread_y; + uint32_t compute_num_thread_z; + uint32_t compute_resource_limits; + } cs; + } regs; }; void radv_nir_shader_info_init(gl_shader_stage stage, gl_shader_stage next_stage, struct radv_shader_info *info);