mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 17:30:12 +01:00
radv: precompute compute/task shader register values
Precomputing these register values makes emission faster. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29014>
This commit is contained in:
parent
0549649bcf
commit
3b41fbd4b8
5 changed files with 50 additions and 10 deletions
|
|
@ -1916,12 +1916,11 @@ radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_
|
|||
radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3);
|
||||
}
|
||||
|
||||
radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, radv_get_compute_resource_limits(pdev, shader));
|
||||
|
||||
radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, shader->info.regs.cs.compute_resource_limits);
|
||||
radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
|
||||
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0]));
|
||||
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1]));
|
||||
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2]));
|
||||
radeon_emit(cs, shader->info.regs.cs.compute_num_thread_x);
|
||||
radeon_emit(cs, shader->info.regs.cs.compute_num_thread_y);
|
||||
radeon_emit(cs, shader->info.regs.cs.compute_num_thread_z);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@
|
|||
#include "vk_format.h"
|
||||
|
||||
uint32_t
|
||||
radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader *cs)
|
||||
radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader_info *info)
|
||||
{
|
||||
unsigned threads_per_threadgroup;
|
||||
unsigned threadgroups_per_cu = 1;
|
||||
|
|
@ -46,8 +46,8 @@ radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const
|
|||
unsigned max_waves_per_sh = 0;
|
||||
|
||||
/* Calculate best compute resource limits. */
|
||||
threads_per_threadgroup = cs->info.cs.block_size[0] * cs->info.cs.block_size[1] * cs->info.cs.block_size[2];
|
||||
waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup, cs->info.wave_size);
|
||||
threads_per_threadgroup = info->cs.block_size[0] * info->cs.block_size[1] * info->cs.block_size[2];
|
||||
waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup, info->wave_size);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX10 && waves_per_threadgroup == 1)
|
||||
threadgroups_per_cu = 2;
|
||||
|
|
@ -69,7 +69,7 @@ radv_get_compute_pipeline_metadata(const struct radv_device *device, const struc
|
|||
metadata->rsrc1 = cs->config.rsrc1;
|
||||
metadata->rsrc2 = cs->config.rsrc2;
|
||||
metadata->rsrc3 = cs->config.rsrc3;
|
||||
metadata->compute_resource_limits = radv_get_compute_resource_limits(pdev, cs);
|
||||
metadata->compute_resource_limits = radv_get_compute_resource_limits(pdev, &cs->info);
|
||||
metadata->block_size_x = cs->info.cs.block_size[0];
|
||||
metadata->block_size_y = cs->info.cs.block_size[1];
|
||||
metadata->block_size_z = cs->info.cs.block_size[2];
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@
|
|||
|
||||
struct radv_physical_device;
|
||||
struct radv_shader_binary;
|
||||
struct radv_shader_info;
|
||||
|
||||
struct radv_compute_pipeline {
|
||||
struct radv_pipeline base;
|
||||
|
|
@ -42,7 +43,7 @@ struct radv_compute_pipeline_metadata {
|
|||
uint64_t inline_push_const_mask;
|
||||
};
|
||||
|
||||
uint32_t radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader *cs);
|
||||
uint32_t radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader_info *info);
|
||||
|
||||
void radv_get_compute_pipeline_metadata(const struct radv_device *device, const struct radv_compute_pipeline *pipeline,
|
||||
struct radv_compute_pipeline_metadata *metadata);
|
||||
|
|
|
|||
|
|
@ -1461,6 +1461,33 @@ radv_open_rtld_binary(struct radv_device *device, const struct radv_shader_binar
|
|||
}
|
||||
#endif
|
||||
|
||||
/* Precompute the register values of a shader that runs as a HW compute shader.
 *
 * Fills binary->info.regs.cs with:
 *  - compute_resource_limits, as returned by radv_get_compute_resource_limits()
 *  - compute_num_thread_{x,y,z}, built from cs.block_size[0..2] with
 *    S_00B81C_NUM_THREAD_FULL()
 *
 * radv_emit_compute_shader() then emits these stored values directly instead
 * of recomputing them.
 */
static void
|
||||
radv_precompute_registers_hw_cs(struct radv_device *device, struct radv_shader_binary *binary)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_shader_info *info = &binary->info;
|
||||
|
||||
/* COMPUTE_RESOURCE_LIMITS value, derived from the physical device and the shader info. */
info->regs.cs.compute_resource_limits = radv_get_compute_resource_limits(pdev, info);
|
||||
/* One NUM_THREAD value per workgroup dimension (X, Y, Z). */
info->regs.cs.compute_num_thread_x = S_00B81C_NUM_THREAD_FULL(info->cs.block_size[0]);
|
||||
info->regs.cs.compute_num_thread_y = S_00B81C_NUM_THREAD_FULL(info->cs.block_size[1]);
|
||||
info->regs.cs.compute_num_thread_z = S_00B81C_NUM_THREAD_FULL(info->cs.block_size[2]);
|
||||
}
|
||||
|
||||
/* Precompute per-stage register values for a shader binary.
 *
 * Dispatches on binary->info.stage: compute and task shaders are handled by
 * radv_precompute_registers_hw_cs(); every other stage is left untouched.
 */
static void
|
||||
radv_precompute_registers(struct radv_device *device, struct radv_shader_binary *binary)
|
||||
{
|
||||
const struct radv_shader_info *info = &binary->info;
|
||||
|
||||
switch (info->stage) {
|
||||
/* Compute and task shaders share the same precompute path. */
case MESA_SHADER_COMPUTE:
|
||||
case MESA_SHADER_TASK:
|
||||
radv_precompute_registers_hw_cs(device, binary);
|
||||
break;
|
||||
/* Nothing is precomputed for the remaining stages. */
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_binary *binary,
|
||||
const struct radv_shader_args *args)
|
||||
|
|
@ -1767,6 +1794,9 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi
|
|||
config->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt);
|
||||
}
|
||||
|
||||
/* Precompute register values for faster emission. */
|
||||
radv_precompute_registers(device, binary);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -248,6 +248,16 @@ struct radv_shader_info {
|
|||
|
||||
struct radv_legacy_gs_info gs_ring_info;
|
||||
struct gfx10_ngg_info ngg_info;
|
||||
|
||||
/* Precomputed register values. */
|
||||
struct {
|
||||
struct {
|
||||
uint32_t compute_num_thread_x;
|
||||
uint32_t compute_num_thread_y;
|
||||
uint32_t compute_num_thread_z;
|
||||
uint32_t compute_resource_limits;
|
||||
} cs;
|
||||
} regs;
|
||||
};
|
||||
|
||||
void radv_nir_shader_info_init(gl_shader_stage stage, gl_shader_stage next_stage, struct radv_shader_info *info);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue