radv: implement derivative group quads on GFX12

It's natively supported by the hw.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33303>
This commit is contained in:
Samuel Pitoiset 2025-01-30 07:34:10 -08:00 committed by Marge Bot
parent 5fb23f29fe
commit bd8575ebd3
6 changed files with 25 additions and 3 deletions

View file

@ -593,7 +593,6 @@ gfx12_init_graphics_preamble_state(const struct ac_preamble_state *state,
ac_pm4_set_reg(pm4, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0);
ac_pm4_set_reg(pm4, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0);
ac_pm4_set_reg(pm4, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0);
ac_pm4_set_reg(pm4, R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL, 0);
/* Shader registers - HS */
ac_pm4_set_reg(pm4, R_00B418_SPI_SHADER_PGM_HI_LS,

View file

@ -2444,6 +2444,10 @@ radv_emit_mesh_shader(struct radv_cmd_buffer *cmd_buffer)
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, 2);
radeon_emit(cmd_buffer->cs, ms->info.regs.ms.spi_shader_gs_meshlet_dim);
radeon_emit(cmd_buffer->cs, ms->info.regs.ms.spi_shader_gs_meshlet_exp_alloc);
if (pdev->info.gfx_level >= GFX12)
radeon_set_sh_reg(cmd_buffer->cs, R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL,
ms->info.regs.ms.spi_shader_gs_meshlet_ctrl);
}
radv_emit_vgt_gs_out(cmd_buffer, gs_out);

View file

@ -482,8 +482,9 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
*/
.lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !pdev->mesh_fast_launch_2,
.lower_local_invocation_index = nir->info.stage == MESA_SHADER_COMPUTE &&
((nir->info.workgroup_size[0] == 1) + (nir->info.workgroup_size[1] == 1) +
(nir->info.workgroup_size[2] == 1)) == 2,
((((nir->info.workgroup_size[0] == 1) + (nir->info.workgroup_size[1] == 1) +
(nir->info.workgroup_size[2] == 1)) == 2) ||
nir->info.derivative_group == DERIVATIVE_GROUP_QUADS),
};
NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options);
@ -1659,6 +1660,13 @@ radv_precompute_registers_hw_ms(struct radv_device *device, struct radv_shader_b
info->regs.ms.spi_shader_gs_meshlet_exp_alloc =
S_00B2B4_MAX_EXP_VERTS(info->ngg_info.max_out_verts) | S_00B2B4_MAX_EXP_PRIMS(info->ngg_info.prim_amp_factor);
if (pdev->info.gfx_level >= GFX12) {
const bool derivative_group_quads = info->cs.derivative_group == DERIVATIVE_GROUP_QUADS;
info->regs.ms.spi_shader_gs_meshlet_ctrl =
S_00B2B8_INTERLEAVE_BITS_X(derivative_group_quads) | S_00B2B8_INTERLEAVE_BITS_Y(derivative_group_quads);
}
}
static void
@ -1731,6 +1739,11 @@ radv_precompute_registers_hw_cs(struct radv_device *device, struct radv_shader_b
if (pdev->info.gfx_level >= GFX12) {
info->regs.cs.compute_num_thread_x = S_00B81C_NUM_THREAD_FULL_GFX12(info->cs.block_size[0]);
info->regs.cs.compute_num_thread_y = S_00B820_NUM_THREAD_FULL_GFX12(info->cs.block_size[1]);
if (info->cs.derivative_group == DERIVATIVE_GROUP_QUADS) {
info->regs.cs.compute_num_thread_x |= S_00B81C_INTERLEAVE_BITS_X(1);
info->regs.cs.compute_num_thread_y |= S_00B820_INTERLEAVE_BITS_Y(1);
}
} else {
info->regs.cs.compute_num_thread_x = S_00B81C_NUM_THREAD_FULL_GFX6(info->cs.block_size[0]);
info->regs.cs.compute_num_thread_y = S_00B820_NUM_THREAD_FULL_GFX6(info->cs.block_size[1]);

View file

@ -1270,6 +1270,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) |
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS) |
radv_shader_should_clear_lds(device, nir);
info->cs.derivative_group = nir->info.derivative_group;
if (nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_TASK ||
nir->info.stage == MESA_SHADER_MESH) {

View file

@ -232,6 +232,8 @@ struct radv_shader_info {
bool has_query; /* Task shader only */
bool regalloc_hang_bug;
unsigned derivative_group : 2;
} cs;
struct {
uint64_t tes_inputs_read;
@ -293,6 +295,7 @@ struct radv_shader_info {
struct {
uint32_t spi_shader_gs_meshlet_dim;
uint32_t spi_shader_gs_meshlet_exp_alloc;
uint32_t spi_shader_gs_meshlet_ctrl; /* GFX12+ */
} ms;
struct {

View file

@ -5169,6 +5169,8 @@ static void gfx12_init_gfx_preamble_state(struct si_context *sctx)
ac_pm4_set_reg(&pm4->base, R_028C54_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
S_028C54_NULL_SQUAD_AA_MASK_ENABLE(1));
ac_pm4_set_reg(&pm4->base, R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL, 0);
done:
sctx->cs_preamble_state = pm4;
sctx->cs_preamble_state_tmz = si_pm4_clone(sscreen, pm4); /* Make a copy of the preamble for TMZ. */