mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 11:48:06 +02:00
radv: implement derivative group quads on GFX12
It's natively supported by the hw. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33303>
This commit is contained in:
parent
5fb23f29fe
commit
bd8575ebd3
6 changed files with 25 additions and 3 deletions
|
|
@ -593,7 +593,6 @@ gfx12_init_graphics_preamble_state(const struct ac_preamble_state *state,
|
|||
ac_pm4_set_reg(pm4, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0);
|
||||
ac_pm4_set_reg(pm4, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0);
|
||||
ac_pm4_set_reg(pm4, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0);
|
||||
ac_pm4_set_reg(pm4, R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL, 0);
|
||||
|
||||
/* Shader registers - HS */
|
||||
ac_pm4_set_reg(pm4, R_00B418_SPI_SHADER_PGM_HI_LS,
|
||||
|
|
|
|||
|
|
@ -2444,6 +2444,10 @@ radv_emit_mesh_shader(struct radv_cmd_buffer *cmd_buffer)
|
|||
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, 2);
|
||||
radeon_emit(cmd_buffer->cs, ms->info.regs.ms.spi_shader_gs_meshlet_dim);
|
||||
radeon_emit(cmd_buffer->cs, ms->info.regs.ms.spi_shader_gs_meshlet_exp_alloc);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12)
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL,
|
||||
ms->info.regs.ms.spi_shader_gs_meshlet_ctrl);
|
||||
}
|
||||
|
||||
radv_emit_vgt_gs_out(cmd_buffer, gs_out);
|
||||
|
|
|
|||
|
|
@ -482,8 +482,9 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
|
|||
*/
|
||||
.lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !pdev->mesh_fast_launch_2,
|
||||
.lower_local_invocation_index = nir->info.stage == MESA_SHADER_COMPUTE &&
|
||||
((nir->info.workgroup_size[0] == 1) + (nir->info.workgroup_size[1] == 1) +
|
||||
(nir->info.workgroup_size[2] == 1)) == 2,
|
||||
((((nir->info.workgroup_size[0] == 1) + (nir->info.workgroup_size[1] == 1) +
|
||||
(nir->info.workgroup_size[2] == 1)) == 2) ||
|
||||
nir->info.derivative_group == DERIVATIVE_GROUP_QUADS),
|
||||
};
|
||||
NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options);
|
||||
|
||||
|
|
@ -1659,6 +1660,13 @@ radv_precompute_registers_hw_ms(struct radv_device *device, struct radv_shader_b
|
|||
|
||||
info->regs.ms.spi_shader_gs_meshlet_exp_alloc =
|
||||
S_00B2B4_MAX_EXP_VERTS(info->ngg_info.max_out_verts) | S_00B2B4_MAX_EXP_PRIMS(info->ngg_info.prim_amp_factor);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
const bool derivative_group_quads = info->cs.derivative_group == DERIVATIVE_GROUP_QUADS;
|
||||
|
||||
info->regs.ms.spi_shader_gs_meshlet_ctrl =
|
||||
S_00B2B8_INTERLEAVE_BITS_X(derivative_group_quads) | S_00B2B8_INTERLEAVE_BITS_Y(derivative_group_quads);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1731,6 +1739,11 @@ radv_precompute_registers_hw_cs(struct radv_device *device, struct radv_shader_b
|
|||
if (pdev->info.gfx_level >= GFX12) {
|
||||
info->regs.cs.compute_num_thread_x = S_00B81C_NUM_THREAD_FULL_GFX12(info->cs.block_size[0]);
|
||||
info->regs.cs.compute_num_thread_y = S_00B820_NUM_THREAD_FULL_GFX12(info->cs.block_size[1]);
|
||||
|
||||
if (info->cs.derivative_group == DERIVATIVE_GROUP_QUADS) {
|
||||
info->regs.cs.compute_num_thread_x |= S_00B81C_INTERLEAVE_BITS_X(1);
|
||||
info->regs.cs.compute_num_thread_y |= S_00B820_INTERLEAVE_BITS_Y(1);
|
||||
}
|
||||
} else {
|
||||
info->regs.cs.compute_num_thread_x = S_00B81C_NUM_THREAD_FULL_GFX6(info->cs.block_size[0]);
|
||||
info->regs.cs.compute_num_thread_y = S_00B820_NUM_THREAD_FULL_GFX6(info->cs.block_size[1]);
|
||||
|
|
|
|||
|
|
@ -1270,6 +1270,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
|
|||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) |
|
||||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS) |
|
||||
radv_shader_should_clear_lds(device, nir);
|
||||
info->cs.derivative_group = nir->info.derivative_group;
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_TASK ||
|
||||
nir->info.stage == MESA_SHADER_MESH) {
|
||||
|
|
|
|||
|
|
@ -232,6 +232,8 @@ struct radv_shader_info {
|
|||
bool has_query; /* Task shader only */
|
||||
|
||||
bool regalloc_hang_bug;
|
||||
|
||||
unsigned derivative_group : 2;
|
||||
} cs;
|
||||
struct {
|
||||
uint64_t tes_inputs_read;
|
||||
|
|
@ -293,6 +295,7 @@ struct radv_shader_info {
|
|||
struct {
|
||||
uint32_t spi_shader_gs_meshlet_dim;
|
||||
uint32_t spi_shader_gs_meshlet_exp_alloc;
|
||||
uint32_t spi_shader_gs_meshlet_ctrl; /* GFX12+ */
|
||||
} ms;
|
||||
|
||||
struct {
|
||||
|
|
|
|||
|
|
@ -5169,6 +5169,8 @@ static void gfx12_init_gfx_preamble_state(struct si_context *sctx)
|
|||
ac_pm4_set_reg(&pm4->base, R_028C54_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
|
||||
S_028C54_NULL_SQUAD_AA_MASK_ENABLE(1));
|
||||
|
||||
ac_pm4_set_reg(&pm4->base, R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL, 0);
|
||||
|
||||
done:
|
||||
sctx->cs_preamble_state = pm4;
|
||||
sctx->cs_preamble_state_tmz = si_pm4_clone(sscreen, pm4); /* Make a copy of the preamble for TMZ. */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue