radv/shader_info: use union for precomputed register values of non-overlapping stages

Reduces the size of radv_shader_info from 872 bytes to 784 bytes.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37931>
This commit is contained in:
Daniel Schürmann 2025-10-16 13:22:17 +02:00
parent 68ab01b2f2
commit 9b34da3da8
3 changed files with 52 additions and 51 deletions

View file

@ -3434,13 +3434,13 @@ radv_gfx11_emit_meshlet(struct radv_cmd_buffer *cmd_buffer, const struct radv_sh
radeon_begin(cs);
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_sh_reg(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, ms->info.regs.ms.spi_shader_gs_meshlet_dim);
gfx12_push_sh_reg(R_00B2B4_SPI_SHADER_GS_MESHLET_EXP_ALLOC, ms->info.regs.ms.spi_shader_gs_meshlet_exp_alloc);
gfx12_push_sh_reg(R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL, ms->info.regs.ms.spi_shader_gs_meshlet_ctrl);
gfx12_push_sh_reg(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, ms->info.regs.ngg.ms.spi_shader_gs_meshlet_dim);
gfx12_push_sh_reg(R_00B2B4_SPI_SHADER_GS_MESHLET_EXP_ALLOC, ms->info.regs.ngg.ms.spi_shader_gs_meshlet_exp_alloc);
gfx12_push_sh_reg(R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL, ms->info.regs.ngg.ms.spi_shader_gs_meshlet_ctrl);
} else {
radeon_set_sh_reg_seq(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, 2);
radeon_emit(ms->info.regs.ms.spi_shader_gs_meshlet_dim);
radeon_emit(ms->info.regs.ms.spi_shader_gs_meshlet_exp_alloc);
radeon_emit(ms->info.regs.ngg.ms.spi_shader_gs_meshlet_dim);
radeon_emit(ms->info.regs.ngg.ms.spi_shader_gs_meshlet_exp_alloc);
}
radeon_end();
}

View file

@ -1805,18 +1805,18 @@ radv_precompute_registers_hw_ms(struct radv_device *device, struct radv_shader_b
info->regs.vgt_gs_max_vert_out = pdev->info.mesh_fast_launch_2 ? info->ngg_info.max_out_verts : info->workgroup_size;
info->regs.ms.spi_shader_gs_meshlet_dim = S_00B2B0_MESHLET_NUM_THREAD_X(info->cs.block_size[0] - 1) |
S_00B2B0_MESHLET_NUM_THREAD_Y(info->cs.block_size[1] - 1) |
S_00B2B0_MESHLET_NUM_THREAD_Z(info->cs.block_size[2] - 1) |
S_00B2B0_MESHLET_THREADGROUP_SIZE(info->workgroup_size - 1);
info->regs.ngg.ms.spi_shader_gs_meshlet_dim = S_00B2B0_MESHLET_NUM_THREAD_X(info->cs.block_size[0] - 1) |
S_00B2B0_MESHLET_NUM_THREAD_Y(info->cs.block_size[1] - 1) |
S_00B2B0_MESHLET_NUM_THREAD_Z(info->cs.block_size[2] - 1) |
S_00B2B0_MESHLET_THREADGROUP_SIZE(info->workgroup_size - 1);
info->regs.ms.spi_shader_gs_meshlet_exp_alloc =
info->regs.ngg.ms.spi_shader_gs_meshlet_exp_alloc =
S_00B2B4_MAX_EXP_VERTS(info->ngg_info.max_out_verts) | S_00B2B4_MAX_EXP_PRIMS(info->ngg_info.prim_amp_factor);
if (pdev->info.gfx_level >= GFX12) {
const bool derivative_group_quads = info->cs.derivative_group == DERIVATIVE_GROUP_QUADS;
info->regs.ms.spi_shader_gs_meshlet_ctrl =
info->regs.ngg.ms.spi_shader_gs_meshlet_ctrl =
S_00B2B8_INTERLEAVE_BITS_X(derivative_group_quads) | S_00B2B8_INTERLEAVE_BITS_Y(derivative_group_quads);
}
}

View file

@ -258,50 +258,51 @@ struct radv_shader_info {
uint32_t pgm_rsrc2;
uint32_t pgm_rsrc3;
struct {
uint32_t spi_shader_late_alloc_vs;
uint32_t spi_shader_pgm_rsrc3_vs;
uint32_t vgt_reuse_off;
} vs;
union {
struct {
uint32_t spi_shader_late_alloc_vs;
uint32_t spi_shader_pgm_rsrc3_vs;
uint32_t vgt_reuse_off;
} vs;
struct {
uint32_t vgt_esgs_ring_itemsize;
uint32_t vgt_gs_instance_cnt;
uint32_t vgt_gs_max_prims_per_subgroup;
uint32_t vgt_gs_vert_itemsize[4];
uint32_t vgt_gsvs_ring_itemsize;
uint32_t vgt_gsvs_ring_offset[3];
} gs;
struct {
uint32_t vgt_esgs_ring_itemsize;
uint32_t vgt_gs_instance_cnt;
uint32_t vgt_gs_max_prims_per_subgroup;
uint32_t vgt_gs_vert_itemsize[4];
uint32_t vgt_gsvs_ring_itemsize;
uint32_t vgt_gsvs_ring_offset[3];
} gs;
struct {
uint32_t ge_cntl; /* Not fully precomputed. */
uint32_t ge_max_output_per_subgroup;
uint32_t ge_ngg_subgrp_cntl;
uint32_t spi_shader_idx_format;
uint32_t vgt_primitiveid_en;
} ngg;
struct {
uint32_t ge_cntl; /* Not fully precomputed. */
uint32_t ge_max_output_per_subgroup;
uint32_t ge_ngg_subgrp_cntl;
uint32_t spi_shader_idx_format;
uint32_t vgt_primitiveid_en;
struct {
uint32_t spi_shader_gs_meshlet_dim;
uint32_t spi_shader_gs_meshlet_exp_alloc;
uint32_t spi_shader_gs_meshlet_ctrl; /* GFX12+ */
} ms;
} ngg;
struct {
uint32_t spi_shader_gs_meshlet_dim;
uint32_t spi_shader_gs_meshlet_exp_alloc;
uint32_t spi_shader_gs_meshlet_ctrl; /* GFX12+ */
} ms;
struct {
uint32_t db_shader_control;
uint32_t pa_sc_shader_control;
uint32_t spi_ps_in_control;
uint32_t spi_shader_z_format;
uint32_t spi_gs_out_config_ps;
uint32_t pa_sc_hisz_control;
} ps;
struct {
uint32_t db_shader_control;
uint32_t pa_sc_shader_control;
uint32_t spi_ps_in_control;
uint32_t spi_shader_z_format;
uint32_t spi_gs_out_config_ps;
uint32_t pa_sc_hisz_control;
} ps;
struct {
uint32_t compute_num_thread_x;
uint32_t compute_num_thread_y;
uint32_t compute_num_thread_z;
uint32_t compute_resource_limits;
} cs;
struct {
uint32_t compute_num_thread_x;
uint32_t compute_num_thread_y;
uint32_t compute_num_thread_z;
uint32_t compute_resource_limits;
} cs;
};
/* Common registers between stages. */
uint32_t vgt_gs_max_vert_out;