radv: precompute existing legacy GS register values later

To precompute all registers at the same place.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29022>
This commit is contained in:
Samuel Pitoiset 2024-05-02 15:00:13 +02:00 committed by Marge Bot
parent 88dfe04b08
commit e5bc4d85bb
6 changed files with 44 additions and 19 deletions

View file

@ -287,7 +287,7 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->vgt_esgs_ring_itemsize);
} else {
const unsigned stride =
s->info->is_ngg ? s->info->ngg_info.vgt_esgs_ring_itemsize : s->info->gs_ring_info.vgt_esgs_ring_itemsize;
s->info->is_ngg ? s->info->ngg_info.vgt_esgs_ring_itemsize : s->info->gs_ring_info.esgs_itemsize;
replacement = nir_imm_int(b, stride);
}
break;

View file

@ -5593,7 +5593,7 @@ gfx10_emit_ge_cntl(struct radv_cmd_buffer *cmd_buffer)
}
} else if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY)) {
const struct radv_legacy_gs_info *gs_state = &cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info;
primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(gs_state->vgt_gs_onchip_cntl);
primgroup_size = gs_state->gs_prims_per_subgroup;
} else {
primgroup_size = 128; /* recommended without a GS and tess */
}

View file

@ -3218,7 +3218,7 @@ radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
/* GFX6-8: ESGS offchip ring buffer is allocated according to VGT_ESGS_RING_ITEMSIZE.
* GFX9+: Only used to set the GS input VGPRs, emulated in shaders.
*/
radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, gs_state->vgt_esgs_ring_itemsize);
radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, gs->info.regs.gs.vgt_esgs_ring_itemsize);
}
va = radv_shader_get_va(gs);
@ -3237,8 +3237,9 @@ radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
radeon_emit(cs, gs->config.rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size));
}
radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl);
radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, gs_state->vgt_gs_max_prims_per_subgroup);
radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs->info.regs.gs.vgt_gs_onchip_cntl);
radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
gs->info.regs.gs.vgt_gs_max_prims_per_subgroup);
} else {
radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
radeon_emit(cs, va >> 8);

View file

@ -1461,6 +1461,21 @@ radv_open_rtld_binary(struct radv_device *device, const struct radv_shader_binar
}
#endif
static void
radv_precompute_registers_hw_gs(struct radv_device *device, struct radv_shader_binary *binary)
{
struct radv_shader_info *info = &binary->info;
info->regs.gs.vgt_esgs_ring_itemsize = info->gs_ring_info.esgs_itemsize;
info->regs.gs.vgt_gs_max_prims_per_subgroup =
S_028A94_MAX_PRIMS_PER_SUBGROUP(info->gs_ring_info.gs_inst_prims_in_subgroup);
info->regs.gs.vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(info->gs_ring_info.es_verts_per_subgroup) |
S_028A44_GS_PRIMS_PER_SUBGRP(info->gs_ring_info.gs_prims_per_subgroup) |
S_028A44_GS_INST_PRIMS_IN_SUBGRP(info->gs_ring_info.gs_inst_prims_in_subgroup);
}
static void
radv_precompute_registers_hw_cs(struct radv_device *device, struct radv_shader_binary *binary)
{
@ -1479,6 +1494,10 @@ radv_precompute_registers(struct radv_device *device, struct radv_shader_binary
const struct radv_shader_info *info = &binary->info;
switch (info->stage) {
case MESA_SHADER_GEOMETRY:
if (!info->is_ngg)
radv_precompute_registers_hw_gs(device, binary);
break;
case MESA_SHADER_COMPUTE:
case MESA_SHADER_TASK:
radv_precompute_registers_hw_cs(device, binary);

View file

@ -624,11 +624,9 @@ radv_init_legacy_gs_ring_info(const struct radv_device *device, struct radv_shad
unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
/* Calculate the minimum size. */
unsigned min_esgs_ring_size =
align(gs_ring_info->vgt_esgs_ring_itemsize * 4 * gs_vertex_reuse * wave_size, alignment);
unsigned min_esgs_ring_size = align(gs_ring_info->esgs_itemsize * 4 * gs_vertex_reuse * wave_size, alignment);
/* These are recommended sizes, not minimum sizes. */
unsigned esgs_ring_size =
max_gs_waves * 2 * wave_size * gs_ring_info->vgt_esgs_ring_itemsize * 4 * gs_info->gs.vertices_in;
unsigned esgs_ring_size = max_gs_waves * 2 * wave_size * gs_ring_info->esgs_itemsize * 4 * gs_info->gs.vertices_in;
unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size * gs_info->gs.max_gsvs_emit_size;
min_esgs_ring_size = align(min_esgs_ring_size, alignment);
@ -731,12 +729,12 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_inf
const uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out;
const uint32_t lds_granularity = pdev->info.lds_encode_granularity;
const uint32_t total_lds_bytes = align(esgs_lds_size * 4, lds_granularity);
out->gs_inst_prims_in_subgroup = gs_inst_prims_in_subgroup;
out->es_verts_per_subgroup = es_verts_per_subgroup;
out->gs_prims_per_subgroup = gs_prims_per_subgroup;
out->esgs_itemsize = esgs_itemsize;
out->lds_size = total_lds_bytes / lds_granularity;
out->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) |
S_028A44_GS_PRIMS_PER_SUBGRP(gs_prims_per_subgroup) |
S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_inst_prims_in_subgroup);
out->vgt_gs_max_prims_per_subgroup = S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup);
out->vgt_esgs_ring_itemsize = esgs_itemsize;
assert(max_prims_per_subgroup <= max_out_prims);
radv_init_legacy_gs_ring_info(device, gs_info);
@ -1339,8 +1337,8 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
break;
case MESA_SHADER_GEOMETRY:
if (!info->is_ngg) {
unsigned es_verts_per_subgroup = G_028A44_ES_VERTS_PER_SUBGRP(info->gs_ring_info.vgt_gs_onchip_cntl);
unsigned gs_inst_prims_in_subgroup = G_028A44_GS_INST_PRIMS_IN_SUBGRP(info->gs_ring_info.vgt_gs_onchip_cntl);
unsigned es_verts_per_subgroup = info->gs_ring_info.es_verts_per_subgroup;
unsigned gs_inst_prims_in_subgroup = info->gs_ring_info.gs_inst_prims_in_subgroup;
info->workgroup_size = ac_compute_esgs_workgroup_size(pdev->info.gfx_level, info->wave_size,
es_verts_per_subgroup, gs_inst_prims_in_subgroup);

View file

@ -54,9 +54,10 @@ struct radv_streamout_info {
};
struct radv_legacy_gs_info {
uint32_t vgt_gs_onchip_cntl;
uint32_t vgt_gs_max_prims_per_subgroup;
uint32_t vgt_esgs_ring_itemsize;
uint32_t gs_inst_prims_in_subgroup;
uint32_t es_verts_per_subgroup;
uint32_t gs_prims_per_subgroup;
uint32_t esgs_itemsize;
uint32_t lds_size;
uint32_t esgs_ring_size;
uint32_t gsvs_ring_size;
@ -252,6 +253,12 @@ struct radv_shader_info {
/* Precomputed register values. */
struct {
struct {
uint32_t vgt_esgs_ring_itemsize;
uint32_t vgt_gs_max_prims_per_subgroup;
uint32_t vgt_gs_onchip_cntl;
} gs;
struct {
uint32_t compute_num_thread_x;
uint32_t compute_num_thread_y;