mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-20 17:30:24 +01:00
radv: precompute existing legacy GS register values later
To precompute all registers at the same place. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29022>
This commit is contained in:
parent
88dfe04b08
commit
e5bc4d85bb
6 changed files with 44 additions and 19 deletions
|
|
@ -287,7 +287,7 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
|
|||
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->vgt_esgs_ring_itemsize);
|
||||
} else {
|
||||
const unsigned stride =
|
||||
s->info->is_ngg ? s->info->ngg_info.vgt_esgs_ring_itemsize : s->info->gs_ring_info.vgt_esgs_ring_itemsize;
|
||||
s->info->is_ngg ? s->info->ngg_info.vgt_esgs_ring_itemsize : s->info->gs_ring_info.esgs_itemsize;
|
||||
replacement = nir_imm_int(b, stride);
|
||||
}
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -5593,7 +5593,7 @@ gfx10_emit_ge_cntl(struct radv_cmd_buffer *cmd_buffer)
|
|||
}
|
||||
} else if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY)) {
|
||||
const struct radv_legacy_gs_info *gs_state = &cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info;
|
||||
primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(gs_state->vgt_gs_onchip_cntl);
|
||||
primgroup_size = gs_state->gs_prims_per_subgroup;
|
||||
} else {
|
||||
primgroup_size = 128; /* recommended without a GS and tess */
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3218,7 +3218,7 @@ radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
|
|||
/* GFX6-8: ESGS offchip ring buffer is allocated according to VGT_ESGS_RING_ITEMSIZE.
|
||||
* GFX9+: Only used to set the GS input VGPRs, emulated in shaders.
|
||||
*/
|
||||
radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, gs_state->vgt_esgs_ring_itemsize);
|
||||
radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, gs->info.regs.gs.vgt_esgs_ring_itemsize);
|
||||
}
|
||||
|
||||
va = radv_shader_get_va(gs);
|
||||
|
|
@ -3237,8 +3237,9 @@ radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
|
|||
radeon_emit(cs, gs->config.rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size));
|
||||
}
|
||||
|
||||
radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl);
|
||||
radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, gs_state->vgt_gs_max_prims_per_subgroup);
|
||||
radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs->info.regs.gs.vgt_gs_onchip_cntl);
|
||||
radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
|
||||
gs->info.regs.gs.vgt_gs_max_prims_per_subgroup);
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
|
||||
radeon_emit(cs, va >> 8);
|
||||
|
|
|
|||
|
|
@ -1461,6 +1461,21 @@ radv_open_rtld_binary(struct radv_device *device, const struct radv_shader_binar
|
|||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
radv_precompute_registers_hw_gs(struct radv_device *device, struct radv_shader_binary *binary)
|
||||
{
|
||||
struct radv_shader_info *info = &binary->info;
|
||||
|
||||
info->regs.gs.vgt_esgs_ring_itemsize = info->gs_ring_info.esgs_itemsize;
|
||||
|
||||
info->regs.gs.vgt_gs_max_prims_per_subgroup =
|
||||
S_028A94_MAX_PRIMS_PER_SUBGROUP(info->gs_ring_info.gs_inst_prims_in_subgroup);
|
||||
|
||||
info->regs.gs.vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(info->gs_ring_info.es_verts_per_subgroup) |
|
||||
S_028A44_GS_PRIMS_PER_SUBGRP(info->gs_ring_info.gs_prims_per_subgroup) |
|
||||
S_028A44_GS_INST_PRIMS_IN_SUBGRP(info->gs_ring_info.gs_inst_prims_in_subgroup);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_precompute_registers_hw_cs(struct radv_device *device, struct radv_shader_binary *binary)
|
||||
{
|
||||
|
|
@ -1479,6 +1494,10 @@ radv_precompute_registers(struct radv_device *device, struct radv_shader_binary
|
|||
const struct radv_shader_info *info = &binary->info;
|
||||
|
||||
switch (info->stage) {
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
if (!info->is_ngg)
|
||||
radv_precompute_registers_hw_gs(device, binary);
|
||||
break;
|
||||
case MESA_SHADER_COMPUTE:
|
||||
case MESA_SHADER_TASK:
|
||||
radv_precompute_registers_hw_cs(device, binary);
|
||||
|
|
|
|||
|
|
@ -624,11 +624,9 @@ radv_init_legacy_gs_ring_info(const struct radv_device *device, struct radv_shad
|
|||
unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
|
||||
|
||||
/* Calculate the minimum size. */
|
||||
unsigned min_esgs_ring_size =
|
||||
align(gs_ring_info->vgt_esgs_ring_itemsize * 4 * gs_vertex_reuse * wave_size, alignment);
|
||||
unsigned min_esgs_ring_size = align(gs_ring_info->esgs_itemsize * 4 * gs_vertex_reuse * wave_size, alignment);
|
||||
/* These are recommended sizes, not minimum sizes. */
|
||||
unsigned esgs_ring_size =
|
||||
max_gs_waves * 2 * wave_size * gs_ring_info->vgt_esgs_ring_itemsize * 4 * gs_info->gs.vertices_in;
|
||||
unsigned esgs_ring_size = max_gs_waves * 2 * wave_size * gs_ring_info->esgs_itemsize * 4 * gs_info->gs.vertices_in;
|
||||
unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size * gs_info->gs.max_gsvs_emit_size;
|
||||
|
||||
min_esgs_ring_size = align(min_esgs_ring_size, alignment);
|
||||
|
|
@ -731,12 +729,12 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_inf
|
|||
const uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out;
|
||||
const uint32_t lds_granularity = pdev->info.lds_encode_granularity;
|
||||
const uint32_t total_lds_bytes = align(esgs_lds_size * 4, lds_granularity);
|
||||
|
||||
out->gs_inst_prims_in_subgroup = gs_inst_prims_in_subgroup;
|
||||
out->es_verts_per_subgroup = es_verts_per_subgroup;
|
||||
out->gs_prims_per_subgroup = gs_prims_per_subgroup;
|
||||
out->esgs_itemsize = esgs_itemsize;
|
||||
out->lds_size = total_lds_bytes / lds_granularity;
|
||||
out->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) |
|
||||
S_028A44_GS_PRIMS_PER_SUBGRP(gs_prims_per_subgroup) |
|
||||
S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_inst_prims_in_subgroup);
|
||||
out->vgt_gs_max_prims_per_subgroup = S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup);
|
||||
out->vgt_esgs_ring_itemsize = esgs_itemsize;
|
||||
assert(max_prims_per_subgroup <= max_out_prims);
|
||||
|
||||
radv_init_legacy_gs_ring_info(device, gs_info);
|
||||
|
|
@ -1339,8 +1337,8 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
|
|||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
if (!info->is_ngg) {
|
||||
unsigned es_verts_per_subgroup = G_028A44_ES_VERTS_PER_SUBGRP(info->gs_ring_info.vgt_gs_onchip_cntl);
|
||||
unsigned gs_inst_prims_in_subgroup = G_028A44_GS_INST_PRIMS_IN_SUBGRP(info->gs_ring_info.vgt_gs_onchip_cntl);
|
||||
unsigned es_verts_per_subgroup = info->gs_ring_info.es_verts_per_subgroup;
|
||||
unsigned gs_inst_prims_in_subgroup = info->gs_ring_info.gs_inst_prims_in_subgroup;
|
||||
|
||||
info->workgroup_size = ac_compute_esgs_workgroup_size(pdev->info.gfx_level, info->wave_size,
|
||||
es_verts_per_subgroup, gs_inst_prims_in_subgroup);
|
||||
|
|
|
|||
|
|
@ -54,9 +54,10 @@ struct radv_streamout_info {
|
|||
};
|
||||
|
||||
struct radv_legacy_gs_info {
|
||||
uint32_t vgt_gs_onchip_cntl;
|
||||
uint32_t vgt_gs_max_prims_per_subgroup;
|
||||
uint32_t vgt_esgs_ring_itemsize;
|
||||
uint32_t gs_inst_prims_in_subgroup;
|
||||
uint32_t es_verts_per_subgroup;
|
||||
uint32_t gs_prims_per_subgroup;
|
||||
uint32_t esgs_itemsize;
|
||||
uint32_t lds_size;
|
||||
uint32_t esgs_ring_size;
|
||||
uint32_t gsvs_ring_size;
|
||||
|
|
@ -252,6 +253,12 @@ struct radv_shader_info {
|
|||
|
||||
/* Precomputed register values. */
|
||||
struct {
|
||||
struct {
|
||||
uint32_t vgt_esgs_ring_itemsize;
|
||||
uint32_t vgt_gs_max_prims_per_subgroup;
|
||||
uint32_t vgt_gs_onchip_cntl;
|
||||
} gs;
|
||||
|
||||
struct {
|
||||
uint32_t compute_num_thread_x;
|
||||
uint32_t compute_num_thread_y;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue