mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 18:00:10 +01:00
radeonsi: pack GS_STATE_ESGS_VERTEX_STRIDE better to save 2 bits
Change it to the number of ES outputs, then compute the stride from that.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26917>
This commit is contained in:
parent
8eed352e05
commit
bad2530a40
3 changed files with 31 additions and 13 deletions
|
|
@ -360,9 +360,12 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
|
|||
break;
|
||||
case nir_intrinsic_load_esgs_vertex_stride_amd:
|
||||
assert(sel->screen->info.gfx_level >= GFX9);
|
||||
replacement = shader->is_monolithic ?
|
||||
nir_imm_int(b, key->ge.part.gs.es->info.esgs_vertex_stride / 4) :
|
||||
GET_FIELD_NIR(GS_STATE_ESGS_VERTEX_STRIDE);
|
||||
if (shader->is_monolithic) {
|
||||
replacement = nir_imm_int(b, key->ge.part.gs.es->info.esgs_vertex_stride / 4);
|
||||
} else {
|
||||
nir_def *num_es_outputs = GET_FIELD_NIR(GS_STATE_NUM_ES_OUTPUTS);
|
||||
replacement = nir_iadd_imm(b, nir_imul_imm(b, num_es_outputs, 4), 1);
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_tcs_num_patches_amd: {
|
||||
nir_def *tmp = ac_nir_unpack_arg(b, &args->ac, args->tcs_offchip_layout, 0, 6);
|
||||
|
|
|
|||
|
|
@ -253,8 +253,12 @@ enum
|
|||
* in the shader via vs_state_bits in legacy GS, the GS copy shader, and any NGG shader.
|
||||
*/
|
||||
/* bit gap */
|
||||
#define GS_STATE_ESGS_VERTEX_STRIDE__SHIFT 11
|
||||
#define GS_STATE_ESGS_VERTEX_STRIDE__MASK 0xff /* max 32 * 4 + 1 */
|
||||
/* The number of ES outputs is derived from the last output index of SI_UNIQUE_SLOT_* + 1, which
|
||||
* can be 55 at most. The ESGS vertex stride in dwords is: NUM_ES_OUTPUTS * 4 + 1.
|
||||
* Only used by GFX9+ to compute LDS addresses of GS inputs.
|
||||
*/
|
||||
#define GS_STATE_NUM_ES_OUTPUTS__SHIFT 13
|
||||
#define GS_STATE_NUM_ES_OUTPUTS__MASK 0x3f
|
||||
/* Small prim filter precision = num_samples / quant_mode, which can only be equal to 1/2^n
|
||||
* where n is between 4 and 12. Knowing that, we only need to store 4 bits of the FP32 exponent.
|
||||
* Set it like this: value = (fui(num_samples / quant_mode) >> 23) & 0xf;
|
||||
|
|
|
|||
|
|
@ -906,14 +906,25 @@ void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *
|
|||
assert(out->max_prims_per_subgroup <= max_out_prims);
|
||||
}
|
||||
|
||||
/* Store the number of ES outputs in the GS_STATE_NUM_ES_OUTPUTS field of
 * sctx->current_gs_state.
 *
 * The count is recovered from esgs_vertex_stride as (stride - 1) / 4, so the
 * stride must have the form num_es_outputs * 4 + 1 (asserted below).
 * NOTE(review): the stride is presumably expressed in dwords - confirm against
 * the callers.
 */
static void gfx9_set_gs_sgpr_num_es_outputs(struct si_context *sctx, unsigned esgs_vertex_stride)
|
||||
{
|
||||
/* The stride must always be odd (i.e. a multiple of 4, plus 1) to reduce LDS bank conflicts. */
|
||||
assert(esgs_vertex_stride % 4 == 1);
|
||||
unsigned num_es_outputs = (esgs_vertex_stride - 1) / 4;
|
||||
|
||||
/* If there are no ES outputs, GS doesn't use this SGPR field, so only set it if the number
|
||||
* is non-zero. When the number is zero, the field is left at whatever value it already held.
|
||||
*/
|
||||
if (num_es_outputs)
|
||||
SET_FIELD(sctx->current_gs_state, GS_STATE_NUM_ES_OUTPUTS, num_es_outputs);
|
||||
}
|
||||
|
||||
static void si_emit_shader_gs(struct si_context *sctx, unsigned index)
|
||||
{
|
||||
struct si_shader *shader = sctx->queued.named.gs;
|
||||
|
||||
if (sctx->gfx_level >= GFX9) {
|
||||
SET_FIELD(sctx->current_gs_state, GS_STATE_ESGS_VERTEX_STRIDE,
|
||||
shader->key.ge.part.gs.es->info.esgs_vertex_stride / 4);
|
||||
}
|
||||
if (sctx->gfx_level >= GFX9)
|
||||
gfx9_set_gs_sgpr_num_es_outputs(sctx, shader->key.ge.part.gs.es->info.esgs_vertex_stride / 4);
|
||||
|
||||
radeon_begin(&sctx->gfx_cs);
|
||||
|
||||
|
|
@ -1167,8 +1178,8 @@ static void gfx10_emit_shader_ngg(struct si_context *sctx, unsigned index)
|
|||
{
|
||||
struct si_shader *shader = sctx->queued.named.gs;
|
||||
|
||||
SET_FIELD(sctx->current_gs_state, GS_STATE_ESGS_VERTEX_STRIDE,
|
||||
shader->ngg.esgs_vertex_stride);
|
||||
if (shader->selector->stage == MESA_SHADER_GEOMETRY)
|
||||
gfx9_set_gs_sgpr_num_es_outputs(sctx, shader->ngg.esgs_vertex_stride);
|
||||
|
||||
radeon_begin(&sctx->gfx_cs);
|
||||
if (HAS_TESS) {
|
||||
|
|
@ -1226,8 +1237,8 @@ static void gfx11_dgpu_emit_shader_ngg(struct si_context *sctx, unsigned index)
|
|||
{
|
||||
struct si_shader *shader = sctx->queued.named.gs;
|
||||
|
||||
SET_FIELD(sctx->current_gs_state, GS_STATE_ESGS_VERTEX_STRIDE,
|
||||
shader->ngg.esgs_vertex_stride);
|
||||
if (shader->selector->stage == MESA_SHADER_GEOMETRY)
|
||||
gfx9_set_gs_sgpr_num_es_outputs(sctx, shader->ngg.esgs_vertex_stride);
|
||||
|
||||
radeon_begin(&sctx->gfx_cs);
|
||||
gfx11_begin_packed_context_regs();
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue