radeonsi: rework how vs_state_bits is set and unpacked

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16885>
This commit is contained in:
Marek Olšák 2022-06-09 09:41:52 -04:00 committed by Marge Bot
parent c2342e6770
commit a9f7744cfe
11 changed files with 62 additions and 51 deletions

View file

@ -129,7 +129,7 @@ success:;
sbuf.buffer_offset = qbuf->head;
sbuf.buffer_size = sizeof(struct gfx10_sh_query_buffer_mem);
si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, &sbuf);
sctx->current_gs_state |= S_GS_STATE_STREAMOUT_QUERY_ENABLED(1);
SET_FIELD(sctx->current_gs_state, GS_STATE_STREAMOUT_QUERY_ENABLED, 1);
si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_query);
return true;
@ -185,7 +185,7 @@ static bool gfx10_sh_query_end(struct si_context *sctx, struct si_query *rquery)
if (sctx->num_active_shader_queries <= 0 || !si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query)) {
si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, NULL);
sctx->current_gs_state &= C_GS_STATE_STREAMOUT_QUERY_ENABLED;
SET_FIELD(sctx->current_gs_state, GS_STATE_STREAMOUT_QUERY_ENABLED, 0);
/* If a query_begin is followed by a query_end without a draw
* in-between, we need to clear the atom to ensure that the

View file

@ -106,7 +106,7 @@ static LLVMValueRef ngg_get_vertices_per_prim(struct si_shader_context *ctx, uns
*num_vertices = 3;
/* Extract OUTPRIM field. */
LLVMValueRef num = si_unpack_param(ctx, ctx->vs_state_bits, 29, 2);
LLVMValueRef num = GET_FIELD(ctx, GS_STATE_OUTPRIM);
return LLVMBuildAdd(ctx->ac.builder, num, ctx->ac.i32_1, "");
}
} else {
@ -895,7 +895,7 @@ static void cull_primitive(struct si_shader_context *ctx,
assert(!(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE));
} else {
/* Get the small prim filter precision. */
small_prim_precision = si_unpack_param(ctx, ctx->vs_state_bits, 22, 4);
small_prim_precision = GET_FIELD(ctx, GS_STATE_SMALL_PRIM_PRECISION);
small_prim_precision =
LLVMBuildOr(builder, small_prim_precision, LLVMConstInt(ctx->ac.i32, 0x70, 0), "");
small_prim_precision =
@ -1553,7 +1553,7 @@ void gfx10_ngg_build_end(struct si_shader_context *ctx)
ac_build_ifcc(&ctx->ac, is_gs_thread, 5400);
/* Extract the PROVOKING_VTX_INDEX field. */
LLVMValueRef provoking_vtx_in_prim = si_unpack_param(ctx, ctx->vs_state_bits, 27, 2);
LLVMValueRef provoking_vtx_in_prim = GET_FIELD(ctx, GS_STATE_PROVOKING_VTX_INDEX);
/* provoking_vtx_index = vtxindex[provoking_vtx_in_prim]; */
LLVMValueRef indices = ac_build_gather_values(&ctx->ac, vtxindex, 3);
@ -1570,7 +1570,7 @@ void gfx10_ngg_build_end(struct si_shader_context *ctx)
if (ctx->screen->use_ngg_streamout && !info->base.vs.blit_sgprs_amd) {
assert(!unterminated_es_if_block);
tmp = si_unpack_param(ctx, ctx->vs_state_bits, 26, 1);
tmp = GET_FIELD(ctx, GS_STATE_STREAMOUT_QUERY_ENABLED);
tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
ac_build_ifcc(&ctx->ac, tmp, 5029); /* if (STREAMOUT_QUERY_ENABLED) */
tmp = LLVMBuildICmp(builder, LLVMIntEQ, get_wave_id_in_tg(ctx), ctx->ac.i32_0, "");
@ -1857,7 +1857,7 @@ void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx)
tmp = si_is_gs_thread(ctx);
ac_build_ifcc(&ctx->ac, tmp, 15090);
{
tmp = si_unpack_param(ctx, ctx->vs_state_bits, 31, 1);
tmp = GET_FIELD(ctx, GS_STATE_PIPELINE_STATS_EMU);
tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
ac_build_ifcc(&ctx->ac, tmp, 5109); /* if (GS_PIPELINE_STATS_EMU) */
LLVMValueRef args[] = {
@ -1976,7 +1976,7 @@ void gfx10_ngg_gs_build_end(struct si_shader_context *ctx)
/* Write shader query data. */
if (ctx->screen->use_ngg_streamout) {
tmp = si_unpack_param(ctx, ctx->vs_state_bits, 26, 1);
tmp = GET_FIELD(ctx, GS_STATE_STREAMOUT_QUERY_ENABLED);
tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
ac_build_ifcc(&ctx->ac, tmp, 5109); /* if (STREAMOUT_QUERY_ENABLED) */
unsigned num_query_comps = ctx->so.num_outputs ? 8 : 4;
@ -2177,7 +2177,7 @@ void gfx10_ngg_gs_build_end(struct si_shader_context *ctx)
LLVMValueRef is_odd = LLVMBuildLShr(builder, flags, ctx->ac.i8_1, "");
is_odd = LLVMBuildTrunc(builder, is_odd, ctx->ac.i1, "");
LLVMValueRef flatshade_first = LLVMBuildICmp(
builder, LLVMIntEQ, si_unpack_param(ctx, ctx->vs_state_bits, 27, 2), ctx->ac.i32_0, "");
builder, LLVMIntEQ, GET_FIELD(ctx, GS_STATE_PROVOKING_VTX_INDEX), ctx->ac.i32_0, "");
ac_build_triangle_strip_indices_to_triangle(&ctx->ac, is_odd, flatshade_first, prim.index);
}
@ -2185,7 +2185,7 @@ void gfx10_ngg_gs_build_end(struct si_shader_context *ctx)
ac_build_export_prim(&ctx->ac, &prim);
if (ctx->screen->info.gfx_level < GFX11) {
tmp = si_unpack_param(ctx, ctx->vs_state_bits, 31, 1);
tmp = GET_FIELD(ctx, GS_STATE_PIPELINE_STATS_EMU);
tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
ac_build_ifcc(&ctx->ac, tmp, 5229); /* if (GS_PIPELINE_STATS_EMU) */
ac_build_ifcc(&ctx->ac, LLVMBuildNot(builder, prim.isnull, ""), 5237);

View file

@ -862,7 +862,7 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_h
sbuf.buffer_offset = query->buffer.results_end;
sbuf.buffer_size = buffer->bo_size;
si_set_internal_shader_buffer(sctx, SI_GS_QUERY_EMULATED_COUNTERS_BUF, &sbuf);
sctx->current_gs_state |= S_GS_STATE_PIPELINE_STATS_EMU(1);
SET_FIELD(sctx->current_gs_state, GS_STATE_PIPELINE_STATS_EMU, 1);
const uint32_t zero = 0;
radeon_begin(cs);
@ -976,7 +976,7 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx, struct si_query_hw
if (--sctx->num_pipeline_stat_emulated_queries == 0) {
si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, NULL);
sctx->current_gs_state &= C_GS_STATE_PIPELINE_STATS_EMU;
SET_FIELD(sctx->current_gs_state, GS_STATE_PIPELINE_STATS_EMU, 0);
}
} else {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 2, 0));

View file

@ -252,36 +252,50 @@ enum
};
/* These fields are only set in current_vs_state (except INDEXED) in si_context, and they are
* accessible in the shader via vs_state_bits in all VS, TES, and GS.
* accessible in the shader via vs_state_bits in VS, TES, and GS.
*/
#define S_VS_STATE_CLAMP_VERTEX_COLOR(x) (((unsigned)(x)&0x1) << 0) /* Shared by VS and GS */
#define C_VS_STATE_CLAMP_VERTEX_COLOR 0xFFFFFFFE
#define S_VS_STATE_INDEXED(x) (((unsigned)(x)&0x1) << 1) /* Shared by VS and GS */
#define C_VS_STATE_INDEXED 0xFFFFFFFD
#define VS_STATE_CLAMP_VERTEX_COLOR__SHIFT 0
#define VS_STATE_CLAMP_VERTEX_COLOR__MASK 0x1 /* Shared by VS and GS */
#define VS_STATE_INDEXED__SHIFT 1
#define VS_STATE_INDEXED__MASK 0x1 /* Shared by VS and GS */
/* These fields are only set in current_vs_state in si_context, and they are accessible
* in the shader via vs_state_bits in LS/HS.
*/
/* bit gap */
#define S_VS_STATE_LS_OUT_PATCH_SIZE(x) (((unsigned)(x)&0x1FFF) << 11)
#define C_VS_STATE_LS_OUT_PATCH_SIZE 0xFF0007FF
#define S_VS_STATE_LS_OUT_VERTEX_SIZE(x) (((unsigned)(x)&0xFF) << 24)
#define C_VS_STATE_LS_OUT_VERTEX_SIZE 0x00FFFFFF
#define VS_STATE_LS_OUT_PATCH_SIZE__SHIFT 11
#define VS_STATE_LS_OUT_PATCH_SIZE__MASK 0x1fff
#define VS_STATE_LS_OUT_VERTEX_SIZE__SHIFT 24
#define VS_STATE_LS_OUT_VERTEX_SIZE__MASK 0xff
/* These fields are only set in current_gs_state in si_context, and they are accessible
* in the shader via vs_state_bits in legacy GS, the GS copy shader, and any NGG shader.
*/
/* bit gap */
#define S_GS_STATE_SMALL_PRIM_PRECISION(x) (((unsigned)(x)&0xF) << 22)
#define C_GS_STATE_SMALL_PRIM_PRECISION 0xFC3FFFFF
#define S_GS_STATE_STREAMOUT_QUERY_ENABLED(x) (((unsigned)(x)&0x1) << 26)
#define C_GS_STATE_STREAMOUT_QUERY_ENABLED 0xFBFFFFFF
#define S_GS_STATE_PROVOKING_VTX_INDEX(x) (((unsigned)(x)&0x3) << 27)
#define C_GS_STATE_PROVOKING_VTX_INDEX 0xE7FFFFFF
#define S_GS_STATE_OUTPRIM(x) (((unsigned)(x)&0x3) << 29)
#define C_GS_STATE_OUTPRIM 0x9FFFFFFF
#define S_GS_STATE_PIPELINE_STATS_EMU(x) (((unsigned)(x)&0x1) << 31)
#define C_GS_STATE_PIPELINE_STATS_EMU 0x7FFFFFFF
#define GS_STATE_SMALL_PRIM_PRECISION__SHIFT 22
#define GS_STATE_SMALL_PRIM_PRECISION__MASK 0xf
#define GS_STATE_STREAMOUT_QUERY_ENABLED__SHIFT 26
#define GS_STATE_STREAMOUT_QUERY_ENABLED__MASK 0x1
#define GS_STATE_PROVOKING_VTX_INDEX__SHIFT 27
#define GS_STATE_PROVOKING_VTX_INDEX__MASK 0x3
#define GS_STATE_OUTPRIM__SHIFT 29
#define GS_STATE_OUTPRIM__MASK 0x3
#define GS_STATE_PIPELINE_STATS_EMU__SHIFT 31
#define GS_STATE_PIPELINE_STATS_EMU__MASK 0x1
#define ENCODE_FIELD(field, value) (((unsigned)(value) & field##__MASK) << field##__SHIFT)
#define CLEAR_FIELD(field) (~((unsigned)field##__MASK << field##__SHIFT))
/* This is called by functions that change states. */
#define SET_FIELD(var, field, value) do { \
assert((value) == ((unsigned)(value) & field##__MASK)); \
(var) &= CLEAR_FIELD(field); \
(var) |= ENCODE_FIELD(field, value); \
} while (0)
/* This is called during shader compilation and returns LLVMValueRef. */
#define GET_FIELD(ctx, field) si_unpack_param((ctx), (ctx)->vs_state_bits, field##__SHIFT, \
util_bitcount(field##__MASK))
enum
{

View file

@ -725,7 +725,7 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin
* (for direct draws) or the CP (for indirect draws) is the
* first vertex ID, but GLSL expects 0 to be returned.
*/
LLVMValueRef indexed = si_unpack_param(ctx, ctx->vs_state_bits, 1, 1);
LLVMValueRef indexed = GET_FIELD(ctx, VS_STATE_INDEXED);
indexed = LLVMBuildTrunc(ctx->ac.builder, indexed, ctx->ac.i1, "");
return LLVMBuildSelect(ctx->ac.builder, indexed, ac_get_arg(&ctx->ac, ctx->args.base_vertex),
ctx->ac.i32_0, "");

View file

@ -222,7 +222,7 @@ void si_llvm_gs_build_end(struct si_shader_context *ctx)
* use pipeline statistics (they would be correct but when screen->use_ngg, we
* can't know when the query is started if the next draw(s) will use ngg or not).
*/
LLVMValueRef tmp = si_unpack_param(ctx, ctx->vs_state_bits, 31, 1);
LLVMValueRef tmp = GET_FIELD(ctx, GS_STATE_PIPELINE_STATS_EMU);
tmp = LLVMBuildTrunc(ctx->ac.builder, tmp, ctx->ac.i1, "");
ac_build_ifcc(&ctx->ac, tmp, 5229); /* if (GS_PIPELINE_STATS_EMU) */
{

View file

@ -64,7 +64,7 @@ static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx)
static LLVMValueRef get_tcs_in_patch_stride(struct si_shader_context *ctx)
{
return si_unpack_param(ctx, ctx->vs_state_bits, 11, 13);
return GET_FIELD(ctx, VS_STATE_LS_OUT_PATCH_SIZE);
}
static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *ctx)
@ -163,7 +163,7 @@ LLVMValueRef si_get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
stride = ctx->shader->key.ge.part.tcs.ls->info.lshs_vertex_stride / 4;
return LLVMConstInt(ctx->ac.i32, stride, 0);
}
return si_unpack_param(ctx, ctx->vs_state_bits, 24, 8);
return GET_FIELD(ctx, VS_STATE_LS_OUT_VERTEX_SIZE);
default:
assert(0);

View file

@ -470,7 +470,7 @@ static void si_vertex_color_clamping(struct si_shader_context *ctx,
return;
/* The state is in the first bit of the user SGPR. */
LLVMValueRef cond = ac_get_arg(&ctx->ac, ctx->vs_state_bits);
LLVMValueRef cond = GET_FIELD(ctx, VS_STATE_CLAMP_VERTEX_COLOR);
cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, "");
ac_build_ifcc(&ctx->ac, cond, 6502);

View file

@ -1170,8 +1170,7 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
old_rs->line_width != rs->line_width))
si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state);
sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);
SET_FIELD(sctx->current_vs_state, VS_STATE_CLAMP_VERTEX_COLOR, rs->clamp_vertex_color);
si_pm4_bind_state(sctx, rasterizer, rs);
si_update_poly_offset_state(sctx);

View file

@ -794,8 +794,6 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
si_resource(sctx->tess_rings_tmz) : si_resource(sctx->tess_rings))->gpu_address;
assert((ring_va & u_bit_consecutive(0, 19)) == 0);
unsigned tcs_in_layout = S_VS_STATE_LS_OUT_PATCH_SIZE(input_patch_size / 4) |
S_VS_STATE_LS_OUT_VERTEX_SIZE(input_vertex_size / 4);
unsigned tcs_out_layout = (output_patch_size / 4) | (num_tcs_input_cp << 13) | ring_va;
unsigned tcs_out_offsets = (output_patch0_offset / 16) | ((perpatch_output_offset / 16) << 16);
unsigned offchip_layout =
@ -814,8 +812,8 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
}
/* Set SI_SGPR_VS_STATE_BITS. */
sctx->current_vs_state &= C_VS_STATE_LS_OUT_PATCH_SIZE & C_VS_STATE_LS_OUT_VERTEX_SIZE;
sctx->current_vs_state |= tcs_in_layout;
SET_FIELD(sctx->current_vs_state, VS_STATE_LS_OUT_PATCH_SIZE, input_patch_size / 4);
SET_FIELD(sctx->current_vs_state, VS_STATE_LS_OUT_VERTEX_SIZE, input_vertex_size / 4);
/* We should be able to support in-shader LDS use with LLVM >= 9
* by just adding the lds_sizes together, but it has never
@ -861,7 +859,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
radeon_emit(offchip_layout);
radeon_emit(tcs_out_offsets);
radeon_emit(tcs_out_layout);
radeon_emit(tcs_in_layout);
radeon_emit(sctx->current_vs_state);
}
/* Set userdata SGPRs for TES. */
@ -1211,13 +1209,11 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
if (hw_vs->uses_vs_state_provoking_vertex) {
unsigned vtx_index = rs->flatshade_first ? 0 : gs_out_prim;
sctx->current_gs_state &= C_GS_STATE_PROVOKING_VTX_INDEX;
sctx->current_gs_state |= S_GS_STATE_PROVOKING_VTX_INDEX(vtx_index);
SET_FIELD(sctx->current_gs_state, GS_STATE_PROVOKING_VTX_INDEX, vtx_index);
}
if (hw_vs->uses_gs_state_outprim) {
sctx->current_gs_state &= C_GS_STATE_OUTPRIM;
sctx->current_gs_state |= S_GS_STATE_OUTPRIM(gs_out_prim);
SET_FIELD(sctx->current_gs_state, GS_STATE_OUTPRIM, gs_out_prim);
}
}
}
@ -1237,10 +1233,12 @@ static void si_emit_vs_state(struct si_context *sctx, unsigned index_size)
unsigned gs_state = sctx->current_gs_state; /* only GS and NGG bits; VS bits will be copied here */
if (sctx->shader.vs.cso->info.uses_base_vertex && index_size)
vs_state |= S_VS_STATE_INDEXED(1);
vs_state |= ENCODE_FIELD(VS_STATE_INDEXED, 1);
/* Copy all state bits from vs_state to gs_state except the LS bits. */
gs_state |= vs_state & C_VS_STATE_LS_OUT_PATCH_SIZE & C_VS_STATE_LS_OUT_VERTEX_SIZE;
gs_state |= vs_state &
CLEAR_FIELD(VS_STATE_LS_OUT_PATCH_SIZE) &
CLEAR_FIELD(VS_STATE_LS_OUT_VERTEX_SIZE);
if (vs_state != sctx->last_vs_state ||
((HAS_GS || NGG) && gs_state != sctx->last_gs_state)) {

View file

@ -141,8 +141,8 @@ static void si_emit_cull_state(struct si_context *sctx)
*
* So pass only the first 4 bits of the float exponent to the shader.
*/
sctx->current_gs_state &= C_GS_STATE_SMALL_PRIM_PRECISION;
sctx->current_gs_state |= S_GS_STATE_SMALL_PRIM_PRECISION(fui(info.small_prim_precision) >> 23);
SET_FIELD(sctx->current_gs_state, GS_STATE_SMALL_PRIM_PRECISION,
(fui(info.small_prim_precision) >> 23) & 0xf);
}
static void si_set_scissor_states(struct pipe_context *pctx, unsigned start_slot,