diff --git a/src/gallium/drivers/radeonsi/gfx10_query.c b/src/gallium/drivers/radeonsi/gfx10_query.c index a014ee3e3fc..f27cec22e88 100644 --- a/src/gallium/drivers/radeonsi/gfx10_query.c +++ b/src/gallium/drivers/radeonsi/gfx10_query.c @@ -129,7 +129,7 @@ success:; sbuf.buffer_offset = qbuf->head; sbuf.buffer_size = sizeof(struct gfx10_sh_query_buffer_mem); si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, &sbuf); - sctx->current_gs_state |= S_GS_STATE_STREAMOUT_QUERY_ENABLED(1); + SET_FIELD(sctx->current_gs_state, GS_STATE_STREAMOUT_QUERY_ENABLED, 1); si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_query); return true; @@ -185,7 +185,7 @@ static bool gfx10_sh_query_end(struct si_context *sctx, struct si_query *rquery) if (sctx->num_active_shader_queries <= 0 || !si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query)) { si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, NULL); - sctx->current_gs_state &= C_GS_STATE_STREAMOUT_QUERY_ENABLED; + SET_FIELD(sctx->current_gs_state, GS_STATE_STREAMOUT_QUERY_ENABLED, 0); /* If a query_begin is followed by a query_end without a draw * in-between, we need to clear the atom to ensure that the diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 714b3613ce4..83dc79812b6 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -106,7 +106,7 @@ static LLVMValueRef ngg_get_vertices_per_prim(struct si_shader_context *ctx, uns *num_vertices = 3; /* Extract OUTPRIM field. */ - LLVMValueRef num = si_unpack_param(ctx, ctx->vs_state_bits, 29, 2); + LLVMValueRef num = GET_FIELD(ctx, GS_STATE_OUTPRIM); return LLVMBuildAdd(ctx->ac.builder, num, ctx->ac.i32_1, ""); } } else { @@ -895,7 +895,7 @@ static void cull_primitive(struct si_shader_context *ctx, assert(!(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE)); } else { /* Get the small prim filter precision. 
*/ - small_prim_precision = si_unpack_param(ctx, ctx->vs_state_bits, 22, 4); + small_prim_precision = GET_FIELD(ctx, GS_STATE_SMALL_PRIM_PRECISION); small_prim_precision = LLVMBuildOr(builder, small_prim_precision, LLVMConstInt(ctx->ac.i32, 0x70, 0), ""); small_prim_precision = @@ -1553,7 +1553,7 @@ void gfx10_ngg_build_end(struct si_shader_context *ctx) ac_build_ifcc(&ctx->ac, is_gs_thread, 5400); /* Extract the PROVOKING_VTX_INDEX field. */ - LLVMValueRef provoking_vtx_in_prim = si_unpack_param(ctx, ctx->vs_state_bits, 27, 2); + LLVMValueRef provoking_vtx_in_prim = GET_FIELD(ctx, GS_STATE_PROVOKING_VTX_INDEX); /* provoking_vtx_index = vtxindex[provoking_vtx_in_prim]; */ LLVMValueRef indices = ac_build_gather_values(&ctx->ac, vtxindex, 3); @@ -1570,7 +1570,7 @@ void gfx10_ngg_build_end(struct si_shader_context *ctx) if (ctx->screen->use_ngg_streamout && !info->base.vs.blit_sgprs_amd) { assert(!unterminated_es_if_block); - tmp = si_unpack_param(ctx, ctx->vs_state_bits, 26, 1); + tmp = GET_FIELD(ctx, GS_STATE_STREAMOUT_QUERY_ENABLED); tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, ""); ac_build_ifcc(&ctx->ac, tmp, 5029); /* if (STREAMOUT_QUERY_ENABLED) */ tmp = LLVMBuildICmp(builder, LLVMIntEQ, get_wave_id_in_tg(ctx), ctx->ac.i32_0, ""); @@ -1857,7 +1857,7 @@ void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx) tmp = si_is_gs_thread(ctx); ac_build_ifcc(&ctx->ac, tmp, 15090); { - tmp = si_unpack_param(ctx, ctx->vs_state_bits, 31, 1); + tmp = GET_FIELD(ctx, GS_STATE_PIPELINE_STATS_EMU); tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, ""); ac_build_ifcc(&ctx->ac, tmp, 5109); /* if (GS_PIPELINE_STATS_EMU) */ LLVMValueRef args[] = { @@ -1976,7 +1976,7 @@ void gfx10_ngg_gs_build_end(struct si_shader_context *ctx) /* Write shader query data. 
*/ if (ctx->screen->use_ngg_streamout) { - tmp = si_unpack_param(ctx, ctx->vs_state_bits, 26, 1); + tmp = GET_FIELD(ctx, GS_STATE_STREAMOUT_QUERY_ENABLED); tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, ""); ac_build_ifcc(&ctx->ac, tmp, 5109); /* if (STREAMOUT_QUERY_ENABLED) */ unsigned num_query_comps = ctx->so.num_outputs ? 8 : 4; @@ -2177,7 +2177,7 @@ void gfx10_ngg_gs_build_end(struct si_shader_context *ctx) LLVMValueRef is_odd = LLVMBuildLShr(builder, flags, ctx->ac.i8_1, ""); is_odd = LLVMBuildTrunc(builder, is_odd, ctx->ac.i1, ""); LLVMValueRef flatshade_first = LLVMBuildICmp( - builder, LLVMIntEQ, si_unpack_param(ctx, ctx->vs_state_bits, 27, 2), ctx->ac.i32_0, ""); + builder, LLVMIntEQ, GET_FIELD(ctx, GS_STATE_PROVOKING_VTX_INDEX), ctx->ac.i32_0, ""); ac_build_triangle_strip_indices_to_triangle(&ctx->ac, is_odd, flatshade_first, prim.index); } @@ -2185,7 +2185,7 @@ void gfx10_ngg_gs_build_end(struct si_shader_context *ctx) ac_build_export_prim(&ctx->ac, &prim); if (ctx->screen->info.gfx_level < GFX11) { - tmp = si_unpack_param(ctx, ctx->vs_state_bits, 31, 1); + tmp = GET_FIELD(ctx, GS_STATE_PIPELINE_STATS_EMU); tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, ""); ac_build_ifcc(&ctx->ac, tmp, 5229); /* if (GS_PIPELINE_STATS_EMU) */ ac_build_ifcc(&ctx->ac, LLVMBuildNot(builder, prim.isnull, ""), 5237); diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index 9077ee8ba55..704826f6c06 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -862,7 +862,7 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_h sbuf.buffer_offset = query->buffer.results_end; sbuf.buffer_size = buffer->bo_size; si_set_internal_shader_buffer(sctx, SI_GS_QUERY_EMULATED_COUNTERS_BUF, &sbuf); - sctx->current_gs_state |= S_GS_STATE_PIPELINE_STATS_EMU(1); + SET_FIELD(sctx->current_gs_state, GS_STATE_PIPELINE_STATS_EMU, 1); const uint32_t zero = 0; radeon_begin(cs); @@ 
-976,7 +976,7 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx, struct si_query_hw if (--sctx->num_pipeline_stat_emulated_queries == 0) { si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, NULL); - sctx->current_gs_state &= C_GS_STATE_PIPELINE_STATS_EMU; + SET_FIELD(sctx->current_gs_state, GS_STATE_PIPELINE_STATS_EMU, 0); } } else { radeon_emit(PKT3(PKT3_EVENT_WRITE, 2, 0)); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 547c57dbeee..5fa538b87e0 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -252,36 +252,50 @@ enum }; /* These fields are only set in current_vs_state (except INDEXED) in si_context, and they are - * accessible in the shader via vs_state_bits in all VS, TES, and GS. + * accessible in the shader via vs_state_bits in VS, TES, and GS. */ -#define S_VS_STATE_CLAMP_VERTEX_COLOR(x) (((unsigned)(x)&0x1) << 0) /* Shared by VS and GS */ -#define C_VS_STATE_CLAMP_VERTEX_COLOR 0xFFFFFFFE -#define S_VS_STATE_INDEXED(x) (((unsigned)(x)&0x1) << 1) /* Shared by VS and GS */ -#define C_VS_STATE_INDEXED 0xFFFFFFFD +#define VS_STATE_CLAMP_VERTEX_COLOR__SHIFT 0 +#define VS_STATE_CLAMP_VERTEX_COLOR__MASK 0x1 /* Shared by VS and GS */ +#define VS_STATE_INDEXED__SHIFT 1 +#define VS_STATE_INDEXED__MASK 0x1 /* Shared by VS and GS */ /* These fields are only set in current_vs_state in si_context, and they are accessible * in the shader via vs_state_bits in LS/HS. 
*/ /* bit gap */ -#define S_VS_STATE_LS_OUT_PATCH_SIZE(x) (((unsigned)(x)&0x1FFF) << 11) -#define C_VS_STATE_LS_OUT_PATCH_SIZE 0xFF0007FF -#define S_VS_STATE_LS_OUT_VERTEX_SIZE(x) (((unsigned)(x)&0xFF) << 24) -#define C_VS_STATE_LS_OUT_VERTEX_SIZE 0x00FFFFFF +#define VS_STATE_LS_OUT_PATCH_SIZE__SHIFT 11 +#define VS_STATE_LS_OUT_PATCH_SIZE__MASK 0x1fff +#define VS_STATE_LS_OUT_VERTEX_SIZE__SHIFT 24 +#define VS_STATE_LS_OUT_VERTEX_SIZE__MASK 0xff /* These fields are only set in current_gs_state in si_context, and they are accessible * in the shader via vs_state_bits in legacy GS, the GS copy shader, and any NGG shader. */ /* bit gap */ -#define S_GS_STATE_SMALL_PRIM_PRECISION(x) (((unsigned)(x)&0xF) << 22) -#define C_GS_STATE_SMALL_PRIM_PRECISION 0xFC3FFFFF -#define S_GS_STATE_STREAMOUT_QUERY_ENABLED(x) (((unsigned)(x)&0x1) << 26) -#define C_GS_STATE_STREAMOUT_QUERY_ENABLED 0xFBFFFFFF -#define S_GS_STATE_PROVOKING_VTX_INDEX(x) (((unsigned)(x)&0x3) << 27) -#define C_GS_STATE_PROVOKING_VTX_INDEX 0xE7FFFFFF -#define S_GS_STATE_OUTPRIM(x) (((unsigned)(x)&0x3) << 29) -#define C_GS_STATE_OUTPRIM 0x9FFFFFFF -#define S_GS_STATE_PIPELINE_STATS_EMU(x) (((unsigned)(x)&0x1) << 31) -#define C_GS_STATE_PIPELINE_STATS_EMU 0x7FFFFFFF +#define GS_STATE_SMALL_PRIM_PRECISION__SHIFT 22 +#define GS_STATE_SMALL_PRIM_PRECISION__MASK 0xf +#define GS_STATE_STREAMOUT_QUERY_ENABLED__SHIFT 26 +#define GS_STATE_STREAMOUT_QUERY_ENABLED__MASK 0x1 +#define GS_STATE_PROVOKING_VTX_INDEX__SHIFT 27 +#define GS_STATE_PROVOKING_VTX_INDEX__MASK 0x3 +#define GS_STATE_OUTPRIM__SHIFT 29 +#define GS_STATE_OUTPRIM__MASK 0x3 +#define GS_STATE_PIPELINE_STATS_EMU__SHIFT 31 +#define GS_STATE_PIPELINE_STATS_EMU__MASK 0x1 + +#define ENCODE_FIELD(field, value) (((unsigned)(value) & field##__MASK) << field##__SHIFT) +#define CLEAR_FIELD(field) (~((unsigned)field##__MASK << field##__SHIFT)) + +/* This is called by functions that change states. It asserts that value fits in the field mask and then replaces the field in var. The value argument is expanded more than once, so it must be free of side effects. 
*/ +#define SET_FIELD(var, field, value) do { \ + assert((value) == ((unsigned)(value) & field##__MASK)); \ + (var) &= CLEAR_FIELD(field); \ + (var) |= ENCODE_FIELD(field, value); \ +} while (0) + +/* This is called during shader compilation and returns LLVMValueRef. It forwards field##__SHIFT and util_bitcount(field##__MASK) to si_unpack_param together with vs_state_bits. */ +#define GET_FIELD(ctx, field) si_unpack_param((ctx), (ctx)->vs_state_bits, field##__SHIFT, \ + util_bitcount(field##__MASK)) enum { diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index a5b1389f24c..7ae10d2736d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -725,7 +725,7 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin * (for direct draws) or the CP (for indirect draws) is the * first vertex ID, but GLSL expects 0 to be returned. */ - LLVMValueRef indexed = si_unpack_param(ctx, ctx->vs_state_bits, 1, 1); + LLVMValueRef indexed = GET_FIELD(ctx, VS_STATE_INDEXED); indexed = LLVMBuildTrunc(ctx->ac.builder, indexed, ctx->ac.i1, ""); return LLVMBuildSelect(ctx->ac.builder, indexed, ac_get_arg(&ctx->ac, ctx->args.base_vertex), ctx->ac.i32_0, ""); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index 4c1a7887dd7..9c1e9999737 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -222,7 +222,7 @@ void si_llvm_gs_build_end(struct si_shader_context *ctx) * use pipeline statistics (they would be correct but when screen->use_ngg, we * can't know when the query is started if the next draw(s) will use ngg or not). 
*/ - LLVMValueRef tmp = si_unpack_param(ctx, ctx->vs_state_bits, 31, 1); + LLVMValueRef tmp = GET_FIELD(ctx, GS_STATE_PIPELINE_STATS_EMU); tmp = LLVMBuildTrunc(ctx->ac.builder, tmp, ctx->ac.i1, ""); ac_build_ifcc(&ctx->ac, tmp, 5229); /* if (GS_PIPELINE_STATS_EMU) */ { diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index 24e1858e3a2..67e896e1291 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -64,7 +64,7 @@ static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx) static LLVMValueRef get_tcs_in_patch_stride(struct si_shader_context *ctx) { - return si_unpack_param(ctx, ctx->vs_state_bits, 11, 13); + return GET_FIELD(ctx, VS_STATE_LS_OUT_PATCH_SIZE); } static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *ctx) @@ -163,7 +163,7 @@ LLVMValueRef si_get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx) stride = ctx->shader->key.ge.part.tcs.ls->info.lshs_vertex_stride / 4; return LLVMConstInt(ctx->ac.i32, stride, 0); } - return si_unpack_param(ctx, ctx->vs_state_bits, 24, 8); + return GET_FIELD(ctx, VS_STATE_LS_OUT_VERTEX_SIZE); default: assert(0); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index 4408a03a897..5b0beb486dc 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -470,7 +470,7 @@ static void si_vertex_color_clamping(struct si_shader_context *ctx, return; /* The state is in the first bit of the user SGPR. 
*/ - LLVMValueRef cond = ac_get_arg(&ctx->ac, ctx->vs_state_bits); + LLVMValueRef cond = GET_FIELD(ctx, VS_STATE_CLAMP_VERTEX_COLOR); cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, ""); ac_build_ifcc(&ctx->ac, cond, 6502); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 8dd62a0c706..ddbf393f87d 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1170,8 +1170,7 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state) old_rs->line_width != rs->line_width)) si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state); - sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR; - sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color); + SET_FIELD(sctx->current_vs_state, VS_STATE_CLAMP_VERTEX_COLOR, rs->clamp_vertex_color); si_pm4_bind_state(sctx, rasterizer, rs); si_update_poly_offset_state(sctx); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index d4858078937..876d1a93e15 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -794,8 +794,6 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa si_resource(sctx->tess_rings_tmz) : si_resource(sctx->tess_rings))->gpu_address; assert((ring_va & u_bit_consecutive(0, 19)) == 0); - unsigned tcs_in_layout = S_VS_STATE_LS_OUT_PATCH_SIZE(input_patch_size / 4) | - S_VS_STATE_LS_OUT_VERTEX_SIZE(input_vertex_size / 4); unsigned tcs_out_layout = (output_patch_size / 4) | (num_tcs_input_cp << 13) | ring_va; unsigned tcs_out_offsets = (output_patch0_offset / 16) | ((perpatch_output_offset / 16) << 16); unsigned offchip_layout = @@ -814,8 +812,8 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa } /* Set SI_SGPR_VS_STATE_BITS. 
*/ - sctx->current_vs_state &= C_VS_STATE_LS_OUT_PATCH_SIZE & C_VS_STATE_LS_OUT_VERTEX_SIZE; - sctx->current_vs_state |= tcs_in_layout; + SET_FIELD(sctx->current_vs_state, VS_STATE_LS_OUT_PATCH_SIZE, input_patch_size / 4); + SET_FIELD(sctx->current_vs_state, VS_STATE_LS_OUT_VERTEX_SIZE, input_vertex_size / 4); /* We should be able to support in-shader LDS use with LLVM >= 9 * by just adding the lds_sizes together, but it has never @@ -861,7 +859,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa radeon_emit(offchip_layout); radeon_emit(tcs_out_offsets); radeon_emit(tcs_out_layout); - radeon_emit(tcs_in_layout); + radeon_emit(sctx->current_vs_state); } /* Set userdata SGPRs for TES. */ @@ -1211,13 +1209,11 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx) if (hw_vs->uses_vs_state_provoking_vertex) { unsigned vtx_index = rs->flatshade_first ? 0 : gs_out_prim; - sctx->current_gs_state &= C_GS_STATE_PROVOKING_VTX_INDEX; - sctx->current_gs_state |= S_GS_STATE_PROVOKING_VTX_INDEX(vtx_index); + SET_FIELD(sctx->current_gs_state, GS_STATE_PROVOKING_VTX_INDEX, vtx_index); } if (hw_vs->uses_gs_state_outprim) { - sctx->current_gs_state &= C_GS_STATE_OUTPRIM; - sctx->current_gs_state |= S_GS_STATE_OUTPRIM(gs_out_prim); + SET_FIELD(sctx->current_gs_state, GS_STATE_OUTPRIM, gs_out_prim); } } } @@ -1237,10 +1233,12 @@ static void si_emit_vs_state(struct si_context *sctx, unsigned index_size) unsigned gs_state = sctx->current_gs_state; /* only GS and NGG bits; VS bits will be copied here */ if (sctx->shader.vs.cso->info.uses_base_vertex && index_size) - vs_state |= S_VS_STATE_INDEXED(1); + vs_state |= ENCODE_FIELD(VS_STATE_INDEXED, 1); /* Copy all state bits from vs_state to gs_state except the LS bits. 
*/ - gs_state |= vs_state & C_VS_STATE_LS_OUT_PATCH_SIZE & C_VS_STATE_LS_OUT_VERTEX_SIZE; + gs_state |= vs_state & + CLEAR_FIELD(VS_STATE_LS_OUT_PATCH_SIZE) & + CLEAR_FIELD(VS_STATE_LS_OUT_VERTEX_SIZE); if (vs_state != sctx->last_vs_state || ((HAS_GS || NGG) && gs_state != sctx->last_gs_state)) { diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c index 5db37a94365..1e8444663f3 100644 --- a/src/gallium/drivers/radeonsi/si_state_viewport.c +++ b/src/gallium/drivers/radeonsi/si_state_viewport.c @@ -141,8 +141,8 @@ static void si_emit_cull_state(struct si_context *sctx) * * So pass only the first 4 bits of the float exponent to the shader. */ - sctx->current_gs_state &= C_GS_STATE_SMALL_PRIM_PRECISION; - sctx->current_gs_state |= S_GS_STATE_SMALL_PRIM_PRECISION(fui(info.small_prim_precision) >> 23); + SET_FIELD(sctx->current_gs_state, GS_STATE_SMALL_PRIM_PRECISION, + (fui(info.small_prim_precision) >> 23) & 0xf); } static void si_set_scissor_states(struct pipe_context *pctx, unsigned start_slot,