diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 8ee9720e171..745dd90eb21 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -83,7 +83,7 @@ static LLVMValueRef ngg_get_vertices_per_prim(struct si_shader_context *ctx, uns /* Blits always use axis-aligned rectangles with 3 vertices. */ *num_vertices = 3; return LLVMConstInt(ctx->ac.i32, 3, 0); - } else if (ctx->shader->key.opt.ngg_culling & SI_NGG_CULL_LINES) { + } else if (ctx->shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES) { *num_vertices = 2; return LLVMConstInt(ctx->ac.i32, 2, 0); } else { @@ -115,7 +115,7 @@ bool gfx10_ngg_export_prim_early(struct si_shader *shader) { struct si_shader_selector *sel = shader->selector; - assert(shader->key.as_ngg && !shader->key.as_es); + assert(shader->key.ge.as_ngg && !shader->key.ge.as_es); return sel->info.stage != MESA_SHADER_GEOMETRY && !gfx10_ngg_writes_user_edgeflags(shader); @@ -137,7 +137,7 @@ void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef use { LLVMBuilderRef builder = ctx->ac.builder; - if (gfx10_is_ngg_passthrough(ctx->shader) || ctx->shader->key.opt.ngg_culling) { + if (gfx10_is_ngg_passthrough(ctx->shader) || ctx->shader->key.ge.opt.ngg_culling) { ac_build_ifcc(&ctx->ac, si_is_gs_thread(ctx), 6001); { struct ac_ngg_prim prim = {}; @@ -614,17 +614,17 @@ static unsigned ngg_nogs_vertex_size(struct si_shader *shader) * to the ES thread of the provoking vertex. All ES threads * load and export PrimitiveID for their thread. */ - if (shader->selector->info.stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id) + if (shader->selector->info.stage == MESA_SHADER_VERTEX && shader->key.ge.mono.u.vs_export_prim_id) lds_vertex_size = MAX2(lds_vertex_size, 1); - if (shader->key.opt.ngg_culling) { + if (shader->key.ge.opt.ngg_culling) { if (shader->selector->info.stage == MESA_SHADER_VERTEX) { STATIC_ASSERT(lds_instance_id + 1 == 7); lds_vertex_size = MAX2(lds_vertex_size, 7); } else { assert(shader->selector->info.stage == MESA_SHADER_TESS_EVAL); - if (shader->selector->info.uses_primid || shader->key.mono.u.vs_export_prim_id) { + if (shader->selector->info.uses_primid || shader->key.ge.mono.u.vs_export_prim_id) { STATIC_ASSERT(lds_tes_patch_id + 2 == 9); /* +1 for LDS padding */ lds_vertex_size = MAX2(lds_vertex_size, 9); } else { @@ -823,10 +823,10 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi) LLVMValueRef *addrs = abi->outputs; unsigned max_waves = DIV_ROUND_UP(ctx->screen->ngg_subgroup_size, ctx->ac.wave_size); - assert(shader->key.opt.ngg_culling); - assert(shader->key.as_ngg); + assert(shader->key.ge.opt.ngg_culling); + assert(shader->key.ge.as_ngg); assert(sel->info.stage == MESA_SHADER_VERTEX || - (sel->info.stage == MESA_SHADER_TESS_EVAL && !shader->key.as_es)); + (sel->info.stage == MESA_SHADER_TESS_EVAL && !shader->key.ge.as_es)); LLVMValueRef es_vtxptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx)); unsigned pos_index = 0; @@ -840,8 +840,8 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi) * the position. This is useful for analyzing maximum theoretical * performance without VS input loads. */ - if (shader->key.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE && - shader->key.opt.ngg_culling & SI_NGG_CULL_BACK_FACE) { + if (shader->key.ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE && + shader->key.ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE) { for (unsigned j = 0; j < 4; j++) LLVMBuildStore(builder, LLVMGetUndef(ctx->ac.f32), addrs[4 * i + j]); break; @@ -993,15 +993,15 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi) options.cull_view_xy = true; options.cull_w = true; - if (shader->key.opt.ngg_culling & SI_NGG_CULL_LINES) { + if (shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES) { options.num_vertices = 2; - assert(!(shader->key.opt.ngg_culling & SI_NGG_CULL_BACK_FACE)); - assert(!(shader->key.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE)); + assert(!(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE)); + assert(!(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE)); } else { options.num_vertices = 3; - options.cull_front = shader->key.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE; - options.cull_back = shader->key.opt.ngg_culling & SI_NGG_CULL_BACK_FACE; + options.cull_front = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE; + options.cull_back = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE; options.cull_small_prims = true; /* this would only be false with conservative rasterization */ options.cull_zero_area = options.cull_front || options.cull_back; } @@ -1055,10 +1055,10 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi) bool uses_instance_id = ctx->stage == MESA_SHADER_VERTEX && (sel->info.uses_instanceid || - shader->key.part.vs.prolog.instance_divisor_is_one || - shader->key.part.vs.prolog.instance_divisor_is_fetched); + shader->key.ge.part.vs.prolog.instance_divisor_is_one || + shader->key.ge.part.vs.prolog.instance_divisor_is_fetched); bool uses_tes_prim_id = ctx->stage == MESA_SHADER_TESS_EVAL && - (sel->info.uses_primid || shader->key.mono.u.vs_export_prim_id); + (sel->info.uses_primid || shader->key.ge.mono.u.vs_export_prim_id); /* ES threads compute their prefix sum, which is the new ES thread ID. * Then they write the vertex position and input VGPRs into the LDS address @@ -1278,7 +1278,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi) /* These two also use LDS. */ if (gfx10_ngg_writes_user_edgeflags(shader) || - (ctx->stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id)) + (ctx->stage == MESA_SHADER_VERTEX && shader->key.ge.mono.u.vs_export_prim_id)) ac_build_s_barrier(&ctx->ac); ctx->return_value = ret; @@ -1338,7 +1338,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) bool unterminated_es_if_block = !sel->so.num_outputs && !gfx10_ngg_writes_user_edgeflags(ctx->shader) && !ctx->screen->use_ngg_streamout && /* no query buffer */ - (ctx->stage != MESA_SHADER_VERTEX || !ctx->shader->key.mono.u.vs_export_prim_id); + (ctx->stage != MESA_SHADER_VERTEX || !ctx->shader->key.ge.mono.u.vs_export_prim_id); if (!unterminated_es_if_block) ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label); @@ -1347,7 +1347,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) LLVMValueRef is_es_thread = si_is_es_thread(ctx); LLVMValueRef vtxindex[3]; - if (ctx->shader->key.opt.ngg_culling || gfx10_is_ngg_passthrough(ctx->shader)) { + if (ctx->shader->key.ge.opt.ngg_culling || gfx10_is_ngg_passthrough(ctx->shader)) { for (unsigned i = 0; i < 3; ++i) vtxindex[i] = si_unpack_param(ctx, ctx->args.gs_vtx_offset[0], 10 * i, 9); } else { @@ -1402,7 +1402,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) /* Copy Primitive IDs from GS threads to the LDS address corresponding * to the ES thread of the provoking vertex. */ - if (ctx->stage == MESA_SHADER_VERTEX && ctx->shader->key.mono.u.vs_export_prim_id) { + if (ctx->stage == MESA_SHADER_VERTEX && ctx->shader->key.ge.mono.u.vs_export_prim_id) { assert(!unterminated_es_if_block); /* Streamout and edge flags use LDS. Make it idle, so that we can reuse it. */ @@ -1479,7 +1479,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) * load it from LDS. */ if (info->output_semantic[i] == VARYING_SLOT_POS && - ctx->shader->key.opt.ngg_culling) { + ctx->shader->key.ge.opt.ngg_culling) { vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx)); for (unsigned j = 0; j < 4; j++) { @@ -1495,7 +1495,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) } } - if (ctx->shader->key.mono.u.vs_export_prim_id) { + if (ctx->shader->key.ge.mono.u.vs_export_prim_id) { outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID; if (ctx->stage == MESA_SHADER_VERTEX) { diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index f02855743a5..51911bd5fbb 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1222,15 +1222,34 @@ static void si_set_constant_buffer(struct si_context *sctx, struct si_buffer_res sctx->descriptors_dirty |= 1u << descriptors_idx; } +void si_get_inline_uniform_state(union si_shader_key *key, enum pipe_shader_type shader, + bool *inline_uniforms, uint32_t **inlined_values) +{ + if (shader == PIPE_SHADER_FRAGMENT) { + *inline_uniforms = key->ps.opt.inline_uniforms; + *inlined_values = key->ps.opt.inlined_uniform_values; + } else { + *inline_uniforms = key->ge.opt.inline_uniforms; + *inlined_values = key->ge.opt.inlined_uniform_values; + } +} + void si_invalidate_inlinable_uniforms(struct si_context *sctx, enum pipe_shader_type shader) { if (shader == PIPE_SHADER_COMPUTE) return; - if (sctx->shaders[shader].key.opt.inline_uniforms) { - sctx->shaders[shader].key.opt.inline_uniforms = false; - memset(sctx->shaders[shader].key.opt.inlined_uniform_values, 0, - sizeof(sctx->shaders[shader].key.opt.inlined_uniform_values)); + bool inline_uniforms; + uint32_t *inlined_values; + si_get_inline_uniform_state(&sctx->shaders[shader].key, shader, &inline_uniforms, &inlined_values); + + if (inline_uniforms) { + if (shader == PIPE_SHADER_FRAGMENT) + sctx->shaders[shader].key.ps.opt.inline_uniforms = false; + else + sctx->shaders[shader].key.ge.opt.inline_uniforms = false; + + memset(inlined_values, 0, MAX_INLINABLE_UNIFORMS * 4); sctx->do_update_shaders = true; } } @@ -1273,10 +1292,18 @@ static void si_set_inlinable_constants(struct pipe_context *ctx, if (shader == PIPE_SHADER_COMPUTE) return; - if (!sctx->shaders[shader].key.opt.inline_uniforms) { + bool inline_uniforms; + uint32_t *inlined_values; + si_get_inline_uniform_state(&sctx->shaders[shader].key, shader, &inline_uniforms, &inlined_values); + + if (!inline_uniforms) { /* It's the first time we set the constants. Always update shaders. */ - sctx->shaders[shader].key.opt.inline_uniforms = true; - memcpy(sctx->shaders[shader].key.opt.inlined_uniform_values, values, num_values * 4); + if (shader == PIPE_SHADER_FRAGMENT) + sctx->shaders[shader].key.ps.opt.inline_uniforms = true; + else + sctx->shaders[shader].key.ge.opt.inline_uniforms = true; + + memcpy(inlined_values, values, num_values * 4); sctx->do_update_shaders = true; return; } @@ -1284,8 +1311,8 @@ static void si_set_inlinable_constants(struct pipe_context *ctx, /* We have already set inlinable constants for this shader. Update the shader only if * the constants are being changed so as not to update shaders needlessly. */ - if (memcmp(sctx->shaders[shader].key.opt.inlined_uniform_values, values, num_values * 4)) { - memcpy(sctx->shaders[shader].key.opt.inlined_uniform_values, values, num_values * 4); + if (memcmp(inlined_values, values, num_values * 4)) { + memcpy(inlined_values, values, num_values * 4); sctx->do_update_shaders = true; } } @@ -2029,27 +2056,27 @@ void si_shader_change_notify(struct si_context *sctx) * If GS sets as_ngg, the previous stage must set as_ngg too. */ if (sctx->shader.tes.cso) { - sctx->shader.vs.key.as_ls = 1; - sctx->shader.vs.key.as_es = 0; - sctx->shader.vs.key.as_ngg = 0; + sctx->shader.vs.key.ge.as_ls = 1; + sctx->shader.vs.key.ge.as_es = 0; + sctx->shader.vs.key.ge.as_ngg = 0; if (sctx->shader.gs.cso) { - sctx->shader.tes.key.as_es = 1; - sctx->shader.tes.key.as_ngg = sctx->ngg; - sctx->shader.gs.key.as_ngg = sctx->ngg; + sctx->shader.tes.key.ge.as_es = 1; + sctx->shader.tes.key.ge.as_ngg = sctx->ngg; + sctx->shader.gs.key.ge.as_ngg = sctx->ngg; } else { - sctx->shader.tes.key.as_es = 0; - sctx->shader.tes.key.as_ngg = sctx->ngg; + sctx->shader.tes.key.ge.as_es = 0; + sctx->shader.tes.key.ge.as_ngg = sctx->ngg; } } else if (sctx->shader.gs.cso) { - sctx->shader.vs.key.as_ls = 0; - sctx->shader.vs.key.as_es = 1; - sctx->shader.vs.key.as_ngg = sctx->ngg; - sctx->shader.gs.key.as_ngg = sctx->ngg; + sctx->shader.vs.key.ge.as_ls = 0; + sctx->shader.vs.key.ge.as_es = 1; + sctx->shader.vs.key.ge.as_ngg = sctx->ngg; + sctx->shader.gs.key.ge.as_ngg = sctx->ngg; } else { - sctx->shader.vs.key.as_ls = 0; - sctx->shader.vs.key.as_es = 0; - sctx->shader.vs.key.as_ngg = sctx->ngg; + sctx->shader.vs.key.ge.as_ls = 0; + sctx->shader.vs.key.ge.as_es = 0; + sctx->shader.vs.key.ge.as_ngg = sctx->ngg; } } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index b812f170c59..bb43377bfb6 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -747,10 +747,10 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign * and the LS main part when !vs_needs_prolog * - remove the fixup for unused input VGPRs */ - sctx->shader.tcs.key.opt.prefer_mono = 1; + sctx->shader.tcs.key.ge.opt.prefer_mono = 1; /* This enables jumping over the VS prolog for GS-only waves. */ - sctx->shader.gs.key.opt.prefer_mono = 1; + sctx->shader.gs.key.ge.opt.prefer_mono = 1; } si_begin_new_gfx_cs(sctx, true); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 699e00c74ce..b6fe302e7d6 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -805,7 +805,7 @@ struct si_shader_ctx_state { struct si_shader_selector *cso; struct si_shader *current; /* The shader variant key representing the current state. */ - struct si_shader_key key; + union si_shader_key key; }; #define SI_NUM_VGT_PARAM_KEY_BITS 12 @@ -1965,9 +1965,14 @@ static inline unsigned si_get_wave_size(struct si_screen *sscreen, static inline unsigned si_get_shader_wave_size(struct si_shader *shader) { + if (shader->selector->info.stage <= MESA_SHADER_GEOMETRY) { + return si_get_wave_size(shader->selector->screen, shader->selector->info.stage, + shader->key.ge.as_ngg, + shader->key.ge.as_es); + } + return si_get_wave_size(shader->selector->screen, shader->selector->info.stage, - shader->key.as_ngg, - shader->key.as_es); + false, false); } static inline void si_select_draw_vbo(struct si_context *sctx) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 546f9da1120..4dc423111ab 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -42,10 +42,11 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f); /** Whether the shader runs as a combination of multiple API shaders */ bool si_is_multi_part_shader(struct si_shader *shader) { - if (shader->selector->screen->info.chip_class <= GFX8) + if (shader->selector->screen->info.chip_class <= GFX8 || + shader->selector->info.stage > MESA_SHADER_GEOMETRY) return false; - return shader->key.as_ls || shader->key.as_es || + return shader->key.ge.as_ls || shader->key.ge.as_es || shader->selector->info.stage == MESA_SHADER_TESS_CTRL || shader->selector->info.stage == MESA_SHADER_GEOMETRY; } @@ -53,7 +54,10 @@ bool si_is_multi_part_shader(struct si_shader *shader) /** Whether the shader runs on a merged HW stage (LSHS or ESGS) */ bool si_is_merged_shader(struct si_shader *shader) { - return shader->key.as_ngg || si_is_multi_part_shader(shader); + if (shader->selector->info.stage > MESA_SHADER_GEOMETRY) + return false; + + return shader->key.ge.as_ngg || si_is_multi_part_shader(shader); } /** @@ -200,7 +204,7 @@ unsigned si_get_max_workgroup_size(const struct si_shader *shader) switch (shader->selector->info.stage) { case MESA_SHADER_VERTEX: case MESA_SHADER_TESS_EVAL: - return shader->key.as_ngg ? 128 : 0; + return shader->key.ge.as_ngg ? 128 : 0; case MESA_SHADER_TESS_CTRL: /* Return this so that LLVM doesn't remove s_barrier @@ -300,7 +304,7 @@ static void declare_vs_input_vgprs(struct si_shader_context *ctx, unsigned *num_ struct si_shader *shader = ctx->shader; ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.vertex_id); - if (shader->key.as_ls) { + if (shader->key.ge.as_ls) { ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.vs_rel_patch_id); if (ctx->screen->info.chip_class >= GFX10) { ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */ @@ -384,10 +388,10 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) memset(&ctx->args, 0, sizeof(ctx->args)); /* Set MERGED shaders. */ - if (ctx->screen->info.chip_class >= GFX9) { - if (shader->key.as_ls || stage == MESA_SHADER_TESS_CTRL) + if (ctx->screen->info.chip_class >= GFX9 && stage <= MESA_SHADER_GEOMETRY) { + if (shader->key.ge.as_ls || stage == MESA_SHADER_TESS_CTRL) stage = SI_SHADER_MERGED_VERTEX_TESSCTRL; /* LS or HS */ - else if (shader->key.as_es || shader->key.as_ngg || stage == MESA_SHADER_GEOMETRY) + else if (shader->key.ge.as_es || shader->key.ge.as_ngg || stage == MESA_SHADER_GEOMETRY) stage = SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY; } @@ -408,9 +412,9 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) if (!shader->is_gs_copy_shader) declare_vb_descriptor_input_sgprs(ctx); - if (shader->key.as_es) { + if (shader->key.ge.as_es) { ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.es2gs_offset); - } else if (shader->key.as_ls) { + } else if (shader->key.ge.as_ls) { /* no extra parameters */ } else { /* The locations of the other parameters are assigned dynamically. */ @@ -479,14 +483,14 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) ac_add_return(&ctx->args, AC_ARG_VGPR); /* VS outputs passed via VGPRs to TCS. */ - if (shader->key.opt.same_patch_vertices) { + if (shader->key.ge.opt.same_patch_vertices) { unsigned num_outputs = util_last_bit64(shader->selector->outputs_written); for (i = 0; i < num_outputs * 4; i++) ac_add_return(&ctx->args, AC_ARG_VGPR); } } else { /* TCS inputs are passed via VGPRs from VS. */ - if (shader->key.opt.same_patch_vertices) { + if (shader->key.ge.opt.same_patch_vertices) { unsigned num_inputs = util_last_bit64(shader->previous_stage_sel->outputs_written); for (i = 0; i < num_inputs * 4; i++) ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); @@ -510,7 +514,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) /* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */ declare_per_stage_desc_pointers(ctx, ctx->stage == MESA_SHADER_GEOMETRY); - if (ctx->shader->key.as_ngg) + if (ctx->shader->key.ge.as_ngg) ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs_tg_info); else ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs2vs_offset); @@ -559,7 +563,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) declare_tes_input_vgprs(ctx); } - if ((ctx->shader->key.as_es || ngg_cull_shader) && + if ((ctx->shader->key.ge.as_es || ngg_cull_shader) && (ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL)) { unsigned num_user_sgprs, num_vgprs; @@ -602,7 +606,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tes_offchip_addr); - if (shader->key.as_es) { + if (shader->key.ge.as_es) { ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.es2gs_offset); @@ -786,14 +790,15 @@ static bool si_shader_binary_open(struct si_screen *screen, struct si_shader *sh unsigned num_lds_symbols = 0; if (sel && screen->info.chip_class >= GFX9 && !shader->is_gs_copy_shader && - (sel->info.stage == MESA_SHADER_GEOMETRY || shader->key.as_ngg)) { + (sel->info.stage == MESA_SHADER_GEOMETRY || + (sel->info.stage <= MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg))) { struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++]; sym->name = "esgs_ring"; sym->size = shader->gs_info.esgs_ring_size * 4; sym->align = 64 * 1024; } - if (shader->key.as_ngg && sel->info.stage == MESA_SHADER_GEOMETRY) { + if (sel->info.stage == MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg) { struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++]; sym->name = "ngg_emit"; sym->size = shader->ngg.ngg_emit_size * 4; @@ -1060,20 +1065,20 @@ const char *si_get_shader_name(const struct si_shader *shader) { switch (shader->selector->info.stage) { case MESA_SHADER_VERTEX: - if (shader->key.as_es) + if (shader->key.ge.as_es) return "Vertex Shader as ES"; - else if (shader->key.as_ls) + else if (shader->key.ge.as_ls) return "Vertex Shader as LS"; - else if (shader->key.as_ngg) + else if (shader->key.ge.as_ngg) return "Vertex Shader as ESGS"; else return "Vertex Shader as VS"; case MESA_SHADER_TESS_CTRL: return "Tessellation Control Shader"; case MESA_SHADER_TESS_EVAL: - if (shader->key.as_es) + if (shader->key.ge.as_es) return "Tessellation Evaluation Shader as ES"; - else if (shader->key.as_ngg) + else if (shader->key.ge.as_ngg) return "Tessellation Evaluation Shader as ESGS"; else return "Tessellation Evaluation Shader as VS"; @@ -1137,7 +1142,7 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, si_shader_dump_stats(sscreen, shader, file, check_debug_option); } -static void si_dump_shader_key_vs(const struct si_shader_key *key, +static void si_dump_shader_key_vs(const union si_shader_key *key, const struct si_vs_prolog_bits *prolog, const char *prefix, FILE *f) { @@ -1146,10 +1151,10 @@ static void si_dump_shader_key_vs(const struct si_shader_key *key, prolog->instance_divisor_is_fetched); fprintf(f, " %s.ls_vgpr_fix = %u\n", prefix, prolog->ls_vgpr_fix); - fprintf(f, " mono.vs.fetch_opencode = %x\n", key->mono.vs_fetch_opencode); + fprintf(f, " mono.vs.fetch_opencode = %x\n", key->ge.mono.vs_fetch_opencode); fprintf(f, " mono.vs.fix_fetch = {"); for (int i = 0; i < SI_MAX_ATTRIBS; i++) { - union si_vs_fix_fetch fix = key->mono.vs_fix_fetch[i]; + union si_vs_fix_fetch fix = key->ge.mono.vs_fix_fetch[i]; if (i) fprintf(f, ", "); if (!fix.bits) @@ -1163,35 +1168,35 @@ static void si_dump_shader_key_vs(const struct si_shader_key *key, static void si_dump_shader_key(const struct si_shader *shader, FILE *f) { - const struct si_shader_key *key = &shader->key; + const union si_shader_key *key = &shader->key; gl_shader_stage stage = shader->selector->info.stage; fprintf(f, "SHADER KEY\n"); switch (stage) { case MESA_SHADER_VERTEX: - si_dump_shader_key_vs(key, &key->part.vs.prolog, "part.vs.prolog", f); - fprintf(f, " as_es = %u\n", key->as_es); - fprintf(f, " as_ls = %u\n", key->as_ls); - fprintf(f, " as_ngg = %u\n", key->as_ngg); - fprintf(f, " mono.u.vs_export_prim_id = %u\n", key->mono.u.vs_export_prim_id); + si_dump_shader_key_vs(key, &key->ge.part.vs.prolog, "part.vs.prolog", f); + fprintf(f, " as_es = %u\n", key->ge.as_es); + fprintf(f, " as_ls = %u\n", key->ge.as_ls); + fprintf(f, " as_ngg = %u\n", key->ge.as_ngg); + fprintf(f, " mono.u.vs_export_prim_id = %u\n", key->ge.mono.u.vs_export_prim_id); break; case MESA_SHADER_TESS_CTRL: if (shader->selector->screen->info.chip_class >= GFX9) { - si_dump_shader_key_vs(key, &key->part.tcs.ls_prolog, "part.tcs.ls_prolog", f); + si_dump_shader_key_vs(key, &key->ge.part.tcs.ls_prolog, "part.tcs.ls_prolog", f); } - fprintf(f, " part.tcs.epilog.prim_mode = %u\n", key->part.tcs.epilog.prim_mode); + fprintf(f, " part.tcs.epilog.prim_mode = %u\n", key->ge.part.tcs.epilog.prim_mode); fprintf(f, " mono.u.ff_tcs_inputs_to_copy = 0x%" PRIx64 "\n", - key->mono.u.ff_tcs_inputs_to_copy); - fprintf(f, " opt.prefer_mono = %u\n", key->opt.prefer_mono); - fprintf(f, " opt.same_patch_vertices = %u\n", key->opt.same_patch_vertices); + key->ge.mono.u.ff_tcs_inputs_to_copy); + fprintf(f, " opt.prefer_mono = %u\n", key->ge.opt.prefer_mono); + fprintf(f, " opt.same_patch_vertices = %u\n", key->ge.opt.same_patch_vertices); break; case MESA_SHADER_TESS_EVAL: - fprintf(f, " as_es = %u\n", key->as_es); - fprintf(f, " as_ngg = %u\n", key->as_ngg); - fprintf(f, " mono.u.vs_export_prim_id = %u\n", key->mono.u.vs_export_prim_id); + fprintf(f, " as_es = %u\n", key->ge.as_es); + fprintf(f, " as_ngg = %u\n", key->ge.as_ngg); + fprintf(f, " mono.u.vs_export_prim_id = %u\n", key->ge.mono.u.vs_export_prim_id); break; case MESA_SHADER_GEOMETRY: @@ -1199,50 +1204,50 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f) break; if (shader->selector->screen->info.chip_class >= GFX9 && - key->part.gs.es->info.stage == MESA_SHADER_VERTEX) { - si_dump_shader_key_vs(key, &key->part.gs.vs_prolog, "part.gs.vs_prolog", f); + key->ge.part.gs.es->info.stage == MESA_SHADER_VERTEX) { + si_dump_shader_key_vs(key, &key->ge.part.gs.vs_prolog, "part.gs.vs_prolog", f); } fprintf(f, " part.gs.prolog.tri_strip_adj_fix = %u\n", - key->part.gs.prolog.tri_strip_adj_fix); - fprintf(f, " as_ngg = %u\n", key->as_ngg); + key->ge.part.gs.prolog.tri_strip_adj_fix); + fprintf(f, " as_ngg = %u\n", key->ge.as_ngg); break; case MESA_SHADER_COMPUTE: break; case MESA_SHADER_FRAGMENT: - fprintf(f, " part.ps.prolog.color_two_side = %u\n", key->part.ps.prolog.color_two_side); - fprintf(f, " part.ps.prolog.flatshade_colors = %u\n", key->part.ps.prolog.flatshade_colors); - fprintf(f, " part.ps.prolog.poly_stipple = %u\n", key->part.ps.prolog.poly_stipple); - fprintf(f, " part.ps.prolog.force_persp_sample_interp = %u\n", - key->part.ps.prolog.force_persp_sample_interp); - fprintf(f, " part.ps.prolog.force_linear_sample_interp = %u\n", - key->part.ps.prolog.force_linear_sample_interp); - fprintf(f, " part.ps.prolog.force_persp_center_interp = %u\n", - key->part.ps.prolog.force_persp_center_interp); - fprintf(f, " part.ps.prolog.force_linear_center_interp = %u\n", - key->part.ps.prolog.force_linear_center_interp); - fprintf(f, " part.ps.prolog.bc_optimize_for_persp = %u\n", - key->part.ps.prolog.bc_optimize_for_persp); - fprintf(f, " part.ps.prolog.bc_optimize_for_linear = %u\n", - key->part.ps.prolog.bc_optimize_for_linear); - fprintf(f, " part.ps.prolog.samplemask_log_ps_iter = %u\n", - key->part.ps.prolog.samplemask_log_ps_iter); - fprintf(f, " part.ps.epilog.spi_shader_col_format = 0x%x\n", - key->part.ps.epilog.spi_shader_col_format); - fprintf(f, " part.ps.epilog.color_is_int8 = 0x%X\n", key->part.ps.epilog.color_is_int8); - fprintf(f, " part.ps.epilog.color_is_int10 = 0x%X\n", key->part.ps.epilog.color_is_int10); - fprintf(f, " part.ps.epilog.last_cbuf = %u\n", key->part.ps.epilog.last_cbuf); - fprintf(f, " part.ps.epilog.alpha_func = %u\n", key->part.ps.epilog.alpha_func); - fprintf(f, " part.ps.epilog.alpha_to_one = %u\n", key->part.ps.epilog.alpha_to_one); - fprintf(f, " part.ps.epilog.poly_line_smoothing = %u\n", - key->part.ps.epilog.poly_line_smoothing); - fprintf(f, " part.ps.epilog.clamp_color = %u\n", key->part.ps.epilog.clamp_color); - fprintf(f, " mono.u.ps.interpolate_at_sample_force_center = %u\n", - key->mono.u.ps.interpolate_at_sample_force_center); - fprintf(f, " mono.u.ps.fbfetch_msaa = %u\n", key->mono.u.ps.fbfetch_msaa); - fprintf(f, " mono.u.ps.fbfetch_is_1D = %u\n", key->mono.u.ps.fbfetch_is_1D); - fprintf(f, " mono.u.ps.fbfetch_layered = %u\n", key->mono.u.ps.fbfetch_layered); + fprintf(f, " prolog.color_two_side = %u\n", key->ps.part.prolog.color_two_side); + fprintf(f, " prolog.flatshade_colors = %u\n", key->ps.part.prolog.flatshade_colors); + fprintf(f, " prolog.poly_stipple = %u\n", key->ps.part.prolog.poly_stipple); + fprintf(f, " prolog.force_persp_sample_interp = %u\n", + key->ps.part.prolog.force_persp_sample_interp); + fprintf(f, " prolog.force_linear_sample_interp = %u\n", + key->ps.part.prolog.force_linear_sample_interp); + fprintf(f, " prolog.force_persp_center_interp = %u\n", + key->ps.part.prolog.force_persp_center_interp); + fprintf(f, " prolog.force_linear_center_interp = %u\n", + key->ps.part.prolog.force_linear_center_interp); + fprintf(f, " prolog.bc_optimize_for_persp = %u\n", + key->ps.part.prolog.bc_optimize_for_persp); + fprintf(f, " prolog.bc_optimize_for_linear = %u\n", + key->ps.part.prolog.bc_optimize_for_linear); + fprintf(f, " prolog.samplemask_log_ps_iter = %u\n", + key->ps.part.prolog.samplemask_log_ps_iter); + fprintf(f, " epilog.spi_shader_col_format = 0x%x\n", + key->ps.part.epilog.spi_shader_col_format); + fprintf(f, " epilog.color_is_int8 = 0x%X\n", key->ps.part.epilog.color_is_int8); + fprintf(f, " epilog.color_is_int10 = 0x%X\n", key->ps.part.epilog.color_is_int10); + fprintf(f, " epilog.last_cbuf = %u\n", key->ps.part.epilog.last_cbuf); + fprintf(f, " epilog.alpha_func = %u\n", key->ps.part.epilog.alpha_func); + fprintf(f, " epilog.alpha_to_one = %u\n", key->ps.part.epilog.alpha_to_one); + fprintf(f, " epilog.poly_line_smoothing = %u\n", + key->ps.part.epilog.poly_line_smoothing); + fprintf(f, " epilog.clamp_color = %u\n", key->ps.part.epilog.clamp_color); + fprintf(f, " mono.interpolate_at_sample_force_center = %u\n", + key->ps.mono.interpolate_at_sample_force_center); + fprintf(f, " mono.fbfetch_msaa = %u\n", key->ps.mono.fbfetch_msaa); + fprintf(f, " mono.fbfetch_is_1D = %u\n", key->ps.mono.fbfetch_is_1D); + fprintf(f, " mono.fbfetch_layered = %u\n", key->ps.mono.fbfetch_layered); break; default: @@ -1251,32 +1256,44 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f) if ((stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_TESS_EVAL || stage == MESA_SHADER_VERTEX) && - !key->as_es && !key->as_ls) { - fprintf(f, " opt.kill_outputs = 0x%" PRIx64 "\n", key->opt.kill_outputs); - fprintf(f, " opt.kill_pointsize = 0x%x\n", key->opt.kill_pointsize); - fprintf(f, " opt.kill_clip_distances = 0x%x\n", key->opt.kill_clip_distances); + !key->ge.as_es && !key->ge.as_ls) { + fprintf(f, " opt.kill_outputs = 0x%" PRIx64 "\n", key->ge.opt.kill_outputs); + fprintf(f, " opt.kill_pointsize = 0x%x\n", key->ge.opt.kill_pointsize); + fprintf(f, " opt.kill_clip_distances = 0x%x\n", key->ge.opt.kill_clip_distances); if (stage != MESA_SHADER_GEOMETRY) - fprintf(f, " opt.ngg_culling = 0x%x\n", key->opt.ngg_culling); + fprintf(f, " opt.ngg_culling = 0x%x\n", key->ge.opt.ngg_culling); } - fprintf(f, " opt.prefer_mono = %u\n", key->opt.prefer_mono); - fprintf(f, " opt.inline_uniforms = %u (0x%x, 0x%x, 0x%x, 0x%x)\n", - key->opt.inline_uniforms, - key->opt.inlined_uniform_values[0], - key->opt.inlined_uniform_values[1], - key->opt.inlined_uniform_values[2], - key->opt.inlined_uniform_values[3]); + if (stage <= MESA_SHADER_GEOMETRY) { + fprintf(f, " opt.prefer_mono = %u\n", key->ge.opt.prefer_mono); + fprintf(f, " opt.inline_uniforms = %u (0x%x, 0x%x, 0x%x, 0x%x)\n", + key->ge.opt.inline_uniforms, + key->ge.opt.inlined_uniform_values[0], + key->ge.opt.inlined_uniform_values[1], + key->ge.opt.inlined_uniform_values[2], + key->ge.opt.inlined_uniform_values[3]); + } else { + fprintf(f, " opt.prefer_mono = %u\n", key->ps.opt.prefer_mono); + fprintf(f, " opt.inline_uniforms = %u (0x%x, 0x%x, 0x%x, 0x%x)\n", + key->ps.opt.inline_uniforms, + key->ps.opt.inlined_uniform_values[0], + key->ps.opt.inlined_uniform_values[1], + key->ps.opt.inlined_uniform_values[2], + key->ps.opt.inlined_uniform_values[3]); + } } bool si_vs_needs_prolog(const struct si_shader_selector *sel, const struct si_vs_prolog_bits *prolog_key, - const struct si_shader_key *key, bool ngg_cull_shader) + const union si_shader_key *key, bool ngg_cull_shader) { + assert(sel->info.stage == MESA_SHADER_VERTEX); + /* VGPR initialization fixup for Vega10 and Raven is always done in the * VS prolog. */ return sel->vs_needs_prolog || prolog_key->ls_vgpr_fix || /* The 2nd VS prolog loads input VGPRs from LDS */ - (key->opt.ngg_culling && !ngg_cull_shader); + (key->ge.opt.ngg_culling && !ngg_cull_shader); } /** @@ -1298,11 +1315,11 @@ void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_ key->vs_prolog.states = *prolog_key; key->vs_prolog.num_input_sgprs = num_input_sgprs; key->vs_prolog.num_inputs = info->num_inputs; - key->vs_prolog.as_ls = shader_out->key.as_ls; - key->vs_prolog.as_es = shader_out->key.as_es; - key->vs_prolog.as_ngg = shader_out->key.as_ngg; + key->vs_prolog.as_ls = shader_out->key.ge.as_ls; + key->vs_prolog.as_es = shader_out->key.ge.as_es; + key->vs_prolog.as_ngg = shader_out->key.ge.as_ngg; - if (!ngg_cull_shader && shader_out->key.opt.ngg_culling) + if (!ngg_cull_shader && shader_out->key.ge.opt.ngg_culling) key->vs_prolog.load_vgprs_after_culling = 1; if (shader_out->selector->info.stage == MESA_SHADER_TESS_CTRL) { @@ -1311,7 +1328,7 @@ void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_ } else if (shader_out->selector->info.stage == MESA_SHADER_GEOMETRY) { key->vs_prolog.as_es = 1; key->vs_prolog.num_merged_next_stage_vgprs = 5; - } else if (shader_out->key.as_ngg) { + } else if (shader_out->key.ge.as_ngg) { key->vs_prolog.num_merged_next_stage_vgprs = 5; } @@ -1329,7 +1346,7 @@ void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_ } struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel, - const struct si_shader_key *key, + const union si_shader_key *key, bool *free_nir) { nir_shader *nir; @@ -1350,7 +1367,14 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel, return NULL; } - if (key && key->opt.inline_uniforms) { + bool inline_uniforms = false; + uint32_t *inlined_uniform_values; + if (key) { + si_get_inline_uniform_state((union si_shader_key*)key, sel->pipe_shader_type, + &inline_uniforms, &inlined_uniform_values); + } + + if (inline_uniforms) { assert(*free_nir); /* Most places use shader information from the default variant, not @@ -1394,7 +1418,7 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel, */ NIR_PASS_V(nir, nir_inline_uniforms, nir->info.num_inlinable_uniforms, - key->opt.inlined_uniform_values, + inlined_uniform_values, nir->info.inlinable_uniform_dw_offsets); si_nir_opts(sel->screen, nir, true); @@ -1441,15 +1465,15 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi if ((sel->info.stage == MESA_SHADER_VERTEX || sel->info.stage == MESA_SHADER_TESS_EVAL || sel->info.stage == MESA_SHADER_GEOMETRY) && - !shader->key.as_ls && !shader->key.as_es) { + !shader->key.ge.as_ls && !shader->key.ge.as_es) { ubyte *vs_output_param_offset = shader->info.vs_output_param_offset; - if (sel->info.stage == MESA_SHADER_GEOMETRY && !shader->key.as_ngg) + if (sel->info.stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) vs_output_param_offset = sel->gs_copy_shader->info.vs_output_param_offset; /* VS and TES should also set primitive ID output if it's used. */ unsigned num_outputs_with_prim_id = sel->info.num_outputs + - shader->key.mono.u.vs_export_prim_id; + shader->key.ge.mono.u.vs_export_prim_id; for (unsigned i = 0; i < num_outputs_with_prim_id; i++) { unsigned semantic = sel->info.output_semantic[i]; @@ -1563,32 +1587,38 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader_part **list, switch (stage) { case MESA_SHADER_VERTEX: - shader.key.as_ls = key->vs_prolog.as_ls; - shader.key.as_es = key->vs_prolog.as_es; - shader.key.as_ngg = key->vs_prolog.as_ngg; + shader.key.ge.as_ls = key->vs_prolog.as_ls; + shader.key.ge.as_es = key->vs_prolog.as_es; + shader.key.ge.as_ngg = key->vs_prolog.as_ngg; break; case MESA_SHADER_TESS_CTRL: assert(!prolog); - shader.key.part.tcs.epilog = key->tcs_epilog.states; + shader.key.ge.part.tcs.epilog = key->tcs_epilog.states; break; case MESA_SHADER_GEOMETRY: assert(prolog); - shader.key.as_ngg = key->gs_prolog.as_ngg; + shader.key.ge.as_ngg = key->gs_prolog.as_ngg; break; case MESA_SHADER_FRAGMENT: if (prolog) - shader.key.part.ps.prolog = key->ps_prolog.states; + shader.key.ps.part.prolog = key->ps_prolog.states; else - shader.key.part.ps.epilog = key->ps_epilog.states; + shader.key.ps.part.epilog = key->ps_epilog.states; break; default: unreachable("bad shader part"); } + unsigned wave_size; + if (stage <= MESA_SHADER_GEOMETRY) { + wave_size = si_get_wave_size(sscreen, stage, shader.key.ge.as_ngg, shader.key.ge.as_es); + } else { + wave_size = si_get_wave_size(sscreen, stage, false, false); + } + struct si_shader_context ctx; - si_llvm_context_init(&ctx, sscreen, compiler, - si_get_wave_size(sscreen, stage, - shader.key.as_ngg, shader.key.as_es)); + si_llvm_context_init(&ctx, sscreen, compiler, wave_size); + ctx.shader = &shader; ctx.stage = stage; @@ -1639,7 +1669,7 @@ static bool si_get_vs_prolog(struct si_screen *sscreen, struct ac_llvm_compiler static bool si_shader_select_vs_parts(struct si_screen *sscreen, struct ac_llvm_compiler *compiler, struct si_shader *shader, struct pipe_debug_callback *debug) { - return si_get_vs_prolog(sscreen, compiler, shader, debug, shader, &shader->key.part.vs.prolog); + return si_get_vs_prolog(sscreen, compiler, shader, debug, shader, &shader->key.ge.part.vs.prolog); } /** @@ -1649,10 +1679,10 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen, struct ac_llvm struct si_shader *shader, struct pipe_debug_callback *debug) { if (sscreen->info.chip_class >= GFX9) { - struct si_shader *ls_main_part = shader->key.part.tcs.ls->main_shader_part_ls; + struct si_shader *ls_main_part = shader->key.ge.part.tcs.ls->main_shader_part_ls; if (!si_get_vs_prolog(sscreen, compiler, shader, debug, ls_main_part, - &shader->key.part.tcs.ls_prolog)) + &shader->key.ge.part.tcs.ls_prolog)) return false; shader->previous_stage = ls_main_part; @@ -1661,7 +1691,7 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen, struct ac_llvm /* Get the epilog. */ union si_shader_part_key epilog_key; memset(&epilog_key, 0, sizeof(epilog_key)); - epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog; + epilog_key.tcs_epilog.states = shader->key.ge.part.tcs.epilog; shader->epilog = si_get_shader_part(sscreen, &sscreen->tcs_epilogs, MESA_SHADER_TESS_CTRL, false, &epilog_key, compiler, debug, si_llvm_build_tcs_epilog, @@ -1678,26 +1708,26 @@ static bool si_shader_select_gs_parts(struct si_screen *sscreen, struct ac_llvm_ if (sscreen->info.chip_class >= GFX9) { struct si_shader *es_main_part; - if (shader->key.as_ngg) - es_main_part = shader->key.part.gs.es->main_shader_part_ngg_es; + if (shader->key.ge.as_ngg) + es_main_part = shader->key.ge.part.gs.es->main_shader_part_ngg_es; else - es_main_part = shader->key.part.gs.es->main_shader_part_es; + es_main_part = shader->key.ge.part.gs.es->main_shader_part_es; - if (shader->key.part.gs.es->info.stage == MESA_SHADER_VERTEX && + if (shader->key.ge.part.gs.es->info.stage == MESA_SHADER_VERTEX && !si_get_vs_prolog(sscreen, compiler, shader, debug, es_main_part, - &shader->key.part.gs.vs_prolog)) + &shader->key.ge.part.gs.vs_prolog)) return false; shader->previous_stage = es_main_part; } - if (!shader->key.part.gs.prolog.tri_strip_adj_fix) + if (!shader->key.ge.part.gs.prolog.tri_strip_adj_fix) return true; union si_shader_part_key prolog_key; memset(&prolog_key, 0, sizeof(prolog_key)); - prolog_key.gs_prolog.states = shader->key.part.gs.prolog; - prolog_key.gs_prolog.as_ngg = shader->key.as_ngg; + prolog_key.gs_prolog.states = shader->key.ge.part.gs.prolog; + prolog_key.gs_prolog.as_ngg = shader->key.ge.as_ngg; shader->prolog2 = si_get_shader_part(sscreen, &sscreen->gs_prologs, MESA_SHADER_GEOMETRY, true, &prolog_key, @@ -1715,7 +1745,7 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke struct si_shader_info *info = &shader->selector->info; memset(key, 0, sizeof(*key)); - key->ps_prolog.states = shader->key.part.ps.prolog; + key->ps_prolog.states = shader->key.ps.part.prolog; key->ps_prolog.colors_read = info->colors_read; key->ps_prolog.num_input_sgprs = shader->info.num_input_sgprs; key->ps_prolog.num_input_vgprs = shader->info.num_input_vgprs; @@ -1731,7 +1761,7 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke if (info->colors_read) { ubyte *color = shader->selector->color_attr_index; - if (shader->key.part.ps.prolog.color_two_side) { + if (shader->key.ps.part.prolog.color_two_side) { /* BCOLORs are stored after the last input. */ key->ps_prolog.num_interp_inputs = info->num_inputs; key->ps_prolog.face_vgpr_index = shader->info.face_vgpr_index; @@ -1748,7 +1778,7 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke key->ps_prolog.color_attr_index[i] = color[i]; - if (shader->key.part.ps.prolog.flatshade_colors && interp == INTERP_MODE_COLOR) + if (shader->key.ps.part.prolog.flatshade_colors && interp == INTERP_MODE_COLOR) interp = INTERP_MODE_FLAT; switch (interp) { @@ -1758,9 +1788,9 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke case INTERP_MODE_SMOOTH: case INTERP_MODE_COLOR: /* Force the interpolation location for colors here. */ - if (shader->key.part.ps.prolog.force_persp_sample_interp) + if (shader->key.ps.part.prolog.force_persp_sample_interp) location = TGSI_INTERPOLATE_LOC_SAMPLE; - if (shader->key.part.ps.prolog.force_persp_center_interp) + if (shader->key.ps.part.prolog.force_persp_center_interp) location = TGSI_INTERPOLATE_LOC_CENTER; switch (location) { @@ -1788,9 +1818,9 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke break; case INTERP_MODE_NOPERSPECTIVE: /* Force the interpolation location for colors here. */ - if (shader->key.part.ps.prolog.force_linear_sample_interp) + if (shader->key.ps.part.prolog.force_linear_sample_interp) location = TGSI_INTERPOLATE_LOC_SAMPLE; - if (shader->key.part.ps.prolog.force_linear_center_interp) + if (shader->key.ps.part.prolog.force_linear_center_interp) location = TGSI_INTERPOLATE_LOC_CENTER; /* The VGPR assignment for non-monolithic shaders @@ -1854,7 +1884,7 @@ void si_get_ps_epilog_key(struct si_shader *shader, union si_shader_part_key *ke key->ps_epilog.writes_z = info->writes_z; key->ps_epilog.writes_stencil = info->writes_stencil; key->ps_epilog.writes_samplemask = info->writes_samplemask; - key->ps_epilog.states = shader->key.part.ps.epilog; + key->ps_epilog.states = shader->key.ps.part.epilog; } /** @@ -1888,34 +1918,34 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, struct ac_llvm_ return false; /* Enable POS_FIXED_PT if polygon stippling is enabled. */ - if (shader->key.part.ps.prolog.poly_stipple) { + if (shader->key.ps.part.prolog.poly_stipple) { shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1); assert(G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr)); } /* Set up the enable bits for per-sample shading if needed. */ - if (shader->key.part.ps.prolog.force_persp_sample_interp && + if (shader->key.ps.part.prolog.force_persp_sample_interp && (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_ena) || G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) { shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTER_ENA; shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA; shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1); } - if (shader->key.part.ps.prolog.force_linear_sample_interp && + if (shader->key.ps.part.prolog.force_linear_sample_interp && (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_ena) || G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) { shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTER_ENA; shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA; shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1); } - if (shader->key.part.ps.prolog.force_persp_center_interp && + if (shader->key.ps.part.prolog.force_persp_center_interp && (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_ena) || G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) { shader->config.spi_ps_input_ena &= C_0286CC_PERSP_SAMPLE_ENA; shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA; shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1); } - if (shader->key.part.ps.prolog.force_linear_center_interp && + if (shader->key.ps.part.prolog.force_linear_center_interp && (G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_ena) || G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) { shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_SAMPLE_ENA; @@ -1937,7 +1967,7 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, struct ac_llvm_ } /* Samplemask fixup requires the sample ID. */ - if (shader->key.part.ps.prolog.samplemask_log_ps_iter) { + if (shader->key.ps.part.prolog.samplemask_log_ps_iter) { shader->config.spi_ps_input_ena |= S_0286CC_ANCILLARY_ENA(1); assert(G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr)); } @@ -1945,7 +1975,7 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, struct ac_llvm_ /* The sample mask input is always enabled, because the API shader always * passes it through to the epilog. Disable it here if it's unused. */ - if (!shader->key.part.ps.epilog.poly_line_smoothing && !shader->selector->info.reads_samplemask) + if (!shader->key.ps.part.epilog.poly_line_smoothing && !shader->selector->info.reads_samplemask) shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA; return true; @@ -2098,8 +2128,8 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler si_calculate_max_simd_waves(shader); } - if (shader->key.as_ngg) { - assert(!shader->key.as_es && !shader->key.as_ls); + if (sel->info.stage <= MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg) { + assert(!shader->key.ge.as_es && !shader->key.ge.as_ls); if (!gfx10_ngg_calculate_subgroup_info(shader)) { fprintf(stderr, "Failed to compute subgroup info\n"); return false; @@ -2115,7 +2145,7 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler util_rast_prim_is_triangles(sel->info.base.gs.output_primitive)) || (sel->info.stage == MESA_SHADER_VERTEX && /* Used to export PrimitiveID from the correct vertex. */ - shader->key.mono.u.vs_export_prim_id)); + shader->key.ge.mono.u.vs_export_prim_id)); shader->uses_vs_state_outprim = sscreen->use_ngg && /* Only used by streamout in vertex shaders. */ @@ -2124,18 +2154,18 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler if (sel->info.stage == MESA_SHADER_VERTEX) { shader->uses_base_instance = sel->info.uses_base_instance || - shader->key.part.vs.prolog.instance_divisor_is_one || - shader->key.part.vs.prolog.instance_divisor_is_fetched; + shader->key.ge.part.vs.prolog.instance_divisor_is_one || + shader->key.ge.part.vs.prolog.instance_divisor_is_fetched; } else if (sel->info.stage == MESA_SHADER_TESS_CTRL) { shader->uses_base_instance = shader->previous_stage_sel && (shader->previous_stage_sel->info.uses_base_instance || - shader->key.part.tcs.ls_prolog.instance_divisor_is_one || - shader->key.part.tcs.ls_prolog.instance_divisor_is_fetched); + shader->key.ge.part.tcs.ls_prolog.instance_divisor_is_one || + shader->key.ge.part.tcs.ls_prolog.instance_divisor_is_fetched); } else if (sel->info.stage == MESA_SHADER_GEOMETRY) { shader->uses_base_instance = shader->previous_stage_sel && (shader->previous_stage_sel->info.uses_base_instance || - shader->key.part.gs.vs_prolog.instance_divisor_is_one || - shader->key.part.gs.vs_prolog.instance_divisor_is_fetched); + shader->key.ge.part.gs.vs_prolog.instance_divisor_is_one || + shader->key.ge.part.gs.vs_prolog.instance_divisor_is_fetched); } si_fix_resource_usage(sscreen, shader); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 702df60496f..ae3e8b1f515 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -618,7 +618,8 @@ union si_shader_part_key { } ps_epilog; }; -struct si_shader_key { +/* The shader key for geometry stages (VS, TCS, TES, GS) */ +struct si_shader_key_ge { /* Prolog and epilog flags. */ union { struct { @@ -634,10 +635,6 @@ struct si_shader_key { struct si_shader_selector *es; /* for merged ES-GS */ struct si_gs_prolog_bits prolog; } gs; - struct { - struct si_ps_prolog_bits prolog; - struct si_ps_epilog_bits epilog; - } ps; } part; /* These three are initially set according to the NEXT_SHADER property, @@ -660,12 +657,6 @@ struct si_shader_key { uint64_t ff_tcs_inputs_to_copy; /* for fixed-func TCS */ /* When PS needs PrimID and GS is disabled. */ unsigned vs_export_prim_id : 1; - struct { - unsigned interpolate_at_sample_force_center : 1; - unsigned fbfetch_msaa : 1; - unsigned fbfetch_is_1D : 1; - unsigned fbfetch_layered : 1; - } ps; } u; } mono; @@ -699,6 +690,44 @@ struct si_shader_key { } opt; }; +struct si_shader_key_ps { + struct { + /* Prolog and epilog flags. */ + struct si_ps_prolog_bits prolog; + struct si_ps_epilog_bits epilog; + } part; + + /* Flags for monolithic compilation only. */ + struct { + unsigned interpolate_at_sample_force_center : 1; + unsigned fbfetch_msaa : 1; + unsigned fbfetch_is_1D : 1; + unsigned fbfetch_layered : 1; + } mono; + + /* Optimization flags for asynchronous compilation only. */ + struct { + /* For shaders where monolithic variants have better code. + * + * This is a flag that has no effect on code generation, + * but forces monolithic shaders to be used as soon as + * possible, because it's in the "opt" group. + */ + unsigned prefer_mono : 1; + unsigned inline_uniforms:1; + + /* This must be kept last to limit the number of variants + * depending only on the uniform values. + */ + uint32_t inlined_uniform_values[MAX_INLINABLE_UNIFORMS]; + } opt; +}; + +union si_shader_key { + struct si_shader_key_ge ge; /* geometry engine shaders */ + struct si_shader_key_ps ps; +}; + /* Restore the pack alignment to default. */ #pragma pack(pop) @@ -777,7 +806,7 @@ struct si_shader { struct si_resource *bo; struct si_resource *scratch_bo; - struct si_shader_key key; + union si_shader_key key; struct util_queue_fence ready; bool compilation_failed; bool is_monolithic; @@ -927,16 +956,18 @@ bool gfx10_is_ngg_passthrough(struct si_shader *shader); /* Return the pointer to the main shader part's pointer. */ static inline struct si_shader **si_get_main_shader_part(struct si_shader_selector *sel, - const struct si_shader_key *key) + const union si_shader_key *key) { - if (key->as_ls) - return &sel->main_shader_part_ls; - if (key->as_es && key->as_ngg) - return &sel->main_shader_part_ngg_es; - if (key->as_es) - return &sel->main_shader_part_es; - if (key->as_ngg) - return &sel->main_shader_part_ngg; + if (sel->info.stage <= MESA_SHADER_GEOMETRY) { + if (key->ge.as_ls) + return &sel->main_shader_part_ls; + if (key->ge.as_es && key->ge.as_ngg) + return &sel->main_shader_part_ngg_es; + if (key->ge.as_es) + return &sel->main_shader_part_es; + if (key->ge.as_ngg) + return &sel->main_shader_part_ngg; + } return &sel->main_shader_part; } @@ -954,7 +985,7 @@ static inline bool gfx10_edgeflags_have_effect(struct si_shader *shader) { if (shader->selector->info.stage == MESA_SHADER_VERTEX && !shader->selector->info.base.vs.blit_sgprs_amd && - !(shader->key.opt.ngg_culling & SI_NGG_CULL_LINES)) + !(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES)) return true; return false; diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 3970125f549..106abde1c97 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -168,12 +168,12 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader); unsigned si_get_max_workgroup_size(const struct si_shader *shader); bool si_vs_needs_prolog(const struct si_shader_selector *sel, const struct si_vs_prolog_bits *prolog_key, - const struct si_shader_key *key, bool ngg_cull_shader); + const union si_shader_key *key, bool ngg_cull_shader); void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_sgprs, bool ngg_cull_shader, const struct si_vs_prolog_bits *prolog_key, struct si_shader *shader_out, union si_shader_part_key *key); struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel, - const struct si_shader_key *key, + const union si_shader_key *key, bool *free_nir); bool si_need_ps_prolog(const union si_shader_part_key *key); void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *key, diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 1a1dd07a507..9bbb1fb95d4 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -150,10 +150,10 @@ void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTy gl_shader_stage real_stage = ctx->stage; /* LS is merged into HS (TCS), and ES is merged into GS. */ - if (ctx->screen->info.chip_class >= GFX9) { - if (ctx->shader->key.as_ls) + if (ctx->screen->info.chip_class >= GFX9 && ctx->stage <= MESA_SHADER_GEOMETRY) { + if (ctx->shader->key.ge.as_ls) real_stage = MESA_SHADER_TESS_CTRL; - else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg) + else if (ctx->shader->key.ge.as_es || ctx->shader->key.ge.as_ngg) real_stage = MESA_SHADER_GEOMETRY; } @@ -219,7 +219,8 @@ void si_llvm_create_main_func(struct si_shader_context *ctx, bool ngg_cull_shade } - if (shader->key.as_ls || ctx->stage == MESA_SHADER_TESS_CTRL) { + if (ctx->stage <= MESA_SHADER_GEOMETRY && + (shader->key.ge.as_ls || ctx->stage == MESA_SHADER_TESS_CTRL)) { if (USE_LDS_SYMBOLS) { /* The LSHS size is not known until draw time, so we append it * at the end of whatever LDS use there may be in the rest of @@ -470,7 +471,7 @@ static bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader * } ctx->abi.interp_at_sample_force_center = - ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center; + ctx->shader->key.ps.mono.interpolate_at_sample_force_center; ctx->abi.kill_ps_if_inf_interp = ctx->screen->options.no_infinite_interp && @@ -854,7 +855,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad si_llvm_create_main_func(ctx, ngg_cull_shader); - if (ctx->shader->key.as_es || ctx->stage == MESA_SHADER_GEOMETRY) + if (ctx->stage <= MESA_SHADER_GEOMETRY && + (ctx->shader->key.ge.as_es || ctx->stage == MESA_SHADER_GEOMETRY)) si_preload_esgs_ring(ctx); if (ctx->stage == MESA_SHADER_GEOMETRY) @@ -872,7 +874,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad for (unsigned i = 0; i < 4; i++) { ctx->gs_next_vertex[i] = ac_build_alloca(&ctx->ac, ctx->ac.i32, ""); } - if (shader->key.as_ngg) { + if (shader->key.ge.as_ngg) { for (unsigned i = 0; i < 4; ++i) { ctx->gs_curprim_verts[i] = ac_build_alloca(&ctx->ac, ctx->ac.i32, ""); ctx->gs_generated_prims[i] = ac_build_alloca(&ctx->ac, ctx->ac.i32, ""); @@ -892,7 +894,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad } } - if (ctx->stage != MESA_SHADER_GEOMETRY && (shader->key.as_ngg && !shader->key.as_es)) { + if ((ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL) && + shader->key.ge.as_ngg && !shader->key.ge.as_es) { /* Unconditionally declare scratch space base for streamout and * vertex compaction. Whether space is actually allocated is * determined during linking / PM4 creation. @@ -902,7 +905,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad /* This is really only needed when streamout and / or vertex * compaction is enabled. */ - if (!ctx->gs_ngg_scratch && (sel->so.num_outputs || shader->key.opt.ngg_culling)) { + if (!ctx->gs_ngg_scratch && (sel->so.num_outputs || shader->key.ge.opt.ngg_culling)) { LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader)); ctx->gs_ngg_scratch = LLVMAddGlobalInAddressSpace(ctx->ac.module, asi32, "ngg_scratch", AC_ADDR_SPACE_LDS); @@ -918,8 +921,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad /* TES is special because it has only 1 shader part if NGG shader culling is disabled, * and therefore it doesn't use the wrapper function. */ - bool no_wrapper_func = ctx->stage == MESA_SHADER_TESS_EVAL && !shader->key.as_es && - !shader->key.opt.ngg_culling; + bool no_wrapper_func = ctx->stage == MESA_SHADER_TESS_EVAL && !shader->key.ge.as_es && + !shader->key.ge.opt.ngg_culling; /* Set EXEC = ~0 before the first shader. If the prolog is present, EXEC is set there * instead. For monolithic shaders, the wrapper function does this. @@ -927,14 +930,14 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad if ((!shader->is_monolithic || no_wrapper_func) && (ctx->stage == MESA_SHADER_TESS_EVAL || (ctx->stage == MESA_SHADER_VERTEX && - !si_vs_needs_prolog(sel, &shader->key.part.vs.prolog, &shader->key, ngg_cull_shader)))) + !si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, ngg_cull_shader)))) ac_init_exec_full_mask(&ctx->ac); /* NGG VS and NGG TES: Send gs_alloc_req and the prim export at the beginning to decrease * register usage. */ if ((ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL) && - shader->key.as_ngg && !shader->key.as_es && !shader->key.opt.ngg_culling) { + shader->key.ge.as_ngg && !shader->key.ge.as_es && !shader->key.ge.opt.ngg_culling) { /* GFX10 requires a barrier before gs_alloc_req due to a hw bug. */ if (ctx->screen->info.chip_class == GFX10) ac_build_s_barrier(&ctx->ac); @@ -949,7 +952,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad } /* NGG GS: Initialize LDS and insert s_barrier, which must not be inside the if statement. */ - if (ctx->stage == MESA_SHADER_GEOMETRY && shader->key.as_ngg) + if (ctx->stage == MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg) gfx10_ngg_gs_emit_prologue(ctx); if (ctx->stage == MESA_SHADER_GEOMETRY || @@ -959,8 +962,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad * not here. */ thread_enabled = si_is_gs_thread(ctx); /* 2nd shader: thread enabled bool */ - } else if (((shader->key.as_ls || shader->key.as_es) && !shader->is_monolithic) || - (shader->key.as_ngg && !shader->key.as_es)) { + } else if (((shader->key.ge.as_ls || shader->key.ge.as_es) && !shader->is_monolithic) || + (shader->key.ge.as_ngg && !shader->key.ge.as_es)) { /* This is NGG VS or NGG TES or VS before GS or TES before GS or VS before TCS. * For monolithic LS (VS before TCS) and ES (VS before GS and TES before GS), * the if statement is inserted by the wrapper function. @@ -993,11 +996,11 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad */ if (ctx->stage == MESA_SHADER_TESS_CTRL) { /* We need the barrier only if TCS inputs are read from LDS. */ - if (!shader->key.opt.same_patch_vertices || + if (!shader->key.ge.opt.same_patch_vertices || shader->selector->info.base.inputs_read & ~shader->selector->tcs_vgpr_only_inputs) ac_build_s_barrier(&ctx->ac); - } else if (ctx->stage == MESA_SHADER_GEOMETRY && !shader->key.as_ngg) { + } else if (ctx->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) { /* gfx10_ngg_gs_emit_prologue inserts the barrier for NGG. */ ac_build_s_barrier(&ctx->ac); } @@ -1036,7 +1039,7 @@ static void si_optimize_vs_outputs(struct si_shader_context *ctx) unsigned skip_vs_optim_mask = 0; if ((ctx->stage != MESA_SHADER_VERTEX && ctx->stage != MESA_SHADER_TESS_EVAL) || - shader->key.as_ls || shader->key.as_es) + shader->key.ge.as_ls || shader->key.ge.as_es) return; /* Optimizing these outputs is not possible, since they might be overriden @@ -1064,7 +1067,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * si_llvm_context_init(&ctx, sscreen, compiler, si_get_shader_wave_size(shader)); LLVMValueRef ngg_cull_main_fn = NULL; - if (shader->key.opt.ngg_culling) { + if (ctx.stage <= MESA_SHADER_GEOMETRY && shader->key.ge.opt.ngg_culling) { if (!si_llvm_translate_nir(&ctx, shader, nir, false, true)) { si_llvm_dispose(&ctx); return false; @@ -1085,10 +1088,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * LLVMValueRef main_fn = ctx.main_fn; if (ngg_cull_main_fn) { - if (si_vs_needs_prolog(sel, &shader->key.part.vs.prolog, &shader->key, true)) { + if (si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, true)) { union si_shader_part_key prolog_key; si_get_vs_prolog_key(&sel->info, shader->info.num_input_sgprs, true, - &shader->key.part.vs.prolog, shader, &prolog_key); + &shader->key.ge.part.vs.prolog, shader, &prolog_key); prolog_key.vs_prolog.is_monolithic = true; si_llvm_build_vs_prolog(&ctx, &prolog_key); parts[num_parts++] = ctx.main_fn; @@ -1097,10 +1100,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * parts[num_parts++] = ngg_cull_main_fn; } - if (si_vs_needs_prolog(sel, &shader->key.part.vs.prolog, &shader->key, false)) { + if (si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, false)) { union si_shader_part_key prolog_key; si_get_vs_prolog_key(&sel->info, shader->info.num_input_sgprs, false, - &shader->key.part.vs.prolog, shader, &prolog_key); + &shader->key.ge.part.vs.prolog, shader, &prolog_key); prolog_key.vs_prolog.is_monolithic = true; si_llvm_build_vs_prolog(&ctx, &prolog_key); parts[num_parts++] = ctx.main_fn; @@ -1131,10 +1134,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * si_build_wrapper_function(&ctx, parts, 3, 0, 0, false); } else if (shader->is_monolithic && ctx.stage == MESA_SHADER_TESS_CTRL) { if (sscreen->info.chip_class >= GFX9) { - struct si_shader_selector *ls = shader->key.part.tcs.ls; + struct si_shader_selector *ls = shader->key.ge.part.tcs.ls; LLVMValueRef parts[4]; bool vs_needs_prolog = - si_vs_needs_prolog(ls, &shader->key.part.tcs.ls_prolog, &shader->key, false); + si_vs_needs_prolog(ls, &shader->key.ge.part.tcs.ls_prolog, &shader->key, false); /* TCS main part */ parts[2] = ctx.main_fn; @@ -1142,7 +1145,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * /* TCS epilog */ union si_shader_part_key tcs_epilog_key; memset(&tcs_epilog_key, 0, sizeof(tcs_epilog_key)); - tcs_epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog; + tcs_epilog_key.tcs_epilog.states = shader->key.ge.part.tcs.epilog; si_llvm_build_tcs_epilog(&ctx, &tcs_epilog_key); parts[3] = ctx.main_fn; @@ -1151,9 +1154,9 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * nir = si_get_nir_shader(ls, NULL, &free_nir); struct si_shader shader_ls = {}; shader_ls.selector = ls; - shader_ls.key.as_ls = 1; - shader_ls.key.mono = shader->key.mono; - shader_ls.key.opt = shader->key.opt; + shader_ls.key.ge.as_ls = 1; + shader_ls.key.ge.mono = shader->key.ge.mono; + shader_ls.key.ge.opt = shader->key.ge.opt; shader_ls.is_monolithic = true; if (!si_llvm_translate_nir(&ctx, &shader_ls, nir, free_nir, false)) { @@ -1167,7 +1170,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * if (vs_needs_prolog) { union si_shader_part_key vs_prolog_key; si_get_vs_prolog_key(&ls->info, shader_ls.info.num_input_sgprs, false, - &shader->key.part.tcs.ls_prolog, shader, &vs_prolog_key); + &shader->key.ge.part.tcs.ls_prolog, shader, &vs_prolog_key); vs_prolog_key.vs_prolog.is_monolithic = true; si_llvm_build_vs_prolog(&ctx, &vs_prolog_key); parts[0] = ctx.main_fn; @@ -1179,7 +1182,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * si_build_wrapper_function(&ctx, parts + !vs_needs_prolog, 4 - !vs_needs_prolog, vs_needs_prolog, vs_needs_prolog ? 2 : 1, - shader->key.opt.same_patch_vertices); + shader->key.ge.opt.same_patch_vertices); } else { LLVMValueRef parts[2]; union si_shader_part_key epilog_key; @@ -1187,7 +1190,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * parts[0] = ctx.main_fn; memset(&epilog_key, 0, sizeof(epilog_key)); - epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog; + epilog_key.tcs_epilog.states = shader->key.ge.part.tcs.epilog; si_llvm_build_tcs_epilog(&ctx, &epilog_key); parts[1] = ctx.main_fn; @@ -1195,7 +1198,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * } } else if (shader->is_monolithic && ctx.stage == MESA_SHADER_GEOMETRY) { if (ctx.screen->info.chip_class >= GFX9) { - struct si_shader_selector *es = shader->key.part.gs.es; + struct si_shader_selector *es = shader->key.ge.part.gs.es; LLVMValueRef es_prolog = NULL; LLVMValueRef es_main = NULL; LLVMValueRef gs_prolog = NULL; @@ -1204,8 +1207,8 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * /* GS prolog */ union si_shader_part_key gs_prolog_key; memset(&gs_prolog_key, 0, sizeof(gs_prolog_key)); - gs_prolog_key.gs_prolog.states = shader->key.part.gs.prolog; - gs_prolog_key.gs_prolog.as_ngg = shader->key.as_ngg; + gs_prolog_key.gs_prolog.states = shader->key.ge.part.gs.prolog; + gs_prolog_key.gs_prolog.as_ngg = shader->key.ge.as_ngg; si_llvm_build_gs_prolog(&ctx, &gs_prolog_key); gs_prolog = ctx.main_fn; @@ -1213,10 +1216,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * nir = si_get_nir_shader(es, NULL, &free_nir); struct si_shader shader_es = {}; shader_es.selector = es; - shader_es.key.as_es = 1; - shader_es.key.as_ngg = shader->key.as_ngg; - shader_es.key.mono = shader->key.mono; - shader_es.key.opt = shader->key.opt; + shader_es.key.ge.as_es = 1; + shader_es.key.ge.as_ngg = shader->key.ge.as_ngg; + shader_es.key.ge.mono = shader->key.ge.mono; + shader_es.key.ge.opt = shader->key.ge.opt; shader_es.is_monolithic = true; if (!si_llvm_translate_nir(&ctx, &shader_es, nir, free_nir, false)) { @@ -1228,10 +1231,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * /* ES prolog */ if (es->info.stage == MESA_SHADER_VERTEX && - si_vs_needs_prolog(es, &shader->key.part.gs.vs_prolog, &shader->key, false)) { + si_vs_needs_prolog(es, &shader->key.ge.part.gs.vs_prolog, &shader->key, false)) { union si_shader_part_key vs_prolog_key; si_get_vs_prolog_key(&es->info, shader_es.info.num_input_sgprs, false, - &shader->key.part.gs.vs_prolog, shader, &vs_prolog_key); + &shader->key.ge.part.gs.vs_prolog, shader, &vs_prolog_key); vs_prolog_key.vs_prolog.is_monolithic = true; si_llvm_build_vs_prolog(&ctx, &vs_prolog_key); es_prolog = ctx.main_fn; @@ -1260,7 +1263,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * parts[1] = ctx.main_fn; memset(&prolog_key, 0, sizeof(prolog_key)); - prolog_key.gs_prolog.states = shader->key.part.gs.prolog; + prolog_key.gs_prolog.states = shader->key.ge.part.gs.prolog; si_llvm_build_gs_prolog(&ctx, &prolog_key); parts[0] = ctx.main_fn; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index c22e826ff01..4a711c80539 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -108,7 +108,7 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx) ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0); ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1); - if (ctx->shader->key.as_ngg) + if (ctx->shader->key.ge.as_ngg) ret = si_insert_input_ptr(ctx, ret, ctx->args.gs_tg_info, 2); else ret = si_insert_input_ret(ctx, ret, ctx->args.gs2vs_offset, 2); @@ -199,7 +199,7 @@ static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx) static void emit_gs_epilogue(struct si_shader_context *ctx) { - if (ctx->shader->key.as_ngg) { + if (ctx->shader->key.ge.as_ngg) { gfx10_ngg_gs_emit_epilogue(ctx); return; } @@ -228,7 +228,7 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVM { struct si_shader_context *ctx = si_shader_context_from_abi(abi); - if (ctx->shader->key.as_ngg) { + if (ctx->shader->key.ge.as_ngg) { gfx10_ngg_gs_emit_vertex(ctx, stream, addrs); return; } @@ -303,7 +303,7 @@ static void si_llvm_emit_primitive(struct ac_shader_abi *abi, unsigned stream) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); - if (ctx->shader->key.as_ngg) { + if (ctx->shader->key.ge.as_ngg) { LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_0, ctx->gs_curprim_verts[stream]); return; } diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c index 2dc3705ab62..758ea36b435 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c @@ -79,22 +79,22 @@ static LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi) args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 0, 16); - if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D) + if (!ctx->shader->key.ps.mono.fbfetch_is_1D) args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 16, 16); /* Get the current render target layer index. */ - if (ctx->shader->key.mono.u.ps.fbfetch_layered) + if (ctx->shader->key.ps.mono.fbfetch_layered) args.coords[chan++] = si_unpack_param(ctx, ctx->args.ancillary, 16, 11); - if (ctx->shader->key.mono.u.ps.fbfetch_msaa) + if (ctx->shader->key.ps.mono.fbfetch_msaa) args.coords[chan++] = si_get_sample_id(ctx); - if (ctx->shader->key.mono.u.ps.fbfetch_msaa && !(ctx->screen->debug_flags & DBG(NO_FMASK))) { + if (ctx->shader->key.ps.mono.fbfetch_msaa && !(ctx->screen->debug_flags & DBG(NO_FMASK))) { fmask = ac_build_load_to_sgpr(&ctx->ac, ptr, LLVMConstInt(ctx->ac.i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0)); ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords, - ctx->shader->key.mono.u.ps.fbfetch_layered); + ctx->shader->key.ps.mono.fbfetch_layered); } args.opcode = ac_image_load; @@ -102,13 +102,13 @@ static LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi) args.dmask = 0xf; args.attributes = AC_FUNC_ATTR_READNONE; - if (ctx->shader->key.mono.u.ps.fbfetch_msaa) + if (ctx->shader->key.ps.mono.fbfetch_msaa) args.dim = - ctx->shader->key.mono.u.ps.fbfetch_layered ? ac_image_2darraymsaa : ac_image_2dmsaa; - else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D) - args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? ac_image_1darray : ac_image_1d; + ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_2darraymsaa : ac_image_2dmsaa; + else if (ctx->shader->key.ps.mono.fbfetch_is_1D) + args.dim = ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_1darray : ac_image_1d; else - args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? ac_image_2darray : ac_image_2d; + args.dim = ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_2darray : ac_image_2d; return ac_build_image_opcode(&ctx->ac, &args); } @@ -170,7 +170,7 @@ static void interp_fs_color(struct si_shader_context *ctx, unsigned input_index, j = LLVMBuildExtractElement(ctx->ac.builder, interp_param, ctx->ac.i32_1, ""); } - if (ctx->shader->key.part.ps.prolog.color_two_side) { + if (ctx->shader->key.ps.part.prolog.color_two_side) { LLVMValueRef is_face_positive; /* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1", @@ -199,13 +199,13 @@ static void interp_fs_color(struct si_shader_context *ctx, unsigned input_index, static void si_alpha_test(struct si_shader_context *ctx, LLVMValueRef alpha) { - if (ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_NEVER) { + if (ctx->shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_NEVER) { static LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1] = { [PIPE_FUNC_LESS] = LLVMRealOLT, [PIPE_FUNC_EQUAL] = LLVMRealOEQ, [PIPE_FUNC_LEQUAL] = LLVMRealOLE, [PIPE_FUNC_GREATER] = LLVMRealOGT, [PIPE_FUNC_NOTEQUAL] = LLVMRealONE, [PIPE_FUNC_GEQUAL] = LLVMRealOGE, }; - LLVMRealPredicate cond = cond_map[ctx->shader->key.part.ps.epilog.alpha_func]; + LLVMRealPredicate cond = cond_map[ctx->shader->key.ps.part.epilog.alpha_func]; assert(cond); LLVMValueRef alpha_ref = LLVMGetParam(ctx->main_fn, SI_PARAM_ALPHA_REF); @@ -274,8 +274,8 @@ static void si_llvm_init_ps_export_args(struct si_shader_context *ctx, LLVMValue unsigned cbuf, unsigned compacted_mrt_index, unsigned color_type, struct ac_export_args *args) { - const struct si_shader_key *key = &ctx->shader->key; - unsigned col_formats = key->part.ps.epilog.spi_shader_col_format; + const union si_shader_key *key = &ctx->shader->key; + unsigned col_formats = key->ps.part.epilog.spi_shader_col_format; LLVMValueRef f32undef = LLVMGetUndef(ctx->ac.f32); unsigned spi_shader_col_format; unsigned chan; @@ -284,8 +284,8 @@ static void si_llvm_init_ps_export_args(struct si_shader_context *ctx, LLVMValue assert(cbuf < 8); spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf; - is_int8 = (key->part.ps.epilog.color_is_int8 >> cbuf) & 0x1; - is_int10 = (key->part.ps.epilog.color_is_int10 >> cbuf) & 0x1; + is_int8 = (key->ps.part.epilog.color_is_int8 >> cbuf) & 0x1; + is_int10 = (key->ps.part.epilog.color_is_int10 >> cbuf) & 0x1; /* Default is 0xf. Adjusted below depending on the format. */ args->enabled_channels = 0xf; /* writemask */ @@ -411,31 +411,31 @@ static bool si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *col int i; /* Clamp color */ - if (ctx->shader->key.part.ps.epilog.clamp_color) + if (ctx->shader->key.ps.part.epilog.clamp_color) for (i = 0; i < 4; i++) color[i] = ac_build_clamp(&ctx->ac, color[i]); /* Alpha to one */ - if (ctx->shader->key.part.ps.epilog.alpha_to_one) + if (ctx->shader->key.ps.part.epilog.alpha_to_one) color[3] = LLVMConstReal(LLVMTypeOf(color[0]), 1); /* Alpha test */ - if (index == 0 && ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS) + if (index == 0 && ctx->shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS) si_alpha_test(ctx, color[3]); /* Line & polygon smoothing */ - if (ctx->shader->key.part.ps.epilog.poly_line_smoothing) + if (ctx->shader->key.ps.part.epilog.poly_line_smoothing) color[3] = si_scale_alpha_by_sample_mask(ctx, color[3], samplemask_param); /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ - if (ctx->shader->key.part.ps.epilog.last_cbuf > 0) { + if (ctx->shader->key.ps.part.epilog.last_cbuf > 0) { struct ac_export_args args[8]; int c, last = -1; assert(compacted_mrt_index == 0); /* Get the export arguments, also find out what the last one is. */ - for (c = 0; c <= ctx->shader->key.part.ps.epilog.last_cbuf; c++) { + for (c = 0; c <= ctx->shader->key.ps.part.epilog.last_cbuf; c++) { si_llvm_init_ps_export_args(ctx, color, c, compacted_mrt_index, color_type, &args[c]); if (args[c].enabled_channels) { @@ -447,7 +447,7 @@ static bool si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *col return false; /* Emit all exports. */ - for (c = 0; c <= ctx->shader->key.part.ps.epilog.last_cbuf; c++) { + for (c = 0; c <= ctx->shader->key.ps.part.epilog.last_cbuf; c++) { if (is_last && last == c) { args[c].valid_mask = 1; /* whether the EXEC mask is valid */ args[c].done = 1; /* DONE bit */ diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index 68e3fc18e21..b0aa0a0165d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -71,8 +71,8 @@ static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context * { assert(ctx->stage == MESA_SHADER_TESS_CTRL); - if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) - return util_last_bit64(ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) * 4; + if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy) + return util_last_bit64(ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy) * 4; return util_last_bit64(ctx->shader->selector->outputs_written) * 4; } @@ -86,7 +86,7 @@ static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx) static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx) { - if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) + if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy) return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 0, 13); const struct si_shader_info *info = &ctx->shader->selector->info; @@ -160,7 +160,7 @@ static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx) case MESA_SHADER_TESS_CTRL: if (ctx->screen->info.chip_class >= GFX9 && ctx->shader->is_monolithic) { - stride = ctx->shader->key.part.tcs.ls->lshs_vertex_stride / 4; + stride = ctx->shader->key.ge.part.tcs.ls->lshs_vertex_stride / 4; return LLVMConstInt(ctx->ac.i32, stride, 0); } return si_unpack_param(ctx, ctx->vs_state_bits, 24, 8); @@ -396,7 +396,7 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType } /* Load the TCS input from a VGPR if possible. */ - if (ctx->shader->key.opt.same_patch_vertices && + if (ctx->shader->key.ge.opt.same_patch_vertices && load_input && vertex_index_is_invoc_id && !param_index) { unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 + si_shader_io_get_unique_index(semantic, false) * 4; @@ -650,7 +650,7 @@ static void si_copy_tcs_inputs(struct si_shader_context *ctx) lds_base = get_tcs_in_current_patch_offset(ctx); lds_base = ac_build_imad(&ctx->ac, invocation_id, lds_vertex_stride, lds_base); - inputs = ctx->shader->key.mono.u.ff_tcs_inputs_to_copy; + inputs = ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy; while (inputs) { unsigned i = u_bit_scan64(&inputs); @@ -679,7 +679,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re unsigned stride, outer_comps, inner_comps, i, offset; /* Add a barrier before loading tess factors from LDS. */ - if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) + if (!shader->key.ge.part.tcs.epilog.invoc0_tess_factors_are_def) si_llvm_emit_barrier(ctx); /* Do this only for invocation 0, because the tess levels are per-patch, @@ -692,7 +692,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, invocation_id, ctx->ac.i32_0, ""), 6503); /* Determine the layout of one tess factor element in the buffer. */ - switch (shader->key.part.tcs.epilog.prim_mode) { + switch (shader->key.ge.part.tcs.epilog.prim_mode) { case GL_LINES: stride = 2; /* 2 dwords, 1 vec2 store */ outer_comps = 2; @@ -718,7 +718,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re outer[i] = LLVMGetUndef(ctx->ac.i32); } - if (shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) { + if (shader->key.ge.part.tcs.epilog.invoc0_tess_factors_are_def) { /* Tess factors are in VGPRs. */ for (i = 0; i < outer_comps; i++) outer[i] = out[i] = invoc0_tf_outer[i]; @@ -745,7 +745,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re } } - if (shader->key.part.tcs.epilog.prim_mode == GL_LINES) { + if (shader->key.ge.part.tcs.epilog.prim_mode == GL_LINES) { /* For isolines, the hardware expects tess factors in the * reverse order from what NIR specifies. */ @@ -789,7 +789,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re ac_glc); /* Store the tess factors into the offchip buffer if TES reads them. */ - if (shader->key.part.tcs.epilog.tes_reads_tess_factors) { + if (shader->key.ge.part.tcs.epilog.tes_reads_tess_factors) { LLVMValueRef buf, base, inner_vec, outer_vec, tf_outer_offset; LLVMValueRef tf_inner_offset; unsigned param_outer, param_inner; @@ -983,11 +983,11 @@ void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi) LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], ""); - if (!shader->key.opt.same_patch_vertices || + if (!shader->key.ge.opt.same_patch_vertices || !(ctx->next_shader_sel->tcs_vgpr_only_inputs & (1ull << semantic))) lshs_lds_store(ctx, chan, dw_addr, value); - if (shader->key.opt.same_patch_vertices) { + if (shader->key.ge.opt.same_patch_vertices) { ctx->return_value = LLVMBuildInsertValue(ctx->ac.builder, ctx->return_value, value, ret_offset + param * 4 + chan, ""); } @@ -1084,11 +1084,11 @@ void si_llvm_init_tes_callbacks(struct si_shader_context *ctx, bool ngg_cull_sha ctx->abi.load_tess_level = si_load_tess_level; ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in; - if (ctx->shader->key.as_es) + if (ctx->shader->key.ge.as_es) ctx->abi.emit_outputs = si_llvm_emit_es_epilogue; else if (ngg_cull_shader) ctx->abi.emit_outputs = gfx10_emit_ngg_culling_epilogue; - else if (ctx->shader->key.as_ngg) + else if (ctx->shader->key.ge.as_ngg) ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue; else ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index ecdcf48403d..53cf9864122 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -131,8 +131,8 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L * of dword-sized data that needs fixups. We need to insert conversion * code anyway, and the amd/common code does it for us. */ - bool opencode = ctx->shader->key.mono.vs_fetch_opencode & (1 << input_index); - fix_fetch.bits = ctx->shader->key.mono.vs_fix_fetch[input_index].bits; + bool opencode = ctx->shader->key.ge.mono.vs_fetch_opencode & (1 << input_index); + fix_fetch.bits = ctx->shader->key.ge.mono.vs_fix_fetch[input_index].bits; if (opencode || (fix_fetch.u.log_size == 3 && fix_fetch.u.format == AC_FETCH_FORMAT_FLOAT) || (fix_fetch.u.log_size == 2)) { tmp = ac_build_opencoded_load_format(&ctx->ac, fix_fetch.u.log_size, @@ -400,7 +400,7 @@ static void si_llvm_emit_clipvertex(struct si_shader_context *ctx, struct ac_exp LLVMValueRef constbuf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_CLIP_PLANES, 0); LLVMValueRef const_resource = ac_build_load_to_sgpr(&ctx->ac, ptr, constbuf_index); unsigned clipdist_mask = ctx->shader->selector->clipdist_mask & - ~ctx->shader->key.opt.kill_clip_distances; + ~ctx->shader->key.ge.opt.kill_clip_distances; for (reg_index = 0; reg_index < 2; reg_index++) { struct ac_export_args *args = &pos[2 + reg_index]; @@ -484,7 +484,7 @@ static void si_prepare_param_exports(struct si_shader_context *ctx, } if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) && - shader->key.opt.kill_outputs & + shader->key.ge.opt.kill_outputs & (1ull << si_shader_io_get_unique_index(semantic, true))) continue; @@ -575,7 +575,7 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx, viewport_index_value = NULL; unsigned pos_idx, index; unsigned clipdist_mask = (shader->selector->clipdist_mask & - ~shader->key.opt.kill_clip_distances) | + ~shader->key.ge.opt.kill_clip_distances) | shader->selector->culldist_mask; int i; @@ -629,8 +629,8 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx, pos_args[0].out[3] = ctx->ac.f32_1; /* W */ } - bool writes_psize = shader->selector->info.writes_psize && !shader->key.opt.kill_pointsize; - bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.as_ngg; + bool writes_psize = shader->selector->info.writes_psize && !shader->key.ge.opt.kill_pointsize; + bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.ge.as_ngg; bool writes_vrs = ctx->screen->options.vrs2x2; /* Write the misc vector (point size, edgeflag, layer, viewport). */ @@ -783,7 +783,7 @@ void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi) si_llvm_emit_streamout(ctx, outputs, i, 0); /* Export PrimitiveID. */ - if (ctx->shader->key.mono.u.vs_export_prim_id) { + if (ctx->shader->key.ge.mono.u.vs_export_prim_id) { outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID; outputs[i].values[0] = ac_to_float(&ctx->ac, si_get_primitive_id(ctx, 0)); for (j = 1; j < 4; j++) @@ -990,13 +990,13 @@ void si_llvm_init_vs_callbacks(struct si_shader_context *ctx, bool ngg_cull_shad { struct si_shader *shader = ctx->shader; - if (shader->key.as_ls) + if (shader->key.ge.as_ls) ctx->abi.emit_outputs = si_llvm_emit_ls_epilogue; - else if (shader->key.as_es) + else if (shader->key.ge.as_es) ctx->abi.emit_outputs = si_llvm_emit_es_epilogue; else if (ngg_cull_shader) ctx->abi.emit_outputs = gfx10_emit_ngg_culling_epilogue; - else if (shader->key.as_ngg) + else if (shader->key.ge.as_ngg) ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue; else ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue; diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c index 675c4e1c182..f261c68cdd4 100644 --- a/src/gallium/drivers/radeonsi/si_sqtt.c +++ b/src/gallium/drivers/radeonsi/si_sqtt.c @@ -927,24 +927,24 @@ si_sqtt_pipeline_is_registered(struct ac_thread_trace_data *thread_trace_data, static enum rgp_hardware_stages -si_sqtt_pipe_to_rgp_shader_stage(struct si_shader_key* key, enum pipe_shader_type stage) +si_sqtt_pipe_to_rgp_shader_stage(union si_shader_key* key, enum pipe_shader_type stage) { switch (stage) { case PIPE_SHADER_VERTEX: - if (key->as_ls) + if (key->ge.as_ls) return RGP_HW_STAGE_LS; - else if (key->as_es) + else if (key->ge.as_es) return RGP_HW_STAGE_ES; - else if (key->as_ngg) + else if (key->ge.as_ngg) return RGP_HW_STAGE_GS; else return RGP_HW_STAGE_VS; case PIPE_SHADER_TESS_CTRL: return RGP_HW_STAGE_HS; case PIPE_SHADER_TESS_EVAL: - if (key->as_es) + if (key->ge.as_es) return RGP_HW_STAGE_ES; - else if (key->as_ngg) + else if (key->ge.as_ngg) return RGP_HW_STAGE_GS; else return RGP_HW_STAGE_VS; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 9ed07e6262b..b0b2e06dd55 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -123,7 +123,7 @@ static void si_emit_cb_render_state(struct si_context *sctx) /* RB+ register settings. */ if (sctx->screen->info.rbplus_allowed) { unsigned spi_shader_col_format = - sctx->shader.ps.cso ? sctx->shader.ps.current->key.part.ps.epilog.spi_shader_col_format + sctx->shader.ps.cso ? sctx->shader.ps.current->key.ps.part.epilog.spi_shader_col_format : 0; unsigned sx_ps_downconvert = 0; unsigned sx_blend_opt_epsilon = 0; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 1e6c08245ed..0bdc2eb1d83 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -478,6 +478,8 @@ struct si_buffer_resources { } while (0) /* si_descriptors.c */ +void si_get_inline_uniform_state(union si_shader_key *key, enum pipe_shader_type shader, + bool *inline_uniforms, uint32_t **inlined_values); void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture *tex, const struct legacy_surf_level *base_level_info, unsigned base_level, unsigned first_level, unsigned block_width, @@ -572,17 +574,14 @@ void si_schedule_initial_compile(struct si_context *sctx, gl_shader_stage stage, util_queue_execute_func execute); void si_get_active_slot_masks(const struct si_shader_info *info, uint64_t *const_and_shader_buffers, uint64_t *samplers_and_images); -int si_shader_select_with_key(struct si_context *sctx, struct si_shader_ctx_state *state, - const struct si_shader_key *key, int thread_index, - bool optimized_or_none); int si_shader_select(struct pipe_context *ctx, struct si_shader_ctx_state *state); void si_vs_key_update_inputs(struct si_context *sctx); -void si_get_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key, +void si_get_vs_key_inputs(struct si_context *sctx, union si_shader_key *key, struct si_vs_prolog_bits *prolog_key); void si_update_ps_inputs_read_or_disabled(struct si_context *sctx); void si_update_ps_kill_enable(struct si_context *sctx); void si_update_vrs_flat_shading(struct si_context *sctx); -unsigned si_get_input_prim(const struct si_shader_selector *gs, const struct si_shader_key *key); +unsigned si_get_input_prim(const struct si_shader_selector *gs, const union si_shader_key *key); bool si_update_ngg(struct si_context *sctx); void si_ps_key_update_framebuffer(struct si_context *sctx); void si_ps_key_update_framebuffer_blend(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index a16f834831c..fd863e26338 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -113,7 +113,7 @@ static bool si_update_shaders(struct si_context *sctx) unsigned old_pa_cl_vs_out_cntl = old_vs ? old_vs->pa_cl_vs_out_cntl : 0; struct si_shader *old_ps = sctx->shader.ps.current; unsigned old_spi_shader_col_format = - old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0; + old_ps ? old_ps->key.ps.part.epilog.spi_shader_col_format : 0; int r; /* Update TCS and TES. */ @@ -136,7 +136,7 @@ static bool si_update_shaders(struct si_context *sctx) if (!sctx->fixed_func_tcs_shader.cso) return false; - sctx->fixed_func_tcs_shader.key.part.tcs.epilog.invoc0_tess_factors_are_def = + sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.invoc0_tess_factors_are_def = sctx->fixed_func_tcs_shader.cso->info.tessfactors_are_def_in_all_invocs; } @@ -260,12 +260,12 @@ static bool si_update_shaders(struct si_context *sctx) if ((GFX_VERSION >= GFX10_3 || (GFX_VERSION >= GFX9 && sctx->screen->info.rbplus_allowed)) && si_pm4_state_changed(sctx, ps) && (!old_ps || old_spi_shader_col_format != - sctx->shader.ps.current->key.part.ps.epilog.spi_shader_col_format)) + sctx->shader.ps.current->key.ps.part.epilog.spi_shader_col_format)) si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state); if (sctx->smoothing_enabled != - sctx->shader.ps.current->key.part.ps.epilog.poly_line_smoothing) { - sctx->smoothing_enabled = sctx->shader.ps.current->key.part.ps.epilog.poly_line_smoothing; + sctx->shader.ps.current->key.ps.part.epilog.poly_line_smoothing) { + sctx->smoothing_enabled = sctx->shader.ps.current->key.ps.part.epilog.poly_line_smoothing; si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); /* NGG cull state uses smoothing_enabled. */ @@ -527,7 +527,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa else ls_current = sctx->fixed_func_tcs_shader.current; - ls = ls_current->key.part.tcs.ls; + ls = ls_current->key.ge.part.tcs.ls; } else { ls_current = sctx->shader.vs.current; ls = sctx->shader.vs.cso; @@ -567,7 +567,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa unsigned input_patch_size; /* Allocate LDS for TCS inputs only if it's used. */ - if (!ls_current->key.opt.same_patch_vertices || + if (!ls_current->key.ge.opt.same_patch_vertices || tcs->info.base.inputs_read & ~tcs->tcs_vgpr_only_inputs) input_patch_size = num_tcs_input_cp * input_vertex_size; else @@ -2078,8 +2078,8 @@ static void si_draw(struct pipe_context *ctx, GFX_VERSION >= GFX9 && tcs && sctx->patch_vertices == tcs->info.base.tess.tcs_vertices_out; - if (sctx->shader.tcs.key.opt.same_patch_vertices != same_patch_vertices) { - sctx->shader.tcs.key.opt.same_patch_vertices = same_patch_vertices; + if (sctx->shader.tcs.key.ge.opt.same_patch_vertices != same_patch_vertices) { + sctx->shader.tcs.key.ge.opt.same_patch_vertices = same_patch_vertices; sctx->do_update_shaders = true; } @@ -2094,9 +2094,9 @@ static void si_draw(struct pipe_context *ctx, bool ls_vgpr_fix = tcs && sctx->patch_vertices > tcs->info.base.tess.tcs_vertices_out; - if (ls_vgpr_fix != sctx->shader.tcs.key.part.tcs.ls_prolog.ls_vgpr_fix) { - sctx->shader.tcs.key.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix; - sctx->fixed_func_tcs_shader.key.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix; + if (ls_vgpr_fix != sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix) { + sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix; + sctx->fixed_func_tcs_shader.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix; sctx->do_update_shaders = true; } } @@ -2133,8 +2133,8 @@ static void si_draw(struct pipe_context *ctx, bool gs_tri_strip_adj_fix = !HAS_TESS && prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY; - if (gs_tri_strip_adj_fix != sctx->shader.gs.key.part.gs.prolog.tri_strip_adj_fix) { - sctx->shader.gs.key.part.gs.prolog.tri_strip_adj_fix = gs_tri_strip_adj_fix; + if (gs_tri_strip_adj_fix != sctx->shader.gs.key.ge.part.gs.prolog.tri_strip_adj_fix) { + sctx->shader.gs.key.ge.part.gs.prolog.tri_strip_adj_fix = gs_tri_strip_adj_fix; sctx->do_update_shaders = true; } } @@ -2326,7 +2326,7 @@ static void si_draw(struct pipe_context *ctx, * hasn't finished. Set it to the correct value in si_context. */ if (GFX_VERSION >= GFX10 && NGG) - sctx->ngg_culling = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->key.opt.ngg_culling; + sctx->ngg_culling = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->key.ge.opt.ngg_culling; } /* Since we've called si_context_add_resource_size for vertex buffers, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 1ad98cc930f..3d73831dd02 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -360,7 +360,7 @@ bool si_shader_mem_ordered(struct si_shader *shader) shader->config.scratch_bytes_per_wave || (info->stage == MESA_SHADER_FRAGMENT && (info->base.fs.uses_fbfetch_output || - shader->key.part.ps.prolog.poly_stipple)); + shader->key.ps.part.prolog.poly_stipple)); if (prev_info) { sampler_or_bvh |= prev_info->uses_vmem_return_type_sampler_or_bvh; @@ -455,7 +455,7 @@ static void polaris_set_vgt_vertex_reuse(struct si_screen *sscreen, struct si_sh /* VS as VS, or VS as ES: */ if ((sel->info.stage == MESA_SHADER_VERTEX && - (!shader->key.as_ls && !shader->is_gs_copy_shader)) || + (!shader->key.ge.as_ls && !shader->is_gs_copy_shader)) || /* TES as VS, or TES as ES: */ sel->info.stage == MESA_SHADER_TESS_EVAL) { unsigned vtx_reuse_depth = 30; @@ -504,7 +504,7 @@ static unsigned si_get_vs_vgpr_comp_cnt(struct si_screen *sscreen, struct si_sha * GFX10 LS (VertexID, RelAutoIndex, UserVGPR1, UserVGPR2 or InstanceID) * GFX10 ES,VS (VertexID, UserVGPR1, UserVGPR2 or VSPrimID, UserVGPR3 or InstanceID) */ - bool is_ls = shader->selector->info.stage == MESA_SHADER_TESS_CTRL || shader->key.as_ls; + bool is_ls = shader->selector->info.stage == MESA_SHADER_TESS_CTRL || shader->key.ge.as_ls; unsigned max = 0; if (shader->info.uses_instanceid) { @@ -810,7 +810,7 @@ static void si_emit_shader_gs(struct si_context *sctx) SI_TRACKED_VGT_ESGS_RING_ITEMSIZE, shader->ctx_reg.gs.vgt_esgs_ring_itemsize); - if (shader->key.part.gs.es->info.stage == MESA_SHADER_TESS_EVAL) + if (shader->key.ge.part.gs.es->info.stage == MESA_SHADER_TESS_EVAL) radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM, shader->vgt_tf_param); if (shader->vgt_vertex_reuse_block_cntl) @@ -886,13 +886,13 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) if (sscreen->info.chip_class >= GFX9) { unsigned input_prim = sel->info.base.gs.input_primitive; - gl_shader_stage es_stage = shader->key.part.gs.es->info.stage; + gl_shader_stage es_stage = shader->key.ge.part.gs.es->info.stage; unsigned es_vgpr_comp_cnt, gs_vgpr_comp_cnt; if (es_stage == MESA_SHADER_VERTEX) { es_vgpr_comp_cnt = si_get_vs_vgpr_comp_cnt(sscreen, shader, false); } else if (es_stage == MESA_SHADER_TESS_EVAL) - es_vgpr_comp_cnt = shader->key.part.gs.es->info.uses_primid ? 3 : 2; + es_vgpr_comp_cnt = shader->key.ge.part.gs.es->info.uses_primid ? 3 : 2; else unreachable("invalid shader selector type"); @@ -952,12 +952,12 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) S_028A44_GS_INST_PRIMS_IN_SUBGRP(shader->gs_info.gs_inst_prims_in_subgroup); shader->ctx_reg.gs.vgt_gs_max_prims_per_subgroup = S_028A94_MAX_PRIMS_PER_SUBGROUP(shader->gs_info.max_prims_per_subgroup); - shader->ctx_reg.gs.vgt_esgs_ring_itemsize = shader->key.part.gs.es->esgs_itemsize / 4; + shader->ctx_reg.gs.vgt_esgs_ring_itemsize = shader->key.ge.part.gs.es->esgs_itemsize / 4; if (es_stage == MESA_SHADER_TESS_EVAL) - si_set_tesseval_regs(sscreen, shader->key.part.gs.es, shader); + si_set_tesseval_regs(sscreen, shader->key.ge.part.gs.es, shader); - polaris_set_vgt_vertex_reuse(sscreen, shader->key.part.gs.es, shader); + polaris_set_vgt_vertex_reuse(sscreen, shader->key.ge.part.gs.es, shader); } else { shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs = S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F); @@ -994,7 +994,7 @@ bool gfx10_is_ngg_passthrough(struct si_shader *shader) * * NGG passthrough still allows the use of LDS. */ - return sel->info.stage != MESA_SHADER_GEOMETRY && !shader->key.opt.ngg_culling; + return sel->info.stage != MESA_SHADER_GEOMETRY && !shader->key.ge.opt.ngg_culling; } /* Common tail code for NGG primitive shaders. */ @@ -1094,7 +1094,7 @@ static void gfx10_emit_shader_ngg_tess_gs(struct si_context *sctx) gfx10_emit_shader_ngg_tail(sctx, shader); } -unsigned si_get_input_prim(const struct si_shader_selector *gs, const struct si_shader_key *key) +unsigned si_get_input_prim(const struct si_shader_selector *gs, const union si_shader_key *key) { if (gs->info.stage == MESA_SHADER_GEOMETRY) return gs->info.base.gs.input_primitive; @@ -1107,7 +1107,7 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs, const struct si_ return PIPE_PRIM_TRIANGLES; } - if (key->opt.ngg_culling & SI_NGG_CULL_LINES) + if (key->ge.opt.ngg_culling & SI_NGG_CULL_LINES) return PIPE_PRIM_LINES; return PIPE_PRIM_TRIANGLES; /* worst case for all callers */ @@ -1117,9 +1117,9 @@ static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel, const struct si_shader *shader, bool ngg) { /* Clip distances can be killed, but cull distances can't. */ - unsigned clipcull_mask = (sel->clipdist_mask & ~shader->key.opt.kill_clip_distances) | + unsigned clipcull_mask = (sel->clipdist_mask & ~shader->key.ge.opt.kill_clip_distances) | sel->culldist_mask; - bool writes_psize = sel->info.writes_psize && !shader->key.opt.kill_pointsize; + bool writes_psize = sel->info.writes_psize && !shader->key.ge.opt.kill_pointsize; bool misc_vec_ena = writes_psize || (sel->info.writes_edgeflag && !ngg) || sel->screen->options.vrs2x2 || sel->info.writes_layer || sel->info.writes_viewport_index; @@ -1153,7 +1153,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader uint64_t va; bool window_space = gs_info->stage == MESA_SHADER_VERTEX ? gs_info->base.vs.window_space_position : 0; - bool es_enable_prim_id = shader->key.mono.u.vs_export_prim_id || es_info->uses_primid; + bool es_enable_prim_id = shader->key.ge.mono.u.vs_export_prim_id || es_info->uses_primid; unsigned gs_num_invocations = MAX2(gs_sel->info.base.gs.invocations, 1); unsigned input_prim = si_get_input_prim(gs_sel, &shader->key); bool break_wave_at_eoi = false; @@ -1200,7 +1200,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader (gfx10_edgeflags_have_effect(shader) && !gfx10_is_ngg_passthrough(shader))) gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID, edge flags. */ else if ((gs_stage == MESA_SHADER_GEOMETRY && gs_info->uses_primid) || - (gs_stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id)) + (gs_stage == MESA_SHADER_VERTEX && shader->key.ge.mono.u.vs_export_prim_id)) gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */ else if (input_prim >= PIPE_PRIM_TRIANGLES && !gfx10_is_ngg_passthrough(shader)) gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */ @@ -1210,7 +1210,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader unsigned wave_size = si_get_shader_wave_size(shader); unsigned late_alloc_wave64, cu_mask; - ac_compute_late_alloc(&sscreen->info, true, shader->key.opt.ngg_culling, + ac_compute_late_alloc(&sscreen->info, true, shader->key.ge.opt.ngg_culling, shader->config.scratch_bytes_per_wave > 0, &late_alloc_wave64, &cu_mask); @@ -1255,7 +1255,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader shader->ctx_reg.ngg.vgt_primitiveid_en = S_028A84_PRIMITIVEID_EN(es_enable_prim_id) | - S_028A84_NGG_DISABLE_PROVOK_REUSE(shader->key.mono.u.vs_export_prim_id || + S_028A84_NGG_DISABLE_PROVOK_REUSE(shader->key.ge.mono.u.vs_export_prim_id || gs_sel->info.writes_primid); if (gs_stage == MESA_SHADER_GEOMETRY) { @@ -1293,7 +1293,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader /* Oversubscribe PC. This improves performance when there are too many varyings. */ unsigned oversub_pc_factor = 1; - if (shader->key.opt.ngg_culling) { + if (shader->key.ge.opt.ngg_culling) { /* Be more aggressive with NGG culling. */ if (shader->info.nr_param_exports > 4) oversub_pc_factor = 4; @@ -1418,7 +1418,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, unsigned nparams, oc_lds_en; bool window_space = info->stage == MESA_SHADER_VERTEX ? info->base.vs.window_space_position : 0; - bool enable_prim_id = shader->key.mono.u.vs_export_prim_id || info->uses_primid; + bool enable_prim_id = shader->key.ge.mono.u.vs_export_prim_id || info->uses_primid; pm4 = si_get_shader_pm4_state(shader); if (!pm4) @@ -1556,7 +1556,7 @@ static unsigned si_get_ps_num_interp(struct si_shader *ps) struct si_shader_info *info = &ps->selector->info; unsigned num_colors = !!(info->colors_read & 0x0f) + !!(info->colors_read & 0xf0); unsigned num_interp = - ps->selector->info.num_inputs + (ps->key.part.ps.prolog.color_two_side ? num_colors : 0); + ps->selector->info.num_inputs + (ps->key.ps.part.prolog.color_two_side ? num_colors : 0); assert(num_interp <= 32); return MIN2(num_interp, 32); @@ -1564,7 +1564,7 @@ static unsigned si_get_ps_num_interp(struct si_shader *ps) static unsigned si_get_spi_shader_col_format(struct si_shader *shader) { - unsigned spi_shader_col_format = shader->key.part.ps.epilog.spi_shader_col_format; + unsigned spi_shader_col_format = shader->key.ps.part.epilog.spi_shader_col_format; unsigned value = 0, num_mrts = 0; unsigned i, num_targets = (util_last_bit(spi_shader_col_format) + 3) / 4; @@ -1628,23 +1628,23 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) G_0286CC_PERSP_PULL_MODEL_ENA(input_ena)); /* Validate interpolation optimization flags (read as implications). */ - assert(!shader->key.part.ps.prolog.bc_optimize_for_persp || + assert(!shader->key.ps.part.prolog.bc_optimize_for_persp || (G_0286CC_PERSP_CENTER_ENA(input_ena) && G_0286CC_PERSP_CENTROID_ENA(input_ena))); - assert(!shader->key.part.ps.prolog.bc_optimize_for_linear || + assert(!shader->key.ps.part.prolog.bc_optimize_for_linear || (G_0286CC_LINEAR_CENTER_ENA(input_ena) && G_0286CC_LINEAR_CENTROID_ENA(input_ena))); - assert(!shader->key.part.ps.prolog.force_persp_center_interp || + assert(!shader->key.ps.part.prolog.force_persp_center_interp || (!G_0286CC_PERSP_SAMPLE_ENA(input_ena) && !G_0286CC_PERSP_CENTROID_ENA(input_ena))); - assert(!shader->key.part.ps.prolog.force_linear_center_interp || + assert(!shader->key.ps.part.prolog.force_linear_center_interp || (!G_0286CC_LINEAR_SAMPLE_ENA(input_ena) && !G_0286CC_LINEAR_CENTROID_ENA(input_ena))); - assert(!shader->key.part.ps.prolog.force_persp_sample_interp || + assert(!shader->key.ps.part.prolog.force_persp_sample_interp || (!G_0286CC_PERSP_CENTER_ENA(input_ena) && !G_0286CC_PERSP_CENTROID_ENA(input_ena))); - assert(!shader->key.part.ps.prolog.force_linear_sample_interp || + assert(!shader->key.ps.part.prolog.force_linear_sample_interp || (!G_0286CC_LINEAR_CENTER_ENA(input_ena) && !G_0286CC_LINEAR_CENTROID_ENA(input_ena))); /* Validate cases when the optimizations are off (read as implications). */ - assert(shader->key.part.ps.prolog.bc_optimize_for_persp || + assert(shader->key.ps.part.prolog.bc_optimize_for_persp || !G_0286CC_PERSP_CENTER_ENA(input_ena) || !G_0286CC_PERSP_CENTROID_ENA(input_ena)); - assert(shader->key.part.ps.prolog.bc_optimize_for_linear || + assert(shader->key.ps.part.prolog.bc_optimize_for_linear || !G_0286CC_LINEAR_CENTER_ENA(input_ena) || !G_0286CC_LINEAR_CENTROID_ENA(input_ena)); pm4 = si_get_shader_pm4_state(shader); @@ -1685,7 +1685,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1); spi_shader_col_format = si_get_spi_shader_col_format(shader); - cb_shader_mask = ac_get_cb_shader_mask(shader->key.part.ps.epilog.spi_shader_col_format); + cb_shader_mask = ac_get_cb_shader_mask(shader->key.ps.part.epilog.spi_shader_col_format); /* Ensure that some export memory is always allocated, for two reasons: * @@ -1703,7 +1703,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) * instructions if any are present. */ if ((sscreen->info.chip_class <= GFX9 || info->base.fs.uses_discard || - shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS) && + shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS) && !spi_shader_col_format && !info->writes_z && !info->writes_stencil && !info->writes_samplemask) spi_shader_col_format = V_028714_SPI_SHADER_32_R; @@ -1750,11 +1750,11 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader { switch (shader->selector->info.stage) { case MESA_SHADER_VERTEX: - if (shader->key.as_ls) + if (shader->key.ge.as_ls) si_shader_ls(sscreen, shader); - else if (shader->key.as_es) + else if (shader->key.ge.as_es) si_shader_es(sscreen, shader); - else if (shader->key.as_ngg) + else if (shader->key.ge.as_ngg) gfx10_shader_ngg(sscreen, shader); else si_shader_vs(sscreen, shader, NULL); @@ -1763,15 +1763,15 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader si_shader_hs(sscreen, shader); break; case MESA_SHADER_TESS_EVAL: - if (shader->key.as_es) + if (shader->key.ge.as_es) si_shader_es(sscreen, shader); - else if (shader->key.as_ngg) + else if (shader->key.ge.as_ngg) gfx10_shader_ngg(sscreen, shader); else si_shader_vs(sscreen, shader, NULL); break; case MESA_SHADER_GEOMETRY: - if (shader->key.as_ngg) + if (shader->key.ge.as_ngg) gfx10_shader_ngg(sscreen, shader); else si_shader_gs(sscreen, shader); @@ -1784,27 +1784,27 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader } } -static void si_clear_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key, +static void si_clear_vs_key_inputs(struct si_context *sctx, union si_shader_key *key, struct si_vs_prolog_bits *prolog_key) { prolog_key->instance_divisor_is_one = 0; prolog_key->instance_divisor_is_fetched = 0; - key->mono.vs_fetch_opencode = 0; - memset(key->mono.vs_fix_fetch, 0, sizeof(key->mono.vs_fix_fetch)); + key->ge.mono.vs_fetch_opencode = 0; + memset(key->ge.mono.vs_fix_fetch, 0, sizeof(key->ge.mono.vs_fix_fetch)); } void si_vs_key_update_inputs(struct si_context *sctx) { struct si_shader_selector *vs = sctx->shader.vs.cso; struct si_vertex_elements *elts = sctx->vertex_elements; - struct si_shader_key *key = &sctx->shader.vs.key; + union si_shader_key *key = &sctx->shader.vs.key; if (!vs) return; if (vs->info.base.vs.blit_sgprs_amd) { - si_clear_vs_key_inputs(sctx, key, &key->part.vs.prolog); - key->opt.prefer_mono = 0; + si_clear_vs_key_inputs(sctx, key, &key->ge.part.vs.prolog); + key->ge.opt.prefer_mono = 0; sctx->uses_nontrivial_vs_prolog = false; return; } @@ -1814,9 +1814,9 @@ void si_vs_key_update_inputs(struct si_context *sctx) if (elts->instance_divisor_is_one || elts->instance_divisor_is_fetched) uses_nontrivial_vs_prolog = true; - key->part.vs.prolog.instance_divisor_is_one = elts->instance_divisor_is_one; - key->part.vs.prolog.instance_divisor_is_fetched = elts->instance_divisor_is_fetched; - key->opt.prefer_mono = elts->instance_divisor_is_fetched; + key->ge.part.vs.prolog.instance_divisor_is_one = elts->instance_divisor_is_one; + key->ge.part.vs.prolog.instance_divisor_is_fetched = elts->instance_divisor_is_fetched; + key->ge.opt.prefer_mono = elts->instance_divisor_is_fetched; unsigned count_mask = (1 << vs->info.num_inputs) - 1; unsigned fix = elts->fix_fetch_always & count_mask; @@ -1837,17 +1837,17 @@ void si_vs_key_update_inputs(struct si_context *sctx) } } - memset(key->mono.vs_fix_fetch, 0, sizeof(key->mono.vs_fix_fetch)); + memset(key->ge.mono.vs_fix_fetch, 0, sizeof(key->ge.mono.vs_fix_fetch)); while (fix) { unsigned i = u_bit_scan(&fix); uint8_t fix_fetch = elts->fix_fetch[i]; - key->mono.vs_fix_fetch[i].bits = fix_fetch; + key->ge.mono.vs_fix_fetch[i].bits = fix_fetch; if (fix_fetch) uses_nontrivial_vs_prolog = true; } - key->mono.vs_fetch_opencode = opencode; + key->ge.mono.vs_fetch_opencode = opencode; if (opencode) uses_nontrivial_vs_prolog = true; @@ -1863,18 +1863,18 @@ void si_vs_key_update_inputs(struct si_context *sctx) * cases. */ if (uses_nontrivial_vs_prolog && sctx->force_trivial_vs_prolog) - si_clear_vs_key_inputs(sctx, key, &key->part.vs.prolog); + si_clear_vs_key_inputs(sctx, key, &key->ge.part.vs.prolog); } -void si_get_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key, +void si_get_vs_key_inputs(struct si_context *sctx, union si_shader_key *key, struct si_vs_prolog_bits *prolog_key) { - prolog_key->instance_divisor_is_one = sctx->shader.vs.key.part.vs.prolog.instance_divisor_is_one; - prolog_key->instance_divisor_is_fetched = sctx->shader.vs.key.part.vs.prolog.instance_divisor_is_fetched; + prolog_key->instance_divisor_is_one = sctx->shader.vs.key.ge.part.vs.prolog.instance_divisor_is_one; + prolog_key->instance_divisor_is_fetched = sctx->shader.vs.key.ge.part.vs.prolog.instance_divisor_is_fetched; - key->mono.vs_fetch_opencode = sctx->shader.vs.key.mono.vs_fetch_opencode; - memcpy(key->mono.vs_fix_fetch, sctx->shader.vs.key.mono.vs_fix_fetch, - sizeof(key->mono.vs_fix_fetch)); + key->ge.mono.vs_fetch_opencode = sctx->shader.vs.key.ge.mono.vs_fetch_opencode; + memcpy(key->ge.mono.vs_fix_fetch, sctx->shader.vs.key.ge.mono.vs_fix_fetch, + sizeof(key->ge.mono.vs_fix_fetch)); } void si_update_ps_inputs_read_or_disabled(struct si_context *sctx) @@ -1898,53 +1898,53 @@ void si_update_ps_inputs_read_or_disabled(struct si_context *sctx) } static void si_get_vs_key_outputs(struct si_context *sctx, struct si_shader_selector *vs, - struct si_shader_key *key) + union si_shader_key *key) { - key->opt.kill_clip_distances = vs->clipdist_mask & ~sctx->queued.named.rasterizer->clip_plane_enable; + key->ge.opt.kill_clip_distances = vs->clipdist_mask & ~sctx->queued.named.rasterizer->clip_plane_enable; /* Find out which VS outputs aren't used by the PS. */ uint64_t outputs_written = vs->outputs_written_before_ps; uint64_t linked = outputs_written & sctx->ps_inputs_read_or_disabled; - key->opt.kill_outputs = ~linked & outputs_written; + key->ge.opt.kill_outputs = ~linked & outputs_written; if (vs->info.stage != MESA_SHADER_GEOMETRY) { - key->opt.ngg_culling = sctx->ngg_culling; - key->mono.u.vs_export_prim_id = sctx->shader.ps.cso && sctx->shader.ps.cso->info.uses_primid; + key->ge.opt.ngg_culling = sctx->ngg_culling; + key->ge.mono.u.vs_export_prim_id = sctx->shader.ps.cso && sctx->shader.ps.cso->info.uses_primid; } else { - key->opt.ngg_culling = 0; - key->mono.u.vs_export_prim_id = 0; + key->ge.opt.ngg_culling = 0; + key->ge.mono.u.vs_export_prim_id = 0; } - key->opt.kill_pointsize = vs->info.writes_psize && - sctx->current_rast_prim != PIPE_PRIM_POINTS && - !sctx->queued.named.rasterizer->polygon_mode_is_points; + key->ge.opt.kill_pointsize = vs->info.writes_psize && + sctx->current_rast_prim != PIPE_PRIM_POINTS && + !sctx->queued.named.rasterizer->polygon_mode_is_points; } static void si_clear_vs_key_outputs(struct si_context *sctx, struct si_shader_selector *vs, - struct si_shader_key *key) + union si_shader_key *key) { - key->opt.kill_clip_distances = 0; - key->opt.kill_outputs = 0; - key->opt.ngg_culling = 0; - key->mono.u.vs_export_prim_id = 0; - key->opt.kill_pointsize = 0; + key->ge.opt.kill_clip_distances = 0; + key->ge.opt.kill_outputs = 0; + key->ge.opt.ngg_culling = 0; + key->ge.mono.u.vs_export_prim_id = 0; + key->ge.opt.kill_pointsize = 0; } void si_ps_key_update_framebuffer(struct si_context *sctx) { struct si_shader_selector *sel = sctx->shader.ps.cso; - struct si_shader_key *key = &sctx->shader.ps.key; + union si_shader_key *key = &sctx->shader.ps.key; if (!sel) return; if (sel->info.color0_writes_all_cbufs && sel->info.colors_written == 0x1) - key->part.ps.epilog.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1; + key->ps.part.epilog.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1; else - key->part.ps.epilog.last_cbuf = 0; + key->ps.part.epilog.last_cbuf = 0; /* ps_uses_fbfetch is true only if the color buffer is bound. */ if (sctx->ps_uses_fbfetch) { @@ -1952,25 +1952,25 @@ void si_ps_key_update_framebuffer(struct si_context *sctx) struct pipe_resource *tex = cb0->texture; /* 1D textures are allocated and used as 2D on GFX9. */ - key->mono.u.ps.fbfetch_msaa = sctx->framebuffer.nr_samples > 1; - key->mono.u.ps.fbfetch_is_1D = + key->ps.mono.fbfetch_msaa = sctx->framebuffer.nr_samples > 1; + key->ps.mono.fbfetch_is_1D = sctx->chip_class != GFX9 && (tex->target == PIPE_TEXTURE_1D || tex->target == PIPE_TEXTURE_1D_ARRAY); - key->mono.u.ps.fbfetch_layered = + key->ps.mono.fbfetch_layered = tex->target == PIPE_TEXTURE_1D_ARRAY || tex->target == PIPE_TEXTURE_2D_ARRAY || tex->target == PIPE_TEXTURE_CUBE || tex->target == PIPE_TEXTURE_CUBE_ARRAY || tex->target == PIPE_TEXTURE_3D; } else { - key->mono.u.ps.fbfetch_msaa = 0; - key->mono.u.ps.fbfetch_is_1D = 0; - key->mono.u.ps.fbfetch_layered = 0; + key->ps.mono.fbfetch_msaa = 0; + key->ps.mono.fbfetch_is_1D = 0; + key->ps.mono.fbfetch_layered = 0; } } void si_ps_key_update_framebuffer_blend(struct si_context *sctx) { struct si_shader_selector *sel = sctx->shader.ps.cso; - struct si_shader_key *key = &sctx->shader.ps.key; + union si_shader_key *key = &sctx->shader.ps.key; struct si_state_blend *blend = sctx->queued.named.blend; if (!sel) @@ -1979,7 +1979,7 @@ void si_ps_key_update_framebuffer_blend(struct si_context *sctx) /* Select the shader color format based on whether * blending or alpha are needed. */ - key->part.ps.epilog.spi_shader_col_format = + key->ps.part.epilog.spi_shader_col_format = (blend->blend_enable_4bit & blend->need_src_alpha_4bit & sctx->framebuffer.spi_shader_col_format_blend_alpha) | (blend->blend_enable_4bit & ~blend->need_src_alpha_4bit & @@ -1988,36 +1988,36 @@ void si_ps_key_update_framebuffer_blend(struct si_context *sctx) sctx->framebuffer.spi_shader_col_format_alpha) | (~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit & sctx->framebuffer.spi_shader_col_format); - key->part.ps.epilog.spi_shader_col_format &= blend->cb_target_enabled_4bit; + key->ps.part.epilog.spi_shader_col_format &= blend->cb_target_enabled_4bit; /* The output for dual source blending should have * the same format as the first output. */ if (blend->dual_src_blend) { - key->part.ps.epilog.spi_shader_col_format |= - (key->part.ps.epilog.spi_shader_col_format & 0xf) << 4; + key->ps.part.epilog.spi_shader_col_format |= + (key->ps.part.epilog.spi_shader_col_format & 0xf) << 4; } /* If alpha-to-coverage is enabled, we have to export alpha * even if there is no color buffer. */ - if (!(key->part.ps.epilog.spi_shader_col_format & 0xf) && blend->alpha_to_coverage) - key->part.ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR; + if (!(key->ps.part.epilog.spi_shader_col_format & 0xf) && blend->alpha_to_coverage) + key->ps.part.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR; /* On GFX6 and GFX7 except Hawaii, the CB doesn't clamp outputs * to the range supported by the type if a channel has less * than 16 bits and the export format is 16_ABGR. */ if (sctx->chip_class <= GFX7 && sctx->family != CHIP_HAWAII) { - key->part.ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8; - key->part.ps.epilog.color_is_int10 = sctx->framebuffer.color_is_int10; + key->ps.part.epilog.color_is_int8 = sctx->framebuffer.color_is_int8; + key->ps.part.epilog.color_is_int10 = sctx->framebuffer.color_is_int10; } /* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */ - if (!key->part.ps.epilog.last_cbuf) { - key->part.ps.epilog.spi_shader_col_format &= sel->colors_written_4bit; - key->part.ps.epilog.color_is_int8 &= sel->info.colors_written; - key->part.ps.epilog.color_is_int10 &= sel->info.colors_written; + if (!key->ps.part.epilog.last_cbuf) { + key->ps.part.epilog.spi_shader_col_format &= sel->colors_written_4bit; + key->ps.part.epilog.color_is_int8 &= sel->info.colors_written; + key->ps.part.epilog.color_is_int10 &= sel->info.colors_written; } /* Eliminate shader code computing output values that are unused. @@ -2026,51 +2026,51 @@ void si_ps_key_update_framebuffer_blend(struct si_context *sctx) */ if (sel->colors_written_4bit & ~(sctx->framebuffer.colorbuf_enabled_4bit & blend->cb_target_enabled_4bit)) - key->opt.prefer_mono = 1; + key->ps.opt.prefer_mono = 1; else - key->opt.prefer_mono = 0; + key->ps.opt.prefer_mono = 0; } void si_ps_key_update_blend_rasterizer(struct si_context *sctx) { - struct si_shader_key *key = &sctx->shader.ps.key; + union si_shader_key *key = &sctx->shader.ps.key; struct si_state_blend *blend = sctx->queued.named.blend; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; - key->part.ps.epilog.alpha_to_one = blend->alpha_to_one && rs->multisample_enable; + key->ps.part.epilog.alpha_to_one = blend->alpha_to_one && rs->multisample_enable; } void si_ps_key_update_rasterizer(struct si_context *sctx) { struct si_shader_selector *sel = sctx->shader.ps.cso; - struct si_shader_key *key = &sctx->shader.ps.key; + union si_shader_key *key = &sctx->shader.ps.key; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; if (!sel) return; - key->part.ps.prolog.color_two_side = rs->two_side && sel->info.colors_read; - key->part.ps.prolog.flatshade_colors = rs->flatshade && sel->info.uses_interp_color; - key->part.ps.epilog.clamp_color = rs->clamp_fragment_color; + key->ps.part.prolog.color_two_side = rs->two_side && sel->info.colors_read; + key->ps.part.prolog.flatshade_colors = rs->flatshade && sel->info.uses_interp_color; + key->ps.part.epilog.clamp_color = rs->clamp_fragment_color; } void si_ps_key_update_dsa(struct si_context *sctx) { - struct si_shader_key *key = &sctx->shader.ps.key; + union si_shader_key *key = &sctx->shader.ps.key; - key->part.ps.epilog.alpha_func = sctx->queued.named.dsa->alpha_func; + key->ps.part.epilog.alpha_func = sctx->queued.named.dsa->alpha_func; } static void si_ps_key_update_primtype_shader_rasterizer_framebuffer(struct si_context *sctx) { - struct si_shader_key *key = &sctx->shader.ps.key; + union si_shader_key *key = &sctx->shader.ps.key; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; bool is_poly = !util_prim_is_points_or_lines(sctx->current_rast_prim); bool is_line = util_prim_is_lines(sctx->current_rast_prim); - key->part.ps.prolog.poly_stipple = rs->poly_stipple_enable && is_poly; - key->part.ps.epilog.poly_line_smoothing = + key->ps.part.prolog.poly_stipple = rs->poly_stipple_enable && is_poly; + key->ps.part.epilog.poly_line_smoothing = ((is_poly && rs->poly_smooth) || (is_line && rs->line_smooth)) && sctx->framebuffer.nr_samples <= 1; } @@ -2078,21 +2078,21 @@ static void si_ps_key_update_primtype_shader_rasterizer_framebuffer(struct si_co void si_ps_key_update_sample_shading(struct si_context *sctx) { struct si_shader_selector *sel = sctx->shader.ps.cso; - struct si_shader_key *key = &sctx->shader.ps.key; + union si_shader_key *key = &sctx->shader.ps.key; if (!sel) return; if (sctx->ps_iter_samples > 1 && sel->info.reads_samplemask) - key->part.ps.prolog.samplemask_log_ps_iter = util_logbase2(sctx->ps_iter_samples); + key->ps.part.prolog.samplemask_log_ps_iter = util_logbase2(sctx->ps_iter_samples); else - key->part.ps.prolog.samplemask_log_ps_iter = 0; + key->ps.part.prolog.samplemask_log_ps_iter = 0; } void si_ps_key_update_framebuffer_rasterizer_sample_shading(struct si_context *sctx) { struct si_shader_selector *sel = sctx->shader.ps.cso; - struct si_shader_key *key = &sctx->shader.ps.key; + union si_shader_key *key = &sctx->shader.ps.key; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; if (!sel) @@ -2107,49 +2107,49 @@ void si_ps_key_update_framebuffer_rasterizer_sample_shading(struct si_context *s if (rs->force_persample_interp && rs->multisample_enable && sctx->framebuffer.nr_samples > 1 && sctx->ps_iter_samples > 1) { - key->part.ps.prolog.force_persp_sample_interp = + key->ps.part.prolog.force_persp_sample_interp = uses_persp_center || uses_persp_centroid; - key->part.ps.prolog.force_linear_sample_interp = + key->ps.part.prolog.force_linear_sample_interp = sel->info.uses_linear_center || sel->info.uses_linear_centroid; - key->part.ps.prolog.force_persp_center_interp = 0; - key->part.ps.prolog.force_linear_center_interp = 0; - key->part.ps.prolog.bc_optimize_for_persp = 0; - key->part.ps.prolog.bc_optimize_for_linear = 0; - key->mono.u.ps.interpolate_at_sample_force_center = 0; + key->ps.part.prolog.force_persp_center_interp = 0; + key->ps.part.prolog.force_linear_center_interp = 0; + key->ps.part.prolog.bc_optimize_for_persp = 0; + key->ps.part.prolog.bc_optimize_for_linear = 0; + key->ps.mono.interpolate_at_sample_force_center = 0; } else if (rs->multisample_enable && sctx->framebuffer.nr_samples > 1) { - key->part.ps.prolog.force_persp_sample_interp = 0; - key->part.ps.prolog.force_linear_sample_interp = 0; - key->part.ps.prolog.force_persp_center_interp = 0; - key->part.ps.prolog.force_linear_center_interp = 0; - key->part.ps.prolog.bc_optimize_for_persp = + key->ps.part.prolog.force_persp_sample_interp = 0; + key->ps.part.prolog.force_linear_sample_interp = 0; + key->ps.part.prolog.force_persp_center_interp = 0; + key->ps.part.prolog.force_linear_center_interp = 0; + key->ps.part.prolog.bc_optimize_for_persp = uses_persp_center && uses_persp_centroid; - key->part.ps.prolog.bc_optimize_for_linear = + key->ps.part.prolog.bc_optimize_for_linear = sel->info.uses_linear_center && sel->info.uses_linear_centroid; - key->mono.u.ps.interpolate_at_sample_force_center = 0; + key->ps.mono.interpolate_at_sample_force_center = 0; } else { - key->part.ps.prolog.force_persp_sample_interp = 0; - key->part.ps.prolog.force_linear_sample_interp = 0; + key->ps.part.prolog.force_persp_sample_interp = 0; + key->ps.part.prolog.force_linear_sample_interp = 0; /* Make sure SPI doesn't compute more than 1 pair * of (i,j), which is the optimization here. */ - key->part.ps.prolog.force_persp_center_interp = uses_persp_center + + key->ps.part.prolog.force_persp_center_interp = uses_persp_center + uses_persp_centroid + uses_persp_sample > 1; - key->part.ps.prolog.force_linear_center_interp = sel->info.uses_linear_center + + key->ps.part.prolog.force_linear_center_interp = sel->info.uses_linear_center + sel->info.uses_linear_centroid + sel->info.uses_linear_sample > 1; - key->part.ps.prolog.bc_optimize_for_persp = 0; - key->part.ps.prolog.bc_optimize_for_linear = 0; - key->mono.u.ps.interpolate_at_sample_force_center = sel->info.uses_interp_at_sample; + key->ps.part.prolog.bc_optimize_for_persp = 0; + key->ps.part.prolog.bc_optimize_for_linear = 0; + key->ps.mono.interpolate_at_sample_force_center = sel->info.uses_interp_at_sample; } } /* Compute the key for the hw shader variant */ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_shader_selector *sel, - struct si_shader_key *key) + union si_shader_key *key) { struct si_context *sctx = (struct si_context *)ctx; @@ -2162,8 +2162,8 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_sh break; case MESA_SHADER_TESS_CTRL: if (sctx->chip_class >= GFX9) { - si_get_vs_key_inputs(sctx, key, &key->part.tcs.ls_prolog); - key->part.tcs.ls = sctx->shader.vs.cso; + si_get_vs_key_inputs(sctx, key, &key->ge.part.tcs.ls_prolog); + key->ge.part.tcs.ls = sctx->shader.vs.cso; } break; case MESA_SHADER_TESS_EVAL: @@ -2175,11 +2175,11 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_sh case MESA_SHADER_GEOMETRY: if (sctx->chip_class >= GFX9) { if (sctx->shader.tes.cso) { - si_clear_vs_key_inputs(sctx, key, &key->part.gs.vs_prolog); - key->part.gs.es = sctx->shader.tes.cso; + si_clear_vs_key_inputs(sctx, key, &key->ge.part.gs.vs_prolog); + key->ge.part.gs.es = sctx->shader.tes.cso; } else { - si_get_vs_key_inputs(sctx, key, &key->part.gs.vs_prolog); - key->part.gs.es = sctx->shader.vs.cso; + si_get_vs_key_inputs(sctx, key, &key->ge.part.gs.vs_prolog); + key->ge.part.gs.es = sctx->shader.vs.cso; } /* Only NGG can eliminate GS outputs, because the code is shared with VS. */ @@ -2249,11 +2249,11 @@ static void si_build_shader_variant_low_priority(void *job, void *gdata, int thr } /* This should be const, but C++ doesn't allow implicit zero-initialization with const. */ -static struct si_shader_key zeroed; +static union si_shader_key zeroed; static bool si_check_missing_main_part(struct si_screen *sscreen, struct si_shader_selector *sel, struct si_compiler_ctx_state *compiler_state, - const struct si_shader_key *key) + const union si_shader_key *key) { struct si_shader **mainp = si_get_main_shader_part(sel, key); @@ -2269,9 +2269,11 @@ static bool si_check_missing_main_part(struct si_screen *sscreen, struct si_shad util_queue_fence_init(&main_part->ready); main_part->selector = sel; - main_part->key.as_es = key->as_es; - main_part->key.as_ls = key->as_ls; - main_part->key.as_ngg = key->as_ngg; + if (sel->info.stage <= MESA_SHADER_GEOMETRY) { + main_part->key.ge.as_es = key->ge.as_es; + main_part->key.ge.as_ls = key->ge.as_ls; + main_part->key.ge.as_ngg = key->ge.as_ngg; + } main_part->is_monolithic = false; if (!si_compile_shader(sscreen, compiler_state->compiler, main_part, @@ -2285,8 +2287,9 @@ static bool si_check_missing_main_part(struct si_screen *sscreen, struct si_shad } /* A helper to copy *key to *local_key and return local_key. */ -static const struct si_shader_key * -use_local_key_copy(const struct si_shader_key *key, struct si_shader_key *local_key) +template +static const SHADER_KEY_TYPE * +use_local_key_copy(const SHADER_KEY_TYPE *key, SHADER_KEY_TYPE *local_key) { if (key != local_key) memcpy(local_key, key, sizeof(*key)); @@ -2301,24 +2304,27 @@ use_local_key_copy(const struct si_shader_key *key, struct si_shader_key *local_ * the compilation isn't finished, don't select any * shader and return an error. */ -int si_shader_select_with_key(struct si_context *sctx, struct si_shader_ctx_state *state, - const struct si_shader_key *key, int thread_index, - bool optimized_or_none) +template +static int si_shader_select_with_key(struct si_context *sctx, struct si_shader_ctx_state *state, + const SHADER_KEY_TYPE *key, int thread_index, + bool optimized_or_none) { struct si_screen *sscreen = sctx->screen; struct si_shader_selector *sel = state->cso; struct si_shader_selector *previous_stage_sel = NULL; struct si_shader *current = state->current; struct si_shader *iter, *shader = NULL; + const SHADER_KEY_TYPE *zeroed_key = (SHADER_KEY_TYPE*)&zeroed; + /* si_shader_select_with_key must not modify 'key' because it would affect future shaders. * If we need to modify it for this specific shader (eg: to disable optimizations), we * use a copy. */ - struct si_shader_key local_key; + SHADER_KEY_TYPE local_key; if (unlikely(sscreen->debug_flags & DBG(NO_OPT_VARIANT))) { /* Disable shader variant optimizations. */ - key = use_local_key_copy(key, &local_key); + key = use_local_key_copy(key, &local_key); memset(&local_key.opt, 0, sizeof(key->opt)); } @@ -2364,12 +2370,14 @@ current_not_ready: /* Find the shader variant. */ for (iter = sel->first_variant; iter; iter = iter->next_variant) { - if (memcmp(&iter->key, key, s) == 0) { + const SHADER_KEY_TYPE *iter_key = (const SHADER_KEY_TYPE *)&iter->key; + + if (memcmp(iter_key, key, s) == 0) { /* Check the inlined uniform values separatly, and count * the number of variants based on them. */ if (key->opt.inline_uniforms && - memcmp(iter->key.opt.inlined_uniform_values, + memcmp(iter_key->opt.inlined_uniform_values, key->opt.inlined_uniform_values, MAX_INLINABLE_UNIFORMS * 4) != 0) { if (variant_count++ > max_inline_uniforms_variants) { @@ -2424,7 +2432,7 @@ current_not_ready: si_init_compiler(sctx->screen, &sctx->compiler); shader->selector = sel; - shader->key = *key; + *((SHADER_KEY_TYPE*)&shader->key) = *key; shader->compiler_ctx_state.compiler = &sctx->compiler; shader->compiler_ctx_state.debug = sctx->debug; shader->compiler_ctx_state.is_debug_context = sctx->is_debug; @@ -2432,9 +2440,9 @@ current_not_ready: /* If this is a merged shader, get the first shader's selector. */ if (sscreen->info.chip_class >= GFX9) { if (sel->info.stage == MESA_SHADER_TESS_CTRL) - previous_stage_sel = key->part.tcs.ls; + previous_stage_sel = ((struct si_shader_key_ge*)key)->part.tcs.ls; else if (sel->info.stage == MESA_SHADER_GEOMETRY) - previous_stage_sel = key->part.gs.es; + previous_stage_sel = ((struct si_shader_key_ge*)key)->part.gs.es; /* We need to wait for the previous shader. */ if (previous_stage_sel && thread_index < 0) @@ -2442,7 +2450,7 @@ current_not_ready: } bool is_pure_monolithic = - sscreen->use_monolithic_shaders || memcmp(&key->mono, &zeroed.mono, sizeof(key->mono)) != 0; + sscreen->use_monolithic_shaders || memcmp(&key->mono, &zeroed_key->mono, sizeof(key->mono)) != 0; /* Compile the main shader part if it doesn't exist. This can happen * if the initial guess was wrong. @@ -2461,13 +2469,13 @@ current_not_ready: * part is present. */ if (previous_stage_sel) { - struct si_shader_key shader1_key = zeroed; + union si_shader_key shader1_key = zeroed; if (sel->info.stage == MESA_SHADER_TESS_CTRL) { - shader1_key.as_ls = 1; + shader1_key.ge.as_ls = 1; } else if (sel->info.stage == MESA_SHADER_GEOMETRY) { - shader1_key.as_es = 1; - shader1_key.as_ngg = key->as_ngg; /* for Wave32 vs Wave64 */ + shader1_key.ge.as_es = 1; + shader1_key.ge.as_ngg = ((struct si_shader_key_ge*)key)->as_ngg; /* for Wave32 vs Wave64 */ } else { assert(0); } @@ -2479,7 +2487,8 @@ current_not_ready: } if (ok) { - ok = si_check_missing_main_part(sscreen, sel, &shader->compiler_ctx_state, key); + ok = si_check_missing_main_part(sscreen, sel, &shader->compiler_ctx_state, + (union si_shader_key*)key); } if (!ok) { @@ -2500,10 +2509,10 @@ current_not_ready: /* Monolithic-only shaders don't make a distinction between optimized * and unoptimized. */ shader->is_monolithic = - is_pure_monolithic || memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0; + is_pure_monolithic || memcmp(&key->opt, &zeroed_key->opt, sizeof(key->opt)) != 0; shader->is_optimized = !is_pure_monolithic && - memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0; + memcmp(&key->opt, &zeroed_key->opt, sizeof(key->opt)) != 0; /* If it's an optimized shader, compile it asynchronously. */ if (shader->is_optimized && thread_index < 0) { @@ -2563,11 +2572,15 @@ int si_shader_select(struct pipe_context *ctx, struct si_shader_ctx_state *state struct si_context *sctx = (struct si_context *)ctx; si_shader_selector_key(ctx, state->cso, &state->key); - return si_shader_select_with_key(sctx, state, &state->key, -1, false); + + if (state->cso->info.stage == MESA_SHADER_FRAGMENT) + return si_shader_select_with_key(sctx, state, &state->key.ps, -1, false); + else + return si_shader_select_with_key(sctx, state, &state->key.ge, -1, false); } static void si_parse_next_shader_property(const struct si_shader_info *info, bool streamout, - struct si_shader_key *key) + union si_shader_key *key) { gl_shader_stage next_shader = info->base.next_stage; @@ -2575,11 +2588,11 @@ static void si_parse_next_shader_property(const struct si_shader_info *info, boo case MESA_SHADER_VERTEX: switch (next_shader) { case MESA_SHADER_GEOMETRY: - key->as_es = 1; + key->ge.as_es = 1; break; case MESA_SHADER_TESS_CTRL: case MESA_SHADER_TESS_EVAL: - key->as_ls = 1; + key->ge.as_ls = 1; break; default: /* If POSITION isn't written, it can only be a HW VS @@ -2588,13 +2601,13 @@ static void si_parse_next_shader_property(const struct si_shader_info *info, boo * This heuristic is needed for separate shader objects. */ if (!info->writes_position && !streamout) - key->as_ls = 1; + key->ge.as_ls = 1; } break; case MESA_SHADER_TESS_EVAL: if (next_shader == MESA_SHADER_GEOMETRY || !info->writes_position) - key->as_es = 1; + key->ge.as_es = 1; break; default:; @@ -2672,13 +2685,17 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind shader->is_monolithic = false; si_parse_next_shader_property(&sel->info, sel->so.num_outputs != 0, &shader->key); - if (sscreen->use_ngg && (!sel->so.num_outputs || sscreen->use_ngg_streamout) && - ((sel->info.stage == MESA_SHADER_VERTEX && !shader->key.as_ls) || + if (sel->info.stage <= MESA_SHADER_GEOMETRY && + sscreen->use_ngg && (!sel->so.num_outputs || sscreen->use_ngg_streamout) && + ((sel->info.stage == MESA_SHADER_VERTEX && !shader->key.ge.as_ls) || sel->info.stage == MESA_SHADER_TESS_EVAL || sel->info.stage == MESA_SHADER_GEOMETRY)) - shader->key.as_ngg = 1; + shader->key.ge.as_ngg = 1; if (sel->nir) { - si_get_ir_cache_key(sel, shader->key.as_ngg, shader->key.as_es, ir_sha1_cache_key); + if (sel->info.stage <= MESA_SHADER_GEOMETRY) + si_get_ir_cache_key(sel, shader->key.ge.as_ngg, shader->key.ge.as_es, ir_sha1_cache_key); + else + si_get_ir_cache_key(sel, false, false, ir_sha1_cache_key); } /* Try to load the shader from the shader cache. */ @@ -2714,7 +2731,7 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind if ((sel->info.stage == MESA_SHADER_VERTEX || sel->info.stage == MESA_SHADER_TESS_EVAL || sel->info.stage == MESA_SHADER_GEOMETRY) && - !shader->key.as_ls && !shader->key.as_es) { + !shader->key.ge.as_ls && !shader->key.ge.as_es) { unsigned i; for (i = 0; i < sel->info.num_outputs; i++) { @@ -3186,7 +3203,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state) sctx->shader.vs.current = sel ? sel->first_variant : NULL; sctx->num_vs_blit_sgprs = sel ? sel->info.base.vs.blit_sgprs_amd : 0; sctx->vs_uses_draw_id = sel ? sel->info.uses_drawid : false; - sctx->fixed_func_tcs_shader.key.mono.u.ff_tcs_inputs_to_copy = sel ? sel->outputs_written : 0; + sctx->fixed_func_tcs_shader.key.ge.mono.u.ff_tcs_inputs_to_copy = sel ? sel->outputs_written : 0; if (si_update_ngg(sctx)) si_shader_change_notify(sctx); @@ -3294,7 +3311,7 @@ static void si_bind_tcs_shader(struct pipe_context *ctx, void *state) sctx->shader.tcs.cso = sel; sctx->shader.tcs.current = sel ? sel->first_variant : NULL; - sctx->shader.tcs.key.part.tcs.epilog.invoc0_tess_factors_are_def = + sctx->shader.tcs.key.ge.part.tcs.epilog.invoc0_tess_factors_are_def = sel ? sel->info.tessfactors_are_def_in_all_invocs : 0; si_update_tess_uses_prim_id(sctx); @@ -3320,12 +3337,12 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state) sctx->ia_multi_vgt_param_key.u.uses_tess = sel != NULL; si_update_tess_uses_prim_id(sctx); - sctx->shader.tcs.key.part.tcs.epilog.prim_mode = - sctx->fixed_func_tcs_shader.key.part.tcs.epilog.prim_mode = + sctx->shader.tcs.key.ge.part.tcs.epilog.prim_mode = + sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.prim_mode = sel ? sel->info.base.tess.primitive_mode : 0; - sctx->shader.tcs.key.part.tcs.epilog.tes_reads_tess_factors = - sctx->fixed_func_tcs_shader.key.part.tcs.epilog.tes_reads_tess_factors = + sctx->shader.tcs.key.ge.part.tcs.epilog.tes_reads_tess_factors = + sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.tes_reads_tess_factors = sel ? sel->info.reads_tess_factors : 0; si_update_common_shader_state(sctx, sel, PIPE_SHADER_TESS_EVAL); @@ -3437,13 +3454,13 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader) switch (shader->selector->info.stage) { case MESA_SHADER_VERTEX: - if (shader->key.as_ls) { + if (shader->key.ge.as_ls) { if (sctx->chip_class <= GFX8) state_index = SI_STATE_IDX(ls); - } else if (shader->key.as_es) { + } else if (shader->key.ge.as_es) { if (sctx->chip_class <= GFX8) state_index = SI_STATE_IDX(es); - } else if (shader->key.as_ngg) { + } else if (shader->key.ge.as_ngg) { state_index = SI_STATE_IDX(gs); } else { state_index = SI_STATE_IDX(vs); @@ -3453,10 +3470,10 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader) state_index = SI_STATE_IDX(hs); break; case MESA_SHADER_TESS_EVAL: - if (shader->key.as_es) { + if (shader->key.ge.as_es) { if (sctx->chip_class <= GFX8) state_index = SI_STATE_IDX(es); - } else if (shader->key.as_ngg) { + } else if (shader->key.ge.as_ngg) { state_index = SI_STATE_IDX(gs); } else { state_index = SI_STATE_IDX(vs); @@ -3808,11 +3825,11 @@ static bool si_update_scratch_relocs(struct si_context *sctx) if (r < 0) return false; if (r == 1) { - if (sctx->shader.vs.current->key.as_ls) + if (sctx->shader.vs.current->key.ge.as_ls) si_pm4_bind_state(sctx, ls, sctx->shader.vs.current); - else if (sctx->shader.vs.current->key.as_es) + else if (sctx->shader.vs.current->key.ge.as_es) si_pm4_bind_state(sctx, es, sctx->shader.vs.current); - else if (sctx->shader.vs.current->key.as_ngg) + else if (sctx->shader.vs.current->key.ge.as_ngg) si_pm4_bind_state(sctx, gs, sctx->shader.vs.current); else si_pm4_bind_state(sctx, vs, sctx->shader.vs.current); @@ -3823,9 +3840,9 @@ static bool si_update_scratch_relocs(struct si_context *sctx) if (r < 0) return false; if (r == 1) { - if (sctx->shader.tes.current->key.as_es) + if (sctx->shader.tes.current->key.ge.as_es) si_pm4_bind_state(sctx, es, sctx->shader.tes.current); - else if (sctx->shader.tes.current->key.as_ngg) + else if (sctx->shader.tes.current->key.ge.as_ngg) si_pm4_bind_state(sctx, gs, sctx->shader.tes.current); else si_pm4_bind_state(sctx, vs, sctx->shader.tes.current);