radeonsi: split si_shader_key into ps and ge parts to minimize memcmp overhead

ps is for the pixel shader, while ge is for VS, TCS, TES, and GS.

si_shader_key: 68 bytes
si_shader_key_ge: 68 bytes
si_shader_key_ps: 28 bytes

The only notable change is that si_shader_select_with_key is changed
to a C++ template. Other changes are trivial.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13285>
This commit is contained in:
Marek Olšák 2021-09-13 23:09:22 -04:00 committed by Marge Bot
parent 385c9e1caf
commit 8c5a32b5fe
17 changed files with 652 additions and 540 deletions

View file

@ -83,7 +83,7 @@ static LLVMValueRef ngg_get_vertices_per_prim(struct si_shader_context *ctx, uns
/* Blits always use axis-aligned rectangles with 3 vertices. */
*num_vertices = 3;
return LLVMConstInt(ctx->ac.i32, 3, 0);
} else if (ctx->shader->key.opt.ngg_culling & SI_NGG_CULL_LINES) {
} else if (ctx->shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES) {
*num_vertices = 2;
return LLVMConstInt(ctx->ac.i32, 2, 0);
} else {
@ -115,7 +115,7 @@ bool gfx10_ngg_export_prim_early(struct si_shader *shader)
{
struct si_shader_selector *sel = shader->selector;
assert(shader->key.as_ngg && !shader->key.as_es);
assert(shader->key.ge.as_ngg && !shader->key.ge.as_es);
return sel->info.stage != MESA_SHADER_GEOMETRY &&
!gfx10_ngg_writes_user_edgeflags(shader);
@ -137,7 +137,7 @@ void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef use
{
LLVMBuilderRef builder = ctx->ac.builder;
if (gfx10_is_ngg_passthrough(ctx->shader) || ctx->shader->key.opt.ngg_culling) {
if (gfx10_is_ngg_passthrough(ctx->shader) || ctx->shader->key.ge.opt.ngg_culling) {
ac_build_ifcc(&ctx->ac, si_is_gs_thread(ctx), 6001);
{
struct ac_ngg_prim prim = {};
@ -614,17 +614,17 @@ static unsigned ngg_nogs_vertex_size(struct si_shader *shader)
* to the ES thread of the provoking vertex. All ES threads
* load and export PrimitiveID for their thread.
*/
if (shader->selector->info.stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id)
if (shader->selector->info.stage == MESA_SHADER_VERTEX && shader->key.ge.mono.u.vs_export_prim_id)
lds_vertex_size = MAX2(lds_vertex_size, 1);
if (shader->key.opt.ngg_culling) {
if (shader->key.ge.opt.ngg_culling) {
if (shader->selector->info.stage == MESA_SHADER_VERTEX) {
STATIC_ASSERT(lds_instance_id + 1 == 7);
lds_vertex_size = MAX2(lds_vertex_size, 7);
} else {
assert(shader->selector->info.stage == MESA_SHADER_TESS_EVAL);
if (shader->selector->info.uses_primid || shader->key.mono.u.vs_export_prim_id) {
if (shader->selector->info.uses_primid || shader->key.ge.mono.u.vs_export_prim_id) {
STATIC_ASSERT(lds_tes_patch_id + 2 == 9); /* +1 for LDS padding */
lds_vertex_size = MAX2(lds_vertex_size, 9);
} else {
@ -823,10 +823,10 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
LLVMValueRef *addrs = abi->outputs;
unsigned max_waves = DIV_ROUND_UP(ctx->screen->ngg_subgroup_size, ctx->ac.wave_size);
assert(shader->key.opt.ngg_culling);
assert(shader->key.as_ngg);
assert(shader->key.ge.opt.ngg_culling);
assert(shader->key.ge.as_ngg);
assert(sel->info.stage == MESA_SHADER_VERTEX ||
(sel->info.stage == MESA_SHADER_TESS_EVAL && !shader->key.as_es));
(sel->info.stage == MESA_SHADER_TESS_EVAL && !shader->key.ge.as_es));
LLVMValueRef es_vtxptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
unsigned pos_index = 0;
@ -840,8 +840,8 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
* the position. This is useful for analyzing maximum theoretical
* performance without VS input loads.
*/
if (shader->key.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE &&
shader->key.opt.ngg_culling & SI_NGG_CULL_BACK_FACE) {
if (shader->key.ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE &&
shader->key.ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE) {
for (unsigned j = 0; j < 4; j++)
LLVMBuildStore(builder, LLVMGetUndef(ctx->ac.f32), addrs[4 * i + j]);
break;
@ -993,15 +993,15 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
options.cull_view_xy = true;
options.cull_w = true;
if (shader->key.opt.ngg_culling & SI_NGG_CULL_LINES) {
if (shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES) {
options.num_vertices = 2;
assert(!(shader->key.opt.ngg_culling & SI_NGG_CULL_BACK_FACE));
assert(!(shader->key.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE));
assert(!(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE));
assert(!(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE));
} else {
options.num_vertices = 3;
options.cull_front = shader->key.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE;
options.cull_back = shader->key.opt.ngg_culling & SI_NGG_CULL_BACK_FACE;
options.cull_front = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE;
options.cull_back = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE;
options.cull_small_prims = true; /* this would only be false with conservative rasterization */
options.cull_zero_area = options.cull_front || options.cull_back;
}
@ -1055,10 +1055,10 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
bool uses_instance_id = ctx->stage == MESA_SHADER_VERTEX &&
(sel->info.uses_instanceid ||
shader->key.part.vs.prolog.instance_divisor_is_one ||
shader->key.part.vs.prolog.instance_divisor_is_fetched);
shader->key.ge.part.vs.prolog.instance_divisor_is_one ||
shader->key.ge.part.vs.prolog.instance_divisor_is_fetched);
bool uses_tes_prim_id = ctx->stage == MESA_SHADER_TESS_EVAL &&
(sel->info.uses_primid || shader->key.mono.u.vs_export_prim_id);
(sel->info.uses_primid || shader->key.ge.mono.u.vs_export_prim_id);
/* ES threads compute their prefix sum, which is the new ES thread ID.
* Then they write the vertex position and input VGPRs into the LDS address
@ -1278,7 +1278,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
/* These two also use LDS. */
if (gfx10_ngg_writes_user_edgeflags(shader) ||
(ctx->stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id))
(ctx->stage == MESA_SHADER_VERTEX && shader->key.ge.mono.u.vs_export_prim_id))
ac_build_s_barrier(&ctx->ac);
ctx->return_value = ret;
@ -1338,7 +1338,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
bool unterminated_es_if_block =
!sel->so.num_outputs && !gfx10_ngg_writes_user_edgeflags(ctx->shader) &&
!ctx->screen->use_ngg_streamout && /* no query buffer */
(ctx->stage != MESA_SHADER_VERTEX || !ctx->shader->key.mono.u.vs_export_prim_id);
(ctx->stage != MESA_SHADER_VERTEX || !ctx->shader->key.ge.mono.u.vs_export_prim_id);
if (!unterminated_es_if_block)
ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
@ -1347,7 +1347,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
LLVMValueRef is_es_thread = si_is_es_thread(ctx);
LLVMValueRef vtxindex[3];
if (ctx->shader->key.opt.ngg_culling || gfx10_is_ngg_passthrough(ctx->shader)) {
if (ctx->shader->key.ge.opt.ngg_culling || gfx10_is_ngg_passthrough(ctx->shader)) {
for (unsigned i = 0; i < 3; ++i)
vtxindex[i] = si_unpack_param(ctx, ctx->args.gs_vtx_offset[0], 10 * i, 9);
} else {
@ -1402,7 +1402,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
/* Copy Primitive IDs from GS threads to the LDS address corresponding
* to the ES thread of the provoking vertex.
*/
if (ctx->stage == MESA_SHADER_VERTEX && ctx->shader->key.mono.u.vs_export_prim_id) {
if (ctx->stage == MESA_SHADER_VERTEX && ctx->shader->key.ge.mono.u.vs_export_prim_id) {
assert(!unterminated_es_if_block);
/* Streamout and edge flags use LDS. Make it idle, so that we can reuse it. */
@ -1479,7 +1479,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
* load it from LDS.
*/
if (info->output_semantic[i] == VARYING_SLOT_POS &&
ctx->shader->key.opt.ngg_culling) {
ctx->shader->key.ge.opt.ngg_culling) {
vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
for (unsigned j = 0; j < 4; j++) {
@ -1495,7 +1495,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
}
}
if (ctx->shader->key.mono.u.vs_export_prim_id) {
if (ctx->shader->key.ge.mono.u.vs_export_prim_id) {
outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID;
if (ctx->stage == MESA_SHADER_VERTEX) {

View file

@ -1222,15 +1222,34 @@ static void si_set_constant_buffer(struct si_context *sctx, struct si_buffer_res
sctx->descriptors_dirty |= 1u << descriptors_idx;
}
void si_get_inline_uniform_state(union si_shader_key *key, enum pipe_shader_type shader,
bool *inline_uniforms, uint32_t **inlined_values)
{
if (shader == PIPE_SHADER_FRAGMENT) {
*inline_uniforms = key->ps.opt.inline_uniforms;
*inlined_values = key->ps.opt.inlined_uniform_values;
} else {
*inline_uniforms = key->ge.opt.inline_uniforms;
*inlined_values = key->ge.opt.inlined_uniform_values;
}
}
void si_invalidate_inlinable_uniforms(struct si_context *sctx, enum pipe_shader_type shader)
{
if (shader == PIPE_SHADER_COMPUTE)
return;
if (sctx->shaders[shader].key.opt.inline_uniforms) {
sctx->shaders[shader].key.opt.inline_uniforms = false;
memset(sctx->shaders[shader].key.opt.inlined_uniform_values, 0,
sizeof(sctx->shaders[shader].key.opt.inlined_uniform_values));
bool inline_uniforms;
uint32_t *inlined_values;
si_get_inline_uniform_state(&sctx->shaders[shader].key, shader, &inline_uniforms, &inlined_values);
if (inline_uniforms) {
if (shader == PIPE_SHADER_FRAGMENT)
sctx->shaders[shader].key.ps.opt.inline_uniforms = false;
else
sctx->shaders[shader].key.ge.opt.inline_uniforms = false;
memset(inlined_values, 0, MAX_INLINABLE_UNIFORMS * 4);
sctx->do_update_shaders = true;
}
}
@ -1273,10 +1292,18 @@ static void si_set_inlinable_constants(struct pipe_context *ctx,
if (shader == PIPE_SHADER_COMPUTE)
return;
if (!sctx->shaders[shader].key.opt.inline_uniforms) {
bool inline_uniforms;
uint32_t *inlined_values;
si_get_inline_uniform_state(&sctx->shaders[shader].key, shader, &inline_uniforms, &inlined_values);
if (!inline_uniforms) {
/* It's the first time we set the constants. Always update shaders. */
sctx->shaders[shader].key.opt.inline_uniforms = true;
memcpy(sctx->shaders[shader].key.opt.inlined_uniform_values, values, num_values * 4);
if (shader == PIPE_SHADER_FRAGMENT)
sctx->shaders[shader].key.ps.opt.inline_uniforms = true;
else
sctx->shaders[shader].key.ge.opt.inline_uniforms = true;
memcpy(inlined_values, values, num_values * 4);
sctx->do_update_shaders = true;
return;
}
@ -1284,8 +1311,8 @@ static void si_set_inlinable_constants(struct pipe_context *ctx,
/* We have already set inlinable constants for this shader. Update the shader only if
* the constants are being changed so as not to update shaders needlessly.
*/
if (memcmp(sctx->shaders[shader].key.opt.inlined_uniform_values, values, num_values * 4)) {
memcpy(sctx->shaders[shader].key.opt.inlined_uniform_values, values, num_values * 4);
if (memcmp(inlined_values, values, num_values * 4)) {
memcpy(inlined_values, values, num_values * 4);
sctx->do_update_shaders = true;
}
}
@ -2029,27 +2056,27 @@ void si_shader_change_notify(struct si_context *sctx)
* If GS sets as_ngg, the previous stage must set as_ngg too.
*/
if (sctx->shader.tes.cso) {
sctx->shader.vs.key.as_ls = 1;
sctx->shader.vs.key.as_es = 0;
sctx->shader.vs.key.as_ngg = 0;
sctx->shader.vs.key.ge.as_ls = 1;
sctx->shader.vs.key.ge.as_es = 0;
sctx->shader.vs.key.ge.as_ngg = 0;
if (sctx->shader.gs.cso) {
sctx->shader.tes.key.as_es = 1;
sctx->shader.tes.key.as_ngg = sctx->ngg;
sctx->shader.gs.key.as_ngg = sctx->ngg;
sctx->shader.tes.key.ge.as_es = 1;
sctx->shader.tes.key.ge.as_ngg = sctx->ngg;
sctx->shader.gs.key.ge.as_ngg = sctx->ngg;
} else {
sctx->shader.tes.key.as_es = 0;
sctx->shader.tes.key.as_ngg = sctx->ngg;
sctx->shader.tes.key.ge.as_es = 0;
sctx->shader.tes.key.ge.as_ngg = sctx->ngg;
}
} else if (sctx->shader.gs.cso) {
sctx->shader.vs.key.as_ls = 0;
sctx->shader.vs.key.as_es = 1;
sctx->shader.vs.key.as_ngg = sctx->ngg;
sctx->shader.gs.key.as_ngg = sctx->ngg;
sctx->shader.vs.key.ge.as_ls = 0;
sctx->shader.vs.key.ge.as_es = 1;
sctx->shader.vs.key.ge.as_ngg = sctx->ngg;
sctx->shader.gs.key.ge.as_ngg = sctx->ngg;
} else {
sctx->shader.vs.key.as_ls = 0;
sctx->shader.vs.key.as_es = 0;
sctx->shader.vs.key.as_ngg = sctx->ngg;
sctx->shader.vs.key.ge.as_ls = 0;
sctx->shader.vs.key.ge.as_es = 0;
sctx->shader.vs.key.ge.as_ngg = sctx->ngg;
}
}

View file

@ -747,10 +747,10 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign
* and the LS main part when !vs_needs_prolog
* - remove the fixup for unused input VGPRs
*/
sctx->shader.tcs.key.opt.prefer_mono = 1;
sctx->shader.tcs.key.ge.opt.prefer_mono = 1;
/* This enables jumping over the VS prolog for GS-only waves. */
sctx->shader.gs.key.opt.prefer_mono = 1;
sctx->shader.gs.key.ge.opt.prefer_mono = 1;
}
si_begin_new_gfx_cs(sctx, true);

View file

@ -805,7 +805,7 @@ struct si_shader_ctx_state {
struct si_shader_selector *cso;
struct si_shader *current;
/* The shader variant key representing the current state. */
struct si_shader_key key;
union si_shader_key key;
};
#define SI_NUM_VGT_PARAM_KEY_BITS 12
@ -1965,9 +1965,14 @@ static inline unsigned si_get_wave_size(struct si_screen *sscreen,
static inline unsigned si_get_shader_wave_size(struct si_shader *shader)
{
if (shader->selector->info.stage <= MESA_SHADER_GEOMETRY) {
return si_get_wave_size(shader->selector->screen, shader->selector->info.stage,
shader->key.ge.as_ngg,
shader->key.ge.as_es);
}
return si_get_wave_size(shader->selector->screen, shader->selector->info.stage,
shader->key.as_ngg,
shader->key.as_es);
false, false);
}
static inline void si_select_draw_vbo(struct si_context *sctx)

View file

@ -42,10 +42,11 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f);
/** Whether the shader runs as a combination of multiple API shaders */
bool si_is_multi_part_shader(struct si_shader *shader)
{
if (shader->selector->screen->info.chip_class <= GFX8)
if (shader->selector->screen->info.chip_class <= GFX8 ||
shader->selector->info.stage > MESA_SHADER_GEOMETRY)
return false;
return shader->key.as_ls || shader->key.as_es ||
return shader->key.ge.as_ls || shader->key.ge.as_es ||
shader->selector->info.stage == MESA_SHADER_TESS_CTRL ||
shader->selector->info.stage == MESA_SHADER_GEOMETRY;
}
@ -53,7 +54,10 @@ bool si_is_multi_part_shader(struct si_shader *shader)
/** Whether the shader runs on a merged HW stage (LSHS or ESGS) */
bool si_is_merged_shader(struct si_shader *shader)
{
return shader->key.as_ngg || si_is_multi_part_shader(shader);
if (shader->selector->info.stage > MESA_SHADER_GEOMETRY)
return false;
return shader->key.ge.as_ngg || si_is_multi_part_shader(shader);
}
/**
@ -200,7 +204,7 @@ unsigned si_get_max_workgroup_size(const struct si_shader *shader)
switch (shader->selector->info.stage) {
case MESA_SHADER_VERTEX:
case MESA_SHADER_TESS_EVAL:
return shader->key.as_ngg ? 128 : 0;
return shader->key.ge.as_ngg ? 128 : 0;
case MESA_SHADER_TESS_CTRL:
/* Return this so that LLVM doesn't remove s_barrier
@ -300,7 +304,7 @@ static void declare_vs_input_vgprs(struct si_shader_context *ctx, unsigned *num_
struct si_shader *shader = ctx->shader;
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.vertex_id);
if (shader->key.as_ls) {
if (shader->key.ge.as_ls) {
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.vs_rel_patch_id);
if (ctx->screen->info.chip_class >= GFX10) {
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
@ -384,10 +388,10 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
memset(&ctx->args, 0, sizeof(ctx->args));
/* Set MERGED shaders. */
if (ctx->screen->info.chip_class >= GFX9) {
if (shader->key.as_ls || stage == MESA_SHADER_TESS_CTRL)
if (ctx->screen->info.chip_class >= GFX9 && stage <= MESA_SHADER_GEOMETRY) {
if (shader->key.ge.as_ls || stage == MESA_SHADER_TESS_CTRL)
stage = SI_SHADER_MERGED_VERTEX_TESSCTRL; /* LS or HS */
else if (shader->key.as_es || shader->key.as_ngg || stage == MESA_SHADER_GEOMETRY)
else if (shader->key.ge.as_es || shader->key.ge.as_ngg || stage == MESA_SHADER_GEOMETRY)
stage = SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY;
}
@ -408,9 +412,9 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
if (!shader->is_gs_copy_shader)
declare_vb_descriptor_input_sgprs(ctx);
if (shader->key.as_es) {
if (shader->key.ge.as_es) {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.es2gs_offset);
} else if (shader->key.as_ls) {
} else if (shader->key.ge.as_ls) {
/* no extra parameters */
} else {
/* The locations of the other parameters are assigned dynamically. */
@ -479,14 +483,14 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
ac_add_return(&ctx->args, AC_ARG_VGPR);
/* VS outputs passed via VGPRs to TCS. */
if (shader->key.opt.same_patch_vertices) {
if (shader->key.ge.opt.same_patch_vertices) {
unsigned num_outputs = util_last_bit64(shader->selector->outputs_written);
for (i = 0; i < num_outputs * 4; i++)
ac_add_return(&ctx->args, AC_ARG_VGPR);
}
} else {
/* TCS inputs are passed via VGPRs from VS. */
if (shader->key.opt.same_patch_vertices) {
if (shader->key.ge.opt.same_patch_vertices) {
unsigned num_inputs = util_last_bit64(shader->previous_stage_sel->outputs_written);
for (i = 0; i < num_inputs * 4; i++)
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
@ -510,7 +514,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
/* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */
declare_per_stage_desc_pointers(ctx, ctx->stage == MESA_SHADER_GEOMETRY);
if (ctx->shader->key.as_ngg)
if (ctx->shader->key.ge.as_ngg)
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs_tg_info);
else
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs2vs_offset);
@ -559,7 +563,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
declare_tes_input_vgprs(ctx);
}
if ((ctx->shader->key.as_es || ngg_cull_shader) &&
if ((ctx->shader->key.ge.as_es || ngg_cull_shader) &&
(ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL)) {
unsigned num_user_sgprs, num_vgprs;
@ -602,7 +606,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tes_offchip_addr);
if (shader->key.as_es) {
if (shader->key.ge.as_es) {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.es2gs_offset);
@ -786,14 +790,15 @@ static bool si_shader_binary_open(struct si_screen *screen, struct si_shader *sh
unsigned num_lds_symbols = 0;
if (sel && screen->info.chip_class >= GFX9 && !shader->is_gs_copy_shader &&
(sel->info.stage == MESA_SHADER_GEOMETRY || shader->key.as_ngg)) {
(sel->info.stage == MESA_SHADER_GEOMETRY ||
(sel->info.stage <= MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg))) {
struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
sym->name = "esgs_ring";
sym->size = shader->gs_info.esgs_ring_size * 4;
sym->align = 64 * 1024;
}
if (shader->key.as_ngg && sel->info.stage == MESA_SHADER_GEOMETRY) {
if (sel->info.stage == MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg) {
struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
sym->name = "ngg_emit";
sym->size = shader->ngg.ngg_emit_size * 4;
@ -1060,20 +1065,20 @@ const char *si_get_shader_name(const struct si_shader *shader)
{
switch (shader->selector->info.stage) {
case MESA_SHADER_VERTEX:
if (shader->key.as_es)
if (shader->key.ge.as_es)
return "Vertex Shader as ES";
else if (shader->key.as_ls)
else if (shader->key.ge.as_ls)
return "Vertex Shader as LS";
else if (shader->key.as_ngg)
else if (shader->key.ge.as_ngg)
return "Vertex Shader as ESGS";
else
return "Vertex Shader as VS";
case MESA_SHADER_TESS_CTRL:
return "Tessellation Control Shader";
case MESA_SHADER_TESS_EVAL:
if (shader->key.as_es)
if (shader->key.ge.as_es)
return "Tessellation Evaluation Shader as ES";
else if (shader->key.as_ngg)
else if (shader->key.ge.as_ngg)
return "Tessellation Evaluation Shader as ESGS";
else
return "Tessellation Evaluation Shader as VS";
@ -1137,7 +1142,7 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
si_shader_dump_stats(sscreen, shader, file, check_debug_option);
}
static void si_dump_shader_key_vs(const struct si_shader_key *key,
static void si_dump_shader_key_vs(const union si_shader_key *key,
const struct si_vs_prolog_bits *prolog, const char *prefix,
FILE *f)
{
@ -1146,10 +1151,10 @@ static void si_dump_shader_key_vs(const struct si_shader_key *key,
prolog->instance_divisor_is_fetched);
fprintf(f, " %s.ls_vgpr_fix = %u\n", prefix, prolog->ls_vgpr_fix);
fprintf(f, " mono.vs.fetch_opencode = %x\n", key->mono.vs_fetch_opencode);
fprintf(f, " mono.vs.fetch_opencode = %x\n", key->ge.mono.vs_fetch_opencode);
fprintf(f, " mono.vs.fix_fetch = {");
for (int i = 0; i < SI_MAX_ATTRIBS; i++) {
union si_vs_fix_fetch fix = key->mono.vs_fix_fetch[i];
union si_vs_fix_fetch fix = key->ge.mono.vs_fix_fetch[i];
if (i)
fprintf(f, ", ");
if (!fix.bits)
@ -1163,35 +1168,35 @@ static void si_dump_shader_key_vs(const struct si_shader_key *key,
static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
{
const struct si_shader_key *key = &shader->key;
const union si_shader_key *key = &shader->key;
gl_shader_stage stage = shader->selector->info.stage;
fprintf(f, "SHADER KEY\n");
switch (stage) {
case MESA_SHADER_VERTEX:
si_dump_shader_key_vs(key, &key->part.vs.prolog, "part.vs.prolog", f);
fprintf(f, " as_es = %u\n", key->as_es);
fprintf(f, " as_ls = %u\n", key->as_ls);
fprintf(f, " as_ngg = %u\n", key->as_ngg);
fprintf(f, " mono.u.vs_export_prim_id = %u\n", key->mono.u.vs_export_prim_id);
si_dump_shader_key_vs(key, &key->ge.part.vs.prolog, "part.vs.prolog", f);
fprintf(f, " as_es = %u\n", key->ge.as_es);
fprintf(f, " as_ls = %u\n", key->ge.as_ls);
fprintf(f, " as_ngg = %u\n", key->ge.as_ngg);
fprintf(f, " mono.u.vs_export_prim_id = %u\n", key->ge.mono.u.vs_export_prim_id);
break;
case MESA_SHADER_TESS_CTRL:
if (shader->selector->screen->info.chip_class >= GFX9) {
si_dump_shader_key_vs(key, &key->part.tcs.ls_prolog, "part.tcs.ls_prolog", f);
si_dump_shader_key_vs(key, &key->ge.part.tcs.ls_prolog, "part.tcs.ls_prolog", f);
}
fprintf(f, " part.tcs.epilog.prim_mode = %u\n", key->part.tcs.epilog.prim_mode);
fprintf(f, " part.tcs.epilog.prim_mode = %u\n", key->ge.part.tcs.epilog.prim_mode);
fprintf(f, " mono.u.ff_tcs_inputs_to_copy = 0x%" PRIx64 "\n",
key->mono.u.ff_tcs_inputs_to_copy);
fprintf(f, " opt.prefer_mono = %u\n", key->opt.prefer_mono);
fprintf(f, " opt.same_patch_vertices = %u\n", key->opt.same_patch_vertices);
key->ge.mono.u.ff_tcs_inputs_to_copy);
fprintf(f, " opt.prefer_mono = %u\n", key->ge.opt.prefer_mono);
fprintf(f, " opt.same_patch_vertices = %u\n", key->ge.opt.same_patch_vertices);
break;
case MESA_SHADER_TESS_EVAL:
fprintf(f, " as_es = %u\n", key->as_es);
fprintf(f, " as_ngg = %u\n", key->as_ngg);
fprintf(f, " mono.u.vs_export_prim_id = %u\n", key->mono.u.vs_export_prim_id);
fprintf(f, " as_es = %u\n", key->ge.as_es);
fprintf(f, " as_ngg = %u\n", key->ge.as_ngg);
fprintf(f, " mono.u.vs_export_prim_id = %u\n", key->ge.mono.u.vs_export_prim_id);
break;
case MESA_SHADER_GEOMETRY:
@ -1199,50 +1204,50 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
break;
if (shader->selector->screen->info.chip_class >= GFX9 &&
key->part.gs.es->info.stage == MESA_SHADER_VERTEX) {
si_dump_shader_key_vs(key, &key->part.gs.vs_prolog, "part.gs.vs_prolog", f);
key->ge.part.gs.es->info.stage == MESA_SHADER_VERTEX) {
si_dump_shader_key_vs(key, &key->ge.part.gs.vs_prolog, "part.gs.vs_prolog", f);
}
fprintf(f, " part.gs.prolog.tri_strip_adj_fix = %u\n",
key->part.gs.prolog.tri_strip_adj_fix);
fprintf(f, " as_ngg = %u\n", key->as_ngg);
key->ge.part.gs.prolog.tri_strip_adj_fix);
fprintf(f, " as_ngg = %u\n", key->ge.as_ngg);
break;
case MESA_SHADER_COMPUTE:
break;
case MESA_SHADER_FRAGMENT:
fprintf(f, " part.ps.prolog.color_two_side = %u\n", key->part.ps.prolog.color_two_side);
fprintf(f, " part.ps.prolog.flatshade_colors = %u\n", key->part.ps.prolog.flatshade_colors);
fprintf(f, " part.ps.prolog.poly_stipple = %u\n", key->part.ps.prolog.poly_stipple);
fprintf(f, " part.ps.prolog.force_persp_sample_interp = %u\n",
key->part.ps.prolog.force_persp_sample_interp);
fprintf(f, " part.ps.prolog.force_linear_sample_interp = %u\n",
key->part.ps.prolog.force_linear_sample_interp);
fprintf(f, " part.ps.prolog.force_persp_center_interp = %u\n",
key->part.ps.prolog.force_persp_center_interp);
fprintf(f, " part.ps.prolog.force_linear_center_interp = %u\n",
key->part.ps.prolog.force_linear_center_interp);
fprintf(f, " part.ps.prolog.bc_optimize_for_persp = %u\n",
key->part.ps.prolog.bc_optimize_for_persp);
fprintf(f, " part.ps.prolog.bc_optimize_for_linear = %u\n",
key->part.ps.prolog.bc_optimize_for_linear);
fprintf(f, " part.ps.prolog.samplemask_log_ps_iter = %u\n",
key->part.ps.prolog.samplemask_log_ps_iter);
fprintf(f, " part.ps.epilog.spi_shader_col_format = 0x%x\n",
key->part.ps.epilog.spi_shader_col_format);
fprintf(f, " part.ps.epilog.color_is_int8 = 0x%X\n", key->part.ps.epilog.color_is_int8);
fprintf(f, " part.ps.epilog.color_is_int10 = 0x%X\n", key->part.ps.epilog.color_is_int10);
fprintf(f, " part.ps.epilog.last_cbuf = %u\n", key->part.ps.epilog.last_cbuf);
fprintf(f, " part.ps.epilog.alpha_func = %u\n", key->part.ps.epilog.alpha_func);
fprintf(f, " part.ps.epilog.alpha_to_one = %u\n", key->part.ps.epilog.alpha_to_one);
fprintf(f, " part.ps.epilog.poly_line_smoothing = %u\n",
key->part.ps.epilog.poly_line_smoothing);
fprintf(f, " part.ps.epilog.clamp_color = %u\n", key->part.ps.epilog.clamp_color);
fprintf(f, " mono.u.ps.interpolate_at_sample_force_center = %u\n",
key->mono.u.ps.interpolate_at_sample_force_center);
fprintf(f, " mono.u.ps.fbfetch_msaa = %u\n", key->mono.u.ps.fbfetch_msaa);
fprintf(f, " mono.u.ps.fbfetch_is_1D = %u\n", key->mono.u.ps.fbfetch_is_1D);
fprintf(f, " mono.u.ps.fbfetch_layered = %u\n", key->mono.u.ps.fbfetch_layered);
fprintf(f, " prolog.color_two_side = %u\n", key->ps.part.prolog.color_two_side);
fprintf(f, " prolog.flatshade_colors = %u\n", key->ps.part.prolog.flatshade_colors);
fprintf(f, " prolog.poly_stipple = %u\n", key->ps.part.prolog.poly_stipple);
fprintf(f, " prolog.force_persp_sample_interp = %u\n",
key->ps.part.prolog.force_persp_sample_interp);
fprintf(f, " prolog.force_linear_sample_interp = %u\n",
key->ps.part.prolog.force_linear_sample_interp);
fprintf(f, " prolog.force_persp_center_interp = %u\n",
key->ps.part.prolog.force_persp_center_interp);
fprintf(f, " prolog.force_linear_center_interp = %u\n",
key->ps.part.prolog.force_linear_center_interp);
fprintf(f, " prolog.bc_optimize_for_persp = %u\n",
key->ps.part.prolog.bc_optimize_for_persp);
fprintf(f, " prolog.bc_optimize_for_linear = %u\n",
key->ps.part.prolog.bc_optimize_for_linear);
fprintf(f, " prolog.samplemask_log_ps_iter = %u\n",
key->ps.part.prolog.samplemask_log_ps_iter);
fprintf(f, " epilog.spi_shader_col_format = 0x%x\n",
key->ps.part.epilog.spi_shader_col_format);
fprintf(f, " epilog.color_is_int8 = 0x%X\n", key->ps.part.epilog.color_is_int8);
fprintf(f, " epilog.color_is_int10 = 0x%X\n", key->ps.part.epilog.color_is_int10);
fprintf(f, " epilog.last_cbuf = %u\n", key->ps.part.epilog.last_cbuf);
fprintf(f, " epilog.alpha_func = %u\n", key->ps.part.epilog.alpha_func);
fprintf(f, " epilog.alpha_to_one = %u\n", key->ps.part.epilog.alpha_to_one);
fprintf(f, " epilog.poly_line_smoothing = %u\n",
key->ps.part.epilog.poly_line_smoothing);
fprintf(f, " epilog.clamp_color = %u\n", key->ps.part.epilog.clamp_color);
fprintf(f, " mono.interpolate_at_sample_force_center = %u\n",
key->ps.mono.interpolate_at_sample_force_center);
fprintf(f, " mono.fbfetch_msaa = %u\n", key->ps.mono.fbfetch_msaa);
fprintf(f, " mono.fbfetch_is_1D = %u\n", key->ps.mono.fbfetch_is_1D);
fprintf(f, " mono.fbfetch_layered = %u\n", key->ps.mono.fbfetch_layered);
break;
default:
@ -1251,32 +1256,44 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
if ((stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_TESS_EVAL ||
stage == MESA_SHADER_VERTEX) &&
!key->as_es && !key->as_ls) {
fprintf(f, " opt.kill_outputs = 0x%" PRIx64 "\n", key->opt.kill_outputs);
fprintf(f, " opt.kill_pointsize = 0x%x\n", key->opt.kill_pointsize);
fprintf(f, " opt.kill_clip_distances = 0x%x\n", key->opt.kill_clip_distances);
!key->ge.as_es && !key->ge.as_ls) {
fprintf(f, " opt.kill_outputs = 0x%" PRIx64 "\n", key->ge.opt.kill_outputs);
fprintf(f, " opt.kill_pointsize = 0x%x\n", key->ge.opt.kill_pointsize);
fprintf(f, " opt.kill_clip_distances = 0x%x\n", key->ge.opt.kill_clip_distances);
if (stage != MESA_SHADER_GEOMETRY)
fprintf(f, " opt.ngg_culling = 0x%x\n", key->opt.ngg_culling);
fprintf(f, " opt.ngg_culling = 0x%x\n", key->ge.opt.ngg_culling);
}
fprintf(f, " opt.prefer_mono = %u\n", key->opt.prefer_mono);
fprintf(f, " opt.inline_uniforms = %u (0x%x, 0x%x, 0x%x, 0x%x)\n",
key->opt.inline_uniforms,
key->opt.inlined_uniform_values[0],
key->opt.inlined_uniform_values[1],
key->opt.inlined_uniform_values[2],
key->opt.inlined_uniform_values[3]);
if (stage <= MESA_SHADER_GEOMETRY) {
fprintf(f, " opt.prefer_mono = %u\n", key->ge.opt.prefer_mono);
fprintf(f, " opt.inline_uniforms = %u (0x%x, 0x%x, 0x%x, 0x%x)\n",
key->ge.opt.inline_uniforms,
key->ge.opt.inlined_uniform_values[0],
key->ge.opt.inlined_uniform_values[1],
key->ge.opt.inlined_uniform_values[2],
key->ge.opt.inlined_uniform_values[3]);
} else {
fprintf(f, " opt.prefer_mono = %u\n", key->ps.opt.prefer_mono);
fprintf(f, " opt.inline_uniforms = %u (0x%x, 0x%x, 0x%x, 0x%x)\n",
key->ps.opt.inline_uniforms,
key->ps.opt.inlined_uniform_values[0],
key->ps.opt.inlined_uniform_values[1],
key->ps.opt.inlined_uniform_values[2],
key->ps.opt.inlined_uniform_values[3]);
}
}
bool si_vs_needs_prolog(const struct si_shader_selector *sel,
const struct si_vs_prolog_bits *prolog_key,
const struct si_shader_key *key, bool ngg_cull_shader)
const union si_shader_key *key, bool ngg_cull_shader)
{
assert(sel->info.stage == MESA_SHADER_VERTEX);
/* VGPR initialization fixup for Vega10 and Raven is always done in the
* VS prolog. */
return sel->vs_needs_prolog || prolog_key->ls_vgpr_fix ||
/* The 2nd VS prolog loads input VGPRs from LDS */
(key->opt.ngg_culling && !ngg_cull_shader);
(key->ge.opt.ngg_culling && !ngg_cull_shader);
}
/**
@ -1298,11 +1315,11 @@ void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_
key->vs_prolog.states = *prolog_key;
key->vs_prolog.num_input_sgprs = num_input_sgprs;
key->vs_prolog.num_inputs = info->num_inputs;
key->vs_prolog.as_ls = shader_out->key.as_ls;
key->vs_prolog.as_es = shader_out->key.as_es;
key->vs_prolog.as_ngg = shader_out->key.as_ngg;
key->vs_prolog.as_ls = shader_out->key.ge.as_ls;
key->vs_prolog.as_es = shader_out->key.ge.as_es;
key->vs_prolog.as_ngg = shader_out->key.ge.as_ngg;
if (!ngg_cull_shader && shader_out->key.opt.ngg_culling)
if (!ngg_cull_shader && shader_out->key.ge.opt.ngg_culling)
key->vs_prolog.load_vgprs_after_culling = 1;
if (shader_out->selector->info.stage == MESA_SHADER_TESS_CTRL) {
@ -1311,7 +1328,7 @@ void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_
} else if (shader_out->selector->info.stage == MESA_SHADER_GEOMETRY) {
key->vs_prolog.as_es = 1;
key->vs_prolog.num_merged_next_stage_vgprs = 5;
} else if (shader_out->key.as_ngg) {
} else if (shader_out->key.ge.as_ngg) {
key->vs_prolog.num_merged_next_stage_vgprs = 5;
}
@ -1329,7 +1346,7 @@ void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_
}
struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
const struct si_shader_key *key,
const union si_shader_key *key,
bool *free_nir)
{
nir_shader *nir;
@ -1350,7 +1367,14 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
return NULL;
}
if (key && key->opt.inline_uniforms) {
bool inline_uniforms = false;
uint32_t *inlined_uniform_values;
if (key) {
si_get_inline_uniform_state((union si_shader_key*)key, sel->pipe_shader_type,
&inline_uniforms, &inlined_uniform_values);
}
if (inline_uniforms) {
assert(*free_nir);
/* Most places use shader information from the default variant, not
@ -1394,7 +1418,7 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
*/
NIR_PASS_V(nir, nir_inline_uniforms,
nir->info.num_inlinable_uniforms,
key->opt.inlined_uniform_values,
inlined_uniform_values,
nir->info.inlinable_uniform_dw_offsets);
si_nir_opts(sel->screen, nir, true);
@ -1441,15 +1465,15 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
if ((sel->info.stage == MESA_SHADER_VERTEX ||
sel->info.stage == MESA_SHADER_TESS_EVAL ||
sel->info.stage == MESA_SHADER_GEOMETRY) &&
!shader->key.as_ls && !shader->key.as_es) {
!shader->key.ge.as_ls && !shader->key.ge.as_es) {
ubyte *vs_output_param_offset = shader->info.vs_output_param_offset;
if (sel->info.stage == MESA_SHADER_GEOMETRY && !shader->key.as_ngg)
if (sel->info.stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg)
vs_output_param_offset = sel->gs_copy_shader->info.vs_output_param_offset;
/* VS and TES should also set primitive ID output if it's used. */
unsigned num_outputs_with_prim_id = sel->info.num_outputs +
shader->key.mono.u.vs_export_prim_id;
shader->key.ge.mono.u.vs_export_prim_id;
for (unsigned i = 0; i < num_outputs_with_prim_id; i++) {
unsigned semantic = sel->info.output_semantic[i];
@ -1563,32 +1587,38 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader_part **list,
switch (stage) {
case MESA_SHADER_VERTEX:
shader.key.as_ls = key->vs_prolog.as_ls;
shader.key.as_es = key->vs_prolog.as_es;
shader.key.as_ngg = key->vs_prolog.as_ngg;
shader.key.ge.as_ls = key->vs_prolog.as_ls;
shader.key.ge.as_es = key->vs_prolog.as_es;
shader.key.ge.as_ngg = key->vs_prolog.as_ngg;
break;
case MESA_SHADER_TESS_CTRL:
assert(!prolog);
shader.key.part.tcs.epilog = key->tcs_epilog.states;
shader.key.ge.part.tcs.epilog = key->tcs_epilog.states;
break;
case MESA_SHADER_GEOMETRY:
assert(prolog);
shader.key.as_ngg = key->gs_prolog.as_ngg;
shader.key.ge.as_ngg = key->gs_prolog.as_ngg;
break;
case MESA_SHADER_FRAGMENT:
if (prolog)
shader.key.part.ps.prolog = key->ps_prolog.states;
shader.key.ps.part.prolog = key->ps_prolog.states;
else
shader.key.part.ps.epilog = key->ps_epilog.states;
shader.key.ps.part.epilog = key->ps_epilog.states;
break;
default:
unreachable("bad shader part");
}
unsigned wave_size;
if (stage <= MESA_SHADER_GEOMETRY) {
wave_size = si_get_wave_size(sscreen, stage, shader.key.ge.as_ngg, shader.key.ge.as_es);
} else {
wave_size = si_get_wave_size(sscreen, stage, false, false);
}
struct si_shader_context ctx;
si_llvm_context_init(&ctx, sscreen, compiler,
si_get_wave_size(sscreen, stage,
shader.key.as_ngg, shader.key.as_es));
si_llvm_context_init(&ctx, sscreen, compiler, wave_size);
ctx.shader = &shader;
ctx.stage = stage;
@ -1639,7 +1669,7 @@ static bool si_get_vs_prolog(struct si_screen *sscreen, struct ac_llvm_compiler
static bool si_shader_select_vs_parts(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
struct si_shader *shader, struct pipe_debug_callback *debug)
{
return si_get_vs_prolog(sscreen, compiler, shader, debug, shader, &shader->key.part.vs.prolog);
return si_get_vs_prolog(sscreen, compiler, shader, debug, shader, &shader->key.ge.part.vs.prolog);
}
/**
@ -1649,10 +1679,10 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen, struct ac_llvm
struct si_shader *shader, struct pipe_debug_callback *debug)
{
if (sscreen->info.chip_class >= GFX9) {
struct si_shader *ls_main_part = shader->key.part.tcs.ls->main_shader_part_ls;
struct si_shader *ls_main_part = shader->key.ge.part.tcs.ls->main_shader_part_ls;
if (!si_get_vs_prolog(sscreen, compiler, shader, debug, ls_main_part,
&shader->key.part.tcs.ls_prolog))
&shader->key.ge.part.tcs.ls_prolog))
return false;
shader->previous_stage = ls_main_part;
@ -1661,7 +1691,7 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen, struct ac_llvm
/* Get the epilog. */
union si_shader_part_key epilog_key;
memset(&epilog_key, 0, sizeof(epilog_key));
epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog;
epilog_key.tcs_epilog.states = shader->key.ge.part.tcs.epilog;
shader->epilog = si_get_shader_part(sscreen, &sscreen->tcs_epilogs, MESA_SHADER_TESS_CTRL, false,
&epilog_key, compiler, debug, si_llvm_build_tcs_epilog,
@ -1678,26 +1708,26 @@ static bool si_shader_select_gs_parts(struct si_screen *sscreen, struct ac_llvm_
if (sscreen->info.chip_class >= GFX9) {
struct si_shader *es_main_part;
if (shader->key.as_ngg)
es_main_part = shader->key.part.gs.es->main_shader_part_ngg_es;
if (shader->key.ge.as_ngg)
es_main_part = shader->key.ge.part.gs.es->main_shader_part_ngg_es;
else
es_main_part = shader->key.part.gs.es->main_shader_part_es;
es_main_part = shader->key.ge.part.gs.es->main_shader_part_es;
if (shader->key.part.gs.es->info.stage == MESA_SHADER_VERTEX &&
if (shader->key.ge.part.gs.es->info.stage == MESA_SHADER_VERTEX &&
!si_get_vs_prolog(sscreen, compiler, shader, debug, es_main_part,
&shader->key.part.gs.vs_prolog))
&shader->key.ge.part.gs.vs_prolog))
return false;
shader->previous_stage = es_main_part;
}
if (!shader->key.part.gs.prolog.tri_strip_adj_fix)
if (!shader->key.ge.part.gs.prolog.tri_strip_adj_fix)
return true;
union si_shader_part_key prolog_key;
memset(&prolog_key, 0, sizeof(prolog_key));
prolog_key.gs_prolog.states = shader->key.part.gs.prolog;
prolog_key.gs_prolog.as_ngg = shader->key.as_ngg;
prolog_key.gs_prolog.states = shader->key.ge.part.gs.prolog;
prolog_key.gs_prolog.as_ngg = shader->key.ge.as_ngg;
shader->prolog2 =
si_get_shader_part(sscreen, &sscreen->gs_prologs, MESA_SHADER_GEOMETRY, true, &prolog_key,
@ -1715,7 +1745,7 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke
struct si_shader_info *info = &shader->selector->info;
memset(key, 0, sizeof(*key));
key->ps_prolog.states = shader->key.part.ps.prolog;
key->ps_prolog.states = shader->key.ps.part.prolog;
key->ps_prolog.colors_read = info->colors_read;
key->ps_prolog.num_input_sgprs = shader->info.num_input_sgprs;
key->ps_prolog.num_input_vgprs = shader->info.num_input_vgprs;
@ -1731,7 +1761,7 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke
if (info->colors_read) {
ubyte *color = shader->selector->color_attr_index;
if (shader->key.part.ps.prolog.color_two_side) {
if (shader->key.ps.part.prolog.color_two_side) {
/* BCOLORs are stored after the last input. */
key->ps_prolog.num_interp_inputs = info->num_inputs;
key->ps_prolog.face_vgpr_index = shader->info.face_vgpr_index;
@ -1748,7 +1778,7 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke
key->ps_prolog.color_attr_index[i] = color[i];
if (shader->key.part.ps.prolog.flatshade_colors && interp == INTERP_MODE_COLOR)
if (shader->key.ps.part.prolog.flatshade_colors && interp == INTERP_MODE_COLOR)
interp = INTERP_MODE_FLAT;
switch (interp) {
@ -1758,9 +1788,9 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke
case INTERP_MODE_SMOOTH:
case INTERP_MODE_COLOR:
/* Force the interpolation location for colors here. */
if (shader->key.part.ps.prolog.force_persp_sample_interp)
if (shader->key.ps.part.prolog.force_persp_sample_interp)
location = TGSI_INTERPOLATE_LOC_SAMPLE;
if (shader->key.part.ps.prolog.force_persp_center_interp)
if (shader->key.ps.part.prolog.force_persp_center_interp)
location = TGSI_INTERPOLATE_LOC_CENTER;
switch (location) {
@ -1788,9 +1818,9 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke
break;
case INTERP_MODE_NOPERSPECTIVE:
/* Force the interpolation location for colors here. */
if (shader->key.part.ps.prolog.force_linear_sample_interp)
if (shader->key.ps.part.prolog.force_linear_sample_interp)
location = TGSI_INTERPOLATE_LOC_SAMPLE;
if (shader->key.part.ps.prolog.force_linear_center_interp)
if (shader->key.ps.part.prolog.force_linear_center_interp)
location = TGSI_INTERPOLATE_LOC_CENTER;
/* The VGPR assignment for non-monolithic shaders
@ -1854,7 +1884,7 @@ void si_get_ps_epilog_key(struct si_shader *shader, union si_shader_part_key *ke
key->ps_epilog.writes_z = info->writes_z;
key->ps_epilog.writes_stencil = info->writes_stencil;
key->ps_epilog.writes_samplemask = info->writes_samplemask;
key->ps_epilog.states = shader->key.part.ps.epilog;
key->ps_epilog.states = shader->key.ps.part.epilog;
}
/**
@ -1888,34 +1918,34 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, struct ac_llvm_
return false;
/* Enable POS_FIXED_PT if polygon stippling is enabled. */
if (shader->key.part.ps.prolog.poly_stipple) {
if (shader->key.ps.part.prolog.poly_stipple) {
shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1);
assert(G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr));
}
/* Set up the enable bits for per-sample shading if needed. */
if (shader->key.part.ps.prolog.force_persp_sample_interp &&
if (shader->key.ps.part.prolog.force_persp_sample_interp &&
(G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_ena) ||
G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTER_ENA;
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
}
if (shader->key.part.ps.prolog.force_linear_sample_interp &&
if (shader->key.ps.part.prolog.force_linear_sample_interp &&
(G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_ena) ||
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTER_ENA;
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
}
if (shader->key.part.ps.prolog.force_persp_center_interp &&
if (shader->key.ps.part.prolog.force_persp_center_interp &&
(G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_SAMPLE_ENA;
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
}
if (shader->key.part.ps.prolog.force_linear_center_interp &&
if (shader->key.ps.part.prolog.force_linear_center_interp &&
(G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_SAMPLE_ENA;
@ -1937,7 +1967,7 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, struct ac_llvm_
}
/* Samplemask fixup requires the sample ID. */
if (shader->key.part.ps.prolog.samplemask_log_ps_iter) {
if (shader->key.ps.part.prolog.samplemask_log_ps_iter) {
shader->config.spi_ps_input_ena |= S_0286CC_ANCILLARY_ENA(1);
assert(G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr));
}
@ -1945,7 +1975,7 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, struct ac_llvm_
/* The sample mask input is always enabled, because the API shader always
* passes it through to the epilog. Disable it here if it's unused.
*/
if (!shader->key.part.ps.epilog.poly_line_smoothing && !shader->selector->info.reads_samplemask)
if (!shader->key.ps.part.epilog.poly_line_smoothing && !shader->selector->info.reads_samplemask)
shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA;
return true;
@ -2098,8 +2128,8 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
si_calculate_max_simd_waves(shader);
}
if (shader->key.as_ngg) {
assert(!shader->key.as_es && !shader->key.as_ls);
if (sel->info.stage <= MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg) {
assert(!shader->key.ge.as_es && !shader->key.ge.as_ls);
if (!gfx10_ngg_calculate_subgroup_info(shader)) {
fprintf(stderr, "Failed to compute subgroup info\n");
return false;
@ -2115,7 +2145,7 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
util_rast_prim_is_triangles(sel->info.base.gs.output_primitive)) ||
(sel->info.stage == MESA_SHADER_VERTEX &&
/* Used to export PrimitiveID from the correct vertex. */
shader->key.mono.u.vs_export_prim_id));
shader->key.ge.mono.u.vs_export_prim_id));
shader->uses_vs_state_outprim = sscreen->use_ngg &&
/* Only used by streamout in vertex shaders. */
@ -2124,18 +2154,18 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
if (sel->info.stage == MESA_SHADER_VERTEX) {
shader->uses_base_instance = sel->info.uses_base_instance ||
shader->key.part.vs.prolog.instance_divisor_is_one ||
shader->key.part.vs.prolog.instance_divisor_is_fetched;
shader->key.ge.part.vs.prolog.instance_divisor_is_one ||
shader->key.ge.part.vs.prolog.instance_divisor_is_fetched;
} else if (sel->info.stage == MESA_SHADER_TESS_CTRL) {
shader->uses_base_instance = shader->previous_stage_sel &&
(shader->previous_stage_sel->info.uses_base_instance ||
shader->key.part.tcs.ls_prolog.instance_divisor_is_one ||
shader->key.part.tcs.ls_prolog.instance_divisor_is_fetched);
shader->key.ge.part.tcs.ls_prolog.instance_divisor_is_one ||
shader->key.ge.part.tcs.ls_prolog.instance_divisor_is_fetched);
} else if (sel->info.stage == MESA_SHADER_GEOMETRY) {
shader->uses_base_instance = shader->previous_stage_sel &&
(shader->previous_stage_sel->info.uses_base_instance ||
shader->key.part.gs.vs_prolog.instance_divisor_is_one ||
shader->key.part.gs.vs_prolog.instance_divisor_is_fetched);
shader->key.ge.part.gs.vs_prolog.instance_divisor_is_one ||
shader->key.ge.part.gs.vs_prolog.instance_divisor_is_fetched);
}
si_fix_resource_usage(sscreen, shader);

View file

@ -618,7 +618,8 @@ union si_shader_part_key {
} ps_epilog;
};
struct si_shader_key {
/* The shader key for geometry stages (VS, TCS, TES, GS) */
struct si_shader_key_ge {
/* Prolog and epilog flags. */
union {
struct {
@ -634,10 +635,6 @@ struct si_shader_key {
struct si_shader_selector *es; /* for merged ES-GS */
struct si_gs_prolog_bits prolog;
} gs;
struct {
struct si_ps_prolog_bits prolog;
struct si_ps_epilog_bits epilog;
} ps;
} part;
/* These three are initially set according to the NEXT_SHADER property,
@ -660,12 +657,6 @@ struct si_shader_key {
uint64_t ff_tcs_inputs_to_copy; /* for fixed-func TCS */
/* When PS needs PrimID and GS is disabled. */
unsigned vs_export_prim_id : 1;
struct {
unsigned interpolate_at_sample_force_center : 1;
unsigned fbfetch_msaa : 1;
unsigned fbfetch_is_1D : 1;
unsigned fbfetch_layered : 1;
} ps;
} u;
} mono;
@ -699,6 +690,44 @@ struct si_shader_key {
} opt;
};
struct si_shader_key_ps {
struct {
/* Prolog and epilog flags. */
struct si_ps_prolog_bits prolog;
struct si_ps_epilog_bits epilog;
} part;
/* Flags for monolithic compilation only. */
struct {
unsigned interpolate_at_sample_force_center : 1;
unsigned fbfetch_msaa : 1;
unsigned fbfetch_is_1D : 1;
unsigned fbfetch_layered : 1;
} mono;
/* Optimization flags for asynchronous compilation only. */
struct {
/* For shaders where monolithic variants have better code.
*
* This is a flag that has no effect on code generation,
* but forces monolithic shaders to be used as soon as
* possible, because it's in the "opt" group.
*/
unsigned prefer_mono : 1;
unsigned inline_uniforms:1;
/* This must be kept last to limit the number of variants
* depending only on the uniform values.
*/
uint32_t inlined_uniform_values[MAX_INLINABLE_UNIFORMS];
} opt;
};
union si_shader_key {
struct si_shader_key_ge ge; /* geometry engine shaders */
struct si_shader_key_ps ps;
};
/* Restore the pack alignment to default. */
#pragma pack(pop)
@ -777,7 +806,7 @@ struct si_shader {
struct si_resource *bo;
struct si_resource *scratch_bo;
struct si_shader_key key;
union si_shader_key key;
struct util_queue_fence ready;
bool compilation_failed;
bool is_monolithic;
@ -927,16 +956,18 @@ bool gfx10_is_ngg_passthrough(struct si_shader *shader);
/* Return the pointer to the main shader part's pointer. */
static inline struct si_shader **si_get_main_shader_part(struct si_shader_selector *sel,
const struct si_shader_key *key)
const union si_shader_key *key)
{
if (key->as_ls)
return &sel->main_shader_part_ls;
if (key->as_es && key->as_ngg)
return &sel->main_shader_part_ngg_es;
if (key->as_es)
return &sel->main_shader_part_es;
if (key->as_ngg)
return &sel->main_shader_part_ngg;
if (sel->info.stage <= MESA_SHADER_GEOMETRY) {
if (key->ge.as_ls)
return &sel->main_shader_part_ls;
if (key->ge.as_es && key->ge.as_ngg)
return &sel->main_shader_part_ngg_es;
if (key->ge.as_es)
return &sel->main_shader_part_es;
if (key->ge.as_ngg)
return &sel->main_shader_part_ngg;
}
return &sel->main_shader_part;
}
@ -954,7 +985,7 @@ static inline bool gfx10_edgeflags_have_effect(struct si_shader *shader)
{
if (shader->selector->info.stage == MESA_SHADER_VERTEX &&
!shader->selector->info.base.vs.blit_sgprs_amd &&
!(shader->key.opt.ngg_culling & SI_NGG_CULL_LINES))
!(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES))
return true;
return false;

View file

@ -168,12 +168,12 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader);
unsigned si_get_max_workgroup_size(const struct si_shader *shader);
bool si_vs_needs_prolog(const struct si_shader_selector *sel,
const struct si_vs_prolog_bits *prolog_key,
const struct si_shader_key *key, bool ngg_cull_shader);
const union si_shader_key *key, bool ngg_cull_shader);
void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_sgprs,
bool ngg_cull_shader, const struct si_vs_prolog_bits *prolog_key,
struct si_shader *shader_out, union si_shader_part_key *key);
struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
const struct si_shader_key *key,
const union si_shader_key *key,
bool *free_nir);
bool si_need_ps_prolog(const union si_shader_part_key *key);
void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *key,

View file

@ -150,10 +150,10 @@ void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTy
gl_shader_stage real_stage = ctx->stage;
/* LS is merged into HS (TCS), and ES is merged into GS. */
if (ctx->screen->info.chip_class >= GFX9) {
if (ctx->shader->key.as_ls)
if (ctx->screen->info.chip_class >= GFX9 && ctx->stage <= MESA_SHADER_GEOMETRY) {
if (ctx->shader->key.ge.as_ls)
real_stage = MESA_SHADER_TESS_CTRL;
else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg)
else if (ctx->shader->key.ge.as_es || ctx->shader->key.ge.as_ngg)
real_stage = MESA_SHADER_GEOMETRY;
}
@ -219,7 +219,8 @@ void si_llvm_create_main_func(struct si_shader_context *ctx, bool ngg_cull_shade
}
if (shader->key.as_ls || ctx->stage == MESA_SHADER_TESS_CTRL) {
if (ctx->stage <= MESA_SHADER_GEOMETRY &&
(shader->key.ge.as_ls || ctx->stage == MESA_SHADER_TESS_CTRL)) {
if (USE_LDS_SYMBOLS) {
/* The LSHS size is not known until draw time, so we append it
* at the end of whatever LDS use there may be in the rest of
@ -470,7 +471,7 @@ static bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *
}
ctx->abi.interp_at_sample_force_center =
ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center;
ctx->shader->key.ps.mono.interpolate_at_sample_force_center;
ctx->abi.kill_ps_if_inf_interp =
ctx->screen->options.no_infinite_interp &&
@ -854,7 +855,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
si_llvm_create_main_func(ctx, ngg_cull_shader);
if (ctx->shader->key.as_es || ctx->stage == MESA_SHADER_GEOMETRY)
if (ctx->stage <= MESA_SHADER_GEOMETRY &&
(ctx->shader->key.ge.as_es || ctx->stage == MESA_SHADER_GEOMETRY))
si_preload_esgs_ring(ctx);
if (ctx->stage == MESA_SHADER_GEOMETRY)
@ -872,7 +874,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
for (unsigned i = 0; i < 4; i++) {
ctx->gs_next_vertex[i] = ac_build_alloca(&ctx->ac, ctx->ac.i32, "");
}
if (shader->key.as_ngg) {
if (shader->key.ge.as_ngg) {
for (unsigned i = 0; i < 4; ++i) {
ctx->gs_curprim_verts[i] = ac_build_alloca(&ctx->ac, ctx->ac.i32, "");
ctx->gs_generated_prims[i] = ac_build_alloca(&ctx->ac, ctx->ac.i32, "");
@ -892,7 +894,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
}
}
if (ctx->stage != MESA_SHADER_GEOMETRY && (shader->key.as_ngg && !shader->key.as_es)) {
if ((ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL) &&
shader->key.ge.as_ngg && !shader->key.ge.as_es) {
/* Unconditionally declare scratch space base for streamout and
* vertex compaction. Whether space is actually allocated is
* determined during linking / PM4 creation.
@ -902,7 +905,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
/* This is really only needed when streamout and / or vertex
* compaction is enabled.
*/
if (!ctx->gs_ngg_scratch && (sel->so.num_outputs || shader->key.opt.ngg_culling)) {
if (!ctx->gs_ngg_scratch && (sel->so.num_outputs || shader->key.ge.opt.ngg_culling)) {
LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader));
ctx->gs_ngg_scratch =
LLVMAddGlobalInAddressSpace(ctx->ac.module, asi32, "ngg_scratch", AC_ADDR_SPACE_LDS);
@ -918,8 +921,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
/* TES is special because it has only 1 shader part if NGG shader culling is disabled,
* and therefore it doesn't use the wrapper function.
*/
bool no_wrapper_func = ctx->stage == MESA_SHADER_TESS_EVAL && !shader->key.as_es &&
!shader->key.opt.ngg_culling;
bool no_wrapper_func = ctx->stage == MESA_SHADER_TESS_EVAL && !shader->key.ge.as_es &&
!shader->key.ge.opt.ngg_culling;
/* Set EXEC = ~0 before the first shader. If the prolog is present, EXEC is set there
* instead. For monolithic shaders, the wrapper function does this.
@ -927,14 +930,14 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
if ((!shader->is_monolithic || no_wrapper_func) &&
(ctx->stage == MESA_SHADER_TESS_EVAL ||
(ctx->stage == MESA_SHADER_VERTEX &&
!si_vs_needs_prolog(sel, &shader->key.part.vs.prolog, &shader->key, ngg_cull_shader))))
!si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, ngg_cull_shader))))
ac_init_exec_full_mask(&ctx->ac);
/* NGG VS and NGG TES: Send gs_alloc_req and the prim export at the beginning to decrease
* register usage.
*/
if ((ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL) &&
shader->key.as_ngg && !shader->key.as_es && !shader->key.opt.ngg_culling) {
shader->key.ge.as_ngg && !shader->key.ge.as_es && !shader->key.ge.opt.ngg_culling) {
/* GFX10 requires a barrier before gs_alloc_req due to a hw bug. */
if (ctx->screen->info.chip_class == GFX10)
ac_build_s_barrier(&ctx->ac);
@ -949,7 +952,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
}
/* NGG GS: Initialize LDS and insert s_barrier, which must not be inside the if statement. */
if (ctx->stage == MESA_SHADER_GEOMETRY && shader->key.as_ngg)
if (ctx->stage == MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg)
gfx10_ngg_gs_emit_prologue(ctx);
if (ctx->stage == MESA_SHADER_GEOMETRY ||
@ -959,8 +962,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
* not here.
*/
thread_enabled = si_is_gs_thread(ctx); /* 2nd shader: thread enabled bool */
} else if (((shader->key.as_ls || shader->key.as_es) && !shader->is_monolithic) ||
(shader->key.as_ngg && !shader->key.as_es)) {
} else if (((shader->key.ge.as_ls || shader->key.ge.as_es) && !shader->is_monolithic) ||
(shader->key.ge.as_ngg && !shader->key.ge.as_es)) {
/* This is NGG VS or NGG TES or VS before GS or TES before GS or VS before TCS.
* For monolithic LS (VS before TCS) and ES (VS before GS and TES before GS),
* the if statement is inserted by the wrapper function.
@ -993,11 +996,11 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
*/
if (ctx->stage == MESA_SHADER_TESS_CTRL) {
/* We need the barrier only if TCS inputs are read from LDS. */
if (!shader->key.opt.same_patch_vertices ||
if (!shader->key.ge.opt.same_patch_vertices ||
shader->selector->info.base.inputs_read &
~shader->selector->tcs_vgpr_only_inputs)
ac_build_s_barrier(&ctx->ac);
} else if (ctx->stage == MESA_SHADER_GEOMETRY && !shader->key.as_ngg) {
} else if (ctx->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
/* gfx10_ngg_gs_emit_prologue inserts the barrier for NGG. */
ac_build_s_barrier(&ctx->ac);
}
@ -1036,7 +1039,7 @@ static void si_optimize_vs_outputs(struct si_shader_context *ctx)
unsigned skip_vs_optim_mask = 0;
if ((ctx->stage != MESA_SHADER_VERTEX && ctx->stage != MESA_SHADER_TESS_EVAL) ||
shader->key.as_ls || shader->key.as_es)
shader->key.ge.as_ls || shader->key.ge.as_es)
return;
/* Optimizing these outputs is not possible, since they might be overriden
@ -1064,7 +1067,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
si_llvm_context_init(&ctx, sscreen, compiler, si_get_shader_wave_size(shader));
LLVMValueRef ngg_cull_main_fn = NULL;
if (shader->key.opt.ngg_culling) {
if (ctx.stage <= MESA_SHADER_GEOMETRY && shader->key.ge.opt.ngg_culling) {
if (!si_llvm_translate_nir(&ctx, shader, nir, false, true)) {
si_llvm_dispose(&ctx);
return false;
@ -1085,10 +1088,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
LLVMValueRef main_fn = ctx.main_fn;
if (ngg_cull_main_fn) {
if (si_vs_needs_prolog(sel, &shader->key.part.vs.prolog, &shader->key, true)) {
if (si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, true)) {
union si_shader_part_key prolog_key;
si_get_vs_prolog_key(&sel->info, shader->info.num_input_sgprs, true,
&shader->key.part.vs.prolog, shader, &prolog_key);
&shader->key.ge.part.vs.prolog, shader, &prolog_key);
prolog_key.vs_prolog.is_monolithic = true;
si_llvm_build_vs_prolog(&ctx, &prolog_key);
parts[num_parts++] = ctx.main_fn;
@ -1097,10 +1100,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
parts[num_parts++] = ngg_cull_main_fn;
}
if (si_vs_needs_prolog(sel, &shader->key.part.vs.prolog, &shader->key, false)) {
if (si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, false)) {
union si_shader_part_key prolog_key;
si_get_vs_prolog_key(&sel->info, shader->info.num_input_sgprs, false,
&shader->key.part.vs.prolog, shader, &prolog_key);
&shader->key.ge.part.vs.prolog, shader, &prolog_key);
prolog_key.vs_prolog.is_monolithic = true;
si_llvm_build_vs_prolog(&ctx, &prolog_key);
parts[num_parts++] = ctx.main_fn;
@ -1131,10 +1134,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
si_build_wrapper_function(&ctx, parts, 3, 0, 0, false);
} else if (shader->is_monolithic && ctx.stage == MESA_SHADER_TESS_CTRL) {
if (sscreen->info.chip_class >= GFX9) {
struct si_shader_selector *ls = shader->key.part.tcs.ls;
struct si_shader_selector *ls = shader->key.ge.part.tcs.ls;
LLVMValueRef parts[4];
bool vs_needs_prolog =
si_vs_needs_prolog(ls, &shader->key.part.tcs.ls_prolog, &shader->key, false);
si_vs_needs_prolog(ls, &shader->key.ge.part.tcs.ls_prolog, &shader->key, false);
/* TCS main part */
parts[2] = ctx.main_fn;
@ -1142,7 +1145,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
/* TCS epilog */
union si_shader_part_key tcs_epilog_key;
memset(&tcs_epilog_key, 0, sizeof(tcs_epilog_key));
tcs_epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog;
tcs_epilog_key.tcs_epilog.states = shader->key.ge.part.tcs.epilog;
si_llvm_build_tcs_epilog(&ctx, &tcs_epilog_key);
parts[3] = ctx.main_fn;
@ -1151,9 +1154,9 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
nir = si_get_nir_shader(ls, NULL, &free_nir);
struct si_shader shader_ls = {};
shader_ls.selector = ls;
shader_ls.key.as_ls = 1;
shader_ls.key.mono = shader->key.mono;
shader_ls.key.opt = shader->key.opt;
shader_ls.key.ge.as_ls = 1;
shader_ls.key.ge.mono = shader->key.ge.mono;
shader_ls.key.ge.opt = shader->key.ge.opt;
shader_ls.is_monolithic = true;
if (!si_llvm_translate_nir(&ctx, &shader_ls, nir, free_nir, false)) {
@ -1167,7 +1170,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
if (vs_needs_prolog) {
union si_shader_part_key vs_prolog_key;
si_get_vs_prolog_key(&ls->info, shader_ls.info.num_input_sgprs, false,
&shader->key.part.tcs.ls_prolog, shader, &vs_prolog_key);
&shader->key.ge.part.tcs.ls_prolog, shader, &vs_prolog_key);
vs_prolog_key.vs_prolog.is_monolithic = true;
si_llvm_build_vs_prolog(&ctx, &vs_prolog_key);
parts[0] = ctx.main_fn;
@ -1179,7 +1182,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
si_build_wrapper_function(&ctx, parts + !vs_needs_prolog, 4 - !vs_needs_prolog,
vs_needs_prolog, vs_needs_prolog ? 2 : 1,
shader->key.opt.same_patch_vertices);
shader->key.ge.opt.same_patch_vertices);
} else {
LLVMValueRef parts[2];
union si_shader_part_key epilog_key;
@ -1187,7 +1190,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
parts[0] = ctx.main_fn;
memset(&epilog_key, 0, sizeof(epilog_key));
epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog;
epilog_key.tcs_epilog.states = shader->key.ge.part.tcs.epilog;
si_llvm_build_tcs_epilog(&ctx, &epilog_key);
parts[1] = ctx.main_fn;
@ -1195,7 +1198,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
}
} else if (shader->is_monolithic && ctx.stage == MESA_SHADER_GEOMETRY) {
if (ctx.screen->info.chip_class >= GFX9) {
struct si_shader_selector *es = shader->key.part.gs.es;
struct si_shader_selector *es = shader->key.ge.part.gs.es;
LLVMValueRef es_prolog = NULL;
LLVMValueRef es_main = NULL;
LLVMValueRef gs_prolog = NULL;
@ -1204,8 +1207,8 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
/* GS prolog */
union si_shader_part_key gs_prolog_key;
memset(&gs_prolog_key, 0, sizeof(gs_prolog_key));
gs_prolog_key.gs_prolog.states = shader->key.part.gs.prolog;
gs_prolog_key.gs_prolog.as_ngg = shader->key.as_ngg;
gs_prolog_key.gs_prolog.states = shader->key.ge.part.gs.prolog;
gs_prolog_key.gs_prolog.as_ngg = shader->key.ge.as_ngg;
si_llvm_build_gs_prolog(&ctx, &gs_prolog_key);
gs_prolog = ctx.main_fn;
@ -1213,10 +1216,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
nir = si_get_nir_shader(es, NULL, &free_nir);
struct si_shader shader_es = {};
shader_es.selector = es;
shader_es.key.as_es = 1;
shader_es.key.as_ngg = shader->key.as_ngg;
shader_es.key.mono = shader->key.mono;
shader_es.key.opt = shader->key.opt;
shader_es.key.ge.as_es = 1;
shader_es.key.ge.as_ngg = shader->key.ge.as_ngg;
shader_es.key.ge.mono = shader->key.ge.mono;
shader_es.key.ge.opt = shader->key.ge.opt;
shader_es.is_monolithic = true;
if (!si_llvm_translate_nir(&ctx, &shader_es, nir, free_nir, false)) {
@ -1228,10 +1231,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
/* ES prolog */
if (es->info.stage == MESA_SHADER_VERTEX &&
si_vs_needs_prolog(es, &shader->key.part.gs.vs_prolog, &shader->key, false)) {
si_vs_needs_prolog(es, &shader->key.ge.part.gs.vs_prolog, &shader->key, false)) {
union si_shader_part_key vs_prolog_key;
si_get_vs_prolog_key(&es->info, shader_es.info.num_input_sgprs, false,
&shader->key.part.gs.vs_prolog, shader, &vs_prolog_key);
&shader->key.ge.part.gs.vs_prolog, shader, &vs_prolog_key);
vs_prolog_key.vs_prolog.is_monolithic = true;
si_llvm_build_vs_prolog(&ctx, &vs_prolog_key);
es_prolog = ctx.main_fn;
@ -1260,7 +1263,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
parts[1] = ctx.main_fn;
memset(&prolog_key, 0, sizeof(prolog_key));
prolog_key.gs_prolog.states = shader->key.part.gs.prolog;
prolog_key.gs_prolog.states = shader->key.ge.part.gs.prolog;
si_llvm_build_gs_prolog(&ctx, &prolog_key);
parts[0] = ctx.main_fn;

View file

@ -108,7 +108,7 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0);
ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1);
if (ctx->shader->key.as_ngg)
if (ctx->shader->key.ge.as_ngg)
ret = si_insert_input_ptr(ctx, ret, ctx->args.gs_tg_info, 2);
else
ret = si_insert_input_ret(ctx, ret, ctx->args.gs2vs_offset, 2);
@ -199,7 +199,7 @@ static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx)
static void emit_gs_epilogue(struct si_shader_context *ctx)
{
if (ctx->shader->key.as_ngg) {
if (ctx->shader->key.ge.as_ngg) {
gfx10_ngg_gs_emit_epilogue(ctx);
return;
}
@ -228,7 +228,7 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVM
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
if (ctx->shader->key.as_ngg) {
if (ctx->shader->key.ge.as_ngg) {
gfx10_ngg_gs_emit_vertex(ctx, stream, addrs);
return;
}
@ -303,7 +303,7 @@ static void si_llvm_emit_primitive(struct ac_shader_abi *abi, unsigned stream)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
if (ctx->shader->key.as_ngg) {
if (ctx->shader->key.ge.as_ngg) {
LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_0, ctx->gs_curprim_verts[stream]);
return;
}

View file

@ -79,22 +79,22 @@ static LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi)
args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 0, 16);
if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D)
if (!ctx->shader->key.ps.mono.fbfetch_is_1D)
args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 16, 16);
/* Get the current render target layer index. */
if (ctx->shader->key.mono.u.ps.fbfetch_layered)
if (ctx->shader->key.ps.mono.fbfetch_layered)
args.coords[chan++] = si_unpack_param(ctx, ctx->args.ancillary, 16, 11);
if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
if (ctx->shader->key.ps.mono.fbfetch_msaa)
args.coords[chan++] = si_get_sample_id(ctx);
if (ctx->shader->key.mono.u.ps.fbfetch_msaa && !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
if (ctx->shader->key.ps.mono.fbfetch_msaa && !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
fmask = ac_build_load_to_sgpr(&ctx->ac, ptr,
LLVMConstInt(ctx->ac.i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0));
ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords,
ctx->shader->key.mono.u.ps.fbfetch_layered);
ctx->shader->key.ps.mono.fbfetch_layered);
}
args.opcode = ac_image_load;
@ -102,13 +102,13 @@ static LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi)
args.dmask = 0xf;
args.attributes = AC_FUNC_ATTR_READNONE;
if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
if (ctx->shader->key.ps.mono.fbfetch_msaa)
args.dim =
ctx->shader->key.mono.u.ps.fbfetch_layered ? ac_image_2darraymsaa : ac_image_2dmsaa;
else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D)
args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? ac_image_1darray : ac_image_1d;
ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_2darraymsaa : ac_image_2dmsaa;
else if (ctx->shader->key.ps.mono.fbfetch_is_1D)
args.dim = ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_1darray : ac_image_1d;
else
args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? ac_image_2darray : ac_image_2d;
args.dim = ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_2darray : ac_image_2d;
return ac_build_image_opcode(&ctx->ac, &args);
}
@ -170,7 +170,7 @@ static void interp_fs_color(struct si_shader_context *ctx, unsigned input_index,
j = LLVMBuildExtractElement(ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
}
if (ctx->shader->key.part.ps.prolog.color_two_side) {
if (ctx->shader->key.ps.part.prolog.color_two_side) {
LLVMValueRef is_face_positive;
/* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1",
@ -199,13 +199,13 @@ static void interp_fs_color(struct si_shader_context *ctx, unsigned input_index,
static void si_alpha_test(struct si_shader_context *ctx, LLVMValueRef alpha)
{
if (ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_NEVER) {
if (ctx->shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_NEVER) {
static LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1] = {
[PIPE_FUNC_LESS] = LLVMRealOLT, [PIPE_FUNC_EQUAL] = LLVMRealOEQ,
[PIPE_FUNC_LEQUAL] = LLVMRealOLE, [PIPE_FUNC_GREATER] = LLVMRealOGT,
[PIPE_FUNC_NOTEQUAL] = LLVMRealONE, [PIPE_FUNC_GEQUAL] = LLVMRealOGE,
};
LLVMRealPredicate cond = cond_map[ctx->shader->key.part.ps.epilog.alpha_func];
LLVMRealPredicate cond = cond_map[ctx->shader->key.ps.part.epilog.alpha_func];
assert(cond);
LLVMValueRef alpha_ref = LLVMGetParam(ctx->main_fn, SI_PARAM_ALPHA_REF);
@ -274,8 +274,8 @@ static void si_llvm_init_ps_export_args(struct si_shader_context *ctx, LLVMValue
unsigned cbuf, unsigned compacted_mrt_index,
unsigned color_type, struct ac_export_args *args)
{
const struct si_shader_key *key = &ctx->shader->key;
unsigned col_formats = key->part.ps.epilog.spi_shader_col_format;
const union si_shader_key *key = &ctx->shader->key;
unsigned col_formats = key->ps.part.epilog.spi_shader_col_format;
LLVMValueRef f32undef = LLVMGetUndef(ctx->ac.f32);
unsigned spi_shader_col_format;
unsigned chan;
@ -284,8 +284,8 @@ static void si_llvm_init_ps_export_args(struct si_shader_context *ctx, LLVMValue
assert(cbuf < 8);
spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
is_int8 = (key->part.ps.epilog.color_is_int8 >> cbuf) & 0x1;
is_int10 = (key->part.ps.epilog.color_is_int10 >> cbuf) & 0x1;
is_int8 = (key->ps.part.epilog.color_is_int8 >> cbuf) & 0x1;
is_int10 = (key->ps.part.epilog.color_is_int10 >> cbuf) & 0x1;
/* Default is 0xf. Adjusted below depending on the format. */
args->enabled_channels = 0xf; /* writemask */
@ -411,31 +411,31 @@ static bool si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *col
int i;
/* Clamp color */
if (ctx->shader->key.part.ps.epilog.clamp_color)
if (ctx->shader->key.ps.part.epilog.clamp_color)
for (i = 0; i < 4; i++)
color[i] = ac_build_clamp(&ctx->ac, color[i]);
/* Alpha to one */
if (ctx->shader->key.part.ps.epilog.alpha_to_one)
if (ctx->shader->key.ps.part.epilog.alpha_to_one)
color[3] = LLVMConstReal(LLVMTypeOf(color[0]), 1);
/* Alpha test */
if (index == 0 && ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS)
if (index == 0 && ctx->shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS)
si_alpha_test(ctx, color[3]);
/* Line & polygon smoothing */
if (ctx->shader->key.part.ps.epilog.poly_line_smoothing)
if (ctx->shader->key.ps.part.epilog.poly_line_smoothing)
color[3] = si_scale_alpha_by_sample_mask(ctx, color[3], samplemask_param);
/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
if (ctx->shader->key.part.ps.epilog.last_cbuf > 0) {
if (ctx->shader->key.ps.part.epilog.last_cbuf > 0) {
struct ac_export_args args[8];
int c, last = -1;
assert(compacted_mrt_index == 0);
/* Get the export arguments, also find out what the last one is. */
for (c = 0; c <= ctx->shader->key.part.ps.epilog.last_cbuf; c++) {
for (c = 0; c <= ctx->shader->key.ps.part.epilog.last_cbuf; c++) {
si_llvm_init_ps_export_args(ctx, color, c, compacted_mrt_index,
color_type, &args[c]);
if (args[c].enabled_channels) {
@ -447,7 +447,7 @@ static bool si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *col
return false;
/* Emit all exports. */
for (c = 0; c <= ctx->shader->key.part.ps.epilog.last_cbuf; c++) {
for (c = 0; c <= ctx->shader->key.ps.part.epilog.last_cbuf; c++) {
if (is_last && last == c) {
args[c].valid_mask = 1; /* whether the EXEC mask is valid */
args[c].done = 1; /* DONE bit */

View file

@ -71,8 +71,8 @@ static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *
{
assert(ctx->stage == MESA_SHADER_TESS_CTRL);
if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
return util_last_bit64(ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) * 4;
if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy)
return util_last_bit64(ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy) * 4;
return util_last_bit64(ctx->shader->selector->outputs_written) * 4;
}
@ -86,7 +86,7 @@ static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)
static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
{
if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy)
return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 0, 13);
const struct si_shader_info *info = &ctx->shader->selector->info;
@ -160,7 +160,7 @@ static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
case MESA_SHADER_TESS_CTRL:
if (ctx->screen->info.chip_class >= GFX9 && ctx->shader->is_monolithic) {
stride = ctx->shader->key.part.tcs.ls->lshs_vertex_stride / 4;
stride = ctx->shader->key.ge.part.tcs.ls->lshs_vertex_stride / 4;
return LLVMConstInt(ctx->ac.i32, stride, 0);
}
return si_unpack_param(ctx, ctx->vs_state_bits, 24, 8);
@ -396,7 +396,7 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType
}
/* Load the TCS input from a VGPR if possible. */
if (ctx->shader->key.opt.same_patch_vertices &&
if (ctx->shader->key.ge.opt.same_patch_vertices &&
load_input && vertex_index_is_invoc_id && !param_index) {
unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 +
si_shader_io_get_unique_index(semantic, false) * 4;
@ -650,7 +650,7 @@ static void si_copy_tcs_inputs(struct si_shader_context *ctx)
lds_base = get_tcs_in_current_patch_offset(ctx);
lds_base = ac_build_imad(&ctx->ac, invocation_id, lds_vertex_stride, lds_base);
inputs = ctx->shader->key.mono.u.ff_tcs_inputs_to_copy;
inputs = ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy;
while (inputs) {
unsigned i = u_bit_scan64(&inputs);
@ -679,7 +679,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re
unsigned stride, outer_comps, inner_comps, i, offset;
/* Add a barrier before loading tess factors from LDS. */
if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
if (!shader->key.ge.part.tcs.epilog.invoc0_tess_factors_are_def)
si_llvm_emit_barrier(ctx);
/* Do this only for invocation 0, because the tess levels are per-patch,
@ -692,7 +692,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re
LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, invocation_id, ctx->ac.i32_0, ""), 6503);
/* Determine the layout of one tess factor element in the buffer. */
switch (shader->key.part.tcs.epilog.prim_mode) {
switch (shader->key.ge.part.tcs.epilog.prim_mode) {
case GL_LINES:
stride = 2; /* 2 dwords, 1 vec2 store */
outer_comps = 2;
@ -718,7 +718,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re
outer[i] = LLVMGetUndef(ctx->ac.i32);
}
if (shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) {
if (shader->key.ge.part.tcs.epilog.invoc0_tess_factors_are_def) {
/* Tess factors are in VGPRs. */
for (i = 0; i < outer_comps; i++)
outer[i] = out[i] = invoc0_tf_outer[i];
@ -745,7 +745,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re
}
}
if (shader->key.part.tcs.epilog.prim_mode == GL_LINES) {
if (shader->key.ge.part.tcs.epilog.prim_mode == GL_LINES) {
/* For isolines, the hardware expects tess factors in the
* reverse order from what NIR specifies.
*/
@ -789,7 +789,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re
ac_glc);
/* Store the tess factors into the offchip buffer if TES reads them. */
if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
if (shader->key.ge.part.tcs.epilog.tes_reads_tess_factors) {
LLVMValueRef buf, base, inner_vec, outer_vec, tf_outer_offset;
LLVMValueRef tf_inner_offset;
unsigned param_outer, param_inner;
@ -983,11 +983,11 @@ void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi)
LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
if (!shader->key.opt.same_patch_vertices ||
if (!shader->key.ge.opt.same_patch_vertices ||
!(ctx->next_shader_sel->tcs_vgpr_only_inputs & (1ull << semantic)))
lshs_lds_store(ctx, chan, dw_addr, value);
if (shader->key.opt.same_patch_vertices) {
if (shader->key.ge.opt.same_patch_vertices) {
ctx->return_value = LLVMBuildInsertValue(ctx->ac.builder, ctx->return_value,
value, ret_offset + param * 4 + chan, "");
}
@ -1084,11 +1084,11 @@ void si_llvm_init_tes_callbacks(struct si_shader_context *ctx, bool ngg_cull_sha
ctx->abi.load_tess_level = si_load_tess_level;
ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
if (ctx->shader->key.as_es)
if (ctx->shader->key.ge.as_es)
ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
else if (ngg_cull_shader)
ctx->abi.emit_outputs = gfx10_emit_ngg_culling_epilogue;
else if (ctx->shader->key.as_ngg)
else if (ctx->shader->key.ge.as_ngg)
ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue;
else
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;

View file

@ -131,8 +131,8 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
* of dword-sized data that needs fixups. We need to insert conversion
* code anyway, and the amd/common code does it for us.
*/
bool opencode = ctx->shader->key.mono.vs_fetch_opencode & (1 << input_index);
fix_fetch.bits = ctx->shader->key.mono.vs_fix_fetch[input_index].bits;
bool opencode = ctx->shader->key.ge.mono.vs_fetch_opencode & (1 << input_index);
fix_fetch.bits = ctx->shader->key.ge.mono.vs_fix_fetch[input_index].bits;
if (opencode || (fix_fetch.u.log_size == 3 && fix_fetch.u.format == AC_FETCH_FORMAT_FLOAT) ||
(fix_fetch.u.log_size == 2)) {
tmp = ac_build_opencoded_load_format(&ctx->ac, fix_fetch.u.log_size,
@ -400,7 +400,7 @@ static void si_llvm_emit_clipvertex(struct si_shader_context *ctx, struct ac_exp
LLVMValueRef constbuf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_CLIP_PLANES, 0);
LLVMValueRef const_resource = ac_build_load_to_sgpr(&ctx->ac, ptr, constbuf_index);
unsigned clipdist_mask = ctx->shader->selector->clipdist_mask &
~ctx->shader->key.opt.kill_clip_distances;
~ctx->shader->key.ge.opt.kill_clip_distances;
for (reg_index = 0; reg_index < 2; reg_index++) {
struct ac_export_args *args = &pos[2 + reg_index];
@ -484,7 +484,7 @@ static void si_prepare_param_exports(struct si_shader_context *ctx,
}
if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
shader->key.opt.kill_outputs &
shader->key.ge.opt.kill_outputs &
(1ull << si_shader_io_get_unique_index(semantic, true)))
continue;
@ -575,7 +575,7 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
viewport_index_value = NULL;
unsigned pos_idx, index;
unsigned clipdist_mask = (shader->selector->clipdist_mask &
~shader->key.opt.kill_clip_distances) |
~shader->key.ge.opt.kill_clip_distances) |
shader->selector->culldist_mask;
int i;
@ -629,8 +629,8 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
pos_args[0].out[3] = ctx->ac.f32_1; /* W */
}
bool writes_psize = shader->selector->info.writes_psize && !shader->key.opt.kill_pointsize;
bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.as_ngg;
bool writes_psize = shader->selector->info.writes_psize && !shader->key.ge.opt.kill_pointsize;
bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.ge.as_ngg;
bool writes_vrs = ctx->screen->options.vrs2x2;
/* Write the misc vector (point size, edgeflag, layer, viewport). */
@ -783,7 +783,7 @@ void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi)
si_llvm_emit_streamout(ctx, outputs, i, 0);
/* Export PrimitiveID. */
if (ctx->shader->key.mono.u.vs_export_prim_id) {
if (ctx->shader->key.ge.mono.u.vs_export_prim_id) {
outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID;
outputs[i].values[0] = ac_to_float(&ctx->ac, si_get_primitive_id(ctx, 0));
for (j = 1; j < 4; j++)
@ -990,13 +990,13 @@ void si_llvm_init_vs_callbacks(struct si_shader_context *ctx, bool ngg_cull_shad
{
struct si_shader *shader = ctx->shader;
if (shader->key.as_ls)
if (shader->key.ge.as_ls)
ctx->abi.emit_outputs = si_llvm_emit_ls_epilogue;
else if (shader->key.as_es)
else if (shader->key.ge.as_es)
ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
else if (ngg_cull_shader)
ctx->abi.emit_outputs = gfx10_emit_ngg_culling_epilogue;
else if (shader->key.as_ngg)
else if (shader->key.ge.as_ngg)
ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue;
else
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;

View file

@ -927,24 +927,24 @@ si_sqtt_pipeline_is_registered(struct ac_thread_trace_data *thread_trace_data,
static enum rgp_hardware_stages
si_sqtt_pipe_to_rgp_shader_stage(struct si_shader_key* key, enum pipe_shader_type stage)
si_sqtt_pipe_to_rgp_shader_stage(union si_shader_key* key, enum pipe_shader_type stage)
{
switch (stage) {
case PIPE_SHADER_VERTEX:
if (key->as_ls)
if (key->ge.as_ls)
return RGP_HW_STAGE_LS;
else if (key->as_es)
else if (key->ge.as_es)
return RGP_HW_STAGE_ES;
else if (key->as_ngg)
else if (key->ge.as_ngg)
return RGP_HW_STAGE_GS;
else
return RGP_HW_STAGE_VS;
case PIPE_SHADER_TESS_CTRL:
return RGP_HW_STAGE_HS;
case PIPE_SHADER_TESS_EVAL:
if (key->as_es)
if (key->ge.as_es)
return RGP_HW_STAGE_ES;
else if (key->as_ngg)
else if (key->ge.as_ngg)
return RGP_HW_STAGE_GS;
else
return RGP_HW_STAGE_VS;

View file

@ -123,7 +123,7 @@ static void si_emit_cb_render_state(struct si_context *sctx)
/* RB+ register settings. */
if (sctx->screen->info.rbplus_allowed) {
unsigned spi_shader_col_format =
sctx->shader.ps.cso ? sctx->shader.ps.current->key.part.ps.epilog.spi_shader_col_format
sctx->shader.ps.cso ? sctx->shader.ps.current->key.ps.part.epilog.spi_shader_col_format
: 0;
unsigned sx_ps_downconvert = 0;
unsigned sx_blend_opt_epsilon = 0;

View file

@ -478,6 +478,8 @@ struct si_buffer_resources {
} while (0)
/* si_descriptors.c */
void si_get_inline_uniform_state(union si_shader_key *key, enum pipe_shader_type shader,
bool *inline_uniforms, uint32_t **inlined_values);
void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture *tex,
const struct legacy_surf_level *base_level_info,
unsigned base_level, unsigned first_level, unsigned block_width,
@ -572,17 +574,14 @@ void si_schedule_initial_compile(struct si_context *sctx, gl_shader_stage stage,
util_queue_execute_func execute);
void si_get_active_slot_masks(const struct si_shader_info *info, uint64_t *const_and_shader_buffers,
uint64_t *samplers_and_images);
int si_shader_select_with_key(struct si_context *sctx, struct si_shader_ctx_state *state,
const struct si_shader_key *key, int thread_index,
bool optimized_or_none);
int si_shader_select(struct pipe_context *ctx, struct si_shader_ctx_state *state);
void si_vs_key_update_inputs(struct si_context *sctx);
void si_get_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key,
void si_get_vs_key_inputs(struct si_context *sctx, union si_shader_key *key,
struct si_vs_prolog_bits *prolog_key);
void si_update_ps_inputs_read_or_disabled(struct si_context *sctx);
void si_update_ps_kill_enable(struct si_context *sctx);
void si_update_vrs_flat_shading(struct si_context *sctx);
unsigned si_get_input_prim(const struct si_shader_selector *gs, const struct si_shader_key *key);
unsigned si_get_input_prim(const struct si_shader_selector *gs, const union si_shader_key *key);
bool si_update_ngg(struct si_context *sctx);
void si_ps_key_update_framebuffer(struct si_context *sctx);
void si_ps_key_update_framebuffer_blend(struct si_context *sctx);

View file

@ -113,7 +113,7 @@ static bool si_update_shaders(struct si_context *sctx)
unsigned old_pa_cl_vs_out_cntl = old_vs ? old_vs->pa_cl_vs_out_cntl : 0;
struct si_shader *old_ps = sctx->shader.ps.current;
unsigned old_spi_shader_col_format =
old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
old_ps ? old_ps->key.ps.part.epilog.spi_shader_col_format : 0;
int r;
/* Update TCS and TES. */
@ -136,7 +136,7 @@ static bool si_update_shaders(struct si_context *sctx)
if (!sctx->fixed_func_tcs_shader.cso)
return false;
sctx->fixed_func_tcs_shader.key.part.tcs.epilog.invoc0_tess_factors_are_def =
sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.invoc0_tess_factors_are_def =
sctx->fixed_func_tcs_shader.cso->info.tessfactors_are_def_in_all_invocs;
}
@ -260,12 +260,12 @@ static bool si_update_shaders(struct si_context *sctx)
if ((GFX_VERSION >= GFX10_3 || (GFX_VERSION >= GFX9 && sctx->screen->info.rbplus_allowed)) &&
si_pm4_state_changed(sctx, ps) &&
(!old_ps || old_spi_shader_col_format !=
sctx->shader.ps.current->key.part.ps.epilog.spi_shader_col_format))
sctx->shader.ps.current->key.ps.part.epilog.spi_shader_col_format))
si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);
if (sctx->smoothing_enabled !=
sctx->shader.ps.current->key.part.ps.epilog.poly_line_smoothing) {
sctx->smoothing_enabled = sctx->shader.ps.current->key.part.ps.epilog.poly_line_smoothing;
sctx->shader.ps.current->key.ps.part.epilog.poly_line_smoothing) {
sctx->smoothing_enabled = sctx->shader.ps.current->key.ps.part.epilog.poly_line_smoothing;
si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
/* NGG cull state uses smoothing_enabled. */
@ -527,7 +527,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
else
ls_current = sctx->fixed_func_tcs_shader.current;
ls = ls_current->key.part.tcs.ls;
ls = ls_current->key.ge.part.tcs.ls;
} else {
ls_current = sctx->shader.vs.current;
ls = sctx->shader.vs.cso;
@ -567,7 +567,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
unsigned input_patch_size;
/* Allocate LDS for TCS inputs only if it's used. */
if (!ls_current->key.opt.same_patch_vertices ||
if (!ls_current->key.ge.opt.same_patch_vertices ||
tcs->info.base.inputs_read & ~tcs->tcs_vgpr_only_inputs)
input_patch_size = num_tcs_input_cp * input_vertex_size;
else
@ -2078,8 +2078,8 @@ static void si_draw(struct pipe_context *ctx,
GFX_VERSION >= GFX9 &&
tcs && sctx->patch_vertices == tcs->info.base.tess.tcs_vertices_out;
if (sctx->shader.tcs.key.opt.same_patch_vertices != same_patch_vertices) {
sctx->shader.tcs.key.opt.same_patch_vertices = same_patch_vertices;
if (sctx->shader.tcs.key.ge.opt.same_patch_vertices != same_patch_vertices) {
sctx->shader.tcs.key.ge.opt.same_patch_vertices = same_patch_vertices;
sctx->do_update_shaders = true;
}
@ -2094,9 +2094,9 @@ static void si_draw(struct pipe_context *ctx,
bool ls_vgpr_fix =
tcs && sctx->patch_vertices > tcs->info.base.tess.tcs_vertices_out;
if (ls_vgpr_fix != sctx->shader.tcs.key.part.tcs.ls_prolog.ls_vgpr_fix) {
sctx->shader.tcs.key.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
sctx->fixed_func_tcs_shader.key.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
if (ls_vgpr_fix != sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix) {
sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
sctx->fixed_func_tcs_shader.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
sctx->do_update_shaders = true;
}
}
@ -2133,8 +2133,8 @@ static void si_draw(struct pipe_context *ctx,
bool gs_tri_strip_adj_fix =
!HAS_TESS && prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY;
if (gs_tri_strip_adj_fix != sctx->shader.gs.key.part.gs.prolog.tri_strip_adj_fix) {
sctx->shader.gs.key.part.gs.prolog.tri_strip_adj_fix = gs_tri_strip_adj_fix;
if (gs_tri_strip_adj_fix != sctx->shader.gs.key.ge.part.gs.prolog.tri_strip_adj_fix) {
sctx->shader.gs.key.ge.part.gs.prolog.tri_strip_adj_fix = gs_tri_strip_adj_fix;
sctx->do_update_shaders = true;
}
}
@ -2326,7 +2326,7 @@ static void si_draw(struct pipe_context *ctx,
* hasn't finished. Set it to the correct value in si_context.
*/
if (GFX_VERSION >= GFX10 && NGG)
sctx->ngg_culling = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->key.opt.ngg_culling;
sctx->ngg_culling = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->key.ge.opt.ngg_culling;
}
/* Since we've called si_context_add_resource_size for vertex buffers,

View file

@ -360,7 +360,7 @@ bool si_shader_mem_ordered(struct si_shader *shader)
shader->config.scratch_bytes_per_wave ||
(info->stage == MESA_SHADER_FRAGMENT &&
(info->base.fs.uses_fbfetch_output ||
shader->key.part.ps.prolog.poly_stipple));
shader->key.ps.part.prolog.poly_stipple));
if (prev_info) {
sampler_or_bvh |= prev_info->uses_vmem_return_type_sampler_or_bvh;
@ -455,7 +455,7 @@ static void polaris_set_vgt_vertex_reuse(struct si_screen *sscreen, struct si_sh
/* VS as VS, or VS as ES: */
if ((sel->info.stage == MESA_SHADER_VERTEX &&
(!shader->key.as_ls && !shader->is_gs_copy_shader)) ||
(!shader->key.ge.as_ls && !shader->is_gs_copy_shader)) ||
/* TES as VS, or TES as ES: */
sel->info.stage == MESA_SHADER_TESS_EVAL) {
unsigned vtx_reuse_depth = 30;
@ -504,7 +504,7 @@ static unsigned si_get_vs_vgpr_comp_cnt(struct si_screen *sscreen, struct si_sha
* GFX10 LS (VertexID, RelAutoIndex, UserVGPR1, UserVGPR2 or InstanceID)
* GFX10 ES,VS (VertexID, UserVGPR1, UserVGPR2 or VSPrimID, UserVGPR3 or InstanceID)
*/
bool is_ls = shader->selector->info.stage == MESA_SHADER_TESS_CTRL || shader->key.as_ls;
bool is_ls = shader->selector->info.stage == MESA_SHADER_TESS_CTRL || shader->key.ge.as_ls;
unsigned max = 0;
if (shader->info.uses_instanceid) {
@ -810,7 +810,7 @@ static void si_emit_shader_gs(struct si_context *sctx)
SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
shader->ctx_reg.gs.vgt_esgs_ring_itemsize);
if (shader->key.part.gs.es->info.stage == MESA_SHADER_TESS_EVAL)
if (shader->key.ge.part.gs.es->info.stage == MESA_SHADER_TESS_EVAL)
radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM,
shader->vgt_tf_param);
if (shader->vgt_vertex_reuse_block_cntl)
@ -886,13 +886,13 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
if (sscreen->info.chip_class >= GFX9) {
unsigned input_prim = sel->info.base.gs.input_primitive;
gl_shader_stage es_stage = shader->key.part.gs.es->info.stage;
gl_shader_stage es_stage = shader->key.ge.part.gs.es->info.stage;
unsigned es_vgpr_comp_cnt, gs_vgpr_comp_cnt;
if (es_stage == MESA_SHADER_VERTEX) {
es_vgpr_comp_cnt = si_get_vs_vgpr_comp_cnt(sscreen, shader, false);
} else if (es_stage == MESA_SHADER_TESS_EVAL)
es_vgpr_comp_cnt = shader->key.part.gs.es->info.uses_primid ? 3 : 2;
es_vgpr_comp_cnt = shader->key.ge.part.gs.es->info.uses_primid ? 3 : 2;
else
unreachable("invalid shader selector type");
@ -952,12 +952,12 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
S_028A44_GS_INST_PRIMS_IN_SUBGRP(shader->gs_info.gs_inst_prims_in_subgroup);
shader->ctx_reg.gs.vgt_gs_max_prims_per_subgroup =
S_028A94_MAX_PRIMS_PER_SUBGROUP(shader->gs_info.max_prims_per_subgroup);
shader->ctx_reg.gs.vgt_esgs_ring_itemsize = shader->key.part.gs.es->esgs_itemsize / 4;
shader->ctx_reg.gs.vgt_esgs_ring_itemsize = shader->key.ge.part.gs.es->esgs_itemsize / 4;
if (es_stage == MESA_SHADER_TESS_EVAL)
si_set_tesseval_regs(sscreen, shader->key.part.gs.es, shader);
si_set_tesseval_regs(sscreen, shader->key.ge.part.gs.es, shader);
polaris_set_vgt_vertex_reuse(sscreen, shader->key.part.gs.es, shader);
polaris_set_vgt_vertex_reuse(sscreen, shader->key.ge.part.gs.es, shader);
} else {
shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs = S_00B21C_CU_EN(0xffff) |
S_00B21C_WAVE_LIMIT(0x3F);
@ -994,7 +994,7 @@ bool gfx10_is_ngg_passthrough(struct si_shader *shader)
*
* NGG passthrough still allows the use of LDS.
*/
return sel->info.stage != MESA_SHADER_GEOMETRY && !shader->key.opt.ngg_culling;
return sel->info.stage != MESA_SHADER_GEOMETRY && !shader->key.ge.opt.ngg_culling;
}
/* Common tail code for NGG primitive shaders. */
@ -1094,7 +1094,7 @@ static void gfx10_emit_shader_ngg_tess_gs(struct si_context *sctx)
gfx10_emit_shader_ngg_tail(sctx, shader);
}
unsigned si_get_input_prim(const struct si_shader_selector *gs, const struct si_shader_key *key)
unsigned si_get_input_prim(const struct si_shader_selector *gs, const union si_shader_key *key)
{
if (gs->info.stage == MESA_SHADER_GEOMETRY)
return gs->info.base.gs.input_primitive;
@ -1107,7 +1107,7 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs, const struct si_
return PIPE_PRIM_TRIANGLES;
}
if (key->opt.ngg_culling & SI_NGG_CULL_LINES)
if (key->ge.opt.ngg_culling & SI_NGG_CULL_LINES)
return PIPE_PRIM_LINES;
return PIPE_PRIM_TRIANGLES; /* worst case for all callers */
@ -1117,9 +1117,9 @@ static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel,
const struct si_shader *shader, bool ngg)
{
/* Clip distances can be killed, but cull distances can't. */
unsigned clipcull_mask = (sel->clipdist_mask & ~shader->key.opt.kill_clip_distances) |
unsigned clipcull_mask = (sel->clipdist_mask & ~shader->key.ge.opt.kill_clip_distances) |
sel->culldist_mask;
bool writes_psize = sel->info.writes_psize && !shader->key.opt.kill_pointsize;
bool writes_psize = sel->info.writes_psize && !shader->key.ge.opt.kill_pointsize;
bool misc_vec_ena = writes_psize || (sel->info.writes_edgeflag && !ngg) ||
sel->screen->options.vrs2x2 ||
sel->info.writes_layer || sel->info.writes_viewport_index;
@ -1153,7 +1153,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
uint64_t va;
bool window_space = gs_info->stage == MESA_SHADER_VERTEX ?
gs_info->base.vs.window_space_position : 0;
bool es_enable_prim_id = shader->key.mono.u.vs_export_prim_id || es_info->uses_primid;
bool es_enable_prim_id = shader->key.ge.mono.u.vs_export_prim_id || es_info->uses_primid;
unsigned gs_num_invocations = MAX2(gs_sel->info.base.gs.invocations, 1);
unsigned input_prim = si_get_input_prim(gs_sel, &shader->key);
bool break_wave_at_eoi = false;
@ -1200,7 +1200,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
(gfx10_edgeflags_have_effect(shader) && !gfx10_is_ngg_passthrough(shader)))
gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID, edge flags. */
else if ((gs_stage == MESA_SHADER_GEOMETRY && gs_info->uses_primid) ||
(gs_stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id))
(gs_stage == MESA_SHADER_VERTEX && shader->key.ge.mono.u.vs_export_prim_id))
gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
else if (input_prim >= PIPE_PRIM_TRIANGLES && !gfx10_is_ngg_passthrough(shader))
gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
@ -1210,7 +1210,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
unsigned wave_size = si_get_shader_wave_size(shader);
unsigned late_alloc_wave64, cu_mask;
ac_compute_late_alloc(&sscreen->info, true, shader->key.opt.ngg_culling,
ac_compute_late_alloc(&sscreen->info, true, shader->key.ge.opt.ngg_culling,
shader->config.scratch_bytes_per_wave > 0,
&late_alloc_wave64, &cu_mask);
@ -1255,7 +1255,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
shader->ctx_reg.ngg.vgt_primitiveid_en =
S_028A84_PRIMITIVEID_EN(es_enable_prim_id) |
S_028A84_NGG_DISABLE_PROVOK_REUSE(shader->key.mono.u.vs_export_prim_id ||
S_028A84_NGG_DISABLE_PROVOK_REUSE(shader->key.ge.mono.u.vs_export_prim_id ||
gs_sel->info.writes_primid);
if (gs_stage == MESA_SHADER_GEOMETRY) {
@ -1293,7 +1293,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
/* Oversubscribe PC. This improves performance when there are too many varyings. */
unsigned oversub_pc_factor = 1;
if (shader->key.opt.ngg_culling) {
if (shader->key.ge.opt.ngg_culling) {
/* Be more aggressive with NGG culling. */
if (shader->info.nr_param_exports > 4)
oversub_pc_factor = 4;
@ -1418,7 +1418,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
unsigned nparams, oc_lds_en;
bool window_space = info->stage == MESA_SHADER_VERTEX ?
info->base.vs.window_space_position : 0;
bool enable_prim_id = shader->key.mono.u.vs_export_prim_id || info->uses_primid;
bool enable_prim_id = shader->key.ge.mono.u.vs_export_prim_id || info->uses_primid;
pm4 = si_get_shader_pm4_state(shader);
if (!pm4)
@ -1556,7 +1556,7 @@ static unsigned si_get_ps_num_interp(struct si_shader *ps)
struct si_shader_info *info = &ps->selector->info;
unsigned num_colors = !!(info->colors_read & 0x0f) + !!(info->colors_read & 0xf0);
unsigned num_interp =
ps->selector->info.num_inputs + (ps->key.part.ps.prolog.color_two_side ? num_colors : 0);
ps->selector->info.num_inputs + (ps->key.ps.part.prolog.color_two_side ? num_colors : 0);
assert(num_interp <= 32);
return MIN2(num_interp, 32);
@ -1564,7 +1564,7 @@ static unsigned si_get_ps_num_interp(struct si_shader *ps)
static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
{
unsigned spi_shader_col_format = shader->key.part.ps.epilog.spi_shader_col_format;
unsigned spi_shader_col_format = shader->key.ps.part.epilog.spi_shader_col_format;
unsigned value = 0, num_mrts = 0;
unsigned i, num_targets = (util_last_bit(spi_shader_col_format) + 3) / 4;
@ -1628,23 +1628,23 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
G_0286CC_PERSP_PULL_MODEL_ENA(input_ena));
/* Validate interpolation optimization flags (read as implications). */
assert(!shader->key.part.ps.prolog.bc_optimize_for_persp ||
assert(!shader->key.ps.part.prolog.bc_optimize_for_persp ||
(G_0286CC_PERSP_CENTER_ENA(input_ena) && G_0286CC_PERSP_CENTROID_ENA(input_ena)));
assert(!shader->key.part.ps.prolog.bc_optimize_for_linear ||
assert(!shader->key.ps.part.prolog.bc_optimize_for_linear ||
(G_0286CC_LINEAR_CENTER_ENA(input_ena) && G_0286CC_LINEAR_CENTROID_ENA(input_ena)));
assert(!shader->key.part.ps.prolog.force_persp_center_interp ||
assert(!shader->key.ps.part.prolog.force_persp_center_interp ||
(!G_0286CC_PERSP_SAMPLE_ENA(input_ena) && !G_0286CC_PERSP_CENTROID_ENA(input_ena)));
assert(!shader->key.part.ps.prolog.force_linear_center_interp ||
assert(!shader->key.ps.part.prolog.force_linear_center_interp ||
(!G_0286CC_LINEAR_SAMPLE_ENA(input_ena) && !G_0286CC_LINEAR_CENTROID_ENA(input_ena)));
assert(!shader->key.part.ps.prolog.force_persp_sample_interp ||
assert(!shader->key.ps.part.prolog.force_persp_sample_interp ||
(!G_0286CC_PERSP_CENTER_ENA(input_ena) && !G_0286CC_PERSP_CENTROID_ENA(input_ena)));
assert(!shader->key.part.ps.prolog.force_linear_sample_interp ||
assert(!shader->key.ps.part.prolog.force_linear_sample_interp ||
(!G_0286CC_LINEAR_CENTER_ENA(input_ena) && !G_0286CC_LINEAR_CENTROID_ENA(input_ena)));
/* Validate cases when the optimizations are off (read as implications). */
assert(shader->key.part.ps.prolog.bc_optimize_for_persp ||
assert(shader->key.ps.part.prolog.bc_optimize_for_persp ||
!G_0286CC_PERSP_CENTER_ENA(input_ena) || !G_0286CC_PERSP_CENTROID_ENA(input_ena));
assert(shader->key.part.ps.prolog.bc_optimize_for_linear ||
assert(shader->key.ps.part.prolog.bc_optimize_for_linear ||
!G_0286CC_LINEAR_CENTER_ENA(input_ena) || !G_0286CC_LINEAR_CENTROID_ENA(input_ena));
pm4 = si_get_shader_pm4_state(shader);
@ -1685,7 +1685,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1);
spi_shader_col_format = si_get_spi_shader_col_format(shader);
cb_shader_mask = ac_get_cb_shader_mask(shader->key.part.ps.epilog.spi_shader_col_format);
cb_shader_mask = ac_get_cb_shader_mask(shader->key.ps.part.epilog.spi_shader_col_format);
/* Ensure that some export memory is always allocated, for two reasons:
*
@ -1703,7 +1703,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
* instructions if any are present.
*/
if ((sscreen->info.chip_class <= GFX9 || info->base.fs.uses_discard ||
shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS) &&
shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS) &&
!spi_shader_col_format && !info->writes_z && !info->writes_stencil &&
!info->writes_samplemask)
spi_shader_col_format = V_028714_SPI_SHADER_32_R;
@ -1750,11 +1750,11 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader
{
switch (shader->selector->info.stage) {
case MESA_SHADER_VERTEX:
if (shader->key.as_ls)
if (shader->key.ge.as_ls)
si_shader_ls(sscreen, shader);
else if (shader->key.as_es)
else if (shader->key.ge.as_es)
si_shader_es(sscreen, shader);
else if (shader->key.as_ngg)
else if (shader->key.ge.as_ngg)
gfx10_shader_ngg(sscreen, shader);
else
si_shader_vs(sscreen, shader, NULL);
@ -1763,15 +1763,15 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader
si_shader_hs(sscreen, shader);
break;
case MESA_SHADER_TESS_EVAL:
if (shader->key.as_es)
if (shader->key.ge.as_es)
si_shader_es(sscreen, shader);
else if (shader->key.as_ngg)
else if (shader->key.ge.as_ngg)
gfx10_shader_ngg(sscreen, shader);
else
si_shader_vs(sscreen, shader, NULL);
break;
case MESA_SHADER_GEOMETRY:
if (shader->key.as_ngg)
if (shader->key.ge.as_ngg)
gfx10_shader_ngg(sscreen, shader);
else
si_shader_gs(sscreen, shader);
@ -1784,27 +1784,27 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader
}
}
static void si_clear_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key,
static void si_clear_vs_key_inputs(struct si_context *sctx, union si_shader_key *key,
struct si_vs_prolog_bits *prolog_key)
{
prolog_key->instance_divisor_is_one = 0;
prolog_key->instance_divisor_is_fetched = 0;
key->mono.vs_fetch_opencode = 0;
memset(key->mono.vs_fix_fetch, 0, sizeof(key->mono.vs_fix_fetch));
key->ge.mono.vs_fetch_opencode = 0;
memset(key->ge.mono.vs_fix_fetch, 0, sizeof(key->ge.mono.vs_fix_fetch));
}
void si_vs_key_update_inputs(struct si_context *sctx)
{
struct si_shader_selector *vs = sctx->shader.vs.cso;
struct si_vertex_elements *elts = sctx->vertex_elements;
struct si_shader_key *key = &sctx->shader.vs.key;
union si_shader_key *key = &sctx->shader.vs.key;
if (!vs)
return;
if (vs->info.base.vs.blit_sgprs_amd) {
si_clear_vs_key_inputs(sctx, key, &key->part.vs.prolog);
key->opt.prefer_mono = 0;
si_clear_vs_key_inputs(sctx, key, &key->ge.part.vs.prolog);
key->ge.opt.prefer_mono = 0;
sctx->uses_nontrivial_vs_prolog = false;
return;
}
@ -1814,9 +1814,9 @@ void si_vs_key_update_inputs(struct si_context *sctx)
if (elts->instance_divisor_is_one || elts->instance_divisor_is_fetched)
uses_nontrivial_vs_prolog = true;
key->part.vs.prolog.instance_divisor_is_one = elts->instance_divisor_is_one;
key->part.vs.prolog.instance_divisor_is_fetched = elts->instance_divisor_is_fetched;
key->opt.prefer_mono = elts->instance_divisor_is_fetched;
key->ge.part.vs.prolog.instance_divisor_is_one = elts->instance_divisor_is_one;
key->ge.part.vs.prolog.instance_divisor_is_fetched = elts->instance_divisor_is_fetched;
key->ge.opt.prefer_mono = elts->instance_divisor_is_fetched;
unsigned count_mask = (1 << vs->info.num_inputs) - 1;
unsigned fix = elts->fix_fetch_always & count_mask;
@ -1837,17 +1837,17 @@ void si_vs_key_update_inputs(struct si_context *sctx)
}
}
memset(key->mono.vs_fix_fetch, 0, sizeof(key->mono.vs_fix_fetch));
memset(key->ge.mono.vs_fix_fetch, 0, sizeof(key->ge.mono.vs_fix_fetch));
while (fix) {
unsigned i = u_bit_scan(&fix);
uint8_t fix_fetch = elts->fix_fetch[i];
key->mono.vs_fix_fetch[i].bits = fix_fetch;
key->ge.mono.vs_fix_fetch[i].bits = fix_fetch;
if (fix_fetch)
uses_nontrivial_vs_prolog = true;
}
key->mono.vs_fetch_opencode = opencode;
key->ge.mono.vs_fetch_opencode = opencode;
if (opencode)
uses_nontrivial_vs_prolog = true;
@ -1863,18 +1863,18 @@ void si_vs_key_update_inputs(struct si_context *sctx)
* cases.
*/
if (uses_nontrivial_vs_prolog && sctx->force_trivial_vs_prolog)
si_clear_vs_key_inputs(sctx, key, &key->part.vs.prolog);
si_clear_vs_key_inputs(sctx, key, &key->ge.part.vs.prolog);
}
void si_get_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key,
void si_get_vs_key_inputs(struct si_context *sctx, union si_shader_key *key,
struct si_vs_prolog_bits *prolog_key)
{
prolog_key->instance_divisor_is_one = sctx->shader.vs.key.part.vs.prolog.instance_divisor_is_one;
prolog_key->instance_divisor_is_fetched = sctx->shader.vs.key.part.vs.prolog.instance_divisor_is_fetched;
prolog_key->instance_divisor_is_one = sctx->shader.vs.key.ge.part.vs.prolog.instance_divisor_is_one;
prolog_key->instance_divisor_is_fetched = sctx->shader.vs.key.ge.part.vs.prolog.instance_divisor_is_fetched;
key->mono.vs_fetch_opencode = sctx->shader.vs.key.mono.vs_fetch_opencode;
memcpy(key->mono.vs_fix_fetch, sctx->shader.vs.key.mono.vs_fix_fetch,
sizeof(key->mono.vs_fix_fetch));
key->ge.mono.vs_fetch_opencode = sctx->shader.vs.key.ge.mono.vs_fetch_opencode;
memcpy(key->ge.mono.vs_fix_fetch, sctx->shader.vs.key.ge.mono.vs_fix_fetch,
sizeof(key->ge.mono.vs_fix_fetch));
}
void si_update_ps_inputs_read_or_disabled(struct si_context *sctx)
@ -1898,53 +1898,53 @@ void si_update_ps_inputs_read_or_disabled(struct si_context *sctx)
}
static void si_get_vs_key_outputs(struct si_context *sctx, struct si_shader_selector *vs,
struct si_shader_key *key)
union si_shader_key *key)
{
key->opt.kill_clip_distances = vs->clipdist_mask & ~sctx->queued.named.rasterizer->clip_plane_enable;
key->ge.opt.kill_clip_distances = vs->clipdist_mask & ~sctx->queued.named.rasterizer->clip_plane_enable;
/* Find out which VS outputs aren't used by the PS. */
uint64_t outputs_written = vs->outputs_written_before_ps;
uint64_t linked = outputs_written & sctx->ps_inputs_read_or_disabled;
key->opt.kill_outputs = ~linked & outputs_written;
key->ge.opt.kill_outputs = ~linked & outputs_written;
if (vs->info.stage != MESA_SHADER_GEOMETRY) {
key->opt.ngg_culling = sctx->ngg_culling;
key->mono.u.vs_export_prim_id = sctx->shader.ps.cso && sctx->shader.ps.cso->info.uses_primid;
key->ge.opt.ngg_culling = sctx->ngg_culling;
key->ge.mono.u.vs_export_prim_id = sctx->shader.ps.cso && sctx->shader.ps.cso->info.uses_primid;
} else {
key->opt.ngg_culling = 0;
key->mono.u.vs_export_prim_id = 0;
key->ge.opt.ngg_culling = 0;
key->ge.mono.u.vs_export_prim_id = 0;
}
key->opt.kill_pointsize = vs->info.writes_psize &&
sctx->current_rast_prim != PIPE_PRIM_POINTS &&
!sctx->queued.named.rasterizer->polygon_mode_is_points;
key->ge.opt.kill_pointsize = vs->info.writes_psize &&
sctx->current_rast_prim != PIPE_PRIM_POINTS &&
!sctx->queued.named.rasterizer->polygon_mode_is_points;
}
static void si_clear_vs_key_outputs(struct si_context *sctx, struct si_shader_selector *vs,
struct si_shader_key *key)
union si_shader_key *key)
{
key->opt.kill_clip_distances = 0;
key->opt.kill_outputs = 0;
key->opt.ngg_culling = 0;
key->mono.u.vs_export_prim_id = 0;
key->opt.kill_pointsize = 0;
key->ge.opt.kill_clip_distances = 0;
key->ge.opt.kill_outputs = 0;
key->ge.opt.ngg_culling = 0;
key->ge.mono.u.vs_export_prim_id = 0;
key->ge.opt.kill_pointsize = 0;
}
void si_ps_key_update_framebuffer(struct si_context *sctx)
{
struct si_shader_selector *sel = sctx->shader.ps.cso;
struct si_shader_key *key = &sctx->shader.ps.key;
union si_shader_key *key = &sctx->shader.ps.key;
if (!sel)
return;
if (sel->info.color0_writes_all_cbufs &&
sel->info.colors_written == 0x1)
key->part.ps.epilog.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
key->ps.part.epilog.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
else
key->part.ps.epilog.last_cbuf = 0;
key->ps.part.epilog.last_cbuf = 0;
/* ps_uses_fbfetch is true only if the color buffer is bound. */
if (sctx->ps_uses_fbfetch) {
@ -1952,25 +1952,25 @@ void si_ps_key_update_framebuffer(struct si_context *sctx)
struct pipe_resource *tex = cb0->texture;
/* 1D textures are allocated and used as 2D on GFX9. */
key->mono.u.ps.fbfetch_msaa = sctx->framebuffer.nr_samples > 1;
key->mono.u.ps.fbfetch_is_1D =
key->ps.mono.fbfetch_msaa = sctx->framebuffer.nr_samples > 1;
key->ps.mono.fbfetch_is_1D =
sctx->chip_class != GFX9 &&
(tex->target == PIPE_TEXTURE_1D || tex->target == PIPE_TEXTURE_1D_ARRAY);
key->mono.u.ps.fbfetch_layered =
key->ps.mono.fbfetch_layered =
tex->target == PIPE_TEXTURE_1D_ARRAY || tex->target == PIPE_TEXTURE_2D_ARRAY ||
tex->target == PIPE_TEXTURE_CUBE || tex->target == PIPE_TEXTURE_CUBE_ARRAY ||
tex->target == PIPE_TEXTURE_3D;
} else {
key->mono.u.ps.fbfetch_msaa = 0;
key->mono.u.ps.fbfetch_is_1D = 0;
key->mono.u.ps.fbfetch_layered = 0;
key->ps.mono.fbfetch_msaa = 0;
key->ps.mono.fbfetch_is_1D = 0;
key->ps.mono.fbfetch_layered = 0;
}
}
void si_ps_key_update_framebuffer_blend(struct si_context *sctx)
{
struct si_shader_selector *sel = sctx->shader.ps.cso;
struct si_shader_key *key = &sctx->shader.ps.key;
union si_shader_key *key = &sctx->shader.ps.key;
struct si_state_blend *blend = sctx->queued.named.blend;
if (!sel)
@ -1979,7 +1979,7 @@ void si_ps_key_update_framebuffer_blend(struct si_context *sctx)
/* Select the shader color format based on whether
* blending or alpha are needed.
*/
key->part.ps.epilog.spi_shader_col_format =
key->ps.part.epilog.spi_shader_col_format =
(blend->blend_enable_4bit & blend->need_src_alpha_4bit &
sctx->framebuffer.spi_shader_col_format_blend_alpha) |
(blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
@ -1988,36 +1988,36 @@ void si_ps_key_update_framebuffer_blend(struct si_context *sctx)
sctx->framebuffer.spi_shader_col_format_alpha) |
(~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
sctx->framebuffer.spi_shader_col_format);
key->part.ps.epilog.spi_shader_col_format &= blend->cb_target_enabled_4bit;
key->ps.part.epilog.spi_shader_col_format &= blend->cb_target_enabled_4bit;
/* The output for dual source blending should have
* the same format as the first output.
*/
if (blend->dual_src_blend) {
key->part.ps.epilog.spi_shader_col_format |=
(key->part.ps.epilog.spi_shader_col_format & 0xf) << 4;
key->ps.part.epilog.spi_shader_col_format |=
(key->ps.part.epilog.spi_shader_col_format & 0xf) << 4;
}
/* If alpha-to-coverage is enabled, we have to export alpha
* even if there is no color buffer.
*/
if (!(key->part.ps.epilog.spi_shader_col_format & 0xf) && blend->alpha_to_coverage)
key->part.ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
if (!(key->ps.part.epilog.spi_shader_col_format & 0xf) && blend->alpha_to_coverage)
key->ps.part.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
/* On GFX6 and GFX7 except Hawaii, the CB doesn't clamp outputs
* to the range supported by the type if a channel has less
* than 16 bits and the export format is 16_ABGR.
*/
if (sctx->chip_class <= GFX7 && sctx->family != CHIP_HAWAII) {
key->part.ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8;
key->part.ps.epilog.color_is_int10 = sctx->framebuffer.color_is_int10;
key->ps.part.epilog.color_is_int8 = sctx->framebuffer.color_is_int8;
key->ps.part.epilog.color_is_int10 = sctx->framebuffer.color_is_int10;
}
/* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */
if (!key->part.ps.epilog.last_cbuf) {
key->part.ps.epilog.spi_shader_col_format &= sel->colors_written_4bit;
key->part.ps.epilog.color_is_int8 &= sel->info.colors_written;
key->part.ps.epilog.color_is_int10 &= sel->info.colors_written;
if (!key->ps.part.epilog.last_cbuf) {
key->ps.part.epilog.spi_shader_col_format &= sel->colors_written_4bit;
key->ps.part.epilog.color_is_int8 &= sel->info.colors_written;
key->ps.part.epilog.color_is_int10 &= sel->info.colors_written;
}
/* Eliminate shader code computing output values that are unused.
@ -2026,51 +2026,51 @@ void si_ps_key_update_framebuffer_blend(struct si_context *sctx)
*/
if (sel->colors_written_4bit &
~(sctx->framebuffer.colorbuf_enabled_4bit & blend->cb_target_enabled_4bit))
key->opt.prefer_mono = 1;
key->ps.opt.prefer_mono = 1;
else
key->opt.prefer_mono = 0;
key->ps.opt.prefer_mono = 0;
}
void si_ps_key_update_blend_rasterizer(struct si_context *sctx)
{
struct si_shader_key *key = &sctx->shader.ps.key;
union si_shader_key *key = &sctx->shader.ps.key;
struct si_state_blend *blend = sctx->queued.named.blend;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
key->part.ps.epilog.alpha_to_one = blend->alpha_to_one && rs->multisample_enable;
key->ps.part.epilog.alpha_to_one = blend->alpha_to_one && rs->multisample_enable;
}
void si_ps_key_update_rasterizer(struct si_context *sctx)
{
struct si_shader_selector *sel = sctx->shader.ps.cso;
struct si_shader_key *key = &sctx->shader.ps.key;
union si_shader_key *key = &sctx->shader.ps.key;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
if (!sel)
return;
key->part.ps.prolog.color_two_side = rs->two_side && sel->info.colors_read;
key->part.ps.prolog.flatshade_colors = rs->flatshade && sel->info.uses_interp_color;
key->part.ps.epilog.clamp_color = rs->clamp_fragment_color;
key->ps.part.prolog.color_two_side = rs->two_side && sel->info.colors_read;
key->ps.part.prolog.flatshade_colors = rs->flatshade && sel->info.uses_interp_color;
key->ps.part.epilog.clamp_color = rs->clamp_fragment_color;
}
void si_ps_key_update_dsa(struct si_context *sctx)
{
struct si_shader_key *key = &sctx->shader.ps.key;
union si_shader_key *key = &sctx->shader.ps.key;
key->part.ps.epilog.alpha_func = sctx->queued.named.dsa->alpha_func;
key->ps.part.epilog.alpha_func = sctx->queued.named.dsa->alpha_func;
}
static void si_ps_key_update_primtype_shader_rasterizer_framebuffer(struct si_context *sctx)
{
struct si_shader_key *key = &sctx->shader.ps.key;
union si_shader_key *key = &sctx->shader.ps.key;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
bool is_poly = !util_prim_is_points_or_lines(sctx->current_rast_prim);
bool is_line = util_prim_is_lines(sctx->current_rast_prim);
key->part.ps.prolog.poly_stipple = rs->poly_stipple_enable && is_poly;
key->part.ps.epilog.poly_line_smoothing =
key->ps.part.prolog.poly_stipple = rs->poly_stipple_enable && is_poly;
key->ps.part.epilog.poly_line_smoothing =
((is_poly && rs->poly_smooth) || (is_line && rs->line_smooth)) &&
sctx->framebuffer.nr_samples <= 1;
}
@ -2078,21 +2078,21 @@ static void si_ps_key_update_primtype_shader_rasterizer_framebuffer(struct si_co
void si_ps_key_update_sample_shading(struct si_context *sctx)
{
struct si_shader_selector *sel = sctx->shader.ps.cso;
struct si_shader_key *key = &sctx->shader.ps.key;
union si_shader_key *key = &sctx->shader.ps.key;
if (!sel)
return;
if (sctx->ps_iter_samples > 1 && sel->info.reads_samplemask)
key->part.ps.prolog.samplemask_log_ps_iter = util_logbase2(sctx->ps_iter_samples);
key->ps.part.prolog.samplemask_log_ps_iter = util_logbase2(sctx->ps_iter_samples);
else
key->part.ps.prolog.samplemask_log_ps_iter = 0;
key->ps.part.prolog.samplemask_log_ps_iter = 0;
}
void si_ps_key_update_framebuffer_rasterizer_sample_shading(struct si_context *sctx)
{
struct si_shader_selector *sel = sctx->shader.ps.cso;
struct si_shader_key *key = &sctx->shader.ps.key;
union si_shader_key *key = &sctx->shader.ps.key;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
if (!sel)
@ -2107,49 +2107,49 @@ void si_ps_key_update_framebuffer_rasterizer_sample_shading(struct si_context *s
if (rs->force_persample_interp && rs->multisample_enable &&
sctx->framebuffer.nr_samples > 1 && sctx->ps_iter_samples > 1) {
key->part.ps.prolog.force_persp_sample_interp =
key->ps.part.prolog.force_persp_sample_interp =
uses_persp_center || uses_persp_centroid;
key->part.ps.prolog.force_linear_sample_interp =
key->ps.part.prolog.force_linear_sample_interp =
sel->info.uses_linear_center || sel->info.uses_linear_centroid;
key->part.ps.prolog.force_persp_center_interp = 0;
key->part.ps.prolog.force_linear_center_interp = 0;
key->part.ps.prolog.bc_optimize_for_persp = 0;
key->part.ps.prolog.bc_optimize_for_linear = 0;
key->mono.u.ps.interpolate_at_sample_force_center = 0;
key->ps.part.prolog.force_persp_center_interp = 0;
key->ps.part.prolog.force_linear_center_interp = 0;
key->ps.part.prolog.bc_optimize_for_persp = 0;
key->ps.part.prolog.bc_optimize_for_linear = 0;
key->ps.mono.interpolate_at_sample_force_center = 0;
} else if (rs->multisample_enable && sctx->framebuffer.nr_samples > 1) {
key->part.ps.prolog.force_persp_sample_interp = 0;
key->part.ps.prolog.force_linear_sample_interp = 0;
key->part.ps.prolog.force_persp_center_interp = 0;
key->part.ps.prolog.force_linear_center_interp = 0;
key->part.ps.prolog.bc_optimize_for_persp =
key->ps.part.prolog.force_persp_sample_interp = 0;
key->ps.part.prolog.force_linear_sample_interp = 0;
key->ps.part.prolog.force_persp_center_interp = 0;
key->ps.part.prolog.force_linear_center_interp = 0;
key->ps.part.prolog.bc_optimize_for_persp =
uses_persp_center && uses_persp_centroid;
key->part.ps.prolog.bc_optimize_for_linear =
key->ps.part.prolog.bc_optimize_for_linear =
sel->info.uses_linear_center && sel->info.uses_linear_centroid;
key->mono.u.ps.interpolate_at_sample_force_center = 0;
key->ps.mono.interpolate_at_sample_force_center = 0;
} else {
key->part.ps.prolog.force_persp_sample_interp = 0;
key->part.ps.prolog.force_linear_sample_interp = 0;
key->ps.part.prolog.force_persp_sample_interp = 0;
key->ps.part.prolog.force_linear_sample_interp = 0;
/* Make sure SPI doesn't compute more than 1 pair
* of (i,j), which is the optimization here. */
key->part.ps.prolog.force_persp_center_interp = uses_persp_center +
key->ps.part.prolog.force_persp_center_interp = uses_persp_center +
uses_persp_centroid +
uses_persp_sample > 1;
key->part.ps.prolog.force_linear_center_interp = sel->info.uses_linear_center +
key->ps.part.prolog.force_linear_center_interp = sel->info.uses_linear_center +
sel->info.uses_linear_centroid +
sel->info.uses_linear_sample > 1;
key->part.ps.prolog.bc_optimize_for_persp = 0;
key->part.ps.prolog.bc_optimize_for_linear = 0;
key->mono.u.ps.interpolate_at_sample_force_center = sel->info.uses_interp_at_sample;
key->ps.part.prolog.bc_optimize_for_persp = 0;
key->ps.part.prolog.bc_optimize_for_linear = 0;
key->ps.mono.interpolate_at_sample_force_center = sel->info.uses_interp_at_sample;
}
}
/* Compute the key for the hw shader variant */
static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_shader_selector *sel,
struct si_shader_key *key)
union si_shader_key *key)
{
struct si_context *sctx = (struct si_context *)ctx;
@ -2162,8 +2162,8 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_sh
break;
case MESA_SHADER_TESS_CTRL:
if (sctx->chip_class >= GFX9) {
si_get_vs_key_inputs(sctx, key, &key->part.tcs.ls_prolog);
key->part.tcs.ls = sctx->shader.vs.cso;
si_get_vs_key_inputs(sctx, key, &key->ge.part.tcs.ls_prolog);
key->ge.part.tcs.ls = sctx->shader.vs.cso;
}
break;
case MESA_SHADER_TESS_EVAL:
@ -2175,11 +2175,11 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_sh
case MESA_SHADER_GEOMETRY:
if (sctx->chip_class >= GFX9) {
if (sctx->shader.tes.cso) {
si_clear_vs_key_inputs(sctx, key, &key->part.gs.vs_prolog);
key->part.gs.es = sctx->shader.tes.cso;
si_clear_vs_key_inputs(sctx, key, &key->ge.part.gs.vs_prolog);
key->ge.part.gs.es = sctx->shader.tes.cso;
} else {
si_get_vs_key_inputs(sctx, key, &key->part.gs.vs_prolog);
key->part.gs.es = sctx->shader.vs.cso;
si_get_vs_key_inputs(sctx, key, &key->ge.part.gs.vs_prolog);
key->ge.part.gs.es = sctx->shader.vs.cso;
}
/* Only NGG can eliminate GS outputs, because the code is shared with VS. */
@ -2249,11 +2249,11 @@ static void si_build_shader_variant_low_priority(void *job, void *gdata, int thr
}
/* This should be const, but C++ doesn't allow implicit zero-initialization with const. */
static struct si_shader_key zeroed;
static union si_shader_key zeroed;
static bool si_check_missing_main_part(struct si_screen *sscreen, struct si_shader_selector *sel,
struct si_compiler_ctx_state *compiler_state,
const struct si_shader_key *key)
const union si_shader_key *key)
{
struct si_shader **mainp = si_get_main_shader_part(sel, key);
@ -2269,9 +2269,11 @@ static bool si_check_missing_main_part(struct si_screen *sscreen, struct si_shad
util_queue_fence_init(&main_part->ready);
main_part->selector = sel;
main_part->key.as_es = key->as_es;
main_part->key.as_ls = key->as_ls;
main_part->key.as_ngg = key->as_ngg;
if (sel->info.stage <= MESA_SHADER_GEOMETRY) {
main_part->key.ge.as_es = key->ge.as_es;
main_part->key.ge.as_ls = key->ge.as_ls;
main_part->key.ge.as_ngg = key->ge.as_ngg;
}
main_part->is_monolithic = false;
if (!si_compile_shader(sscreen, compiler_state->compiler, main_part,
@ -2285,8 +2287,9 @@ static bool si_check_missing_main_part(struct si_screen *sscreen, struct si_shad
}
/* A helper to copy *key to *local_key and return local_key. */
static const struct si_shader_key *
use_local_key_copy(const struct si_shader_key *key, struct si_shader_key *local_key)
template<typename SHADER_KEY_TYPE>
static const SHADER_KEY_TYPE *
use_local_key_copy(const SHADER_KEY_TYPE *key, SHADER_KEY_TYPE *local_key)
{
if (key != local_key)
memcpy(local_key, key, sizeof(*key));
@ -2301,24 +2304,27 @@ use_local_key_copy(const struct si_shader_key *key, struct si_shader_key *local_
* the compilation isn't finished, don't select any
* shader and return an error.
*/
int si_shader_select_with_key(struct si_context *sctx, struct si_shader_ctx_state *state,
const struct si_shader_key *key, int thread_index,
bool optimized_or_none)
template<typename SHADER_KEY_TYPE>
static int si_shader_select_with_key(struct si_context *sctx, struct si_shader_ctx_state *state,
const SHADER_KEY_TYPE *key, int thread_index,
bool optimized_or_none)
{
struct si_screen *sscreen = sctx->screen;
struct si_shader_selector *sel = state->cso;
struct si_shader_selector *previous_stage_sel = NULL;
struct si_shader *current = state->current;
struct si_shader *iter, *shader = NULL;
const SHADER_KEY_TYPE *zeroed_key = (SHADER_KEY_TYPE*)&zeroed;
/* si_shader_select_with_key must not modify 'key' because it would affect future shaders.
* If we need to modify it for this specific shader (eg: to disable optimizations), we
* use a copy.
*/
struct si_shader_key local_key;
SHADER_KEY_TYPE local_key;
if (unlikely(sscreen->debug_flags & DBG(NO_OPT_VARIANT))) {
/* Disable shader variant optimizations. */
key = use_local_key_copy(key, &local_key);
key = use_local_key_copy<SHADER_KEY_TYPE>(key, &local_key);
memset(&local_key.opt, 0, sizeof(key->opt));
}
@ -2364,12 +2370,14 @@ current_not_ready:
/* Find the shader variant. */
for (iter = sel->first_variant; iter; iter = iter->next_variant) {
if (memcmp(&iter->key, key, s) == 0) {
const SHADER_KEY_TYPE *iter_key = (const SHADER_KEY_TYPE *)&iter->key;
if (memcmp(iter_key, key, s) == 0) {
/* Check the inlined uniform values separatly, and count
* the number of variants based on them.
*/
if (key->opt.inline_uniforms &&
memcmp(iter->key.opt.inlined_uniform_values,
memcmp(iter_key->opt.inlined_uniform_values,
key->opt.inlined_uniform_values,
MAX_INLINABLE_UNIFORMS * 4) != 0) {
if (variant_count++ > max_inline_uniforms_variants) {
@ -2424,7 +2432,7 @@ current_not_ready:
si_init_compiler(sctx->screen, &sctx->compiler);
shader->selector = sel;
shader->key = *key;
*((SHADER_KEY_TYPE*)&shader->key) = *key;
shader->compiler_ctx_state.compiler = &sctx->compiler;
shader->compiler_ctx_state.debug = sctx->debug;
shader->compiler_ctx_state.is_debug_context = sctx->is_debug;
@ -2432,9 +2440,9 @@ current_not_ready:
/* If this is a merged shader, get the first shader's selector. */
if (sscreen->info.chip_class >= GFX9) {
if (sel->info.stage == MESA_SHADER_TESS_CTRL)
previous_stage_sel = key->part.tcs.ls;
previous_stage_sel = ((struct si_shader_key_ge*)key)->part.tcs.ls;
else if (sel->info.stage == MESA_SHADER_GEOMETRY)
previous_stage_sel = key->part.gs.es;
previous_stage_sel = ((struct si_shader_key_ge*)key)->part.gs.es;
/* We need to wait for the previous shader. */
if (previous_stage_sel && thread_index < 0)
@ -2442,7 +2450,7 @@ current_not_ready:
}
bool is_pure_monolithic =
sscreen->use_monolithic_shaders || memcmp(&key->mono, &zeroed.mono, sizeof(key->mono)) != 0;
sscreen->use_monolithic_shaders || memcmp(&key->mono, &zeroed_key->mono, sizeof(key->mono)) != 0;
/* Compile the main shader part if it doesn't exist. This can happen
* if the initial guess was wrong.
@ -2461,13 +2469,13 @@ current_not_ready:
* part is present.
*/
if (previous_stage_sel) {
struct si_shader_key shader1_key = zeroed;
union si_shader_key shader1_key = zeroed;
if (sel->info.stage == MESA_SHADER_TESS_CTRL) {
shader1_key.as_ls = 1;
shader1_key.ge.as_ls = 1;
} else if (sel->info.stage == MESA_SHADER_GEOMETRY) {
shader1_key.as_es = 1;
shader1_key.as_ngg = key->as_ngg; /* for Wave32 vs Wave64 */
shader1_key.ge.as_es = 1;
shader1_key.ge.as_ngg = ((struct si_shader_key_ge*)key)->as_ngg; /* for Wave32 vs Wave64 */
} else {
assert(0);
}
@ -2479,7 +2487,8 @@ current_not_ready:
}
if (ok) {
ok = si_check_missing_main_part(sscreen, sel, &shader->compiler_ctx_state, key);
ok = si_check_missing_main_part(sscreen, sel, &shader->compiler_ctx_state,
(union si_shader_key*)key);
}
if (!ok) {
@ -2500,10 +2509,10 @@ current_not_ready:
/* Monolithic-only shaders don't make a distinction between optimized
* and unoptimized. */
shader->is_monolithic =
is_pure_monolithic || memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0;
is_pure_monolithic || memcmp(&key->opt, &zeroed_key->opt, sizeof(key->opt)) != 0;
shader->is_optimized = !is_pure_monolithic &&
memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0;
memcmp(&key->opt, &zeroed_key->opt, sizeof(key->opt)) != 0;
/* If it's an optimized shader, compile it asynchronously. */
if (shader->is_optimized && thread_index < 0) {
@ -2563,11 +2572,15 @@ int si_shader_select(struct pipe_context *ctx, struct si_shader_ctx_state *state
struct si_context *sctx = (struct si_context *)ctx;
si_shader_selector_key(ctx, state->cso, &state->key);
return si_shader_select_with_key(sctx, state, &state->key, -1, false);
if (state->cso->info.stage == MESA_SHADER_FRAGMENT)
return si_shader_select_with_key(sctx, state, &state->key.ps, -1, false);
else
return si_shader_select_with_key(sctx, state, &state->key.ge, -1, false);
}
static void si_parse_next_shader_property(const struct si_shader_info *info, bool streamout,
struct si_shader_key *key)
union si_shader_key *key)
{
gl_shader_stage next_shader = info->base.next_stage;
@ -2575,11 +2588,11 @@ static void si_parse_next_shader_property(const struct si_shader_info *info, boo
case MESA_SHADER_VERTEX:
switch (next_shader) {
case MESA_SHADER_GEOMETRY:
key->as_es = 1;
key->ge.as_es = 1;
break;
case MESA_SHADER_TESS_CTRL:
case MESA_SHADER_TESS_EVAL:
key->as_ls = 1;
key->ge.as_ls = 1;
break;
default:
/* If POSITION isn't written, it can only be a HW VS
@ -2588,13 +2601,13 @@ static void si_parse_next_shader_property(const struct si_shader_info *info, boo
* This heuristic is needed for separate shader objects.
*/
if (!info->writes_position && !streamout)
key->as_ls = 1;
key->ge.as_ls = 1;
}
break;
case MESA_SHADER_TESS_EVAL:
if (next_shader == MESA_SHADER_GEOMETRY || !info->writes_position)
key->as_es = 1;
key->ge.as_es = 1;
break;
default:;
@ -2672,13 +2685,17 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
shader->is_monolithic = false;
si_parse_next_shader_property(&sel->info, sel->so.num_outputs != 0, &shader->key);
if (sscreen->use_ngg && (!sel->so.num_outputs || sscreen->use_ngg_streamout) &&
((sel->info.stage == MESA_SHADER_VERTEX && !shader->key.as_ls) ||
if (sel->info.stage <= MESA_SHADER_GEOMETRY &&
sscreen->use_ngg && (!sel->so.num_outputs || sscreen->use_ngg_streamout) &&
((sel->info.stage == MESA_SHADER_VERTEX && !shader->key.ge.as_ls) ||
sel->info.stage == MESA_SHADER_TESS_EVAL || sel->info.stage == MESA_SHADER_GEOMETRY))
shader->key.as_ngg = 1;
shader->key.ge.as_ngg = 1;
if (sel->nir) {
si_get_ir_cache_key(sel, shader->key.as_ngg, shader->key.as_es, ir_sha1_cache_key);
if (sel->info.stage <= MESA_SHADER_GEOMETRY)
si_get_ir_cache_key(sel, shader->key.ge.as_ngg, shader->key.ge.as_es, ir_sha1_cache_key);
else
si_get_ir_cache_key(sel, false, false, ir_sha1_cache_key);
}
/* Try to load the shader from the shader cache. */
@ -2714,7 +2731,7 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
if ((sel->info.stage == MESA_SHADER_VERTEX ||
sel->info.stage == MESA_SHADER_TESS_EVAL ||
sel->info.stage == MESA_SHADER_GEOMETRY) &&
!shader->key.as_ls && !shader->key.as_es) {
!shader->key.ge.as_ls && !shader->key.ge.as_es) {
unsigned i;
for (i = 0; i < sel->info.num_outputs; i++) {
@ -3186,7 +3203,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
sctx->shader.vs.current = sel ? sel->first_variant : NULL;
sctx->num_vs_blit_sgprs = sel ? sel->info.base.vs.blit_sgprs_amd : 0;
sctx->vs_uses_draw_id = sel ? sel->info.uses_drawid : false;
sctx->fixed_func_tcs_shader.key.mono.u.ff_tcs_inputs_to_copy = sel ? sel->outputs_written : 0;
sctx->fixed_func_tcs_shader.key.ge.mono.u.ff_tcs_inputs_to_copy = sel ? sel->outputs_written : 0;
if (si_update_ngg(sctx))
si_shader_change_notify(sctx);
@ -3294,7 +3311,7 @@ static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
sctx->shader.tcs.cso = sel;
sctx->shader.tcs.current = sel ? sel->first_variant : NULL;
sctx->shader.tcs.key.part.tcs.epilog.invoc0_tess_factors_are_def =
sctx->shader.tcs.key.ge.part.tcs.epilog.invoc0_tess_factors_are_def =
sel ? sel->info.tessfactors_are_def_in_all_invocs : 0;
si_update_tess_uses_prim_id(sctx);
@ -3320,12 +3337,12 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
sctx->ia_multi_vgt_param_key.u.uses_tess = sel != NULL;
si_update_tess_uses_prim_id(sctx);
sctx->shader.tcs.key.part.tcs.epilog.prim_mode =
sctx->fixed_func_tcs_shader.key.part.tcs.epilog.prim_mode =
sctx->shader.tcs.key.ge.part.tcs.epilog.prim_mode =
sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.prim_mode =
sel ? sel->info.base.tess.primitive_mode : 0;
sctx->shader.tcs.key.part.tcs.epilog.tes_reads_tess_factors =
sctx->fixed_func_tcs_shader.key.part.tcs.epilog.tes_reads_tess_factors =
sctx->shader.tcs.key.ge.part.tcs.epilog.tes_reads_tess_factors =
sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.tes_reads_tess_factors =
sel ? sel->info.reads_tess_factors : 0;
si_update_common_shader_state(sctx, sel, PIPE_SHADER_TESS_EVAL);
@ -3437,13 +3454,13 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
switch (shader->selector->info.stage) {
case MESA_SHADER_VERTEX:
if (shader->key.as_ls) {
if (shader->key.ge.as_ls) {
if (sctx->chip_class <= GFX8)
state_index = SI_STATE_IDX(ls);
} else if (shader->key.as_es) {
} else if (shader->key.ge.as_es) {
if (sctx->chip_class <= GFX8)
state_index = SI_STATE_IDX(es);
} else if (shader->key.as_ngg) {
} else if (shader->key.ge.as_ngg) {
state_index = SI_STATE_IDX(gs);
} else {
state_index = SI_STATE_IDX(vs);
@ -3453,10 +3470,10 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
state_index = SI_STATE_IDX(hs);
break;
case MESA_SHADER_TESS_EVAL:
if (shader->key.as_es) {
if (shader->key.ge.as_es) {
if (sctx->chip_class <= GFX8)
state_index = SI_STATE_IDX(es);
} else if (shader->key.as_ngg) {
} else if (shader->key.ge.as_ngg) {
state_index = SI_STATE_IDX(gs);
} else {
state_index = SI_STATE_IDX(vs);
@ -3808,11 +3825,11 @@ static bool si_update_scratch_relocs(struct si_context *sctx)
if (r < 0)
return false;
if (r == 1) {
if (sctx->shader.vs.current->key.as_ls)
if (sctx->shader.vs.current->key.ge.as_ls)
si_pm4_bind_state(sctx, ls, sctx->shader.vs.current);
else if (sctx->shader.vs.current->key.as_es)
else if (sctx->shader.vs.current->key.ge.as_es)
si_pm4_bind_state(sctx, es, sctx->shader.vs.current);
else if (sctx->shader.vs.current->key.as_ngg)
else if (sctx->shader.vs.current->key.ge.as_ngg)
si_pm4_bind_state(sctx, gs, sctx->shader.vs.current);
else
si_pm4_bind_state(sctx, vs, sctx->shader.vs.current);
@ -3823,9 +3840,9 @@ static bool si_update_scratch_relocs(struct si_context *sctx)
if (r < 0)
return false;
if (r == 1) {
if (sctx->shader.tes.current->key.as_es)
if (sctx->shader.tes.current->key.ge.as_es)
si_pm4_bind_state(sctx, es, sctx->shader.tes.current);
else if (sctx->shader.tes.current->key.as_ngg)
else if (sctx->shader.tes.current->key.ge.as_ngg)
si_pm4_bind_state(sctx, gs, sctx->shader.tes.current);
else
si_pm4_bind_state(sctx, vs, sctx->shader.tes.current);