mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
radeonsi: split si_shader_key into ps and ge parts to minimize memcmp overhead
ps is for the pixel shader, while ge is for VS, TCS, TES, and GS. si_shader_key: 68 bytes si_shader_key_ge: 68 bytes si_shader_key_ps: 28 bytes The only notable change is that si_shader_select_with_key is changed to a C++ template. Other changes are trivial. Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13285>
This commit is contained in:
parent
385c9e1caf
commit
8c5a32b5fe
17 changed files with 652 additions and 540 deletions
|
|
@ -83,7 +83,7 @@ static LLVMValueRef ngg_get_vertices_per_prim(struct si_shader_context *ctx, uns
|
|||
/* Blits always use axis-aligned rectangles with 3 vertices. */
|
||||
*num_vertices = 3;
|
||||
return LLVMConstInt(ctx->ac.i32, 3, 0);
|
||||
} else if (ctx->shader->key.opt.ngg_culling & SI_NGG_CULL_LINES) {
|
||||
} else if (ctx->shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES) {
|
||||
*num_vertices = 2;
|
||||
return LLVMConstInt(ctx->ac.i32, 2, 0);
|
||||
} else {
|
||||
|
|
@ -115,7 +115,7 @@ bool gfx10_ngg_export_prim_early(struct si_shader *shader)
|
|||
{
|
||||
struct si_shader_selector *sel = shader->selector;
|
||||
|
||||
assert(shader->key.as_ngg && !shader->key.as_es);
|
||||
assert(shader->key.ge.as_ngg && !shader->key.ge.as_es);
|
||||
|
||||
return sel->info.stage != MESA_SHADER_GEOMETRY &&
|
||||
!gfx10_ngg_writes_user_edgeflags(shader);
|
||||
|
|
@ -137,7 +137,7 @@ void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef use
|
|||
{
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
|
||||
if (gfx10_is_ngg_passthrough(ctx->shader) || ctx->shader->key.opt.ngg_culling) {
|
||||
if (gfx10_is_ngg_passthrough(ctx->shader) || ctx->shader->key.ge.opt.ngg_culling) {
|
||||
ac_build_ifcc(&ctx->ac, si_is_gs_thread(ctx), 6001);
|
||||
{
|
||||
struct ac_ngg_prim prim = {};
|
||||
|
|
@ -614,17 +614,17 @@ static unsigned ngg_nogs_vertex_size(struct si_shader *shader)
|
|||
* to the ES thread of the provoking vertex. All ES threads
|
||||
* load and export PrimitiveID for their thread.
|
||||
*/
|
||||
if (shader->selector->info.stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id)
|
||||
if (shader->selector->info.stage == MESA_SHADER_VERTEX && shader->key.ge.mono.u.vs_export_prim_id)
|
||||
lds_vertex_size = MAX2(lds_vertex_size, 1);
|
||||
|
||||
if (shader->key.opt.ngg_culling) {
|
||||
if (shader->key.ge.opt.ngg_culling) {
|
||||
if (shader->selector->info.stage == MESA_SHADER_VERTEX) {
|
||||
STATIC_ASSERT(lds_instance_id + 1 == 7);
|
||||
lds_vertex_size = MAX2(lds_vertex_size, 7);
|
||||
} else {
|
||||
assert(shader->selector->info.stage == MESA_SHADER_TESS_EVAL);
|
||||
|
||||
if (shader->selector->info.uses_primid || shader->key.mono.u.vs_export_prim_id) {
|
||||
if (shader->selector->info.uses_primid || shader->key.ge.mono.u.vs_export_prim_id) {
|
||||
STATIC_ASSERT(lds_tes_patch_id + 2 == 9); /* +1 for LDS padding */
|
||||
lds_vertex_size = MAX2(lds_vertex_size, 9);
|
||||
} else {
|
||||
|
|
@ -823,10 +823,10 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
|||
LLVMValueRef *addrs = abi->outputs;
|
||||
unsigned max_waves = DIV_ROUND_UP(ctx->screen->ngg_subgroup_size, ctx->ac.wave_size);
|
||||
|
||||
assert(shader->key.opt.ngg_culling);
|
||||
assert(shader->key.as_ngg);
|
||||
assert(shader->key.ge.opt.ngg_culling);
|
||||
assert(shader->key.ge.as_ngg);
|
||||
assert(sel->info.stage == MESA_SHADER_VERTEX ||
|
||||
(sel->info.stage == MESA_SHADER_TESS_EVAL && !shader->key.as_es));
|
||||
(sel->info.stage == MESA_SHADER_TESS_EVAL && !shader->key.ge.as_es));
|
||||
|
||||
LLVMValueRef es_vtxptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
|
||||
unsigned pos_index = 0;
|
||||
|
|
@ -840,8 +840,8 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
|||
* the position. This is useful for analyzing maximum theoretical
|
||||
* performance without VS input loads.
|
||||
*/
|
||||
if (shader->key.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE &&
|
||||
shader->key.opt.ngg_culling & SI_NGG_CULL_BACK_FACE) {
|
||||
if (shader->key.ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE &&
|
||||
shader->key.ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE) {
|
||||
for (unsigned j = 0; j < 4; j++)
|
||||
LLVMBuildStore(builder, LLVMGetUndef(ctx->ac.f32), addrs[4 * i + j]);
|
||||
break;
|
||||
|
|
@ -993,15 +993,15 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
|||
options.cull_view_xy = true;
|
||||
options.cull_w = true;
|
||||
|
||||
if (shader->key.opt.ngg_culling & SI_NGG_CULL_LINES) {
|
||||
if (shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES) {
|
||||
options.num_vertices = 2;
|
||||
|
||||
assert(!(shader->key.opt.ngg_culling & SI_NGG_CULL_BACK_FACE));
|
||||
assert(!(shader->key.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE));
|
||||
assert(!(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE));
|
||||
assert(!(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE));
|
||||
} else {
|
||||
options.num_vertices = 3;
|
||||
options.cull_front = shader->key.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE;
|
||||
options.cull_back = shader->key.opt.ngg_culling & SI_NGG_CULL_BACK_FACE;
|
||||
options.cull_front = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE;
|
||||
options.cull_back = shader->key.ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE;
|
||||
options.cull_small_prims = true; /* this would only be false with conservative rasterization */
|
||||
options.cull_zero_area = options.cull_front || options.cull_back;
|
||||
}
|
||||
|
|
@ -1055,10 +1055,10 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
|||
|
||||
bool uses_instance_id = ctx->stage == MESA_SHADER_VERTEX &&
|
||||
(sel->info.uses_instanceid ||
|
||||
shader->key.part.vs.prolog.instance_divisor_is_one ||
|
||||
shader->key.part.vs.prolog.instance_divisor_is_fetched);
|
||||
shader->key.ge.part.vs.prolog.instance_divisor_is_one ||
|
||||
shader->key.ge.part.vs.prolog.instance_divisor_is_fetched);
|
||||
bool uses_tes_prim_id = ctx->stage == MESA_SHADER_TESS_EVAL &&
|
||||
(sel->info.uses_primid || shader->key.mono.u.vs_export_prim_id);
|
||||
(sel->info.uses_primid || shader->key.ge.mono.u.vs_export_prim_id);
|
||||
|
||||
/* ES threads compute their prefix sum, which is the new ES thread ID.
|
||||
* Then they write the vertex position and input VGPRs into the LDS address
|
||||
|
|
@ -1278,7 +1278,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
|||
|
||||
/* These two also use LDS. */
|
||||
if (gfx10_ngg_writes_user_edgeflags(shader) ||
|
||||
(ctx->stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id))
|
||||
(ctx->stage == MESA_SHADER_VERTEX && shader->key.ge.mono.u.vs_export_prim_id))
|
||||
ac_build_s_barrier(&ctx->ac);
|
||||
|
||||
ctx->return_value = ret;
|
||||
|
|
@ -1338,7 +1338,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
|
|||
bool unterminated_es_if_block =
|
||||
!sel->so.num_outputs && !gfx10_ngg_writes_user_edgeflags(ctx->shader) &&
|
||||
!ctx->screen->use_ngg_streamout && /* no query buffer */
|
||||
(ctx->stage != MESA_SHADER_VERTEX || !ctx->shader->key.mono.u.vs_export_prim_id);
|
||||
(ctx->stage != MESA_SHADER_VERTEX || !ctx->shader->key.ge.mono.u.vs_export_prim_id);
|
||||
|
||||
if (!unterminated_es_if_block)
|
||||
ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
|
||||
|
|
@ -1347,7 +1347,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
|
|||
LLVMValueRef is_es_thread = si_is_es_thread(ctx);
|
||||
LLVMValueRef vtxindex[3];
|
||||
|
||||
if (ctx->shader->key.opt.ngg_culling || gfx10_is_ngg_passthrough(ctx->shader)) {
|
||||
if (ctx->shader->key.ge.opt.ngg_culling || gfx10_is_ngg_passthrough(ctx->shader)) {
|
||||
for (unsigned i = 0; i < 3; ++i)
|
||||
vtxindex[i] = si_unpack_param(ctx, ctx->args.gs_vtx_offset[0], 10 * i, 9);
|
||||
} else {
|
||||
|
|
@ -1402,7 +1402,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
|
|||
/* Copy Primitive IDs from GS threads to the LDS address corresponding
|
||||
* to the ES thread of the provoking vertex.
|
||||
*/
|
||||
if (ctx->stage == MESA_SHADER_VERTEX && ctx->shader->key.mono.u.vs_export_prim_id) {
|
||||
if (ctx->stage == MESA_SHADER_VERTEX && ctx->shader->key.ge.mono.u.vs_export_prim_id) {
|
||||
assert(!unterminated_es_if_block);
|
||||
|
||||
/* Streamout and edge flags use LDS. Make it idle, so that we can reuse it. */
|
||||
|
|
@ -1479,7 +1479,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
|
|||
* load it from LDS.
|
||||
*/
|
||||
if (info->output_semantic[i] == VARYING_SLOT_POS &&
|
||||
ctx->shader->key.opt.ngg_culling) {
|
||||
ctx->shader->key.ge.opt.ngg_culling) {
|
||||
vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
|
||||
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
|
|
@ -1495,7 +1495,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
|
|||
}
|
||||
}
|
||||
|
||||
if (ctx->shader->key.mono.u.vs_export_prim_id) {
|
||||
if (ctx->shader->key.ge.mono.u.vs_export_prim_id) {
|
||||
outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID;
|
||||
|
||||
if (ctx->stage == MESA_SHADER_VERTEX) {
|
||||
|
|
|
|||
|
|
@ -1222,15 +1222,34 @@ static void si_set_constant_buffer(struct si_context *sctx, struct si_buffer_res
|
|||
sctx->descriptors_dirty |= 1u << descriptors_idx;
|
||||
}
|
||||
|
||||
void si_get_inline_uniform_state(union si_shader_key *key, enum pipe_shader_type shader,
|
||||
bool *inline_uniforms, uint32_t **inlined_values)
|
||||
{
|
||||
if (shader == PIPE_SHADER_FRAGMENT) {
|
||||
*inline_uniforms = key->ps.opt.inline_uniforms;
|
||||
*inlined_values = key->ps.opt.inlined_uniform_values;
|
||||
} else {
|
||||
*inline_uniforms = key->ge.opt.inline_uniforms;
|
||||
*inlined_values = key->ge.opt.inlined_uniform_values;
|
||||
}
|
||||
}
|
||||
|
||||
void si_invalidate_inlinable_uniforms(struct si_context *sctx, enum pipe_shader_type shader)
|
||||
{
|
||||
if (shader == PIPE_SHADER_COMPUTE)
|
||||
return;
|
||||
|
||||
if (sctx->shaders[shader].key.opt.inline_uniforms) {
|
||||
sctx->shaders[shader].key.opt.inline_uniforms = false;
|
||||
memset(sctx->shaders[shader].key.opt.inlined_uniform_values, 0,
|
||||
sizeof(sctx->shaders[shader].key.opt.inlined_uniform_values));
|
||||
bool inline_uniforms;
|
||||
uint32_t *inlined_values;
|
||||
si_get_inline_uniform_state(&sctx->shaders[shader].key, shader, &inline_uniforms, &inlined_values);
|
||||
|
||||
if (inline_uniforms) {
|
||||
if (shader == PIPE_SHADER_FRAGMENT)
|
||||
sctx->shaders[shader].key.ps.opt.inline_uniforms = false;
|
||||
else
|
||||
sctx->shaders[shader].key.ge.opt.inline_uniforms = false;
|
||||
|
||||
memset(inlined_values, 0, MAX_INLINABLE_UNIFORMS * 4);
|
||||
sctx->do_update_shaders = true;
|
||||
}
|
||||
}
|
||||
|
|
@ -1273,10 +1292,18 @@ static void si_set_inlinable_constants(struct pipe_context *ctx,
|
|||
if (shader == PIPE_SHADER_COMPUTE)
|
||||
return;
|
||||
|
||||
if (!sctx->shaders[shader].key.opt.inline_uniforms) {
|
||||
bool inline_uniforms;
|
||||
uint32_t *inlined_values;
|
||||
si_get_inline_uniform_state(&sctx->shaders[shader].key, shader, &inline_uniforms, &inlined_values);
|
||||
|
||||
if (!inline_uniforms) {
|
||||
/* It's the first time we set the constants. Always update shaders. */
|
||||
sctx->shaders[shader].key.opt.inline_uniforms = true;
|
||||
memcpy(sctx->shaders[shader].key.opt.inlined_uniform_values, values, num_values * 4);
|
||||
if (shader == PIPE_SHADER_FRAGMENT)
|
||||
sctx->shaders[shader].key.ps.opt.inline_uniforms = true;
|
||||
else
|
||||
sctx->shaders[shader].key.ge.opt.inline_uniforms = true;
|
||||
|
||||
memcpy(inlined_values, values, num_values * 4);
|
||||
sctx->do_update_shaders = true;
|
||||
return;
|
||||
}
|
||||
|
|
@ -1284,8 +1311,8 @@ static void si_set_inlinable_constants(struct pipe_context *ctx,
|
|||
/* We have already set inlinable constants for this shader. Update the shader only if
|
||||
* the constants are being changed so as not to update shaders needlessly.
|
||||
*/
|
||||
if (memcmp(sctx->shaders[shader].key.opt.inlined_uniform_values, values, num_values * 4)) {
|
||||
memcpy(sctx->shaders[shader].key.opt.inlined_uniform_values, values, num_values * 4);
|
||||
if (memcmp(inlined_values, values, num_values * 4)) {
|
||||
memcpy(inlined_values, values, num_values * 4);
|
||||
sctx->do_update_shaders = true;
|
||||
}
|
||||
}
|
||||
|
|
@ -2029,27 +2056,27 @@ void si_shader_change_notify(struct si_context *sctx)
|
|||
* If GS sets as_ngg, the previous stage must set as_ngg too.
|
||||
*/
|
||||
if (sctx->shader.tes.cso) {
|
||||
sctx->shader.vs.key.as_ls = 1;
|
||||
sctx->shader.vs.key.as_es = 0;
|
||||
sctx->shader.vs.key.as_ngg = 0;
|
||||
sctx->shader.vs.key.ge.as_ls = 1;
|
||||
sctx->shader.vs.key.ge.as_es = 0;
|
||||
sctx->shader.vs.key.ge.as_ngg = 0;
|
||||
|
||||
if (sctx->shader.gs.cso) {
|
||||
sctx->shader.tes.key.as_es = 1;
|
||||
sctx->shader.tes.key.as_ngg = sctx->ngg;
|
||||
sctx->shader.gs.key.as_ngg = sctx->ngg;
|
||||
sctx->shader.tes.key.ge.as_es = 1;
|
||||
sctx->shader.tes.key.ge.as_ngg = sctx->ngg;
|
||||
sctx->shader.gs.key.ge.as_ngg = sctx->ngg;
|
||||
} else {
|
||||
sctx->shader.tes.key.as_es = 0;
|
||||
sctx->shader.tes.key.as_ngg = sctx->ngg;
|
||||
sctx->shader.tes.key.ge.as_es = 0;
|
||||
sctx->shader.tes.key.ge.as_ngg = sctx->ngg;
|
||||
}
|
||||
} else if (sctx->shader.gs.cso) {
|
||||
sctx->shader.vs.key.as_ls = 0;
|
||||
sctx->shader.vs.key.as_es = 1;
|
||||
sctx->shader.vs.key.as_ngg = sctx->ngg;
|
||||
sctx->shader.gs.key.as_ngg = sctx->ngg;
|
||||
sctx->shader.vs.key.ge.as_ls = 0;
|
||||
sctx->shader.vs.key.ge.as_es = 1;
|
||||
sctx->shader.vs.key.ge.as_ngg = sctx->ngg;
|
||||
sctx->shader.gs.key.ge.as_ngg = sctx->ngg;
|
||||
} else {
|
||||
sctx->shader.vs.key.as_ls = 0;
|
||||
sctx->shader.vs.key.as_es = 0;
|
||||
sctx->shader.vs.key.as_ngg = sctx->ngg;
|
||||
sctx->shader.vs.key.ge.as_ls = 0;
|
||||
sctx->shader.vs.key.ge.as_es = 0;
|
||||
sctx->shader.vs.key.ge.as_ngg = sctx->ngg;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -747,10 +747,10 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign
|
|||
* and the LS main part when !vs_needs_prolog
|
||||
* - remove the fixup for unused input VGPRs
|
||||
*/
|
||||
sctx->shader.tcs.key.opt.prefer_mono = 1;
|
||||
sctx->shader.tcs.key.ge.opt.prefer_mono = 1;
|
||||
|
||||
/* This enables jumping over the VS prolog for GS-only waves. */
|
||||
sctx->shader.gs.key.opt.prefer_mono = 1;
|
||||
sctx->shader.gs.key.ge.opt.prefer_mono = 1;
|
||||
}
|
||||
|
||||
si_begin_new_gfx_cs(sctx, true);
|
||||
|
|
|
|||
|
|
@ -805,7 +805,7 @@ struct si_shader_ctx_state {
|
|||
struct si_shader_selector *cso;
|
||||
struct si_shader *current;
|
||||
/* The shader variant key representing the current state. */
|
||||
struct si_shader_key key;
|
||||
union si_shader_key key;
|
||||
};
|
||||
|
||||
#define SI_NUM_VGT_PARAM_KEY_BITS 12
|
||||
|
|
@ -1965,9 +1965,14 @@ static inline unsigned si_get_wave_size(struct si_screen *sscreen,
|
|||
|
||||
static inline unsigned si_get_shader_wave_size(struct si_shader *shader)
|
||||
{
|
||||
if (shader->selector->info.stage <= MESA_SHADER_GEOMETRY) {
|
||||
return si_get_wave_size(shader->selector->screen, shader->selector->info.stage,
|
||||
shader->key.ge.as_ngg,
|
||||
shader->key.ge.as_es);
|
||||
}
|
||||
|
||||
return si_get_wave_size(shader->selector->screen, shader->selector->info.stage,
|
||||
shader->key.as_ngg,
|
||||
shader->key.as_es);
|
||||
false, false);
|
||||
}
|
||||
|
||||
static inline void si_select_draw_vbo(struct si_context *sctx)
|
||||
|
|
|
|||
|
|
@ -42,10 +42,11 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f);
|
|||
/** Whether the shader runs as a combination of multiple API shaders */
|
||||
bool si_is_multi_part_shader(struct si_shader *shader)
|
||||
{
|
||||
if (shader->selector->screen->info.chip_class <= GFX8)
|
||||
if (shader->selector->screen->info.chip_class <= GFX8 ||
|
||||
shader->selector->info.stage > MESA_SHADER_GEOMETRY)
|
||||
return false;
|
||||
|
||||
return shader->key.as_ls || shader->key.as_es ||
|
||||
return shader->key.ge.as_ls || shader->key.ge.as_es ||
|
||||
shader->selector->info.stage == MESA_SHADER_TESS_CTRL ||
|
||||
shader->selector->info.stage == MESA_SHADER_GEOMETRY;
|
||||
}
|
||||
|
|
@ -53,7 +54,10 @@ bool si_is_multi_part_shader(struct si_shader *shader)
|
|||
/** Whether the shader runs on a merged HW stage (LSHS or ESGS) */
|
||||
bool si_is_merged_shader(struct si_shader *shader)
|
||||
{
|
||||
return shader->key.as_ngg || si_is_multi_part_shader(shader);
|
||||
if (shader->selector->info.stage > MESA_SHADER_GEOMETRY)
|
||||
return false;
|
||||
|
||||
return shader->key.ge.as_ngg || si_is_multi_part_shader(shader);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -200,7 +204,7 @@ unsigned si_get_max_workgroup_size(const struct si_shader *shader)
|
|||
switch (shader->selector->info.stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
return shader->key.as_ngg ? 128 : 0;
|
||||
return shader->key.ge.as_ngg ? 128 : 0;
|
||||
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
/* Return this so that LLVM doesn't remove s_barrier
|
||||
|
|
@ -300,7 +304,7 @@ static void declare_vs_input_vgprs(struct si_shader_context *ctx, unsigned *num_
|
|||
struct si_shader *shader = ctx->shader;
|
||||
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.vertex_id);
|
||||
if (shader->key.as_ls) {
|
||||
if (shader->key.ge.as_ls) {
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.vs_rel_patch_id);
|
||||
if (ctx->screen->info.chip_class >= GFX10) {
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
|
||||
|
|
@ -384,10 +388,10 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
|||
memset(&ctx->args, 0, sizeof(ctx->args));
|
||||
|
||||
/* Set MERGED shaders. */
|
||||
if (ctx->screen->info.chip_class >= GFX9) {
|
||||
if (shader->key.as_ls || stage == MESA_SHADER_TESS_CTRL)
|
||||
if (ctx->screen->info.chip_class >= GFX9 && stage <= MESA_SHADER_GEOMETRY) {
|
||||
if (shader->key.ge.as_ls || stage == MESA_SHADER_TESS_CTRL)
|
||||
stage = SI_SHADER_MERGED_VERTEX_TESSCTRL; /* LS or HS */
|
||||
else if (shader->key.as_es || shader->key.as_ngg || stage == MESA_SHADER_GEOMETRY)
|
||||
else if (shader->key.ge.as_es || shader->key.ge.as_ngg || stage == MESA_SHADER_GEOMETRY)
|
||||
stage = SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY;
|
||||
}
|
||||
|
||||
|
|
@ -408,9 +412,9 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
|||
if (!shader->is_gs_copy_shader)
|
||||
declare_vb_descriptor_input_sgprs(ctx);
|
||||
|
||||
if (shader->key.as_es) {
|
||||
if (shader->key.ge.as_es) {
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.es2gs_offset);
|
||||
} else if (shader->key.as_ls) {
|
||||
} else if (shader->key.ge.as_ls) {
|
||||
/* no extra parameters */
|
||||
} else {
|
||||
/* The locations of the other parameters are assigned dynamically. */
|
||||
|
|
@ -479,14 +483,14 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
|||
ac_add_return(&ctx->args, AC_ARG_VGPR);
|
||||
|
||||
/* VS outputs passed via VGPRs to TCS. */
|
||||
if (shader->key.opt.same_patch_vertices) {
|
||||
if (shader->key.ge.opt.same_patch_vertices) {
|
||||
unsigned num_outputs = util_last_bit64(shader->selector->outputs_written);
|
||||
for (i = 0; i < num_outputs * 4; i++)
|
||||
ac_add_return(&ctx->args, AC_ARG_VGPR);
|
||||
}
|
||||
} else {
|
||||
/* TCS inputs are passed via VGPRs from VS. */
|
||||
if (shader->key.opt.same_patch_vertices) {
|
||||
if (shader->key.ge.opt.same_patch_vertices) {
|
||||
unsigned num_inputs = util_last_bit64(shader->previous_stage_sel->outputs_written);
|
||||
for (i = 0; i < num_inputs * 4; i++)
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
|
||||
|
|
@ -510,7 +514,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
|||
/* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */
|
||||
declare_per_stage_desc_pointers(ctx, ctx->stage == MESA_SHADER_GEOMETRY);
|
||||
|
||||
if (ctx->shader->key.as_ngg)
|
||||
if (ctx->shader->key.ge.as_ngg)
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs_tg_info);
|
||||
else
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs2vs_offset);
|
||||
|
|
@ -559,7 +563,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
|||
declare_tes_input_vgprs(ctx);
|
||||
}
|
||||
|
||||
if ((ctx->shader->key.as_es || ngg_cull_shader) &&
|
||||
if ((ctx->shader->key.ge.as_es || ngg_cull_shader) &&
|
||||
(ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL)) {
|
||||
unsigned num_user_sgprs, num_vgprs;
|
||||
|
||||
|
|
@ -602,7 +606,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
|||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tes_offchip_addr);
|
||||
|
||||
if (shader->key.as_es) {
|
||||
if (shader->key.ge.as_es) {
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.es2gs_offset);
|
||||
|
|
@ -786,14 +790,15 @@ static bool si_shader_binary_open(struct si_screen *screen, struct si_shader *sh
|
|||
unsigned num_lds_symbols = 0;
|
||||
|
||||
if (sel && screen->info.chip_class >= GFX9 && !shader->is_gs_copy_shader &&
|
||||
(sel->info.stage == MESA_SHADER_GEOMETRY || shader->key.as_ngg)) {
|
||||
(sel->info.stage == MESA_SHADER_GEOMETRY ||
|
||||
(sel->info.stage <= MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg))) {
|
||||
struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
|
||||
sym->name = "esgs_ring";
|
||||
sym->size = shader->gs_info.esgs_ring_size * 4;
|
||||
sym->align = 64 * 1024;
|
||||
}
|
||||
|
||||
if (shader->key.as_ngg && sel->info.stage == MESA_SHADER_GEOMETRY) {
|
||||
if (sel->info.stage == MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg) {
|
||||
struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
|
||||
sym->name = "ngg_emit";
|
||||
sym->size = shader->ngg.ngg_emit_size * 4;
|
||||
|
|
@ -1060,20 +1065,20 @@ const char *si_get_shader_name(const struct si_shader *shader)
|
|||
{
|
||||
switch (shader->selector->info.stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
if (shader->key.as_es)
|
||||
if (shader->key.ge.as_es)
|
||||
return "Vertex Shader as ES";
|
||||
else if (shader->key.as_ls)
|
||||
else if (shader->key.ge.as_ls)
|
||||
return "Vertex Shader as LS";
|
||||
else if (shader->key.as_ngg)
|
||||
else if (shader->key.ge.as_ngg)
|
||||
return "Vertex Shader as ESGS";
|
||||
else
|
||||
return "Vertex Shader as VS";
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
return "Tessellation Control Shader";
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
if (shader->key.as_es)
|
||||
if (shader->key.ge.as_es)
|
||||
return "Tessellation Evaluation Shader as ES";
|
||||
else if (shader->key.as_ngg)
|
||||
else if (shader->key.ge.as_ngg)
|
||||
return "Tessellation Evaluation Shader as ESGS";
|
||||
else
|
||||
return "Tessellation Evaluation Shader as VS";
|
||||
|
|
@ -1137,7 +1142,7 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
|
|||
si_shader_dump_stats(sscreen, shader, file, check_debug_option);
|
||||
}
|
||||
|
||||
static void si_dump_shader_key_vs(const struct si_shader_key *key,
|
||||
static void si_dump_shader_key_vs(const union si_shader_key *key,
|
||||
const struct si_vs_prolog_bits *prolog, const char *prefix,
|
||||
FILE *f)
|
||||
{
|
||||
|
|
@ -1146,10 +1151,10 @@ static void si_dump_shader_key_vs(const struct si_shader_key *key,
|
|||
prolog->instance_divisor_is_fetched);
|
||||
fprintf(f, " %s.ls_vgpr_fix = %u\n", prefix, prolog->ls_vgpr_fix);
|
||||
|
||||
fprintf(f, " mono.vs.fetch_opencode = %x\n", key->mono.vs_fetch_opencode);
|
||||
fprintf(f, " mono.vs.fetch_opencode = %x\n", key->ge.mono.vs_fetch_opencode);
|
||||
fprintf(f, " mono.vs.fix_fetch = {");
|
||||
for (int i = 0; i < SI_MAX_ATTRIBS; i++) {
|
||||
union si_vs_fix_fetch fix = key->mono.vs_fix_fetch[i];
|
||||
union si_vs_fix_fetch fix = key->ge.mono.vs_fix_fetch[i];
|
||||
if (i)
|
||||
fprintf(f, ", ");
|
||||
if (!fix.bits)
|
||||
|
|
@ -1163,35 +1168,35 @@ static void si_dump_shader_key_vs(const struct si_shader_key *key,
|
|||
|
||||
static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
|
||||
{
|
||||
const struct si_shader_key *key = &shader->key;
|
||||
const union si_shader_key *key = &shader->key;
|
||||
gl_shader_stage stage = shader->selector->info.stage;
|
||||
|
||||
fprintf(f, "SHADER KEY\n");
|
||||
|
||||
switch (stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
si_dump_shader_key_vs(key, &key->part.vs.prolog, "part.vs.prolog", f);
|
||||
fprintf(f, " as_es = %u\n", key->as_es);
|
||||
fprintf(f, " as_ls = %u\n", key->as_ls);
|
||||
fprintf(f, " as_ngg = %u\n", key->as_ngg);
|
||||
fprintf(f, " mono.u.vs_export_prim_id = %u\n", key->mono.u.vs_export_prim_id);
|
||||
si_dump_shader_key_vs(key, &key->ge.part.vs.prolog, "part.vs.prolog", f);
|
||||
fprintf(f, " as_es = %u\n", key->ge.as_es);
|
||||
fprintf(f, " as_ls = %u\n", key->ge.as_ls);
|
||||
fprintf(f, " as_ngg = %u\n", key->ge.as_ngg);
|
||||
fprintf(f, " mono.u.vs_export_prim_id = %u\n", key->ge.mono.u.vs_export_prim_id);
|
||||
break;
|
||||
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
if (shader->selector->screen->info.chip_class >= GFX9) {
|
||||
si_dump_shader_key_vs(key, &key->part.tcs.ls_prolog, "part.tcs.ls_prolog", f);
|
||||
si_dump_shader_key_vs(key, &key->ge.part.tcs.ls_prolog, "part.tcs.ls_prolog", f);
|
||||
}
|
||||
fprintf(f, " part.tcs.epilog.prim_mode = %u\n", key->part.tcs.epilog.prim_mode);
|
||||
fprintf(f, " part.tcs.epilog.prim_mode = %u\n", key->ge.part.tcs.epilog.prim_mode);
|
||||
fprintf(f, " mono.u.ff_tcs_inputs_to_copy = 0x%" PRIx64 "\n",
|
||||
key->mono.u.ff_tcs_inputs_to_copy);
|
||||
fprintf(f, " opt.prefer_mono = %u\n", key->opt.prefer_mono);
|
||||
fprintf(f, " opt.same_patch_vertices = %u\n", key->opt.same_patch_vertices);
|
||||
key->ge.mono.u.ff_tcs_inputs_to_copy);
|
||||
fprintf(f, " opt.prefer_mono = %u\n", key->ge.opt.prefer_mono);
|
||||
fprintf(f, " opt.same_patch_vertices = %u\n", key->ge.opt.same_patch_vertices);
|
||||
break;
|
||||
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
fprintf(f, " as_es = %u\n", key->as_es);
|
||||
fprintf(f, " as_ngg = %u\n", key->as_ngg);
|
||||
fprintf(f, " mono.u.vs_export_prim_id = %u\n", key->mono.u.vs_export_prim_id);
|
||||
fprintf(f, " as_es = %u\n", key->ge.as_es);
|
||||
fprintf(f, " as_ngg = %u\n", key->ge.as_ngg);
|
||||
fprintf(f, " mono.u.vs_export_prim_id = %u\n", key->ge.mono.u.vs_export_prim_id);
|
||||
break;
|
||||
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
|
|
@ -1199,50 +1204,50 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
|
|||
break;
|
||||
|
||||
if (shader->selector->screen->info.chip_class >= GFX9 &&
|
||||
key->part.gs.es->info.stage == MESA_SHADER_VERTEX) {
|
||||
si_dump_shader_key_vs(key, &key->part.gs.vs_prolog, "part.gs.vs_prolog", f);
|
||||
key->ge.part.gs.es->info.stage == MESA_SHADER_VERTEX) {
|
||||
si_dump_shader_key_vs(key, &key->ge.part.gs.vs_prolog, "part.gs.vs_prolog", f);
|
||||
}
|
||||
fprintf(f, " part.gs.prolog.tri_strip_adj_fix = %u\n",
|
||||
key->part.gs.prolog.tri_strip_adj_fix);
|
||||
fprintf(f, " as_ngg = %u\n", key->as_ngg);
|
||||
key->ge.part.gs.prolog.tri_strip_adj_fix);
|
||||
fprintf(f, " as_ngg = %u\n", key->ge.as_ngg);
|
||||
break;
|
||||
|
||||
case MESA_SHADER_COMPUTE:
|
||||
break;
|
||||
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
fprintf(f, " part.ps.prolog.color_two_side = %u\n", key->part.ps.prolog.color_two_side);
|
||||
fprintf(f, " part.ps.prolog.flatshade_colors = %u\n", key->part.ps.prolog.flatshade_colors);
|
||||
fprintf(f, " part.ps.prolog.poly_stipple = %u\n", key->part.ps.prolog.poly_stipple);
|
||||
fprintf(f, " part.ps.prolog.force_persp_sample_interp = %u\n",
|
||||
key->part.ps.prolog.force_persp_sample_interp);
|
||||
fprintf(f, " part.ps.prolog.force_linear_sample_interp = %u\n",
|
||||
key->part.ps.prolog.force_linear_sample_interp);
|
||||
fprintf(f, " part.ps.prolog.force_persp_center_interp = %u\n",
|
||||
key->part.ps.prolog.force_persp_center_interp);
|
||||
fprintf(f, " part.ps.prolog.force_linear_center_interp = %u\n",
|
||||
key->part.ps.prolog.force_linear_center_interp);
|
||||
fprintf(f, " part.ps.prolog.bc_optimize_for_persp = %u\n",
|
||||
key->part.ps.prolog.bc_optimize_for_persp);
|
||||
fprintf(f, " part.ps.prolog.bc_optimize_for_linear = %u\n",
|
||||
key->part.ps.prolog.bc_optimize_for_linear);
|
||||
fprintf(f, " part.ps.prolog.samplemask_log_ps_iter = %u\n",
|
||||
key->part.ps.prolog.samplemask_log_ps_iter);
|
||||
fprintf(f, " part.ps.epilog.spi_shader_col_format = 0x%x\n",
|
||||
key->part.ps.epilog.spi_shader_col_format);
|
||||
fprintf(f, " part.ps.epilog.color_is_int8 = 0x%X\n", key->part.ps.epilog.color_is_int8);
|
||||
fprintf(f, " part.ps.epilog.color_is_int10 = 0x%X\n", key->part.ps.epilog.color_is_int10);
|
||||
fprintf(f, " part.ps.epilog.last_cbuf = %u\n", key->part.ps.epilog.last_cbuf);
|
||||
fprintf(f, " part.ps.epilog.alpha_func = %u\n", key->part.ps.epilog.alpha_func);
|
||||
fprintf(f, " part.ps.epilog.alpha_to_one = %u\n", key->part.ps.epilog.alpha_to_one);
|
||||
fprintf(f, " part.ps.epilog.poly_line_smoothing = %u\n",
|
||||
key->part.ps.epilog.poly_line_smoothing);
|
||||
fprintf(f, " part.ps.epilog.clamp_color = %u\n", key->part.ps.epilog.clamp_color);
|
||||
fprintf(f, " mono.u.ps.interpolate_at_sample_force_center = %u\n",
|
||||
key->mono.u.ps.interpolate_at_sample_force_center);
|
||||
fprintf(f, " mono.u.ps.fbfetch_msaa = %u\n", key->mono.u.ps.fbfetch_msaa);
|
||||
fprintf(f, " mono.u.ps.fbfetch_is_1D = %u\n", key->mono.u.ps.fbfetch_is_1D);
|
||||
fprintf(f, " mono.u.ps.fbfetch_layered = %u\n", key->mono.u.ps.fbfetch_layered);
|
||||
fprintf(f, " prolog.color_two_side = %u\n", key->ps.part.prolog.color_two_side);
|
||||
fprintf(f, " prolog.flatshade_colors = %u\n", key->ps.part.prolog.flatshade_colors);
|
||||
fprintf(f, " prolog.poly_stipple = %u\n", key->ps.part.prolog.poly_stipple);
|
||||
fprintf(f, " prolog.force_persp_sample_interp = %u\n",
|
||||
key->ps.part.prolog.force_persp_sample_interp);
|
||||
fprintf(f, " prolog.force_linear_sample_interp = %u\n",
|
||||
key->ps.part.prolog.force_linear_sample_interp);
|
||||
fprintf(f, " prolog.force_persp_center_interp = %u\n",
|
||||
key->ps.part.prolog.force_persp_center_interp);
|
||||
fprintf(f, " prolog.force_linear_center_interp = %u\n",
|
||||
key->ps.part.prolog.force_linear_center_interp);
|
||||
fprintf(f, " prolog.bc_optimize_for_persp = %u\n",
|
||||
key->ps.part.prolog.bc_optimize_for_persp);
|
||||
fprintf(f, " prolog.bc_optimize_for_linear = %u\n",
|
||||
key->ps.part.prolog.bc_optimize_for_linear);
|
||||
fprintf(f, " prolog.samplemask_log_ps_iter = %u\n",
|
||||
key->ps.part.prolog.samplemask_log_ps_iter);
|
||||
fprintf(f, " epilog.spi_shader_col_format = 0x%x\n",
|
||||
key->ps.part.epilog.spi_shader_col_format);
|
||||
fprintf(f, " epilog.color_is_int8 = 0x%X\n", key->ps.part.epilog.color_is_int8);
|
||||
fprintf(f, " epilog.color_is_int10 = 0x%X\n", key->ps.part.epilog.color_is_int10);
|
||||
fprintf(f, " epilog.last_cbuf = %u\n", key->ps.part.epilog.last_cbuf);
|
||||
fprintf(f, " epilog.alpha_func = %u\n", key->ps.part.epilog.alpha_func);
|
||||
fprintf(f, " epilog.alpha_to_one = %u\n", key->ps.part.epilog.alpha_to_one);
|
||||
fprintf(f, " epilog.poly_line_smoothing = %u\n",
|
||||
key->ps.part.epilog.poly_line_smoothing);
|
||||
fprintf(f, " epilog.clamp_color = %u\n", key->ps.part.epilog.clamp_color);
|
||||
fprintf(f, " mono.interpolate_at_sample_force_center = %u\n",
|
||||
key->ps.mono.interpolate_at_sample_force_center);
|
||||
fprintf(f, " mono.fbfetch_msaa = %u\n", key->ps.mono.fbfetch_msaa);
|
||||
fprintf(f, " mono.fbfetch_is_1D = %u\n", key->ps.mono.fbfetch_is_1D);
|
||||
fprintf(f, " mono.fbfetch_layered = %u\n", key->ps.mono.fbfetch_layered);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
@ -1251,32 +1256,44 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
|
|||
|
||||
if ((stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_TESS_EVAL ||
|
||||
stage == MESA_SHADER_VERTEX) &&
|
||||
!key->as_es && !key->as_ls) {
|
||||
fprintf(f, " opt.kill_outputs = 0x%" PRIx64 "\n", key->opt.kill_outputs);
|
||||
fprintf(f, " opt.kill_pointsize = 0x%x\n", key->opt.kill_pointsize);
|
||||
fprintf(f, " opt.kill_clip_distances = 0x%x\n", key->opt.kill_clip_distances);
|
||||
!key->ge.as_es && !key->ge.as_ls) {
|
||||
fprintf(f, " opt.kill_outputs = 0x%" PRIx64 "\n", key->ge.opt.kill_outputs);
|
||||
fprintf(f, " opt.kill_pointsize = 0x%x\n", key->ge.opt.kill_pointsize);
|
||||
fprintf(f, " opt.kill_clip_distances = 0x%x\n", key->ge.opt.kill_clip_distances);
|
||||
if (stage != MESA_SHADER_GEOMETRY)
|
||||
fprintf(f, " opt.ngg_culling = 0x%x\n", key->opt.ngg_culling);
|
||||
fprintf(f, " opt.ngg_culling = 0x%x\n", key->ge.opt.ngg_culling);
|
||||
}
|
||||
|
||||
fprintf(f, " opt.prefer_mono = %u\n", key->opt.prefer_mono);
|
||||
fprintf(f, " opt.inline_uniforms = %u (0x%x, 0x%x, 0x%x, 0x%x)\n",
|
||||
key->opt.inline_uniforms,
|
||||
key->opt.inlined_uniform_values[0],
|
||||
key->opt.inlined_uniform_values[1],
|
||||
key->opt.inlined_uniform_values[2],
|
||||
key->opt.inlined_uniform_values[3]);
|
||||
if (stage <= MESA_SHADER_GEOMETRY) {
|
||||
fprintf(f, " opt.prefer_mono = %u\n", key->ge.opt.prefer_mono);
|
||||
fprintf(f, " opt.inline_uniforms = %u (0x%x, 0x%x, 0x%x, 0x%x)\n",
|
||||
key->ge.opt.inline_uniforms,
|
||||
key->ge.opt.inlined_uniform_values[0],
|
||||
key->ge.opt.inlined_uniform_values[1],
|
||||
key->ge.opt.inlined_uniform_values[2],
|
||||
key->ge.opt.inlined_uniform_values[3]);
|
||||
} else {
|
||||
fprintf(f, " opt.prefer_mono = %u\n", key->ps.opt.prefer_mono);
|
||||
fprintf(f, " opt.inline_uniforms = %u (0x%x, 0x%x, 0x%x, 0x%x)\n",
|
||||
key->ps.opt.inline_uniforms,
|
||||
key->ps.opt.inlined_uniform_values[0],
|
||||
key->ps.opt.inlined_uniform_values[1],
|
||||
key->ps.opt.inlined_uniform_values[2],
|
||||
key->ps.opt.inlined_uniform_values[3]);
|
||||
}
|
||||
}
|
||||
|
||||
bool si_vs_needs_prolog(const struct si_shader_selector *sel,
|
||||
const struct si_vs_prolog_bits *prolog_key,
|
||||
const struct si_shader_key *key, bool ngg_cull_shader)
|
||||
const union si_shader_key *key, bool ngg_cull_shader)
|
||||
{
|
||||
assert(sel->info.stage == MESA_SHADER_VERTEX);
|
||||
|
||||
/* VGPR initialization fixup for Vega10 and Raven is always done in the
|
||||
* VS prolog. */
|
||||
return sel->vs_needs_prolog || prolog_key->ls_vgpr_fix ||
|
||||
/* The 2nd VS prolog loads input VGPRs from LDS */
|
||||
(key->opt.ngg_culling && !ngg_cull_shader);
|
||||
(key->ge.opt.ngg_culling && !ngg_cull_shader);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1298,11 +1315,11 @@ void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_
|
|||
key->vs_prolog.states = *prolog_key;
|
||||
key->vs_prolog.num_input_sgprs = num_input_sgprs;
|
||||
key->vs_prolog.num_inputs = info->num_inputs;
|
||||
key->vs_prolog.as_ls = shader_out->key.as_ls;
|
||||
key->vs_prolog.as_es = shader_out->key.as_es;
|
||||
key->vs_prolog.as_ngg = shader_out->key.as_ngg;
|
||||
key->vs_prolog.as_ls = shader_out->key.ge.as_ls;
|
||||
key->vs_prolog.as_es = shader_out->key.ge.as_es;
|
||||
key->vs_prolog.as_ngg = shader_out->key.ge.as_ngg;
|
||||
|
||||
if (!ngg_cull_shader && shader_out->key.opt.ngg_culling)
|
||||
if (!ngg_cull_shader && shader_out->key.ge.opt.ngg_culling)
|
||||
key->vs_prolog.load_vgprs_after_culling = 1;
|
||||
|
||||
if (shader_out->selector->info.stage == MESA_SHADER_TESS_CTRL) {
|
||||
|
|
@ -1311,7 +1328,7 @@ void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_
|
|||
} else if (shader_out->selector->info.stage == MESA_SHADER_GEOMETRY) {
|
||||
key->vs_prolog.as_es = 1;
|
||||
key->vs_prolog.num_merged_next_stage_vgprs = 5;
|
||||
} else if (shader_out->key.as_ngg) {
|
||||
} else if (shader_out->key.ge.as_ngg) {
|
||||
key->vs_prolog.num_merged_next_stage_vgprs = 5;
|
||||
}
|
||||
|
||||
|
|
@ -1329,7 +1346,7 @@ void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_
|
|||
}
|
||||
|
||||
struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
|
||||
const struct si_shader_key *key,
|
||||
const union si_shader_key *key,
|
||||
bool *free_nir)
|
||||
{
|
||||
nir_shader *nir;
|
||||
|
|
@ -1350,7 +1367,14 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (key && key->opt.inline_uniforms) {
|
||||
bool inline_uniforms = false;
|
||||
uint32_t *inlined_uniform_values;
|
||||
if (key) {
|
||||
si_get_inline_uniform_state((union si_shader_key*)key, sel->pipe_shader_type,
|
||||
&inline_uniforms, &inlined_uniform_values);
|
||||
}
|
||||
|
||||
if (inline_uniforms) {
|
||||
assert(*free_nir);
|
||||
|
||||
/* Most places use shader information from the default variant, not
|
||||
|
|
@ -1394,7 +1418,7 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
|
|||
*/
|
||||
NIR_PASS_V(nir, nir_inline_uniforms,
|
||||
nir->info.num_inlinable_uniforms,
|
||||
key->opt.inlined_uniform_values,
|
||||
inlined_uniform_values,
|
||||
nir->info.inlinable_uniform_dw_offsets);
|
||||
|
||||
si_nir_opts(sel->screen, nir, true);
|
||||
|
|
@ -1441,15 +1465,15 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
|
|||
if ((sel->info.stage == MESA_SHADER_VERTEX ||
|
||||
sel->info.stage == MESA_SHADER_TESS_EVAL ||
|
||||
sel->info.stage == MESA_SHADER_GEOMETRY) &&
|
||||
!shader->key.as_ls && !shader->key.as_es) {
|
||||
!shader->key.ge.as_ls && !shader->key.ge.as_es) {
|
||||
ubyte *vs_output_param_offset = shader->info.vs_output_param_offset;
|
||||
|
||||
if (sel->info.stage == MESA_SHADER_GEOMETRY && !shader->key.as_ngg)
|
||||
if (sel->info.stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg)
|
||||
vs_output_param_offset = sel->gs_copy_shader->info.vs_output_param_offset;
|
||||
|
||||
/* VS and TES should also set primitive ID output if it's used. */
|
||||
unsigned num_outputs_with_prim_id = sel->info.num_outputs +
|
||||
shader->key.mono.u.vs_export_prim_id;
|
||||
shader->key.ge.mono.u.vs_export_prim_id;
|
||||
|
||||
for (unsigned i = 0; i < num_outputs_with_prim_id; i++) {
|
||||
unsigned semantic = sel->info.output_semantic[i];
|
||||
|
|
@ -1563,32 +1587,38 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader_part **list,
|
|||
|
||||
switch (stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
shader.key.as_ls = key->vs_prolog.as_ls;
|
||||
shader.key.as_es = key->vs_prolog.as_es;
|
||||
shader.key.as_ngg = key->vs_prolog.as_ngg;
|
||||
shader.key.ge.as_ls = key->vs_prolog.as_ls;
|
||||
shader.key.ge.as_es = key->vs_prolog.as_es;
|
||||
shader.key.ge.as_ngg = key->vs_prolog.as_ngg;
|
||||
break;
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
assert(!prolog);
|
||||
shader.key.part.tcs.epilog = key->tcs_epilog.states;
|
||||
shader.key.ge.part.tcs.epilog = key->tcs_epilog.states;
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
assert(prolog);
|
||||
shader.key.as_ngg = key->gs_prolog.as_ngg;
|
||||
shader.key.ge.as_ngg = key->gs_prolog.as_ngg;
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
if (prolog)
|
||||
shader.key.part.ps.prolog = key->ps_prolog.states;
|
||||
shader.key.ps.part.prolog = key->ps_prolog.states;
|
||||
else
|
||||
shader.key.part.ps.epilog = key->ps_epilog.states;
|
||||
shader.key.ps.part.epilog = key->ps_epilog.states;
|
||||
break;
|
||||
default:
|
||||
unreachable("bad shader part");
|
||||
}
|
||||
|
||||
unsigned wave_size;
|
||||
if (stage <= MESA_SHADER_GEOMETRY) {
|
||||
wave_size = si_get_wave_size(sscreen, stage, shader.key.ge.as_ngg, shader.key.ge.as_es);
|
||||
} else {
|
||||
wave_size = si_get_wave_size(sscreen, stage, false, false);
|
||||
}
|
||||
|
||||
struct si_shader_context ctx;
|
||||
si_llvm_context_init(&ctx, sscreen, compiler,
|
||||
si_get_wave_size(sscreen, stage,
|
||||
shader.key.as_ngg, shader.key.as_es));
|
||||
si_llvm_context_init(&ctx, sscreen, compiler, wave_size);
|
||||
|
||||
ctx.shader = &shader;
|
||||
ctx.stage = stage;
|
||||
|
||||
|
|
@ -1639,7 +1669,7 @@ static bool si_get_vs_prolog(struct si_screen *sscreen, struct ac_llvm_compiler
|
|||
static bool si_shader_select_vs_parts(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
|
||||
struct si_shader *shader, struct pipe_debug_callback *debug)
|
||||
{
|
||||
return si_get_vs_prolog(sscreen, compiler, shader, debug, shader, &shader->key.part.vs.prolog);
|
||||
return si_get_vs_prolog(sscreen, compiler, shader, debug, shader, &shader->key.ge.part.vs.prolog);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1649,10 +1679,10 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen, struct ac_llvm
|
|||
struct si_shader *shader, struct pipe_debug_callback *debug)
|
||||
{
|
||||
if (sscreen->info.chip_class >= GFX9) {
|
||||
struct si_shader *ls_main_part = shader->key.part.tcs.ls->main_shader_part_ls;
|
||||
struct si_shader *ls_main_part = shader->key.ge.part.tcs.ls->main_shader_part_ls;
|
||||
|
||||
if (!si_get_vs_prolog(sscreen, compiler, shader, debug, ls_main_part,
|
||||
&shader->key.part.tcs.ls_prolog))
|
||||
&shader->key.ge.part.tcs.ls_prolog))
|
||||
return false;
|
||||
|
||||
shader->previous_stage = ls_main_part;
|
||||
|
|
@ -1661,7 +1691,7 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen, struct ac_llvm
|
|||
/* Get the epilog. */
|
||||
union si_shader_part_key epilog_key;
|
||||
memset(&epilog_key, 0, sizeof(epilog_key));
|
||||
epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog;
|
||||
epilog_key.tcs_epilog.states = shader->key.ge.part.tcs.epilog;
|
||||
|
||||
shader->epilog = si_get_shader_part(sscreen, &sscreen->tcs_epilogs, MESA_SHADER_TESS_CTRL, false,
|
||||
&epilog_key, compiler, debug, si_llvm_build_tcs_epilog,
|
||||
|
|
@ -1678,26 +1708,26 @@ static bool si_shader_select_gs_parts(struct si_screen *sscreen, struct ac_llvm_
|
|||
if (sscreen->info.chip_class >= GFX9) {
|
||||
struct si_shader *es_main_part;
|
||||
|
||||
if (shader->key.as_ngg)
|
||||
es_main_part = shader->key.part.gs.es->main_shader_part_ngg_es;
|
||||
if (shader->key.ge.as_ngg)
|
||||
es_main_part = shader->key.ge.part.gs.es->main_shader_part_ngg_es;
|
||||
else
|
||||
es_main_part = shader->key.part.gs.es->main_shader_part_es;
|
||||
es_main_part = shader->key.ge.part.gs.es->main_shader_part_es;
|
||||
|
||||
if (shader->key.part.gs.es->info.stage == MESA_SHADER_VERTEX &&
|
||||
if (shader->key.ge.part.gs.es->info.stage == MESA_SHADER_VERTEX &&
|
||||
!si_get_vs_prolog(sscreen, compiler, shader, debug, es_main_part,
|
||||
&shader->key.part.gs.vs_prolog))
|
||||
&shader->key.ge.part.gs.vs_prolog))
|
||||
return false;
|
||||
|
||||
shader->previous_stage = es_main_part;
|
||||
}
|
||||
|
||||
if (!shader->key.part.gs.prolog.tri_strip_adj_fix)
|
||||
if (!shader->key.ge.part.gs.prolog.tri_strip_adj_fix)
|
||||
return true;
|
||||
|
||||
union si_shader_part_key prolog_key;
|
||||
memset(&prolog_key, 0, sizeof(prolog_key));
|
||||
prolog_key.gs_prolog.states = shader->key.part.gs.prolog;
|
||||
prolog_key.gs_prolog.as_ngg = shader->key.as_ngg;
|
||||
prolog_key.gs_prolog.states = shader->key.ge.part.gs.prolog;
|
||||
prolog_key.gs_prolog.as_ngg = shader->key.ge.as_ngg;
|
||||
|
||||
shader->prolog2 =
|
||||
si_get_shader_part(sscreen, &sscreen->gs_prologs, MESA_SHADER_GEOMETRY, true, &prolog_key,
|
||||
|
|
@ -1715,7 +1745,7 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke
|
|||
struct si_shader_info *info = &shader->selector->info;
|
||||
|
||||
memset(key, 0, sizeof(*key));
|
||||
key->ps_prolog.states = shader->key.part.ps.prolog;
|
||||
key->ps_prolog.states = shader->key.ps.part.prolog;
|
||||
key->ps_prolog.colors_read = info->colors_read;
|
||||
key->ps_prolog.num_input_sgprs = shader->info.num_input_sgprs;
|
||||
key->ps_prolog.num_input_vgprs = shader->info.num_input_vgprs;
|
||||
|
|
@ -1731,7 +1761,7 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke
|
|||
if (info->colors_read) {
|
||||
ubyte *color = shader->selector->color_attr_index;
|
||||
|
||||
if (shader->key.part.ps.prolog.color_two_side) {
|
||||
if (shader->key.ps.part.prolog.color_two_side) {
|
||||
/* BCOLORs are stored after the last input. */
|
||||
key->ps_prolog.num_interp_inputs = info->num_inputs;
|
||||
key->ps_prolog.face_vgpr_index = shader->info.face_vgpr_index;
|
||||
|
|
@ -1748,7 +1778,7 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke
|
|||
|
||||
key->ps_prolog.color_attr_index[i] = color[i];
|
||||
|
||||
if (shader->key.part.ps.prolog.flatshade_colors && interp == INTERP_MODE_COLOR)
|
||||
if (shader->key.ps.part.prolog.flatshade_colors && interp == INTERP_MODE_COLOR)
|
||||
interp = INTERP_MODE_FLAT;
|
||||
|
||||
switch (interp) {
|
||||
|
|
@ -1758,9 +1788,9 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke
|
|||
case INTERP_MODE_SMOOTH:
|
||||
case INTERP_MODE_COLOR:
|
||||
/* Force the interpolation location for colors here. */
|
||||
if (shader->key.part.ps.prolog.force_persp_sample_interp)
|
||||
if (shader->key.ps.part.prolog.force_persp_sample_interp)
|
||||
location = TGSI_INTERPOLATE_LOC_SAMPLE;
|
||||
if (shader->key.part.ps.prolog.force_persp_center_interp)
|
||||
if (shader->key.ps.part.prolog.force_persp_center_interp)
|
||||
location = TGSI_INTERPOLATE_LOC_CENTER;
|
||||
|
||||
switch (location) {
|
||||
|
|
@ -1788,9 +1818,9 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke
|
|||
break;
|
||||
case INTERP_MODE_NOPERSPECTIVE:
|
||||
/* Force the interpolation location for colors here. */
|
||||
if (shader->key.part.ps.prolog.force_linear_sample_interp)
|
||||
if (shader->key.ps.part.prolog.force_linear_sample_interp)
|
||||
location = TGSI_INTERPOLATE_LOC_SAMPLE;
|
||||
if (shader->key.part.ps.prolog.force_linear_center_interp)
|
||||
if (shader->key.ps.part.prolog.force_linear_center_interp)
|
||||
location = TGSI_INTERPOLATE_LOC_CENTER;
|
||||
|
||||
/* The VGPR assignment for non-monolithic shaders
|
||||
|
|
@ -1854,7 +1884,7 @@ void si_get_ps_epilog_key(struct si_shader *shader, union si_shader_part_key *ke
|
|||
key->ps_epilog.writes_z = info->writes_z;
|
||||
key->ps_epilog.writes_stencil = info->writes_stencil;
|
||||
key->ps_epilog.writes_samplemask = info->writes_samplemask;
|
||||
key->ps_epilog.states = shader->key.part.ps.epilog;
|
||||
key->ps_epilog.states = shader->key.ps.part.epilog;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1888,34 +1918,34 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, struct ac_llvm_
|
|||
return false;
|
||||
|
||||
/* Enable POS_FIXED_PT if polygon stippling is enabled. */
|
||||
if (shader->key.part.ps.prolog.poly_stipple) {
|
||||
if (shader->key.ps.part.prolog.poly_stipple) {
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1);
|
||||
assert(G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr));
|
||||
}
|
||||
|
||||
/* Set up the enable bits for per-sample shading if needed. */
|
||||
if (shader->key.part.ps.prolog.force_persp_sample_interp &&
|
||||
if (shader->key.ps.part.prolog.force_persp_sample_interp &&
|
||||
(G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_ena) ||
|
||||
G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTER_ENA;
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
|
||||
}
|
||||
if (shader->key.part.ps.prolog.force_linear_sample_interp &&
|
||||
if (shader->key.ps.part.prolog.force_linear_sample_interp &&
|
||||
(G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_ena) ||
|
||||
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTER_ENA;
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
|
||||
}
|
||||
if (shader->key.part.ps.prolog.force_persp_center_interp &&
|
||||
if (shader->key.ps.part.prolog.force_persp_center_interp &&
|
||||
(G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
|
||||
G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_SAMPLE_ENA;
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
|
||||
}
|
||||
if (shader->key.part.ps.prolog.force_linear_center_interp &&
|
||||
if (shader->key.ps.part.prolog.force_linear_center_interp &&
|
||||
(G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
|
||||
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_SAMPLE_ENA;
|
||||
|
|
@ -1937,7 +1967,7 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, struct ac_llvm_
|
|||
}
|
||||
|
||||
/* Samplemask fixup requires the sample ID. */
|
||||
if (shader->key.part.ps.prolog.samplemask_log_ps_iter) {
|
||||
if (shader->key.ps.part.prolog.samplemask_log_ps_iter) {
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_ANCILLARY_ENA(1);
|
||||
assert(G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr));
|
||||
}
|
||||
|
|
@ -1945,7 +1975,7 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, struct ac_llvm_
|
|||
/* The sample mask input is always enabled, because the API shader always
|
||||
* passes it through to the epilog. Disable it here if it's unused.
|
||||
*/
|
||||
if (!shader->key.part.ps.epilog.poly_line_smoothing && !shader->selector->info.reads_samplemask)
|
||||
if (!shader->key.ps.part.epilog.poly_line_smoothing && !shader->selector->info.reads_samplemask)
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA;
|
||||
|
||||
return true;
|
||||
|
|
@ -2098,8 +2128,8 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
|
|||
si_calculate_max_simd_waves(shader);
|
||||
}
|
||||
|
||||
if (shader->key.as_ngg) {
|
||||
assert(!shader->key.as_es && !shader->key.as_ls);
|
||||
if (sel->info.stage <= MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg) {
|
||||
assert(!shader->key.ge.as_es && !shader->key.ge.as_ls);
|
||||
if (!gfx10_ngg_calculate_subgroup_info(shader)) {
|
||||
fprintf(stderr, "Failed to compute subgroup info\n");
|
||||
return false;
|
||||
|
|
@ -2115,7 +2145,7 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
|
|||
util_rast_prim_is_triangles(sel->info.base.gs.output_primitive)) ||
|
||||
(sel->info.stage == MESA_SHADER_VERTEX &&
|
||||
/* Used to export PrimitiveID from the correct vertex. */
|
||||
shader->key.mono.u.vs_export_prim_id));
|
||||
shader->key.ge.mono.u.vs_export_prim_id));
|
||||
|
||||
shader->uses_vs_state_outprim = sscreen->use_ngg &&
|
||||
/* Only used by streamout in vertex shaders. */
|
||||
|
|
@ -2124,18 +2154,18 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
|
|||
|
||||
if (sel->info.stage == MESA_SHADER_VERTEX) {
|
||||
shader->uses_base_instance = sel->info.uses_base_instance ||
|
||||
shader->key.part.vs.prolog.instance_divisor_is_one ||
|
||||
shader->key.part.vs.prolog.instance_divisor_is_fetched;
|
||||
shader->key.ge.part.vs.prolog.instance_divisor_is_one ||
|
||||
shader->key.ge.part.vs.prolog.instance_divisor_is_fetched;
|
||||
} else if (sel->info.stage == MESA_SHADER_TESS_CTRL) {
|
||||
shader->uses_base_instance = shader->previous_stage_sel &&
|
||||
(shader->previous_stage_sel->info.uses_base_instance ||
|
||||
shader->key.part.tcs.ls_prolog.instance_divisor_is_one ||
|
||||
shader->key.part.tcs.ls_prolog.instance_divisor_is_fetched);
|
||||
shader->key.ge.part.tcs.ls_prolog.instance_divisor_is_one ||
|
||||
shader->key.ge.part.tcs.ls_prolog.instance_divisor_is_fetched);
|
||||
} else if (sel->info.stage == MESA_SHADER_GEOMETRY) {
|
||||
shader->uses_base_instance = shader->previous_stage_sel &&
|
||||
(shader->previous_stage_sel->info.uses_base_instance ||
|
||||
shader->key.part.gs.vs_prolog.instance_divisor_is_one ||
|
||||
shader->key.part.gs.vs_prolog.instance_divisor_is_fetched);
|
||||
shader->key.ge.part.gs.vs_prolog.instance_divisor_is_one ||
|
||||
shader->key.ge.part.gs.vs_prolog.instance_divisor_is_fetched);
|
||||
}
|
||||
|
||||
si_fix_resource_usage(sscreen, shader);
|
||||
|
|
|
|||
|
|
@ -618,7 +618,8 @@ union si_shader_part_key {
|
|||
} ps_epilog;
|
||||
};
|
||||
|
||||
struct si_shader_key {
|
||||
/* The shader key for geometry stages (VS, TCS, TES, GS) */
|
||||
struct si_shader_key_ge {
|
||||
/* Prolog and epilog flags. */
|
||||
union {
|
||||
struct {
|
||||
|
|
@ -634,10 +635,6 @@ struct si_shader_key {
|
|||
struct si_shader_selector *es; /* for merged ES-GS */
|
||||
struct si_gs_prolog_bits prolog;
|
||||
} gs;
|
||||
struct {
|
||||
struct si_ps_prolog_bits prolog;
|
||||
struct si_ps_epilog_bits epilog;
|
||||
} ps;
|
||||
} part;
|
||||
|
||||
/* These three are initially set according to the NEXT_SHADER property,
|
||||
|
|
@ -660,12 +657,6 @@ struct si_shader_key {
|
|||
uint64_t ff_tcs_inputs_to_copy; /* for fixed-func TCS */
|
||||
/* When PS needs PrimID and GS is disabled. */
|
||||
unsigned vs_export_prim_id : 1;
|
||||
struct {
|
||||
unsigned interpolate_at_sample_force_center : 1;
|
||||
unsigned fbfetch_msaa : 1;
|
||||
unsigned fbfetch_is_1D : 1;
|
||||
unsigned fbfetch_layered : 1;
|
||||
} ps;
|
||||
} u;
|
||||
} mono;
|
||||
|
||||
|
|
@ -699,6 +690,44 @@ struct si_shader_key {
|
|||
} opt;
|
||||
};
|
||||
|
||||
struct si_shader_key_ps {
|
||||
struct {
|
||||
/* Prolog and epilog flags. */
|
||||
struct si_ps_prolog_bits prolog;
|
||||
struct si_ps_epilog_bits epilog;
|
||||
} part;
|
||||
|
||||
/* Flags for monolithic compilation only. */
|
||||
struct {
|
||||
unsigned interpolate_at_sample_force_center : 1;
|
||||
unsigned fbfetch_msaa : 1;
|
||||
unsigned fbfetch_is_1D : 1;
|
||||
unsigned fbfetch_layered : 1;
|
||||
} mono;
|
||||
|
||||
/* Optimization flags for asynchronous compilation only. */
|
||||
struct {
|
||||
/* For shaders where monolithic variants have better code.
|
||||
*
|
||||
* This is a flag that has no effect on code generation,
|
||||
* but forces monolithic shaders to be used as soon as
|
||||
* possible, because it's in the "opt" group.
|
||||
*/
|
||||
unsigned prefer_mono : 1;
|
||||
unsigned inline_uniforms:1;
|
||||
|
||||
/* This must be kept last to limit the number of variants
|
||||
* depending only on the uniform values.
|
||||
*/
|
||||
uint32_t inlined_uniform_values[MAX_INLINABLE_UNIFORMS];
|
||||
} opt;
|
||||
};
|
||||
|
||||
union si_shader_key {
|
||||
struct si_shader_key_ge ge; /* geometry engine shaders */
|
||||
struct si_shader_key_ps ps;
|
||||
};
|
||||
|
||||
/* Restore the pack alignment to default. */
|
||||
#pragma pack(pop)
|
||||
|
||||
|
|
@ -777,7 +806,7 @@ struct si_shader {
|
|||
|
||||
struct si_resource *bo;
|
||||
struct si_resource *scratch_bo;
|
||||
struct si_shader_key key;
|
||||
union si_shader_key key;
|
||||
struct util_queue_fence ready;
|
||||
bool compilation_failed;
|
||||
bool is_monolithic;
|
||||
|
|
@ -927,16 +956,18 @@ bool gfx10_is_ngg_passthrough(struct si_shader *shader);
|
|||
|
||||
/* Return the pointer to the main shader part's pointer. */
|
||||
static inline struct si_shader **si_get_main_shader_part(struct si_shader_selector *sel,
|
||||
const struct si_shader_key *key)
|
||||
const union si_shader_key *key)
|
||||
{
|
||||
if (key->as_ls)
|
||||
return &sel->main_shader_part_ls;
|
||||
if (key->as_es && key->as_ngg)
|
||||
return &sel->main_shader_part_ngg_es;
|
||||
if (key->as_es)
|
||||
return &sel->main_shader_part_es;
|
||||
if (key->as_ngg)
|
||||
return &sel->main_shader_part_ngg;
|
||||
if (sel->info.stage <= MESA_SHADER_GEOMETRY) {
|
||||
if (key->ge.as_ls)
|
||||
return &sel->main_shader_part_ls;
|
||||
if (key->ge.as_es && key->ge.as_ngg)
|
||||
return &sel->main_shader_part_ngg_es;
|
||||
if (key->ge.as_es)
|
||||
return &sel->main_shader_part_es;
|
||||
if (key->ge.as_ngg)
|
||||
return &sel->main_shader_part_ngg;
|
||||
}
|
||||
return &sel->main_shader_part;
|
||||
}
|
||||
|
||||
|
|
@ -954,7 +985,7 @@ static inline bool gfx10_edgeflags_have_effect(struct si_shader *shader)
|
|||
{
|
||||
if (shader->selector->info.stage == MESA_SHADER_VERTEX &&
|
||||
!shader->selector->info.base.vs.blit_sgprs_amd &&
|
||||
!(shader->key.opt.ngg_culling & SI_NGG_CULL_LINES))
|
||||
!(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -168,12 +168,12 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader);
|
|||
unsigned si_get_max_workgroup_size(const struct si_shader *shader);
|
||||
bool si_vs_needs_prolog(const struct si_shader_selector *sel,
|
||||
const struct si_vs_prolog_bits *prolog_key,
|
||||
const struct si_shader_key *key, bool ngg_cull_shader);
|
||||
const union si_shader_key *key, bool ngg_cull_shader);
|
||||
void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_sgprs,
|
||||
bool ngg_cull_shader, const struct si_vs_prolog_bits *prolog_key,
|
||||
struct si_shader *shader_out, union si_shader_part_key *key);
|
||||
struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
|
||||
const struct si_shader_key *key,
|
||||
const union si_shader_key *key,
|
||||
bool *free_nir);
|
||||
bool si_need_ps_prolog(const union si_shader_part_key *key);
|
||||
void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *key,
|
||||
|
|
|
|||
|
|
@ -150,10 +150,10 @@ void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTy
|
|||
gl_shader_stage real_stage = ctx->stage;
|
||||
|
||||
/* LS is merged into HS (TCS), and ES is merged into GS. */
|
||||
if (ctx->screen->info.chip_class >= GFX9) {
|
||||
if (ctx->shader->key.as_ls)
|
||||
if (ctx->screen->info.chip_class >= GFX9 && ctx->stage <= MESA_SHADER_GEOMETRY) {
|
||||
if (ctx->shader->key.ge.as_ls)
|
||||
real_stage = MESA_SHADER_TESS_CTRL;
|
||||
else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg)
|
||||
else if (ctx->shader->key.ge.as_es || ctx->shader->key.ge.as_ngg)
|
||||
real_stage = MESA_SHADER_GEOMETRY;
|
||||
}
|
||||
|
||||
|
|
@ -219,7 +219,8 @@ void si_llvm_create_main_func(struct si_shader_context *ctx, bool ngg_cull_shade
|
|||
}
|
||||
|
||||
|
||||
if (shader->key.as_ls || ctx->stage == MESA_SHADER_TESS_CTRL) {
|
||||
if (ctx->stage <= MESA_SHADER_GEOMETRY &&
|
||||
(shader->key.ge.as_ls || ctx->stage == MESA_SHADER_TESS_CTRL)) {
|
||||
if (USE_LDS_SYMBOLS) {
|
||||
/* The LSHS size is not known until draw time, so we append it
|
||||
* at the end of whatever LDS use there may be in the rest of
|
||||
|
|
@ -470,7 +471,7 @@ static bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *
|
|||
}
|
||||
|
||||
ctx->abi.interp_at_sample_force_center =
|
||||
ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center;
|
||||
ctx->shader->key.ps.mono.interpolate_at_sample_force_center;
|
||||
|
||||
ctx->abi.kill_ps_if_inf_interp =
|
||||
ctx->screen->options.no_infinite_interp &&
|
||||
|
|
@ -854,7 +855,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
|||
|
||||
si_llvm_create_main_func(ctx, ngg_cull_shader);
|
||||
|
||||
if (ctx->shader->key.as_es || ctx->stage == MESA_SHADER_GEOMETRY)
|
||||
if (ctx->stage <= MESA_SHADER_GEOMETRY &&
|
||||
(ctx->shader->key.ge.as_es || ctx->stage == MESA_SHADER_GEOMETRY))
|
||||
si_preload_esgs_ring(ctx);
|
||||
|
||||
if (ctx->stage == MESA_SHADER_GEOMETRY)
|
||||
|
|
@ -872,7 +874,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
|||
for (unsigned i = 0; i < 4; i++) {
|
||||
ctx->gs_next_vertex[i] = ac_build_alloca(&ctx->ac, ctx->ac.i32, "");
|
||||
}
|
||||
if (shader->key.as_ngg) {
|
||||
if (shader->key.ge.as_ngg) {
|
||||
for (unsigned i = 0; i < 4; ++i) {
|
||||
ctx->gs_curprim_verts[i] = ac_build_alloca(&ctx->ac, ctx->ac.i32, "");
|
||||
ctx->gs_generated_prims[i] = ac_build_alloca(&ctx->ac, ctx->ac.i32, "");
|
||||
|
|
@ -892,7 +894,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
|||
}
|
||||
}
|
||||
|
||||
if (ctx->stage != MESA_SHADER_GEOMETRY && (shader->key.as_ngg && !shader->key.as_es)) {
|
||||
if ((ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL) &&
|
||||
shader->key.ge.as_ngg && !shader->key.ge.as_es) {
|
||||
/* Unconditionally declare scratch space base for streamout and
|
||||
* vertex compaction. Whether space is actually allocated is
|
||||
* determined during linking / PM4 creation.
|
||||
|
|
@ -902,7 +905,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
|||
/* This is really only needed when streamout and / or vertex
|
||||
* compaction is enabled.
|
||||
*/
|
||||
if (!ctx->gs_ngg_scratch && (sel->so.num_outputs || shader->key.opt.ngg_culling)) {
|
||||
if (!ctx->gs_ngg_scratch && (sel->so.num_outputs || shader->key.ge.opt.ngg_culling)) {
|
||||
LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader));
|
||||
ctx->gs_ngg_scratch =
|
||||
LLVMAddGlobalInAddressSpace(ctx->ac.module, asi32, "ngg_scratch", AC_ADDR_SPACE_LDS);
|
||||
|
|
@ -918,8 +921,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
|||
/* TES is special because it has only 1 shader part if NGG shader culling is disabled,
|
||||
* and therefore it doesn't use the wrapper function.
|
||||
*/
|
||||
bool no_wrapper_func = ctx->stage == MESA_SHADER_TESS_EVAL && !shader->key.as_es &&
|
||||
!shader->key.opt.ngg_culling;
|
||||
bool no_wrapper_func = ctx->stage == MESA_SHADER_TESS_EVAL && !shader->key.ge.as_es &&
|
||||
!shader->key.ge.opt.ngg_culling;
|
||||
|
||||
/* Set EXEC = ~0 before the first shader. If the prolog is present, EXEC is set there
|
||||
* instead. For monolithic shaders, the wrapper function does this.
|
||||
|
|
@ -927,14 +930,14 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
|||
if ((!shader->is_monolithic || no_wrapper_func) &&
|
||||
(ctx->stage == MESA_SHADER_TESS_EVAL ||
|
||||
(ctx->stage == MESA_SHADER_VERTEX &&
|
||||
!si_vs_needs_prolog(sel, &shader->key.part.vs.prolog, &shader->key, ngg_cull_shader))))
|
||||
!si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, ngg_cull_shader))))
|
||||
ac_init_exec_full_mask(&ctx->ac);
|
||||
|
||||
/* NGG VS and NGG TES: Send gs_alloc_req and the prim export at the beginning to decrease
|
||||
* register usage.
|
||||
*/
|
||||
if ((ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL) &&
|
||||
shader->key.as_ngg && !shader->key.as_es && !shader->key.opt.ngg_culling) {
|
||||
shader->key.ge.as_ngg && !shader->key.ge.as_es && !shader->key.ge.opt.ngg_culling) {
|
||||
/* GFX10 requires a barrier before gs_alloc_req due to a hw bug. */
|
||||
if (ctx->screen->info.chip_class == GFX10)
|
||||
ac_build_s_barrier(&ctx->ac);
|
||||
|
|
@ -949,7 +952,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
|||
}
|
||||
|
||||
/* NGG GS: Initialize LDS and insert s_barrier, which must not be inside the if statement. */
|
||||
if (ctx->stage == MESA_SHADER_GEOMETRY && shader->key.as_ngg)
|
||||
if (ctx->stage == MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg)
|
||||
gfx10_ngg_gs_emit_prologue(ctx);
|
||||
|
||||
if (ctx->stage == MESA_SHADER_GEOMETRY ||
|
||||
|
|
@ -959,8 +962,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
|||
* not here.
|
||||
*/
|
||||
thread_enabled = si_is_gs_thread(ctx); /* 2nd shader: thread enabled bool */
|
||||
} else if (((shader->key.as_ls || shader->key.as_es) && !shader->is_monolithic) ||
|
||||
(shader->key.as_ngg && !shader->key.as_es)) {
|
||||
} else if (((shader->key.ge.as_ls || shader->key.ge.as_es) && !shader->is_monolithic) ||
|
||||
(shader->key.ge.as_ngg && !shader->key.ge.as_es)) {
|
||||
/* This is NGG VS or NGG TES or VS before GS or TES before GS or VS before TCS.
|
||||
* For monolithic LS (VS before TCS) and ES (VS before GS and TES before GS),
|
||||
* the if statement is inserted by the wrapper function.
|
||||
|
|
@ -993,11 +996,11 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
|||
*/
|
||||
if (ctx->stage == MESA_SHADER_TESS_CTRL) {
|
||||
/* We need the barrier only if TCS inputs are read from LDS. */
|
||||
if (!shader->key.opt.same_patch_vertices ||
|
||||
if (!shader->key.ge.opt.same_patch_vertices ||
|
||||
shader->selector->info.base.inputs_read &
|
||||
~shader->selector->tcs_vgpr_only_inputs)
|
||||
ac_build_s_barrier(&ctx->ac);
|
||||
} else if (ctx->stage == MESA_SHADER_GEOMETRY && !shader->key.as_ngg) {
|
||||
} else if (ctx->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
|
||||
/* gfx10_ngg_gs_emit_prologue inserts the barrier for NGG. */
|
||||
ac_build_s_barrier(&ctx->ac);
|
||||
}
|
||||
|
|
@ -1036,7 +1039,7 @@ static void si_optimize_vs_outputs(struct si_shader_context *ctx)
|
|||
unsigned skip_vs_optim_mask = 0;
|
||||
|
||||
if ((ctx->stage != MESA_SHADER_VERTEX && ctx->stage != MESA_SHADER_TESS_EVAL) ||
|
||||
shader->key.as_ls || shader->key.as_es)
|
||||
shader->key.ge.as_ls || shader->key.ge.as_es)
|
||||
return;
|
||||
|
||||
/* Optimizing these outputs is not possible, since they might be overriden
|
||||
|
|
@ -1064,7 +1067,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
|||
si_llvm_context_init(&ctx, sscreen, compiler, si_get_shader_wave_size(shader));
|
||||
|
||||
LLVMValueRef ngg_cull_main_fn = NULL;
|
||||
if (shader->key.opt.ngg_culling) {
|
||||
if (ctx.stage <= MESA_SHADER_GEOMETRY && shader->key.ge.opt.ngg_culling) {
|
||||
if (!si_llvm_translate_nir(&ctx, shader, nir, false, true)) {
|
||||
si_llvm_dispose(&ctx);
|
||||
return false;
|
||||
|
|
@ -1085,10 +1088,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
|||
LLVMValueRef main_fn = ctx.main_fn;
|
||||
|
||||
if (ngg_cull_main_fn) {
|
||||
if (si_vs_needs_prolog(sel, &shader->key.part.vs.prolog, &shader->key, true)) {
|
||||
if (si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, true)) {
|
||||
union si_shader_part_key prolog_key;
|
||||
si_get_vs_prolog_key(&sel->info, shader->info.num_input_sgprs, true,
|
||||
&shader->key.part.vs.prolog, shader, &prolog_key);
|
||||
&shader->key.ge.part.vs.prolog, shader, &prolog_key);
|
||||
prolog_key.vs_prolog.is_monolithic = true;
|
||||
si_llvm_build_vs_prolog(&ctx, &prolog_key);
|
||||
parts[num_parts++] = ctx.main_fn;
|
||||
|
|
@ -1097,10 +1100,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
|||
parts[num_parts++] = ngg_cull_main_fn;
|
||||
}
|
||||
|
||||
if (si_vs_needs_prolog(sel, &shader->key.part.vs.prolog, &shader->key, false)) {
|
||||
if (si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, false)) {
|
||||
union si_shader_part_key prolog_key;
|
||||
si_get_vs_prolog_key(&sel->info, shader->info.num_input_sgprs, false,
|
||||
&shader->key.part.vs.prolog, shader, &prolog_key);
|
||||
&shader->key.ge.part.vs.prolog, shader, &prolog_key);
|
||||
prolog_key.vs_prolog.is_monolithic = true;
|
||||
si_llvm_build_vs_prolog(&ctx, &prolog_key);
|
||||
parts[num_parts++] = ctx.main_fn;
|
||||
|
|
@ -1131,10 +1134,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
|||
si_build_wrapper_function(&ctx, parts, 3, 0, 0, false);
|
||||
} else if (shader->is_monolithic && ctx.stage == MESA_SHADER_TESS_CTRL) {
|
||||
if (sscreen->info.chip_class >= GFX9) {
|
||||
struct si_shader_selector *ls = shader->key.part.tcs.ls;
|
||||
struct si_shader_selector *ls = shader->key.ge.part.tcs.ls;
|
||||
LLVMValueRef parts[4];
|
||||
bool vs_needs_prolog =
|
||||
si_vs_needs_prolog(ls, &shader->key.part.tcs.ls_prolog, &shader->key, false);
|
||||
si_vs_needs_prolog(ls, &shader->key.ge.part.tcs.ls_prolog, &shader->key, false);
|
||||
|
||||
/* TCS main part */
|
||||
parts[2] = ctx.main_fn;
|
||||
|
|
@ -1142,7 +1145,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
|||
/* TCS epilog */
|
||||
union si_shader_part_key tcs_epilog_key;
|
||||
memset(&tcs_epilog_key, 0, sizeof(tcs_epilog_key));
|
||||
tcs_epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog;
|
||||
tcs_epilog_key.tcs_epilog.states = shader->key.ge.part.tcs.epilog;
|
||||
si_llvm_build_tcs_epilog(&ctx, &tcs_epilog_key);
|
||||
parts[3] = ctx.main_fn;
|
||||
|
||||
|
|
@ -1151,9 +1154,9 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
|||
nir = si_get_nir_shader(ls, NULL, &free_nir);
|
||||
struct si_shader shader_ls = {};
|
||||
shader_ls.selector = ls;
|
||||
shader_ls.key.as_ls = 1;
|
||||
shader_ls.key.mono = shader->key.mono;
|
||||
shader_ls.key.opt = shader->key.opt;
|
||||
shader_ls.key.ge.as_ls = 1;
|
||||
shader_ls.key.ge.mono = shader->key.ge.mono;
|
||||
shader_ls.key.ge.opt = shader->key.ge.opt;
|
||||
shader_ls.is_monolithic = true;
|
||||
|
||||
if (!si_llvm_translate_nir(&ctx, &shader_ls, nir, free_nir, false)) {
|
||||
|
|
@ -1167,7 +1170,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
|||
if (vs_needs_prolog) {
|
||||
union si_shader_part_key vs_prolog_key;
|
||||
si_get_vs_prolog_key(&ls->info, shader_ls.info.num_input_sgprs, false,
|
||||
&shader->key.part.tcs.ls_prolog, shader, &vs_prolog_key);
|
||||
&shader->key.ge.part.tcs.ls_prolog, shader, &vs_prolog_key);
|
||||
vs_prolog_key.vs_prolog.is_monolithic = true;
|
||||
si_llvm_build_vs_prolog(&ctx, &vs_prolog_key);
|
||||
parts[0] = ctx.main_fn;
|
||||
|
|
@ -1179,7 +1182,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
|||
|
||||
si_build_wrapper_function(&ctx, parts + !vs_needs_prolog, 4 - !vs_needs_prolog,
|
||||
vs_needs_prolog, vs_needs_prolog ? 2 : 1,
|
||||
shader->key.opt.same_patch_vertices);
|
||||
shader->key.ge.opt.same_patch_vertices);
|
||||
} else {
|
||||
LLVMValueRef parts[2];
|
||||
union si_shader_part_key epilog_key;
|
||||
|
|
@ -1187,7 +1190,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
|||
parts[0] = ctx.main_fn;
|
||||
|
||||
memset(&epilog_key, 0, sizeof(epilog_key));
|
||||
epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog;
|
||||
epilog_key.tcs_epilog.states = shader->key.ge.part.tcs.epilog;
|
||||
si_llvm_build_tcs_epilog(&ctx, &epilog_key);
|
||||
parts[1] = ctx.main_fn;
|
||||
|
||||
|
|
@ -1195,7 +1198,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
|||
}
|
||||
} else if (shader->is_monolithic && ctx.stage == MESA_SHADER_GEOMETRY) {
|
||||
if (ctx.screen->info.chip_class >= GFX9) {
|
||||
struct si_shader_selector *es = shader->key.part.gs.es;
|
||||
struct si_shader_selector *es = shader->key.ge.part.gs.es;
|
||||
LLVMValueRef es_prolog = NULL;
|
||||
LLVMValueRef es_main = NULL;
|
||||
LLVMValueRef gs_prolog = NULL;
|
||||
|
|
@ -1204,8 +1207,8 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
|||
/* GS prolog */
|
||||
union si_shader_part_key gs_prolog_key;
|
||||
memset(&gs_prolog_key, 0, sizeof(gs_prolog_key));
|
||||
gs_prolog_key.gs_prolog.states = shader->key.part.gs.prolog;
|
||||
gs_prolog_key.gs_prolog.as_ngg = shader->key.as_ngg;
|
||||
gs_prolog_key.gs_prolog.states = shader->key.ge.part.gs.prolog;
|
||||
gs_prolog_key.gs_prolog.as_ngg = shader->key.ge.as_ngg;
|
||||
si_llvm_build_gs_prolog(&ctx, &gs_prolog_key);
|
||||
gs_prolog = ctx.main_fn;
|
||||
|
||||
|
|
@ -1213,10 +1216,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
|||
nir = si_get_nir_shader(es, NULL, &free_nir);
|
||||
struct si_shader shader_es = {};
|
||||
shader_es.selector = es;
|
||||
shader_es.key.as_es = 1;
|
||||
shader_es.key.as_ngg = shader->key.as_ngg;
|
||||
shader_es.key.mono = shader->key.mono;
|
||||
shader_es.key.opt = shader->key.opt;
|
||||
shader_es.key.ge.as_es = 1;
|
||||
shader_es.key.ge.as_ngg = shader->key.ge.as_ngg;
|
||||
shader_es.key.ge.mono = shader->key.ge.mono;
|
||||
shader_es.key.ge.opt = shader->key.ge.opt;
|
||||
shader_es.is_monolithic = true;
|
||||
|
||||
if (!si_llvm_translate_nir(&ctx, &shader_es, nir, free_nir, false)) {
|
||||
|
|
@ -1228,10 +1231,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
|||
|
||||
/* ES prolog */
|
||||
if (es->info.stage == MESA_SHADER_VERTEX &&
|
||||
si_vs_needs_prolog(es, &shader->key.part.gs.vs_prolog, &shader->key, false)) {
|
||||
si_vs_needs_prolog(es, &shader->key.ge.part.gs.vs_prolog, &shader->key, false)) {
|
||||
union si_shader_part_key vs_prolog_key;
|
||||
si_get_vs_prolog_key(&es->info, shader_es.info.num_input_sgprs, false,
|
||||
&shader->key.part.gs.vs_prolog, shader, &vs_prolog_key);
|
||||
&shader->key.ge.part.gs.vs_prolog, shader, &vs_prolog_key);
|
||||
vs_prolog_key.vs_prolog.is_monolithic = true;
|
||||
si_llvm_build_vs_prolog(&ctx, &vs_prolog_key);
|
||||
es_prolog = ctx.main_fn;
|
||||
|
|
@ -1260,7 +1263,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
|||
parts[1] = ctx.main_fn;
|
||||
|
||||
memset(&prolog_key, 0, sizeof(prolog_key));
|
||||
prolog_key.gs_prolog.states = shader->key.part.gs.prolog;
|
||||
prolog_key.gs_prolog.states = shader->key.ge.part.gs.prolog;
|
||||
si_llvm_build_gs_prolog(&ctx, &prolog_key);
|
||||
parts[0] = ctx.main_fn;
|
||||
|
||||
|
|
|
|||
|
|
@ -108,7 +108,7 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
|
|||
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0);
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1);
|
||||
if (ctx->shader->key.as_ngg)
|
||||
if (ctx->shader->key.ge.as_ngg)
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->args.gs_tg_info, 2);
|
||||
else
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->args.gs2vs_offset, 2);
|
||||
|
|
@ -199,7 +199,7 @@ static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx)
|
|||
|
||||
static void emit_gs_epilogue(struct si_shader_context *ctx)
|
||||
{
|
||||
if (ctx->shader->key.as_ngg) {
|
||||
if (ctx->shader->key.ge.as_ngg) {
|
||||
gfx10_ngg_gs_emit_epilogue(ctx);
|
||||
return;
|
||||
}
|
||||
|
|
@ -228,7 +228,7 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVM
|
|||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
|
||||
if (ctx->shader->key.as_ngg) {
|
||||
if (ctx->shader->key.ge.as_ngg) {
|
||||
gfx10_ngg_gs_emit_vertex(ctx, stream, addrs);
|
||||
return;
|
||||
}
|
||||
|
|
@ -303,7 +303,7 @@ static void si_llvm_emit_primitive(struct ac_shader_abi *abi, unsigned stream)
|
|||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
|
||||
if (ctx->shader->key.as_ngg) {
|
||||
if (ctx->shader->key.ge.as_ngg) {
|
||||
LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_0, ctx->gs_curprim_verts[stream]);
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -79,22 +79,22 @@ static LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi)
|
|||
|
||||
args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 0, 16);
|
||||
|
||||
if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D)
|
||||
if (!ctx->shader->key.ps.mono.fbfetch_is_1D)
|
||||
args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 16, 16);
|
||||
|
||||
/* Get the current render target layer index. */
|
||||
if (ctx->shader->key.mono.u.ps.fbfetch_layered)
|
||||
if (ctx->shader->key.ps.mono.fbfetch_layered)
|
||||
args.coords[chan++] = si_unpack_param(ctx, ctx->args.ancillary, 16, 11);
|
||||
|
||||
if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
|
||||
if (ctx->shader->key.ps.mono.fbfetch_msaa)
|
||||
args.coords[chan++] = si_get_sample_id(ctx);
|
||||
|
||||
if (ctx->shader->key.mono.u.ps.fbfetch_msaa && !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
|
||||
if (ctx->shader->key.ps.mono.fbfetch_msaa && !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
|
||||
fmask = ac_build_load_to_sgpr(&ctx->ac, ptr,
|
||||
LLVMConstInt(ctx->ac.i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0));
|
||||
|
||||
ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords,
|
||||
ctx->shader->key.mono.u.ps.fbfetch_layered);
|
||||
ctx->shader->key.ps.mono.fbfetch_layered);
|
||||
}
|
||||
|
||||
args.opcode = ac_image_load;
|
||||
|
|
@ -102,13 +102,13 @@ static LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi)
|
|||
args.dmask = 0xf;
|
||||
args.attributes = AC_FUNC_ATTR_READNONE;
|
||||
|
||||
if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
|
||||
if (ctx->shader->key.ps.mono.fbfetch_msaa)
|
||||
args.dim =
|
||||
ctx->shader->key.mono.u.ps.fbfetch_layered ? ac_image_2darraymsaa : ac_image_2dmsaa;
|
||||
else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D)
|
||||
args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? ac_image_1darray : ac_image_1d;
|
||||
ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_2darraymsaa : ac_image_2dmsaa;
|
||||
else if (ctx->shader->key.ps.mono.fbfetch_is_1D)
|
||||
args.dim = ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_1darray : ac_image_1d;
|
||||
else
|
||||
args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? ac_image_2darray : ac_image_2d;
|
||||
args.dim = ctx->shader->key.ps.mono.fbfetch_layered ? ac_image_2darray : ac_image_2d;
|
||||
|
||||
return ac_build_image_opcode(&ctx->ac, &args);
|
||||
}
|
||||
|
|
@ -170,7 +170,7 @@ static void interp_fs_color(struct si_shader_context *ctx, unsigned input_index,
|
|||
j = LLVMBuildExtractElement(ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
|
||||
}
|
||||
|
||||
if (ctx->shader->key.part.ps.prolog.color_two_side) {
|
||||
if (ctx->shader->key.ps.part.prolog.color_two_side) {
|
||||
LLVMValueRef is_face_positive;
|
||||
|
||||
/* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1",
|
||||
|
|
@ -199,13 +199,13 @@ static void interp_fs_color(struct si_shader_context *ctx, unsigned input_index,
|
|||
|
||||
static void si_alpha_test(struct si_shader_context *ctx, LLVMValueRef alpha)
|
||||
{
|
||||
if (ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_NEVER) {
|
||||
if (ctx->shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_NEVER) {
|
||||
static LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1] = {
|
||||
[PIPE_FUNC_LESS] = LLVMRealOLT, [PIPE_FUNC_EQUAL] = LLVMRealOEQ,
|
||||
[PIPE_FUNC_LEQUAL] = LLVMRealOLE, [PIPE_FUNC_GREATER] = LLVMRealOGT,
|
||||
[PIPE_FUNC_NOTEQUAL] = LLVMRealONE, [PIPE_FUNC_GEQUAL] = LLVMRealOGE,
|
||||
};
|
||||
LLVMRealPredicate cond = cond_map[ctx->shader->key.part.ps.epilog.alpha_func];
|
||||
LLVMRealPredicate cond = cond_map[ctx->shader->key.ps.part.epilog.alpha_func];
|
||||
assert(cond);
|
||||
|
||||
LLVMValueRef alpha_ref = LLVMGetParam(ctx->main_fn, SI_PARAM_ALPHA_REF);
|
||||
|
|
@ -274,8 +274,8 @@ static void si_llvm_init_ps_export_args(struct si_shader_context *ctx, LLVMValue
|
|||
unsigned cbuf, unsigned compacted_mrt_index,
|
||||
unsigned color_type, struct ac_export_args *args)
|
||||
{
|
||||
const struct si_shader_key *key = &ctx->shader->key;
|
||||
unsigned col_formats = key->part.ps.epilog.spi_shader_col_format;
|
||||
const union si_shader_key *key = &ctx->shader->key;
|
||||
unsigned col_formats = key->ps.part.epilog.spi_shader_col_format;
|
||||
LLVMValueRef f32undef = LLVMGetUndef(ctx->ac.f32);
|
||||
unsigned spi_shader_col_format;
|
||||
unsigned chan;
|
||||
|
|
@ -284,8 +284,8 @@ static void si_llvm_init_ps_export_args(struct si_shader_context *ctx, LLVMValue
|
|||
assert(cbuf < 8);
|
||||
|
||||
spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
|
||||
is_int8 = (key->part.ps.epilog.color_is_int8 >> cbuf) & 0x1;
|
||||
is_int10 = (key->part.ps.epilog.color_is_int10 >> cbuf) & 0x1;
|
||||
is_int8 = (key->ps.part.epilog.color_is_int8 >> cbuf) & 0x1;
|
||||
is_int10 = (key->ps.part.epilog.color_is_int10 >> cbuf) & 0x1;
|
||||
|
||||
/* Default is 0xf. Adjusted below depending on the format. */
|
||||
args->enabled_channels = 0xf; /* writemask */
|
||||
|
|
@ -411,31 +411,31 @@ static bool si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *col
|
|||
int i;
|
||||
|
||||
/* Clamp color */
|
||||
if (ctx->shader->key.part.ps.epilog.clamp_color)
|
||||
if (ctx->shader->key.ps.part.epilog.clamp_color)
|
||||
for (i = 0; i < 4; i++)
|
||||
color[i] = ac_build_clamp(&ctx->ac, color[i]);
|
||||
|
||||
/* Alpha to one */
|
||||
if (ctx->shader->key.part.ps.epilog.alpha_to_one)
|
||||
if (ctx->shader->key.ps.part.epilog.alpha_to_one)
|
||||
color[3] = LLVMConstReal(LLVMTypeOf(color[0]), 1);
|
||||
|
||||
/* Alpha test */
|
||||
if (index == 0 && ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS)
|
||||
if (index == 0 && ctx->shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS)
|
||||
si_alpha_test(ctx, color[3]);
|
||||
|
||||
/* Line & polygon smoothing */
|
||||
if (ctx->shader->key.part.ps.epilog.poly_line_smoothing)
|
||||
if (ctx->shader->key.ps.part.epilog.poly_line_smoothing)
|
||||
color[3] = si_scale_alpha_by_sample_mask(ctx, color[3], samplemask_param);
|
||||
|
||||
/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
|
||||
if (ctx->shader->key.part.ps.epilog.last_cbuf > 0) {
|
||||
if (ctx->shader->key.ps.part.epilog.last_cbuf > 0) {
|
||||
struct ac_export_args args[8];
|
||||
int c, last = -1;
|
||||
|
||||
assert(compacted_mrt_index == 0);
|
||||
|
||||
/* Get the export arguments, also find out what the last one is. */
|
||||
for (c = 0; c <= ctx->shader->key.part.ps.epilog.last_cbuf; c++) {
|
||||
for (c = 0; c <= ctx->shader->key.ps.part.epilog.last_cbuf; c++) {
|
||||
si_llvm_init_ps_export_args(ctx, color, c, compacted_mrt_index,
|
||||
color_type, &args[c]);
|
||||
if (args[c].enabled_channels) {
|
||||
|
|
@ -447,7 +447,7 @@ static bool si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *col
|
|||
return false;
|
||||
|
||||
/* Emit all exports. */
|
||||
for (c = 0; c <= ctx->shader->key.part.ps.epilog.last_cbuf; c++) {
|
||||
for (c = 0; c <= ctx->shader->key.ps.part.epilog.last_cbuf; c++) {
|
||||
if (is_last && last == c) {
|
||||
args[c].valid_mask = 1; /* whether the EXEC mask is valid */
|
||||
args[c].done = 1; /* DONE bit */
|
||||
|
|
|
|||
|
|
@ -71,8 +71,8 @@ static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *
|
|||
{
|
||||
assert(ctx->stage == MESA_SHADER_TESS_CTRL);
|
||||
|
||||
if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
|
||||
return util_last_bit64(ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) * 4;
|
||||
if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy)
|
||||
return util_last_bit64(ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy) * 4;
|
||||
|
||||
return util_last_bit64(ctx->shader->selector->outputs_written) * 4;
|
||||
}
|
||||
|
|
@ -86,7 +86,7 @@ static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)
|
|||
|
||||
static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
|
||||
{
|
||||
if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
|
||||
if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy)
|
||||
return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 0, 13);
|
||||
|
||||
const struct si_shader_info *info = &ctx->shader->selector->info;
|
||||
|
|
@ -160,7 +160,7 @@ static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
|
|||
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
if (ctx->screen->info.chip_class >= GFX9 && ctx->shader->is_monolithic) {
|
||||
stride = ctx->shader->key.part.tcs.ls->lshs_vertex_stride / 4;
|
||||
stride = ctx->shader->key.ge.part.tcs.ls->lshs_vertex_stride / 4;
|
||||
return LLVMConstInt(ctx->ac.i32, stride, 0);
|
||||
}
|
||||
return si_unpack_param(ctx, ctx->vs_state_bits, 24, 8);
|
||||
|
|
@ -396,7 +396,7 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType
|
|||
}
|
||||
|
||||
/* Load the TCS input from a VGPR if possible. */
|
||||
if (ctx->shader->key.opt.same_patch_vertices &&
|
||||
if (ctx->shader->key.ge.opt.same_patch_vertices &&
|
||||
load_input && vertex_index_is_invoc_id && !param_index) {
|
||||
unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 +
|
||||
si_shader_io_get_unique_index(semantic, false) * 4;
|
||||
|
|
@ -650,7 +650,7 @@ static void si_copy_tcs_inputs(struct si_shader_context *ctx)
|
|||
lds_base = get_tcs_in_current_patch_offset(ctx);
|
||||
lds_base = ac_build_imad(&ctx->ac, invocation_id, lds_vertex_stride, lds_base);
|
||||
|
||||
inputs = ctx->shader->key.mono.u.ff_tcs_inputs_to_copy;
|
||||
inputs = ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy;
|
||||
while (inputs) {
|
||||
unsigned i = u_bit_scan64(&inputs);
|
||||
|
||||
|
|
@ -679,7 +679,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re
|
|||
unsigned stride, outer_comps, inner_comps, i, offset;
|
||||
|
||||
/* Add a barrier before loading tess factors from LDS. */
|
||||
if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
|
||||
if (!shader->key.ge.part.tcs.epilog.invoc0_tess_factors_are_def)
|
||||
si_llvm_emit_barrier(ctx);
|
||||
|
||||
/* Do this only for invocation 0, because the tess levels are per-patch,
|
||||
|
|
@ -692,7 +692,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re
|
|||
LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, invocation_id, ctx->ac.i32_0, ""), 6503);
|
||||
|
||||
/* Determine the layout of one tess factor element in the buffer. */
|
||||
switch (shader->key.part.tcs.epilog.prim_mode) {
|
||||
switch (shader->key.ge.part.tcs.epilog.prim_mode) {
|
||||
case GL_LINES:
|
||||
stride = 2; /* 2 dwords, 1 vec2 store */
|
||||
outer_comps = 2;
|
||||
|
|
@ -718,7 +718,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re
|
|||
outer[i] = LLVMGetUndef(ctx->ac.i32);
|
||||
}
|
||||
|
||||
if (shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) {
|
||||
if (shader->key.ge.part.tcs.epilog.invoc0_tess_factors_are_def) {
|
||||
/* Tess factors are in VGPRs. */
|
||||
for (i = 0; i < outer_comps; i++)
|
||||
outer[i] = out[i] = invoc0_tf_outer[i];
|
||||
|
|
@ -745,7 +745,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re
|
|||
}
|
||||
}
|
||||
|
||||
if (shader->key.part.tcs.epilog.prim_mode == GL_LINES) {
|
||||
if (shader->key.ge.part.tcs.epilog.prim_mode == GL_LINES) {
|
||||
/* For isolines, the hardware expects tess factors in the
|
||||
* reverse order from what NIR specifies.
|
||||
*/
|
||||
|
|
@ -789,7 +789,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re
|
|||
ac_glc);
|
||||
|
||||
/* Store the tess factors into the offchip buffer if TES reads them. */
|
||||
if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
|
||||
if (shader->key.ge.part.tcs.epilog.tes_reads_tess_factors) {
|
||||
LLVMValueRef buf, base, inner_vec, outer_vec, tf_outer_offset;
|
||||
LLVMValueRef tf_inner_offset;
|
||||
unsigned param_outer, param_inner;
|
||||
|
|
@ -983,11 +983,11 @@ void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi)
|
|||
|
||||
LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
|
||||
|
||||
if (!shader->key.opt.same_patch_vertices ||
|
||||
if (!shader->key.ge.opt.same_patch_vertices ||
|
||||
!(ctx->next_shader_sel->tcs_vgpr_only_inputs & (1ull << semantic)))
|
||||
lshs_lds_store(ctx, chan, dw_addr, value);
|
||||
|
||||
if (shader->key.opt.same_patch_vertices) {
|
||||
if (shader->key.ge.opt.same_patch_vertices) {
|
||||
ctx->return_value = LLVMBuildInsertValue(ctx->ac.builder, ctx->return_value,
|
||||
value, ret_offset + param * 4 + chan, "");
|
||||
}
|
||||
|
|
@ -1084,11 +1084,11 @@ void si_llvm_init_tes_callbacks(struct si_shader_context *ctx, bool ngg_cull_sha
|
|||
ctx->abi.load_tess_level = si_load_tess_level;
|
||||
ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
|
||||
|
||||
if (ctx->shader->key.as_es)
|
||||
if (ctx->shader->key.ge.as_es)
|
||||
ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
|
||||
else if (ngg_cull_shader)
|
||||
ctx->abi.emit_outputs = gfx10_emit_ngg_culling_epilogue;
|
||||
else if (ctx->shader->key.as_ngg)
|
||||
else if (ctx->shader->key.ge.as_ngg)
|
||||
ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue;
|
||||
else
|
||||
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
|
||||
|
|
|
|||
|
|
@ -131,8 +131,8 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
|
|||
* of dword-sized data that needs fixups. We need to insert conversion
|
||||
* code anyway, and the amd/common code does it for us.
|
||||
*/
|
||||
bool opencode = ctx->shader->key.mono.vs_fetch_opencode & (1 << input_index);
|
||||
fix_fetch.bits = ctx->shader->key.mono.vs_fix_fetch[input_index].bits;
|
||||
bool opencode = ctx->shader->key.ge.mono.vs_fetch_opencode & (1 << input_index);
|
||||
fix_fetch.bits = ctx->shader->key.ge.mono.vs_fix_fetch[input_index].bits;
|
||||
if (opencode || (fix_fetch.u.log_size == 3 && fix_fetch.u.format == AC_FETCH_FORMAT_FLOAT) ||
|
||||
(fix_fetch.u.log_size == 2)) {
|
||||
tmp = ac_build_opencoded_load_format(&ctx->ac, fix_fetch.u.log_size,
|
||||
|
|
@ -400,7 +400,7 @@ static void si_llvm_emit_clipvertex(struct si_shader_context *ctx, struct ac_exp
|
|||
LLVMValueRef constbuf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_CLIP_PLANES, 0);
|
||||
LLVMValueRef const_resource = ac_build_load_to_sgpr(&ctx->ac, ptr, constbuf_index);
|
||||
unsigned clipdist_mask = ctx->shader->selector->clipdist_mask &
|
||||
~ctx->shader->key.opt.kill_clip_distances;
|
||||
~ctx->shader->key.ge.opt.kill_clip_distances;
|
||||
|
||||
for (reg_index = 0; reg_index < 2; reg_index++) {
|
||||
struct ac_export_args *args = &pos[2 + reg_index];
|
||||
|
|
@ -484,7 +484,7 @@ static void si_prepare_param_exports(struct si_shader_context *ctx,
|
|||
}
|
||||
|
||||
if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
|
||||
shader->key.opt.kill_outputs &
|
||||
shader->key.ge.opt.kill_outputs &
|
||||
(1ull << si_shader_io_get_unique_index(semantic, true)))
|
||||
continue;
|
||||
|
||||
|
|
@ -575,7 +575,7 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
|
|||
viewport_index_value = NULL;
|
||||
unsigned pos_idx, index;
|
||||
unsigned clipdist_mask = (shader->selector->clipdist_mask &
|
||||
~shader->key.opt.kill_clip_distances) |
|
||||
~shader->key.ge.opt.kill_clip_distances) |
|
||||
shader->selector->culldist_mask;
|
||||
int i;
|
||||
|
||||
|
|
@ -629,8 +629,8 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
|
|||
pos_args[0].out[3] = ctx->ac.f32_1; /* W */
|
||||
}
|
||||
|
||||
bool writes_psize = shader->selector->info.writes_psize && !shader->key.opt.kill_pointsize;
|
||||
bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.as_ngg;
|
||||
bool writes_psize = shader->selector->info.writes_psize && !shader->key.ge.opt.kill_pointsize;
|
||||
bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.ge.as_ngg;
|
||||
bool writes_vrs = ctx->screen->options.vrs2x2;
|
||||
|
||||
/* Write the misc vector (point size, edgeflag, layer, viewport). */
|
||||
|
|
@ -783,7 +783,7 @@ void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi)
|
|||
si_llvm_emit_streamout(ctx, outputs, i, 0);
|
||||
|
||||
/* Export PrimitiveID. */
|
||||
if (ctx->shader->key.mono.u.vs_export_prim_id) {
|
||||
if (ctx->shader->key.ge.mono.u.vs_export_prim_id) {
|
||||
outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID;
|
||||
outputs[i].values[0] = ac_to_float(&ctx->ac, si_get_primitive_id(ctx, 0));
|
||||
for (j = 1; j < 4; j++)
|
||||
|
|
@ -990,13 +990,13 @@ void si_llvm_init_vs_callbacks(struct si_shader_context *ctx, bool ngg_cull_shad
|
|||
{
|
||||
struct si_shader *shader = ctx->shader;
|
||||
|
||||
if (shader->key.as_ls)
|
||||
if (shader->key.ge.as_ls)
|
||||
ctx->abi.emit_outputs = si_llvm_emit_ls_epilogue;
|
||||
else if (shader->key.as_es)
|
||||
else if (shader->key.ge.as_es)
|
||||
ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
|
||||
else if (ngg_cull_shader)
|
||||
ctx->abi.emit_outputs = gfx10_emit_ngg_culling_epilogue;
|
||||
else if (shader->key.as_ngg)
|
||||
else if (shader->key.ge.as_ngg)
|
||||
ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue;
|
||||
else
|
||||
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
|
||||
|
|
|
|||
|
|
@ -927,24 +927,24 @@ si_sqtt_pipeline_is_registered(struct ac_thread_trace_data *thread_trace_data,
|
|||
|
||||
|
||||
static enum rgp_hardware_stages
|
||||
si_sqtt_pipe_to_rgp_shader_stage(struct si_shader_key* key, enum pipe_shader_type stage)
|
||||
si_sqtt_pipe_to_rgp_shader_stage(union si_shader_key* key, enum pipe_shader_type stage)
|
||||
{
|
||||
switch (stage) {
|
||||
case PIPE_SHADER_VERTEX:
|
||||
if (key->as_ls)
|
||||
if (key->ge.as_ls)
|
||||
return RGP_HW_STAGE_LS;
|
||||
else if (key->as_es)
|
||||
else if (key->ge.as_es)
|
||||
return RGP_HW_STAGE_ES;
|
||||
else if (key->as_ngg)
|
||||
else if (key->ge.as_ngg)
|
||||
return RGP_HW_STAGE_GS;
|
||||
else
|
||||
return RGP_HW_STAGE_VS;
|
||||
case PIPE_SHADER_TESS_CTRL:
|
||||
return RGP_HW_STAGE_HS;
|
||||
case PIPE_SHADER_TESS_EVAL:
|
||||
if (key->as_es)
|
||||
if (key->ge.as_es)
|
||||
return RGP_HW_STAGE_ES;
|
||||
else if (key->as_ngg)
|
||||
else if (key->ge.as_ngg)
|
||||
return RGP_HW_STAGE_GS;
|
||||
else
|
||||
return RGP_HW_STAGE_VS;
|
||||
|
|
|
|||
|
|
@ -123,7 +123,7 @@ static void si_emit_cb_render_state(struct si_context *sctx)
|
|||
/* RB+ register settings. */
|
||||
if (sctx->screen->info.rbplus_allowed) {
|
||||
unsigned spi_shader_col_format =
|
||||
sctx->shader.ps.cso ? sctx->shader.ps.current->key.part.ps.epilog.spi_shader_col_format
|
||||
sctx->shader.ps.cso ? sctx->shader.ps.current->key.ps.part.epilog.spi_shader_col_format
|
||||
: 0;
|
||||
unsigned sx_ps_downconvert = 0;
|
||||
unsigned sx_blend_opt_epsilon = 0;
|
||||
|
|
|
|||
|
|
@ -478,6 +478,8 @@ struct si_buffer_resources {
|
|||
} while (0)
|
||||
|
||||
/* si_descriptors.c */
|
||||
void si_get_inline_uniform_state(union si_shader_key *key, enum pipe_shader_type shader,
|
||||
bool *inline_uniforms, uint32_t **inlined_values);
|
||||
void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture *tex,
|
||||
const struct legacy_surf_level *base_level_info,
|
||||
unsigned base_level, unsigned first_level, unsigned block_width,
|
||||
|
|
@ -572,17 +574,14 @@ void si_schedule_initial_compile(struct si_context *sctx, gl_shader_stage stage,
|
|||
util_queue_execute_func execute);
|
||||
void si_get_active_slot_masks(const struct si_shader_info *info, uint64_t *const_and_shader_buffers,
|
||||
uint64_t *samplers_and_images);
|
||||
int si_shader_select_with_key(struct si_context *sctx, struct si_shader_ctx_state *state,
|
||||
const struct si_shader_key *key, int thread_index,
|
||||
bool optimized_or_none);
|
||||
int si_shader_select(struct pipe_context *ctx, struct si_shader_ctx_state *state);
|
||||
void si_vs_key_update_inputs(struct si_context *sctx);
|
||||
void si_get_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key,
|
||||
void si_get_vs_key_inputs(struct si_context *sctx, union si_shader_key *key,
|
||||
struct si_vs_prolog_bits *prolog_key);
|
||||
void si_update_ps_inputs_read_or_disabled(struct si_context *sctx);
|
||||
void si_update_ps_kill_enable(struct si_context *sctx);
|
||||
void si_update_vrs_flat_shading(struct si_context *sctx);
|
||||
unsigned si_get_input_prim(const struct si_shader_selector *gs, const struct si_shader_key *key);
|
||||
unsigned si_get_input_prim(const struct si_shader_selector *gs, const union si_shader_key *key);
|
||||
bool si_update_ngg(struct si_context *sctx);
|
||||
void si_ps_key_update_framebuffer(struct si_context *sctx);
|
||||
void si_ps_key_update_framebuffer_blend(struct si_context *sctx);
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ static bool si_update_shaders(struct si_context *sctx)
|
|||
unsigned old_pa_cl_vs_out_cntl = old_vs ? old_vs->pa_cl_vs_out_cntl : 0;
|
||||
struct si_shader *old_ps = sctx->shader.ps.current;
|
||||
unsigned old_spi_shader_col_format =
|
||||
old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
|
||||
old_ps ? old_ps->key.ps.part.epilog.spi_shader_col_format : 0;
|
||||
int r;
|
||||
|
||||
/* Update TCS and TES. */
|
||||
|
|
@ -136,7 +136,7 @@ static bool si_update_shaders(struct si_context *sctx)
|
|||
if (!sctx->fixed_func_tcs_shader.cso)
|
||||
return false;
|
||||
|
||||
sctx->fixed_func_tcs_shader.key.part.tcs.epilog.invoc0_tess_factors_are_def =
|
||||
sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.invoc0_tess_factors_are_def =
|
||||
sctx->fixed_func_tcs_shader.cso->info.tessfactors_are_def_in_all_invocs;
|
||||
}
|
||||
|
||||
|
|
@ -260,12 +260,12 @@ static bool si_update_shaders(struct si_context *sctx)
|
|||
if ((GFX_VERSION >= GFX10_3 || (GFX_VERSION >= GFX9 && sctx->screen->info.rbplus_allowed)) &&
|
||||
si_pm4_state_changed(sctx, ps) &&
|
||||
(!old_ps || old_spi_shader_col_format !=
|
||||
sctx->shader.ps.current->key.part.ps.epilog.spi_shader_col_format))
|
||||
sctx->shader.ps.current->key.ps.part.epilog.spi_shader_col_format))
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);
|
||||
|
||||
if (sctx->smoothing_enabled !=
|
||||
sctx->shader.ps.current->key.part.ps.epilog.poly_line_smoothing) {
|
||||
sctx->smoothing_enabled = sctx->shader.ps.current->key.part.ps.epilog.poly_line_smoothing;
|
||||
sctx->shader.ps.current->key.ps.part.epilog.poly_line_smoothing) {
|
||||
sctx->smoothing_enabled = sctx->shader.ps.current->key.ps.part.epilog.poly_line_smoothing;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
|
||||
|
||||
/* NGG cull state uses smoothing_enabled. */
|
||||
|
|
@ -527,7 +527,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
|
|||
else
|
||||
ls_current = sctx->fixed_func_tcs_shader.current;
|
||||
|
||||
ls = ls_current->key.part.tcs.ls;
|
||||
ls = ls_current->key.ge.part.tcs.ls;
|
||||
} else {
|
||||
ls_current = sctx->shader.vs.current;
|
||||
ls = sctx->shader.vs.cso;
|
||||
|
|
@ -567,7 +567,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
|
|||
unsigned input_patch_size;
|
||||
|
||||
/* Allocate LDS for TCS inputs only if it's used. */
|
||||
if (!ls_current->key.opt.same_patch_vertices ||
|
||||
if (!ls_current->key.ge.opt.same_patch_vertices ||
|
||||
tcs->info.base.inputs_read & ~tcs->tcs_vgpr_only_inputs)
|
||||
input_patch_size = num_tcs_input_cp * input_vertex_size;
|
||||
else
|
||||
|
|
@ -2078,8 +2078,8 @@ static void si_draw(struct pipe_context *ctx,
|
|||
GFX_VERSION >= GFX9 &&
|
||||
tcs && sctx->patch_vertices == tcs->info.base.tess.tcs_vertices_out;
|
||||
|
||||
if (sctx->shader.tcs.key.opt.same_patch_vertices != same_patch_vertices) {
|
||||
sctx->shader.tcs.key.opt.same_patch_vertices = same_patch_vertices;
|
||||
if (sctx->shader.tcs.key.ge.opt.same_patch_vertices != same_patch_vertices) {
|
||||
sctx->shader.tcs.key.ge.opt.same_patch_vertices = same_patch_vertices;
|
||||
sctx->do_update_shaders = true;
|
||||
}
|
||||
|
||||
|
|
@ -2094,9 +2094,9 @@ static void si_draw(struct pipe_context *ctx,
|
|||
bool ls_vgpr_fix =
|
||||
tcs && sctx->patch_vertices > tcs->info.base.tess.tcs_vertices_out;
|
||||
|
||||
if (ls_vgpr_fix != sctx->shader.tcs.key.part.tcs.ls_prolog.ls_vgpr_fix) {
|
||||
sctx->shader.tcs.key.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
|
||||
sctx->fixed_func_tcs_shader.key.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
|
||||
if (ls_vgpr_fix != sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix) {
|
||||
sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
|
||||
sctx->fixed_func_tcs_shader.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
|
||||
sctx->do_update_shaders = true;
|
||||
}
|
||||
}
|
||||
|
|
@ -2133,8 +2133,8 @@ static void si_draw(struct pipe_context *ctx,
|
|||
bool gs_tri_strip_adj_fix =
|
||||
!HAS_TESS && prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY;
|
||||
|
||||
if (gs_tri_strip_adj_fix != sctx->shader.gs.key.part.gs.prolog.tri_strip_adj_fix) {
|
||||
sctx->shader.gs.key.part.gs.prolog.tri_strip_adj_fix = gs_tri_strip_adj_fix;
|
||||
if (gs_tri_strip_adj_fix != sctx->shader.gs.key.ge.part.gs.prolog.tri_strip_adj_fix) {
|
||||
sctx->shader.gs.key.ge.part.gs.prolog.tri_strip_adj_fix = gs_tri_strip_adj_fix;
|
||||
sctx->do_update_shaders = true;
|
||||
}
|
||||
}
|
||||
|
|
@ -2326,7 +2326,7 @@ static void si_draw(struct pipe_context *ctx,
|
|||
* hasn't finished. Set it to the correct value in si_context.
|
||||
*/
|
||||
if (GFX_VERSION >= GFX10 && NGG)
|
||||
sctx->ngg_culling = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->key.opt.ngg_culling;
|
||||
sctx->ngg_culling = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->key.ge.opt.ngg_culling;
|
||||
}
|
||||
|
||||
/* Since we've called si_context_add_resource_size for vertex buffers,
|
||||
|
|
|
|||
|
|
@ -360,7 +360,7 @@ bool si_shader_mem_ordered(struct si_shader *shader)
|
|||
shader->config.scratch_bytes_per_wave ||
|
||||
(info->stage == MESA_SHADER_FRAGMENT &&
|
||||
(info->base.fs.uses_fbfetch_output ||
|
||||
shader->key.part.ps.prolog.poly_stipple));
|
||||
shader->key.ps.part.prolog.poly_stipple));
|
||||
|
||||
if (prev_info) {
|
||||
sampler_or_bvh |= prev_info->uses_vmem_return_type_sampler_or_bvh;
|
||||
|
|
@ -455,7 +455,7 @@ static void polaris_set_vgt_vertex_reuse(struct si_screen *sscreen, struct si_sh
|
|||
|
||||
/* VS as VS, or VS as ES: */
|
||||
if ((sel->info.stage == MESA_SHADER_VERTEX &&
|
||||
(!shader->key.as_ls && !shader->is_gs_copy_shader)) ||
|
||||
(!shader->key.ge.as_ls && !shader->is_gs_copy_shader)) ||
|
||||
/* TES as VS, or TES as ES: */
|
||||
sel->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
unsigned vtx_reuse_depth = 30;
|
||||
|
|
@ -504,7 +504,7 @@ static unsigned si_get_vs_vgpr_comp_cnt(struct si_screen *sscreen, struct si_sha
|
|||
* GFX10 LS (VertexID, RelAutoIndex, UserVGPR1, UserVGPR2 or InstanceID)
|
||||
* GFX10 ES,VS (VertexID, UserVGPR1, UserVGPR2 or VSPrimID, UserVGPR3 or InstanceID)
|
||||
*/
|
||||
bool is_ls = shader->selector->info.stage == MESA_SHADER_TESS_CTRL || shader->key.as_ls;
|
||||
bool is_ls = shader->selector->info.stage == MESA_SHADER_TESS_CTRL || shader->key.ge.as_ls;
|
||||
unsigned max = 0;
|
||||
|
||||
if (shader->info.uses_instanceid) {
|
||||
|
|
@ -810,7 +810,7 @@ static void si_emit_shader_gs(struct si_context *sctx)
|
|||
SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
|
||||
shader->ctx_reg.gs.vgt_esgs_ring_itemsize);
|
||||
|
||||
if (shader->key.part.gs.es->info.stage == MESA_SHADER_TESS_EVAL)
|
||||
if (shader->key.ge.part.gs.es->info.stage == MESA_SHADER_TESS_EVAL)
|
||||
radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM,
|
||||
shader->vgt_tf_param);
|
||||
if (shader->vgt_vertex_reuse_block_cntl)
|
||||
|
|
@ -886,13 +886,13 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
|
|||
|
||||
if (sscreen->info.chip_class >= GFX9) {
|
||||
unsigned input_prim = sel->info.base.gs.input_primitive;
|
||||
gl_shader_stage es_stage = shader->key.part.gs.es->info.stage;
|
||||
gl_shader_stage es_stage = shader->key.ge.part.gs.es->info.stage;
|
||||
unsigned es_vgpr_comp_cnt, gs_vgpr_comp_cnt;
|
||||
|
||||
if (es_stage == MESA_SHADER_VERTEX) {
|
||||
es_vgpr_comp_cnt = si_get_vs_vgpr_comp_cnt(sscreen, shader, false);
|
||||
} else if (es_stage == MESA_SHADER_TESS_EVAL)
|
||||
es_vgpr_comp_cnt = shader->key.part.gs.es->info.uses_primid ? 3 : 2;
|
||||
es_vgpr_comp_cnt = shader->key.ge.part.gs.es->info.uses_primid ? 3 : 2;
|
||||
else
|
||||
unreachable("invalid shader selector type");
|
||||
|
||||
|
|
@ -952,12 +952,12 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
|
|||
S_028A44_GS_INST_PRIMS_IN_SUBGRP(shader->gs_info.gs_inst_prims_in_subgroup);
|
||||
shader->ctx_reg.gs.vgt_gs_max_prims_per_subgroup =
|
||||
S_028A94_MAX_PRIMS_PER_SUBGROUP(shader->gs_info.max_prims_per_subgroup);
|
||||
shader->ctx_reg.gs.vgt_esgs_ring_itemsize = shader->key.part.gs.es->esgs_itemsize / 4;
|
||||
shader->ctx_reg.gs.vgt_esgs_ring_itemsize = shader->key.ge.part.gs.es->esgs_itemsize / 4;
|
||||
|
||||
if (es_stage == MESA_SHADER_TESS_EVAL)
|
||||
si_set_tesseval_regs(sscreen, shader->key.part.gs.es, shader);
|
||||
si_set_tesseval_regs(sscreen, shader->key.ge.part.gs.es, shader);
|
||||
|
||||
polaris_set_vgt_vertex_reuse(sscreen, shader->key.part.gs.es, shader);
|
||||
polaris_set_vgt_vertex_reuse(sscreen, shader->key.ge.part.gs.es, shader);
|
||||
} else {
|
||||
shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs = S_00B21C_CU_EN(0xffff) |
|
||||
S_00B21C_WAVE_LIMIT(0x3F);
|
||||
|
|
@ -994,7 +994,7 @@ bool gfx10_is_ngg_passthrough(struct si_shader *shader)
|
|||
*
|
||||
* NGG passthrough still allows the use of LDS.
|
||||
*/
|
||||
return sel->info.stage != MESA_SHADER_GEOMETRY && !shader->key.opt.ngg_culling;
|
||||
return sel->info.stage != MESA_SHADER_GEOMETRY && !shader->key.ge.opt.ngg_culling;
|
||||
}
|
||||
|
||||
/* Common tail code for NGG primitive shaders. */
|
||||
|
|
@ -1094,7 +1094,7 @@ static void gfx10_emit_shader_ngg_tess_gs(struct si_context *sctx)
|
|||
gfx10_emit_shader_ngg_tail(sctx, shader);
|
||||
}
|
||||
|
||||
unsigned si_get_input_prim(const struct si_shader_selector *gs, const struct si_shader_key *key)
|
||||
unsigned si_get_input_prim(const struct si_shader_selector *gs, const union si_shader_key *key)
|
||||
{
|
||||
if (gs->info.stage == MESA_SHADER_GEOMETRY)
|
||||
return gs->info.base.gs.input_primitive;
|
||||
|
|
@ -1107,7 +1107,7 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs, const struct si_
|
|||
return PIPE_PRIM_TRIANGLES;
|
||||
}
|
||||
|
||||
if (key->opt.ngg_culling & SI_NGG_CULL_LINES)
|
||||
if (key->ge.opt.ngg_culling & SI_NGG_CULL_LINES)
|
||||
return PIPE_PRIM_LINES;
|
||||
|
||||
return PIPE_PRIM_TRIANGLES; /* worst case for all callers */
|
||||
|
|
@ -1117,9 +1117,9 @@ static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel,
|
|||
const struct si_shader *shader, bool ngg)
|
||||
{
|
||||
/* Clip distances can be killed, but cull distances can't. */
|
||||
unsigned clipcull_mask = (sel->clipdist_mask & ~shader->key.opt.kill_clip_distances) |
|
||||
unsigned clipcull_mask = (sel->clipdist_mask & ~shader->key.ge.opt.kill_clip_distances) |
|
||||
sel->culldist_mask;
|
||||
bool writes_psize = sel->info.writes_psize && !shader->key.opt.kill_pointsize;
|
||||
bool writes_psize = sel->info.writes_psize && !shader->key.ge.opt.kill_pointsize;
|
||||
bool misc_vec_ena = writes_psize || (sel->info.writes_edgeflag && !ngg) ||
|
||||
sel->screen->options.vrs2x2 ||
|
||||
sel->info.writes_layer || sel->info.writes_viewport_index;
|
||||
|
|
@ -1153,7 +1153,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
uint64_t va;
|
||||
bool window_space = gs_info->stage == MESA_SHADER_VERTEX ?
|
||||
gs_info->base.vs.window_space_position : 0;
|
||||
bool es_enable_prim_id = shader->key.mono.u.vs_export_prim_id || es_info->uses_primid;
|
||||
bool es_enable_prim_id = shader->key.ge.mono.u.vs_export_prim_id || es_info->uses_primid;
|
||||
unsigned gs_num_invocations = MAX2(gs_sel->info.base.gs.invocations, 1);
|
||||
unsigned input_prim = si_get_input_prim(gs_sel, &shader->key);
|
||||
bool break_wave_at_eoi = false;
|
||||
|
|
@ -1200,7 +1200,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
(gfx10_edgeflags_have_effect(shader) && !gfx10_is_ngg_passthrough(shader)))
|
||||
gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID, edge flags. */
|
||||
else if ((gs_stage == MESA_SHADER_GEOMETRY && gs_info->uses_primid) ||
|
||||
(gs_stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id))
|
||||
(gs_stage == MESA_SHADER_VERTEX && shader->key.ge.mono.u.vs_export_prim_id))
|
||||
gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
|
||||
else if (input_prim >= PIPE_PRIM_TRIANGLES && !gfx10_is_ngg_passthrough(shader))
|
||||
gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
|
||||
|
|
@ -1210,7 +1210,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
unsigned wave_size = si_get_shader_wave_size(shader);
|
||||
unsigned late_alloc_wave64, cu_mask;
|
||||
|
||||
ac_compute_late_alloc(&sscreen->info, true, shader->key.opt.ngg_culling,
|
||||
ac_compute_late_alloc(&sscreen->info, true, shader->key.ge.opt.ngg_culling,
|
||||
shader->config.scratch_bytes_per_wave > 0,
|
||||
&late_alloc_wave64, &cu_mask);
|
||||
|
||||
|
|
@ -1255,7 +1255,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
|
||||
shader->ctx_reg.ngg.vgt_primitiveid_en =
|
||||
S_028A84_PRIMITIVEID_EN(es_enable_prim_id) |
|
||||
S_028A84_NGG_DISABLE_PROVOK_REUSE(shader->key.mono.u.vs_export_prim_id ||
|
||||
S_028A84_NGG_DISABLE_PROVOK_REUSE(shader->key.ge.mono.u.vs_export_prim_id ||
|
||||
gs_sel->info.writes_primid);
|
||||
|
||||
if (gs_stage == MESA_SHADER_GEOMETRY) {
|
||||
|
|
@ -1293,7 +1293,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
/* Oversubscribe PC. This improves performance when there are too many varyings. */
|
||||
unsigned oversub_pc_factor = 1;
|
||||
|
||||
if (shader->key.opt.ngg_culling) {
|
||||
if (shader->key.ge.opt.ngg_culling) {
|
||||
/* Be more aggressive with NGG culling. */
|
||||
if (shader->info.nr_param_exports > 4)
|
||||
oversub_pc_factor = 4;
|
||||
|
|
@ -1418,7 +1418,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
|
|||
unsigned nparams, oc_lds_en;
|
||||
bool window_space = info->stage == MESA_SHADER_VERTEX ?
|
||||
info->base.vs.window_space_position : 0;
|
||||
bool enable_prim_id = shader->key.mono.u.vs_export_prim_id || info->uses_primid;
|
||||
bool enable_prim_id = shader->key.ge.mono.u.vs_export_prim_id || info->uses_primid;
|
||||
|
||||
pm4 = si_get_shader_pm4_state(shader);
|
||||
if (!pm4)
|
||||
|
|
@ -1556,7 +1556,7 @@ static unsigned si_get_ps_num_interp(struct si_shader *ps)
|
|||
struct si_shader_info *info = &ps->selector->info;
|
||||
unsigned num_colors = !!(info->colors_read & 0x0f) + !!(info->colors_read & 0xf0);
|
||||
unsigned num_interp =
|
||||
ps->selector->info.num_inputs + (ps->key.part.ps.prolog.color_two_side ? num_colors : 0);
|
||||
ps->selector->info.num_inputs + (ps->key.ps.part.prolog.color_two_side ? num_colors : 0);
|
||||
|
||||
assert(num_interp <= 32);
|
||||
return MIN2(num_interp, 32);
|
||||
|
|
@ -1564,7 +1564,7 @@ static unsigned si_get_ps_num_interp(struct si_shader *ps)
|
|||
|
||||
static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
|
||||
{
|
||||
unsigned spi_shader_col_format = shader->key.part.ps.epilog.spi_shader_col_format;
|
||||
unsigned spi_shader_col_format = shader->key.ps.part.epilog.spi_shader_col_format;
|
||||
unsigned value = 0, num_mrts = 0;
|
||||
unsigned i, num_targets = (util_last_bit(spi_shader_col_format) + 3) / 4;
|
||||
|
||||
|
|
@ -1628,23 +1628,23 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
|
|||
G_0286CC_PERSP_PULL_MODEL_ENA(input_ena));
|
||||
|
||||
/* Validate interpolation optimization flags (read as implications). */
|
||||
assert(!shader->key.part.ps.prolog.bc_optimize_for_persp ||
|
||||
assert(!shader->key.ps.part.prolog.bc_optimize_for_persp ||
|
||||
(G_0286CC_PERSP_CENTER_ENA(input_ena) && G_0286CC_PERSP_CENTROID_ENA(input_ena)));
|
||||
assert(!shader->key.part.ps.prolog.bc_optimize_for_linear ||
|
||||
assert(!shader->key.ps.part.prolog.bc_optimize_for_linear ||
|
||||
(G_0286CC_LINEAR_CENTER_ENA(input_ena) && G_0286CC_LINEAR_CENTROID_ENA(input_ena)));
|
||||
assert(!shader->key.part.ps.prolog.force_persp_center_interp ||
|
||||
assert(!shader->key.ps.part.prolog.force_persp_center_interp ||
|
||||
(!G_0286CC_PERSP_SAMPLE_ENA(input_ena) && !G_0286CC_PERSP_CENTROID_ENA(input_ena)));
|
||||
assert(!shader->key.part.ps.prolog.force_linear_center_interp ||
|
||||
assert(!shader->key.ps.part.prolog.force_linear_center_interp ||
|
||||
(!G_0286CC_LINEAR_SAMPLE_ENA(input_ena) && !G_0286CC_LINEAR_CENTROID_ENA(input_ena)));
|
||||
assert(!shader->key.part.ps.prolog.force_persp_sample_interp ||
|
||||
assert(!shader->key.ps.part.prolog.force_persp_sample_interp ||
|
||||
(!G_0286CC_PERSP_CENTER_ENA(input_ena) && !G_0286CC_PERSP_CENTROID_ENA(input_ena)));
|
||||
assert(!shader->key.part.ps.prolog.force_linear_sample_interp ||
|
||||
assert(!shader->key.ps.part.prolog.force_linear_sample_interp ||
|
||||
(!G_0286CC_LINEAR_CENTER_ENA(input_ena) && !G_0286CC_LINEAR_CENTROID_ENA(input_ena)));
|
||||
|
||||
/* Validate cases when the optimizations are off (read as implications). */
|
||||
assert(shader->key.part.ps.prolog.bc_optimize_for_persp ||
|
||||
assert(shader->key.ps.part.prolog.bc_optimize_for_persp ||
|
||||
!G_0286CC_PERSP_CENTER_ENA(input_ena) || !G_0286CC_PERSP_CENTROID_ENA(input_ena));
|
||||
assert(shader->key.part.ps.prolog.bc_optimize_for_linear ||
|
||||
assert(shader->key.ps.part.prolog.bc_optimize_for_linear ||
|
||||
!G_0286CC_LINEAR_CENTER_ENA(input_ena) || !G_0286CC_LINEAR_CENTROID_ENA(input_ena));
|
||||
|
||||
pm4 = si_get_shader_pm4_state(shader);
|
||||
|
|
@ -1685,7 +1685,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
|
|||
spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1);
|
||||
|
||||
spi_shader_col_format = si_get_spi_shader_col_format(shader);
|
||||
cb_shader_mask = ac_get_cb_shader_mask(shader->key.part.ps.epilog.spi_shader_col_format);
|
||||
cb_shader_mask = ac_get_cb_shader_mask(shader->key.ps.part.epilog.spi_shader_col_format);
|
||||
|
||||
/* Ensure that some export memory is always allocated, for two reasons:
|
||||
*
|
||||
|
|
@ -1703,7 +1703,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
|
|||
* instructions if any are present.
|
||||
*/
|
||||
if ((sscreen->info.chip_class <= GFX9 || info->base.fs.uses_discard ||
|
||||
shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS) &&
|
||||
shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS) &&
|
||||
!spi_shader_col_format && !info->writes_z && !info->writes_stencil &&
|
||||
!info->writes_samplemask)
|
||||
spi_shader_col_format = V_028714_SPI_SHADER_32_R;
|
||||
|
|
@ -1750,11 +1750,11 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader
|
|||
{
|
||||
switch (shader->selector->info.stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
if (shader->key.as_ls)
|
||||
if (shader->key.ge.as_ls)
|
||||
si_shader_ls(sscreen, shader);
|
||||
else if (shader->key.as_es)
|
||||
else if (shader->key.ge.as_es)
|
||||
si_shader_es(sscreen, shader);
|
||||
else if (shader->key.as_ngg)
|
||||
else if (shader->key.ge.as_ngg)
|
||||
gfx10_shader_ngg(sscreen, shader);
|
||||
else
|
||||
si_shader_vs(sscreen, shader, NULL);
|
||||
|
|
@ -1763,15 +1763,15 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader
|
|||
si_shader_hs(sscreen, shader);
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
if (shader->key.as_es)
|
||||
if (shader->key.ge.as_es)
|
||||
si_shader_es(sscreen, shader);
|
||||
else if (shader->key.as_ngg)
|
||||
else if (shader->key.ge.as_ngg)
|
||||
gfx10_shader_ngg(sscreen, shader);
|
||||
else
|
||||
si_shader_vs(sscreen, shader, NULL);
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
if (shader->key.as_ngg)
|
||||
if (shader->key.ge.as_ngg)
|
||||
gfx10_shader_ngg(sscreen, shader);
|
||||
else
|
||||
si_shader_gs(sscreen, shader);
|
||||
|
|
@ -1784,27 +1784,27 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader
|
|||
}
|
||||
}
|
||||
|
||||
static void si_clear_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key,
|
||||
static void si_clear_vs_key_inputs(struct si_context *sctx, union si_shader_key *key,
|
||||
struct si_vs_prolog_bits *prolog_key)
|
||||
{
|
||||
prolog_key->instance_divisor_is_one = 0;
|
||||
prolog_key->instance_divisor_is_fetched = 0;
|
||||
key->mono.vs_fetch_opencode = 0;
|
||||
memset(key->mono.vs_fix_fetch, 0, sizeof(key->mono.vs_fix_fetch));
|
||||
key->ge.mono.vs_fetch_opencode = 0;
|
||||
memset(key->ge.mono.vs_fix_fetch, 0, sizeof(key->ge.mono.vs_fix_fetch));
|
||||
}
|
||||
|
||||
void si_vs_key_update_inputs(struct si_context *sctx)
|
||||
{
|
||||
struct si_shader_selector *vs = sctx->shader.vs.cso;
|
||||
struct si_vertex_elements *elts = sctx->vertex_elements;
|
||||
struct si_shader_key *key = &sctx->shader.vs.key;
|
||||
union si_shader_key *key = &sctx->shader.vs.key;
|
||||
|
||||
if (!vs)
|
||||
return;
|
||||
|
||||
if (vs->info.base.vs.blit_sgprs_amd) {
|
||||
si_clear_vs_key_inputs(sctx, key, &key->part.vs.prolog);
|
||||
key->opt.prefer_mono = 0;
|
||||
si_clear_vs_key_inputs(sctx, key, &key->ge.part.vs.prolog);
|
||||
key->ge.opt.prefer_mono = 0;
|
||||
sctx->uses_nontrivial_vs_prolog = false;
|
||||
return;
|
||||
}
|
||||
|
|
@ -1814,9 +1814,9 @@ void si_vs_key_update_inputs(struct si_context *sctx)
|
|||
if (elts->instance_divisor_is_one || elts->instance_divisor_is_fetched)
|
||||
uses_nontrivial_vs_prolog = true;
|
||||
|
||||
key->part.vs.prolog.instance_divisor_is_one = elts->instance_divisor_is_one;
|
||||
key->part.vs.prolog.instance_divisor_is_fetched = elts->instance_divisor_is_fetched;
|
||||
key->opt.prefer_mono = elts->instance_divisor_is_fetched;
|
||||
key->ge.part.vs.prolog.instance_divisor_is_one = elts->instance_divisor_is_one;
|
||||
key->ge.part.vs.prolog.instance_divisor_is_fetched = elts->instance_divisor_is_fetched;
|
||||
key->ge.opt.prefer_mono = elts->instance_divisor_is_fetched;
|
||||
|
||||
unsigned count_mask = (1 << vs->info.num_inputs) - 1;
|
||||
unsigned fix = elts->fix_fetch_always & count_mask;
|
||||
|
|
@ -1837,17 +1837,17 @@ void si_vs_key_update_inputs(struct si_context *sctx)
|
|||
}
|
||||
}
|
||||
|
||||
memset(key->mono.vs_fix_fetch, 0, sizeof(key->mono.vs_fix_fetch));
|
||||
memset(key->ge.mono.vs_fix_fetch, 0, sizeof(key->ge.mono.vs_fix_fetch));
|
||||
|
||||
while (fix) {
|
||||
unsigned i = u_bit_scan(&fix);
|
||||
uint8_t fix_fetch = elts->fix_fetch[i];
|
||||
|
||||
key->mono.vs_fix_fetch[i].bits = fix_fetch;
|
||||
key->ge.mono.vs_fix_fetch[i].bits = fix_fetch;
|
||||
if (fix_fetch)
|
||||
uses_nontrivial_vs_prolog = true;
|
||||
}
|
||||
key->mono.vs_fetch_opencode = opencode;
|
||||
key->ge.mono.vs_fetch_opencode = opencode;
|
||||
if (opencode)
|
||||
uses_nontrivial_vs_prolog = true;
|
||||
|
||||
|
|
@ -1863,18 +1863,18 @@ void si_vs_key_update_inputs(struct si_context *sctx)
|
|||
* cases.
|
||||
*/
|
||||
if (uses_nontrivial_vs_prolog && sctx->force_trivial_vs_prolog)
|
||||
si_clear_vs_key_inputs(sctx, key, &key->part.vs.prolog);
|
||||
si_clear_vs_key_inputs(sctx, key, &key->ge.part.vs.prolog);
|
||||
}
|
||||
|
||||
void si_get_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key,
|
||||
void si_get_vs_key_inputs(struct si_context *sctx, union si_shader_key *key,
|
||||
struct si_vs_prolog_bits *prolog_key)
|
||||
{
|
||||
prolog_key->instance_divisor_is_one = sctx->shader.vs.key.part.vs.prolog.instance_divisor_is_one;
|
||||
prolog_key->instance_divisor_is_fetched = sctx->shader.vs.key.part.vs.prolog.instance_divisor_is_fetched;
|
||||
prolog_key->instance_divisor_is_one = sctx->shader.vs.key.ge.part.vs.prolog.instance_divisor_is_one;
|
||||
prolog_key->instance_divisor_is_fetched = sctx->shader.vs.key.ge.part.vs.prolog.instance_divisor_is_fetched;
|
||||
|
||||
key->mono.vs_fetch_opencode = sctx->shader.vs.key.mono.vs_fetch_opencode;
|
||||
memcpy(key->mono.vs_fix_fetch, sctx->shader.vs.key.mono.vs_fix_fetch,
|
||||
sizeof(key->mono.vs_fix_fetch));
|
||||
key->ge.mono.vs_fetch_opencode = sctx->shader.vs.key.ge.mono.vs_fetch_opencode;
|
||||
memcpy(key->ge.mono.vs_fix_fetch, sctx->shader.vs.key.ge.mono.vs_fix_fetch,
|
||||
sizeof(key->ge.mono.vs_fix_fetch));
|
||||
}
|
||||
|
||||
void si_update_ps_inputs_read_or_disabled(struct si_context *sctx)
|
||||
|
|
@ -1898,53 +1898,53 @@ void si_update_ps_inputs_read_or_disabled(struct si_context *sctx)
|
|||
}
|
||||
|
||||
static void si_get_vs_key_outputs(struct si_context *sctx, struct si_shader_selector *vs,
|
||||
struct si_shader_key *key)
|
||||
union si_shader_key *key)
|
||||
{
|
||||
|
||||
key->opt.kill_clip_distances = vs->clipdist_mask & ~sctx->queued.named.rasterizer->clip_plane_enable;
|
||||
key->ge.opt.kill_clip_distances = vs->clipdist_mask & ~sctx->queued.named.rasterizer->clip_plane_enable;
|
||||
|
||||
/* Find out which VS outputs aren't used by the PS. */
|
||||
uint64_t outputs_written = vs->outputs_written_before_ps;
|
||||
uint64_t linked = outputs_written & sctx->ps_inputs_read_or_disabled;
|
||||
|
||||
key->opt.kill_outputs = ~linked & outputs_written;
|
||||
key->ge.opt.kill_outputs = ~linked & outputs_written;
|
||||
|
||||
if (vs->info.stage != MESA_SHADER_GEOMETRY) {
|
||||
key->opt.ngg_culling = sctx->ngg_culling;
|
||||
key->mono.u.vs_export_prim_id = sctx->shader.ps.cso && sctx->shader.ps.cso->info.uses_primid;
|
||||
key->ge.opt.ngg_culling = sctx->ngg_culling;
|
||||
key->ge.mono.u.vs_export_prim_id = sctx->shader.ps.cso && sctx->shader.ps.cso->info.uses_primid;
|
||||
} else {
|
||||
key->opt.ngg_culling = 0;
|
||||
key->mono.u.vs_export_prim_id = 0;
|
||||
key->ge.opt.ngg_culling = 0;
|
||||
key->ge.mono.u.vs_export_prim_id = 0;
|
||||
}
|
||||
|
||||
key->opt.kill_pointsize = vs->info.writes_psize &&
|
||||
sctx->current_rast_prim != PIPE_PRIM_POINTS &&
|
||||
!sctx->queued.named.rasterizer->polygon_mode_is_points;
|
||||
key->ge.opt.kill_pointsize = vs->info.writes_psize &&
|
||||
sctx->current_rast_prim != PIPE_PRIM_POINTS &&
|
||||
!sctx->queued.named.rasterizer->polygon_mode_is_points;
|
||||
}
|
||||
|
||||
static void si_clear_vs_key_outputs(struct si_context *sctx, struct si_shader_selector *vs,
|
||||
struct si_shader_key *key)
|
||||
union si_shader_key *key)
|
||||
{
|
||||
key->opt.kill_clip_distances = 0;
|
||||
key->opt.kill_outputs = 0;
|
||||
key->opt.ngg_culling = 0;
|
||||
key->mono.u.vs_export_prim_id = 0;
|
||||
key->opt.kill_pointsize = 0;
|
||||
key->ge.opt.kill_clip_distances = 0;
|
||||
key->ge.opt.kill_outputs = 0;
|
||||
key->ge.opt.ngg_culling = 0;
|
||||
key->ge.mono.u.vs_export_prim_id = 0;
|
||||
key->ge.opt.kill_pointsize = 0;
|
||||
}
|
||||
|
||||
void si_ps_key_update_framebuffer(struct si_context *sctx)
|
||||
{
|
||||
struct si_shader_selector *sel = sctx->shader.ps.cso;
|
||||
struct si_shader_key *key = &sctx->shader.ps.key;
|
||||
union si_shader_key *key = &sctx->shader.ps.key;
|
||||
|
||||
if (!sel)
|
||||
return;
|
||||
|
||||
if (sel->info.color0_writes_all_cbufs &&
|
||||
sel->info.colors_written == 0x1)
|
||||
key->part.ps.epilog.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
|
||||
key->ps.part.epilog.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
|
||||
else
|
||||
key->part.ps.epilog.last_cbuf = 0;
|
||||
key->ps.part.epilog.last_cbuf = 0;
|
||||
|
||||
/* ps_uses_fbfetch is true only if the color buffer is bound. */
|
||||
if (sctx->ps_uses_fbfetch) {
|
||||
|
|
@ -1952,25 +1952,25 @@ void si_ps_key_update_framebuffer(struct si_context *sctx)
|
|||
struct pipe_resource *tex = cb0->texture;
|
||||
|
||||
/* 1D textures are allocated and used as 2D on GFX9. */
|
||||
key->mono.u.ps.fbfetch_msaa = sctx->framebuffer.nr_samples > 1;
|
||||
key->mono.u.ps.fbfetch_is_1D =
|
||||
key->ps.mono.fbfetch_msaa = sctx->framebuffer.nr_samples > 1;
|
||||
key->ps.mono.fbfetch_is_1D =
|
||||
sctx->chip_class != GFX9 &&
|
||||
(tex->target == PIPE_TEXTURE_1D || tex->target == PIPE_TEXTURE_1D_ARRAY);
|
||||
key->mono.u.ps.fbfetch_layered =
|
||||
key->ps.mono.fbfetch_layered =
|
||||
tex->target == PIPE_TEXTURE_1D_ARRAY || tex->target == PIPE_TEXTURE_2D_ARRAY ||
|
||||
tex->target == PIPE_TEXTURE_CUBE || tex->target == PIPE_TEXTURE_CUBE_ARRAY ||
|
||||
tex->target == PIPE_TEXTURE_3D;
|
||||
} else {
|
||||
key->mono.u.ps.fbfetch_msaa = 0;
|
||||
key->mono.u.ps.fbfetch_is_1D = 0;
|
||||
key->mono.u.ps.fbfetch_layered = 0;
|
||||
key->ps.mono.fbfetch_msaa = 0;
|
||||
key->ps.mono.fbfetch_is_1D = 0;
|
||||
key->ps.mono.fbfetch_layered = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void si_ps_key_update_framebuffer_blend(struct si_context *sctx)
|
||||
{
|
||||
struct si_shader_selector *sel = sctx->shader.ps.cso;
|
||||
struct si_shader_key *key = &sctx->shader.ps.key;
|
||||
union si_shader_key *key = &sctx->shader.ps.key;
|
||||
struct si_state_blend *blend = sctx->queued.named.blend;
|
||||
|
||||
if (!sel)
|
||||
|
|
@ -1979,7 +1979,7 @@ void si_ps_key_update_framebuffer_blend(struct si_context *sctx)
|
|||
/* Select the shader color format based on whether
|
||||
* blending or alpha are needed.
|
||||
*/
|
||||
key->part.ps.epilog.spi_shader_col_format =
|
||||
key->ps.part.epilog.spi_shader_col_format =
|
||||
(blend->blend_enable_4bit & blend->need_src_alpha_4bit &
|
||||
sctx->framebuffer.spi_shader_col_format_blend_alpha) |
|
||||
(blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
|
||||
|
|
@ -1988,36 +1988,36 @@ void si_ps_key_update_framebuffer_blend(struct si_context *sctx)
|
|||
sctx->framebuffer.spi_shader_col_format_alpha) |
|
||||
(~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
|
||||
sctx->framebuffer.spi_shader_col_format);
|
||||
key->part.ps.epilog.spi_shader_col_format &= blend->cb_target_enabled_4bit;
|
||||
key->ps.part.epilog.spi_shader_col_format &= blend->cb_target_enabled_4bit;
|
||||
|
||||
/* The output for dual source blending should have
|
||||
* the same format as the first output.
|
||||
*/
|
||||
if (blend->dual_src_blend) {
|
||||
key->part.ps.epilog.spi_shader_col_format |=
|
||||
(key->part.ps.epilog.spi_shader_col_format & 0xf) << 4;
|
||||
key->ps.part.epilog.spi_shader_col_format |=
|
||||
(key->ps.part.epilog.spi_shader_col_format & 0xf) << 4;
|
||||
}
|
||||
|
||||
/* If alpha-to-coverage is enabled, we have to export alpha
|
||||
* even if there is no color buffer.
|
||||
*/
|
||||
if (!(key->part.ps.epilog.spi_shader_col_format & 0xf) && blend->alpha_to_coverage)
|
||||
key->part.ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
|
||||
if (!(key->ps.part.epilog.spi_shader_col_format & 0xf) && blend->alpha_to_coverage)
|
||||
key->ps.part.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
|
||||
|
||||
/* On GFX6 and GFX7 except Hawaii, the CB doesn't clamp outputs
|
||||
* to the range supported by the type if a channel has less
|
||||
* than 16 bits and the export format is 16_ABGR.
|
||||
*/
|
||||
if (sctx->chip_class <= GFX7 && sctx->family != CHIP_HAWAII) {
|
||||
key->part.ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8;
|
||||
key->part.ps.epilog.color_is_int10 = sctx->framebuffer.color_is_int10;
|
||||
key->ps.part.epilog.color_is_int8 = sctx->framebuffer.color_is_int8;
|
||||
key->ps.part.epilog.color_is_int10 = sctx->framebuffer.color_is_int10;
|
||||
}
|
||||
|
||||
/* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */
|
||||
if (!key->part.ps.epilog.last_cbuf) {
|
||||
key->part.ps.epilog.spi_shader_col_format &= sel->colors_written_4bit;
|
||||
key->part.ps.epilog.color_is_int8 &= sel->info.colors_written;
|
||||
key->part.ps.epilog.color_is_int10 &= sel->info.colors_written;
|
||||
if (!key->ps.part.epilog.last_cbuf) {
|
||||
key->ps.part.epilog.spi_shader_col_format &= sel->colors_written_4bit;
|
||||
key->ps.part.epilog.color_is_int8 &= sel->info.colors_written;
|
||||
key->ps.part.epilog.color_is_int10 &= sel->info.colors_written;
|
||||
}
|
||||
|
||||
/* Eliminate shader code computing output values that are unused.
|
||||
|
|
@ -2026,51 +2026,51 @@ void si_ps_key_update_framebuffer_blend(struct si_context *sctx)
|
|||
*/
|
||||
if (sel->colors_written_4bit &
|
||||
~(sctx->framebuffer.colorbuf_enabled_4bit & blend->cb_target_enabled_4bit))
|
||||
key->opt.prefer_mono = 1;
|
||||
key->ps.opt.prefer_mono = 1;
|
||||
else
|
||||
key->opt.prefer_mono = 0;
|
||||
key->ps.opt.prefer_mono = 0;
|
||||
}
|
||||
|
||||
void si_ps_key_update_blend_rasterizer(struct si_context *sctx)
|
||||
{
|
||||
struct si_shader_key *key = &sctx->shader.ps.key;
|
||||
union si_shader_key *key = &sctx->shader.ps.key;
|
||||
struct si_state_blend *blend = sctx->queued.named.blend;
|
||||
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
|
||||
|
||||
key->part.ps.epilog.alpha_to_one = blend->alpha_to_one && rs->multisample_enable;
|
||||
key->ps.part.epilog.alpha_to_one = blend->alpha_to_one && rs->multisample_enable;
|
||||
}
|
||||
|
||||
void si_ps_key_update_rasterizer(struct si_context *sctx)
|
||||
{
|
||||
struct si_shader_selector *sel = sctx->shader.ps.cso;
|
||||
struct si_shader_key *key = &sctx->shader.ps.key;
|
||||
union si_shader_key *key = &sctx->shader.ps.key;
|
||||
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
|
||||
|
||||
if (!sel)
|
||||
return;
|
||||
|
||||
key->part.ps.prolog.color_two_side = rs->two_side && sel->info.colors_read;
|
||||
key->part.ps.prolog.flatshade_colors = rs->flatshade && sel->info.uses_interp_color;
|
||||
key->part.ps.epilog.clamp_color = rs->clamp_fragment_color;
|
||||
key->ps.part.prolog.color_two_side = rs->two_side && sel->info.colors_read;
|
||||
key->ps.part.prolog.flatshade_colors = rs->flatshade && sel->info.uses_interp_color;
|
||||
key->ps.part.epilog.clamp_color = rs->clamp_fragment_color;
|
||||
}
|
||||
|
||||
void si_ps_key_update_dsa(struct si_context *sctx)
|
||||
{
|
||||
struct si_shader_key *key = &sctx->shader.ps.key;
|
||||
union si_shader_key *key = &sctx->shader.ps.key;
|
||||
|
||||
key->part.ps.epilog.alpha_func = sctx->queued.named.dsa->alpha_func;
|
||||
key->ps.part.epilog.alpha_func = sctx->queued.named.dsa->alpha_func;
|
||||
}
|
||||
|
||||
static void si_ps_key_update_primtype_shader_rasterizer_framebuffer(struct si_context *sctx)
|
||||
{
|
||||
struct si_shader_key *key = &sctx->shader.ps.key;
|
||||
union si_shader_key *key = &sctx->shader.ps.key;
|
||||
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
|
||||
|
||||
bool is_poly = !util_prim_is_points_or_lines(sctx->current_rast_prim);
|
||||
bool is_line = util_prim_is_lines(sctx->current_rast_prim);
|
||||
|
||||
key->part.ps.prolog.poly_stipple = rs->poly_stipple_enable && is_poly;
|
||||
key->part.ps.epilog.poly_line_smoothing =
|
||||
key->ps.part.prolog.poly_stipple = rs->poly_stipple_enable && is_poly;
|
||||
key->ps.part.epilog.poly_line_smoothing =
|
||||
((is_poly && rs->poly_smooth) || (is_line && rs->line_smooth)) &&
|
||||
sctx->framebuffer.nr_samples <= 1;
|
||||
}
|
||||
|
|
@ -2078,21 +2078,21 @@ static void si_ps_key_update_primtype_shader_rasterizer_framebuffer(struct si_co
|
|||
void si_ps_key_update_sample_shading(struct si_context *sctx)
|
||||
{
|
||||
struct si_shader_selector *sel = sctx->shader.ps.cso;
|
||||
struct si_shader_key *key = &sctx->shader.ps.key;
|
||||
union si_shader_key *key = &sctx->shader.ps.key;
|
||||
|
||||
if (!sel)
|
||||
return;
|
||||
|
||||
if (sctx->ps_iter_samples > 1 && sel->info.reads_samplemask)
|
||||
key->part.ps.prolog.samplemask_log_ps_iter = util_logbase2(sctx->ps_iter_samples);
|
||||
key->ps.part.prolog.samplemask_log_ps_iter = util_logbase2(sctx->ps_iter_samples);
|
||||
else
|
||||
key->part.ps.prolog.samplemask_log_ps_iter = 0;
|
||||
key->ps.part.prolog.samplemask_log_ps_iter = 0;
|
||||
}
|
||||
|
||||
void si_ps_key_update_framebuffer_rasterizer_sample_shading(struct si_context *sctx)
|
||||
{
|
||||
struct si_shader_selector *sel = sctx->shader.ps.cso;
|
||||
struct si_shader_key *key = &sctx->shader.ps.key;
|
||||
union si_shader_key *key = &sctx->shader.ps.key;
|
||||
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
|
||||
|
||||
if (!sel)
|
||||
|
|
@ -2107,49 +2107,49 @@ void si_ps_key_update_framebuffer_rasterizer_sample_shading(struct si_context *s
|
|||
|
||||
if (rs->force_persample_interp && rs->multisample_enable &&
|
||||
sctx->framebuffer.nr_samples > 1 && sctx->ps_iter_samples > 1) {
|
||||
key->part.ps.prolog.force_persp_sample_interp =
|
||||
key->ps.part.prolog.force_persp_sample_interp =
|
||||
uses_persp_center || uses_persp_centroid;
|
||||
|
||||
key->part.ps.prolog.force_linear_sample_interp =
|
||||
key->ps.part.prolog.force_linear_sample_interp =
|
||||
sel->info.uses_linear_center || sel->info.uses_linear_centroid;
|
||||
|
||||
key->part.ps.prolog.force_persp_center_interp = 0;
|
||||
key->part.ps.prolog.force_linear_center_interp = 0;
|
||||
key->part.ps.prolog.bc_optimize_for_persp = 0;
|
||||
key->part.ps.prolog.bc_optimize_for_linear = 0;
|
||||
key->mono.u.ps.interpolate_at_sample_force_center = 0;
|
||||
key->ps.part.prolog.force_persp_center_interp = 0;
|
||||
key->ps.part.prolog.force_linear_center_interp = 0;
|
||||
key->ps.part.prolog.bc_optimize_for_persp = 0;
|
||||
key->ps.part.prolog.bc_optimize_for_linear = 0;
|
||||
key->ps.mono.interpolate_at_sample_force_center = 0;
|
||||
} else if (rs->multisample_enable && sctx->framebuffer.nr_samples > 1) {
|
||||
key->part.ps.prolog.force_persp_sample_interp = 0;
|
||||
key->part.ps.prolog.force_linear_sample_interp = 0;
|
||||
key->part.ps.prolog.force_persp_center_interp = 0;
|
||||
key->part.ps.prolog.force_linear_center_interp = 0;
|
||||
key->part.ps.prolog.bc_optimize_for_persp =
|
||||
key->ps.part.prolog.force_persp_sample_interp = 0;
|
||||
key->ps.part.prolog.force_linear_sample_interp = 0;
|
||||
key->ps.part.prolog.force_persp_center_interp = 0;
|
||||
key->ps.part.prolog.force_linear_center_interp = 0;
|
||||
key->ps.part.prolog.bc_optimize_for_persp =
|
||||
uses_persp_center && uses_persp_centroid;
|
||||
key->part.ps.prolog.bc_optimize_for_linear =
|
||||
key->ps.part.prolog.bc_optimize_for_linear =
|
||||
sel->info.uses_linear_center && sel->info.uses_linear_centroid;
|
||||
key->mono.u.ps.interpolate_at_sample_force_center = 0;
|
||||
key->ps.mono.interpolate_at_sample_force_center = 0;
|
||||
} else {
|
||||
key->part.ps.prolog.force_persp_sample_interp = 0;
|
||||
key->part.ps.prolog.force_linear_sample_interp = 0;
|
||||
key->ps.part.prolog.force_persp_sample_interp = 0;
|
||||
key->ps.part.prolog.force_linear_sample_interp = 0;
|
||||
|
||||
/* Make sure SPI doesn't compute more than 1 pair
|
||||
* of (i,j), which is the optimization here. */
|
||||
key->part.ps.prolog.force_persp_center_interp = uses_persp_center +
|
||||
key->ps.part.prolog.force_persp_center_interp = uses_persp_center +
|
||||
uses_persp_centroid +
|
||||
uses_persp_sample > 1;
|
||||
|
||||
key->part.ps.prolog.force_linear_center_interp = sel->info.uses_linear_center +
|
||||
key->ps.part.prolog.force_linear_center_interp = sel->info.uses_linear_center +
|
||||
sel->info.uses_linear_centroid +
|
||||
sel->info.uses_linear_sample > 1;
|
||||
key->part.ps.prolog.bc_optimize_for_persp = 0;
|
||||
key->part.ps.prolog.bc_optimize_for_linear = 0;
|
||||
key->mono.u.ps.interpolate_at_sample_force_center = sel->info.uses_interp_at_sample;
|
||||
key->ps.part.prolog.bc_optimize_for_persp = 0;
|
||||
key->ps.part.prolog.bc_optimize_for_linear = 0;
|
||||
key->ps.mono.interpolate_at_sample_force_center = sel->info.uses_interp_at_sample;
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute the key for the hw shader variant */
|
||||
static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_shader_selector *sel,
|
||||
struct si_shader_key *key)
|
||||
union si_shader_key *key)
|
||||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
|
||||
|
|
@ -2162,8 +2162,8 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_sh
|
|||
break;
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
if (sctx->chip_class >= GFX9) {
|
||||
si_get_vs_key_inputs(sctx, key, &key->part.tcs.ls_prolog);
|
||||
key->part.tcs.ls = sctx->shader.vs.cso;
|
||||
si_get_vs_key_inputs(sctx, key, &key->ge.part.tcs.ls_prolog);
|
||||
key->ge.part.tcs.ls = sctx->shader.vs.cso;
|
||||
}
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
|
|
@ -2175,11 +2175,11 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_sh
|
|||
case MESA_SHADER_GEOMETRY:
|
||||
if (sctx->chip_class >= GFX9) {
|
||||
if (sctx->shader.tes.cso) {
|
||||
si_clear_vs_key_inputs(sctx, key, &key->part.gs.vs_prolog);
|
||||
key->part.gs.es = sctx->shader.tes.cso;
|
||||
si_clear_vs_key_inputs(sctx, key, &key->ge.part.gs.vs_prolog);
|
||||
key->ge.part.gs.es = sctx->shader.tes.cso;
|
||||
} else {
|
||||
si_get_vs_key_inputs(sctx, key, &key->part.gs.vs_prolog);
|
||||
key->part.gs.es = sctx->shader.vs.cso;
|
||||
si_get_vs_key_inputs(sctx, key, &key->ge.part.gs.vs_prolog);
|
||||
key->ge.part.gs.es = sctx->shader.vs.cso;
|
||||
}
|
||||
|
||||
/* Only NGG can eliminate GS outputs, because the code is shared with VS. */
|
||||
|
|
@ -2249,11 +2249,11 @@ static void si_build_shader_variant_low_priority(void *job, void *gdata, int thr
|
|||
}
|
||||
|
||||
/* This should be const, but C++ doesn't allow implicit zero-initialization with const. */
|
||||
static struct si_shader_key zeroed;
|
||||
static union si_shader_key zeroed;
|
||||
|
||||
static bool si_check_missing_main_part(struct si_screen *sscreen, struct si_shader_selector *sel,
|
||||
struct si_compiler_ctx_state *compiler_state,
|
||||
const struct si_shader_key *key)
|
||||
const union si_shader_key *key)
|
||||
{
|
||||
struct si_shader **mainp = si_get_main_shader_part(sel, key);
|
||||
|
||||
|
|
@ -2269,9 +2269,11 @@ static bool si_check_missing_main_part(struct si_screen *sscreen, struct si_shad
|
|||
util_queue_fence_init(&main_part->ready);
|
||||
|
||||
main_part->selector = sel;
|
||||
main_part->key.as_es = key->as_es;
|
||||
main_part->key.as_ls = key->as_ls;
|
||||
main_part->key.as_ngg = key->as_ngg;
|
||||
if (sel->info.stage <= MESA_SHADER_GEOMETRY) {
|
||||
main_part->key.ge.as_es = key->ge.as_es;
|
||||
main_part->key.ge.as_ls = key->ge.as_ls;
|
||||
main_part->key.ge.as_ngg = key->ge.as_ngg;
|
||||
}
|
||||
main_part->is_monolithic = false;
|
||||
|
||||
if (!si_compile_shader(sscreen, compiler_state->compiler, main_part,
|
||||
|
|
@ -2285,8 +2287,9 @@ static bool si_check_missing_main_part(struct si_screen *sscreen, struct si_shad
|
|||
}
|
||||
|
||||
/* A helper to copy *key to *local_key and return local_key. */
|
||||
static const struct si_shader_key *
|
||||
use_local_key_copy(const struct si_shader_key *key, struct si_shader_key *local_key)
|
||||
template<typename SHADER_KEY_TYPE>
|
||||
static const SHADER_KEY_TYPE *
|
||||
use_local_key_copy(const SHADER_KEY_TYPE *key, SHADER_KEY_TYPE *local_key)
|
||||
{
|
||||
if (key != local_key)
|
||||
memcpy(local_key, key, sizeof(*key));
|
||||
|
|
@ -2301,24 +2304,27 @@ use_local_key_copy(const struct si_shader_key *key, struct si_shader_key *local_
|
|||
* the compilation isn't finished, don't select any
|
||||
* shader and return an error.
|
||||
*/
|
||||
int si_shader_select_with_key(struct si_context *sctx, struct si_shader_ctx_state *state,
|
||||
const struct si_shader_key *key, int thread_index,
|
||||
bool optimized_or_none)
|
||||
template<typename SHADER_KEY_TYPE>
|
||||
static int si_shader_select_with_key(struct si_context *sctx, struct si_shader_ctx_state *state,
|
||||
const SHADER_KEY_TYPE *key, int thread_index,
|
||||
bool optimized_or_none)
|
||||
{
|
||||
struct si_screen *sscreen = sctx->screen;
|
||||
struct si_shader_selector *sel = state->cso;
|
||||
struct si_shader_selector *previous_stage_sel = NULL;
|
||||
struct si_shader *current = state->current;
|
||||
struct si_shader *iter, *shader = NULL;
|
||||
const SHADER_KEY_TYPE *zeroed_key = (SHADER_KEY_TYPE*)&zeroed;
|
||||
|
||||
/* si_shader_select_with_key must not modify 'key' because it would affect future shaders.
|
||||
* If we need to modify it for this specific shader (eg: to disable optimizations), we
|
||||
* use a copy.
|
||||
*/
|
||||
struct si_shader_key local_key;
|
||||
SHADER_KEY_TYPE local_key;
|
||||
|
||||
if (unlikely(sscreen->debug_flags & DBG(NO_OPT_VARIANT))) {
|
||||
/* Disable shader variant optimizations. */
|
||||
key = use_local_key_copy(key, &local_key);
|
||||
key = use_local_key_copy<SHADER_KEY_TYPE>(key, &local_key);
|
||||
memset(&local_key.opt, 0, sizeof(key->opt));
|
||||
}
|
||||
|
||||
|
|
@ -2364,12 +2370,14 @@ current_not_ready:
|
|||
|
||||
/* Find the shader variant. */
|
||||
for (iter = sel->first_variant; iter; iter = iter->next_variant) {
|
||||
if (memcmp(&iter->key, key, s) == 0) {
|
||||
const SHADER_KEY_TYPE *iter_key = (const SHADER_KEY_TYPE *)&iter->key;
|
||||
|
||||
if (memcmp(iter_key, key, s) == 0) {
|
||||
/* Check the inlined uniform values separatly, and count
|
||||
* the number of variants based on them.
|
||||
*/
|
||||
if (key->opt.inline_uniforms &&
|
||||
memcmp(iter->key.opt.inlined_uniform_values,
|
||||
memcmp(iter_key->opt.inlined_uniform_values,
|
||||
key->opt.inlined_uniform_values,
|
||||
MAX_INLINABLE_UNIFORMS * 4) != 0) {
|
||||
if (variant_count++ > max_inline_uniforms_variants) {
|
||||
|
|
@ -2424,7 +2432,7 @@ current_not_ready:
|
|||
si_init_compiler(sctx->screen, &sctx->compiler);
|
||||
|
||||
shader->selector = sel;
|
||||
shader->key = *key;
|
||||
*((SHADER_KEY_TYPE*)&shader->key) = *key;
|
||||
shader->compiler_ctx_state.compiler = &sctx->compiler;
|
||||
shader->compiler_ctx_state.debug = sctx->debug;
|
||||
shader->compiler_ctx_state.is_debug_context = sctx->is_debug;
|
||||
|
|
@ -2432,9 +2440,9 @@ current_not_ready:
|
|||
/* If this is a merged shader, get the first shader's selector. */
|
||||
if (sscreen->info.chip_class >= GFX9) {
|
||||
if (sel->info.stage == MESA_SHADER_TESS_CTRL)
|
||||
previous_stage_sel = key->part.tcs.ls;
|
||||
previous_stage_sel = ((struct si_shader_key_ge*)key)->part.tcs.ls;
|
||||
else if (sel->info.stage == MESA_SHADER_GEOMETRY)
|
||||
previous_stage_sel = key->part.gs.es;
|
||||
previous_stage_sel = ((struct si_shader_key_ge*)key)->part.gs.es;
|
||||
|
||||
/* We need to wait for the previous shader. */
|
||||
if (previous_stage_sel && thread_index < 0)
|
||||
|
|
@ -2442,7 +2450,7 @@ current_not_ready:
|
|||
}
|
||||
|
||||
bool is_pure_monolithic =
|
||||
sscreen->use_monolithic_shaders || memcmp(&key->mono, &zeroed.mono, sizeof(key->mono)) != 0;
|
||||
sscreen->use_monolithic_shaders || memcmp(&key->mono, &zeroed_key->mono, sizeof(key->mono)) != 0;
|
||||
|
||||
/* Compile the main shader part if it doesn't exist. This can happen
|
||||
* if the initial guess was wrong.
|
||||
|
|
@ -2461,13 +2469,13 @@ current_not_ready:
|
|||
* part is present.
|
||||
*/
|
||||
if (previous_stage_sel) {
|
||||
struct si_shader_key shader1_key = zeroed;
|
||||
union si_shader_key shader1_key = zeroed;
|
||||
|
||||
if (sel->info.stage == MESA_SHADER_TESS_CTRL) {
|
||||
shader1_key.as_ls = 1;
|
||||
shader1_key.ge.as_ls = 1;
|
||||
} else if (sel->info.stage == MESA_SHADER_GEOMETRY) {
|
||||
shader1_key.as_es = 1;
|
||||
shader1_key.as_ngg = key->as_ngg; /* for Wave32 vs Wave64 */
|
||||
shader1_key.ge.as_es = 1;
|
||||
shader1_key.ge.as_ngg = ((struct si_shader_key_ge*)key)->as_ngg; /* for Wave32 vs Wave64 */
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
|
@ -2479,7 +2487,8 @@ current_not_ready:
|
|||
}
|
||||
|
||||
if (ok) {
|
||||
ok = si_check_missing_main_part(sscreen, sel, &shader->compiler_ctx_state, key);
|
||||
ok = si_check_missing_main_part(sscreen, sel, &shader->compiler_ctx_state,
|
||||
(union si_shader_key*)key);
|
||||
}
|
||||
|
||||
if (!ok) {
|
||||
|
|
@ -2500,10 +2509,10 @@ current_not_ready:
|
|||
/* Monolithic-only shaders don't make a distinction between optimized
|
||||
* and unoptimized. */
|
||||
shader->is_monolithic =
|
||||
is_pure_monolithic || memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0;
|
||||
is_pure_monolithic || memcmp(&key->opt, &zeroed_key->opt, sizeof(key->opt)) != 0;
|
||||
|
||||
shader->is_optimized = !is_pure_monolithic &&
|
||||
memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0;
|
||||
memcmp(&key->opt, &zeroed_key->opt, sizeof(key->opt)) != 0;
|
||||
|
||||
/* If it's an optimized shader, compile it asynchronously. */
|
||||
if (shader->is_optimized && thread_index < 0) {
|
||||
|
|
@ -2563,11 +2572,15 @@ int si_shader_select(struct pipe_context *ctx, struct si_shader_ctx_state *state
|
|||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
|
||||
si_shader_selector_key(ctx, state->cso, &state->key);
|
||||
return si_shader_select_with_key(sctx, state, &state->key, -1, false);
|
||||
|
||||
if (state->cso->info.stage == MESA_SHADER_FRAGMENT)
|
||||
return si_shader_select_with_key(sctx, state, &state->key.ps, -1, false);
|
||||
else
|
||||
return si_shader_select_with_key(sctx, state, &state->key.ge, -1, false);
|
||||
}
|
||||
|
||||
static void si_parse_next_shader_property(const struct si_shader_info *info, bool streamout,
|
||||
struct si_shader_key *key)
|
||||
union si_shader_key *key)
|
||||
{
|
||||
gl_shader_stage next_shader = info->base.next_stage;
|
||||
|
||||
|
|
@ -2575,11 +2588,11 @@ static void si_parse_next_shader_property(const struct si_shader_info *info, boo
|
|||
case MESA_SHADER_VERTEX:
|
||||
switch (next_shader) {
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
key->as_es = 1;
|
||||
key->ge.as_es = 1;
|
||||
break;
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
key->as_ls = 1;
|
||||
key->ge.as_ls = 1;
|
||||
break;
|
||||
default:
|
||||
/* If POSITION isn't written, it can only be a HW VS
|
||||
|
|
@ -2588,13 +2601,13 @@ static void si_parse_next_shader_property(const struct si_shader_info *info, boo
|
|||
* This heuristic is needed for separate shader objects.
|
||||
*/
|
||||
if (!info->writes_position && !streamout)
|
||||
key->as_ls = 1;
|
||||
key->ge.as_ls = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
if (next_shader == MESA_SHADER_GEOMETRY || !info->writes_position)
|
||||
key->as_es = 1;
|
||||
key->ge.as_es = 1;
|
||||
break;
|
||||
|
||||
default:;
|
||||
|
|
@ -2672,13 +2685,17 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
|
|||
shader->is_monolithic = false;
|
||||
si_parse_next_shader_property(&sel->info, sel->so.num_outputs != 0, &shader->key);
|
||||
|
||||
if (sscreen->use_ngg && (!sel->so.num_outputs || sscreen->use_ngg_streamout) &&
|
||||
((sel->info.stage == MESA_SHADER_VERTEX && !shader->key.as_ls) ||
|
||||
if (sel->info.stage <= MESA_SHADER_GEOMETRY &&
|
||||
sscreen->use_ngg && (!sel->so.num_outputs || sscreen->use_ngg_streamout) &&
|
||||
((sel->info.stage == MESA_SHADER_VERTEX && !shader->key.ge.as_ls) ||
|
||||
sel->info.stage == MESA_SHADER_TESS_EVAL || sel->info.stage == MESA_SHADER_GEOMETRY))
|
||||
shader->key.as_ngg = 1;
|
||||
shader->key.ge.as_ngg = 1;
|
||||
|
||||
if (sel->nir) {
|
||||
si_get_ir_cache_key(sel, shader->key.as_ngg, shader->key.as_es, ir_sha1_cache_key);
|
||||
if (sel->info.stage <= MESA_SHADER_GEOMETRY)
|
||||
si_get_ir_cache_key(sel, shader->key.ge.as_ngg, shader->key.ge.as_es, ir_sha1_cache_key);
|
||||
else
|
||||
si_get_ir_cache_key(sel, false, false, ir_sha1_cache_key);
|
||||
}
|
||||
|
||||
/* Try to load the shader from the shader cache. */
|
||||
|
|
@ -2714,7 +2731,7 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
|
|||
if ((sel->info.stage == MESA_SHADER_VERTEX ||
|
||||
sel->info.stage == MESA_SHADER_TESS_EVAL ||
|
||||
sel->info.stage == MESA_SHADER_GEOMETRY) &&
|
||||
!shader->key.as_ls && !shader->key.as_es) {
|
||||
!shader->key.ge.as_ls && !shader->key.ge.as_es) {
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < sel->info.num_outputs; i++) {
|
||||
|
|
@ -3186,7 +3203,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
|
|||
sctx->shader.vs.current = sel ? sel->first_variant : NULL;
|
||||
sctx->num_vs_blit_sgprs = sel ? sel->info.base.vs.blit_sgprs_amd : 0;
|
||||
sctx->vs_uses_draw_id = sel ? sel->info.uses_drawid : false;
|
||||
sctx->fixed_func_tcs_shader.key.mono.u.ff_tcs_inputs_to_copy = sel ? sel->outputs_written : 0;
|
||||
sctx->fixed_func_tcs_shader.key.ge.mono.u.ff_tcs_inputs_to_copy = sel ? sel->outputs_written : 0;
|
||||
|
||||
if (si_update_ngg(sctx))
|
||||
si_shader_change_notify(sctx);
|
||||
|
|
@ -3294,7 +3311,7 @@ static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
|
|||
|
||||
sctx->shader.tcs.cso = sel;
|
||||
sctx->shader.tcs.current = sel ? sel->first_variant : NULL;
|
||||
sctx->shader.tcs.key.part.tcs.epilog.invoc0_tess_factors_are_def =
|
||||
sctx->shader.tcs.key.ge.part.tcs.epilog.invoc0_tess_factors_are_def =
|
||||
sel ? sel->info.tessfactors_are_def_in_all_invocs : 0;
|
||||
si_update_tess_uses_prim_id(sctx);
|
||||
|
||||
|
|
@ -3320,12 +3337,12 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
|
|||
sctx->ia_multi_vgt_param_key.u.uses_tess = sel != NULL;
|
||||
si_update_tess_uses_prim_id(sctx);
|
||||
|
||||
sctx->shader.tcs.key.part.tcs.epilog.prim_mode =
|
||||
sctx->fixed_func_tcs_shader.key.part.tcs.epilog.prim_mode =
|
||||
sctx->shader.tcs.key.ge.part.tcs.epilog.prim_mode =
|
||||
sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.prim_mode =
|
||||
sel ? sel->info.base.tess.primitive_mode : 0;
|
||||
|
||||
sctx->shader.tcs.key.part.tcs.epilog.tes_reads_tess_factors =
|
||||
sctx->fixed_func_tcs_shader.key.part.tcs.epilog.tes_reads_tess_factors =
|
||||
sctx->shader.tcs.key.ge.part.tcs.epilog.tes_reads_tess_factors =
|
||||
sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.tes_reads_tess_factors =
|
||||
sel ? sel->info.reads_tess_factors : 0;
|
||||
|
||||
si_update_common_shader_state(sctx, sel, PIPE_SHADER_TESS_EVAL);
|
||||
|
|
@ -3437,13 +3454,13 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
|
|||
|
||||
switch (shader->selector->info.stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
if (shader->key.as_ls) {
|
||||
if (shader->key.ge.as_ls) {
|
||||
if (sctx->chip_class <= GFX8)
|
||||
state_index = SI_STATE_IDX(ls);
|
||||
} else if (shader->key.as_es) {
|
||||
} else if (shader->key.ge.as_es) {
|
||||
if (sctx->chip_class <= GFX8)
|
||||
state_index = SI_STATE_IDX(es);
|
||||
} else if (shader->key.as_ngg) {
|
||||
} else if (shader->key.ge.as_ngg) {
|
||||
state_index = SI_STATE_IDX(gs);
|
||||
} else {
|
||||
state_index = SI_STATE_IDX(vs);
|
||||
|
|
@ -3453,10 +3470,10 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
|
|||
state_index = SI_STATE_IDX(hs);
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
if (shader->key.as_es) {
|
||||
if (shader->key.ge.as_es) {
|
||||
if (sctx->chip_class <= GFX8)
|
||||
state_index = SI_STATE_IDX(es);
|
||||
} else if (shader->key.as_ngg) {
|
||||
} else if (shader->key.ge.as_ngg) {
|
||||
state_index = SI_STATE_IDX(gs);
|
||||
} else {
|
||||
state_index = SI_STATE_IDX(vs);
|
||||
|
|
@ -3808,11 +3825,11 @@ static bool si_update_scratch_relocs(struct si_context *sctx)
|
|||
if (r < 0)
|
||||
return false;
|
||||
if (r == 1) {
|
||||
if (sctx->shader.vs.current->key.as_ls)
|
||||
if (sctx->shader.vs.current->key.ge.as_ls)
|
||||
si_pm4_bind_state(sctx, ls, sctx->shader.vs.current);
|
||||
else if (sctx->shader.vs.current->key.as_es)
|
||||
else if (sctx->shader.vs.current->key.ge.as_es)
|
||||
si_pm4_bind_state(sctx, es, sctx->shader.vs.current);
|
||||
else if (sctx->shader.vs.current->key.as_ngg)
|
||||
else if (sctx->shader.vs.current->key.ge.as_ngg)
|
||||
si_pm4_bind_state(sctx, gs, sctx->shader.vs.current);
|
||||
else
|
||||
si_pm4_bind_state(sctx, vs, sctx->shader.vs.current);
|
||||
|
|
@ -3823,9 +3840,9 @@ static bool si_update_scratch_relocs(struct si_context *sctx)
|
|||
if (r < 0)
|
||||
return false;
|
||||
if (r == 1) {
|
||||
if (sctx->shader.tes.current->key.as_es)
|
||||
if (sctx->shader.tes.current->key.ge.as_es)
|
||||
si_pm4_bind_state(sctx, es, sctx->shader.tes.current);
|
||||
else if (sctx->shader.tes.current->key.as_ngg)
|
||||
else if (sctx->shader.tes.current->key.ge.as_ngg)
|
||||
si_pm4_bind_state(sctx, gs, sctx->shader.tes.current);
|
||||
else
|
||||
si_pm4_bind_state(sctx, vs, sctx->shader.tes.current);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue