mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-11 05:10:27 +01:00
aco: When LS and HS invocations are the same, pass LS outputs in temps.
We know that in this case, the LS and HS invocations are working on the exact same vertex, so it's safe to skip the LDS.

Totals:
VGPRS: 3960744 -> 3961844 (0.03 %)
Code Size: 254824300 -> 254764624 (-0.02 %) bytes
Max Waves: 1053748 -> 1053574 (-0.02 %)

Totals from affected shaders:
VGPRS: 26152 -> 27252 (4.21 %)
Code Size: 1496600 -> 1436924 (-3.99 %) bytes
Max Waves: 4860 -> 4686 (-3.58 %)

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4165>
This commit is contained in:
parent
0a91c086b8
commit
798dd98d6e
1 changed files with 35 additions and 0 deletions
|
|
@ -3329,6 +3329,34 @@ bool store_output_to_temps(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
 * Try to satisfy a TCS per-vertex input load from the temporaries kept in
 * ctx->inputs.temps, copying the result into dst.
 *
 * The shortcut is only taken when all of the following hold:
 *  - the shader being compiled is a tessellation control shader and
 *    ctx->tcs_in_out_eq is set (per-vertex inputs only line up between the
 *    VS and TCS invocation id when both run the same number of invocations);
 *  - the I/O offset source of the load is a compile-time constant;
 *  - the vertex index is produced directly by a load_invocation_id intrinsic.
 *
 * \param ctx    instruction selection context
 * \param instr  the NIR load intrinsic being lowered
 * \param dst    temporary that receives the loaded value
 * \return true when dst has been written; false when nothing was emitted
 *         and the caller has to load the input some other way.
 */
bool load_input_from_temps(isel_context *ctx, nir_intrinsic_instr *instr, Temp dst)
{
   /* Bail out unless this is a TCS whose invocations match the previous stage. */
   if (!(ctx->shader->info.stage == MESA_SHADER_TESS_CTRL && ctx->tcs_in_out_eq))
      return false;

   nir_src *const offset = nir_get_io_offset_src(instr);
   nir_src *const vertex_index = nir_get_io_vertex_index_src(instr);
   nir_instr *const index_producer = vertex_index->ssa->parent_instr;

   /* The slot must be known at compile time to pick the right temporaries. */
   if (!nir_src_is_const(*offset))
      return false;

   /* Only loads indexed by the invocation's own id can be served from temps. */
   if (index_producer->type != nir_instr_type_intrinsic)
      return false;
   if (nir_instr_as_intrinsic(index_producer)->intrinsic != nir_intrinsic_load_invocation_id)
      return false;

   /* Index of the first temporary: base + component, advanced by four
    * entries for every unit of the constant offset.
    */
   const unsigned first_temp =
      nir_intrinsic_base(instr) + nir_intrinsic_component(instr) + 4 * nir_src_as_uint(*offset);

   /* Gather dst.size() consecutive temporaries into a single vector value. */
   Temp gathered = create_vec_from_array(ctx, &ctx->inputs.temps[first_temp], dst.size(),
                                         dst.regClass().type(), 4u);
   assert(gathered.size() == dst.size());

   Builder bld(ctx->program, ctx->block);
   bld.copy(Definition(dst), gathered);

   return true;
}
|
||||
|
||||
void visit_store_ls_or_es_output(isel_context *ctx, nir_intrinsic_instr *instr)
|
||||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
|
@ -3338,6 +3366,9 @@ void visit_store_ls_or_es_output(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
unsigned write_mask = nir_intrinsic_write_mask(instr);
|
||||
unsigned elem_size_bytes = instr->src[0].ssa->bit_size / 8u;
|
||||
|
||||
if (ctx->tcs_in_out_eq)
|
||||
store_output_to_temps(ctx, instr);
|
||||
|
||||
if (ctx->stage == vertex_es || ctx->stage == tess_eval_es) {
|
||||
/* GFX6-8: ES stage is not merged into GS, data is passed from ES to GS in VMEM. */
|
||||
Temp esgs_ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer, Operand(RING_ESGS_VS * 16u));
|
||||
|
|
@ -3974,6 +4005,10 @@ void visit_load_tcs_per_vertex_input(isel_context *ctx, nir_intrinsic_instr *ins
|
|||
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
|
||||
|
||||
if (load_input_from_temps(ctx, instr, dst))
|
||||
return;
|
||||
|
||||
std::pair<Temp, unsigned> offs = get_tcs_per_vertex_input_lds_offset(ctx, instr);
|
||||
unsigned elem_size_bytes = instr->dest.ssa.bit_size / 8;
|
||||
unsigned lds_align = calculate_lds_alignment(ctx, offs.second);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue