ac/nir/tess: don't allocate LDS for HS inputs that are passed via VGPRs

Right now we skip allocating LDS for HS inputs only when all HS inputs are
passed via VGPRs.

This changes it to skip allocating LDS for exactly those HS inputs that are
passed via VGPRs, by removing them from the inputs_read mask used for IO
location mapping so that they leave no holes.

radeonsi changes to the LDS allocation will be in a different MR.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30962>
This commit is contained in:
Marek Olšák 2024-08-25 07:38:03 -04:00 committed by Marge Bot
parent 0767f91c8a
commit 52c41f25de
4 changed files with 25 additions and 10 deletions

View file

@ -83,7 +83,8 @@ ac_nir_lower_ls_outputs_to_mem(nir_shader *ls,
void
ac_nir_lower_hs_inputs_to_mem(nir_shader *shader,
ac_nir_map_io_driver_location map,
bool tcs_in_out_eq);
bool tcs_in_out_eq,
uint64_t tcs_temp_only_inputs);
void
ac_nir_lower_hs_outputs_to_mem(nir_shader *shader,

View file

@ -286,7 +286,8 @@ lower_ls_output_store(nir_builder *b,
nir_def *vertex_idx = nir_load_local_invocation_index(b);
nir_def *base_off_var = nir_imul(b, vertex_idx, nir_load_lshs_vertex_stride_amd(b));
unsigned mapped = ac_nir_map_io_location(io_sem.location, st->tcs_inputs_read, st->map_io);
unsigned mapped = ac_nir_map_io_location(io_sem.location, st->tcs_inputs_read & ~st->tcs_temp_only_inputs,
st->map_io);
nir_def *io_off = ac_nir_calc_io_off(b, intrin, nir_imm_int(b, 16u), 4u, mapped);
unsigned write_mask = nir_intrinsic_write_mask(intrin);
@ -325,11 +326,21 @@ filter_load_tcs_per_vertex_input(const nir_instr *instr,
nir_src *vertex_index_src = nir_get_io_arrayed_index_src(intrin);
nir_instr *vertex_index_instr = vertex_index_src->ssa->parent_instr;
bool can_use_temps = nir_src_is_const(*off_src) &&
vertex_index_instr->type == nir_instr_type_intrinsic &&
nir_instr_as_intrinsic(vertex_index_instr)->intrinsic == nir_intrinsic_load_invocation_id;
return !can_use_temps;
const nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
/* If this is a temp-only TCS input, we don't need to use shared memory at all. */
if (st->tcs_temp_only_inputs & BITFIELD64_BIT(io_sem.location)) {
ASSERTED bool can_use_temps =
nir_src_is_const(*off_src) &&
vertex_index_instr->type == nir_instr_type_intrinsic &&
nir_instr_as_intrinsic(vertex_index_instr)->intrinsic == nir_intrinsic_load_invocation_id;
assert(can_use_temps);
return false;
}
return true;
}
static nir_def *
@ -348,7 +359,8 @@ hs_per_vertex_input_lds_offset(nir_builder *b,
nir_def *tcs_in_current_patch_offset = nir_imul(b, rel_patch_id, tcs_in_patch_stride);
const nir_io_semantics io_sem = nir_intrinsic_io_semantics(instr);
const unsigned mapped = ac_nir_map_io_location(io_sem.location, st->tcs_inputs_read, st->map_io);
const unsigned mapped = ac_nir_map_io_location(io_sem.location, st->tcs_inputs_read & ~st->tcs_temp_only_inputs,
st->map_io);
nir_def *io_offset = ac_nir_calc_io_off(b, instr, nir_imm_int(b, 16u), 4u, mapped);
return nir_iadd_nuw(b, nir_iadd_nuw(b, tcs_in_current_patch_offset, vertex_index_off), io_offset);
@ -986,13 +998,15 @@ ac_nir_lower_ls_outputs_to_mem(nir_shader *shader,
void
ac_nir_lower_hs_inputs_to_mem(nir_shader *shader,
ac_nir_map_io_driver_location map,
bool tcs_in_out_eq)
bool tcs_in_out_eq,
uint64_t tcs_temp_only_inputs)
{
assert(shader->info.stage == MESA_SHADER_TESS_CTRL);
lower_tess_io_state state = {
.tcs_inputs_read = shader->info.inputs_read,
.tcs_in_out_eq = tcs_in_out_eq,
.tcs_temp_only_inputs = tcs_in_out_eq ? tcs_temp_only_inputs : 0,
.map_io = map,
};

View file

@ -141,7 +141,7 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *s
return true;
}
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, map_input, info->vs.tcs_in_out_eq);
NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, map_input, info->vs.tcs_in_out_eq, info->vs.tcs_temp_only_input_mask);
NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, map_output, pdev->info.gfx_level,
info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, info->wave_size, false, false);

View file

@ -1856,7 +1856,7 @@ static bool si_lower_io_to_mem(struct si_shader *shader, nir_shader *nir,
}
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, si_map_io_driver_location,
key->ge.opt.same_patch_vertices);
key->ge.opt.same_patch_vertices, sel->info.tcs_vgpr_only_inputs);
/* Used by hs_emit_write_tess_factors() when monolithic shader. */
nir->info.tess._primitive_mode = key->ge.opt.tes_prim_mode;