mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 09:38:07 +02:00
ac/nir: reserve the first LDS vec4 for the HS tf0/1 group vote in TCS
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31673>
This commit is contained in:
parent
fd5779c198
commit
f4eebb373c
5 changed files with 38 additions and 10 deletions
|
|
@ -17,6 +17,9 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Number of bytes (one vec4) reserved at the beginning of LDS for the tf0/1 shader message group vote. Only reserved on GFX11 and newer. */
|
||||
#define AC_HS_MSG_VOTE_LDS_BYTES 16
|
||||
|
||||
enum
|
||||
{
|
||||
/* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
|
||||
|
|
@ -76,6 +79,7 @@ bool ac_nir_optimize_outputs(nir_shader *nir, bool sprite_tex_disallowed,
|
|||
void
|
||||
ac_nir_lower_ls_outputs_to_mem(nir_shader *ls,
|
||||
ac_nir_map_io_driver_location map,
|
||||
enum amd_gfx_level gfx_level,
|
||||
bool tcs_in_out_eq,
|
||||
uint64_t tcs_inputs_read,
|
||||
uint64_t tcs_temp_only_inputs);
|
||||
|
|
@ -83,6 +87,7 @@ ac_nir_lower_ls_outputs_to_mem(nir_shader *ls,
|
|||
void
|
||||
ac_nir_lower_hs_inputs_to_mem(nir_shader *shader,
|
||||
ac_nir_map_io_driver_location map,
|
||||
enum amd_gfx_level gfx_level,
|
||||
bool tcs_in_out_eq,
|
||||
uint64_t tcs_temp_only_inputs);
|
||||
|
||||
|
|
|
|||
|
|
@ -61,8 +61,8 @@
|
|||
* TCS per-vertex inputs for patch 1
|
||||
* TCS per-vertex inputs for patch 2 <─── hs_per_vertex_input_lds_offset (rel_patch_id = 2)
|
||||
* ...
|
||||
* TCS per-vertex outputs for patch 0 <─── output_patch0_offset
|
||||
* TCS per-patch outputs for patch 0 <─── output_patch0_patch_data_offset
|
||||
* TCS per-vertex outputs for patch 0 <─── hs_output_lds_offset (rel_patch_id = 0, per-vertex)
|
||||
* TCS per-patch outputs for patch 0 <─── hs_output_lds_offset (rel_patch_id = 0, per-patch)
|
||||
* TCS per-vertex outputs for patch 1
|
||||
* TCS per-patch outputs for patch 1
|
||||
* TCS per-vertex outputs for patch 2 <─── hs_output_lds_offset (rel_patch_id = 2, per-vertex)
|
||||
|
|
@ -284,6 +284,11 @@ lower_ls_output_store(nir_builder *b,
|
|||
unsigned write_mask = nir_intrinsic_write_mask(intrin);
|
||||
|
||||
nir_def *off = nir_iadd_nuw(b, base_off_var, io_off);
|
||||
|
||||
/* The first vec4 is reserved for the tf0/1 shader message group vote. */
|
||||
if (st->gfx_level >= GFX11)
|
||||
off = nir_iadd_imm_nuw(b, off, AC_HS_MSG_VOTE_LDS_BYTES);
|
||||
|
||||
AC_NIR_STORE_IO(b, intrin->src[0].ssa, 0, write_mask, io_sem.high_16bits,
|
||||
nir_store_shared, off, .write_mask = store_write_mask, .base = store_const_offset);
|
||||
|
||||
|
|
@ -354,8 +359,10 @@ hs_per_vertex_input_lds_offset(nir_builder *b,
|
|||
const unsigned mapped = ac_nir_map_io_location(io_sem.location, st->tcs_inputs_read & ~st->tcs_temp_only_inputs,
|
||||
st->map_io);
|
||||
nir_def *io_offset = ac_nir_calc_io_off(b, instr, nir_imm_int(b, 16u), 4u, mapped);
|
||||
nir_def *lds_offset = nir_iadd_nuw(b, nir_iadd_nuw(b, tcs_in_current_patch_offset, vertex_index_off), io_offset);
|
||||
|
||||
return nir_iadd_nuw(b, nir_iadd_nuw(b, tcs_in_current_patch_offset, vertex_index_off), io_offset);
|
||||
/* The first LDS vec4 is reserved for the tf0/1 shader message group vote. */
|
||||
return st->gfx_level >= GFX11 ? nir_iadd_imm_nuw(b, lds_offset, AC_HS_MSG_VOTE_LDS_BYTES) : lds_offset;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
|
|
@ -419,17 +426,21 @@ hs_output_lds_offset(nir_builder *b,
|
|||
nir_def *input_patch_size = nir_imul(b, tcs_in_vtxcnt, nir_load_lshs_vertex_stride_amd(b));
|
||||
nir_def *output_patch0_offset = nir_imul(b, input_patch_size, tcs_num_patches);
|
||||
nir_def *output_patch_offset = nir_iadd_nuw(b, patch_offset, output_patch0_offset);
|
||||
nir_def *lds_offset;
|
||||
|
||||
if (per_vertex) {
|
||||
nir_def *vertex_index = nir_get_io_arrayed_index_src(intrin)->ssa;
|
||||
nir_def *vertex_index_off = nir_imul_imm(b, vertex_index, output_vertex_size);
|
||||
|
||||
off = nir_iadd_nuw(b, off, vertex_index_off);
|
||||
return nir_iadd_nuw(b, off, output_patch_offset);
|
||||
lds_offset = nir_iadd_nuw(b, off, output_patch_offset);
|
||||
} else {
|
||||
off = nir_iadd_imm_nuw(b, off, pervertex_output_patch_size);
|
||||
return nir_iadd_nuw(b, off, output_patch_offset);
|
||||
lds_offset = nir_iadd_nuw(b, off, output_patch_offset);
|
||||
}
|
||||
|
||||
/* The first LDS vec4 is reserved for the tf0/1 shader message group vote. */
|
||||
return st->gfx_level >= GFX11 ? nir_iadd_imm_nuw(b, lds_offset, AC_HS_MSG_VOTE_LDS_BYTES) : lds_offset;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
|
|
@ -963,6 +974,7 @@ filter_any_input_access(const nir_instr *instr,
|
|||
void
|
||||
ac_nir_lower_ls_outputs_to_mem(nir_shader *shader,
|
||||
ac_nir_map_io_driver_location map,
|
||||
enum amd_gfx_level gfx_level,
|
||||
bool tcs_in_out_eq,
|
||||
uint64_t tcs_inputs_read,
|
||||
uint64_t tcs_temp_only_inputs)
|
||||
|
|
@ -970,6 +982,7 @@ ac_nir_lower_ls_outputs_to_mem(nir_shader *shader,
|
|||
assert(shader->info.stage == MESA_SHADER_VERTEX);
|
||||
|
||||
lower_tess_io_state state = {
|
||||
.gfx_level = gfx_level,
|
||||
.tcs_in_out_eq = tcs_in_out_eq,
|
||||
.tcs_inputs_read = tcs_inputs_read,
|
||||
.tcs_temp_only_inputs = tcs_in_out_eq ? tcs_temp_only_inputs : 0,
|
||||
|
|
@ -984,12 +997,14 @@ ac_nir_lower_ls_outputs_to_mem(nir_shader *shader,
|
|||
void
|
||||
ac_nir_lower_hs_inputs_to_mem(nir_shader *shader,
|
||||
ac_nir_map_io_driver_location map,
|
||||
enum amd_gfx_level gfx_level,
|
||||
bool tcs_in_out_eq,
|
||||
uint64_t tcs_temp_only_inputs)
|
||||
{
|
||||
assert(shader->info.stage == MESA_SHADER_TESS_CTRL);
|
||||
|
||||
lower_tess_io_state state = {
|
||||
.gfx_level = gfx_level,
|
||||
.tcs_inputs_read = shader->info.inputs_read,
|
||||
.tcs_in_out_eq = tcs_in_out_eq,
|
||||
.tcs_temp_only_inputs = tcs_in_out_eq ? tcs_temp_only_inputs : 0,
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include "ac_shader_util.h"
|
||||
#include "ac_gpu_info.h"
|
||||
#include "ac_nir.h"
|
||||
|
||||
#include "sid.h"
|
||||
#include "util/u_math.h"
|
||||
|
|
@ -1209,7 +1210,11 @@ uint32_t ac_compute_num_tess_patches(const struct radeon_info *info, uint32_t nu
|
|||
uint32_t
|
||||
ac_compute_tess_lds_size(const struct radeon_info *info, uint32_t lds_per_patch, uint32_t num_patches)
|
||||
{
|
||||
const unsigned lds_size = lds_per_patch * num_patches;
|
||||
unsigned lds_size = lds_per_patch * num_patches;
|
||||
|
||||
/* The first vec4 is reserved for the tf0/1 shader message group vote. */
|
||||
if (info->gfx_level >= GFX11)
|
||||
lds_size += AC_HS_MSG_VOTE_LDS_BYTES;
|
||||
|
||||
assert(lds_size <= (info->gfx_level >= GFX9 ? 65536 : 32768));
|
||||
|
||||
|
|
|
|||
|
|
@ -219,7 +219,7 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *s
|
|||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
if (info->vs.as_ls) {
|
||||
NIR_PASS_V(nir, ac_nir_lower_ls_outputs_to_mem, map_output, info->vs.tcs_in_out_eq,
|
||||
NIR_PASS_V(nir, ac_nir_lower_ls_outputs_to_mem, map_output, pdev->info.gfx_level, info->vs.tcs_in_out_eq,
|
||||
info->vs.hs_inputs_read, info->vs.tcs_temp_only_input_mask);
|
||||
return true;
|
||||
} else if (info->vs.as_es) {
|
||||
|
|
@ -227,9 +227,10 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *s
|
|||
return true;
|
||||
}
|
||||
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
|
||||
NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, map_input, info->vs.tcs_in_out_eq, info->vs.tcs_temp_only_input_mask);
|
||||
NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, map_output, pdev->info.gfx_level,
|
||||
info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, info->wave_size);
|
||||
NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, map_input, pdev->info.gfx_level, info->vs.tcs_in_out_eq,
|
||||
info->vs.tcs_temp_only_input_mask);
|
||||
NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, map_output, pdev->info.gfx_level, info->tcs.tes_inputs_read,
|
||||
info->tcs.tes_patch_inputs_read, info->wave_size);
|
||||
|
||||
return true;
|
||||
} else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
|
|
|
|||
|
|
@ -1855,6 +1855,7 @@ static bool si_lower_io_to_mem(struct si_shader *shader, nir_shader *nir,
|
|||
if (key->ge.as_ls) {
|
||||
NIR_PASS_V(nir, ac_nir_lower_ls_outputs_to_mem,
|
||||
is_gfx9_mono_tcs ? NULL : si_map_io_driver_location,
|
||||
sel->screen->info.gfx_level,
|
||||
key->ge.opt.same_patch_vertices,
|
||||
is_gfx9_mono_tcs ? next_sel->info.base.inputs_read : ~0ull,
|
||||
tcs_vgpr_only_inputs);
|
||||
|
|
@ -1867,6 +1868,7 @@ static bool si_lower_io_to_mem(struct si_shader *shader, nir_shader *nir,
|
|||
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
|
||||
NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem,
|
||||
is_gfx9_mono_tcs ? NULL : si_map_io_driver_location,
|
||||
sel->screen->info.gfx_level,
|
||||
key->ge.opt.same_patch_vertices, sel->info.tcs_vgpr_only_inputs);
|
||||
|
||||
/* Used by hs_emit_write_tess_factors() when monolithic shader. */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue