mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 15:10:10 +01:00
radeonsi: support mesh shader per primitive output
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37932>
This commit is contained in:
parent
16656ebaaf
commit
dfc679e791
5 changed files with 40 additions and 8 deletions
|
|
@ -54,7 +54,9 @@ static nir_def *build_attr_ring_desc(nir_builder *b, struct si_shader *shader,
|
|||
b->shader->info.vs.blit_sgprs_amd - 1) :
|
||||
ac_nir_load_arg(b, &args->ac, args->gs_attr_address);
|
||||
|
||||
unsigned stride = 16 * si_shader_num_alloc_param_exports(shader);
|
||||
unsigned per_vertex_params = MAX2(1, si_shader_num_alloc_param_exports(shader));
|
||||
unsigned total_params = per_vertex_params + shader->info.nr_prim_param_exports;
|
||||
unsigned stride = 16 * total_params;
|
||||
uint32_t desc[4];
|
||||
|
||||
ac_build_attr_ring_descriptor(sel->screen->info.gfx_level,
|
||||
|
|
|
|||
|
|
@ -1224,6 +1224,7 @@ static void si_nir_assign_param_offsets(nir_shader *nir, struct si_shader *shade
|
|||
|
||||
uint64_t outputs_written = 0;
|
||||
uint32_t outputs_written_16bit = 0;
|
||||
uint64_t per_primitive_outputs = 0;
|
||||
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||
assert(impl);
|
||||
|
|
@ -1235,7 +1236,8 @@ static void si_nir_assign_param_offsets(nir_shader *nir, struct si_shader *shade
|
|||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
if (intr->intrinsic != nir_intrinsic_store_output &&
|
||||
intr->intrinsic != nir_intrinsic_store_per_vertex_output)
|
||||
intr->intrinsic != nir_intrinsic_store_per_vertex_output &&
|
||||
intr->intrinsic != nir_intrinsic_store_per_primitive_output)
|
||||
continue;
|
||||
|
||||
/* No indirect indexing allowed. */
|
||||
|
|
@ -1250,6 +1252,9 @@ static void si_nir_assign_param_offsets(nir_shader *nir, struct si_shader *shade
|
|||
else
|
||||
outputs_written |= BITFIELD64_BIT(sem.location);
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_store_per_primitive_output)
|
||||
per_primitive_outputs |= BITFIELD64_BIT(sem.location);
|
||||
|
||||
/* Assign the param index if it's unassigned. */
|
||||
if (nir_slot_is_varying(sem.location, MESA_SHADER_FRAGMENT) && !sem.no_varying &&
|
||||
(sem.gs_streams & 0x3) == 0 &&
|
||||
|
|
@ -1259,7 +1264,10 @@ static void si_nir_assign_param_offsets(nir_shader *nir, struct si_shader *shade
|
|||
/* It must not be remapped (duplicated). */
|
||||
assert(slot_remap[sem.location] == -1);
|
||||
|
||||
temp_info->vs_output_param_offset[sem.location] = info->nr_param_exports++;
|
||||
temp_info->vs_output_param_offset[sem.location] =
|
||||
intr->intrinsic == nir_intrinsic_store_per_primitive_output ?
|
||||
info->nr_prim_param_exports++ :
|
||||
info->nr_param_exports++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1274,9 +1282,19 @@ static void si_nir_assign_param_offsets(nir_shader *nir, struct si_shader *shade
|
|||
temp_info->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = info->nr_param_exports++;
|
||||
}
|
||||
|
||||
/* per primitive outputs come after per vertex outputs */
|
||||
unsigned per_primitive_outputs_offset = info->nr_param_exports;
|
||||
if (sel->screen->info.gfx_level >= GFX11)
|
||||
per_primitive_outputs_offset = MAX2(per_primitive_outputs_offset, 1);
|
||||
u_foreach_bit64 (i, per_primitive_outputs) {
|
||||
if (temp_info->vs_output_param_offset[i] != AC_EXP_PARAM_DEFAULT_VAL_0000)
|
||||
temp_info->vs_output_param_offset[i] += per_primitive_outputs_offset;
|
||||
}
|
||||
|
||||
/* Update outputs written info, we may remove some outputs before. */
|
||||
nir->info.outputs_written = outputs_written;
|
||||
nir->info.outputs_written_16bit = outputs_written_16bit;
|
||||
nir->info.per_primitive_outputs = per_primitive_outputs;
|
||||
}
|
||||
|
||||
static void si_assign_param_offsets(nir_shader *nir, struct si_shader *shader,
|
||||
|
|
@ -1284,6 +1302,7 @@ static void si_assign_param_offsets(nir_shader *nir, struct si_shader *shader,
|
|||
{
|
||||
/* Initialize this first. */
|
||||
shader->info.nr_param_exports = 0;
|
||||
shader->info.nr_prim_param_exports = 0;
|
||||
|
||||
STATIC_ASSERT(sizeof(temp_info->vs_output_param_offset[0]) == 1);
|
||||
memset(temp_info->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
|
||||
|
|
@ -1296,7 +1315,7 @@ static void si_assign_param_offsets(nir_shader *nir, struct si_shader *shader,
|
|||
memset(slot_remap, -1, NUM_TOTAL_VARYING_SLOTS);
|
||||
|
||||
/* This sets DEFAULT_VAL for constant outputs in vs_output_param_offset. */
|
||||
/* TODO: This doesn't affect GS. */
|
||||
/* TODO: This doesn't affect GS and MS. */
|
||||
NIR_PASS(_, nir, ac_nir_optimize_outputs, false, slot_remap,
|
||||
temp_info->vs_output_param_offset);
|
||||
|
||||
|
|
@ -2039,6 +2058,10 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
|
|||
S_028644_DEFAULT_VAL(offset);
|
||||
}
|
||||
|
||||
if (sscreen->info.gfx_level >= GFX11 &&
|
||||
(nir->info.per_primitive_outputs & BITFIELD64_BIT(semantic)))
|
||||
ps_input_cntl |= S_028644_PRIM_ATTR(1);
|
||||
|
||||
shader->info.vs_output_ps_input_cntl[semantic] = ps_input_cntl;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -341,6 +341,7 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
|
|||
case nir_intrinsic_load_per_vertex_output:
|
||||
case nir_intrinsic_store_output:
|
||||
case nir_intrinsic_store_per_vertex_output:
|
||||
case nir_intrinsic_store_per_primitive_output:
|
||||
scan_io_usage(nir, info, intr, false, colors_lowered);
|
||||
break;
|
||||
case nir_intrinsic_load_deref:
|
||||
|
|
|
|||
|
|
@ -224,6 +224,7 @@ struct si_shader_variant_info {
|
|||
bool uses_mesh_scratch_ring : 1;
|
||||
uint8_t nr_pos_exports;
|
||||
uint8_t nr_param_exports;
|
||||
uint8_t nr_prim_param_exports;
|
||||
uint8_t clipdist_mask;
|
||||
uint8_t culldist_mask;
|
||||
uint8_t num_streamout_vec4s;
|
||||
|
|
|
|||
|
|
@ -1600,8 +1600,11 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
S_028A84_NGG_DISABLE_PROVOK_REUSE(shader->key.ge.mono.u.vs_export_prim_id ||
|
||||
gs_sel->info.writes_primid);
|
||||
|
||||
unsigned num_params = si_shader_num_alloc_param_exports(shader);
|
||||
unsigned num_prim_params = shader->info.nr_prim_param_exports;
|
||||
bool no_pc_export = num_params == 0 && num_prim_params == 0;
|
||||
|
||||
if (sscreen->info.gfx_level >= GFX12) {
|
||||
unsigned num_params = si_shader_num_alloc_param_exports(shader);
|
||||
unsigned wave_limit_per_se = 0x3ff;
|
||||
|
||||
/* This tuning adds up to 50% streamout performance. */
|
||||
|
|
@ -1632,7 +1635,8 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
S_00B220_WAVE_LIMIT(wave_limit_per_se) |
|
||||
S_00B220_INST_PREF_SIZE(si_get_shader_prefetch_size(shader));
|
||||
shader->ngg.spi_vs_out_config = S_00B0C4_VS_EXPORT_COUNT(MAX2(num_params, 1) - 1) |
|
||||
S_00B0C4_NO_PC_EXPORT(num_params == 0);
|
||||
S_00B0C4_PRIM_EXPORT_COUNT(num_prim_params) |
|
||||
S_00B0C4_NO_PC_EXPORT(no_pc_export);
|
||||
} else {
|
||||
unsigned late_alloc_wave64, cu_mask;
|
||||
|
||||
|
|
@ -1662,8 +1666,9 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
C_00B21C_CU_EN, 0, &sscreen->info);
|
||||
shader->ngg.spi_shader_pgm_rsrc4_gs = S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64);
|
||||
shader->ngg.spi_vs_out_config =
|
||||
S_0286C4_VS_EXPORT_COUNT(MAX2(shader->info.nr_param_exports, 1) - 1) |
|
||||
S_0286C4_NO_PC_EXPORT(shader->info.nr_param_exports == 0);
|
||||
S_0286C4_VS_EXPORT_COUNT(MAX2(num_params, 1) - 1) |
|
||||
S_0286C4_PRIM_EXPORT_COUNT(num_prim_params) |
|
||||
S_0286C4_NO_PC_EXPORT(no_pc_export);
|
||||
|
||||
if (sscreen->info.gfx_level >= GFX11) {
|
||||
shader->ngg.spi_shader_pgm_rsrc4_gs |=
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue