mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
radeonsi/gfx12: fix VS output corruption with streamout
We increased VS_EXPORT_COUNT to 8 for streamout in gfx10_shader_ngg,
but we forgot to increase the attribute ring stride, causing all waves
except the first one to get corrupted VS outputs.
Fixes: f703dfd1bb - radeonsi: add gfx12
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30503>
This commit is contained in:
parent
8b3e02587e
commit
0e27df4521
3 changed files with 23 additions and 11 deletions
|
|
@ -43,7 +43,7 @@ static nir_def *build_attr_ring_desc(nir_builder *b, struct si_shader *shader,
|
|||
sel->info.base.vs.blit_sgprs_amd - 1) :
|
||||
ac_nir_load_arg(b, &args->ac, args->gs_attr_address);
|
||||
|
||||
unsigned stride = 16 * shader->info.nr_param_exports;
|
||||
unsigned stride = 16 * si_shader_num_alloc_param_exports(shader);
|
||||
uint32_t desc[4];
|
||||
|
||||
ac_build_attr_ring_descriptor(sel->screen->info.gfx_level,
|
||||
|
|
|
|||
|
|
@ -1028,6 +1028,7 @@ void si_nir_late_opts(struct nir_shader *nir);
|
|||
char *si_finalize_nir(struct pipe_screen *screen, void *nirptr);
|
||||
|
||||
/* si_state_shaders.cpp */
|
||||
unsigned si_shader_num_alloc_param_exports(struct si_shader *shader);
|
||||
unsigned si_determine_wave_size(struct si_screen *sscreen, struct si_shader *shader);
|
||||
void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *gs,
|
||||
struct gfx9_gs_info *out);
|
||||
|
|
|
|||
|
|
@ -1391,6 +1391,26 @@ static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel,
|
|||
shader->info.nr_pos_exports > 1));
|
||||
}
|
||||
|
||||
/* Return the number of allocated param exports. This can be more than the number of param
|
||||
* exports in the shader.
|
||||
*/
|
||||
unsigned si_shader_num_alloc_param_exports(struct si_shader *shader)
|
||||
{
|
||||
unsigned num_params = shader->info.nr_param_exports;
|
||||
|
||||
/* Since there is no alloc/dealloc mechanism for the 12-bit ordered IDs on GFX12, they can wrap
|
||||
* around if there are more than 2^12 workgroups, causing 2 workgroups to get the same
|
||||
* ordered ID, which can deadlock the "ordered add" loop.
|
||||
*
|
||||
* The recommended solution is to use the alloc/dealloc mechanism of the attribute ring to limit
|
||||
* the number of workgroups in flight and thus the number of ordered IDs in flight.
|
||||
*/
|
||||
if (shader->selector->screen->info.gfx_level >= GFX12 && si_shader_uses_streamout(shader))
|
||||
num_params = MAX2(num_params, 8);
|
||||
|
||||
return num_params;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare the PM4 image for \p shader, which will run as a merged ESGS shader
|
||||
* in NGG mode.
|
||||
|
|
@ -1541,16 +1561,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
gs_sel->info.writes_primid);
|
||||
|
||||
if (sscreen->info.gfx_level >= GFX12) {
|
||||
unsigned num_params = shader->info.nr_param_exports;
|
||||
|
||||
/* Since there is no alloc/dealloc mechanism for the 12-bit ordered IDs, they can wrap
|
||||
* around if there are more than 2^12 workgroups, causing 2 workgroups to get the same
|
||||
* ordered ID, which would break the streamout algorithm.
|
||||
* The recommended solution is to use the alloc/dealloc mechanism of the attribute ring,
|
||||
* which is enough to limit the range of ordered IDs that can be in flight.
|
||||
*/
|
||||
if (si_shader_uses_streamout(shader))
|
||||
num_params = MAX2(num_params, 8);
|
||||
unsigned num_params = si_shader_num_alloc_param_exports(shader);
|
||||
|
||||
shader->ngg.spi_shader_pgm_rsrc4_gs = S_00B220_SPI_SHADER_LATE_ALLOC_GS(127) |
|
||||
S_00B220_GLG_FORCE_DISABLE(1) |
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue