radeonsi: fix the DMA compute shader

It was correct for the parameters that the driver was using, but incorrect
for other parameters.

1. The address computation must multiply the workgroup size (wave size)
   by num_mem_ops to fix the case when num_dwords_per_thread > 4.
2. nir_load_ssbo shouldn't set the number of components to 4 when
   num_dwords_per_thread < 4.

Fixes: 6584088cd5 - radeonsi: "create_dma_compute" shader in nir

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28119>
(cherry picked from commit e99765df08)
This commit is contained in:
Marek Olšák 2024-03-11 15:13:50 -04:00 committed by Eric Engestrom
parent 895bc56899
commit c318561067
2 changed files with 7 additions and 5 deletions

View file

@@ -624,7 +624,7 @@
"description": "radeonsi: fix the DMA compute shader",
"nominated": true,
"nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
"main_sha": null,
"because_sha": "6584088cd5e6fe2538428b2ae13cbafc62604de2",
"notes": null

View file

@@ -697,13 +697,15 @@ void *si_create_dma_compute_shader(struct si_context *sctx, unsigned num_dwords_
* the 2nd store writes into 1 * wavesize + tid,
* the 3rd store writes into 2 * wavesize + tid, etc.
*/
-nir_def *store_address = get_global_ids(&b, 1);
+nir_def *store_address =
+nir_iadd(&b, nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b), 0),
+default_wave_size * num_mem_ops),
+nir_channel(&b, nir_load_local_invocation_id(&b), 0));
/* Convert from a "store size unit" into bytes. */
store_address = nir_imul_imm(&b, store_address, 4 * inst_dwords[0]);
-nir_def *load_address = store_address, *value, *values[num_mem_ops];
-value = nir_undef(&b, 1, 32);
+nir_def *load_address = store_address, *value = NULL, *values[num_mem_ops];
if (is_copy) {
b.shader->info.num_ssbos++;
@@ -723,7 +725,7 @@ void *si_create_dma_compute_shader(struct si_context *sctx, unsigned num_dwords_
load_address = nir_iadd(&b, load_address,
nir_imm_int(&b, 4 * inst_dwords[i] * default_wave_size));
}
-values[i] = nir_load_ssbo(&b, 4, 32, nir_imm_int(&b, 1),load_address,
+values[i] = nir_load_ssbo(&b, inst_dwords[i], 32, nir_imm_int(&b, 1), load_address,
.access = load_qualifier);
}