nir: Don't generate single iteration loops to zero-initialize memory
If the stride we're adding to our loop counter is larger than the total
amount of shared local memory we're trying to initialize, we know the loop
will run at most one time. So we can skip emitting a loop. Loop unrolling
appears to be unable to detect this currently.

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31312>
commit 0b34a7aff0
parent c1a44e8d43
1 changed file with 27 additions and 15 deletions
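To make the reasoning in the commit message concrete, here is a minimal standalone C sketch (not the NIR builder code from the patch). The sizes for chunk_size, local_count, and shared_size are illustrative assumptions; the point is that first_offset is always less than stride, so once stride >= shared_size no invocation can execute the strided loop body more than once, which is why the patch emits a single bounds-checked store in that case instead of relying on loop unrolling to clean it up.

/* zero_init_sketch.c -- hypothetical example, not part of the patch. */
#include <assert.h>
#include <stdio.h>

int main(void)
{
   /* Illustrative sizes (assumed, not taken from the commit). */
   const unsigned chunk_size  = 16;                       /* bytes zeroed per store       */
   const unsigned local_count = 64;                       /* invocations in the workgroup */
   const unsigned stride      = chunk_size * local_count; /* whole workgroup's step: 1024 */
   const unsigned shared_size = 768;                      /* total shared memory to clear */

   for (unsigned local_index = 0; local_index < local_count; local_index++) {
      const unsigned first_offset = local_index * chunk_size;
      unsigned iterations = 0;

      /* The loop the pass used to emit unconditionally: start at this
       * invocation's own chunk, advance by the whole workgroup's footprint. */
      for (unsigned offset = first_offset; offset < shared_size; offset += stride)
         iterations++;

      /* first_offset < stride, so when stride >= shared_size the second
       * iteration would already start past the end: at most one store. */
      if (stride >= shared_size)
         assert(iterations <= 1);

      printf("invocation %2u: %u store(s)\n", local_index, iterations);
   }

   return 0;
}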
@@ -153,6 +153,7 @@ nir_zero_initialize_shared_memory(nir_shader *shader,
    const unsigned local_count = shader->info.workgroup_size[0] *
                                 shader->info.workgroup_size[1] *
                                 shader->info.workgroup_size[2];
+   const unsigned stride = chunk_size * local_count;
 
    /* The initialization logic is simplified if we can always split the memory
     * in full chunk_size units.
@@ -161,30 +162,41 @@ nir_zero_initialize_shared_memory(nir_shader *shader,
 
    const unsigned chunk_comps = chunk_size / 4;
 
-   nir_variable *it = nir_local_variable_create(b.impl, glsl_uint_type(),
-                                                "zero_init_iterator");
    nir_def *local_index = nir_load_local_invocation_index(&b);
    nir_def *first_offset = nir_imul_imm(&b, local_index, chunk_size);
-   nir_store_var(&b, it, first_offset, 0x1);
 
-   nir_loop *loop = nir_push_loop(&b);
-   {
-      nir_def *offset = nir_load_var(&b, it);
-
-      nir_push_if(&b, nir_uge_imm(&b, offset, shared_size));
+   if (stride >= shared_size) {
+      nir_push_if(&b, nir_ult_imm(&b, first_offset, shared_size));
       {
-         nir_jump(&b, nir_jump_break);
+         nir_store_shared(&b, nir_imm_zero(&b, chunk_comps, 32), first_offset,
+                          .align_mul = chunk_size,
+                          .write_mask = ((1 << chunk_comps) - 1));
       }
       nir_pop_if(&b, NULL);
+   } else {
+      nir_variable *it = nir_local_variable_create(b.impl, glsl_uint_type(),
+                                                   "zero_init_iterator");
+      nir_store_var(&b, it, first_offset, 0x1);
 
-      nir_store_shared(&b, nir_imm_zero(&b, chunk_comps, 32), offset,
-                       .align_mul = chunk_size,
-                       .write_mask = ((1 << chunk_comps) - 1));
+      nir_loop *loop = nir_push_loop(&b);
+      {
+         nir_def *offset = nir_load_var(&b, it);
 
-      nir_def *new_offset = nir_iadd_imm(&b, offset, chunk_size * local_count);
-      nir_store_var(&b, it, new_offset, 0x1);
+         nir_push_if(&b, nir_uge_imm(&b, offset, shared_size));
+         {
+            nir_jump(&b, nir_jump_break);
+         }
+         nir_pop_if(&b, NULL);
+
+         nir_store_shared(&b, nir_imm_zero(&b, chunk_comps, 32), offset,
+                          .align_mul = chunk_size,
+                          .write_mask = ((1 << chunk_comps) - 1));
+
+         nir_def *new_offset = nir_iadd_imm(&b, offset, stride);
+         nir_store_var(&b, it, new_offset, 0x1);
+      }
+      nir_pop_loop(&b, loop);
    }
-   nir_pop_loop(&b, loop);
 
    nir_barrier(&b, SCOPE_WORKGROUP, SCOPE_WORKGROUP, NIR_MEMORY_ACQ_REL,
                nir_var_mem_shared);