diff --git a/src/compiler/nir/nir_lower_variable_initializers.c b/src/compiler/nir/nir_lower_variable_initializers.c
index 6556a8638ed..9ab4f53cdf1 100644
--- a/src/compiler/nir/nir_lower_variable_initializers.c
+++ b/src/compiler/nir/nir_lower_variable_initializers.c
@@ -153,6 +153,7 @@ nir_zero_initialize_shared_memory(nir_shader *shader,
    const unsigned local_count = shader->info.workgroup_size[0] *
                                 shader->info.workgroup_size[1] *
                                 shader->info.workgroup_size[2];
+   const unsigned stride = chunk_size * local_count;
 
    /* The initialization logic is simplified if we can always split the memory
     * in full chunk_size units.
@@ -161,30 +162,41 @@ nir_zero_initialize_shared_memory(nir_shader *shader,
 
    const unsigned chunk_comps = chunk_size / 4;
 
-   nir_variable *it = nir_local_variable_create(b.impl, glsl_uint_type(),
-                                                "zero_init_iterator");
    nir_def *local_index = nir_load_local_invocation_index(&b);
    nir_def *first_offset = nir_imul_imm(&b, local_index, chunk_size);
-   nir_store_var(&b, it, first_offset, 0x1);
 
-   nir_loop *loop = nir_push_loop(&b);
-   {
-      nir_def *offset = nir_load_var(&b, it);
-
-      nir_push_if(&b, nir_uge_imm(&b, offset, shared_size));
+   if (stride >= shared_size) {
+      nir_push_if(&b, nir_ult_imm(&b, first_offset, shared_size));
       {
-         nir_jump(&b, nir_jump_break);
+         nir_store_shared(&b, nir_imm_zero(&b, chunk_comps, 32), first_offset,
+                          .align_mul = chunk_size,
+                          .write_mask = ((1 << chunk_comps) - 1));
       }
       nir_pop_if(&b, NULL);
+   } else {
+      nir_variable *it = nir_local_variable_create(b.impl, glsl_uint_type(),
+                                                   "zero_init_iterator");
+      nir_store_var(&b, it, first_offset, 0x1);
 
-      nir_store_shared(&b, nir_imm_zero(&b, chunk_comps, 32), offset,
-                       .align_mul = chunk_size,
-                       .write_mask = ((1 << chunk_comps) - 1));
+      nir_loop *loop = nir_push_loop(&b);
+      {
+         nir_def *offset = nir_load_var(&b, it);
 
-      nir_def *new_offset = nir_iadd_imm(&b, offset, chunk_size * local_count);
-      nir_store_var(&b, it, new_offset, 0x1);
+         nir_push_if(&b, nir_uge_imm(&b, offset, shared_size));
+         {
+            nir_jump(&b, nir_jump_break);
+         }
+         nir_pop_if(&b, NULL);
+
+         nir_store_shared(&b, nir_imm_zero(&b, chunk_comps, 32), offset,
+                          .align_mul = chunk_size,
+                          .write_mask = ((1 << chunk_comps) - 1));
+
+         nir_def *new_offset = nir_iadd_imm(&b, offset, stride);
+         nir_store_var(&b, it, new_offset, 0x1);
+      }
+      nir_pop_loop(&b, loop);
    }
-   nir_pop_loop(&b, loop);
 
    nir_barrier(&b, SCOPE_WORKGROUP, SCOPE_WORKGROUP,
                NIR_MEMORY_ACQ_REL, nir_var_mem_shared);
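
For context, a minimal scalar C sketch (not part of the patch) of the per-invocation control flow the pass emits after this change; the function name zero_init_shared, the plain pointer standing in for shared memory, and the memset standing in for the vectorized nir_store_shared are illustrative assumptions, and the sketch assumes shared_size is a multiple of chunk_size as the pass already requires:

/* Illustrative only -- not Mesa code.  Models the NIR emitted per
 * invocation of the workgroup. */
#include <stdint.h>
#include <string.h>

static void
zero_init_shared(uint8_t *shared, unsigned shared_size, unsigned chunk_size,
                 unsigned local_count, unsigned local_index)
{
   const unsigned stride = chunk_size * local_count;
   const unsigned first_offset = local_index * chunk_size;

   if (stride >= shared_size) {
      /* New fast path: one pass by the whole workgroup already covers every
       * chunk, so a single guarded store replaces the loop and the
       * zero_init_iterator variable. */
      if (first_offset < shared_size)
         memset(shared + first_offset, 0, chunk_size);
   } else {
      /* Original path: each invocation strides through shared memory. */
      for (unsigned offset = first_offset; offset < shared_size;
           offset += stride)
         memset(shared + offset, 0, chunk_size);
   }
   /* In both cases the pass then emits a workgroup barrier (nir_barrier). */
}

The fast path performs exactly the stores of the old loop's first iteration, with the break condition (offset >= shared_size) inverted into the guard, so behavior is unchanged while the iterator variable and loop control flow are avoided whenever stride >= shared_size.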