diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 424b1d7191c..197a12e4e73 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -4518,6 +4518,9 @@ bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes,
 bool nir_lower_variable_initializers(nir_shader *shader,
                                      nir_variable_mode modes);
+bool nir_zero_initialize_shared_memory(nir_shader *shader,
+                                       const unsigned shared_size,
+                                       const unsigned chunk_size);
 
 bool nir_move_vec_src_uses_to_dest(nir_shader *shader);
 bool nir_lower_vec_to_movs(nir_shader *shader, nir_instr_writemask_filter_cb cb,
diff --git a/src/compiler/nir/nir_lower_variable_initializers.c b/src/compiler/nir/nir_lower_variable_initializers.c
index 86252d97511..b089cc15738 100644
--- a/src/compiler/nir/nir_lower_variable_initializers.c
+++ b/src/compiler/nir/nir_lower_variable_initializers.c
@@ -135,3 +135,66 @@ nir_lower_variable_initializers(nir_shader *shader, nir_variable_mode modes)
 
    return progress;
 }
+
+/* Zero initialize shared_size bytes of shared memory by splitting the work
+ * into writes of chunk_size bytes among the invocations.
+ *
+ * Used for implementing VK_KHR_zero_initialize_workgroup_memory.
+ */
+bool
+nir_zero_initialize_shared_memory(nir_shader *shader,
+                                  const unsigned shared_size,
+                                  const unsigned chunk_size)
+{
+   assert(shared_size > 0);
+   assert(chunk_size > 0);
+   assert(chunk_size % 4 == 0);
+
+   nir_builder b;
+   nir_builder_init(&b, nir_shader_get_entrypoint(shader));
+   b.cursor = nir_before_cf_list(&b.impl->body);
+
+   assert(!shader->info.cs.local_size_variable);
+   const unsigned local_count = shader->info.cs.local_size[0] *
+                                shader->info.cs.local_size[1] *
+                                shader->info.cs.local_size[2];
+
+   /* The initialization logic is simplified if we can always split the memory
+    * in full chunk_size units.
+    */
+   assert(shared_size % chunk_size == 0);
+
+   const unsigned chunk_comps = chunk_size / 4;
+
+   nir_variable *it = nir_local_variable_create(b.impl, glsl_uint_type(),
+                                                "zero_init_iterator");
+   nir_ssa_def *local_index = nir_load_local_invocation_index(&b);
+   nir_ssa_def *first_offset = nir_imul_imm(&b, local_index, chunk_size);
+   nir_store_var(&b, it, first_offset, 0x1);
+
+   nir_loop *loop = nir_push_loop(&b);
+   {
+      nir_ssa_def *offset = nir_load_var(&b, it);
+
+      nir_push_if(&b, nir_uge(&b, offset, nir_imm_int(&b, shared_size)));
+      {
+         nir_jump(&b, nir_jump_break);
+      }
+      nir_pop_if(&b, NULL);
+
+      nir_store_shared(&b, nir_imm_zero(&b, chunk_comps, 32), offset,
+                       .align_mul=chunk_size,
+                       .write_mask=((1 << chunk_comps) - 1));
+
+      nir_ssa_def *new_offset = nir_iadd_imm(&b, offset, chunk_size * local_count);
+      nir_store_var(&b, it, new_offset, 0x1);
+   }
+   nir_pop_loop(&b, loop);
+
+   nir_scoped_barrier(&b, NIR_SCOPE_WORKGROUP, NIR_SCOPE_WORKGROUP,
+                      NIR_MEMORY_ACQ_REL, nir_var_mem_shared);
+
+   nir_metadata_preserve(nir_shader_get_entrypoint(shader), nir_metadata_none);
+
+   return true;
+}
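
For a concrete picture of how the loop divides the work, take a hypothetical 8x8x1 workgroup (local_count = 64) with shared_size = 4096 and chunk_size = 16: invocation i first writes 16 zero bytes at offset 16*i, then strides by chunk_size * local_count = 1024 bytes, so every invocation performs exactly 4096 / 1024 = 4 stores before its offset reaches shared_size and the nir_uge test breaks it out of the loop. The trailing nir_scoped_barrier keeps any invocation from reading shared memory before all of it has been zeroed.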
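
To show where a driver would run the new pass, here is a minimal call-site sketch, not part of this patch: the function name zero_init_workgroup_memory and the chunk_size choice of 16 are illustrative assumptions, and it presumes shared variables have already been lowered to explicit offsets so that nir->info.cs.shared_size is final. DIV_ROUND_UP and NIR_PASS_V come from Mesa's util/macros.h and nir.h.

#include "nir.h"
#include "util/macros.h"

/* Hypothetical call site, for illustration only: zero a compute
 * shader's workgroup memory before its entrypoint runs.
 */
static void
zero_init_workgroup_memory(nir_shader *nir)
{
   if (nir->info.cs.shared_size == 0)
      return;

   /* 16 bytes gives one vec4-sized store per loop iteration; any
    * multiple of 4 satisfies the pass's chunk_size assertions.
    */
   const unsigned chunk_size = 16;

   /* Pad shared_size up so the pass's shared_size % chunk_size == 0
    * assertion holds; the padding bytes are simply zeroed along with
    * the rest.
    */
   const unsigned shared_size =
      DIV_ROUND_UP(nir->info.cs.shared_size, chunk_size) * chunk_size;
   nir->info.cs.shared_size = shared_size;

   NIR_PASS_V(nir, nir_zero_initialize_shared_memory,
              shared_size, chunk_size);
}

Rounding up rather than asserting on the app-provided size matters in practice, since nothing guarantees a shader's declared shared memory is a multiple of any particular chunk size.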