/* Patch summary: moves get_cast_type(), split_unaligned_load(), split_unaligned_store() and the split_unaligned_loads_stores() pass out of clc_compiler.c into dxil_nir.c, exports the pass as dxil_nir_split_unaligned_loads_stores() (prototype added to dxil_nir.h) and updates the two NIR_PASS_V call sites in clc_spirv_to_dxil(). NOTE(review): the line breaks of this diff appear to have been collapsed into spaces; confirm against the original patch before applying. */
diff --git a/src/microsoft/clc/clc_compiler.c b/src/microsoft/clc/clc_compiler.c index 01ac14df2c6..09411aaea32 100644 --- a/src/microsoft/clc/clc_compiler.c +++ b/src/microsoft/clc/clc_compiler.c @@ -546,136 +546,6 @@ copy_const_initializer(const nir_constant *constant, const struct glsl_type *typ } } -static const struct glsl_type * -get_cast_type(unsigned bit_size) -{ - switch (bit_size) { - case 64: - return glsl_int64_t_type(); - case 32: - return glsl_int_type(); - case 16: - return glsl_int16_t_type(); - case 8: - return glsl_int8_t_type(); - } - unreachable("Invalid bit_size"); -} - -static void -split_unaligned_load(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment) -{ - enum gl_access_qualifier access = nir_intrinsic_access(intrin); - nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS * NIR_MAX_VEC_COMPONENTS * sizeof(int64_t) / 8]; - unsigned comp_size = intrin->dest.ssa.bit_size / 8; - unsigned num_comps = intrin->dest.ssa.num_components; - - b->cursor = nir_before_instr(&intrin->instr); - - nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]); - - const struct glsl_type *cast_type = get_cast_type(alignment * 8); - nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment); - - unsigned num_loads = DIV_ROUND_UP(comp_size * num_comps, alignment); - for (unsigned i = 0; i < num_loads; ++i) { - nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size)); - srcs[i] = nir_load_deref_with_access(b, elem, access); - } - - nir_ssa_def *new_dest = nir_extract_bits(b, srcs, num_loads, 0, num_comps, intrin->dest.ssa.bit_size); - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest); - nir_instr_remove(&intrin->instr); -} - -static void -split_unaligned_store(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment) -{ - enum gl_access_qualifier access = nir_intrinsic_access(intrin); - - assert(intrin->src[1].is_ssa); - nir_ssa_def *value = intrin->src[1].ssa; - 
unsigned comp_size = value->bit_size / 8; - unsigned num_comps = value->num_components; - - b->cursor = nir_before_instr(&intrin->instr); - - nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]); - - const struct glsl_type *cast_type = get_cast_type(alignment * 8); - nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment); - - unsigned num_stores = DIV_ROUND_UP(comp_size * num_comps, alignment); - for (unsigned i = 0; i < num_stores; ++i) { - nir_ssa_def *substore_val = nir_extract_bits(b, &value, 1, i * alignment * 8, 1, alignment * 8); - nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size)); - nir_store_deref_with_access(b, elem, substore_val, ~0, access); - } - - nir_instr_remove(&intrin->instr); -} - -static bool -split_unaligned_loads_stores(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(function, shader) { - if (!function->impl) - continue; - - nir_builder b; - nir_builder_init(&b, function->impl); - - nir_foreach_block(block, function->impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (intrin->intrinsic != nir_intrinsic_load_deref && - intrin->intrinsic != nir_intrinsic_store_deref) - continue; - nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); - - unsigned align_mul = 0, align_offset = 0; - nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset); - - unsigned alignment = align_offset ? 1 << (ffs(align_offset) - 1) : align_mul; - - /* We can load anything at 4-byte alignment, except for - * UBOs (AKA CBs where the granularity is 16 bytes). - */ - if (alignment >= (deref->modes == nir_var_mem_ubo ? 
16 : 4)) - continue; - - nir_ssa_def *val; - if (intrin->intrinsic == nir_intrinsic_load_deref) { - assert(intrin->dest.is_ssa); - val = &intrin->dest.ssa; - } else { - assert(intrin->src[1].is_ssa); - val = intrin->src[1].ssa; - } - - unsigned natural_alignment = - val->bit_size / 8 * - (val->num_components == 3 ? 4 : val->num_components); - - if (alignment >= natural_alignment) - continue; - - if (intrin->intrinsic == nir_intrinsic_load_deref) - split_unaligned_load(&b, intrin, alignment); - else - split_unaligned_store(&b, intrin, alignment); - progress = true; - } - } - } - - return progress; -} - static enum pipe_tex_wrap wrap_from_cl_addressing(unsigned addressing_mode) { @@ -1018,7 +888,7 @@ clc_spirv_to_dxil(struct clc_libclc *lib, NIR_PASS_V(nir, dxil_nir_lower_deref_ssbo); - NIR_PASS_V(nir, split_unaligned_loads_stores); + NIR_PASS_V(nir, dxil_nir_split_unaligned_loads_stores); assert(nir->info.cs.ptr_size == 64); NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo, @@ -1078,7 +948,7 @@ clc_spirv_to_dxil(struct clc_libclc *lib, } NIR_PASS_V(nir, clc_nir_lower_kernel_input_loads, inputs_var); - NIR_PASS_V(nir, split_unaligned_loads_stores); + NIR_PASS_V(nir, dxil_nir_split_unaligned_loads_stores); NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo, nir_address_format_32bit_index_offset); NIR_PASS_V(nir, clc_nir_lower_system_values, work_properties_var); diff --git a/src/microsoft/compiler/dxil_nir.c b/src/microsoft/compiler/dxil_nir.c index 364ab3a68f6..dba07253fa0 100644 --- a/src/microsoft/compiler/dxil_nir.c +++ b/src/microsoft/compiler/dxil_nir.c @@ -2147,3 +2147,134 @@ dxil_nir_lower_num_subgroups(nir_shader *s) nir_metadata_dominance | nir_metadata_loop_analysis, NULL); } + + /* Picks the GLSL integer type for a bit width: 64 -> glsl_int64_t_type(), 32 -> glsl_int_type(), 16 -> glsl_int16_t_type(), 8 -> glsl_int8_t_type(); any other width reaches unreachable(). */ +static const struct glsl_type * +get_cast_type(unsigned bit_size) +{ + switch (bit_size) { + case 64: + return glsl_int64_t_type(); + case 32: + return glsl_int_type(); + case 16: + return glsl_int16_t_type(); + case 8: + return glsl_int8_t_type(); + } + 
unreachable("Invalid bit_size"); +} + /* Replaces the load_deref `intrin` with DIV_ROUND_UP(total bytes, alignment) loads of alignment-byte integers made through a cast of the source deref (each reusing intrin's access flags), reassembles the original vector with nir_extract_bits(), rewrites the uses of the old result and removes `intrin`. */ +static void +split_unaligned_load(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment) +{ + enum gl_access_qualifier access = nir_intrinsic_access(intrin); + nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS * NIR_MAX_VEC_COMPONENTS * sizeof(int64_t) / 8]; + unsigned comp_size = intrin->dest.ssa.bit_size / 8; + unsigned num_comps = intrin->dest.ssa.num_components; + + b->cursor = nir_before_instr(&intrin->instr); + + nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]); + + const struct glsl_type *cast_type = get_cast_type(alignment * 8); + nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment); + + unsigned num_loads = DIV_ROUND_UP(comp_size * num_comps, alignment); + for (unsigned i = 0; i < num_loads; ++i) { + nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size)); + srcs[i] = nir_load_deref_with_access(b, elem, access); + } + + nir_ssa_def *new_dest = nir_extract_bits(b, srcs, num_loads, 0, num_comps, intrin->dest.ssa.bit_size); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest); + nir_instr_remove(&intrin->instr); +} + /* Replaces the store_deref `intrin` with DIV_ROUND_UP(total bytes, alignment) stores: each chunk of alignment * 8 bits is pulled out of the stored value with nir_extract_bits() and written through a cast of the destination deref with intrin's access flags; `intrin` is then removed. */ +static void +split_unaligned_store(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment) +{ + enum gl_access_qualifier access = nir_intrinsic_access(intrin); + + assert(intrin->src[1].is_ssa); + nir_ssa_def *value = intrin->src[1].ssa; + unsigned comp_size = value->bit_size / 8; + unsigned num_comps = value->num_components; + + b->cursor = nir_before_instr(&intrin->instr); + + nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]); + + const struct glsl_type *cast_type = get_cast_type(alignment * 8); + nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment); + + unsigned num_stores = DIV_ROUND_UP(comp_size * num_comps, alignment); + for (unsigned i = 0; i < num_stores; ++i) { + nir_ssa_def *substore_val = nir_extract_bits(b, &value, 1, i * 
alignment * 8, 1, alignment * 8); + nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size)); + nir_store_deref_with_access(b, elem, substore_val, ~0, access); + } + + nir_instr_remove(&intrin->instr); +} + /* Scans every load_deref/store_deref in each function that has an impl and splits the ones whose alignment is below both the per-mode threshold (16 bytes when deref->modes == nir_var_mem_ubo, otherwise 4) and the value's natural alignment. Returns true if at least one instruction was split. */ +bool +dxil_nir_split_unaligned_loads_stores(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(function, shader) { + if (!function->impl) + continue; + + nir_builder b; + nir_builder_init(&b, function->impl); + + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_load_deref && + intrin->intrinsic != nir_intrinsic_store_deref) + continue; + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + + unsigned align_mul = 0, align_offset = 0; + nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset); /* NOTE(review): the return value is ignored; if the call can leave align_mul at 0 here, alignment becomes 0 and the split helpers divide by it - confirm it cannot fail for these derefs. */ + + unsigned alignment = align_offset ? 1 << (ffs(align_offset) - 1) : align_mul; /* a non-zero offset limits the alignment to its lowest set bit; otherwise align_mul is used */ + + /* We can load anything at 4-byte alignment, except for + * UBOs (AKA CBs where the granularity is 16 bytes). + */ + if (alignment >= (deref->modes == nir_var_mem_ubo ? 16 : 4)) + continue; + + nir_ssa_def *val; + if (intrin->intrinsic == nir_intrinsic_load_deref) { + assert(intrin->dest.is_ssa); + val = &intrin->dest.ssa; + } else { + assert(intrin->src[1].is_ssa); + val = intrin->src[1].ssa; + } + + unsigned natural_alignment = + val->bit_size / 8 * + (val->num_components == 3 ? 
4 : val->num_components); /* bytes per component times component count, with 3-component vectors counted as 4 */ + + if (alignment >= natural_alignment) + continue; + + if (intrin->intrinsic == nir_intrinsic_load_deref) + split_unaligned_load(&b, intrin, alignment); + else + split_unaligned_store(&b, intrin, alignment); + progress = true; + } + } + } + + return progress; +} diff --git a/src/microsoft/compiler/dxil_nir.h b/src/microsoft/compiler/dxil_nir.h index 0c0a85e0854..27272490b41 100644 --- a/src/microsoft/compiler/dxil_nir.h +++ b/src/microsoft/compiler/dxil_nir.h @@ -77,6 +77,7 @@ bool dxil_nir_ensure_position_writes(nir_shader *s); bool dxil_nir_lower_sample_pos(nir_shader *s); bool dxil_nir_lower_subgroup_id(nir_shader *s); bool dxil_nir_lower_num_subgroups(nir_shader *s); +bool dxil_nir_split_unaligned_loads_stores(nir_shader *shader); #ifdef __cplusplus }