microsoft/compiler: Move unaligned load/store pass from CL

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21029>
Jesse Natalie 2023-01-30 14:08:45 -08:00 committed by Marge Bot
parent f50843fcdb
commit facd2e4fdb
3 changed files with 134 additions and 132 deletions
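For readers skimming the diff: the pass being moved rewrites deref loads/stores whose known alignment is below what the backend can consume into a run of alignment-sized accesses through an integer pointer cast, then reassembles (or slices) the value with nir_extract_bits. Below is a minimal standalone sketch of the arithmetic it relies on; the helper names (effective_alignment, num_split_accesses) are hypothetical and only model the real NIR pass shown in the diff.

#include <stdio.h>
#include <strings.h> /* ffs() */

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Hypothetical helper: mirrors how the pass derives the alignment it can
 * rely on from nir_get_explicit_deref_align()'s align_mul/align_offset. */
static unsigned
effective_alignment(unsigned align_mul, unsigned align_offset)
{
   return align_offset ? 1u << (ffs(align_offset) - 1) : align_mul;
}

/* Hypothetical helper: number of alignment-sized loads/stores the pass
 * emits for a value of bit_size x num_components. */
static unsigned
num_split_accesses(unsigned bit_size, unsigned num_components, unsigned alignment)
{
   unsigned comp_size = bit_size / 8;
   return DIV_ROUND_UP(comp_size * num_components, alignment);
}

int
main(void)
{
   /* A 32-bit vec3 whose deref is only known to be 2-byte aligned:
    * the pass would emit six 2-byte loads and nir_extract_bits them
    * back into a vec3. */
   unsigned align = effective_alignment(4, 2);              /* -> 2 */
   printf("%u pieces\n", num_split_accesses(32, 3, align)); /* -> 6 */
   return 0;
}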

View file

@@ -546,136 +546,6 @@ copy_const_initializer(const nir_constant *constant, const struct glsl_type *typ
   }
}

static const struct glsl_type *
get_cast_type(unsigned bit_size)
{
   switch (bit_size) {
   case 64:
      return glsl_int64_t_type();
   case 32:
      return glsl_int_type();
   case 16:
      return glsl_int16_t_type();
   case 8:
      return glsl_int8_t_type();
   }
   unreachable("Invalid bit_size");
}

static void
split_unaligned_load(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)
{
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);
   nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS * NIR_MAX_VEC_COMPONENTS * sizeof(int64_t) / 8];
   unsigned comp_size = intrin->dest.ssa.bit_size / 8;
   unsigned num_comps = intrin->dest.ssa.num_components;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);
   const struct glsl_type *cast_type = get_cast_type(alignment * 8);
   nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment);

   unsigned num_loads = DIV_ROUND_UP(comp_size * num_comps, alignment);
   for (unsigned i = 0; i < num_loads; ++i) {
      nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));
      srcs[i] = nir_load_deref_with_access(b, elem, access);
   }

   nir_ssa_def *new_dest = nir_extract_bits(b, srcs, num_loads, 0, num_comps, intrin->dest.ssa.bit_size);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest);
   nir_instr_remove(&intrin->instr);
}

static void
split_unaligned_store(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)
{
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);

   assert(intrin->src[1].is_ssa);
   nir_ssa_def *value = intrin->src[1].ssa;
   unsigned comp_size = value->bit_size / 8;
   unsigned num_comps = value->num_components;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);
   const struct glsl_type *cast_type = get_cast_type(alignment * 8);
   nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment);

   unsigned num_stores = DIV_ROUND_UP(comp_size * num_comps, alignment);
   for (unsigned i = 0; i < num_stores; ++i) {
      nir_ssa_def *substore_val = nir_extract_bits(b, &value, 1, i * alignment * 8, 1, alignment * 8);
      nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));
      nir_store_deref_with_access(b, elem, substore_val, ~0, access);
   }

   nir_instr_remove(&intrin->instr);
}

static bool
split_unaligned_loads_stores(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;
      nir_builder b;
      nir_builder_init(&b, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref &&
                intrin->intrinsic != nir_intrinsic_store_deref)
               continue;
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

            unsigned align_mul = 0, align_offset = 0;
            nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset);

            unsigned alignment = align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;
            /* We can load anything at 4-byte alignment, except for
             * UBOs (AKA CBs where the granularity is 16 bytes).
             */
            if (alignment >= (deref->modes == nir_var_mem_ubo ? 16 : 4))
               continue;

            nir_ssa_def *val;
            if (intrin->intrinsic == nir_intrinsic_load_deref) {
               assert(intrin->dest.is_ssa);
               val = &intrin->dest.ssa;
            } else {
               assert(intrin->src[1].is_ssa);
               val = intrin->src[1].ssa;
            }

            unsigned natural_alignment =
               val->bit_size / 8 *
               (val->num_components == 3 ? 4 : val->num_components);
            if (alignment >= natural_alignment)
               continue;

            if (intrin->intrinsic == nir_intrinsic_load_deref)
               split_unaligned_load(&b, intrin, alignment);
            else
               split_unaligned_store(&b, intrin, alignment);
            progress = true;
         }
      }
   }

   return progress;
}

static enum pipe_tex_wrap
wrap_from_cl_addressing(unsigned addressing_mode)
{
@@ -1018,7 +888,7 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
   NIR_PASS_V(nir, dxil_nir_lower_deref_ssbo);
   NIR_PASS_V(nir, split_unaligned_loads_stores);
   NIR_PASS_V(nir, dxil_nir_split_unaligned_loads_stores);
   assert(nir->info.cs.ptr_size == 64);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
@@ -1078,7 +948,7 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
   }
   NIR_PASS_V(nir, clc_nir_lower_kernel_input_loads, inputs_var);
   NIR_PASS_V(nir, split_unaligned_loads_stores);
   NIR_PASS_V(nir, dxil_nir_split_unaligned_loads_stores);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
              nir_address_format_32bit_index_offset);
   NIR_PASS_V(nir, clc_nir_lower_system_values, work_properties_var);

View file

@@ -2147,3 +2147,134 @@ dxil_nir_lower_num_subgroups(nir_shader *s)
                                nir_metadata_dominance |
                                nir_metadata_loop_analysis, NULL);
}

static const struct glsl_type *
get_cast_type(unsigned bit_size)
{
   switch (bit_size) {
   case 64:
      return glsl_int64_t_type();
   case 32:
      return glsl_int_type();
   case 16:
      return glsl_int16_t_type();
   case 8:
      return glsl_int8_t_type();
   }
   unreachable("Invalid bit_size");
}

static void
split_unaligned_load(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)
{
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);
   nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS * NIR_MAX_VEC_COMPONENTS * sizeof(int64_t) / 8];
   unsigned comp_size = intrin->dest.ssa.bit_size / 8;
   unsigned num_comps = intrin->dest.ssa.num_components;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);
   const struct glsl_type *cast_type = get_cast_type(alignment * 8);
   nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment);

   unsigned num_loads = DIV_ROUND_UP(comp_size * num_comps, alignment);
   for (unsigned i = 0; i < num_loads; ++i) {
      nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));
      srcs[i] = nir_load_deref_with_access(b, elem, access);
   }

   nir_ssa_def *new_dest = nir_extract_bits(b, srcs, num_loads, 0, num_comps, intrin->dest.ssa.bit_size);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest);
   nir_instr_remove(&intrin->instr);
}

static void
split_unaligned_store(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)
{
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);

   assert(intrin->src[1].is_ssa);
   nir_ssa_def *value = intrin->src[1].ssa;
   unsigned comp_size = value->bit_size / 8;
   unsigned num_comps = value->num_components;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);
   const struct glsl_type *cast_type = get_cast_type(alignment * 8);
   nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment);

   unsigned num_stores = DIV_ROUND_UP(comp_size * num_comps, alignment);
   for (unsigned i = 0; i < num_stores; ++i) {
      nir_ssa_def *substore_val = nir_extract_bits(b, &value, 1, i * alignment * 8, 1, alignment * 8);
      nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));
      nir_store_deref_with_access(b, elem, substore_val, ~0, access);
   }

   nir_instr_remove(&intrin->instr);
}

bool
dxil_nir_split_unaligned_loads_stores(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;
      nir_builder b;
      nir_builder_init(&b, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref &&
                intrin->intrinsic != nir_intrinsic_store_deref)
               continue;
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

            unsigned align_mul = 0, align_offset = 0;
            nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset);

            unsigned alignment = align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;
            /* We can load anything at 4-byte alignment, except for
             * UBOs (AKA CBs where the granularity is 16 bytes).
             */
            if (alignment >= (deref->modes == nir_var_mem_ubo ? 16 : 4))
               continue;

            nir_ssa_def *val;
            if (intrin->intrinsic == nir_intrinsic_load_deref) {
               assert(intrin->dest.is_ssa);
               val = &intrin->dest.ssa;
            } else {
               assert(intrin->src[1].is_ssa);
               val = intrin->src[1].ssa;
            }

            unsigned natural_alignment =
               val->bit_size / 8 *
               (val->num_components == 3 ? 4 : val->num_components);
            if (alignment >= natural_alignment)
               continue;

            if (intrin->intrinsic == nir_intrinsic_load_deref)
               split_unaligned_load(&b, intrin, alignment);
            else
               split_unaligned_store(&b, intrin, alignment);
            progress = true;
         }
      }
   }

   return progress;
}
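As a quick check on the thresholds in the loop above (4-byte alignment for most modes, 16 bytes for UBOs/CBs, and vec3 rounding up to vec4 for the natural alignment), here is a hedged standalone model; needs_split is a hypothetical name and only mirrors the pass's early-continue logic, it is not part of the commit.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical model of the pass's "skip or split?" decision. */
static bool
needs_split(unsigned alignment, unsigned bit_size, unsigned num_components, bool is_ubo)
{
   /* Anything 4-byte aligned is fine, except UBOs (CBs), whose access
    * granularity is 16 bytes. */
   if (alignment >= (is_ubo ? 16u : 4u))
      return false;

   /* vec3 is padded to vec4 for the natural alignment, as in the pass. */
   unsigned natural = bit_size / 8 * (num_components == 3 ? 4 : num_components);
   return alignment < natural;
}

int
main(void)
{
   printf("%d\n", needs_split(2, 32, 1, false)); /* 1: 2-byte-aligned int gets split */
   printf("%d\n", needs_split(4, 32, 4, false)); /* 0: 4-byte-aligned SSBO vec4 is left alone */
   printf("%d\n", needs_split(4, 32, 4, true));  /* 1: same access from a UBO still gets split */
   return 0;
}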

View file

@@ -77,6 +77,7 @@ bool dxil_nir_ensure_position_writes(nir_shader *s);
bool dxil_nir_lower_sample_pos(nir_shader *s);
bool dxil_nir_lower_subgroup_id(nir_shader *s);
bool dxil_nir_lower_num_subgroups(nir_shader *s);
bool dxil_nir_split_unaligned_loads_stores(nir_shader *shader);
#ifdef __cplusplus
}