microsoft/compiler: Move unaligned load/store pass from CL
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21029>
This commit is contained in:
parent f50843fcdb
commit facd2e4fdb

3 changed files with 134 additions and 132 deletions
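For orientation before the diff: the pass being moved rewrites any load/store whose proven alignment is below the natural alignment of its type into a series of aligned scalar accesses through a cast deref of matching bit size (get_cast_type() below), then reassembles the pieces with nir_extract_bits(). A minimal standalone sketch of the chunking arithmetic — the helper macro is redefined here so the sketch compiles on its own, and the example values are illustrative, not from the commit:

#include <stdio.h>

/* Same rounding helper as Mesa's u_math.h, redefined for a
 * standalone build. */
#define DIV_ROUND_UP(a, b) (((a) + (b) - 1) / (b))

int main(void)
{
   /* Hypothetical input: a 4 x 32-bit load (16 bytes) whose pointer
    * is only known to be 2-byte aligned. */
   unsigned comp_size = 4;   /* bytes per component */
   unsigned num_comps = 4;
   unsigned alignment = 2;

   /* Chunk count exactly as in split_unaligned_load()/_store(). */
   unsigned num_loads = DIV_ROUND_UP(comp_size * num_comps, alignment);

   printf("%u loads of %u byte(s):\n", num_loads, alignment);
   for (unsigned i = 0; i < num_loads; ++i)
      printf("  aligned load #%u at byte offset %u\n", i, i * alignment);
   /* Prints eight 16-bit loads at offsets 0..14; in the real pass,
    * nir_extract_bits() stitches the results back into the vec4. */
   return 0;
}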
src/microsoft/clc/clc_compiler.c

@@ -546,136 +546,6 @@ copy_const_initializer(const nir_constant *constant, const struct glsl_type *typ
    }
 }
 
-static const struct glsl_type *
-get_cast_type(unsigned bit_size)
-{
-   switch (bit_size) {
-   case 64:
-      return glsl_int64_t_type();
-   case 32:
-      return glsl_int_type();
-   case 16:
-      return glsl_int16_t_type();
-   case 8:
-      return glsl_int8_t_type();
-   }
-   unreachable("Invalid bit_size");
-}
-
-static void
-split_unaligned_load(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)
-{
-   enum gl_access_qualifier access = nir_intrinsic_access(intrin);
-   nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS * NIR_MAX_VEC_COMPONENTS * sizeof(int64_t) / 8];
-   unsigned comp_size = intrin->dest.ssa.bit_size / 8;
-   unsigned num_comps = intrin->dest.ssa.num_components;
-
-   b->cursor = nir_before_instr(&intrin->instr);
-
-   nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);
-
-   const struct glsl_type *cast_type = get_cast_type(alignment * 8);
-   nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment);
-
-   unsigned num_loads = DIV_ROUND_UP(comp_size * num_comps, alignment);
-   for (unsigned i = 0; i < num_loads; ++i) {
-      nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));
-      srcs[i] = nir_load_deref_with_access(b, elem, access);
-   }
-
-   nir_ssa_def *new_dest = nir_extract_bits(b, srcs, num_loads, 0, num_comps, intrin->dest.ssa.bit_size);
-   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest);
-   nir_instr_remove(&intrin->instr);
-}
-
-static void
-split_unaligned_store(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)
-{
-   enum gl_access_qualifier access = nir_intrinsic_access(intrin);
-
-   assert(intrin->src[1].is_ssa);
-   nir_ssa_def *value = intrin->src[1].ssa;
-   unsigned comp_size = value->bit_size / 8;
-   unsigned num_comps = value->num_components;
-
-   b->cursor = nir_before_instr(&intrin->instr);
-
-   nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);
-
-   const struct glsl_type *cast_type = get_cast_type(alignment * 8);
-   nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment);
-
-   unsigned num_stores = DIV_ROUND_UP(comp_size * num_comps, alignment);
-   for (unsigned i = 0; i < num_stores; ++i) {
-      nir_ssa_def *substore_val = nir_extract_bits(b, &value, 1, i * alignment * 8, 1, alignment * 8);
-      nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));
-      nir_store_deref_with_access(b, elem, substore_val, ~0, access);
-   }
-
-   nir_instr_remove(&intrin->instr);
-}
-
-static bool
-split_unaligned_loads_stores(nir_shader *shader)
-{
-   bool progress = false;
-
-   nir_foreach_function(function, shader) {
-      if (!function->impl)
-         continue;
-
-      nir_builder b;
-      nir_builder_init(&b, function->impl);
-
-      nir_foreach_block(block, function->impl) {
-         nir_foreach_instr_safe(instr, block) {
-            if (instr->type != nir_instr_type_intrinsic)
-               continue;
-            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-            if (intrin->intrinsic != nir_intrinsic_load_deref &&
-                intrin->intrinsic != nir_intrinsic_store_deref)
-               continue;
-            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
-
-            unsigned align_mul = 0, align_offset = 0;
-            nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset);
-
-            unsigned alignment = align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;
-
-            /* We can load anything at 4-byte alignment, except for
-             * UBOs (AKA CBs where the granularity is 16 bytes).
-             */
-            if (alignment >= (deref->modes == nir_var_mem_ubo ? 16 : 4))
-               continue;
-
-            nir_ssa_def *val;
-            if (intrin->intrinsic == nir_intrinsic_load_deref) {
-               assert(intrin->dest.is_ssa);
-               val = &intrin->dest.ssa;
-            } else {
-               assert(intrin->src[1].is_ssa);
-               val = intrin->src[1].ssa;
-            }
-
-            unsigned natural_alignment =
-               val->bit_size / 8 *
-               (val->num_components == 3 ? 4 : val->num_components);
-
-            if (alignment >= natural_alignment)
-               continue;
-
-            if (intrin->intrinsic == nir_intrinsic_load_deref)
-               split_unaligned_load(&b, intrin, alignment);
-            else
-               split_unaligned_store(&b, intrin, alignment);
-            progress = true;
-         }
-      }
-   }
-
-   return progress;
-}
-
 static enum pipe_tex_wrap
 wrap_from_cl_addressing(unsigned addressing_mode)
 {
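In split_unaligned_loads_stores() above, the usable alignment is derived from the nir_get_explicit_deref_align() results as `align_offset ? 1 << (ffs(align_offset) - 1) : align_mul`: the address is align_mul * k + align_offset, so when the offset is non-zero only the power of two at its lowest set bit is guaranteed. A small standalone check of that identity (the helper name and the test values are made up for illustration):

#include <stdio.h>
#include <strings.h> /* ffs() */

/* Mirrors the derivation in the pass: align_mul on the boundary,
 * otherwise the largest power of two dividing align_offset. */
static unsigned usable_alignment(unsigned align_mul, unsigned align_offset)
{
   return align_offset ? 1u << (ffs((int)align_offset) - 1) : align_mul;
}

int main(void)
{
   printf("%u\n", usable_alignment(16, 0)); /* 16: on the boundary      */
   printf("%u\n", usable_alignment(16, 8)); /*  8: 16k + 8 is 8-aligned */
   printf("%u\n", usable_alignment(16, 6)); /*  2: 16k + 6 is 2-aligned */
   return 0;
}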
@@ -1018,7 +888,7 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
 
    NIR_PASS_V(nir, dxil_nir_lower_deref_ssbo);
 
-   NIR_PASS_V(nir, split_unaligned_loads_stores);
+   NIR_PASS_V(nir, dxil_nir_split_unaligned_loads_stores);
 
    assert(nir->info.cs.ptr_size == 64);
    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
@@ -1078,7 +948,7 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
    }
 
    NIR_PASS_V(nir, clc_nir_lower_kernel_input_loads, inputs_var);
-   NIR_PASS_V(nir, split_unaligned_loads_stores);
+   NIR_PASS_V(nir, dxil_nir_split_unaligned_loads_stores);
    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
               nir_address_format_32bit_index_offset);
    NIR_PASS_V(nir, clc_nir_lower_system_values, work_properties_var);
src/microsoft/compiler/dxil_nir.c

@@ -2147,3 +2147,134 @@ dxil_nir_lower_num_subgroups(nir_shader *s)
                                        nir_metadata_dominance |
                                        nir_metadata_loop_analysis, NULL);
 }
+
+
+static const struct glsl_type *
+get_cast_type(unsigned bit_size)
+{
+   switch (bit_size) {
+   case 64:
+      return glsl_int64_t_type();
+   case 32:
+      return glsl_int_type();
+   case 16:
+      return glsl_int16_t_type();
+   case 8:
+      return glsl_int8_t_type();
+   }
+   unreachable("Invalid bit_size");
+}
+
+static void
+split_unaligned_load(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)
+{
+   enum gl_access_qualifier access = nir_intrinsic_access(intrin);
+   nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS * NIR_MAX_VEC_COMPONENTS * sizeof(int64_t) / 8];
+   unsigned comp_size = intrin->dest.ssa.bit_size / 8;
+   unsigned num_comps = intrin->dest.ssa.num_components;
+
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);
+
+   const struct glsl_type *cast_type = get_cast_type(alignment * 8);
+   nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment);
+
+   unsigned num_loads = DIV_ROUND_UP(comp_size * num_comps, alignment);
+   for (unsigned i = 0; i < num_loads; ++i) {
+      nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));
+      srcs[i] = nir_load_deref_with_access(b, elem, access);
+   }
+
+   nir_ssa_def *new_dest = nir_extract_bits(b, srcs, num_loads, 0, num_comps, intrin->dest.ssa.bit_size);
+   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest);
+   nir_instr_remove(&intrin->instr);
+}
+
+static void
+split_unaligned_store(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)
+{
+   enum gl_access_qualifier access = nir_intrinsic_access(intrin);
+
+   assert(intrin->src[1].is_ssa);
+   nir_ssa_def *value = intrin->src[1].ssa;
+   unsigned comp_size = value->bit_size / 8;
+   unsigned num_comps = value->num_components;
+
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);
+
+   const struct glsl_type *cast_type = get_cast_type(alignment * 8);
+   nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment);
+
+   unsigned num_stores = DIV_ROUND_UP(comp_size * num_comps, alignment);
+   for (unsigned i = 0; i < num_stores; ++i) {
+      nir_ssa_def *substore_val = nir_extract_bits(b, &value, 1, i * alignment * 8, 1, alignment * 8);
+      nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));
+      nir_store_deref_with_access(b, elem, substore_val, ~0, access);
+   }
+
+   nir_instr_remove(&intrin->instr);
+}
+
+bool
+dxil_nir_split_unaligned_loads_stores(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      nir_builder b;
+      nir_builder_init(&b, function->impl);
+
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+            if (intrin->intrinsic != nir_intrinsic_load_deref &&
+                intrin->intrinsic != nir_intrinsic_store_deref)
+               continue;
+            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+
+            unsigned align_mul = 0, align_offset = 0;
+            nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset);
+
+            unsigned alignment = align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;
+
+            /* We can load anything at 4-byte alignment, except for
+             * UBOs (AKA CBs where the granularity is 16 bytes).
+             */
+            if (alignment >= (deref->modes == nir_var_mem_ubo ? 16 : 4))
+               continue;
+
+            nir_ssa_def *val;
+            if (intrin->intrinsic == nir_intrinsic_load_deref) {
+               assert(intrin->dest.is_ssa);
+               val = &intrin->dest.ssa;
+            } else {
+               assert(intrin->src[1].is_ssa);
+               val = intrin->src[1].ssa;
+            }
+
+            unsigned natural_alignment =
+               val->bit_size / 8 *
+               (val->num_components == 3 ? 4 : val->num_components);
+
+            if (alignment >= natural_alignment)
+               continue;
+
+            if (intrin->intrinsic == nir_intrinsic_load_deref)
+               split_unaligned_load(&b, intrin, alignment);
+            else
+               split_unaligned_store(&b, intrin, alignment);
+            progress = true;
+         }
+      }
+   }
+
+   return progress;
+}
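One subtlety in the pass as re-added above: the natural alignment treats 3-component vectors as if they had 4 components, so a 32-bit vec3 wants 16-byte, not 12-byte, alignment. The commit does not state a rationale; vec4-slot packing is my reading. A tiny standalone illustration of the formula:

#include <stdio.h>

/* Same formula as in dxil_nir_split_unaligned_loads_stores():
 * bit_size / 8 * (num_components == 3 ? 4 : num_components). */
static unsigned natural_alignment(unsigned bit_size, unsigned num_components)
{
   return bit_size / 8 * (num_components == 3 ? 4 : num_components);
}

int main(void)
{
   printf("vec3 of 32-bit: %u bytes\n", natural_alignment(32, 3)); /* 16, not 12 */
   printf("vec2 of 16-bit: %u bytes\n", natural_alignment(16, 2)); /*  4 */
   printf("scalar 64-bit:  %u bytes\n", natural_alignment(64, 1)); /*  8 */
   return 0;
}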
src/microsoft/compiler/dxil_nir.h

@@ -77,6 +77,7 @@ bool dxil_nir_ensure_position_writes(nir_shader *s);
 bool dxil_nir_lower_sample_pos(nir_shader *s);
 bool dxil_nir_lower_subgroup_id(nir_shader *s);
 bool dxil_nir_lower_num_subgroups(nir_shader *s);
+bool dxil_nir_split_unaligned_loads_stores(nir_shader *shader);
 
 #ifdef __cplusplus
 }
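With the prototype exported here, any DXIL-targeting frontend can run the pass the way the two clc_spirv_to_dxil() call sites above do. A hedged sketch of such a call site within the Mesa tree — the wrapper function is hypothetical; NIR_PASS_V and the headers are existing Mesa API:

#include "nir.h"
#include "dxil_nir.h"

/* Hypothetical lowering step in a DXIL backend: run after I/O derefs
 * exist but before nir_lower_explicit_io, matching the ordering in
 * clc_spirv_to_dxil() above. NIR_PASS_V validates the shader after
 * the pass and discards the progress flag. */
static void
lower_unaligned_memory_access(nir_shader *nir)
{
   NIR_PASS_V(nir, dxil_nir_split_unaligned_loads_stores);
}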