broadcom/compiler: rework scratch lowering

Let's rely on nir_lower_mem_access_bit_sizes doing all the heavy work, so
v3d_nir_lower_scratch can be cleaned up quite a lot.

Acked-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29711>
Authored by Karol Herbst on 2024-06-13 11:27:47 +02:00; committed by Marge Bot.
parent 75196e86f1
commit 05b9705ae0
3 changed files with 26 additions and 74 deletions

View file

@ -133,6 +133,16 @@ v3d_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
uint32_t align_offset, bool offset_is_const,
const void *cb_data)
{
/* we only support single component 32 bit load/stores on scratch */
if (intrin == nir_intrinsic_load_scratch ||
intrin == nir_intrinsic_store_scratch) {
return (nir_mem_access_size_align){
.num_components = 1,
.bit_size = 32,
.align = 4,
};
}
align = nir_combined_align(align, align_offset);
assert(util_is_power_of_two_nonzero(align));
@ -210,7 +220,7 @@ v3d_nir_lower_load_store_bitsize(nir_shader *s)
nir_lower_mem_access_bit_sizes_options lower_options = {
.modes = nir_var_mem_global | nir_var_mem_ssbo |
nir_var_mem_ubo | nir_var_mem_constant |
nir_var_mem_shared,
nir_var_mem_shared | nir_var_function_temp,
.callback = v3d_size_align_cb,
};

View file

@ -30,18 +30,17 @@
*
* Swizzles around the addresses of
* nir_intrinsic_load_scratch/nir_intrinsic_store_scratch so that a QPU stores
* a cacheline at a time per dword of scratch access, scalarizing and removing
* writemasks in the process.
* a cacheline at a time per dword of scratch access.
*/
static nir_def *
v3d_nir_scratch_offset(nir_builder *b, nir_intrinsic_instr *instr)
{
bool is_store = instr->intrinsic == nir_intrinsic_store_scratch;
nir_def *offset = instr->src[is_store ? 1 : 0].ssa;
b->cursor = nir_before_instr(&instr->instr);
nir_def *offset = nir_get_io_offset_src(instr)->ssa;
assert(nir_intrinsic_align_mul(instr) >= 4);
assert(nir_intrinsic_align_offset(instr) == 0);
assert(nir_intrinsic_align_offset(instr) % 4 == 0);
/* The spill_offset register will already have the subgroup ID (EIDX)
* shifted and ORed in at bit 2, so all we need to do is to move the
@ -51,67 +50,13 @@ v3d_nir_scratch_offset(nir_builder *b, nir_intrinsic_instr *instr)
}
static void
v3d_nir_lower_load_scratch(nir_builder *b, nir_intrinsic_instr *instr)
v3d_nir_lower_scratch_instr(nir_builder *b, nir_intrinsic_instr *instr)
{
b->cursor = nir_before_instr(&instr->instr);
nir_def *offset = v3d_nir_scratch_offset(b,instr);
nir_def *chans[NIR_MAX_VEC_COMPONENTS];
for (int i = 0; i < instr->num_components; i++) {
nir_def *chan_offset =
nir_iadd_imm(b, offset, V3D_CHANNELS * i * 4);
nir_intrinsic_instr *chan_instr =
nir_intrinsic_instr_create(b->shader, instr->intrinsic);
chan_instr->num_components = 1;
nir_def_init(&chan_instr->instr, &chan_instr->def, 1,
instr->def.bit_size);
chan_instr->src[0] = nir_src_for_ssa(chan_offset);
nir_intrinsic_set_align(chan_instr, 4, 0);
nir_builder_instr_insert(b, &chan_instr->instr);
chans[i] = &chan_instr->def;
}
nir_def *result = nir_vec(b, chans, instr->num_components);
nir_def_rewrite_uses(&instr->def, result);
nir_instr_remove(&instr->instr);
}
static void
v3d_nir_lower_store_scratch(nir_builder *b, nir_intrinsic_instr *instr)
{
b->cursor = nir_before_instr(&instr->instr);
/* scalarized through nir_lower_mem_access_bit_sizes */
assert(instr->num_components == 1);
nir_def *offset = v3d_nir_scratch_offset(b, instr);
nir_def *value = instr->src[0].ssa;
for (int i = 0; i < instr->num_components; i++) {
if (!(nir_intrinsic_write_mask(instr) & (1 << i)))
continue;
nir_def *chan_offset =
nir_iadd_imm(b, offset, V3D_CHANNELS * i * 4);
nir_intrinsic_instr *chan_instr =
nir_intrinsic_instr_create(b->shader, instr->intrinsic);
chan_instr->num_components = 1;
chan_instr->src[0] = nir_src_for_ssa(nir_channel(b,
value,
i));
chan_instr->src[1] = nir_src_for_ssa(chan_offset);
nir_intrinsic_set_write_mask(chan_instr, 0x1);
nir_intrinsic_set_align(chan_instr, 4, 0);
nir_builder_instr_insert(b, &chan_instr->instr);
}
nir_instr_remove(&instr->instr);
nir_src_rewrite(nir_get_io_offset_src(instr), offset);
}
static bool
@ -121,10 +66,8 @@ v3d_nir_lower_scratch_cb(nir_builder *b,
{
switch (intr->intrinsic) {
case nir_intrinsic_load_scratch:
v3d_nir_lower_load_scratch(b, intr);
return true;
case nir_intrinsic_store_scratch:
v3d_nir_lower_store_scratch(b, intr);
v3d_nir_lower_scratch_instr(b, intr);
return true;
default:
return false;

View file

@ -725,13 +725,7 @@ v3d_lower_nir(struct v3d_compile *c)
}
NIR_PASS(_, c->s, nir_lower_compute_system_values, NULL);
NIR_PASS(_, c->s, nir_lower_vars_to_scratch,
nir_var_function_temp,
0,
glsl_get_natural_size_align_bytes);
NIR_PASS(_, c->s, nir_lower_is_helper_invocation);
NIR_PASS(_, c->s, v3d_nir_lower_scratch);
NIR_PASS(_, c->s, v3d_nir_lower_null_pointers);
}
@ -1708,10 +1702,15 @@ v3d_attempt_compile(struct v3d_compile *c)
NIR_PASS(_, c->s, nir_lower_robust_access, &opts);
}
NIR_PASS(_, c->s, nir_lower_vars_to_scratch,
nir_var_function_temp,
0,
glsl_get_natural_size_align_bytes);
NIR_PASS(_, c->s, v3d_nir_lower_global_2x32);
NIR_PASS(_, c->s, nir_lower_wrmasks, should_split_wrmask, c->s);
NIR_PASS(_, c->s, v3d_nir_lower_load_store_bitsize);
NIR_PASS(_, c->s, v3d_nir_lower_scratch);
NIR_PASS(_, c->s, v3d_nir_lower_subgroup_intrinsics, c);