mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 05:18:08 +02:00
broadcom/compiler: rework scratch lowering
Let's rely on nir_lower_mem_access_bit_sizes to do all the heavy work, so that v3d_nir_lower_scratch can be cleaned up quite a lot.

Acked-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29711>
This commit is contained in:
parent
75196e86f1
commit
05b9705ae0
3 changed files with 26 additions and 74 deletions
|
|
@ -133,6 +133,16 @@ v3d_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
|
|||
uint32_t align_offset, bool offset_is_const,
|
||||
const void *cb_data)
|
||||
{
|
||||
/* we only support single component 32 bit load/stores on scratch */
|
||||
if (intrin == nir_intrinsic_load_scratch ||
|
||||
intrin == nir_intrinsic_store_scratch) {
|
||||
return (nir_mem_access_size_align){
|
||||
.num_components = 1,
|
||||
.bit_size = 32,
|
||||
.align = 4,
|
||||
};
|
||||
}
|
||||
|
||||
align = nir_combined_align(align, align_offset);
|
||||
assert(util_is_power_of_two_nonzero(align));
|
||||
|
||||
|
|
@ -210,7 +220,7 @@ v3d_nir_lower_load_store_bitsize(nir_shader *s)
|
|||
nir_lower_mem_access_bit_sizes_options lower_options = {
|
||||
.modes = nir_var_mem_global | nir_var_mem_ssbo |
|
||||
nir_var_mem_ubo | nir_var_mem_constant |
|
||||
nir_var_mem_shared,
|
||||
nir_var_mem_shared | nir_var_function_temp,
|
||||
.callback = v3d_size_align_cb,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -30,18 +30,17 @@
|
|||
*
|
||||
* Swizzles around the addresses of
|
||||
* nir_intrinsic_load_scratch/nir_intrinsic_store_scratch so that a QPU stores
|
||||
* a cacheline at a time per dword of scratch access, scalarizing and removing
|
||||
* writemasks in the process.
|
||||
* a cacheline at a time per dword of scratch access.
|
||||
*/
|
||||
|
||||
static nir_def *
|
||||
v3d_nir_scratch_offset(nir_builder *b, nir_intrinsic_instr *instr)
|
||||
{
|
||||
bool is_store = instr->intrinsic == nir_intrinsic_store_scratch;
|
||||
nir_def *offset = instr->src[is_store ? 1 : 0].ssa;
|
||||
b->cursor = nir_before_instr(&instr->instr);
|
||||
nir_def *offset = nir_get_io_offset_src(instr)->ssa;
|
||||
|
||||
assert(nir_intrinsic_align_mul(instr) >= 4);
|
||||
assert(nir_intrinsic_align_offset(instr) == 0);
|
||||
assert(nir_intrinsic_align_offset(instr) % 4 == 0);
|
||||
|
||||
/* The spill_offset register will already have the subgroup ID (EIDX)
|
||||
* shifted and ORed in at bit 2, so all we need to do is to move the
|
||||
|
|
@ -51,67 +50,13 @@ v3d_nir_scratch_offset(nir_builder *b, nir_intrinsic_instr *instr)
|
|||
}
|
||||
|
||||
static void
|
||||
v3d_nir_lower_load_scratch(nir_builder *b, nir_intrinsic_instr *instr)
|
||||
v3d_nir_lower_scratch_instr(nir_builder *b, nir_intrinsic_instr *instr)
|
||||
{
|
||||
b->cursor = nir_before_instr(&instr->instr);
|
||||
|
||||
nir_def *offset = v3d_nir_scratch_offset(b,instr);
|
||||
|
||||
nir_def *chans[NIR_MAX_VEC_COMPONENTS];
|
||||
for (int i = 0; i < instr->num_components; i++) {
|
||||
nir_def *chan_offset =
|
||||
nir_iadd_imm(b, offset, V3D_CHANNELS * i * 4);
|
||||
|
||||
nir_intrinsic_instr *chan_instr =
|
||||
nir_intrinsic_instr_create(b->shader, instr->intrinsic);
|
||||
chan_instr->num_components = 1;
|
||||
nir_def_init(&chan_instr->instr, &chan_instr->def, 1,
|
||||
instr->def.bit_size);
|
||||
|
||||
chan_instr->src[0] = nir_src_for_ssa(chan_offset);
|
||||
|
||||
nir_intrinsic_set_align(chan_instr, 4, 0);
|
||||
|
||||
nir_builder_instr_insert(b, &chan_instr->instr);
|
||||
|
||||
chans[i] = &chan_instr->def;
|
||||
}
|
||||
|
||||
nir_def *result = nir_vec(b, chans, instr->num_components);
|
||||
nir_def_rewrite_uses(&instr->def, result);
|
||||
nir_instr_remove(&instr->instr);
|
||||
}
|
||||
|
||||
static void
|
||||
v3d_nir_lower_store_scratch(nir_builder *b, nir_intrinsic_instr *instr)
|
||||
{
|
||||
b->cursor = nir_before_instr(&instr->instr);
|
||||
/* scalarized through nir_lower_mem_access_bit_sizes */
|
||||
assert(instr->num_components == 1);
|
||||
|
||||
nir_def *offset = v3d_nir_scratch_offset(b, instr);
|
||||
nir_def *value = instr->src[0].ssa;
|
||||
|
||||
for (int i = 0; i < instr->num_components; i++) {
|
||||
if (!(nir_intrinsic_write_mask(instr) & (1 << i)))
|
||||
continue;
|
||||
|
||||
nir_def *chan_offset =
|
||||
nir_iadd_imm(b, offset, V3D_CHANNELS * i * 4);
|
||||
|
||||
nir_intrinsic_instr *chan_instr =
|
||||
nir_intrinsic_instr_create(b->shader, instr->intrinsic);
|
||||
chan_instr->num_components = 1;
|
||||
|
||||
chan_instr->src[0] = nir_src_for_ssa(nir_channel(b,
|
||||
value,
|
||||
i));
|
||||
chan_instr->src[1] = nir_src_for_ssa(chan_offset);
|
||||
nir_intrinsic_set_write_mask(chan_instr, 0x1);
|
||||
nir_intrinsic_set_align(chan_instr, 4, 0);
|
||||
|
||||
nir_builder_instr_insert(b, &chan_instr->instr);
|
||||
}
|
||||
|
||||
nir_instr_remove(&instr->instr);
|
||||
nir_src_rewrite(nir_get_io_offset_src(instr), offset);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -121,10 +66,8 @@ v3d_nir_lower_scratch_cb(nir_builder *b,
|
|||
{
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_scratch:
|
||||
v3d_nir_lower_load_scratch(b, intr);
|
||||
return true;
|
||||
case nir_intrinsic_store_scratch:
|
||||
v3d_nir_lower_store_scratch(b, intr);
|
||||
v3d_nir_lower_scratch_instr(b, intr);
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -725,13 +725,7 @@ v3d_lower_nir(struct v3d_compile *c)
|
|||
}
|
||||
|
||||
NIR_PASS(_, c->s, nir_lower_compute_system_values, NULL);
|
||||
|
||||
NIR_PASS(_, c->s, nir_lower_vars_to_scratch,
|
||||
nir_var_function_temp,
|
||||
0,
|
||||
glsl_get_natural_size_align_bytes);
|
||||
NIR_PASS(_, c->s, nir_lower_is_helper_invocation);
|
||||
NIR_PASS(_, c->s, v3d_nir_lower_scratch);
|
||||
NIR_PASS(_, c->s, v3d_nir_lower_null_pointers);
|
||||
}
|
||||
|
||||
|
|
@ -1708,10 +1702,15 @@ v3d_attempt_compile(struct v3d_compile *c)
|
|||
NIR_PASS(_, c->s, nir_lower_robust_access, &opts);
|
||||
}
|
||||
|
||||
NIR_PASS(_, c->s, nir_lower_vars_to_scratch,
|
||||
nir_var_function_temp,
|
||||
0,
|
||||
glsl_get_natural_size_align_bytes);
|
||||
|
||||
NIR_PASS(_, c->s, v3d_nir_lower_global_2x32);
|
||||
NIR_PASS(_, c->s, nir_lower_wrmasks, should_split_wrmask, c->s);
|
||||
|
||||
NIR_PASS(_, c->s, v3d_nir_lower_load_store_bitsize);
|
||||
NIR_PASS(_, c->s, v3d_nir_lower_scratch);
|
||||
|
||||
NIR_PASS(_, c->s, v3d_nir_lower_subgroup_intrinsics, c);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue