ir3: lower SSBO access imm offsets

Add the BASE index to the load/store_ssbo_ir3 intrinsics to store an
immediate offset. This offset is encoded in the corresponding fields of
isam.v/ldib.b/stib.b.
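
Since BASE is a regular NIR intrinsic index, the generated accessors
carry the value from NIR to the backend. A two-line sketch; the setter
call is assumed producer-side usage, not code from this commit (only
nir_intrinsic_base() appears in the diff below):

   /* Hypothetical producer: record a 16-byte immediate offset. */
   nir_intrinsic_set_base(intr, 16);

   /* Backend (ir3) side: read it back when emitting the instruction. */
   unsigned imm_offset = nir_intrinsic_base(intr);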

One extra optimization is implemented: whenever the regular offset is
also a constant, the total offset (regular plus immediate) is aligned
down to a multiple of the immediate offset bound (one more than the
largest encodable immediate), which becomes the regular offset, while
the immediate is set to the remainder. This ensures that the register
used for the regular offset can often be reused among multiple
contiguous accesses.
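
To make the split concrete, here is a minimal standalone sketch (not
part of the commit) of the arithmetic performed by ir3_lower_imm_offset()
in the diff below, assuming the 7-bit immediate field of ldib.b/stib.b
(isam.v gets 8 bits); function and variable names are ad hoc:

   #include <assert.h>
   #include <stdint.h>
   #include <stdio.h>

   /* Model of the constant-offset split: align the total byte offset
    * down to the immediate-field bound and keep the remainder as the
    * immediate. */
   static void
   split_offset(uint32_t base, uint32_t const_offset, unsigned imm_bits,
                uint32_t *reg_offset, uint32_t *imm)
   {
      uint32_t bound = 1u << imm_bits;
      assert(base < bound);
      uint32_t full = base + const_offset;
      *reg_offset = full & ~(bound - 1); /* ROUND_DOWN_TO(full, bound) */
      *imm = full % bound;
   }

   int
   main(void)
   {
      uint32_t reg, imm;
      /* Two nearby constant offsets map to the same aligned register
       * offset, so the register holding 256 can be shared: */
      split_offset(0, 300, 7, &reg, &imm); /* reg = 256, imm = 44 */
      printf("300 -> %u + %u\n", reg, imm);
      split_offset(0, 304, 7, &reg, &imm); /* reg = 256, imm = 48 */
      printf("304 -> %u + %u\n", reg, imm);
      return 0;
   }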

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28664>
Job Noorman 2024-04-10 10:43:38 +02:00 committed by Marge Bot
parent 759a4679a3
commit d3f8de791d
5 changed files with 59 additions and 9 deletions

src/compiler/nir/nir_intrinsics.py

@@ -1286,9 +1286,9 @@ intrinsic("cmat_copy", src_comp=[-1, -1])
 # The float versions are not handled because those are not supported
 # by the backend.
 store("ssbo_ir3", [1, 1, 1],
-      indices=[WRITE_MASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
+      indices=[BASE, WRITE_MASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
 load("ssbo_ir3", [1, 1, 1],
-     indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
+     indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
 intrinsic("ssbo_atomic_ir3", src_comp=[1, 1, 1, 1], dest_comp=1,
           indices=[ACCESS, ATOMIC_OP])
 intrinsic("ssbo_atomic_swap_ir3", src_comp=[1, 1, 1, 1, 1], dest_comp=1,

src/freedreno/ir3/ir3_a6xx.c

@@ -37,6 +37,20 @@
  * encoding compared to a4xx/a5xx.
  */
+static void
+lower_ssbo_offset(struct ir3_context *ctx, nir_intrinsic_instr *intr,
+                  nir_src *offset_src,
+                  struct ir3_instruction **offset, unsigned *imm_offset)
+{
+   if (ctx->compiler->has_ssbo_imm_offsets) {
+      ir3_lower_imm_offset(ctx, intr, offset_src, 7, offset, imm_offset);
+   } else {
+      assert(nir_intrinsic_base(intr) == 0);
+      *offset = ir3_get_src(ctx, offset_src)[0];
+      *imm_offset = 0;
+   }
+}
+
 /* src[] = { buffer_index, offset }. No const_index */
 static void
 emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
@@ -45,9 +59,9 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
    struct ir3_block *b = ctx->block;
    struct ir3_instruction *offset;
    struct ir3_instruction *ldib;
-   unsigned imm_offset_val = 0;
+   unsigned imm_offset_val;
 
-   offset = ir3_get_src(ctx, &intr->src[2])[0];
+   lower_ssbo_offset(ctx, intr, &intr->src[2], &offset, &imm_offset_val);
    struct ir3_instruction *imm_offset = create_immed(b, imm_offset_val);
 
    ldib = ir3_LDIB(b, ir3_ssbo_to_ibo(ctx, intr->src[0]), 0, offset, 0,
@@ -78,15 +92,15 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
    struct ir3_instruction *stib, *val, *offset;
    unsigned wrmask = nir_intrinsic_write_mask(intr);
    unsigned ncomp = ffs(~wrmask) - 1;
-   unsigned imm_offset_val = 0;
+   unsigned imm_offset_val;
 
    assert(wrmask == BITFIELD_MASK(intr->num_components));
 
    /* src0 is offset, src1 is immediate offset, src2 is value:
    */
   val = ir3_create_collect(b, ir3_get_src(ctx, &intr->src[0]), ncomp);
-   offset = ir3_get_src(ctx, &intr->src[3])[0];
+   lower_ssbo_offset(ctx, intr, &intr->src[3], &offset, &imm_offset_val);
    struct ir3_instruction *imm_offset = create_immed(b, imm_offset_val);
 
    stib = ir3_STIB(b, ir3_ssbo_to_ibo(ctx, intr->src[1]), 0, offset, 0,

src/freedreno/ir3/ir3_compiler_nir.c

@@ -1603,14 +1603,15 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx,
    }
 
    struct ir3_block *b = ctx->block;
-   struct ir3_instruction *offset = ir3_get_src(ctx, &intr->src[2])[0];
+   nir_src *offset_src = &intr->src[2];
    struct ir3_instruction *coords = NULL;
    unsigned imm_offset = 0;
 
    if (ctx->compiler->has_isam_v) {
-      coords = offset;
+      ir3_lower_imm_offset(ctx, intr, offset_src, 8, &coords, &imm_offset);
    } else {
-      coords = ir3_collect(b, offset, create_immed(b, 0));
+      coords =
+         ir3_collect(b, ir3_get_src(ctx, offset_src)[0], create_immed(b, 0));
    }
 
    struct tex_src_info info = get_image_ssbo_samp_tex_src(ctx, &intr->src[0], false);
@@ -1624,6 +1625,10 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx,
 
    if (ctx->compiler->has_isam_v) {
       sam->flags |= (IR3_INSTR_V | IR3_INSTR_INV_1D);
+
+      if (imm_offset) {
+         sam->flags |= IR3_INSTR_IMM_OFFSET;
+      }
    }
 
    ir3_handle_nonuniform(sam, intr);

src/freedreno/ir3/ir3_context.c

@@ -31,6 +31,7 @@
 #include "ir3_shader.h"
 #include "nir.h"
 #include "nir_intrinsics_indices.h"
+#include "util/u_math.h"
 
 struct ir3_context *
 ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
@@ -673,3 +674,29 @@ ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
     */
    array_insert(block, block->keeps, mov);
 }
+
+void
+ir3_lower_imm_offset(struct ir3_context *ctx, nir_intrinsic_instr *intr,
+                     nir_src *offset_src, unsigned imm_offset_bits,
+                     struct ir3_instruction **offset, unsigned *imm_offset)
+{
+   nir_const_value *nir_const_offset = nir_src_as_const_value(*offset_src);
+   int base = nir_intrinsic_base(intr);
+   unsigned imm_offset_bound = (1 << imm_offset_bits);
+   assert(base >= 0 && base < imm_offset_bound);
+
+   if (nir_const_offset) {
+      /* If both the offset and the base (immed offset) are constants, lower
+       * the offset to a multiple of the bound and the immed offset to the
+       * remainder. This ensures that the offset register can often be reused
+       * among multiple contiguous accesses.
+       */
+      uint32_t full_offset = base + nir_const_offset->u32;
+      *offset =
+         create_immed(ctx->block, ROUND_DOWN_TO(full_offset, imm_offset_bound));
+      *imm_offset = full_offset % imm_offset_bound;
+   } else {
+      *offset = ir3_get_src(ctx, offset_src)[0];
+      *imm_offset = base;
+   }
+}

src/freedreno/ir3/ir3_context.h

@@ -255,6 +255,10 @@ struct ir3_instruction *ir3_create_array_load(struct ir3_context *ctx,
 void ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr,
                             int n, struct ir3_instruction *src,
                             struct ir3_instruction *address);
+void ir3_lower_imm_offset(struct ir3_context *ctx, nir_intrinsic_instr *intr,
+                          nir_src *offset_src, unsigned imm_offset_bits,
+                          struct ir3_instruction **offset,
+                          unsigned *imm_offset);
 
 static inline type_t
 utype_for_size(unsigned bit_size)