mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 15:50:17 +01:00
ir3: lower SSBO access imm offsets
Add the BASE index to the load/store_ssbo_ir3 intrinsic to store an immediate offset. This offset is encoded in the corresponding fields of isam.v/ldib.b/stib.b. One extra optimization is implemented: whenever the regular offset is also a constant, the total offset (regular plus immediate) is aligned down to a multiple of the max immediate offset and this is used as the regular offset while the immediate is set to the remainder. This ensures that the register used for the regular offset can often be reused among multiple contiguous accesses. Signed-off-by: Job Noorman <jnoorman@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28664>
This commit is contained in:
parent
759a4679a3
commit
d3f8de791d
5 changed files with 59 additions and 9 deletions
|
|
@ -1286,9 +1286,9 @@ intrinsic("cmat_copy", src_comp=[-1, -1])
|
|||
# The float versions are not handled because those are not supported
|
||||
# by the backend.
|
||||
store("ssbo_ir3", [1, 1, 1],
|
||||
indices=[WRITE_MASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
|
||||
indices=[BASE, WRITE_MASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
|
||||
load("ssbo_ir3", [1, 1, 1],
|
||||
indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
|
||||
indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
|
||||
intrinsic("ssbo_atomic_ir3", src_comp=[1, 1, 1, 1], dest_comp=1,
|
||||
indices=[ACCESS, ATOMIC_OP])
|
||||
intrinsic("ssbo_atomic_swap_ir3", src_comp=[1, 1, 1, 1, 1], dest_comp=1,
|
||||
|
|
|
|||
|
|
@ -37,6 +37,20 @@
|
|||
* encoding compared to a4xx/a5xx.
|
||||
*/
|
||||
|
||||
static void
|
||||
lower_ssbo_offset(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
||||
nir_src *offset_src,
|
||||
struct ir3_instruction **offset, unsigned *imm_offset)
|
||||
{
|
||||
if (ctx->compiler->has_ssbo_imm_offsets) {
|
||||
ir3_lower_imm_offset(ctx, intr, offset_src, 7, offset, imm_offset);
|
||||
} else {
|
||||
assert(nir_intrinsic_base(intr) == 0);
|
||||
*offset = ir3_get_src(ctx, offset_src)[0];
|
||||
*imm_offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* src[] = { buffer_index, offset }. const_index: base (immediate offset) */
|
||||
static void
|
||||
emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
||||
|
|
@ -45,9 +59,9 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
|||
struct ir3_block *b = ctx->block;
|
||||
struct ir3_instruction *offset;
|
||||
struct ir3_instruction *ldib;
|
||||
unsigned imm_offset_val = 0;
|
||||
unsigned imm_offset_val;
|
||||
|
||||
offset = ir3_get_src(ctx, &intr->src[2])[0];
|
||||
lower_ssbo_offset(ctx, intr, &intr->src[2], &offset, &imm_offset_val);
|
||||
struct ir3_instruction *imm_offset = create_immed(b, imm_offset_val);
|
||||
|
||||
ldib = ir3_LDIB(b, ir3_ssbo_to_ibo(ctx, intr->src[0]), 0, offset, 0,
|
||||
|
|
@ -78,15 +92,15 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
struct ir3_instruction *stib, *val, *offset;
|
||||
unsigned wrmask = nir_intrinsic_write_mask(intr);
|
||||
unsigned ncomp = ffs(~wrmask) - 1;
|
||||
unsigned imm_offset_val = 0;
|
||||
unsigned imm_offset_val;
|
||||
|
||||
assert(wrmask == BITFIELD_MASK(intr->num_components));
|
||||
|
||||
/* src0 is offset, src1 is immediate offset, src2 is value:
|
||||
*/
|
||||
val = ir3_create_collect(b, ir3_get_src(ctx, &intr->src[0]), ncomp);
|
||||
offset = ir3_get_src(ctx, &intr->src[3])[0];
|
||||
|
||||
lower_ssbo_offset(ctx, intr, &intr->src[3], &offset, &imm_offset_val);
|
||||
struct ir3_instruction *imm_offset = create_immed(b, imm_offset_val);
|
||||
|
||||
stib = ir3_STIB(b, ir3_ssbo_to_ibo(ctx, intr->src[1]), 0, offset, 0,
|
||||
|
|
|
|||
|
|
@ -1603,14 +1603,15 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx,
|
|||
}
|
||||
|
||||
struct ir3_block *b = ctx->block;
|
||||
struct ir3_instruction *offset = ir3_get_src(ctx, &intr->src[2])[0];
|
||||
nir_src *offset_src = &intr->src[2];
|
||||
struct ir3_instruction *coords = NULL;
|
||||
unsigned imm_offset = 0;
|
||||
|
||||
if (ctx->compiler->has_isam_v) {
|
||||
coords = offset;
|
||||
ir3_lower_imm_offset(ctx, intr, offset_src, 8, &coords, &imm_offset);
|
||||
} else {
|
||||
coords = ir3_collect(b, offset, create_immed(b, 0));
|
||||
coords =
|
||||
ir3_collect(b, ir3_get_src(ctx, offset_src)[0], create_immed(b, 0));
|
||||
}
|
||||
|
||||
struct tex_src_info info = get_image_ssbo_samp_tex_src(ctx, &intr->src[0], false);
|
||||
|
|
@ -1624,6 +1625,10 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx,
|
|||
|
||||
if (ctx->compiler->has_isam_v) {
|
||||
sam->flags |= (IR3_INSTR_V | IR3_INSTR_INV_1D);
|
||||
|
||||
if (imm_offset) {
|
||||
sam->flags |= IR3_INSTR_IMM_OFFSET;
|
||||
}
|
||||
}
|
||||
|
||||
ir3_handle_nonuniform(sam, intr);
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@
|
|||
#include "ir3_shader.h"
|
||||
#include "nir.h"
|
||||
#include "nir_intrinsics_indices.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
struct ir3_context *
|
||||
ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
|
||||
|
|
@ -673,3 +674,29 @@ ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
|
|||
*/
|
||||
array_insert(block, block->keeps, mov);
|
||||
}
|
||||
|
||||
void
|
||||
ir3_lower_imm_offset(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
||||
nir_src *offset_src, unsigned imm_offset_bits,
|
||||
struct ir3_instruction **offset, unsigned *imm_offset)
|
||||
{
|
||||
nir_const_value *nir_const_offset = nir_src_as_const_value(*offset_src);
|
||||
int base = nir_intrinsic_base(intr);
|
||||
unsigned imm_offset_bound = (1 << imm_offset_bits);
|
||||
assert(base >= 0 && base < imm_offset_bound);
|
||||
|
||||
if (nir_const_offset) {
|
||||
/* If both the offset and the base (immed offset) are constants, lower the
|
||||
* offset to a multiple of the bound and the immed offset to the
|
||||
* remainder. This ensures that the offset register can often be reused
|
||||
* among multiple contiguous accesses.
|
||||
*/
|
||||
uint32_t full_offset = base + nir_const_offset->u32;
|
||||
*offset =
|
||||
create_immed(ctx->block, ROUND_DOWN_TO(full_offset, imm_offset_bound));
|
||||
*imm_offset = full_offset % imm_offset_bound;
|
||||
} else {
|
||||
*offset = ir3_get_src(ctx, offset_src)[0];
|
||||
*imm_offset = base;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -255,6 +255,10 @@ struct ir3_instruction *ir3_create_array_load(struct ir3_context *ctx,
|
|||
void ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr,
|
||||
int n, struct ir3_instruction *src,
|
||||
struct ir3_instruction *address);
|
||||
void ir3_lower_imm_offset(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
||||
nir_src *offset_src, unsigned imm_offset_bits,
|
||||
struct ir3_instruction **offset,
|
||||
unsigned *imm_offset);
|
||||
|
||||
static inline type_t
|
||||
utype_for_size(unsigned bit_size)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue