mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-09 12:40:23 +01:00
ir3: Plumb through two-dimensional UAV loads
There is native support for D3D-style untyped UAVs, which are an unsized array of "records." This will be needed for acceleration structures, because normal SSBO descriptors aren't large enough to cover all the 128-byte instance descriptors for the maximum number of instances (2**24). Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28447>
This commit is contained in:
parent
b6ae20f2d6
commit
91f19bcbe0
7 changed files with 135 additions and 27 deletions
|
|
@ -550,6 +550,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
|
||||
case nir_intrinsic_load_ssbo:
|
||||
case nir_intrinsic_load_ssbo_ir3:
|
||||
case nir_intrinsic_load_uav_ir3:
|
||||
is_divergent = (src_divergent(instr->src[0], state) &&
|
||||
(nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
|
||||
src_divergent(instr->src[1], state) ||
|
||||
|
|
|
|||
|
|
@ -1355,6 +1355,14 @@ intrinsic("ssbo_atomic_ir3", src_comp=[1, 1, 1, 1], dest_comp=1,
|
|||
intrinsic("ssbo_atomic_swap_ir3", src_comp=[1, 1, 1, 1, 1], dest_comp=1,
|
||||
indices=[ACCESS, ATOMIC_OP])
|
||||
|
||||
# IR3-specific intrinsic for UAVs, which are like SSBOs except that the
# address is two-dimensional: one source component selects which "record" to
# access and the other gives the offset within that record, instead of a
# single flat byte offset. The record stride is baked into the descriptor.
# Currently this is only used for the ray-tracing TLAS descriptor, where a
# normal SSBO wouldn't have enough range.
load("uav_ir3", [1, 2],
     indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
|
||||
|
||||
# System values for freedreno geometry shaders.
|
||||
system_value("vs_primitive_stride_ir3", 1)
|
||||
system_value("vs_vertex_stride_ir3", 1)
|
||||
|
|
|
|||
|
|
@ -33,24 +33,22 @@ lower_ssbo_offset(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
|||
}
|
||||
}
|
||||
|
||||
/* src[] = { buffer_index, offset }. No const_index */
|
||||
static void
|
||||
emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
||||
struct ir3_instruction **dst)
|
||||
emit_load_uav(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
||||
struct ir3_instruction *offset,
|
||||
unsigned imm_offset_val,
|
||||
struct ir3_instruction **dst)
|
||||
{
|
||||
struct ir3_builder *b = &ctx->build;
|
||||
struct ir3_instruction *offset;
|
||||
struct ir3_instruction *ldib;
|
||||
unsigned imm_offset_val;
|
||||
|
||||
lower_ssbo_offset(ctx, intr, &intr->src[2], &offset, &imm_offset_val);
|
||||
struct ir3_instruction *imm_offset = create_immed(b, imm_offset_val);
|
||||
|
||||
ldib = ir3_LDIB(b, ir3_ssbo_to_ibo(ctx, intr->src[0]), 0, offset, 0,
|
||||
imm_offset, 0);
|
||||
ldib->dsts[0]->wrmask = MASK(intr->num_components);
|
||||
ldib->cat6.iim_val = intr->num_components;
|
||||
ldib->cat6.d = 1;
|
||||
ldib->cat6.d = reg_elems(offset->dsts[0]);
|
||||
switch (intr->def.bit_size) {
|
||||
case 8:
|
||||
/* This encodes the 8-bit SSBO load and matches blob's encoding of
|
||||
|
|
@ -83,6 +81,30 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
|||
ir3_split_dest(b, dst, ldib, 0, intr->num_components);
|
||||
}
|
||||
|
||||
/* src[] = { buffer_index, offset }. No const_index */
|
||||
static void
|
||||
emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
||||
struct ir3_instruction **dst)
|
||||
{
|
||||
struct ir3_instruction *offset;
|
||||
unsigned imm_offset_val;
|
||||
|
||||
lower_ssbo_offset(ctx, intr, &intr->src[2], &offset, &imm_offset_val);
|
||||
emit_load_uav(ctx, intr, offset, imm_offset_val, dst);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_intrinsic_load_uav(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
||||
struct ir3_instruction **dst)
|
||||
{
|
||||
struct ir3_builder *b = &ctx->build;
|
||||
struct ir3_instruction *offset;
|
||||
|
||||
offset = ir3_create_collect(b, ir3_get_src(ctx, &intr->src[1]), 2);
|
||||
|
||||
emit_load_uav(ctx, intr, offset, 0, dst);
|
||||
}
|
||||
|
||||
/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
|
||||
static void
|
||||
emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
|
|
@ -576,6 +598,7 @@ emit_intrinsic_atomic_global(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
|
||||
const struct ir3_context_funcs ir3_a6xx_funcs = {
|
||||
.emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo,
|
||||
.emit_intrinsic_load_uav = emit_intrinsic_load_uav,
|
||||
.emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo,
|
||||
.emit_intrinsic_atomic_ssbo = emit_intrinsic_atomic_ssbo,
|
||||
.emit_intrinsic_load_image = emit_intrinsic_load_image,
|
||||
|
|
|
|||
|
|
@ -1957,6 +1957,39 @@ get_bindless_samp_src(struct ir3_context *ctx, nir_src *tex,
|
|||
return info;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_readonly_load_uav(struct ir3_context *ctx,
|
||||
nir_intrinsic_instr *intr,
|
||||
nir_src *index,
|
||||
struct ir3_instruction *coords,
|
||||
unsigned imm_offset,
|
||||
bool uav_load,
|
||||
struct ir3_instruction **dst)
|
||||
{
|
||||
struct ir3_builder *b = &ctx->build;
|
||||
struct tex_src_info info = get_image_ssbo_samp_tex_src(ctx, index, false);
|
||||
|
||||
unsigned num_components = intr->def.num_components;
|
||||
struct ir3_instruction *sam =
|
||||
emit_sam(ctx, OPC_ISAM, info, utype_for_size(intr->def.bit_size),
|
||||
MASK(num_components), coords, create_immed(b, imm_offset));
|
||||
|
||||
ir3_handle_nonuniform(sam, intr);
|
||||
|
||||
sam->barrier_class = IR3_BARRIER_BUFFER_R;
|
||||
sam->barrier_conflict = IR3_BARRIER_BUFFER_W;
|
||||
|
||||
ir3_split_dest(b, dst, sam, 0, num_components);
|
||||
|
||||
if (ctx->compiler->has_isam_v && !uav_load) {
|
||||
sam->flags |= (IR3_INSTR_V | IR3_INSTR_INV_1D);
|
||||
|
||||
if (imm_offset) {
|
||||
sam->flags |= IR3_INSTR_IMM_OFFSET;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* src[] = { buffer_index, offset }. No const_index */
|
||||
static void
|
||||
emit_intrinsic_load_ssbo(struct ir3_context *ctx,
|
||||
|
|
@ -1987,29 +2020,26 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx,
|
|||
ir3_collect(b, ir3_get_src(ctx, offset_src)[0], create_immed(b, 0));
|
||||
}
|
||||
|
||||
struct tex_src_info info = get_image_ssbo_samp_tex_src(ctx, &intr->src[0], false);
|
||||
emit_readonly_load_uav(ctx, intr, &intr->src[0], coords, imm_offset, false, dst);
|
||||
}
|
||||
|
||||
unsigned num_components = intr->def.num_components;
|
||||
assert(num_components == 1 || ctx->compiler->has_isam_v);
|
||||
|
||||
struct ir3_instruction *sam =
|
||||
emit_sam(ctx, OPC_ISAM, info, utype_for_size(intr->def.bit_size),
|
||||
MASK(num_components), coords, create_immed(b, imm_offset));
|
||||
|
||||
if (ctx->compiler->has_isam_v) {
|
||||
sam->flags |= (IR3_INSTR_V | IR3_INSTR_INV_1D);
|
||||
|
||||
if (imm_offset) {
|
||||
sam->flags |= IR3_INSTR_IMM_OFFSET;
|
||||
}
|
||||
static void
|
||||
emit_intrinsic_load_uav(struct ir3_context *ctx,
|
||||
nir_intrinsic_instr *intr,
|
||||
struct ir3_instruction **dst)
|
||||
{
|
||||
/* Note: isam currently can't handle vectorized loads/stores */
|
||||
if (!(nir_intrinsic_access(intr) & ACCESS_CAN_REORDER) ||
|
||||
intr->def.num_components > 1 ||
|
||||
!ctx->compiler->has_isam_ssbo) {
|
||||
ctx->funcs->emit_intrinsic_load_uav(ctx, intr, dst);
|
||||
return;
|
||||
}
|
||||
|
||||
ir3_handle_nonuniform(sam, intr);
|
||||
|
||||
sam->barrier_class = IR3_BARRIER_BUFFER_R;
|
||||
sam->barrier_conflict = IR3_BARRIER_BUFFER_W;
|
||||
|
||||
ir3_split_dest(b, dst, sam, 0, num_components);
|
||||
struct ir3_builder *b = &ctx->build;
|
||||
struct ir3_instruction *coords =
|
||||
ir3_create_collect(b, ir3_get_src(ctx, &intr->src[1]), 2);
|
||||
emit_readonly_load_uav(ctx, intr, &intr->src[0], coords, 0, true, dst);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -2809,6 +2839,9 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
case nir_intrinsic_load_ssbo_ir3:
|
||||
emit_intrinsic_load_ssbo(ctx, intr, dst);
|
||||
break;
|
||||
case nir_intrinsic_load_uav_ir3:
|
||||
emit_intrinsic_load_uav(ctx, intr, dst);
|
||||
break;
|
||||
case nir_intrinsic_store_ssbo_ir3:
|
||||
ctx->funcs->emit_intrinsic_store_ssbo(ctx, intr);
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -153,6 +153,9 @@ struct ir3_context_funcs {
|
|||
void (*emit_intrinsic_load_ssbo)(struct ir3_context *ctx,
|
||||
nir_intrinsic_instr *intr,
|
||||
struct ir3_instruction **dst);
|
||||
void (*emit_intrinsic_load_uav)(struct ir3_context *ctx,
|
||||
nir_intrinsic_instr *intr,
|
||||
struct ir3_instruction **dst);
|
||||
void (*emit_intrinsic_store_ssbo)(struct ir3_context *ctx,
|
||||
nir_intrinsic_instr *intr);
|
||||
struct ir3_instruction *(*emit_intrinsic_atomic_ssbo)(
|
||||
|
|
|
|||
|
|
@ -154,6 +154,37 @@ create_shift(nir_builder *b, nir_def *offset, int shift)
|
|||
return nir_ushr_imm(b, offset, shift);
|
||||
}
|
||||
|
||||
/* isam doesn't have an "untyped" field, so it can only load 1 component at a
|
||||
* time because our storage buffer descriptors use a 1-component format.
|
||||
* Therefore we need to scalarize any loads that would use isam.
|
||||
*/
|
||||
static void
|
||||
scalarize_load(nir_intrinsic_instr *intrinsic, nir_builder *b)
|
||||
{
|
||||
struct nir_def *results[NIR_MAX_VEC_COMPONENTS];
|
||||
|
||||
nir_def *descriptor = intrinsic->src[0].ssa;
|
||||
nir_def *offset = intrinsic->src[1].ssa;
|
||||
nir_def *record = nir_channel(b, offset, 0);
|
||||
nir_def *record_offset = nir_channel(b, offset, 1);
|
||||
|
||||
for (unsigned i = 0; i < intrinsic->def.num_components; i++) {
|
||||
results[i] =
|
||||
nir_load_uav_ir3(b, 1, intrinsic->def.bit_size, descriptor,
|
||||
nir_vec2(b, record,
|
||||
nir_iadd_imm(b, record_offset, i)),
|
||||
.access = nir_intrinsic_access(intrinsic),
|
||||
.align_mul = nir_intrinsic_align_mul(intrinsic),
|
||||
.align_offset = nir_intrinsic_align_offset(intrinsic));
|
||||
}
|
||||
|
||||
nir_def *result = nir_vec(b, results, intrinsic->def.num_components);
|
||||
|
||||
nir_def_rewrite_uses(&intrinsic->def, result);
|
||||
|
||||
nir_instr_remove(&intrinsic->instr);
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
|
||||
unsigned ir3_ssbo_opcode, uint8_t offset_src_idx)
|
||||
|
|
@ -271,6 +302,14 @@ lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx)
|
|||
progress |= lower_offset_for_ssbo(intr, b, (unsigned)ir3_intrinsic,
|
||||
offset_src_idx);
|
||||
}
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_load_uav_ir3 &&
|
||||
(nir_intrinsic_access(intr) & ACCESS_CAN_REORDER) &&
|
||||
ir3_bindless_resource(intr->src[0]) &&
|
||||
intr->num_components > 1) {
|
||||
b->cursor = nir_before_instr(instr);
|
||||
scalarize_load(intr, b);
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
|
|
|
|||
|
|
@ -495,6 +495,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
|
|||
|
||||
case nir_intrinsic_load_ubo:
|
||||
case nir_intrinsic_load_ssbo:
|
||||
case nir_intrinsic_load_uav_ir3:
|
||||
case nir_intrinsic_store_ssbo:
|
||||
case nir_intrinsic_ssbo_atomic:
|
||||
case nir_intrinsic_ssbo_atomic_swap:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue