mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-21 16:30:29 +01:00
ir3: Use isam for bindless readonly ssbo loads
Since this isn't hooked up in gallium, only do it for bindless for now. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15288>
This commit is contained in:
parent
625ebb977f
commit
a83ea0253f
2 changed files with 75 additions and 7 deletions
|
|
@ -1313,11 +1313,11 @@ struct tex_src_info {
|
|||
* to handle with the image_mapping table..
|
||||
*/
|
||||
static struct tex_src_info
|
||||
get_image_samp_tex_src(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
get_image_ssbo_samp_tex_src(struct ir3_context *ctx, nir_src *src)
|
||||
{
|
||||
struct ir3_block *b = ctx->block;
|
||||
struct tex_src_info info = {0};
|
||||
nir_intrinsic_instr *bindless_tex = ir3_bindless_resource(intr->src[0]);
|
||||
nir_intrinsic_instr *bindless_tex = ir3_bindless_resource(*src);
|
||||
|
||||
if (bindless_tex) {
|
||||
/* Bindless case */
|
||||
|
|
@ -1351,13 +1351,13 @@ get_image_samp_tex_src(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
/* Note: the indirect source is now a vec2 instead of hvec2 */
|
||||
struct ir3_instruction *texture, *sampler;
|
||||
|
||||
texture = ir3_get_src(ctx, &intr->src[0])[0];
|
||||
texture = ir3_get_src(ctx, src)[0];
|
||||
sampler = create_immed(b, 0);
|
||||
info.samp_tex = ir3_collect(b, texture, sampler);
|
||||
}
|
||||
} else {
|
||||
info.flags |= IR3_INSTR_S2EN;
|
||||
unsigned slot = nir_src_as_uint(intr->src[0]);
|
||||
unsigned slot = nir_src_as_uint(*src);
|
||||
unsigned tex_idx = ir3_image_to_tex(&ctx->so->image_mapping, slot);
|
||||
struct ir3_instruction *texture, *sampler;
|
||||
|
||||
|
|
@ -1416,7 +1416,7 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
|||
}
|
||||
|
||||
struct ir3_block *b = ctx->block;
|
||||
struct tex_src_info info = get_image_samp_tex_src(ctx, intr);
|
||||
struct tex_src_info info = get_image_ssbo_samp_tex_src(ctx, &intr->src[0]);
|
||||
struct ir3_instruction *sam;
|
||||
struct ir3_instruction *const *src0 = ir3_get_src(ctx, &intr->src[1]);
|
||||
struct ir3_instruction *coords[4];
|
||||
|
|
@ -1458,7 +1458,7 @@ emit_intrinsic_image_size_tex(struct ir3_context *ctx,
|
|||
struct ir3_instruction **dst)
|
||||
{
|
||||
struct ir3_block *b = ctx->block;
|
||||
struct tex_src_info info = get_image_samp_tex_src(ctx, intr);
|
||||
struct tex_src_info info = get_image_ssbo_samp_tex_src(ctx, &intr->src[0]);
|
||||
struct ir3_instruction *sam, *lod;
|
||||
unsigned flags, ncoords = ir3_get_image_coords(intr, &flags);
|
||||
type_t dst_type = nir_dest_bit_size(intr->dest) == 16 ? TYPE_U16 : TYPE_U32;
|
||||
|
|
@ -1494,6 +1494,38 @@ emit_intrinsic_image_size_tex(struct ir3_context *ctx,
|
|||
}
|
||||
}
|
||||
|
||||
/* src[] = { buffer_index, offset }. No const_index */
|
||||
static void
|
||||
emit_intrinsic_load_ssbo(struct ir3_context *ctx,
|
||||
nir_intrinsic_instr *intr,
|
||||
struct ir3_instruction **dst)
|
||||
{
|
||||
/* Note: isam currently can't handle vectorized loads/stores */
|
||||
if (!(nir_intrinsic_access(intr) & ACCESS_CAN_REORDER) ||
|
||||
!ir3_bindless_resource(intr->src[0]) ||
|
||||
intr->dest.ssa.num_components > 1) {
|
||||
ctx->funcs->emit_intrinsic_load_ssbo(ctx, intr, dst);
|
||||
return;
|
||||
}
|
||||
|
||||
struct ir3_block *b = ctx->block;
|
||||
struct ir3_instruction *offset = ir3_get_src(ctx, &intr->src[2])[0];
|
||||
struct ir3_instruction *coords = ir3_collect(b, offset, create_immed(b, 0));
|
||||
struct tex_src_info info = get_image_ssbo_samp_tex_src(ctx, &intr->src[0]);
|
||||
|
||||
unsigned num_components = intr->dest.ssa.num_components;
|
||||
struct ir3_instruction *sam =
|
||||
emit_sam(ctx, OPC_ISAM, info, utype_for_size(intr->dest.ssa.bit_size),
|
||||
MASK(num_components), coords, NULL);
|
||||
|
||||
ir3_handle_nonuniform(sam, intr);
|
||||
|
||||
sam->barrier_class = IR3_BARRIER_BUFFER_R;
|
||||
sam->barrier_conflict = IR3_BARRIER_BUFFER_W;
|
||||
|
||||
ir3_split_dest(b, dst, sam, 0, num_components);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_control_barrier(struct ir3_context *ctx)
|
||||
{
|
||||
|
|
@ -2134,7 +2166,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
* dword-offset in the last source.
|
||||
*/
|
||||
case nir_intrinsic_load_ssbo_ir3:
|
||||
ctx->funcs->emit_intrinsic_load_ssbo(ctx, intr, dst);
|
||||
emit_intrinsic_load_ssbo(ctx, intr, dst);
|
||||
break;
|
||||
case nir_intrinsic_store_ssbo_ir3:
|
||||
if ((ctx->so->type == MESA_SHADER_FRAGMENT) &&
|
||||
|
|
|
|||
|
|
@ -161,6 +161,36 @@ ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset,
|
|||
return new_offset;
|
||||
}
|
||||
|
||||
/* isam doesn't have an "untyped" field, so it can only load 1 component at a
|
||||
* time because our storage buffer descriptors use a 1-component format.
|
||||
* Therefore we need to scalarize any loads that would use isam.
|
||||
*/
|
||||
static void
|
||||
scalarize_load(nir_intrinsic_instr *intrinsic, nir_builder *b)
|
||||
{
|
||||
struct nir_ssa_def *results[NIR_MAX_VEC_COMPONENTS];
|
||||
|
||||
nir_ssa_def *descriptor = intrinsic->src[0].ssa;
|
||||
nir_ssa_def *offset = intrinsic->src[1].ssa;
|
||||
nir_ssa_def *new_offset = intrinsic->src[2].ssa;
|
||||
unsigned comp_size = intrinsic->dest.ssa.bit_size / 8;
|
||||
for (unsigned i = 0; i < intrinsic->dest.ssa.num_components; i++) {
|
||||
results[i] =
|
||||
nir_load_ssbo_ir3(b, 1, intrinsic->dest.ssa.bit_size, descriptor,
|
||||
nir_iadd(b, offset, nir_imm_int(b, i * comp_size)),
|
||||
nir_iadd(b, new_offset, nir_imm_int(b, i)),
|
||||
.access = nir_intrinsic_access(intrinsic),
|
||||
.align_mul = nir_intrinsic_align_mul(intrinsic),
|
||||
.align_offset = nir_intrinsic_align_offset(intrinsic));
|
||||
}
|
||||
|
||||
nir_ssa_def *result = nir_vec(b, results, intrinsic->dest.ssa.num_components);
|
||||
|
||||
nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa, result);
|
||||
|
||||
nir_instr_remove(&intrinsic->instr);
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
|
||||
unsigned ir3_ssbo_opcode, uint8_t offset_src_idx)
|
||||
|
|
@ -248,6 +278,12 @@ lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
|
|||
/* Finally remove the original intrinsic. */
|
||||
nir_instr_remove(&intrinsic->instr);
|
||||
|
||||
if (new_intrinsic->intrinsic == nir_intrinsic_load_ssbo_ir3 &&
|
||||
(nir_intrinsic_access(new_intrinsic) & ACCESS_CAN_REORDER) &&
|
||||
ir3_bindless_resource(new_intrinsic->src[0]) &&
|
||||
new_intrinsic->num_components > 1)
|
||||
scalarize_load(new_intrinsic, b);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue