From a83ea0253f7f9be348b6dad4ac74ecaac87ba692 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Tue, 22 Feb 2022 18:01:27 +0100
Subject: [PATCH] ir3: Use isam for bindless readonly ssbo loads

Since this isn't hooked up in gallium, only do it for bindless for now.

Part-of:
---
 src/freedreno/ir3/ir3_compiler_nir.c         | 46 +++++++++++++++++---
 src/freedreno/ir3/ir3_nir_lower_io_offsets.c | 36 +++++++++++++++
 2 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 8ff044011ce..f4dd5048182 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1313,11 +1313,11 @@ struct tex_src_info {
  * to handle with the image_mapping table..
  */
 static struct tex_src_info
-get_image_samp_tex_src(struct ir3_context *ctx, nir_intrinsic_instr *intr)
+get_image_ssbo_samp_tex_src(struct ir3_context *ctx, nir_src *src)
 {
    struct ir3_block *b = ctx->block;
    struct tex_src_info info = {0};
-   nir_intrinsic_instr *bindless_tex = ir3_bindless_resource(intr->src[0]);
+   nir_intrinsic_instr *bindless_tex = ir3_bindless_resource(*src);
 
    if (bindless_tex) {
       /* Bindless case */
@@ -1351,13 +1351,13 @@ get_image_samp_tex_src(struct ir3_context *ctx, nir_intrinsic_instr *intr)
          /* Note: the indirect source is now a vec2 instead of hvec2 */
          struct ir3_instruction *texture, *sampler;
 
-         texture = ir3_get_src(ctx, &intr->src[0])[0];
+         texture = ir3_get_src(ctx, src)[0];
          sampler = create_immed(b, 0);
          info.samp_tex = ir3_collect(b, texture, sampler);
       }
    } else {
       info.flags |= IR3_INSTR_S2EN;
-      unsigned slot = nir_src_as_uint(intr->src[0]);
+      unsigned slot = nir_src_as_uint(*src);
       unsigned tex_idx = ir3_image_to_tex(&ctx->so->image_mapping, slot);
       struct ir3_instruction *texture, *sampler;
 
@@ -1416,7 +1416,7 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
    }
 
    struct ir3_block *b = ctx->block;
-   struct tex_src_info info = get_image_samp_tex_src(ctx, intr);
+   struct tex_src_info info = get_image_ssbo_samp_tex_src(ctx, &intr->src[0]);
    struct ir3_instruction *sam;
    struct ir3_instruction *const *src0 = ir3_get_src(ctx, &intr->src[1]);
    struct ir3_instruction *coords[4];
@@ -1458,7 +1458,7 @@ emit_intrinsic_image_size_tex(struct ir3_context *ctx,
                               struct ir3_instruction **dst)
 {
    struct ir3_block *b = ctx->block;
-   struct tex_src_info info = get_image_samp_tex_src(ctx, intr);
+   struct tex_src_info info = get_image_ssbo_samp_tex_src(ctx, &intr->src[0]);
    struct ir3_instruction *sam, *lod;
    unsigned flags, ncoords = ir3_get_image_coords(intr, &flags);
    type_t dst_type = nir_dest_bit_size(intr->dest) == 16 ? TYPE_U16 : TYPE_U32;
@@ -1494,6 +1494,38 @@ emit_intrinsic_image_size_tex(struct ir3_context *ctx,
    }
 }
 
+/* src[] = { buffer_index, offset, dword_offset }. Reads the access index. */
+static void
+emit_intrinsic_load_ssbo(struct ir3_context *ctx,
+                         nir_intrinsic_instr *intr,
+                         struct ir3_instruction **dst)
+{
+   /* Note: isam currently can't handle vectorized loads/stores */
+   if (!(nir_intrinsic_access(intr) & ACCESS_CAN_REORDER) ||
+       !ir3_bindless_resource(intr->src[0]) ||
+       intr->dest.ssa.num_components > 1) {
+      ctx->funcs->emit_intrinsic_load_ssbo(ctx, intr, dst);
+      return;
+   }
+
+   struct ir3_block *b = ctx->block;
+   struct ir3_instruction *offset = ir3_get_src(ctx, &intr->src[2])[0];
+   struct ir3_instruction *coords = ir3_collect(b, offset, create_immed(b, 0));
+   struct tex_src_info info = get_image_ssbo_samp_tex_src(ctx, &intr->src[0]);
+
+   unsigned num_components = intr->dest.ssa.num_components;
+   struct ir3_instruction *sam =
+      emit_sam(ctx, OPC_ISAM, info, utype_for_size(intr->dest.ssa.bit_size),
+               MASK(num_components), coords, NULL);
+
+   ir3_handle_nonuniform(sam, intr);
+
+   sam->barrier_class = IR3_BARRIER_BUFFER_R;
+   sam->barrier_conflict = IR3_BARRIER_BUFFER_W;
+
+   ir3_split_dest(b, dst, sam, 0, num_components);
+}
+
 static void
 emit_control_barrier(struct ir3_context *ctx)
 {
@@ -2134,7 +2166,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
     * dword-offset in the last source.
     */
    case nir_intrinsic_load_ssbo_ir3:
-      ctx->funcs->emit_intrinsic_load_ssbo(ctx, intr, dst);
+      emit_intrinsic_load_ssbo(ctx, intr, dst);
       break;
    case nir_intrinsic_store_ssbo_ir3:
       if ((ctx->so->type == MESA_SHADER_FRAGMENT) &&
diff --git a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
index da680d40102..86eecad51b5 100644
--- a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
+++ b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
@@ -161,6 +161,36 @@ ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset,
    return new_offset;
 }
 
+/* isam doesn't have an "untyped" field, so it can only load 1 component at a
+ * time because our storage buffer descriptors use a 1-component format.
+ * Therefore we need to scalarize any loads that would use isam.
+ */
+static void
+scalarize_load(nir_intrinsic_instr *intrinsic, nir_builder *b)
+{
+   struct nir_ssa_def *results[NIR_MAX_VEC_COMPONENTS];
+
+   nir_ssa_def *descriptor = intrinsic->src[0].ssa;
+   nir_ssa_def *offset = intrinsic->src[1].ssa;
+   nir_ssa_def *new_offset = intrinsic->src[2].ssa;
+   unsigned comp_size = intrinsic->dest.ssa.bit_size / 8;
+   for (unsigned i = 0; i < intrinsic->dest.ssa.num_components; i++) {
+      results[i] =
+         nir_load_ssbo_ir3(b, 1, intrinsic->dest.ssa.bit_size, descriptor,
+                           nir_iadd(b, offset, nir_imm_int(b, i * comp_size)),
+                           nir_iadd(b, new_offset, nir_imm_int(b, i)),
+                           .access = nir_intrinsic_access(intrinsic),
+                           .align_mul = nir_intrinsic_align_mul(intrinsic),
+                           .align_offset = nir_intrinsic_align_offset(intrinsic));
+   }
+
+   nir_ssa_def *result = nir_vec(b, results, intrinsic->dest.ssa.num_components);
+
+   nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa, result);
+
+   nir_instr_remove(&intrinsic->instr);
+}
+
 static bool
 lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
                       unsigned ir3_ssbo_opcode, uint8_t offset_src_idx)
@@ -248,6 +278,12 @@ lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
    /* Finally remove the original intrinsic. */
    nir_instr_remove(&intrinsic->instr);
 
+   if (new_intrinsic->intrinsic == nir_intrinsic_load_ssbo_ir3 &&
+       (nir_intrinsic_access(new_intrinsic) & ACCESS_CAN_REORDER) &&
+       ir3_bindless_resource(new_intrinsic->src[0]) &&
+       new_intrinsic->num_components > 1)
+      scalarize_load(new_intrinsic, b);
+
    return true;
 }
 