From 2ab0cf2b549bdeef92fb72f16119bf1d21f9e90c Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Tue, 12 Oct 2021 08:50:55 -0700
Subject: [PATCH] freedreno/ir3: Use flat.b to load flat varyings on a6xx

The flat.b/bary.f cat2 instruction should be faster than an ldlv cat6
instruction, even with a couple of additional moves (which will be
removed in the next patch).

Part-of:
---
 src/freedreno/ir3/ir3.h              |  4 ++++
 src/freedreno/ir3/ir3_compiler_nir.c | 17 ++++++++++++++---
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 96835e0e595..5cdf85d2970 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -1462,6 +1462,10 @@ ir3_output_conv_src_type(struct ir3_instruction *instr, type_t base_type)
        */
       return TYPE_F32;
 
+   case OPC_FLAT_B:
+      /* Treat the input data as u32 if not interpolating. */
+      return TYPE_U32;
+
    default:
       return (instr->srcs[0]->flags & IR3_REG_HALF) ? half_type(base_type)
                                                     : full_type(base_type);
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 6302f293072..7b21dfa40d9 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -86,9 +86,20 @@ create_frag_input(struct ir3_context *ctx, struct ir3_instruction *coord,
    if (coord) {
       instr = ir3_BARY_F(block, inloc, 0, coord, 0);
    } else if (ctx->compiler->flat_bypass) {
-      instr = ir3_LDLV(block, inloc, 0, create_immed(block, 1), 0);
-      instr->cat6.type = TYPE_U32;
-      instr->cat6.iim_val = 1;
+      if (ctx->compiler->gen >= 6) {
+         struct ir3_instruction *ij[2];
+
+         for (int i = 0; i < 2; i++) {
+            ij[i] = create_immed(block, fui(0.0));
+         }
+
+         instr = ir3_FLAT_B(block, inloc, 0, ir3_create_collect(block, ij, 2), 0);
+         instr->srcs[1]->wrmask = 0x3;
+      } else {
+         instr = ir3_LDLV(block, inloc, 0, create_immed(block, 1), 0);
+         instr->cat6.type = TYPE_U32;
+         instr->cat6.iim_val = 1;
+      }
    } else {
       instr = ir3_BARY_F(block, inloc, 0, ctx->ij[IJ_PERSP_PIXEL], 0);
       instr->srcs[1]->wrmask = 0x3;