diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index cb6854c4350..1bb4c178b8b 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1146,6 +1146,9 @@ intrinsic("preamble_start_ir3", [], dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORD barrier("preamble_end_ir3") +# IR3-specific intrinsic for stc. Should be used in the shader preamble. +store("uniform_ir3", [], indices=[BASE]) + # DXIL specific intrinsics # src[] = { value, mask, index, offset }. intrinsic("store_ssbo_masked_dxil", [1, 1, 1, 1]) diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 0756cd8fb30..e73eca38512 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -2071,21 +2071,23 @@ static inline struct ir3_instruction *ir3_##name( \ #define INSTR1NODST(name) __INSTR1(0, 0, name, OPC_##name) /* clang-format off */ -#define __INSTR2(flag, name, opc) \ +#define __INSTR2(flag, dst_count, name, opc) \ static inline struct ir3_instruction *ir3_##name( \ struct ir3_block *block, struct ir3_instruction *a, unsigned aflags, \ struct ir3_instruction *b, unsigned bflags) \ { \ - struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 2); \ - __ssa_dst(instr); \ + struct ir3_instruction *instr = ir3_instr_create(block, opc, dst_count, 2); \ + for (unsigned i = 0; i < dst_count; i++) \ + __ssa_dst(instr); \ __ssa_src(instr, a, aflags); \ __ssa_src(instr, b, bflags); \ instr->flags |= flag; \ return instr; \ } /* clang-format on */ -#define INSTR2F(f, name) __INSTR2(IR3_INSTR_##f, name##_##f, OPC_##name) -#define INSTR2(name) __INSTR2(0, name, OPC_##name) +#define INSTR2F(f, name) __INSTR2(IR3_INSTR_##f, 1, name##_##f, OPC_##name) +#define INSTR2(name) __INSTR2(0, 1, name, OPC_##name) +#define INSTR2NODST(name) __INSTR2(0, 0, name, OPC_##name) /* clang-format off */ #define __INSTR3(flag, dst_count, name, opc) \ @@ -2374,6 +2376,7 @@ INSTR2(QUAD_SHUFFLE_BRCST) INSTR1(QUAD_SHUFFLE_HORIZ) INSTR1(QUAD_SHUFFLE_VERT) INSTR1(QUAD_SHUFFLE_DIAG) +INSTR2NODST(STC) #if GPU >= 600 INSTR3NODST(STIB); INSTR2(LDIB); diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 4645366393a..e63c0ba2d60 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2617,6 +2617,35 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) array_insert(b, b->keeps, instr); break; } + case nir_intrinsic_store_uniform_ir3: { + unsigned components = nir_src_num_components(intr->src[0]); + unsigned dst = nir_intrinsic_base(intr); + unsigned dst_lo = dst & 0xff; + unsigned dst_hi = dst >> 8; + + struct ir3_instruction *src = + ir3_create_collect(b, ir3_get_src(ctx, &intr->src[0]), components); + struct ir3_instruction *a1 = NULL; + if (dst_hi) { + /* Encode only the high part of the destination in a1.x to increase the + * chance that we can reuse the a1.x value in subsequent stc + * instructions. + */ + a1 = ir3_get_addr1(ctx, dst_hi << 8); + } + + struct ir3_instruction *stc = + ir3_STC(ctx->block, create_immed(b, dst_lo), 0, src, 0); + stc->cat6.iim_val = components; + stc->cat6.type = TYPE_U32; + stc->barrier_conflict = IR3_BARRIER_CONST_W; + if (a1) { + ir3_instr_set_address(stc, a1); + stc->flags |= IR3_INSTR_A1EN; + } + array_insert(b, b->keeps, stc); + break; + } default: ir3_context_error(ctx, "Unhandled intrinsic type: %s\n", nir_intrinsic_infos[intr->intrinsic].name);