From 2e2a36ddb06a72bf7db1f5004a2e7dcd45d1de44 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Fri, 29 Nov 2024 17:09:16 +0100 Subject: [PATCH] ir3: add codegen for read_getlast_ir3 Signed-off-by: Job Noorman Part-of: --- src/freedreno/ir3/instr-a3xx.h | 1 + src/freedreno/ir3/ir3.h | 2 ++ src/freedreno/ir3/ir3_compiler_nir.c | 13 +++++++++++++ src/freedreno/ir3/ir3_lower_subgroups.c | 10 +++++++++- src/freedreno/ir3/ir3_validate.c | 2 ++ 5 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h index f08750dd746..a6546a4530a 100644 --- a/src/freedreno/ir3/instr-a3xx.h +++ b/src/freedreno/ir3/instr-a3xx.h @@ -107,6 +107,7 @@ typedef enum { OPC_READ_COND_MACRO = _OPC(1, 54), OPC_READ_FIRST_MACRO = _OPC(1, 55), OPC_SHPS_MACRO = _OPC(1, 56), + OPC_READ_GETLAST_MACRO = _OPC(1, 57), /* Macros that expand to a loop */ OPC_SCAN_MACRO = _OPC(1, 58), diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index a38064d4477..20370c59573 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1083,6 +1083,7 @@ is_subgroup_cond_mov_macro(struct ir3_instruction *instr) case OPC_ALL_MACRO: case OPC_ELECT_MACRO: case OPC_READ_COND_MACRO: + case OPC_READ_GETLAST_MACRO: case OPC_READ_FIRST_MACRO: case OPC_SCAN_MACRO: case OPC_SCAN_CLUSTERS_MACRO: @@ -2764,6 +2765,7 @@ INSTR1(ANY_MACRO) INSTR1(ALL_MACRO) INSTR1(READ_FIRST_MACRO) INSTR2(READ_COND_MACRO) +INSTR1(READ_GETLAST_MACRO) static inline struct ir3_instruction * ir3_ELECT_MACRO(struct ir3_block *block) diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index c2d28e2b662..fac4bbba217 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -3129,6 +3129,19 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) break; } + case nir_intrinsic_read_getlast_ir3: { + struct ir3_instruction *src = ir3_get_src(ctx, &intr->src[0])[0]; + dst[0] = ir3_READ_GETLAST_MACRO(ctx->block, src, 0); + dst[0]->dsts[0]->flags |= IR3_REG_SHARED; + /* See above. */ + if (src->dsts[0]->flags & IR3_REG_HALF) { + dst[0] = ir3_MOV(b, dst[0], TYPE_U32); + if (!ctx->compiler->has_scalar_alu) + dst[0]->dsts[0]->flags &= ~IR3_REG_SHARED; + } + break; + } + case nir_intrinsic_ballot: { struct ir3_instruction *ballot; unsigned components = intr->def.num_components; diff --git a/src/freedreno/ir3/ir3_lower_subgroups.c b/src/freedreno/ir3/ir3_lower_subgroups.c index 1979752e6bb..95db756067a 100644 --- a/src/freedreno/ir3/ir3_lower_subgroups.c +++ b/src/freedreno/ir3/ir3_lower_subgroups.c @@ -253,6 +253,7 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in case OPC_ALL_MACRO: case OPC_ELECT_MACRO: case OPC_READ_COND_MACRO: + case OPC_READ_GETLAST_MACRO: case OPC_SCAN_MACRO: case OPC_SCAN_CLUSTERS_MACRO: break; @@ -439,6 +440,11 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in branch_opc = OPC_GETONE; branch_flags = instr->flags & IR3_INSTR_NEEDS_HELPERS; break; + case OPC_READ_GETLAST_MACRO: + after_block->reconvergence_point = true; + branch_opc = OPC_GETLAST; + branch_flags = instr->flags & IR3_INSTR_NEEDS_HELPERS; + break; default: unreachable("bad opcode"); } @@ -464,12 +470,14 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in break; } + case OPC_READ_GETLAST_MACRO: case OPC_READ_COND_MACRO: { struct ir3_instruction *mov = ir3_instr_create(then_block, OPC_MOV, 1, 1); ir3_dst_create(mov, instr->dsts[0]->num, instr->dsts[0]->flags); struct ir3_register *new_src = ir3_src_create(mov, 0, 0); - *new_src = *instr->srcs[1]; + unsigned idx = instr->opc == OPC_READ_COND_MACRO ? 1 : 0; + *new_src = *instr->srcs[idx]; mov->cat1.dst_type = TYPE_U32; mov->cat1.src_type = (new_src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32; diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c index 3041e46f3f8..5e0ed03a993 100644 --- a/src/freedreno/ir3/ir3_validate.c +++ b/src/freedreno/ir3/ir3_validate.c @@ -241,6 +241,7 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr) */ } else if (instr->opc == OPC_ANY_MACRO || instr->opc == OPC_ALL_MACRO || instr->opc == OPC_READ_FIRST_MACRO || + instr->opc == OPC_READ_GETLAST_MACRO || instr->opc == OPC_READ_COND_MACRO) { /* nothing yet */ } else if (n > 0) { @@ -283,6 +284,7 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr) ctx, util_is_power_of_two_or_zero(instr->dsts[0]->wrmask + 1)); } else if (instr->opc == OPC_ANY_MACRO || instr->opc == OPC_ALL_MACRO || instr->opc == OPC_READ_FIRST_MACRO || + instr->opc == OPC_READ_GETLAST_MACRO || instr->opc == OPC_READ_COND_MACRO) { /* nothing yet */ } else if (instr->opc == OPC_ELECT_MACRO || instr->opc == OPC_SHPS_MACRO) {