diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 3568abcb98c..47644d7c573 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -5067,9 +5067,9 @@ bi_compile_variant_nir(nir_shader *nir,
 
    if (ctx->arch >= 9) {
       va_optimize(ctx);
+      va_lower_isel(ctx);
 
       bi_foreach_instr_global_safe(ctx, I) {
-         va_lower_isel(I);
          va_lower_constants(ctx, I);
 
          bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
diff --git a/src/panfrost/bifrost/valhall/test/test-lower-isel.cpp b/src/panfrost/bifrost/valhall/test/test-lower-isel.cpp
index 97ff6391750..994885b66db 100644
--- a/src/panfrost/bifrost/valhall/test/test-lower-isel.cpp
+++ b/src/panfrost/bifrost/valhall/test/test-lower-isel.cpp
@@ -27,15 +27,7 @@
 
 #include
 
-static inline void
-case_cb(bi_context *ctx)
-{
-   bi_foreach_instr_global(ctx, I) {
-      va_lower_isel(I);
-   }
-}
-
-#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, case_cb)
+#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, va_lower_isel)
 #define NEGCASE(instr) CASE(instr, instr)
 
 class LowerIsel : public testing::Test {
diff --git a/src/panfrost/bifrost/valhall/va_compiler.h b/src/panfrost/bifrost/valhall/va_compiler.h
index 22aaea77c53..534f0a0ee91 100644
--- a/src/panfrost/bifrost/valhall/va_compiler.h
+++ b/src/panfrost/bifrost/valhall/va_compiler.h
@@ -39,7 +39,7 @@ void va_validate(FILE *fp, bi_context *ctx);
 void va_repair_fau(bi_builder *b, bi_instr *I);
 void va_fuse_add_imm(bi_instr *I);
 void va_lower_constants(bi_context *ctx, bi_instr *I);
-void va_lower_isel(bi_instr *I);
+void va_lower_isel(bi_context *ctx);
 void va_assign_slots(bi_context *ctx);
 void va_insert_flow_control_nops(bi_context *ctx);
 void va_merge_flow(bi_context *ctx);
diff --git a/src/panfrost/bifrost/valhall/va_lower_isel.c b/src/panfrost/bifrost/valhall/va_lower_isel.c
index 0ac9c12c40f..62a4db7c51c 100644
--- a/src/panfrost/bifrost/valhall/va_lower_isel.c
+++ b/src/panfrost/bifrost/valhall/va_lower_isel.c
@@ -25,89 +25,55 @@
 #include "valhall.h"
 #include "bi_builder.h"
 
-void
-va_lower_isel(bi_instr *I)
+/*
+ * Lower a single instruction. Returns the replacement emitted with the builder
+ * when I has to be replaced (the caller then removes I), or NULL when I is
+ * kept, possibly after being rewritten in place.
+ */
+static bi_instr *
+lower(bi_builder *b, bi_instr *I)
 {
    switch (I->op) {
 
    /* Integer addition has swizzles and addition with 0 is canonical swizzle */
    case BI_OPCODE_SWZ_V2I16:
-      I->op = BI_OPCODE_IADD_V2U16;
-      I->src[1] = bi_zero();
-      I->nr_srcs = 2;
-      break;
+      return bi_iadd_v2u16_to(b, I->dest[0], I->src[0], bi_zero(), false);
 
    case BI_OPCODE_SWZ_V4I8:
-      I->op = BI_OPCODE_IADD_V4U8;
-      I->src[1] = bi_zero();
-      I->nr_srcs = 2;
-      break;
+      return bi_iadd_v4u8_to(b, I->dest[0], I->src[0], bi_zero(), false);
 
    case BI_OPCODE_ICMP_I32:
-      I->op = BI_OPCODE_ICMP_OR_U32;
-      I->src[2] = bi_zero();
-      I->nr_srcs = 3;
-      break;
+      return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
 
    case BI_OPCODE_ICMP_V2I16:
-      I->op = BI_OPCODE_ICMP_OR_V2U16;
-      I->src[2] = bi_zero();
-      I->nr_srcs = 3;
-      break;
+      return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
 
    case BI_OPCODE_ICMP_V4I8:
-      I->op = BI_OPCODE_ICMP_OR_V4U8;
-      I->src[2] = bi_zero();
-      I->nr_srcs = 3;
-      break;
+      return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
 
    case BI_OPCODE_ICMP_U32:
-      I->op = BI_OPCODE_ICMP_OR_U32;
-      I->src[2] = bi_zero();
-      I->nr_srcs = 3;
-      break;
+      return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
 
    case BI_OPCODE_ICMP_V2U16:
-      I->op = BI_OPCODE_ICMP_OR_V2U16;
-      I->src[2] = bi_zero();
-      I->nr_srcs = 3;
-      break;
+      return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
 
    case BI_OPCODE_ICMP_V4U8:
-      I->op = BI_OPCODE_ICMP_OR_V4U8;
-      I->src[2] = bi_zero();
-      I->nr_srcs = 3;
-      break;
+      return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
 
    case BI_OPCODE_ICMP_S32:
-      I->op = BI_OPCODE_ICMP_OR_S32;
-      I->src[2] = bi_zero();
-      I->nr_srcs = 3;
-      break;
+      return bi_icmp_or_s32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
 
    case BI_OPCODE_ICMP_V2S16:
-      I->op = BI_OPCODE_ICMP_OR_V2S16;
-      I->src[2] = bi_zero();
-      I->nr_srcs = 3;
-      break;
+      return bi_icmp_or_v2s16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
 
    case BI_OPCODE_ICMP_V4S8:
-      I->op = BI_OPCODE_ICMP_OR_V4S8;
-      I->src[2] = bi_zero();
-      I->nr_srcs = 3;
-      break;
+      return bi_icmp_or_v4s8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
 
    case BI_OPCODE_FCMP_F32:
-      I->op = BI_OPCODE_FCMP_OR_F32;
-      I->src[2] = bi_zero();
-      I->nr_srcs = 3;
-      break;
+      return bi_fcmp_or_f32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
 
    case BI_OPCODE_FCMP_V2F16:
-      I->op = BI_OPCODE_FCMP_OR_V2F16;
-      I->src[2] = bi_zero();
-      I->nr_srcs = 3;
-      break;
+      return bi_fcmp_or_v2f16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
 
    /* Integer CSEL must have a signedness */
    case BI_OPCODE_CSEL_I32:
@@ -116,53 +82,66 @@ va_lower_isel(bi_instr *I)
 
       I->op = (I->op == BI_OPCODE_CSEL_I32) ? BI_OPCODE_CSEL_U32
                                             : BI_OPCODE_CSEL_V2U16;
-      break;
+      return NULL;
 
    /* Jump -> conditional branch with condition tied to true. */
    case BI_OPCODE_JUMP:
-      I->op = I->branch_target ? BI_OPCODE_BRANCHZ_I16 : BI_OPCODE_BRANCHZI;
-      I->src[1] = I->src[0];
-      I->src[0] = bi_zero();
-      I->nr_srcs = 2;
-      I->cmpf = BI_CMPF_EQ;
-      break;
+      if (I->branch_target) {
+         bi_instr *new_I = bi_branchz_i16(b, bi_zero(), I->src[0], BI_CMPF_EQ);
+         new_I->branch_target = I->branch_target;
+         return new_I;
+      } else {
+         return bi_branchzi(b, bi_zero(), I->src[0], BI_CMPF_EQ);
+      }
 
    case BI_OPCODE_AXCHG_I32:
       I->op = BI_OPCODE_ATOM_RETURN_I32;
       I->atom_opc = BI_ATOM_OPC_AXCHG;
       I->sr_count = 1;
-      break;
+      return NULL;
 
    case BI_OPCODE_ACMPXCHG_I32:
       I->op = BI_OPCODE_ATOM_RETURN_I32;
       I->atom_opc = BI_ATOM_OPC_ACMPXCHG;
       /* Reads 2, this is special cased in bir.c */
       I->sr_count = 1;
-      break;
+      return NULL;
 
    case BI_OPCODE_ATOM_RETURN_I32:
       if (bi_is_null(I->dest[0]))
          I->op = BI_OPCODE_ATOM_I32;
 
-      break;
+      return NULL;
 
    case BI_OPCODE_MUX_I32:
    case BI_OPCODE_MUX_V2I16:
       if (bi_can_replace_with_csel(I))
          bi_replace_mux_with_csel(I, true);
 
-      break;
+      return NULL;
 
    /* FADD_RSCALE.f32(x, y, z) -> FMA_RSCALE.f32(x, 1.0, y, z) */
    case BI_OPCODE_FADD_RSCALE_F32:
-      I->op = BI_OPCODE_FMA_RSCALE_F32;
-      I->src[3] = I->src[2];
-      I->src[2] = I->src[1];
-      I->src[1] = bi_imm_f32(1.0);
-      I->nr_srcs = 4;
-      break;
+      return bi_fma_rscale_f32_to(b, I->dest[0], I->src[0], bi_imm_f32(1.0),
+                                  I->src[1], I->src[2], I->special);
 
    default:
-      break;
+      return NULL;
+   }
+}
+
+/*
+ * Run lower() on each instruction visited by bi_foreach_instr_global_safe,
+ * removing every instruction for which lower() emitted a replacement.
+ */
+void
+va_lower_isel(bi_context *ctx)
+{
+   bi_foreach_instr_global_safe(ctx, I) {
+      /* Build any replacement immediately before I so it takes I's place */
+      bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
+
+      if (lower(&b, I))
+         bi_remove_instruction(I);
    }
 }