diff --git a/src/gallium/drivers/panfrost/ci/expected-failures.txt b/src/gallium/drivers/panfrost/ci/expected-failures.txt index f72aa07deb9..0fe3342802d 100644 --- a/src/gallium/drivers/panfrost/ci/expected-failures.txt +++ b/src/gallium/drivers/panfrost/ci/expected-failures.txt @@ -1872,9 +1872,6 @@ dEQP-GLES2.functional.shaders.random.all_features.fragment.74 dEQP-GLES2.functional.shaders.random.all_features.fragment.77 dEQP-GLES2.functional.shaders.random.all_features.fragment.95 dEQP-GLES2.functional.shaders.random.all_features.vertex.17 -dEQP-GLES2.functional.shaders.random.conditionals.combined.88 -dEQP-GLES2.functional.shaders.random.conditionals.combined.92 -dEQP-GLES2.functional.shaders.random.conditionals.fragment.24 dEQP-GLES2.functional.shaders.random.exponential.fragment.46 dEQP-GLES2.functional.shaders.random.exponential.vertex.46 dEQP-GLES2.functional.shaders.random.texture.fragment.1 diff --git a/src/gallium/drivers/panfrost/midgard/helpers.h b/src/gallium/drivers/panfrost/midgard/helpers.h index be2a45cd19d..dc2de15931b 100644 --- a/src/gallium/drivers/panfrost/midgard/helpers.h +++ b/src/gallium/drivers/panfrost/midgard/helpers.h @@ -196,7 +196,8 @@ static struct { [midgard_alu_op_ule] = {"ule", UNITS_MOST}, [midgard_alu_op_icsel] = {"icsel", UNITS_ADD}, - [midgard_alu_op_fcsel_i] = {"fcsel_i", UNITS_ADD}, + [midgard_alu_op_icsel_v] = {"icsel_v", UNITS_ADD}, + [midgard_alu_op_fcsel_v] = {"fcsel_v", UNITS_ADD}, [midgard_alu_op_fcsel] = {"fcsel", UNITS_ADD | UNIT_SMUL}, [midgard_alu_op_frcp] = {"frcp", UNIT_VLUT}, diff --git a/src/gallium/drivers/panfrost/midgard/midgard.h b/src/gallium/drivers/panfrost/midgard/midgard.h index c2808937b43..91d1c075f96 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard.h +++ b/src/gallium/drivers/panfrost/midgard/midgard.h @@ -138,8 +138,9 @@ typedef enum { midgard_alu_op_i2f = 0xB8, midgard_alu_op_u2f = 0xBC, - midgard_alu_op_icsel = 0xC1, - midgard_alu_op_fcsel_i = 0xC4, + midgard_alu_op_icsel_v = 0xC0, /* 
condition code r31 */ + midgard_alu_op_icsel = 0xC1, /* condition code r31.w */ + midgard_alu_op_fcsel_v = 0xC4, midgard_alu_op_fcsel = 0xC5, midgard_alu_op_fround = 0xC6, diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c index 25316cab053..4a26ba769b2 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c +++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c @@ -138,6 +138,12 @@ typedef struct midgard_instruction { /* I.e. (1 << alu_bit) */ int unit; + /* When emitting bundle, should this instruction have a break forced + * before it? Used for r31 writes which are valid only within a single + * bundle and *need* to happen as early as possible... this is a hack, + * TODO remove when we have a scheduler */ + bool precede_break; + bool has_constants; float constants[4]; uint16_t inline_constant; @@ -287,21 +293,6 @@ vector_alu_modifiers(nir_alu_src *src, bool is_int) return alu_src; } -static bool -mir_nontrivial_mod(midgard_vector_alu_src src, bool is_int, unsigned mask) -{ - /* abs or neg */ - if (!is_int && src.mod) return true; - - /* swizzle */ - for (unsigned c = 0; c < 4; ++c) { - if (!(mask & (1 << c))) continue; - if (((src.swizzle >> (2*c)) & 3) != c) return true; - } - - return false; -} - /* 'Intrinsic' move for misc aliasing uses independent of actual NIR ALU code */ static midgard_instruction @@ -715,58 +706,6 @@ midgard_nir_lower_fdot2_body(nir_builder *b, nir_alu_instr *alu) nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(sum)); } -/* Lower csel with mixed condition channels to mulitple csel instructions. For - * context, the csel ops on Midgard are vector in *outputs*, but not in - * *conditions*. So, if the condition is e.g. yyyy, a single op can select a - * vec4. But if the condition is e.g. 
xyzw, four ops are needed as the ISA - * can't cope with the divergent channels.*/ - -static void -midgard_nir_lower_mixed_csel_body(nir_builder *b, nir_alu_instr *alu) -{ - if (alu->op != nir_op_bcsel) - return; - - b->cursor = nir_before_instr(&alu->instr); - - /* Must be run before registering */ - assert(alu->dest.dest.is_ssa); - - /* Check for mixed condition */ - - unsigned comp = alu->src[0].swizzle[0]; - unsigned nr_components = alu->dest.dest.ssa.num_components; - - bool mixed = false; - - for (unsigned c = 1; c < nr_components; ++c) - mixed |= (alu->src[0].swizzle[c] != comp); - - if (!mixed) - return; - - /* We're mixed, so lower */ - - assert(nr_components <= 4); - nir_ssa_def *results[4]; - - nir_ssa_def *cond = nir_ssa_for_alu_src(b, alu, 0); - nir_ssa_def *choice0 = nir_ssa_for_alu_src(b, alu, 1); - nir_ssa_def *choice1 = nir_ssa_for_alu_src(b, alu, 2); - - for (unsigned c = 0; c < nr_components; ++c) { - results[c] = nir_bcsel(b, - nir_channel(b, cond, c), - nir_channel(b, choice0, c), - nir_channel(b, choice1, c)); - } - - /* Replace with our scalarized version */ - - nir_ssa_def *result = nir_vec(b, results, nr_components); - nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(result)); -} - static int midgard_nir_sysval_for_intrinsic(nir_intrinsic_instr *instr) { @@ -851,36 +790,6 @@ midgard_nir_lower_fdot2(nir_shader *shader) return progress; } -static bool -midgard_nir_lower_mixed_csel(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(function, shader) { - if (!function->impl) continue; - - nir_builder _b; - nir_builder *b = &_b; - nir_builder_init(b, function->impl); - - nir_foreach_block(block, function->impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_alu) continue; - - nir_alu_instr *alu = nir_instr_as_alu(instr); - midgard_nir_lower_mixed_csel_body(b, alu); - - progress |= true; - } - } - - nir_metadata_preserve(function->impl, nir_metadata_block_index | 
nir_metadata_dominance); - - } - - return progress; -} - static void optimise_nir(nir_shader *nir) { @@ -892,7 +801,6 @@ optimise_nir(nir_shader *nir) NIR_PASS(progress, nir, nir_lower_regs_to_ssa); NIR_PASS(progress, nir, midgard_nir_lower_fdot2); - NIR_PASS(progress, nir, midgard_nir_lower_mixed_csel); nir_lower_tex_options lower_tex_options = { .lower_rect = true @@ -957,6 +865,11 @@ } while (progress); NIR_PASS(progress, nir, nir_opt_algebraic_late); + + /* We implement booleans as 32-bit 0/~0 */ + NIR_PASS(progress, nir, nir_lower_bool_to_int32); + + /* Now that booleans are lowered, we can run our late opts */ NIR_PASS(progress, nir, midgard_nir_lower_algebraic_late); /* Lower mods for float ops only. Integer ops don't support modifiers @@ -967,9 +880,6 @@ NIR_PASS(progress, nir, nir_copy_prop); NIR_PASS(progress, nir, nir_opt_dce); - /* We implement booleans as 32-bit 0/~0 */ - NIR_PASS(progress, nir, nir_lower_bool_to_int32); - /* Take us out of SSA */ NIR_PASS(progress, nir, nir_lower_locals_to_regs); NIR_PASS(progress, nir, nir_convert_from_ssa, true); @@ -1119,8 +1029,21 @@ nir_alu_src_index(compiler_context *ctx, nir_alu_src *src) return nir_src_index(ctx, &src->src); } -/* Midgard puts conditionals in r31.w; move an arbitrary source (the output of - * a conditional test) into that register */ +static bool +nir_is_non_scalar_swizzle(nir_alu_src *src, unsigned nr_components) +{ + unsigned comp = src->swizzle[0]; + + for (unsigned c = 1; c < nr_components; ++c) { + if (src->swizzle[c] != comp) + return true; + } + + return false; +} + +/* Midgard puts scalar conditionals in r31.w; move an arbitrary source (the + * output of a conditional test) into that register */ static void emit_condition(compiler_context *ctx, nir_src *src, bool for_branch, unsigned component) @@ -1138,8 +1061,13 @@ emit_condition(compiler_context *ctx, nir_src *src, bool for_branch, unsigned co midgard_instruction ins = { 
.type = TAG_ALU_4, - .unit = for_branch ? UNIT_SMUL : UNIT_SADD, /* TODO: DEDUCE THIS */ + + /* We need to set the conditional as close as possible */ + .precede_break = true, + .unit = for_branch ? UNIT_SMUL : UNIT_SADD, + .ssa_args = { + .src0 = condition, .src1 = condition, .dest = SSA_FIXED_REGISTER(31), @@ -1157,6 +1085,46 @@ emit_condition(compiler_context *ctx, nir_src *src, bool for_branch, unsigned co emit_mir_instruction(ctx, ins); } +/* Or, for mixed conditions (with csel_v), here's a vector version using all of + * r31 instead */ + +static void +emit_condition_mixed(compiler_context *ctx, nir_alu_src *src, unsigned nr_comp) +{ + int condition = nir_src_index(ctx, &src->src); + + /* Source to swizzle the desired component into w */ + + const midgard_vector_alu_src alu_src = { + .swizzle = SWIZZLE_FROM_ARRAY(src->swizzle), + }; + + /* There is no boolean move instruction. Instead, we simulate a move by + * ANDing the condition with itself to get it into r31.w */ + + midgard_instruction ins = { + .type = TAG_ALU_4, + .precede_break = true, + .ssa_args = { + .src0 = condition, + .src1 = condition, + .dest = SSA_FIXED_REGISTER(31), + }, + .alu = { + .op = midgard_alu_op_iand, + .reg_mode = midgard_reg_mode_32, + .dest_override = midgard_dest_override_none, + .mask = expand_writemask((1 << nr_comp) - 1), + .src1 = vector_alu_srco_unsigned(alu_src), + .src2 = vector_alu_srco_unsigned(alu_src) + }, + }; + + emit_mir_instruction(ctx, ins); +} + + + /* Likewise, indirect offsets are put in r27.w. TODO: Allow componentwise * pinning to eliminate this move in all known cases */ @@ -1189,7 +1157,6 @@ emit_indirect_offset(compiler_context *ctx, nir_src *src) case nir_op_##nir: \ op = midgard_alu_op_##_op; \ break; - static bool nir_is_fzero_constant(nir_src src) { @@ -1332,53 +1299,34 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) break; } - /* For a few special csel cases not handled by NIR, we can opt to - * bitwise. 
Otherwise, we emit the condition and do a real csel */ - case nir_op_b32csel: { - if (nir_is_fzero_constant(instr->src[2].src)) { - /* (b ? v : 0) = (b & v) */ - op = midgard_alu_op_iand; - nr_inputs = 2; - } else if (nir_is_fzero_constant(instr->src[1].src)) { - /* (b ? 0 : v) = (!b ? v : 0) = (~b & v) = (v & ~b) */ - op = midgard_alu_op_iandnot; - nr_inputs = 2; - instr->src[1] = instr->src[0]; - instr->src[0] = instr->src[2]; - } else { - /* Midgard features both fcsel and icsel, depending on - * the type of the arguments/output. However, as long - * as we're careful we can _always_ use icsel and - * _never_ need fcsel, since the latter does additional - * floating-point-specific processing whereas the - * former just moves bits on the wire. It's not obvious - * why these are separate opcodes, save for the ability - * to do things like sat/pos/abs/neg for free */ + /* Midgard features both fcsel and icsel, depending on + * the type of the arguments/output. However, as long + * as we're careful we can _always_ use icsel and + * _never_ need fcsel, since the latter does additional + * floating-point-specific processing whereas the + * former just moves bits on the wire. It's not obvious + * why these are separate opcodes, save for the ability + * to do things like sat/pos/abs/neg for free */ - op = midgard_alu_op_icsel; + bool mixed = nir_is_non_scalar_swizzle(&instr->src[0], nr_components); + op = mixed ? 
midgard_alu_op_icsel_v : midgard_alu_op_icsel; - /* csel works as a two-arg in Midgard, since the condition is hardcoded in r31.w */ - nr_inputs = 2; + /* csel works as a two-arg in Midgard, since the condition is hardcoded in r31.w */ + nr_inputs = 2; - /* Figure out which component the condition is in */ + /* Emit the condition into r31 */ - unsigned comp = instr->src[0].swizzle[0]; + if (mixed) + emit_condition_mixed(ctx, &instr->src[0], nr_components); + else + emit_condition(ctx, &instr->src[0].src, false, instr->src[0].swizzle[0]); - /* Make sure NIR isn't throwing a mixed condition at us */ + /* The condition is the first argument; move the other + * arguments up one to be a binary instruction for + * Midgard */ - for (unsigned c = 1; c < nr_components; ++c) - assert(instr->src[0].swizzle[c] == comp); - - /* Emit the condition into r31.w */ - emit_condition(ctx, &instr->src[0].src, false, comp); - - /* The condition is the first argument; move the other - * arguments up one to be a binary instruction for - * Midgard */ - - memmove(instr->src, instr->src + 1, 2 * sizeof(nir_alu_src)); - } + memmove(instr->src, instr->src + 1, 2 * sizeof(nir_alu_src)); break; } @@ -2596,6 +2544,10 @@ schedule_bundle(compiler_context *ctx, midgard_block *block, midgard_instruction /* Ensure that the chain can continue */ if (ains->type != TAG_ALU_4) break; + /* If there's already something in the bundle and we + * have weird scheduler constraints, break now */ + if (ains->precede_break && index) break; + /* According to the presentation "The ARM * Mali-T880 Mobile GPU" from HotChips 27, * there are two pipeline stages. 
Branching @@ -3296,6 +3248,21 @@ midgard_opt_dead_code_eliminate(compiler_context *ctx, midgard_block *block) return progress; } +static bool +mir_nontrivial_mod(midgard_vector_alu_src src, bool is_int, unsigned mask) +{ + /* abs or neg */ + if (!is_int && src.mod) return true; + + /* swizzle */ + for (unsigned c = 0; c < 4; ++c) { + if (!(mask & (1 << c))) continue; + if (((src.swizzle >> (2*c)) & 3) != c) return true; + } + + return false; +} + static bool midgard_opt_copy_prop(compiler_context *ctx, midgard_block *block) { @@ -3315,6 +3282,10 @@ midgard_opt_copy_prop(compiler_context *ctx, midgard_block *block) if (to >= ctx->func->impl->ssa_alloc) continue; if (from >= ctx->func->impl->ssa_alloc) continue; + /* Constant propagation is not handled here, either */ + if (ins->ssa_args.inline_constant) continue; + if (ins->has_constants) continue; + /* Also, if the move has side effects, we're helpless */ midgard_vector_alu_src src = diff --git a/src/gallium/drivers/panfrost/midgard/midgard_nir_algebraic.py b/src/gallium/drivers/panfrost/midgard/midgard_nir_algebraic.py index 7c8cd06feed..df0caa26640 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard_nir_algebraic.py +++ b/src/gallium/drivers/panfrost/midgard/midgard_nir_algebraic.py @@ -28,6 +28,7 @@ import math a = 'a' b = 'b' +c = 'c' algebraic_late = [ # ineg must be lowered late, but only for integers; floats will try to @@ -35,8 +36,13 @@ algebraic_late = [ # a more standard lower_negate approach (('ineg', a), ('isub', 0, a)), -] + # These two special-cases save space/an op compared to the actual csel op + + # scheduler flexibility + + (('b32csel', a, 'b@32', 0), ('iand', a, b)), + (('b32csel', a, 0, 'b@32'), ('iand', ('inot', a), b)), +] # Midgard scales fsin/fcos arguments by pi. # Pass must be run only once, after the main loop