diff --git a/src/panfrost/bifrost/ISA.xml b/src/panfrost/bifrost/ISA.xml index 2a6b3ff79de..70d87cc4b67 100644 --- a/src/panfrost/bifrost/ISA.xml +++ b/src/panfrost/bifrost/ISA.xml @@ -2520,8 +2520,11 @@ + + + diff --git a/src/panfrost/bifrost/bi_ra.c b/src/panfrost/bifrost/bi_ra.c index e2b3e6caaf4..ad0f015688c 100644 --- a/src/panfrost/bifrost/bi_ra.c +++ b/src/panfrost/bifrost/bi_ra.c @@ -318,6 +318,11 @@ bi_allocate_registers(bi_context *ctx, bool *success, bool full_regs) unsigned node = bi_get_node(ins->src[0]); assert(node < node_count); l->solutions[node] = 0; + + /* Dual source blend input in r4-r7 */ + node = bi_get_node(ins->src[4]); + if (node < node_count) + l->solutions[node] = 4; } if (dest < node_count) diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 6ae343a2bd7..a6a5f9f4d7e 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -504,11 +504,14 @@ bi_emit_load_blend_input(bi_builder *b, nir_intrinsic_instr *instr) } static void -bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt) +bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, + bi_index rgba2, nir_alu_type T2, unsigned rt) { /* Reads 2 or 4 staging registers to cover the input */ unsigned size = nir_alu_type_get_type_size(T); + unsigned size_2 = nir_alu_type_get_type_size(T2); unsigned sr_count = (size <= 16) ? 2 : 4; + unsigned sr_count_2 = (size_2 <= 16) ? 2 : 4; const struct panfrost_compile_inputs *inputs = b->shader->inputs; uint64_t blend_desc = inputs->blend.bifrost_blend_desc; @@ -523,7 +526,8 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt) bi_blend_to(b, bi_register(0), rgba, bi_register(60), bi_imm_u32(blend_desc & 0xffffffff), - bi_imm_u32(blend_desc >> 32), sr_count); + bi_imm_u32(blend_desc >> 32), + bi_null(), sr_count, 0); } else { /* Blend descriptor comes from the FAU RAM. By convention, the * return address is stored in r48 and will be used by the @@ -531,11 +535,15 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt) bi_blend_to(b, bi_register(48), rgba, bi_register(60), bi_fau(BIR_FAU_BLEND_0 + rt, false), - bi_fau(BIR_FAU_BLEND_0 + rt, true), sr_count); + bi_fau(BIR_FAU_BLEND_0 + rt, true), + rgba2, sr_count, sr_count_2); } assert(rt < 8); b->shader->info.bifrost->blend[rt].type = T; + + if (T2) + b->shader->info.bifrost->blend_src1_type = T2; } /* Blend shaders do not need to run ATEST since they are dependent on a @@ -586,7 +594,6 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr) } bi_index src0 = bi_src_index(&instr->src[0]); - bi_index src1 = combined ? bi_src_index(&instr->src[4]) : bi_null(); /* By ISA convention, the coverage mask is stored in R60. The store * itself will be handled by a subsequent ATEST instruction */ @@ -598,19 +605,6 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr) return; } - - /* Dual-source blending is implemented by putting the color in - * registers r4-r7. */ - if (writeout & PAN_WRITEOUT_2) { - unsigned count = nir_src_num_components(instr->src[4]); - - for (unsigned i = 0; i < count; ++i) - bi_mov_i32_to(b, bi_register(4 + i), bi_word(src1, i)); - - b->shader->info.bifrost->blend_src1_type = - nir_intrinsic_dest_type(instr); - } - /* Emit ATEST if we have to, note ATEST requires a floating-point alpha * value, but render target #0 might not be floating point. However the * alpha value is only used for alpha-to-coverage, a stage which is @@ -648,7 +642,10 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr) if (emit_blend) { unsigned rt = combined ? 0 : (loc - FRAG_RESULT_DATA0); + bool dual = (writeout & PAN_WRITEOUT_2); bi_index color = bi_src_index(&instr->src[0]); + bi_index color2 = dual ? bi_src_index(&instr->src[4]) : bi_null(); + nir_alu_type T2 = dual ? nir_intrinsic_dest_type(instr) : 0; /* Explicit copy since BLEND inputs are precoloured to R0-R3, * TODO: maybe schedule around this or implement in RA as a @@ -667,7 +664,8 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr) nir_alu_type_get_type_size(nir_intrinsic_src_type(instr))); } - bi_emit_blend_op(b, color, nir_intrinsic_src_type(instr), rt); + bi_emit_blend_op(b, color, nir_intrinsic_src_type(instr), + color2, T2, rt); } if (b->shader->inputs->is_blend) { diff --git a/src/panfrost/bifrost/bir.c b/src/panfrost/bifrost/bir.c index 92e32f1bb45..178950d27f3 100644 --- a/src/panfrost/bifrost/bir.c +++ b/src/panfrost/bifrost/bir.c @@ -91,6 +91,8 @@ bi_count_read_registers(const bi_instr *ins, unsigned s) return 1; else if (s == 0 && bi_opcode_props[ins->op].sr_read) return bi_count_staging_registers(ins); + else if (s == 4 && ins->op == BI_OPCODE_BLEND) + return ins->sr_count_2; /* Dual source blending */ else return 1; } diff --git a/src/panfrost/bifrost/test/test-scheduler-predicates.cpp b/src/panfrost/bifrost/test/test-scheduler-predicates.cpp index 44d651db3a1..f43754d29ca 100644 --- a/src/panfrost/bifrost/test/test-scheduler-predicates.cpp +++ b/src/panfrost/bifrost/test/test-scheduler-predicates.cpp @@ -81,7 +81,7 @@ TEST_F(SchedulerPredicates, LOAD) TEST_F(SchedulerPredicates, BLEND) { - bi_instr *blend = bi_blend_to(b, TMP(), TMP(), TMP(), TMP(), TMP(), 4); + bi_instr *blend = bi_blend_to(b, TMP(), TMP(), TMP(), TMP(), TMP(), TMP(), 4, 4); ASSERT_FALSE(bi_can_fma(blend)); ASSERT_TRUE(bi_can_add(blend)); ASSERT_TRUE(bi_must_message(blend)); diff --git a/src/panfrost/ci/panfrost-g52-fails.txt b/src/panfrost/ci/panfrost-g52-fails.txt index cbfbfbe4a83..fec3d2d729a 100644 --- a/src/panfrost/ci/panfrost-g52-fails.txt +++ b/src/panfrost/ci/panfrost-g52-fails.txt @@ -29,13 +29,6 @@ shaders@point-vertex-id gl_vertexid gl_instanceid divisor,Fail shaders@point-vertex-id gl_vertexid gl_instanceid,Fail spec@arb_base_instance@arb_base_instance-drawarrays,Fail spec@arb_blend_func_extended@arb_blend_func_extended-dual-src-blending-issue-1917_gles3,Crash -spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend-explicit,Fail -spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend-explicit_gles3,Fail -spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend,Fail -spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend_gles3,Fail -spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend-pattern,Fail -spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend-pattern_gles2,Crash -spec@arb_blend_func_extended@arb_blend_func_extended-fbo-extended-blend-pattern_gles3,Fail spec@arb_color_buffer_float@gl_rgba16f-render,Fail spec@arb_color_buffer_float@gl_rgba16f-render-fog,Fail spec@arb_color_buffer_float@gl_rgba16f-render-sanity,Fail