diff --git a/.gitlab-ci/deqp-freedreno-a630-fails.txt b/.gitlab-ci/deqp-freedreno-a630-fails.txt index daf0960d63b..274cfd6239a 100644 --- a/.gitlab-ci/deqp-freedreno-a630-fails.txt +++ b/.gitlab-ci/deqp-freedreno-a630-fails.txt @@ -5,7 +5,6 @@ dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_clear dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_draw dEQP-VK.binding_model.descriptorset_random.sets4.constant.ubolimitlow.sbolimithigh.imglimithigh.noiub.uab.frag.ialimitlow.0 dEQP-VK.draw.output_location.array.b8g8r8a8-unorm-mediump-output-vec3 -dEQP-VK.glsl.derivate.fwidthfine.uniform_loop.vec3_mediump dEQP-VK.glsl.linkage.varying.struct.mat3x2 dEQP-VK.graphicsfuzz.mat-array-deep-control-flow dEQP-VK.pipeline.spec_constant.graphics.geometry.composite.array.array_vec4 diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c index de8387f3bca..6cd2789fcd1 100644 --- a/src/freedreno/ir3/disasm-a3xx.c +++ b/src/freedreno/ir3/disasm-a3xx.c @@ -1263,6 +1263,9 @@ static const struct opc_info { OPC(5, OPC_DSYPP_1, dsypp.1), OPC(5, OPC_RGETPOS, rgetpos), OPC(5, OPC_RGETINFO, rgetinfo), + /* macros are needed here for ir3_print */ + OPC(5, OPC_DSXPP_MACRO, dsxpp.macro), + OPC(5, OPC_DSYPP_MACRO, dsypp.macro), /* category 6: */ diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h index 822d9dd7bc1..d2715080060 100644 --- a/src/freedreno/ir3/instr-a3xx.h +++ b/src/freedreno/ir3/instr-a3xx.h @@ -185,6 +185,9 @@ typedef enum { OPC_DSYPP_1 = _OPC(5, 25), OPC_RGETPOS = _OPC(5, 26), OPC_RGETINFO = _OPC(5, 27), + /* cat5 meta instructions, placed above the cat5 opc field's size */ + OPC_DSXPP_MACRO = _OPC(5, 32), + OPC_DSYPP_MACRO = _OPC(5, 33), /* category 6: */ OPC_LDG = _OPC(6, 0), /* load-global */ diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 634089c590b..a198e4cb927 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1663,9 +1663,9 @@ INSTR1(SQRT) /* cat5 instructions: */ INSTR1(DSX) -INSTR1(DSXPP_1) +INSTR1(DSXPP_MACRO) INSTR1(DSY) -INSTR1(DSYPP_1) +INSTR1(DSYPP_MACRO) INSTR1F(3D, DSX) INSTR1F(3D, DSY) INSTR1(RGETPOS) diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index acfe4341ccd..10a51eb55e7 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -478,7 +478,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) dst[0]->cat5.type = TYPE_F32; break; case nir_op_fddx_fine: - dst[0] = ir3_DSXPP_1(b, src[0], 0); + dst[0] = ir3_DSXPP_MACRO(b, src[0], 0); dst[0]->cat5.type = TYPE_F32; break; case nir_op_fddy: @@ -488,7 +488,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) break; break; case nir_op_fddy_fine: - dst[0] = ir3_DSYPP_1(b, src[0], 0); + dst[0] = ir3_DSYPP_MACRO(b, src[0], 0); dst[0]->cat5.type = TYPE_F32; break; case nir_op_flt: diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index c4d90c15ba9..69efdbabc3f 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -233,13 +233,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) list_addtail(&n->node, &block->instr_list); } - if (n->opc == OPC_DSXPP_1 || n->opc == OPC_DSYPP_1) { - struct ir3_instruction *op_p = ir3_instr_clone(n); - op_p->flags = IR3_INSTR_P; - - ctx->so->need_fine_derivatives = true; - } - if (is_sfu(n)) regmask_set(&state->needs_ss, n->regs[0]); @@ -360,6 +353,42 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) return true; } +/* Expands dsxpp and dsypp macros to: + * + * dsxpp.1 dst, src + * dsxpp.1.p dst, src + * + * We apply this after flags syncing, as we don't want to sync in between the + * two (which might happen if dst == src). We do it before nop scheduling + * because that needs to count actual instructions. + */ +static bool +apply_fine_deriv_macro(struct ir3_legalize_ctx *ctx, struct ir3_block *block) +{ + struct list_head instr_list; + + /* remove all the instructions from the list, we'll be adding + * them back in as we go + */ + list_replace(&block->instr_list, &instr_list); + list_inithead(&block->instr_list); + + foreach_instr_safe (n, &instr_list) { + list_addtail(&n->node, &block->instr_list); + + if (n->opc == OPC_DSXPP_MACRO || n->opc == OPC_DSYPP_MACRO) { + n->opc = (n->opc == OPC_DSXPP_MACRO) ? OPC_DSXPP_1 : OPC_DSYPP_1; + + struct ir3_instruction *op_p = ir3_instr_clone(n); + op_p->flags = IR3_INSTR_P; + + ctx->so->need_fine_derivatives = true; + } + } + + return true; +} + /* NOTE: branch instructions are always the last instruction(s) * in the block. We take advantage of this as we resolve the * branches, since "if (foo) break;" constructs turn into @@ -752,6 +781,11 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) block_sched(ir); if (so->type == MESA_SHADER_FRAGMENT) kill_sched(ir, so); + + foreach_block (block, &ir->block_list) { + progress |= apply_fine_deriv_macro(ctx, block); + } + nop_sched(ir); do {