diff --git a/src/broadcom/ci/broadcom-rpi5-fails.txt b/src/broadcom/ci/broadcom-rpi5-fails.txt index 648dfd689a3..cf2598f6688 100644 --- a/src/broadcom/ci/broadcom-rpi5-fails.txt +++ b/src/broadcom/ci/broadcom-rpi5-fails.txt @@ -15,59 +15,3 @@ dEQP-VK.wsi.wayland.swapchain.simulate_oom.image_usage,Crash dEQP-VK.wsi.wayland.swapchain.simulate_oom.min_image_count,Crash dEQP-VK.wsi.wayland.swapchain.simulate_oom.pre_transform,Crash dEQP-VK.wsi.wayland.swapchain.simulate_oom.present_mode,Crash - -# https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27409 -dEQP-VK.glsl.derivate.dfdx.dynamic_loop.float_highp,Fail -dEQP-VK.glsl.derivate.dfdx.dynamic_loop.float_mediump,Fail -dEQP-VK.glsl.derivate.dfdx.dynamic_loop.vec2_highp,Fail -dEQP-VK.glsl.derivate.dfdx.dynamic_loop.vec2_mediump,Fail -dEQP-VK.glsl.derivate.dfdx.dynamic_loop.vec4_highp,Fail -dEQP-VK.glsl.derivate.dfdx.dynamic_loop.vec4_mediump,Fail -dEQP-VK.glsl.derivate.dfdxcoarse.dynamic_loop.float_highp,Fail -dEQP-VK.glsl.derivate.dfdxcoarse.dynamic_loop.float_mediump,Fail -dEQP-VK.glsl.derivate.dfdxcoarse.dynamic_loop.vec2_highp,Fail -dEQP-VK.glsl.derivate.dfdxcoarse.dynamic_loop.vec2_mediump,Fail -dEQP-VK.glsl.derivate.dfdxcoarse.dynamic_loop.vec4_highp,Fail -dEQP-VK.glsl.derivate.dfdxcoarse.dynamic_loop.vec4_mediump,Fail -dEQP-VK.glsl.derivate.dfdxfine.dynamic_loop.float_highp,Fail -dEQP-VK.glsl.derivate.dfdxfine.dynamic_loop.float_mediump,Fail -dEQP-VK.glsl.derivate.dfdxfine.dynamic_loop.vec2_highp,Fail -dEQP-VK.glsl.derivate.dfdxfine.dynamic_loop.vec2_mediump,Fail -dEQP-VK.glsl.derivate.dfdxfine.dynamic_loop.vec4_highp,Fail -dEQP-VK.glsl.derivate.dfdxfine.dynamic_loop.vec4_mediump,Fail -dEQP-VK.glsl.derivate.dfdy.dynamic_loop.float_highp,Fail -dEQP-VK.glsl.derivate.dfdy.dynamic_loop.float_mediump,Fail -dEQP-VK.glsl.derivate.dfdy.dynamic_loop.vec2_highp,Fail -dEQP-VK.glsl.derivate.dfdy.dynamic_loop.vec2_mediump,Fail -dEQP-VK.glsl.derivate.dfdy.dynamic_loop.vec4_highp,Fail -dEQP-VK.glsl.derivate.dfdy.dynamic_loop.vec4_mediump,Fail -dEQP-VK.glsl.derivate.dfdycoarse.dynamic_loop.float_highp,Fail -dEQP-VK.glsl.derivate.dfdycoarse.dynamic_loop.float_mediump,Fail -dEQP-VK.glsl.derivate.dfdycoarse.dynamic_loop.vec2_highp,Fail -dEQP-VK.glsl.derivate.dfdycoarse.dynamic_loop.vec2_mediump,Fail -dEQP-VK.glsl.derivate.dfdycoarse.dynamic_loop.vec4_highp,Fail -dEQP-VK.glsl.derivate.dfdycoarse.dynamic_loop.vec4_mediump,Fail -dEQP-VK.glsl.derivate.dfdyfine.dynamic_loop.float_highp,Fail -dEQP-VK.glsl.derivate.dfdyfine.dynamic_loop.float_mediump,Fail -dEQP-VK.glsl.derivate.dfdyfine.dynamic_loop.vec2_highp,Fail -dEQP-VK.glsl.derivate.dfdyfine.dynamic_loop.vec2_mediump,Fail -dEQP-VK.glsl.derivate.dfdyfine.dynamic_loop.vec4_highp,Fail -dEQP-VK.glsl.derivate.dfdyfine.dynamic_loop.vec4_mediump,Fail -dEQP-VK.glsl.derivate.fwidth.dynamic_loop.float_highp,Fail -dEQP-VK.glsl.derivate.fwidth.dynamic_loop.float_mediump,Fail -dEQP-VK.glsl.derivate.fwidth.dynamic_loop.vec2_highp,Fail -dEQP-VK.glsl.derivate.fwidth.dynamic_loop.vec2_mediump,Fail -dEQP-VK.glsl.derivate.fwidth.dynamic_loop.vec4_highp,Fail -dEQP-VK.glsl.derivate.fwidth.dynamic_loop.vec4_mediump,Fail -dEQP-VK.glsl.derivate.fwidthcoarse.dynamic_loop.float_highp,Fail -dEQP-VK.glsl.derivate.fwidthcoarse.dynamic_loop.float_mediump,Fail -dEQP-VK.glsl.derivate.fwidthcoarse.dynamic_loop.vec2_highp,Fail -dEQP-VK.glsl.derivate.fwidthcoarse.dynamic_loop.vec2_mediump,Fail -dEQP-VK.glsl.derivate.fwidthcoarse.dynamic_loop.vec4_highp,Fail -dEQP-VK.glsl.derivate.fwidthcoarse.dynamic_loop.vec4_mediump,Fail -dEQP-VK.glsl.derivate.fwidthfine.dynamic_loop.float_highp,Fail -dEQP-VK.glsl.derivate.fwidthfine.dynamic_loop.float_mediump,Fail -dEQP-VK.glsl.derivate.fwidthfine.dynamic_loop.vec2_highp,Fail -dEQP-VK.glsl.derivate.fwidthfine.dynamic_loop.vec2_mediump,Fail -dEQP-VK.glsl.derivate.fwidthfine.dynamic_loop.vec4_highp,Fail -dEQP-VK.glsl.derivate.fwidthfine.dynamic_loop.vec4_mediump,Fail diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index e6ed160523a..cee1cd612a7 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -3282,25 +3282,28 @@ emit_load_local_invocation_index(struct v3d_compile *c) /* For the purposes of reduction operations (ballot, alleq, allfeq, bcastf) in * fragment shaders a lane is considered active if any sample flags are set - * for *any* lane in the same quad. This is not what we want. To fix this we - * can emit MSF to get the active lanes and produce a condition that we - * can then use with these operations to limit execution only to lanes that - * are really active in each quad. Further, we also need to disable lanes - * that may be disabled because of non-uniform control flow. + * for *any* lane in the same quad, however, we still need to ensure that + * terminated lanes (OpTerminate) are not included. Further, we also need to + * disable lanes that may be disabled because of non-uniform control + * flow. */ static enum v3d_qpu_cond -setup_subgroup_reduction_condition(struct v3d_compile *c) +setup_subgroup_control_flow_condition(struct v3d_compile *c) { assert(c->s->info.stage == MESA_SHADER_FRAGMENT || c->s->info.stage == MESA_SHADER_COMPUTE); enum v3d_qpu_cond cond = V3D_QPU_COND_NONE; - /* Produce condition for 'lane is active' from current sample flags. - * Only required for fragment shaders. + /* We need to make sure that terminated lanes in fragment shaders are + * not included. We can identify these lanes by comparing the inital + * sample mask with the current. This fixes: + * dEQP-VK.spirv_assembly.instruction.terminate_invocation.terminate.subgroup_* */ - if (c->s->info.stage == MESA_SHADER_FRAGMENT) { - vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), vir_MSF(c)), + if (c->s->info.stage == MESA_SHADER_FRAGMENT && c->emitted_discard) { + vir_set_pf(c, vir_AND_dest(c, vir_nop_reg(), c->start_msf, + vir_NOT(c, vir_XOR(c, c->start_msf, + vir_MSF(c)))), V3D_QPU_PF_PUSHZ); cond = V3D_QPU_COND_IFNA; } @@ -3883,7 +3886,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_ballot: { assert(c->devinfo->ver >= 71); struct qreg value = ntq_get_src(c, instr->src[0], 0); - enum v3d_qpu_cond cond = setup_subgroup_reduction_condition(c); + enum v3d_qpu_cond cond = setup_subgroup_control_flow_condition(c); struct qreg res = vir_get_temp(c); vir_set_cond(vir_BALLOT_dest(c, res, value), cond); ntq_store_def(c, &instr->def, 0, vir_MOV(c, res)); @@ -3902,7 +3905,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_read_first_invocation: { assert(c->devinfo->ver >= 71); struct qreg value = ntq_get_src(c, instr->src[0], 0); - enum v3d_qpu_cond cond = setup_subgroup_reduction_condition(c); + enum v3d_qpu_cond cond = setup_subgroup_control_flow_condition(c); struct qreg res = vir_get_temp(c); vir_set_cond(vir_BCASTF_dest(c, res, value), cond); ntq_store_def(c, &instr->def, 0, vir_MOV(c, res)); @@ -3922,7 +3925,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_vote_ieq: { assert(c->devinfo->ver >= 71); struct qreg value = ntq_get_src(c, instr->src[0], 0); - enum v3d_qpu_cond cond = setup_subgroup_reduction_condition(c); + enum v3d_qpu_cond cond = setup_subgroup_control_flow_condition(c); struct qreg res = vir_get_temp(c); vir_set_cond(instr->intrinsic == nir_intrinsic_vote_ieq ? vir_ALLEQ_dest(c, res, value) : @@ -3940,7 +3943,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_vote_all: { assert(c->devinfo->ver >= 71); struct qreg value = ntq_get_src(c, instr->src[0], 0); - enum v3d_qpu_cond cond = setup_subgroup_reduction_condition(c); + enum v3d_qpu_cond cond = setup_subgroup_control_flow_condition(c); struct qreg res = vir_get_temp(c); vir_set_cond(vir_ALLEQ_dest(c, res, value), cond); @@ -3964,7 +3967,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_vote_any: { assert(c->devinfo->ver >= 71); struct qreg value = ntq_get_src(c, instr->src[0], 0); - enum v3d_qpu_cond cond = setup_subgroup_reduction_condition(c); + enum v3d_qpu_cond cond = setup_subgroup_control_flow_condition(c); struct qreg res = vir_get_temp(c); vir_set_cond(vir_ALLEQ_dest(c, res, value), cond); @@ -4544,6 +4547,7 @@ nir_to_vir(struct v3d_compile *c) { switch (c->s->info.stage) { case MESA_SHADER_FRAGMENT: + c->start_msf = vir_MSF(c); if (c->devinfo->ver < 71) c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 0)); else diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index ef6cd13d6f3..12aaacdc14a 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -793,6 +793,11 @@ struct v3d_compile { struct qreg cs_shared_offset; int local_invocation_index_bits; + /* Starting value of the sample mask in a fragment shader. We use + * this to identify lanes that have been terminated/discarded. + */ + struct qreg start_msf; + /* If the shader uses subgroup functionality */ bool has_subgroups;