From a8ac61b0ee2fdf4e8bc7b47aee9c24f96c40435c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 22 Sep 2020 17:42:10 -0500 Subject: [PATCH] intel/fs: NoMask initialize the address register for shuffles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: mesa-stable@lists.freedesktop.org Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2979 Tested-by: Iván Briano Reviewed-by: Matt Turner Reviewed-by: Francisco Jerez Part-of: --- src/intel/compiler/brw_fs_generator.cpp | 37 +++++++++++++++++++++---- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index dc1f212c003..45ffce2e39f 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -650,15 +650,42 @@ fs_generator::generate_shuffle(fs_inst *inst, group_idx = retype(spread(group_idx, 2), BRW_REGISTER_TYPE_W); } + uint32_t src_start_offset = src.nr * REG_SIZE + src.subnr; + + /* Whether we can use destination dependency control without running + * the risk of a hang if an instruction gets shot down. + */ + const bool use_dep_ctrl = !inst->predicate && + inst->exec_size == dispatch_width; + brw_inst *insn; + + /* Due to a hardware bug some platforms (particularly Gen11+) seem + * to require the address components of all channels to be valid + * whether or not they're active, which causes issues if we use VxH + * addressing under non-uniform control-flow. We can easily work + * around that by initializing the whole address register with a + * pipelined NoMask MOV instruction. + */ + insn = brw_MOV(p, addr, brw_imm_uw(src_start_offset)); + brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); + brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); + if (devinfo->gen >= 12) + brw_set_default_swsb(p, tgl_swsb_null()); + else + brw_inst_set_no_dd_clear(devinfo, insn, use_dep_ctrl); + /* Take into account the component size and horizontal stride. */ assert(src.vstride == src.hstride + src.width); - brw_SHL(p, addr, group_idx, - brw_imm_uw(util_logbase2(type_sz(src.type)) + - src.hstride - 1)); + insn = brw_SHL(p, addr, group_idx, + brw_imm_uw(util_logbase2(type_sz(src.type)) + + src.hstride - 1)); + if (devinfo->gen >= 12) + brw_set_default_swsb(p, tgl_swsb_regdist(1)); + else + brw_inst_set_no_dd_check(devinfo, insn, use_dep_ctrl); /* Add on the register start offset */ - brw_set_default_swsb(p, tgl_swsb_regdist(1)); - brw_ADD(p, addr, addr, brw_imm_uw(src.nr * REG_SIZE + src.subnr)); + brw_ADD(p, addr, addr, brw_imm_uw(src_start_offset)); if (type_sz(src.type) > 4 && ((devinfo->gen == 7 && !devinfo->is_haswell) ||