diff --git a/.pick_status.json b/.pick_status.json index 96703886ccc..bee1b01a30f 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -8797,7 +8797,7 @@ "description": "intel/fs: Don't use NoDDClk/NoDDClr for split SHUFFLEs", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "master_sha": null, "because_sha": "a8ac61b0ee2fdf4e8bc7b47aee9c24f96c40435c" }, diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 1373a9c75ee..8fd1538df93 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -594,11 +594,23 @@ fs_generator::generate_shuffle(fs_inst *inst, uint32_t src_start_offset = src.nr * REG_SIZE + src.subnr; - /* Whether we can use destination dependency control without running - * the risk of a hang if an instruction gets shot down. + /* From the Haswell PRM: + * + * "When a sequence of NoDDChk and NoDDClr are used, the last + * instruction that completes the scoreboard clear must have a + * non-zero execution mask. This means, if any kind of predication + * can change the execution mask or channel enable of the last + * instruction, the optimization must be avoided. This is to + * avoid instructions being shot down the pipeline when no writes + * are required." + * + * Whenever predication is enabled or the instructions being emitted + * aren't the full width, it's possible that it will be run with zero + * channels enabled so we can't use dependency control without + * running the risk of a hang if an instruction gets shot down. */ const bool use_dep_ctrl = !inst->predicate && - inst->exec_size == dispatch_width; + lower_width == dispatch_width; brw_inst *insn; /* Due to a hardware bug some platforms (particularly Gen11+) seem