brw/algebraic: Constant folding for BROADCAST and SHUFFLE

This prevents assertion failures in brw_eu_emit that surface with a
later commit in this MR. Although they have not been observed before,
these assertion failures could occur even without that commit.

No shader-db or fossil-db changes on any Intel platform.

Fixes: 04e1783278 ("brw: Call brw_fs_opt_algebraic less often")

v2: Add SHUFFLE. Suggested by Ken. Fixed indentation.

v3: Update BROADCAST exec_size after rebasing on "brw/build: Use SIMD8
temporaries in emit_uniformize".

v4: Explain why munging the exec_size is correct.

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31497>
(cherry picked from commit 8b2be206f3)
This commit is contained in:
Ian Romanick 2024-10-28 12:18:56 -07:00 committed by Eric Engestrom
parent c3952af96d
commit d2e0c22518
2 changed files with 30 additions and 1 deletions

View file

@@ -454,7 +454,7 @@
"description": "brw/algebraic: Constant folding for BROADCAST and SHUFFLE",
"nominated": true,
"nomination_type": 2,
-"resolution": 0,
+"resolution": 1,
"main_sha": null,
"because_sha": "04e17832782983853299ac616caea8567abbd71f",
"notes": null

View file

@@ -302,6 +302,30 @@ brw_opt_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *ins
}
break;
case SHADER_OPCODE_BROADCAST:
if (inst->src[0].file == IMM) {
inst->opcode = BRW_OPCODE_MOV;
inst->force_writemask_all = true;
inst->resize_sources(1);
/* The destination of BROADCAST will always be is_scalar, so the
* allocation will always be REG_SIZE * reg_unit. Adjust the
* exec_size to match.
*/
inst->exec_size = 8 * reg_unit(devinfo);
assert(inst->size_written == inst->dst.component_size(inst->exec_size));
progress = true;
}
break;
case SHADER_OPCODE_SHUFFLE:
if (inst->src[0].file == IMM) {
inst->opcode = BRW_OPCODE_MOV;
inst->resize_sources(1);
progress = true;
}
break;
default:
break;
}
@@ -666,6 +690,11 @@ brw_opt_algebraic(fs_visitor &s)
if (is_uniform(inst->src[0])) {
inst->opcode = BRW_OPCODE_MOV;
inst->force_writemask_all = true;
/* The destination of BROADCAST will always be is_scalar, so the
* allocation will always be REG_SIZE * reg_unit. Adjust the
* exec_size to match.
*/
inst->exec_size = 8 * reg_unit(devinfo);
assert(inst->size_written == inst->dst.component_size(inst->exec_size));
inst->resize_sources(1);