From 8b2be206f366c0eb49e77282772e371beef7f47a Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 28 Oct 2024 12:18:56 -0700 Subject: [PATCH] brw/algebraic: Constant folding for BROADCAST and SHUFFLE This prevents assertion failures in brw_eu_emit in a later commit in this MR. Even though they have not been previously observed, these assertion failures could happen even without that commit. No shader-db or fossil-db changes on any Intel platform. Fixes: 04e17832782 ("brw: Call brw_fs_opt_algebraic less often") v2: Add SHUFFLE. Suggested by Ken. Fixed indentation. v3: Update BROADCAST exec_size after rebasing on "brw/build: Use SIMD8 temporaries in emit_uniformize". v4: Explain why munging the exec_size is correct. Reviewed-by: Caio Oliveira Part-of: --- src/intel/compiler/brw_opt_algebraic.cpp | 29 ++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/intel/compiler/brw_opt_algebraic.cpp b/src/intel/compiler/brw_opt_algebraic.cpp index a19314d3bbc..15517eff1be 100644 --- a/src/intel/compiler/brw_opt_algebraic.cpp +++ b/src/intel/compiler/brw_opt_algebraic.cpp @@ -300,6 +300,30 @@ brw_opt_constant_fold_instruction(const intel_device_info *devinfo, brw_inst *in } break; + case SHADER_OPCODE_BROADCAST: + if (inst->src[0].file == IMM) { + inst->opcode = BRW_OPCODE_MOV; + inst->force_writemask_all = true; + inst->resize_sources(1); + + /* The destination of BROADCAST will always be is_scalar, so the + * allocation will always be REG_SIZE * reg_unit. Adjust the + * exec_size to match. + */ + inst->exec_size = 8 * reg_unit(devinfo); + assert(inst->size_written == inst->dst.component_size(inst->exec_size)); + progress = true; + } + break; + + case SHADER_OPCODE_SHUFFLE: + if (inst->src[0].file == IMM) { + inst->opcode = BRW_OPCODE_MOV; + inst->resize_sources(1); + progress = true; + } + break; + default: break; } @@ -665,6 +689,11 @@ brw_opt_algebraic(brw_shader &s) if (is_uniform(inst->src[0])) { inst->opcode = BRW_OPCODE_MOV; inst->force_writemask_all = true; + + /* The destination of BROADCAST will always be is_scalar, so the + * allocation will always be REG_SIZE * reg_unit. Adjust the + * exec_size to match. + */ inst->exec_size = 8 * reg_unit(devinfo); assert(inst->size_written == inst->dst.component_size(inst->exec_size)); inst->resize_sources(1);