From cfa7eec06c713bd4ec1eb91f02cfddc57c7bd2f2 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Tue, 20 Jun 2023 14:31:55 +0100
Subject: [PATCH] aco: don't set exec_hi for wave32 scan reductions

fossil-db (wave32):
Totals from 21 (0.02% of 133428) affected shaders:
Instrs: 10778 -> 10712 (-0.61%)
CodeSize: 56604 -> 56208 (-0.70%)
Latency: 168293 -> 168251 (-0.02%)
InvThroughput: 25256 -> 25253 (-0.01%)

Signed-off-by: Rhys Perry
Reviewed-by: Georg Lehmann
Part-of:
---
 src/amd/compiler/aco_lower_to_hw_instr.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index f35991e36da..a971e45551a 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -649,7 +649,8 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
 
             /* fill in the gaps in rows 1 and 3 */
             bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::c32(0x10000u));
-            bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand::c32(0x10000u));
+            if (ctx->program->wave_size == 64)
+               bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand::c32(0x10000u));
             for (unsigned i = 0; i < src.size(); i++) {
                Instruction* perm = bld.vop3(aco_opcode::v_permlanex16_b32,
                                             Definition(PhysReg{vtmp + i}, v1),
@@ -782,8 +783,10 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
          if (ctx->program->gfx_level >= GFX10) {
             bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_lo, s1), Operand::c32(16u),
                      Operand::c32(16u));
-            bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand::c32(16u),
-                     Operand::c32(16u));
+            if (ctx->program->wave_size == 64) {
+               bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand::c32(16u),
+                        Operand::c32(16u));
+            }
             for (unsigned i = 0; i < src.size(); i++) {
                Instruction* perm = bld.vop3(aco_opcode::v_permlanex16_b32,
                                             Definition(PhysReg{vtmp + i}, v1),