mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-02 05:10:17 +01:00
aco: don't set exec_hi for wave32 scan reductions
fossil-db (wave32):
Totals from 21 (0.02% of 133428) affected shaders:
Instrs: 10778 -> 10712 (-0.61%)
CodeSize: 56604 -> 56208 (-0.70%)
Latency: 168293 -> 168251 (-0.02%)
InvThroughput: 25256 -> 25253 (-0.01%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23745>
This commit is contained in:
parent
5d03bbc91d
commit
cfa7eec06c
1 changed file with 6 additions and 3 deletions
|
|
@@ -649,7 +649,8 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
|
|||
|
||||
/* fill in the gaps in rows 1 and 3 */
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::c32(0x10000u));
|
||||
-bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand::c32(0x10000u));
|
||||
+if (ctx->program->wave_size == 64)
|
||||
+bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand::c32(0x10000u));
|
||||
for (unsigned i = 0; i < src.size(); i++) {
|
||||
Instruction* perm =
|
||||
bld.vop3(aco_opcode::v_permlanex16_b32, Definition(PhysReg{vtmp + i}, v1),
|
||||
|
|
@@ -782,8 +783,10 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
|
|||
if (ctx->program->gfx_level >= GFX10) {
|
||||
bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_lo, s1), Operand::c32(16u),
|
||||
Operand::c32(16u));
|
||||
-bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand::c32(16u),
|
||||
-Operand::c32(16u));
|
||||
+if (ctx->program->wave_size == 64) {
|
||||
+bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand::c32(16u),
|
||||
+Operand::c32(16u));
|
||||
+}
|
||||
for (unsigned i = 0; i < src.size(); i++) {
|
||||
Instruction* perm =
|
||||
bld.vop3(aco_opcode::v_permlanex16_b32, Definition(PhysReg{vtmp + i}, v1),
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue