mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-20 05:40:39 +02:00
aco/gfx11+: don't use VOP3 v_swap_b16
v_swap_b16 is not officially supported as VOP3, so it can't be used with v128-255.
Tests show that VOP3 appears to work correctly, but according to AMD that should
not be relied on.
https://github.com/llvm/llvm-project/pull/100442#discussion_r1703929676
Foz-DB Navi31:
Totals from 6 (0.01% of 79395) affected shaders:
Instrs: 64799 -> 65932 (+1.75%)
CodeSize: 360180 -> 368440 (+2.29%)
Latency: 1364648 -> 1365922 (+0.09%)
InvThroughput: 635843 -> 636475 (+0.10%)
Copies: 14766 -> 15698 (+6.31%)
VALU: 38743 -> 39675 (+2.41%)
Fixes: 80b8bbf0c5 ("aco/gfx11: use v_swap_b16")
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30515>
This commit is contained in:
parent
796b3ab23d
commit
e0818cb87b
1 changed files with 22 additions and 3 deletions
|
|
@ -1408,9 +1408,28 @@ swap_subdword_gfx11(Builder& bld, Definition def, Operand op)
|
|||
if (def.bytes() == 2) {
|
||||
Operand def_as_op = Operand(def.physReg(), def.regClass());
|
||||
Definition op_as_def = Definition(op.physReg(), op.regClass());
|
||||
Instruction* instr = bld.vop1(aco_opcode::v_swap_b16, def, op_as_def, op, def_as_op);
|
||||
instr->valu().opsel[0] = op.physReg().byte();
|
||||
instr->valu().opsel[3] = def.physReg().byte();
|
||||
/* v_swap_b16 is not officially supported as VOP3, so it can't be used with v128-255.
|
||||
* Tests show that VOP3 appears to work correctly, but according to AMD that should
|
||||
* not be relied on.
|
||||
*/
|
||||
if (def.physReg() < (256 + 128) && op.physReg() < (256 + 128)) {
|
||||
Instruction* instr = bld.vop1(aco_opcode::v_swap_b16, def, op_as_def, op, def_as_op);
|
||||
instr->valu().opsel[0] = op.physReg().byte();
|
||||
instr->valu().opsel[3] = def.physReg().byte();
|
||||
} else {
|
||||
Instruction* instr = bld.vop3(aco_opcode::v_xor_b16, def, op, def_as_op);
|
||||
instr->valu().opsel[0] = op.physReg().byte();
|
||||
instr->valu().opsel[1] = def_as_op.physReg().byte();
|
||||
instr->valu().opsel[3] = def.physReg().byte();
|
||||
instr = bld.vop3(aco_opcode::v_xor_b16, op_as_def, op, def_as_op);
|
||||
instr->valu().opsel[0] = op.physReg().byte();
|
||||
instr->valu().opsel[1] = def_as_op.physReg().byte();
|
||||
instr->valu().opsel[3] = op_as_def.physReg().byte();
|
||||
instr = bld.vop3(aco_opcode::v_xor_b16, def, op, def_as_op);
|
||||
instr->valu().opsel[0] = op.physReg().byte();
|
||||
instr->valu().opsel[1] = def_as_op.physReg().byte();
|
||||
instr->valu().opsel[3] = def.physReg().byte();
|
||||
}
|
||||
} else {
|
||||
PhysReg op_half = op.physReg();
|
||||
op_half.reg_b &= ~1;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue