mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 17:58:26 +02:00
aco/lower_to_hw: Don't use 2 SGPR operands before GFX10 in a single VOP3 instruction in do_pack_2x16()
Cc: mesa-stable
(cherry picked from commit 9f5996ae8a)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39462>
This commit is contained in:
parent
5407c3924e
commit
fb1bd8bdf4
2 changed files with 11 additions and 5 deletions
|
|
@ -664,7 +664,7 @@
|
||||||
"description": "aco/lower_to_hw: Don't use 2 SGPR operands before GFX10 in a single VOP3 instruction in do_pack_2x16()",
|
"description": "aco/lower_to_hw: Don't use 2 SGPR operands before GFX10 in a single VOP3 instruction in do_pack_2x16()",
|
||||||
"nominated": true,
|
"nominated": true,
|
||||||
"nomination_type": 1,
|
"nomination_type": 1,
|
||||||
"resolution": 0,
|
"resolution": 1,
|
||||||
"main_sha": null,
|
"main_sha": null,
|
||||||
"because_sha": null,
|
"because_sha": null,
|
||||||
"notes": null
|
"notes": null
|
||||||
|
|
|
||||||
|
|
@ -1276,6 +1276,7 @@ create_bperm(Builder& bld, uint8_t swiz[4], Definition dst, Operand src1,
|
||||||
else if (!src0.isConstant())
|
else if (!src0.isConstant())
|
||||||
src0 = Operand(PhysReg(src0.physReg().reg()), src0.regClass().resize(4));
|
src0 = Operand(PhysReg(src0.physReg().reg()), src0.regClass().resize(4));
|
||||||
|
|
||||||
|
assert(src0.isOfType(RegType::vgpr) || src1.isOfType(RegType::vgpr));
|
||||||
bld.vop3(aco_opcode::v_perm_b32, dst, src0, src1, Operand::c32(swiz_packed));
|
bld.vop3(aco_opcode::v_perm_b32, dst, src0, src1, Operand::c32(swiz_packed));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1608,14 +1609,18 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Whether both Operands can be used in a single VOP3 instruction. */
|
||||||
|
bool both_ops_are_sgpr = lo.isOfType(RegType::sgpr) && hi.isOfType(RegType::sgpr);
|
||||||
|
bool can_use_vop3 = ctx->program->gfx_level >= GFX10 ||
|
||||||
|
(!lo.isLiteral() && !hi.isLiteral() && !both_ops_are_sgpr);
|
||||||
|
|
||||||
/* v_pack_b32_f16 can be used for bit exact copies if:
|
/* v_pack_b32_f16 can be used for bit exact copies if:
|
||||||
* - fp16 input denorms are enabled, otherwise they get flushed to zero
|
* - fp16 input denorms are enabled, otherwise they get flushed to zero
|
||||||
* - signalling input NaNs are kept, which is the case with IEEE_MODE=0
|
* - signalling input NaNs are kept, which is the case with IEEE_MODE=0
|
||||||
* GFX12+ always quiets signalling NaNs, IEEE_MODE was removed
|
* GFX12+ always quiets signalling NaNs, IEEE_MODE was removed
|
||||||
*/
|
*/
|
||||||
bool can_use_pack = (ctx->block->fp_mode.denorm16_64 & fp_denorm_keep_in) &&
|
bool can_use_pack = (ctx->block->fp_mode.denorm16_64 & fp_denorm_keep_in) &&
|
||||||
(ctx->program->gfx_level >= GFX10 ||
|
ctx->program->gfx_level >= GFX9 && can_use_vop3 &&
|
||||||
(ctx->program->gfx_level >= GFX9 && !lo.isLiteral() && !hi.isLiteral())) &&
|
|
||||||
ctx->program->gfx_level < GFX12;
|
ctx->program->gfx_level < GFX12;
|
||||||
|
|
||||||
if (can_use_pack) {
|
if (can_use_pack) {
|
||||||
|
|
@ -1626,7 +1631,7 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
|
||||||
}
|
}
|
||||||
|
|
||||||
/* a single alignbyte can be sufficient: hi can be a 32-bit integer constant */
|
/* a single alignbyte can be sufficient: hi can be a 32-bit integer constant */
|
||||||
if (lo.physReg().byte() == 2 && hi.physReg().byte() == 0 &&
|
if (lo.physReg().byte() == 2 && hi.physReg().byte() == 0 && can_use_vop3 &&
|
||||||
(!hi.isConstant() || (hi.constantValue() && (!Operand::c32(hi.constantValue()).isLiteral() ||
|
(!hi.isConstant() || (hi.constantValue() && (!Operand::c32(hi.constantValue()).isLiteral() ||
|
||||||
ctx->program->gfx_level >= GFX10)))) {
|
ctx->program->gfx_level >= GFX10)))) {
|
||||||
if (hi.isConstant())
|
if (hi.isConstant())
|
||||||
|
|
@ -1637,7 +1642,8 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx->program->gfx_level >= GFX10 && !lo.constantEquals(0) && !hi.constantEquals(0)) {
|
if (ctx->program->gfx_level >= GFX10 && !lo.constantEquals(0) && !hi.constantEquals(0) &&
|
||||||
|
!both_ops_are_sgpr) {
|
||||||
uint8_t swiz[4];
|
uint8_t swiz[4];
|
||||||
Operand ops[2] = {lo, hi};
|
Operand ops[2] = {lo, hi};
|
||||||
for (unsigned i = 0; i < 2; i++) {
|
for (unsigned i = 0; i < 2; i++) {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue