mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 09:28:07 +02:00
ir3/lower_copies: Handle HW bug with shared half-floats
In the past we avoided emitting pure 16-bit subgroup macros because of this bug. Now that we are going to start directly emitting the special moves those macros expand to, we also have to handle the bug directly. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22075>
This commit is contained in:
parent
100096394f
commit
fec5b9397f
1 changed file with 57 additions and 0 deletions
|
|
@@ -572,6 +572,63 @@ ir3_lower_copies(struct ir3_shader_variant *v)
|
|||
list_del(&instr->node);
|
||||
} else if (instr->opc == OPC_META_PHI) {
|
||||
list_del(&instr->node);
|
||||
} else if (instr->opc == OPC_MOV) {
|
||||
/* There seems to be a HW bug where moves where the source is 16-bit
|
||||
* non-shared and the destination is 16-bit shared don't work when
|
||||
* only fibers 64-127 are active. We work around it by instead
|
||||
* generating a narrowing mov, which only works with even-numbered
|
||||
* registers (i.e. .x and .z), but for odd numbers we can swap the
|
||||
* components of the normal src and its even neighbor and then
|
||||
* unswap afterwards to make it work for everything.
|
||||
*/
|
||||
if ((instr->dsts[0]->flags & IR3_REG_SHARED) &&
|
||||
(instr->dsts[0]->flags & IR3_REG_HALF) &&
|
||||
!(instr->srcs[0]->flags & (IR3_REG_SHARED | IR3_REG_IMMED |
|
||||
IR3_REG_CONST)) &&
|
||||
(instr->srcs[0]->flags & IR3_REG_HALF)) {
|
||||
unsigned src_num = instr->srcs[0]->num;
|
||||
unsigned dst_num = instr->dsts[0]->num;
|
||||
|
||||
for (unsigned i = 0; i <= instr->repeat; i++,
|
||||
src_num++, dst_num++) {
|
||||
if (src_num & 1) {
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
struct ir3_instruction *swz = ir3_instr_create(instr->block, OPC_SWZ, 2, 2);
|
||||
ir3_dst_create(swz, src_num - 1, IR3_REG_HALF);
|
||||
ir3_dst_create(swz, src_num, IR3_REG_HALF);
|
||||
ir3_src_create(swz, src_num, IR3_REG_HALF);
|
||||
ir3_src_create(swz, src_num - 1, IR3_REG_HALF);
|
||||
swz->cat1.dst_type = TYPE_U16;
|
||||
swz->cat1.src_type = TYPE_U16;
|
||||
swz->repeat = 1;
|
||||
if (i == 0)
|
||||
ir3_instr_move_before(swz, instr);
|
||||
else
|
||||
ir3_instr_move_after(swz, instr);
|
||||
}
|
||||
}
|
||||
|
||||
struct ir3_instruction *mov =
|
||||
ir3_instr_create(instr->block, OPC_MOV, 1, 1);
|
||||
|
||||
ir3_dst_create(mov, dst_num, instr->dsts[0]->flags);
|
||||
ir3_src_create(mov, src_num / 2,
|
||||
instr->srcs[0]->flags & ~IR3_REG_HALF);
|
||||
|
||||
/* Float conversions are banned in this case in
|
||||
* ir3_valid_flags(), so we only have to worry about normal
|
||||
* non-converting moves.
|
||||
*/
|
||||
assert(instr->cat1.src_type == TYPE_U16 ||
|
||||
instr->cat1.src_type == TYPE_S16);
|
||||
mov->cat1.src_type = TYPE_U32;
|
||||
mov->cat1.dst_type = TYPE_U16;
|
||||
|
||||
ir3_instr_move_before(mov, instr);
|
||||
}
|
||||
|
||||
list_del(&instr->node);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue