diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 6ba71eb34b6..8c0c5aabcd1 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -462,23 +462,28 @@ unsigned get_subdword_operand_stride(chip_class chip, const aco_ptr& instr, unsigned idx, RegClass rc) { - /* v_readfirstlane_b32 cannot use SDWA */ - if (instr->opcode == aco_opcode::p_as_uniform) - return 4; - if (instr->isPseudo() && chip >= GFX8) - return rc.bytes() % 2 == 0 ? 2 : 1; + if (instr->isPseudo()) { + /* v_readfirstlane_b32 cannot use SDWA */ + if (instr->opcode == aco_opcode::p_as_uniform) + return 4; + else if (chip >= GFX8) + return rc.bytes() % 2 == 0 ? 2 : 1; + else + return 4; + } - if (instr->opcode == aco_opcode::v_cvt_f32_ubyte0) { - return 1; - } else if (can_use_SDWA(chip, instr, false)) { - return rc.bytes() % 2 == 0 ? 2 : 1; - } else if (rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, idx, 1)) { - return 2; - } else if (instr->isVOP3P()) { - return 2; + assert(rc.bytes() <= 2); + if (instr->isVALU()) { + if (can_use_SDWA(chip, instr, false)) + return rc.bytes(); + if (can_use_opsel(chip, instr->opcode, idx, true)) + return 2; + if (instr->format == Format::VOP3P) + return 2; } switch (instr->opcode) { + case aco_opcode::v_cvt_f32_ubyte0: return 1; case aco_opcode::ds_write_b8: case aco_opcode::ds_write_b16: return chip >= GFX8 ? 2 : 4; case aco_opcode::buffer_store_byte: @@ -489,10 +494,8 @@ get_subdword_operand_stride(chip_class chip, const aco_ptr& instr, case aco_opcode::scratch_store_short: case aco_opcode::global_store_byte: case aco_opcode::global_store_short: return chip >= GFX9 ? 2 : 4; - default: break; + default: return 4; } - - return 4; } void @@ -504,58 +507,59 @@ add_subdword_operand(ra_ctx& ctx, aco_ptr& instr, unsigned idx, uns return; assert(rc.bytes() <= 2); - - if (!instr->usesModifiers() && instr->opcode == aco_opcode::v_cvt_f32_ubyte0) { - switch (byte) { - case 0: instr->opcode = aco_opcode::v_cvt_f32_ubyte0; break; - case 1: instr->opcode = aco_opcode::v_cvt_f32_ubyte1; break; - case 2: instr->opcode = aco_opcode::v_cvt_f32_ubyte2; break; - case 3: instr->opcode = aco_opcode::v_cvt_f32_ubyte3; break; + if (instr->isVALU()) { + /* check if we can use opsel */ + if (instr->format == Format::VOP3) { + assert(rc == v2b && byte == 2); + instr->vop3().opsel |= 1 << idx; + return; } - return; - } else if (can_use_SDWA(chip, instr, false)) { - aco_ptr tmp = convert_to_SDWA(chip, instr); - return; - } else if (rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, idx, byte / 2)) { - instr->vop3().opsel |= (byte / 2) << idx; - return; - } else if (instr->isVOP3P() && byte == 2) { - VOP3P_instruction& vop3p = instr->vop3p(); - assert(!(vop3p.opsel_lo & (1 << idx))); - vop3p.opsel_lo |= 1 << idx; - vop3p.opsel_hi |= 1 << idx; + if (instr->isVOP3P()) { + assert(rc == v2b && byte == 2 && !(instr->vop3p().opsel_lo & (1 << idx))); + instr->vop3p().opsel_lo |= 1 << idx; + instr->vop3p().opsel_hi |= 1 << idx; + return; + } + if (instr->opcode == aco_opcode::v_cvt_f32_ubyte0) { + switch (byte) { + case 0: instr->opcode = aco_opcode::v_cvt_f32_ubyte0; break; + case 1: instr->opcode = aco_opcode::v_cvt_f32_ubyte1; break; + case 2: instr->opcode = aco_opcode::v_cvt_f32_ubyte2; break; + case 3: instr->opcode = aco_opcode::v_cvt_f32_ubyte3; break; + } + return; + } + + /* use SDWA */ + assert(can_use_SDWA(chip, instr, false)); + convert_to_SDWA(chip, instr); return; } - if (chip >= GFX8 && instr->opcode == aco_opcode::ds_write_b8 && byte == 2) { + assert(byte == 2); + if (instr->opcode == aco_opcode::ds_write_b8) instr->opcode = aco_opcode::ds_write_b8_d16_hi; - return; - } - if (chip >= GFX8 && instr->opcode == aco_opcode::ds_write_b16 && byte == 2) { + else if (instr->opcode == aco_opcode::ds_write_b16) instr->opcode = aco_opcode::ds_write_b16_d16_hi; - return; - } - - if (chip >= GFX9 && byte == 2) { - if (instr->opcode == aco_opcode::buffer_store_byte) - instr->opcode = aco_opcode::buffer_store_byte_d16_hi; - else if (instr->opcode == aco_opcode::buffer_store_short) - instr->opcode = aco_opcode::buffer_store_short_d16_hi; - else if (instr->opcode == aco_opcode::flat_store_byte) - instr->opcode = aco_opcode::flat_store_byte_d16_hi; - else if (instr->opcode == aco_opcode::flat_store_short) - instr->opcode = aco_opcode::flat_store_short_d16_hi; - else if (instr->opcode == aco_opcode::scratch_store_byte) - instr->opcode = aco_opcode::scratch_store_byte_d16_hi; - else if (instr->opcode == aco_opcode::scratch_store_short) - instr->opcode = aco_opcode::scratch_store_short_d16_hi; - else if (instr->opcode == aco_opcode::global_store_byte) - instr->opcode = aco_opcode::global_store_byte_d16_hi; - else if (instr->opcode == aco_opcode::global_store_short) - instr->opcode = aco_opcode::global_store_short_d16_hi; - else - unreachable("Something went wrong: Impossible register assignment."); - } + else if (instr->opcode == aco_opcode::buffer_store_byte) + instr->opcode = aco_opcode::buffer_store_byte_d16_hi; + else if (instr->opcode == aco_opcode::buffer_store_short) + instr->opcode = aco_opcode::buffer_store_short_d16_hi; + else if (instr->opcode == aco_opcode::flat_store_byte) + instr->opcode = aco_opcode::flat_store_byte_d16_hi; + else if (instr->opcode == aco_opcode::flat_store_short) + instr->opcode = aco_opcode::flat_store_short_d16_hi; + else if (instr->opcode == aco_opcode::scratch_store_byte) + instr->opcode = aco_opcode::scratch_store_byte_d16_hi; + else if (instr->opcode == aco_opcode::scratch_store_short) + instr->opcode = aco_opcode::scratch_store_short_d16_hi; + else if (instr->opcode == aco_opcode::global_store_byte) + instr->opcode = aco_opcode::global_store_byte_d16_hi; + else if (instr->opcode == aco_opcode::global_store_short) + instr->opcode = aco_opcode::global_store_short_d16_hi; + else + unreachable("Something went wrong: Impossible register assignment."); + return; } /* minimum_stride, bytes_written */