diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index 37179646d32..8a99e53ddaf 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -765,10 +765,45 @@ emit_instruction(asm_context& ctx, std::vector& out, Instruction* inst unreachable("Pseudo instructions should be lowered before assembly."); break; default: - /* TODO: VOP3/VOP3P can use DPP8/16 on GFX11 (encoding of src0 and DPP8/16 word seems same - * except abs/neg is ignored). src2 cannot be literal and src0/src1 must be VGPR. - */ - if (instr->isVOP3() && !instr->isDPP()) { + if (instr->isDPP16()) { + assert(ctx.gfx_level >= GFX8); + DPP16_instruction& dpp = instr->dpp16(); + + /* first emit the instruction without the DPP operand */ + Operand dpp_op = instr->operands[0]; + instr->operands[0] = Operand(PhysReg{250}, v1); + instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP16); + emit_instruction(ctx, out, instr); + uint32_t encoding = (0xF & dpp.row_mask) << 28; + encoding |= (0xF & dpp.bank_mask) << 24; + encoding |= dpp.abs[1] << 23; + encoding |= dpp.neg[1] << 22; + encoding |= dpp.abs[0] << 21; + encoding |= dpp.neg[0] << 20; + if (ctx.gfx_level >= GFX10) + encoding |= 1 << 18; /* set Fetch Inactive to match GFX9 behaviour */ + encoding |= dpp.bound_ctrl << 19; + encoding |= dpp.dpp_ctrl << 8; + encoding |= reg(ctx, dpp_op, 8); + encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0; + out.push_back(encoding); + return; + } else if (instr->isDPP8()) { + assert(ctx.gfx_level >= GFX10); + DPP8_instruction& dpp = instr->dpp8(); + + /* first emit the instruction without the DPP operand */ + Operand dpp_op = instr->operands[0]; + instr->operands[0] = Operand(PhysReg{234}, v1); + instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP8); + emit_instruction(ctx, out, instr); + uint32_t encoding = reg(ctx, dpp_op, 8); + encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0; + for (unsigned i = 0; i < 8; ++i) + encoding |= dpp.lane_sel[i] << (8 + i * 3); + out.push_back(encoding); + return; + } else if (instr->isVOP3()) { VALU_instruction& vop3 = instr->valu(); if (instr->isVOP2()) { @@ -855,45 +890,6 @@ emit_instruction(asm_context& ctx, std::vector& out, Instruction* inst for (unsigned i = 0; i < 3; i++) encoding |= vop3.neg_lo[i] << (29 + i); out.push_back(encoding); - - } else if (instr->isDPP16()) { - assert(ctx.gfx_level >= GFX8); - DPP16_instruction& dpp = instr->dpp16(); - - /* first emit the instruction without the DPP operand */ - Operand dpp_op = instr->operands[0]; - instr->operands[0] = Operand(PhysReg{250}, v1); - instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP16); - emit_instruction(ctx, out, instr); - uint32_t encoding = (0xF & dpp.row_mask) << 28; - encoding |= (0xF & dpp.bank_mask) << 24; - encoding |= dpp.abs[1] << 23; - encoding |= dpp.neg[1] << 22; - encoding |= dpp.abs[0] << 21; - encoding |= dpp.neg[0] << 20; - if (ctx.gfx_level >= GFX10) - encoding |= 1 << 18; /* set Fetch Inactive to match GFX9 behaviour */ - encoding |= dpp.bound_ctrl << 19; - encoding |= dpp.dpp_ctrl << 8; - encoding |= reg(ctx, dpp_op, 8); - encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0; - out.push_back(encoding); - return; - } else if (instr->isDPP8()) { - assert(ctx.gfx_level >= GFX10); - DPP8_instruction& dpp = instr->dpp8(); - - /* first emit the instruction without the DPP operand */ - Operand dpp_op = instr->operands[0]; - instr->operands[0] = Operand(PhysReg{234}, v1); - instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP8); - emit_instruction(ctx, out, instr); - uint32_t encoding = reg(ctx, dpp_op, 8); - encoding |= dpp.opsel[0] && !instr->isVOP3() ? 128 : 0; - for (unsigned i = 0; i < 8; ++i) - encoding |= dpp.lane_sel[i] << (8 + i * 3); - out.push_back(encoding); - return; } else if (instr->isSDWA()) { assert(ctx.gfx_level >= GFX8 && ctx.gfx_level < GFX11); SDWA_instruction& sdwa = instr->sdwa(); diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 528e0c36f40..7a95b00b0e9 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -98,8 +98,8 @@ enum class Format : std::uint16_t { PSEUDO_REDUCTION = 19, /* Vector ALU Formats */ - VOP3P = 20, VINTERP_INREG = 21, + VOP3P = 1 << 7, VOP1 = 1 << 8, VOP2 = 1 << 9, VOPC = 1 << 10, @@ -1248,7 +1248,7 @@ struct Instruction { return *(Pseudo_reduction_instruction*)this; } constexpr bool isReduction() const noexcept { return format == Format::PSEUDO_REDUCTION; } - constexpr bool isVOP3P() const noexcept { return format == Format::VOP3P; } + constexpr bool isVOP3P() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3P; } VINTERP_inreg_instruction& vinterp_inreg() noexcept { assert(isVINTERP_INREG()); diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py index 3fdbf9d2832..d656c0d7de9 100644 --- a/src/amd/compiler/aco_opcodes.py +++ b/src/amd/compiler/aco_opcodes.py @@ -69,8 +69,8 @@ class Format(Enum): PSEUDO_BRANCH = 17 PSEUDO_BARRIER = 18 PSEUDO_REDUCTION = 19 - VOP3P = 20 VINTERP_INREG = 21 + VOP3P = 1 << 7 VOP1 = 1 << 8 VOP2 = 1 << 9 VOPC = 1 << 10