diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index a3f70da6321..df94f21db85 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -2129,7 +2129,8 @@ lower_to_hw_instr(Program* program) bld.sop2(signext ? aco_opcode::s_bfe_i32 : aco_opcode::s_bfe_u32, dst, bld.def(s1, scc), op, Operand::c32((bits << 16) | offset)); } - } else if (dst.regClass() == v1 || ctx.program->chip_class <= GFX7) { + } else if ((dst.regClass() == v1 && op.regClass() == v1) || + ctx.program->chip_class <= GFX7) { assert(op.physReg().byte() == 0 && dst.physReg().byte() == 0); if (offset == (32 - bits) && op.regClass() != s1) { bld.vop2(signext ? aco_opcode::v_ashrrev_i32 : aco_opcode::v_lshrrev_b32, dst, @@ -2138,9 +2139,12 @@ lower_to_hw_instr(Program* program) bld.vop3(signext ? aco_opcode::v_bfe_i32 : aco_opcode::v_bfe_u32, dst, op, Operand::c32(offset), Operand::c32(bits)); } - } else if (dst.regClass() == v2b) { - bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op).instr->sdwa().sel[0] = - SubdwordSel(1, offset / 8, signext); + } else { + assert(dst.regClass() == v2b || dst.regClass() == v1b || op.regClass() == v2b || + op.regClass() == v1b); + SDWA_instruction& sdwa = + bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op).instr->sdwa(); + sdwa.sel[0] = SubdwordSel(bits / 8, offset / 8, signext); } break; } diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index dc25abab9ef..0badb3ce17a 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -466,22 +466,29 @@ validate_ir(Program* program) instr->operands[0].getTemp().type() == RegType::sgpr, "Can't extract/insert VGPR to SGPR", instr.get()); - if (instr->operands[0].getTemp().type() == RegType::vgpr) + if (instr->opcode == aco_opcode::p_insert) check(instr->operands[0].bytes() == instr->definitions[0].bytes(), - "Sizes of operand and definition must match", instr.get()); + "Sizes of p_insert data operand and definition must match", instr.get()); if (instr->definitions[0].getTemp().type() == RegType::sgpr) check(instr->definitions.size() >= 2 && instr->definitions[1].isFixed() && instr->definitions[1].physReg() == scc, - "SGPR extract/insert needs a SCC definition", instr.get()); + "SGPR extract/insert needs an SCC definition", instr.get()); - check(instr->operands[2].constantEquals(8) || instr->operands[2].constantEquals(16), - "Size must be 8 or 16", instr.get()); - check(instr->operands[2].constantValue() < instr->operands[0].getTemp().bytes() * 8u, - "Size must be smaller than source", instr.get()); + unsigned data_bits = instr->operands[0].getTemp().bytes() * 8u; + unsigned op_bits = instr->operands[2].constantValue(); - unsigned comp = - instr->operands[0].bytes() * 8u / MAX2(instr->operands[2].constantValue(), 1); + if (instr->opcode == aco_opcode::p_insert) { + check(op_bits == 8 || op_bits == 16, "Size must be 8 or 16", instr.get()); + check(op_bits < data_bits, "Size must be smaller than source", instr.get()); + } else if (instr->opcode == aco_opcode::p_extract) { + check(op_bits == 8 || op_bits == 16 || op_bits == 32, + "Size must be 8 or 16 or 32", instr.get()); + check(data_bits >= op_bits, "Can't extract more bits than what the data has.", + instr.get()); + } + + unsigned comp = data_bits / MAX2(op_bits, 1); check(instr->operands[1].constantValue() < comp, "Index must be in-bounds", instr.get()); }