aco: return references in instruction cast methods

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8595>
This commit is contained in:
Rhys Perry 2021-01-21 16:13:34 +00:00 committed by Marge Bot
parent 1d245cd18b
commit e115b01948
20 changed files with 752 additions and 753 deletions

View file

@ -109,7 +109,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
break;
}
case Format::SOPK: {
SOPK_instruction *sopk = instr->sopk();
SOPK_instruction& sopk = instr->sopk();
if (instr->opcode == aco_opcode::s_subvector_loop_begin) {
assert(ctx.chip_class >= GFX10);
@ -121,7 +121,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
/* Adjust s_subvector_loop_begin instruction to the address after the end */
out[ctx.subvector_begin_pos] |= (out.size() - ctx.subvector_begin_pos);
/* Adjust s_subvector_loop_end instruction to the address after the beginning */
sopk->imm = (uint16_t)(ctx.subvector_begin_pos - (int)out.size());
sopk.imm = (uint16_t)(ctx.subvector_begin_pos - (int)out.size());
ctx.subvector_begin_pos = -1;
}
@ -132,7 +132,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
instr->definitions[0].physReg() << 16 :
!instr->operands.empty() && instr->operands[0].physReg() <= 127 ?
instr->operands[0].physReg() << 16 : 0;
encoding |= sopk->imm;
encoding |= sopk.imm;
out.push_back(encoding);
break;
}
@ -157,19 +157,19 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
break;
}
case Format::SOPP: {
SOPP_instruction* sopp = instr->sopp();
SOPP_instruction& sopp = instr->sopp();
uint32_t encoding = (0b101111111 << 23);
encoding |= opcode << 16;
encoding |= (uint16_t) sopp->imm;
if (sopp->block != -1) {
sopp->pass_flags = 0;
ctx.branches.emplace_back(out.size(), sopp);
encoding |= (uint16_t) sopp.imm;
if (sopp.block != -1) {
sopp.pass_flags = 0;
ctx.branches.emplace_back(out.size(), &sopp);
}
out.push_back(encoding);
break;
}
case Format::SMEM: {
SMEM_instruction* smem = instr->smem();
SMEM_instruction& smem = instr->smem();
bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4);
bool is_load = !instr->definitions.empty();
uint32_t encoding = 0;
@ -196,16 +196,16 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
if (ctx.chip_class <= GFX9) {
encoding = (0b110000 << 26);
assert(!smem->dlc); /* Device-level coherent is not supported on GFX9 and lower */
encoding |= smem->nv ? 1 << 15 : 0;
assert(!smem.dlc); /* Device-level coherent is not supported on GFX9 and lower */
encoding |= smem.nv ? 1 << 15 : 0;
} else {
encoding = (0b111101 << 26);
assert(!smem->nv); /* Non-volatile is not supported on GFX10 */
encoding |= smem->dlc ? 1 << 14 : 0;
assert(!smem.nv); /* Non-volatile is not supported on GFX10 */
encoding |= smem.dlc ? 1 << 14 : 0;
}
encoding |= opcode << 18;
encoding |= smem->glc ? 1 << 16 : 0;
encoding |= smem.glc ? 1 << 16 : 0;
if (ctx.chip_class <= GFX9) {
if (instr->operands.size() >= 2)
@ -284,7 +284,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
break;
}
case Format::VINTRP: {
Interp_instruction* interp = instr->vintrp();
Interp_instruction& interp = instr->vintrp();
uint32_t encoding = 0;
if (instr->opcode == aco_opcode::v_interp_p1ll_f16 ||
@ -304,8 +304,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
out.push_back(encoding);
encoding = 0;
encoding |= interp->attribute;
encoding |= interp->component << 6;
encoding |= interp.attribute;
encoding |= interp.component << 6;
encoding |= instr->operands[0].physReg() << 9;
if (instr->opcode == aco_opcode::v_interp_p2_f16 ||
instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||
@ -323,8 +323,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
assert(encoding);
encoding |= (0xFF & instr->definitions[0].physReg()) << 18;
encoding |= opcode << 16;
encoding |= interp->attribute << 10;
encoding |= interp->component << 8;
encoding |= interp.attribute << 10;
encoding |= interp.component << 8;
if (instr->opcode == aco_opcode::v_interp_mov_f32)
encoding |= (0x3 & instr->operands[0].constantValue());
else
@ -334,17 +334,17 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
break;
}
case Format::DS: {
DS_instruction* ds = instr->ds();
DS_instruction& ds = instr->ds();
uint32_t encoding = (0b110110 << 26);
if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
encoding |= opcode << 17;
encoding |= (ds->gds ? 1 : 0) << 16;
encoding |= (ds.gds ? 1 : 0) << 16;
} else {
encoding |= opcode << 18;
encoding |= (ds->gds ? 1 : 0) << 17;
encoding |= (ds.gds ? 1 : 0) << 17;
}
encoding |= ((0xFF & ds->offset1) << 8);
encoding |= (0xFFFF & ds->offset0);
encoding |= ((0xFF & ds.offset1) << 8);
encoding |= (0xFFFF & ds.offset0);
out.push_back(encoding);
encoding = 0;
unsigned reg = !instr->definitions.empty() ? instr->definitions[0].physReg() : 0;
@ -358,30 +358,30 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
break;
}
case Format::MUBUF: {
MUBUF_instruction* mubuf = instr->mubuf();
MUBUF_instruction& mubuf = instr->mubuf();
uint32_t encoding = (0b111000 << 26);
encoding |= opcode << 18;
encoding |= (mubuf->lds ? 1 : 0) << 16;
encoding |= (mubuf->glc ? 1 : 0) << 14;
encoding |= (mubuf->idxen ? 1 : 0) << 13;
assert(!mubuf->addr64 || ctx.chip_class <= GFX7);
encoding |= (mubuf.lds ? 1 : 0) << 16;
encoding |= (mubuf.glc ? 1 : 0) << 14;
encoding |= (mubuf.idxen ? 1 : 0) << 13;
assert(!mubuf.addr64 || ctx.chip_class <= GFX7);
if (ctx.chip_class == GFX6 || ctx.chip_class == GFX7)
encoding |= (mubuf->addr64 ? 1 : 0) << 15;
encoding |= (mubuf->offen ? 1 : 0) << 12;
encoding |= (mubuf.addr64 ? 1 : 0) << 15;
encoding |= (mubuf.offen ? 1 : 0) << 12;
if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
assert(!mubuf->dlc); /* Device-level coherent is not supported on GFX9 and lower */
encoding |= (mubuf->slc ? 1 : 0) << 17;
assert(!mubuf.dlc); /* Device-level coherent is not supported on GFX9 and lower */
encoding |= (mubuf.slc ? 1 : 0) << 17;
} else if (ctx.chip_class >= GFX10) {
encoding |= (mubuf->dlc ? 1 : 0) << 15;
encoding |= (mubuf.dlc ? 1 : 0) << 15;
}
encoding |= 0x0FFF & mubuf->offset;
encoding |= 0x0FFF & mubuf.offset;
out.push_back(encoding);
encoding = 0;
if (ctx.chip_class <= GFX7 || ctx.chip_class >= GFX10) {
encoding |= (mubuf->slc ? 1 : 0) << 22;
encoding |= (mubuf.slc ? 1 : 0) << 22;
}
encoding |= instr->operands[2].physReg() << 24;
encoding |= (mubuf->tfe ? 1 : 0) << 23;
encoding |= (mubuf.tfe ? 1 : 0) << 23;
encoding |= (instr->operands[0].physReg() >> 2) << 16;
unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg() : instr->definitions[0].physReg();
encoding |= (0xFF & reg) << 8;
@ -390,17 +390,17 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
break;
}
case Format::MTBUF: {
MTBUF_instruction* mtbuf = instr->mtbuf();
MTBUF_instruction& mtbuf = instr->mtbuf();
uint32_t img_format = ac_get_tbuffer_format(ctx.chip_class, mtbuf->dfmt, mtbuf->nfmt);
uint32_t img_format = ac_get_tbuffer_format(ctx.chip_class, mtbuf.dfmt, mtbuf.nfmt);
uint32_t encoding = (0b111010 << 26);
assert(img_format <= 0x7F);
assert(!mtbuf->dlc || ctx.chip_class >= GFX10);
encoding |= (mtbuf->dlc ? 1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */
encoding |= (mtbuf->glc ? 1 : 0) << 14;
encoding |= (mtbuf->idxen ? 1 : 0) << 13;
encoding |= (mtbuf->offen ? 1 : 0) << 12;
encoding |= 0x0FFF & mtbuf->offset;
assert(!mtbuf.dlc || ctx.chip_class >= GFX10);
encoding |= (mtbuf.dlc ? 1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */
encoding |= (mtbuf.glc ? 1 : 0) << 14;
encoding |= (mtbuf.idxen ? 1 : 0) << 13;
encoding |= (mtbuf.offen ? 1 : 0) << 12;
encoding |= 0x0FFF & mtbuf.offset;
encoding |= (img_format << 19); /* Handles both the GFX10 FORMAT and the old NFMT+DFMT */
if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
@ -413,8 +413,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
encoding = 0;
encoding |= instr->operands[2].physReg() << 24;
encoding |= (mtbuf->tfe ? 1 : 0) << 23;
encoding |= (mtbuf->slc ? 1 : 0) << 22;
encoding |= (mtbuf.tfe ? 1 : 0) << 23;
encoding |= (mtbuf.slc ? 1 : 0) << 22;
encoding |= (instr->operands[0].physReg() >> 2) << 16;
unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg() : instr->definitions[0].physReg();
encoding |= (0xFF & reg) << 8;
@ -437,26 +437,26 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
assert(!use_nsa || ctx.chip_class >= GFX10);
unsigned nsa_dwords = use_nsa ? DIV_ROUND_UP(addr_dwords - 1, 4) : 0;
MIMG_instruction* mimg = instr->mimg();
MIMG_instruction& mimg = instr->mimg();
uint32_t encoding = (0b111100 << 26);
encoding |= mimg->slc ? 1 << 25 : 0;
encoding |= mimg.slc ? 1 << 25 : 0;
encoding |= opcode << 18;
encoding |= mimg->lwe ? 1 << 17 : 0;
encoding |= mimg->tfe ? 1 << 16 : 0;
encoding |= mimg->glc ? 1 << 13 : 0;
encoding |= mimg->unrm ? 1 << 12 : 0;
encoding |= mimg.lwe ? 1 << 17 : 0;
encoding |= mimg.tfe ? 1 << 16 : 0;
encoding |= mimg.glc ? 1 << 13 : 0;
encoding |= mimg.unrm ? 1 << 12 : 0;
if (ctx.chip_class <= GFX9) {
assert(!mimg->dlc); /* Device-level coherent is not supported on GFX9 and lower */
assert(!mimg->r128);
encoding |= mimg->a16 ? 1 << 15 : 0;
encoding |= mimg->da ? 1 << 14 : 0;
assert(!mimg.dlc); /* Device-level coherent is not supported on GFX9 and lower */
assert(!mimg.r128);
encoding |= mimg.a16 ? 1 << 15 : 0;
encoding |= mimg.da ? 1 << 14 : 0;
} else {
encoding |= mimg->r128 ? 1 << 15 : 0; /* GFX10: A16 moved to 2nd word, R128 replaces it in 1st word */
encoding |= mimg.r128 ? 1 << 15 : 0; /* GFX10: A16 moved to 2nd word, R128 replaces it in 1st word */
encoding |= nsa_dwords << 1;
encoding |= mimg->dim << 3; /* GFX10: dimensionality instead of declare array */
encoding |= mimg->dlc ? 1 << 7 : 0;
encoding |= mimg.dim << 3; /* GFX10: dimensionality instead of declare array */
encoding |= mimg.dlc ? 1 << 7 : 0;
}
encoding |= (0xF & mimg->dmask) << 8;
encoding |= (0xF & mimg.dmask) << 8;
out.push_back(encoding);
encoding = (0xFF & instr->operands[3].physReg()); /* VADDR */
if (!instr->definitions.empty()) {
@ -468,10 +468,10 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
if (!instr->operands[1].isUndefined())
encoding |= (0x1F & (instr->operands[1].physReg() >> 2)) << 21; /* sampler */
assert(!mimg->d16 || ctx.chip_class >= GFX9);
encoding |= mimg->d16 ? 1 << 15 : 0;
assert(!mimg.d16 || ctx.chip_class >= GFX9);
encoding |= mimg.d16 ? 1 << 15 : 0;
if (ctx.chip_class >= GFX10) {
encoding |= mimg->a16 ? 1 << 14 : 0; /* GFX10: A16 still exists, but is in a different place */
encoding |= mimg.a16 ? 1 << 14 : 0; /* GFX10: A16 still exists, but is in a different place */
}
out.push_back(encoding);
@ -487,33 +487,33 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
case Format::FLAT:
case Format::SCRATCH:
case Format::GLOBAL: {
FLAT_instruction *flat = instr->flatlike();
FLAT_instruction& flat = instr->flatlike();
uint32_t encoding = (0b110111 << 26);
encoding |= opcode << 18;
if (ctx.chip_class <= GFX9) {
assert(flat->offset <= 0x1fff);
encoding |= flat->offset & 0x1fff;
assert(flat.offset <= 0x1fff);
encoding |= flat.offset & 0x1fff;
} else if (instr->isFlat()) {
/* GFX10 has a 12-bit immediate OFFSET field,
* but it has a hw bug: it ignores the offset, called FlatSegmentOffsetBug
*/
assert(flat->offset == 0);
assert(flat.offset == 0);
} else {
assert(flat->offset <= 0xfff);
encoding |= flat->offset & 0xfff;
assert(flat.offset <= 0xfff);
encoding |= flat.offset & 0xfff;
}
if (instr->isScratch())
encoding |= 1 << 14;
else if (instr->isGlobal())
encoding |= 2 << 14;
encoding |= flat->lds ? 1 << 13 : 0;
encoding |= flat->glc ? 1 << 16 : 0;
encoding |= flat->slc ? 1 << 17 : 0;
encoding |= flat.lds ? 1 << 13 : 0;
encoding |= flat.glc ? 1 << 16 : 0;
encoding |= flat.slc ? 1 << 17 : 0;
if (ctx.chip_class >= GFX10) {
assert(!flat->nv);
encoding |= flat->dlc ? 1 << 12 : 0;
assert(!flat.nv);
encoding |= flat.dlc ? 1 << 12 : 0;
} else {
assert(!flat->dlc);
assert(!flat.dlc);
}
out.push_back(encoding);
encoding = (0xFF & instr->operands[0].physReg());
@ -531,12 +531,12 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
else
encoding |= sgpr_null << 16;
}
encoding |= flat->nv ? 1 << 23 : 0;
encoding |= flat.nv ? 1 << 23 : 0;
out.push_back(encoding);
break;
}
case Format::EXP: {
Export_instruction* exp = instr->exp();
Export_instruction& exp = instr->exp();
uint32_t encoding;
if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
encoding = (0b110001 << 26);
@ -544,16 +544,16 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
encoding = (0b111110 << 26);
}
encoding |= exp->valid_mask ? 0b1 << 12 : 0;
encoding |= exp->done ? 0b1 << 11 : 0;
encoding |= exp->compressed ? 0b1 << 10 : 0;
encoding |= exp->dest << 4;
encoding |= exp->enabled_mask;
encoding |= exp.valid_mask ? 0b1 << 12 : 0;
encoding |= exp.done ? 0b1 << 11 : 0;
encoding |= exp.compressed ? 0b1 << 10 : 0;
encoding |= exp.dest << 4;
encoding |= exp.enabled_mask;
out.push_back(encoding);
encoding = 0xFF & exp->operands[0].physReg();
encoding |= (0xFF & exp->operands[1].physReg()) << 8;
encoding |= (0xFF & exp->operands[2].physReg()) << 16;
encoding |= (0xFF & exp->operands[3].physReg()) << 24;
encoding = 0xFF & exp.operands[0].physReg();
encoding |= (0xFF & exp.operands[1].physReg()) << 8;
encoding |= (0xFF & exp.operands[2].physReg()) << 16;
encoding |= (0xFF & exp.operands[3].physReg()) << 24;
out.push_back(encoding);
break;
}
@ -564,7 +564,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
break;
default:
if (instr->isVOP3()) {
VOP3_instruction* vop3 = instr->vop3();
VOP3_instruction& vop3 = instr->vop3();
if (instr->isVOP2()) {
opcode = opcode + 0x100;
@ -590,14 +590,14 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
if (ctx.chip_class <= GFX7) {
encoding |= opcode << 17;
encoding |= (vop3->clamp ? 1 : 0) << 11;
encoding |= (vop3.clamp ? 1 : 0) << 11;
} else {
encoding |= opcode << 16;
encoding |= (vop3->clamp ? 1 : 0) << 15;
encoding |= (vop3.clamp ? 1 : 0) << 15;
}
encoding |= vop3->opsel << 11;
encoding |= vop3.opsel << 11;
for (unsigned i = 0; i < 3; i++)
encoding |= vop3->abs[i] << (8+i);
encoding |= vop3.abs[i] << (8+i);
if (instr->definitions.size() == 2)
encoding |= instr->definitions[1].physReg() << 8;
encoding |= (0xFF & instr->definitions[0].physReg());
@ -609,13 +609,13 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
for (unsigned i = 0; i < instr->operands.size(); i++)
encoding |= instr->operands[i].physReg() << (i * 9);
}
encoding |= vop3->omod << 27;
encoding |= vop3.omod << 27;
for (unsigned i = 0; i < 3; i++)
encoding |= vop3->neg[i] << (29+i);
encoding |= vop3.neg[i] << (29+i);
out.push_back(encoding);
} else if (instr->isVOP3P()) {
VOP3P_instruction* vop3 = instr->vop3p();
VOP3P_instruction& vop3 = instr->vop3p();
uint32_t encoding;
if (ctx.chip_class == GFX9) {
@ -627,45 +627,45 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
}
encoding |= opcode << 16;
encoding |= (vop3->clamp ? 1 : 0) << 15;
encoding |= vop3->opsel_lo << 11;
encoding |= ((vop3->opsel_hi & 0x4) ? 1 : 0) << 14;
encoding |= (vop3.clamp ? 1 : 0) << 15;
encoding |= vop3.opsel_lo << 11;
encoding |= ((vop3.opsel_hi & 0x4) ? 1 : 0) << 14;
for (unsigned i = 0; i < 3; i++)
encoding |= vop3->neg_hi[i] << (8+i);
encoding |= vop3.neg_hi[i] << (8+i);
encoding |= (0xFF & instr->definitions[0].physReg());
out.push_back(encoding);
encoding = 0;
for (unsigned i = 0; i < instr->operands.size(); i++)
encoding |= instr->operands[i].physReg() << (i * 9);
encoding |= (vop3->opsel_hi & 0x3) << 27;
encoding |= (vop3.opsel_hi & 0x3) << 27;
for (unsigned i = 0; i < 3; i++)
encoding |= vop3->neg_lo[i] << (29+i);
encoding |= vop3.neg_lo[i] << (29+i);
out.push_back(encoding);
} else if (instr->isDPP()){
assert(ctx.chip_class >= GFX8);
DPP_instruction* dpp = instr->dpp();
DPP_instruction& dpp = instr->dpp();
/* first emit the instruction without the DPP operand */
Operand dpp_op = instr->operands[0];
instr->operands[0] = Operand(PhysReg{250}, v1);
instr->format = (Format) ((uint16_t) instr->format & ~(uint16_t)Format::DPP);
emit_instruction(ctx, out, instr);
uint32_t encoding = (0xF & dpp->row_mask) << 28;
encoding |= (0xF & dpp->bank_mask) << 24;
encoding |= dpp->abs[1] << 23;
encoding |= dpp->neg[1] << 22;
encoding |= dpp->abs[0] << 21;
encoding |= dpp->neg[0] << 20;
uint32_t encoding = (0xF & dpp.row_mask) << 28;
encoding |= (0xF & dpp.bank_mask) << 24;
encoding |= dpp.abs[1] << 23;
encoding |= dpp.neg[1] << 22;
encoding |= dpp.abs[0] << 21;
encoding |= dpp.neg[0] << 20;
if (ctx.chip_class >= GFX10)
encoding |= 1 << 18; /* set Fetch Inactive to match GFX9 behaviour */
encoding |= dpp->bound_ctrl << 19;
encoding |= dpp->dpp_ctrl << 8;
encoding |= dpp.bound_ctrl << 19;
encoding |= dpp.dpp_ctrl << 8;
encoding |= (0xFF) & dpp_op.physReg();
out.push_back(encoding);
return;
} else if (instr->isSDWA()) {
SDWA_instruction* sdwa = instr->sdwa();
SDWA_instruction& sdwa = instr->sdwa();
/* first emit the instruction without the SDWA operand */
Operand sdwa_op = instr->operands[0];
@ -680,27 +680,27 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
encoding |= instr->definitions[0].physReg() << 8;
encoding |= 1 << 15;
}
encoding |= (sdwa->clamp ? 1 : 0) << 13;
encoding |= (sdwa.clamp ? 1 : 0) << 13;
} else {
encoding |= get_sdwa_sel(sdwa->dst_sel, instr->definitions[0].physReg()) << 8;
uint32_t dst_u = sdwa->dst_sel & sdwa_sext ? 1 : 0;
if (sdwa->dst_preserve || (sdwa->dst_sel & sdwa_isra))
encoding |= get_sdwa_sel(sdwa.dst_sel, instr->definitions[0].physReg()) << 8;
uint32_t dst_u = sdwa.dst_sel & sdwa_sext ? 1 : 0;
if (sdwa.dst_preserve || (sdwa.dst_sel & sdwa_isra))
dst_u = 2;
encoding |= dst_u << 11;
encoding |= (sdwa->clamp ? 1 : 0) << 13;
encoding |= sdwa->omod << 14;
encoding |= (sdwa.clamp ? 1 : 0) << 13;
encoding |= sdwa.omod << 14;
}
encoding |= get_sdwa_sel(sdwa->sel[0], sdwa_op.physReg()) << 16;
encoding |= sdwa->sel[0] & sdwa_sext ? 1 << 19 : 0;
encoding |= sdwa->abs[0] << 21;
encoding |= sdwa->neg[0] << 20;
encoding |= get_sdwa_sel(sdwa.sel[0], sdwa_op.physReg()) << 16;
encoding |= sdwa.sel[0] & sdwa_sext ? 1 << 19 : 0;
encoding |= sdwa.abs[0] << 21;
encoding |= sdwa.neg[0] << 20;
if (instr->operands.size() >= 2) {
encoding |= get_sdwa_sel(sdwa->sel[1], instr->operands[1].physReg()) << 24;
encoding |= sdwa->sel[1] & sdwa_sext ? 1 << 27 : 0;
encoding |= sdwa->abs[1] << 29;
encoding |= sdwa->neg[1] << 28;
encoding |= get_sdwa_sel(sdwa.sel[1], instr->operands[1].physReg()) << 24;
encoding |= sdwa.sel[1] & sdwa_sext ? 1 << 27 : 0;
encoding |= sdwa.abs[1] << 29;
encoding |= sdwa.neg[1] << 28;
}
encoding |= 0xFF & sdwa_op.physReg();
@ -750,16 +750,16 @@ void fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
while ( it != block.instructions.rend())
{
if ((*it)->isEXP()) {
Export_instruction* exp = (*it)->exp();
Export_instruction& exp = (*it)->exp();
if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG) {
if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= (V_008DFC_SQ_EXP_POS + 3)) {
exp->done = true;
if (exp.dest >= V_008DFC_SQ_EXP_POS && exp.dest <= (V_008DFC_SQ_EXP_POS + 3)) {
exp.done = true;
exported = true;
break;
}
} else {
exp->done = true;
exp->valid_mask = true;
exp.done = true;
exp.valid_mask = true;
exported = true;
break;
}

View file

@ -180,7 +180,7 @@ struct NOP_ctx_gfx10 {
int get_wait_states(aco_ptr<Instruction>& instr)
{
if (instr->opcode == aco_opcode::s_nop)
return instr->sopp()->imm + 1;
return instr->sopp().imm + 1;
else if (instr->opcode == aco_opcode::p_constaddr)
return 3; /* lowered to 3 instructions in the assembler */
else
@ -351,7 +351,7 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c
if (instr->opcode == aco_opcode::s_sendmsg || instr->opcode == aco_opcode::s_ttracedata)
NOPs = MAX2(NOPs, ctx.salu_wr_m0_then_gds_msg_ttrace);
} else if (instr->isDS() && instr->ds()->gds) {
} else if (instr->isDS() && instr->ds().gds) {
NOPs = MAX2(NOPs, ctx.salu_wr_m0_then_gds_msg_ttrace);
} else if (instr->isVALU() || instr->isVINTRP()) {
for (Operand op : instr->operands) {
@ -407,7 +407,7 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c
if (program->chip_class == GFX9) {
bool lds_scratch_global = (instr->isScratch() || instr->isGlobal()) &&
instr->flatlike()->lds;
instr->flatlike().lds;
if (instr->isVINTRP() ||
instr->opcode == aco_opcode::ds_read_addtid_b32 ||
instr->opcode == aco_opcode::ds_write_addtid_b32 ||
@ -480,10 +480,10 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c
ctx.salu_wr_m0_then_moverel = 1;
}
} else if (instr->opcode == aco_opcode::s_setreg_b32 || instr->opcode == aco_opcode::s_setreg_imm32_b32) {
SOPK_instruction *sopk = instr->sopk();
unsigned offset = (sopk->imm >> 6) & 0x1f;
unsigned size = ((sopk->imm >> 11) & 0x1f) + 1;
unsigned reg = sopk->imm & 0x3f;
SOPK_instruction& sopk = instr->sopk();
unsigned offset = (sopk.imm >> 6) & 0x1f;
unsigned size = ((sopk.imm >> 11) & 0x1f) + 1;
unsigned reg = sopk.imm & 0x3f;
ctx.setreg_then_getsetreg = 2;
if (reg == 1 && offset >= 28 && size > (28 - offset))
@ -603,13 +603,13 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10
} else if (instr->isSALU() || instr->isSMEM()) {
if (instr->opcode == aco_opcode::s_waitcnt) {
/* Hazard is mitigated by "s_waitcnt vmcnt(0)" */
uint16_t imm = instr->sopp()->imm;
uint16_t imm = instr->sopp().imm;
unsigned vmcnt = (imm & 0xF) | ((imm & (0x3 << 14)) >> 10);
if (vmcnt == 0)
ctx.sgprs_read_by_VMEM.reset();
} else if (instr->opcode == aco_opcode::s_waitcnt_depctr) {
/* Hazard is mitigated by a s_waitcnt_depctr with a magic imm */
if (instr->sopp()->imm == 0xffe3)
if (instr->sopp().imm == 0xffe3)
ctx.sgprs_read_by_VMEM.reset();
}
@ -667,7 +667,7 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10
}
} else if (instr->opcode == aco_opcode::s_waitcnt_depctr) {
/* s_waitcnt_depctr can mitigate the problem if it has a magic imm */
if ((instr->sopp()->imm & 0xfffe) == 0xfffe)
if ((instr->sopp().imm & 0xfffe) == 0xfffe)
ctx.has_nonVALU_exec_read = false;
}
@ -694,12 +694,12 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10
ctx.sgprs_read_by_SMEM.reset();
} else {
/* Reducing lgkmcnt count to 0 always mitigates the hazard. */
const SOPP_instruction *sopp = instr->sopp();
if (sopp->opcode == aco_opcode::s_waitcnt_lgkmcnt) {
if (sopp->imm == 0 && sopp->definitions[0].physReg() == sgpr_null)
const SOPP_instruction& sopp = instr->sopp();
if (sopp.opcode == aco_opcode::s_waitcnt_lgkmcnt) {
if (sopp.imm == 0 && sopp.definitions[0].physReg() == sgpr_null)
ctx.sgprs_read_by_SMEM.reset();
} else if (sopp->opcode == aco_opcode::s_waitcnt) {
unsigned lgkm = (sopp->imm >> 8) & 0x3f;
} else if (sopp.opcode == aco_opcode::s_waitcnt) {
unsigned lgkm = (sopp.imm >> 8) & 0x3f;
if (lgkm == 0)
ctx.sgprs_read_by_SMEM.reset();
}
@ -724,8 +724,8 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10
ctx.has_branch_after_DS = ctx.has_DS;
} else if (instr->opcode == aco_opcode::s_waitcnt_vscnt) {
/* Only s_waitcnt_vscnt can mitigate the hazard */
const SOPK_instruction *sopk = instr->sopk();
if (sopk->definitions[0].physReg() == sgpr_null && sopk->imm == 0)
const SOPK_instruction& sopk = instr->sopk();
if (sopk.definitions[0].physReg() == sgpr_null && sopk.imm == 0)
ctx.has_VMEM = ctx.has_branch_after_VMEM = ctx.has_DS = ctx.has_branch_after_DS = false;
}
if ((ctx.has_VMEM && ctx.has_branch_after_DS) || (ctx.has_DS && ctx.has_branch_after_VMEM)) {

View file

@ -98,13 +98,13 @@ struct exec_ctx {
bool needs_exact(aco_ptr<Instruction>& instr) {
if (instr->isMUBUF()) {
return instr->mubuf()->disable_wqm;
return instr->mubuf().disable_wqm;
} else if (instr->isMTBUF()) {
return instr->mtbuf()->disable_wqm;
return instr->mtbuf().disable_wqm;
} else if (instr->isMIMG()) {
return instr->mimg()->disable_wqm;
return instr->mimg().disable_wqm;
} else if (instr->isFlatLike()) {
return instr->flatlike()->disable_wqm;
return instr->flatlike().disable_wqm;
} else {
return instr->isEXP();
}
@ -908,12 +908,12 @@ void add_branch_code(exec_ctx& ctx, Block* block)
}
if (block->kind & block_kind_uniform) {
Pseudo_branch_instruction* branch = block->instructions.back()->branch();
if (branch->opcode == aco_opcode::p_branch) {
branch->target[0] = block->linear_succs[0];
Pseudo_branch_instruction& branch = block->instructions.back()->branch();
if (branch.opcode == aco_opcode::p_branch) {
branch.target[0] = block->linear_succs[0];
} else {
branch->target[0] = block->linear_succs[1];
branch->target[1] = block->linear_succs[0];
branch.target[0] = block->linear_succs[1];
branch.target[1] = block->linear_succs[0];
}
return;
}

View file

@ -422,7 +422,7 @@ wait_imm check_instr(Instruction* instr, wait_ctx& ctx)
continue;
/* LDS reads and writes return in the order they were issued. same for GDS */
if (instr->isDS() && (it->second.events & lgkm_events) == (instr->ds()->gds ? event_gds : event_lds))
if (instr->isDS() && (it->second.events & lgkm_events) == (instr->ds().gds ? event_gds : event_lds))
continue;
wait.combine(it->second.imm);
@ -437,10 +437,10 @@ wait_imm parse_wait_instr(wait_ctx& ctx, Instruction *instr)
if (instr->opcode == aco_opcode::s_waitcnt_vscnt &&
instr->definitions[0].physReg() == sgpr_null) {
wait_imm imm;
imm.vs = std::min<uint8_t>(imm.vs, instr->sopk()->imm);
imm.vs = std::min<uint8_t>(imm.vs, instr->sopk().imm);
return imm;
} else if (instr->opcode == aco_opcode::s_waitcnt) {
return wait_imm(ctx.chip_class, instr->sopp()->imm);
return wait_imm(ctx.chip_class, instr->sopp().imm);
}
return wait_imm();
}
@ -521,15 +521,15 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx, memory_sync_info sync_info)
* TODO: Refine this when we have proper alias analysis.
*/
if (ctx.pending_s_buffer_store &&
!instr->smem()->definitions.empty() &&
!instr->smem()->sync.can_reorder()) {
!instr->smem().definitions.empty() &&
!instr->smem().sync.can_reorder()) {
imm.lgkm = 0;
}
}
if (ctx.program->early_rast && instr->opcode == aco_opcode::exp) {
if (instr->exp()->dest >= V_008DFC_SQ_EXP_POS &&
instr->exp()->dest < V_008DFC_SQ_EXP_PRIM) {
if (instr->exp().dest >= V_008DFC_SQ_EXP_POS &&
instr->exp().dest < V_008DFC_SQ_EXP_PRIM) {
/* With early_rast, the HW will start clipping and rasterization after the 1st DONE pos export.
* Wait for all stores (and atomics) to complete, so PS can read them.
@ -543,7 +543,7 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx, memory_sync_info sync_info)
}
if (instr->opcode == aco_opcode::p_barrier)
imm.combine(perform_barrier(ctx, instr->barrier()->sync, semantic_acqrel));
imm.combine(perform_barrier(ctx, instr->barrier().sync, semantic_acqrel));
else
imm.combine(perform_barrier(ctx, sync_info, semantic_release));
@ -760,12 +760,12 @@ void gen(Instruction* instr, wait_ctx& ctx)
{
switch (instr->format) {
case Format::EXP: {
Export_instruction* exp_instr = instr->exp();
Export_instruction& exp_instr = instr->exp();
wait_event ev;
if (exp_instr->dest <= 9)
if (exp_instr.dest <= 9)
ev = event_exp_mrt_null;
else if (exp_instr->dest <= 15)
else if (exp_instr.dest <= 15)
ev = event_exp_pos;
else
ev = event_exp_param;
@ -774,10 +774,10 @@ void gen(Instruction* instr, wait_ctx& ctx)
/* insert new entries for exported vgprs */
for (unsigned i = 0; i < 4; i++)
{
if (exp_instr->enabled_mask & (1 << i)) {
unsigned idx = exp_instr->compressed ? i >> 1 : i;
assert(idx < exp_instr->operands.size());
insert_wait_entry(ctx, exp_instr->operands[idx], ev);
if (exp_instr.enabled_mask & (1 << i)) {
unsigned idx = exp_instr.compressed ? i >> 1 : i;
assert(idx < exp_instr.operands.size());
insert_wait_entry(ctx, exp_instr.operands[idx], ev);
}
}
@ -785,38 +785,38 @@ void gen(Instruction* instr, wait_ctx& ctx)
break;
}
case Format::FLAT: {
FLAT_instruction *flat = instr->flat();
FLAT_instruction& flat = instr->flat();
if (ctx.chip_class < GFX10 && !instr->definitions.empty())
update_counters_for_flat_load(ctx, flat->sync);
update_counters_for_flat_load(ctx, flat.sync);
else
update_counters(ctx, event_flat, flat->sync);
update_counters(ctx, event_flat, flat.sync);
if (!instr->definitions.empty())
insert_wait_entry(ctx, instr->definitions[0], event_flat);
break;
}
case Format::SMEM: {
SMEM_instruction *smem = instr->smem();
update_counters(ctx, event_smem, smem->sync);
SMEM_instruction& smem = instr->smem();
update_counters(ctx, event_smem, smem.sync);
if (!instr->definitions.empty())
insert_wait_entry(ctx, instr->definitions[0], event_smem);
else if (ctx.chip_class >= GFX10 &&
!smem->sync.can_reorder())
!smem.sync.can_reorder())
ctx.pending_s_buffer_store = true;
break;
}
case Format::DS: {
DS_instruction *ds = instr->ds();
update_counters(ctx, ds->gds ? event_gds : event_lds, ds->sync);
if (ds->gds)
DS_instruction& ds = instr->ds();
update_counters(ctx, ds.gds ? event_gds : event_lds, ds.sync);
if (ds.gds)
update_counters(ctx, event_gds_gpr_lock);
if (!instr->definitions.empty())
insert_wait_entry(ctx, instr->definitions[0], ds->gds ? event_gds : event_lds);
insert_wait_entry(ctx, instr->definitions[0], ds.gds ? event_gds : event_lds);
if (ds->gds) {
if (ds.gds) {
for (const Operand& op : instr->operands)
insert_wait_entry(ctx, op, event_gds_gpr_lock);
insert_wait_entry(ctx, exec, s2, event_gds_gpr_lock, false);

View file

@ -1224,7 +1224,7 @@ Temp emit_floor_f64(isel_context *ctx, Builder& bld, Definition dst, Temp val)
Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1);
Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src0, v);
add->vop3()->neg[1] = true;
add->vop3().neg[1] = true;
return add->definitions[0].getTemp();
}
@ -1692,7 +1692,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
std::swap(src0, src1);
add_instr = bld.vop2_e64(aco_opcode::v_add_u16, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
}
add_instr->vop3()->clamp = 1;
add_instr->vop3().clamp = 1;
} else if (dst.regClass() == v1) {
if (ctx->options->chip_class >= GFX9) {
aco_ptr<VOP3_instruction> add{create_instruction<VOP3_instruction>(aco_opcode::v_add_u32, asVOP3(Format::VOP2), 2, 1)};
@ -1944,9 +1944,9 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
case nir_op_fsub: {
if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
Instruction* add = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_f16, dst);
VOP3P_instruction* sub = add->vop3p();
sub->neg_lo[1] = true;
sub->neg_hi[1] = true;
VOP3P_instruction& sub = add->vop3p();
sub.neg_lo[1] = true;
sub.neg_hi[1] = true;
break;
}
@ -1965,7 +1965,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
} else if (dst.regClass() == v2) {
Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst),
as_vgpr(ctx, src0), as_vgpr(ctx, src1));
add->vop3()->neg[1] = true;
add->vop3().neg[1] = true;
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
@ -2101,7 +2101,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
Instruction* vop3p = bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand(uint16_t(0x3C00)),
instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1);
vop3p->vop3p()->clamp = true;
vop3p->vop3p().clamp = true;
emit_split_vector(ctx, dst, 2);
break;
}
@ -2114,7 +2114,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
// TODO: confirm that this holds under any circumstances
} else if (dst.regClass() == v2) {
Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src, Operand(0u));
add->vop3()->clamp = true;
add->vop3().clamp = true;
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
@ -2253,12 +2253,12 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
Temp bfi = bld.vop3(aco_opcode::v_bfi_b32, bld.def(v1), bitmask, bld.copy(bld.def(v1), Operand(0x43300000u)), as_vgpr(ctx, src0_hi));
Temp tmp = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), src0, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi));
Instruction *sub = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), tmp, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi));
sub->vop3()->neg[1] = true;
sub->vop3().neg[1] = true;
tmp = sub->definitions[0].getTemp();
Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(-1u), Operand(0x432fffffu));
Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.hint_vcc(bld.def(bld.lm)), src0, v);
vop3->vop3()->abs[0] = true;
vop3->vop3().abs[0] = true;
Temp cond = vop3->definitions[0].getTemp();
Temp tmp_lo = bld.tmp(v1), tmp_hi = bld.tmp(v1);
@ -2924,7 +2924,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
f32 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16);
Temp smallest = bld.copy(bld.def(s1), Operand(0x38800000u));
Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_nlt_f32, bld.hint_vcc(bld.def(bld.lm)), f32, smallest);
vop3->vop3()->abs[0] = true;
vop3->vop3().abs[0] = true;
cmp_res = vop3->definitions[0].getTemp();
}
@ -3515,7 +3515,7 @@ Temp lds_load_callback(Builder& bld, const LoadEmitInfo &info,
instr = bld.ds(op, Definition(val), offset, m, const_offset, const_offset + 1);
else
instr = bld.ds(op, Definition(val), offset, m, const_offset);
instr->ds()->sync = info.sync;
instr->ds().sync = info.sync;
if (size < 4)
val = bld.pseudo(aco_opcode::p_extract_vector, bld.def(RegClass::get(RegType::vgpr, size)), val, Operand(0u));
@ -3931,7 +3931,7 @@ void store_lds(isel_context *ctx, unsigned elem_size_bytes, Temp data, uint32_t
} else {
instr = bld.ds(op, address_offset, split_data, m, inline_offset);
}
instr->ds()->sync = memory_sync_info(storage_shared);
instr->ds().sync = memory_sync_info(storage_shared);
}
}
@ -4094,7 +4094,7 @@ void emit_single_mubuf_store(isel_context *ctx, Temp descriptor, Temp voffset, T
/* idxen*/ false, /* addr64 */ false, /* disable_wqm */ false, /* glc */ true,
/* dlc*/ false, /* slc */ slc);
r.instr->mubuf()->sync = sync;
r.instr->mubuf().sync = sync;
}
void store_vmem_mubuf(isel_context *ctx, Temp src, Temp descriptor, Temp voffset, Temp soffset,
@ -5503,7 +5503,7 @@ void visit_load_push_constant(isel_context *ctx, nir_intrinsic_instr *instr)
unreachable("unimplemented or forbidden load_push_constant.");
}
bld.smem(op, Definition(vec), ptr, index).instr->smem()->prevent_overflow = true;
bld.smem(op, Definition(vec), ptr, index).instr->smem().prevent_overflow = true;
if (!aligned) {
Operand byte_offset = index_cv ? Operand((offset + index_cv->u32) % 4) : Operand(index);
@ -7147,7 +7147,7 @@ void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
for (unsigned i = 0; i < write_count; i++) {
aco_opcode op = get_buffer_store_op(write_datas[i].bytes());
Instruction *mubuf = bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset, write_datas[i], offsets[i], true, true);
mubuf->mubuf()->sync = memory_sync_info(storage_scratch, semantic_private);
mubuf->mubuf().sync = memory_sync_info(storage_scratch, semantic_private);
}
}

View file

@ -140,19 +140,19 @@ memory_sync_info get_sync_info(const Instruction* instr)
{
switch (instr->format) {
case Format::SMEM:
return instr->smem()->sync;
return instr->smem().sync;
case Format::MUBUF:
return instr->mubuf()->sync;
return instr->mubuf().sync;
case Format::MIMG:
return instr->mimg()->sync;
return instr->mimg().sync;
case Format::MTBUF:
return instr->mtbuf()->sync;
return instr->mtbuf().sync;
case Format::FLAT:
case Format::GLOBAL:
case Format::SCRATCH:
return instr->flatlike()->sync;
return instr->flatlike().sync;
case Format::DS:
return instr->ds()->sync;
return instr->ds().sync;
default:
return memory_sync_info();
}
@ -170,12 +170,12 @@ bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr)
return true;
if (instr->isVOP3()) {
VOP3_instruction *vop3 = instr->vop3();
VOP3_instruction& vop3 = instr->vop3();
if (instr->format == Format::VOP3)
return false;
if (vop3->clamp && instr->format == asVOP3(Format::VOPC) && chip != GFX8)
if (vop3.clamp && instr->format == asVOP3(Format::VOPC) && chip != GFX8)
return false;
if (vop3->omod && chip < GFX9)
if (vop3.omod && chip < GFX9)
return false;
//TODO: return true if we know we will use vcc
@ -232,14 +232,14 @@ aco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& inst
std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin());
std::copy(tmp->definitions.cbegin(), tmp->definitions.cend(), instr->definitions.begin());
SDWA_instruction *sdwa = instr->sdwa();
SDWA_instruction& sdwa = instr->sdwa();
if (tmp->isVOP3()) {
VOP3_instruction *vop3 = tmp->vop3();
memcpy(sdwa->neg, vop3->neg, sizeof(sdwa->neg));
memcpy(sdwa->abs, vop3->abs, sizeof(sdwa->abs));
sdwa->omod = vop3->omod;
sdwa->clamp = vop3->clamp;
VOP3_instruction& vop3 = tmp->vop3();
memcpy(sdwa.neg, vop3.neg, sizeof(sdwa.neg));
memcpy(sdwa.abs, vop3.abs, sizeof(sdwa.abs));
sdwa.omod = vop3.omod;
sdwa.clamp = vop3.clamp;
}
for (unsigned i = 0; i < instr->operands.size(); i++) {
@ -249,27 +249,27 @@ aco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& inst
switch (instr->operands[i].bytes()) {
case 1:
sdwa->sel[i] = sdwa_ubyte;
sdwa.sel[i] = sdwa_ubyte;
break;
case 2:
sdwa->sel[i] = sdwa_uword;
sdwa.sel[i] = sdwa_uword;
break;
case 4:
sdwa->sel[i] = sdwa_udword;
sdwa.sel[i] = sdwa_udword;
break;
}
}
switch (instr->definitions[0].bytes()) {
case 1:
sdwa->dst_sel = sdwa_ubyte;
sdwa->dst_preserve = true;
sdwa.dst_sel = sdwa_ubyte;
sdwa.dst_preserve = true;
break;
case 2:
sdwa->dst_sel = sdwa_uword;
sdwa->dst_preserve = true;
sdwa.dst_sel = sdwa_uword;
sdwa.dst_preserve = true;
break;
case 4:
sdwa->dst_sel = sdwa_udword;
sdwa.dst_sel = sdwa_udword;
break;
}

View file

@ -1030,93 +1030,93 @@ struct Instruction {
return false;
}
Pseudo_instruction *pseudo() noexcept {assert(isPseudo()); return (Pseudo_instruction *)this;}
const Pseudo_instruction *pseudo() const noexcept {assert(isPseudo()); return (Pseudo_instruction *)this;}
Pseudo_instruction& pseudo() noexcept {assert(isPseudo()); return *(Pseudo_instruction *)this;}
const Pseudo_instruction& pseudo() const noexcept {assert(isPseudo()); return *(Pseudo_instruction *)this;}
constexpr bool isPseudo() const noexcept {return format == Format::PSEUDO;}
SOP1_instruction *sop1() noexcept {assert(isSOP1()); return (SOP1_instruction *)this;}
const SOP1_instruction *sop1() const noexcept {assert(isSOP1()); return (SOP1_instruction *)this;}
SOP1_instruction& sop1() noexcept {assert(isSOP1()); return *(SOP1_instruction *)this;}
const SOP1_instruction& sop1() const noexcept {assert(isSOP1()); return *(SOP1_instruction *)this;}
constexpr bool isSOP1() const noexcept {return format == Format::SOP1;}
SOP2_instruction *sop2() noexcept {assert(isSOP2()); return (SOP2_instruction *)this;}
const SOP2_instruction *sop2() const noexcept {assert(isSOP2()); return (SOP2_instruction *)this;}
SOP2_instruction& sop2() noexcept {assert(isSOP2()); return *(SOP2_instruction *)this;}
const SOP2_instruction& sop2() const noexcept {assert(isSOP2()); return *(SOP2_instruction *)this;}
constexpr bool isSOP2() const noexcept {return format == Format::SOP2;}
SOPK_instruction *sopk() noexcept {assert(isSOPK()); return (SOPK_instruction *)this;}
const SOPK_instruction *sopk() const noexcept {assert(isSOPK()); return (SOPK_instruction *)this;}
SOPK_instruction& sopk() noexcept {assert(isSOPK()); return *(SOPK_instruction *)this;}
const SOPK_instruction& sopk() const noexcept {assert(isSOPK()); return *(SOPK_instruction *)this;}
constexpr bool isSOPK() const noexcept {return format == Format::SOPK;}
SOPP_instruction *sopp() noexcept {assert(isSOPP()); return (SOPP_instruction *)this;}
const SOPP_instruction *sopp() const noexcept {assert(isSOPP()); return (SOPP_instruction *)this;}
SOPP_instruction& sopp() noexcept {assert(isSOPP()); return *(SOPP_instruction *)this;}
const SOPP_instruction& sopp() const noexcept {assert(isSOPP()); return *(SOPP_instruction *)this;}
constexpr bool isSOPP() const noexcept {return format == Format::SOPP;}
SOPC_instruction *sopc() noexcept {assert(isSOPC()); return (SOPC_instruction *)this;}
const SOPC_instruction *sopc() const noexcept {assert(isSOPC()); return (SOPC_instruction *)this;}
SOPC_instruction& sopc() noexcept {assert(isSOPC()); return *(SOPC_instruction *)this;}
const SOPC_instruction& sopc() const noexcept {assert(isSOPC()); return *(SOPC_instruction *)this;}
constexpr bool isSOPC() const noexcept {return format == Format::SOPC;}
SMEM_instruction *smem() noexcept {assert(isSMEM()); return (SMEM_instruction *)this;}
const SMEM_instruction *smem() const noexcept {assert(isSMEM()); return (SMEM_instruction *)this;}
SMEM_instruction& smem() noexcept {assert(isSMEM()); return *(SMEM_instruction *)this;}
const SMEM_instruction& smem() const noexcept {assert(isSMEM()); return *(SMEM_instruction *)this;}
constexpr bool isSMEM() const noexcept {return format == Format::SMEM;}
DS_instruction *ds() noexcept {assert(isDS()); return (DS_instruction *)this;}
const DS_instruction *ds() const noexcept {assert(isDS()); return (DS_instruction *)this;}
DS_instruction& ds() noexcept {assert(isDS()); return *(DS_instruction *)this;}
const DS_instruction& ds() const noexcept {assert(isDS()); return *(DS_instruction *)this;}
constexpr bool isDS() const noexcept {return format == Format::DS;}
MTBUF_instruction *mtbuf() noexcept {assert(isMTBUF()); return (MTBUF_instruction *)this;}
const MTBUF_instruction *mtbuf() const noexcept {assert(isMTBUF()); return (MTBUF_instruction *)this;}
MTBUF_instruction& mtbuf() noexcept {assert(isMTBUF()); return *(MTBUF_instruction *)this;}
const MTBUF_instruction& mtbuf() const noexcept {assert(isMTBUF()); return *(MTBUF_instruction *)this;}
constexpr bool isMTBUF() const noexcept {return format == Format::MTBUF;}
MUBUF_instruction *mubuf() noexcept {assert(isMUBUF()); return (MUBUF_instruction *)this;}
const MUBUF_instruction *mubuf() const noexcept {assert(isMUBUF()); return (MUBUF_instruction *)this;}
MUBUF_instruction& mubuf() noexcept {assert(isMUBUF()); return *(MUBUF_instruction *)this;}
const MUBUF_instruction& mubuf() const noexcept {assert(isMUBUF()); return *(MUBUF_instruction *)this;}
constexpr bool isMUBUF() const noexcept {return format == Format::MUBUF;}
MIMG_instruction *mimg() noexcept {assert(isMIMG()); return (MIMG_instruction *)this;}
const MIMG_instruction *mimg() const noexcept {assert(isMIMG()); return (MIMG_instruction *)this;}
MIMG_instruction& mimg() noexcept {assert(isMIMG()); return *(MIMG_instruction *)this;}
const MIMG_instruction& mimg() const noexcept {assert(isMIMG()); return *(MIMG_instruction *)this;}
constexpr bool isMIMG() const noexcept {return format == Format::MIMG;}
Export_instruction *exp() noexcept {assert(isEXP()); return (Export_instruction *)this;}
const Export_instruction *exp() const noexcept {assert(isEXP()); return (Export_instruction *)this;}
Export_instruction& exp() noexcept {assert(isEXP()); return *(Export_instruction *)this;}
const Export_instruction& exp() const noexcept {assert(isEXP()); return *(Export_instruction *)this;}
constexpr bool isEXP() const noexcept {return format == Format::EXP;}
FLAT_instruction *flat() noexcept {assert(isFlat()); return (FLAT_instruction *)this;}
const FLAT_instruction *flat() const noexcept {assert(isFlat()); return (FLAT_instruction *)this;}
FLAT_instruction& flat() noexcept {assert(isFlat()); return *(FLAT_instruction *)this;}
const FLAT_instruction& flat() const noexcept {assert(isFlat()); return *(FLAT_instruction *)this;}
constexpr bool isFlat() const noexcept {return format == Format::FLAT;}
FLAT_instruction *global() noexcept {assert(isGlobal()); return (FLAT_instruction *)this;}
const FLAT_instruction *global() const noexcept {assert(isGlobal()); return (FLAT_instruction *)this;}
FLAT_instruction& global() noexcept {assert(isGlobal()); return *(FLAT_instruction *)this;}
const FLAT_instruction& global() const noexcept {assert(isGlobal()); return *(FLAT_instruction *)this;}
constexpr bool isGlobal() const noexcept {return format == Format::GLOBAL;}
FLAT_instruction *scratch() noexcept {assert(isScratch()); return (FLAT_instruction *)this;}
const FLAT_instruction *scratch() const noexcept {assert(isScratch()); return (FLAT_instruction *)this;}
FLAT_instruction& scratch() noexcept {assert(isScratch()); return *(FLAT_instruction *)this;}
const FLAT_instruction& scratch() const noexcept {assert(isScratch()); return *(FLAT_instruction *)this;}
constexpr bool isScratch() const noexcept {return format == Format::SCRATCH;}
Pseudo_branch_instruction *branch() noexcept {assert(isBranch()); return (Pseudo_branch_instruction *)this;}
const Pseudo_branch_instruction *branch() const noexcept {assert(isBranch()); return (Pseudo_branch_instruction *)this;}
Pseudo_branch_instruction& branch() noexcept {assert(isBranch()); return *(Pseudo_branch_instruction *)this;}
const Pseudo_branch_instruction& branch() const noexcept {assert(isBranch()); return *(Pseudo_branch_instruction *)this;}
constexpr bool isBranch() const noexcept {return format == Format::PSEUDO_BRANCH;}
Pseudo_barrier_instruction *barrier() noexcept {assert(isBarrier()); return (Pseudo_barrier_instruction *)this;}
const Pseudo_barrier_instruction *barrier() const noexcept {assert(isBarrier()); return (Pseudo_barrier_instruction *)this;}
Pseudo_barrier_instruction& barrier() noexcept {assert(isBarrier()); return *(Pseudo_barrier_instruction *)this;}
const Pseudo_barrier_instruction& barrier() const noexcept {assert(isBarrier()); return *(Pseudo_barrier_instruction *)this;}
constexpr bool isBarrier() const noexcept {return format == Format::PSEUDO_BARRIER;}
Pseudo_reduction_instruction *reduction() noexcept {assert(isReduction()); return (Pseudo_reduction_instruction *)this;}
const Pseudo_reduction_instruction *reduction() const noexcept {assert(isReduction()); return (Pseudo_reduction_instruction *)this;}
Pseudo_reduction_instruction& reduction() noexcept {assert(isReduction()); return *(Pseudo_reduction_instruction *)this;}
const Pseudo_reduction_instruction& reduction() const noexcept {assert(isReduction()); return *(Pseudo_reduction_instruction *)this;}
constexpr bool isReduction() const noexcept {return format == Format::PSEUDO_REDUCTION;}
VOP3P_instruction *vop3p() noexcept {assert(isVOP3P()); return (VOP3P_instruction *)this;}
const VOP3P_instruction *vop3p() const noexcept {assert(isVOP3P()); return (VOP3P_instruction *)this;}
VOP3P_instruction& vop3p() noexcept {assert(isVOP3P()); return *(VOP3P_instruction *)this;}
const VOP3P_instruction& vop3p() const noexcept {assert(isVOP3P()); return *(VOP3P_instruction *)this;}
constexpr bool isVOP3P() const noexcept {return format == Format::VOP3P;}
VOP1_instruction *vop1() noexcept {assert(isVOP1()); return (VOP1_instruction *)this;}
const VOP1_instruction *vop1() const noexcept {assert(isVOP1()); return (VOP1_instruction *)this;}
VOP1_instruction& vop1() noexcept {assert(isVOP1()); return *(VOP1_instruction *)this;}
const VOP1_instruction& vop1() const noexcept {assert(isVOP1()); return *(VOP1_instruction *)this;}
constexpr bool isVOP1() const noexcept {return (uint16_t)format & (uint16_t)Format::VOP1;}
VOP2_instruction *vop2() noexcept {assert(isVOP2()); return (VOP2_instruction *)this;}
const VOP2_instruction *vop2() const noexcept {assert(isVOP2()); return (VOP2_instruction *)this;}
VOP2_instruction& vop2() noexcept {assert(isVOP2()); return *(VOP2_instruction *)this;}
const VOP2_instruction& vop2() const noexcept {assert(isVOP2()); return *(VOP2_instruction *)this;}
constexpr bool isVOP2() const noexcept {return (uint16_t)format & (uint16_t)Format::VOP2;}
VOPC_instruction *vopc() noexcept {assert(isVOPC()); return (VOPC_instruction *)this;}
const VOPC_instruction *vopc() const noexcept {assert(isVOPC()); return (VOPC_instruction *)this;}
VOPC_instruction& vopc() noexcept {assert(isVOPC()); return *(VOPC_instruction *)this;}
const VOPC_instruction& vopc() const noexcept {assert(isVOPC()); return *(VOPC_instruction *)this;}
constexpr bool isVOPC() const noexcept {return (uint16_t)format & (uint16_t)Format::VOPC;}
VOP3_instruction *vop3() noexcept {assert(isVOP3()); return (VOP3_instruction *)this;}
const VOP3_instruction *vop3() const noexcept {assert(isVOP3()); return (VOP3_instruction *)this;}
VOP3_instruction& vop3() noexcept {assert(isVOP3()); return *(VOP3_instruction *)this;}
const VOP3_instruction& vop3() const noexcept {assert(isVOP3()); return *(VOP3_instruction *)this;}
constexpr bool isVOP3() const noexcept {return (uint16_t)format & (uint16_t)Format::VOP3;}
Interp_instruction *vintrp() noexcept {assert(isVINTRP()); return (Interp_instruction *)this;}
const Interp_instruction *vintrp() const noexcept {assert(isVINTRP()); return (Interp_instruction *)this;}
Interp_instruction& vintrp() noexcept {assert(isVINTRP()); return *(Interp_instruction *)this;}
const Interp_instruction& vintrp() const noexcept {assert(isVINTRP()); return *(Interp_instruction *)this;}
constexpr bool isVINTRP() const noexcept {return (uint16_t)format & (uint16_t)Format::VINTRP;}
DPP_instruction *dpp() noexcept {assert(isDPP()); return (DPP_instruction *)this;}
const DPP_instruction *dpp() const noexcept {assert(isDPP()); return (DPP_instruction *)this;}
DPP_instruction& dpp() noexcept {assert(isDPP()); return *(DPP_instruction *)this;}
const DPP_instruction& dpp() const noexcept {assert(isDPP()); return *(DPP_instruction *)this;}
constexpr bool isDPP() const noexcept {return (uint16_t)format & (uint16_t)Format::DPP;}
SDWA_instruction *sdwa() noexcept {assert(isSDWA()); return (SDWA_instruction *)this;}
const SDWA_instruction *sdwa() const noexcept {assert(isSDWA()); return (SDWA_instruction *)this;}
SDWA_instruction& sdwa() noexcept {assert(isSDWA()); return *(SDWA_instruction *)this;}
const SDWA_instruction& sdwa() const noexcept {assert(isSDWA()); return *(SDWA_instruction *)this;}
constexpr bool isSDWA() const noexcept {return (uint16_t)format & (uint16_t)Format::SDWA;}
FLAT_instruction *flatlike()
FLAT_instruction& flatlike()
{
return (FLAT_instruction *)this;
return *(FLAT_instruction *)this;
}
const FLAT_instruction *flatlike() const
const FLAT_instruction& flatlike() const
{
return (FLAT_instruction *)this;
return *(FLAT_instruction *)this;
}
constexpr bool isFlatLike() const noexcept
@ -1524,23 +1524,23 @@ constexpr bool Instruction::usesModifiers() const noexcept
return true;
if (isVOP3P()) {
const VOP3P_instruction *vop3p = this->vop3p();
const VOP3P_instruction& vop3p = this->vop3p();
for (unsigned i = 0; i < operands.size(); i++) {
if (vop3p->neg_lo[i] || vop3p->neg_hi[i])
if (vop3p.neg_lo[i] || vop3p.neg_hi[i])
return true;
/* opsel_hi must be 1 to not be considered a modifier - even for constants */
if (!(vop3p->opsel_hi & (1 << i)))
if (!(vop3p.opsel_hi & (1 << i)))
return true;
}
return vop3p->opsel_lo || vop3p->clamp;
return vop3p.opsel_lo || vop3p.clamp;
} else if (isVOP3()) {
const VOP3_instruction *vop3 = this->vop3();
const VOP3_instruction& vop3 = this->vop3();
for (unsigned i = 0; i < operands.size(); i++) {
if (vop3->abs[i] || vop3->neg[i])
if (vop3.abs[i] || vop3.neg[i])
return true;
}
return vop3->opsel || vop3->clamp || vop3->omod;
return vop3.opsel || vop3.clamp || vop3.omod;
}
return false;
}

View file

@ -646,7 +646,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
Definition(PhysReg{vtmp+i}, v1),
Operand(PhysReg{tmp+i}, v1),
Operand(0xffffffffu), Operand(0xffffffffu)).instr;
perm->vop3()->opsel = 1; /* FI (Fetch Inactive) */
perm->vop3().opsel = 1; /* FI (Fetch Inactive) */
}
bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(UINT64_MAX));
@ -757,7 +757,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
Definition(PhysReg{vtmp+i}, v1),
Operand(PhysReg{tmp+i}, v1),
Operand(0xffffffffu), Operand(0xffffffffu)).instr;
perm->vop3()->opsel = 1; /* FI (Fetch Inactive) */
perm->vop3().opsel = 1; /* FI (Fetch Inactive) */
}
emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
@ -1052,12 +1052,12 @@ void copy_constant(lower_context *ctx, Builder& bld, Definition dst, Operand op)
if (dst.physReg().byte() == 2) {
Operand def_lo(dst.physReg().advance(-2), v2b);
Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, def_lo, op);
instr->vop3()->opsel = 0;
instr->vop3().opsel = 0;
} else {
assert(dst.physReg().byte() == 0);
Operand def_hi(dst.physReg().advance(2), v2b);
Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, op, def_hi);
instr->vop3()->opsel = 2;
instr->vop3().opsel = 2;
}
} else {
uint32_t offset = dst.physReg().byte() * 8u;
@ -1251,7 +1251,7 @@ void do_pack_2x16(lower_context *ctx, Builder& bld, Definition def, Operand lo,
if (can_use_pack) {
Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, def, lo, hi);
/* opsel: 0 = select low half, 1 = select high half. [0] = src0, [1] = src1 */
instr->vop3()->opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1);
instr->vop3().opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1);
return;
}
@ -1810,7 +1810,7 @@ void lower_to_hw_instr(Program* program)
aco_ptr<Instruction>& instr = block->instructions[instr_idx];
aco_ptr<Instruction> mov;
if (instr->isPseudo() && instr->opcode != aco_opcode::p_unit_test) {
Pseudo_instruction *pi = instr->pseudo();
Pseudo_instruction *pi = &instr->pseudo();
switch (instr->opcode)
{
@ -1897,7 +1897,7 @@ void lower_to_hw_instr(Program* program)
instr2->opcode == aco_opcode::p_logical_end)
continue;
else if (instr2->opcode == aco_opcode::exp &&
instr2->exp()->dest == null_exp_dest)
instr2->exp().dest == null_exp_dest)
continue;
else if (instr2->opcode == aco_opcode::p_parallelcopy &&
instr2->definitions[0].isFixed() &&
@ -1983,7 +1983,7 @@ void lower_to_hw_instr(Program* program)
break;
}
} else if (instr->isBranch()) {
Pseudo_branch_instruction* branch = instr->branch();
Pseudo_branch_instruction* branch = &instr->branch();
uint32_t target = branch->target[0];
/* check if all blocks from current to target are empty */
@ -2055,20 +2055,20 @@ void lower_to_hw_instr(Program* program)
}
} else if (instr->isReduction()) {
Pseudo_reduction_instruction* reduce = instr->reduction();
emit_reduction(&ctx, reduce->opcode, reduce->reduce_op, reduce->cluster_size,
reduce->operands[1].physReg(), // tmp
reduce->definitions[1].physReg(), // stmp
reduce->operands[2].physReg(), // vtmp
reduce->definitions[2].physReg(), // sitmp
reduce->operands[0], reduce->definitions[0]);
Pseudo_reduction_instruction& reduce = instr->reduction();
emit_reduction(&ctx, reduce.opcode, reduce.reduce_op, reduce.cluster_size,
reduce.operands[1].physReg(), // tmp
reduce.definitions[1].physReg(), // stmp
reduce.operands[2].physReg(), // vtmp
reduce.definitions[2].physReg(), // sitmp
reduce.operands[0], reduce.definitions[0]);
} else if (instr->isBarrier()) {
Pseudo_barrier_instruction* barrier = instr->barrier();
Pseudo_barrier_instruction& barrier = instr->barrier();
/* Anything larger than a workgroup isn't possible. Anything
* smaller requires no instructions and this pseudo instruction
* would only be included to control optimizations. */
bool emit_s_barrier = barrier->exec_scope == scope_workgroup &&
bool emit_s_barrier = barrier.exec_scope == scope_workgroup &&
program->workgroup_size > program->wave_size;
bld.insert(std::move(instr));

View file

@ -178,121 +178,121 @@ struct InstrPred {
return false;
if (a->isVOP3()) {
VOP3_instruction* a3 = a->vop3();
VOP3_instruction* b3 = b->vop3();
VOP3_instruction& a3 = a->vop3();
VOP3_instruction& b3 = b->vop3();
for (unsigned i = 0; i < 3; i++) {
if (a3->abs[i] != b3->abs[i] ||
a3->neg[i] != b3->neg[i])
if (a3.abs[i] != b3.abs[i] ||
a3.neg[i] != b3.neg[i])
return false;
}
return a3->clamp == b3->clamp &&
a3->omod == b3->omod &&
a3->opsel == b3->opsel;
return a3.clamp == b3.clamp &&
a3.omod == b3.omod &&
a3.opsel == b3.opsel;
}
if (a->isDPP()) {
DPP_instruction* aDPP = a->dpp();
DPP_instruction* bDPP = b->dpp();
return aDPP->pass_flags == bDPP->pass_flags &&
aDPP->dpp_ctrl == bDPP->dpp_ctrl &&
aDPP->bank_mask == bDPP->bank_mask &&
aDPP->row_mask == bDPP->row_mask &&
aDPP->bound_ctrl == bDPP->bound_ctrl &&
aDPP->abs[0] == bDPP->abs[0] &&
aDPP->abs[1] == bDPP->abs[1] &&
aDPP->neg[0] == bDPP->neg[0] &&
aDPP->neg[1] == bDPP->neg[1];
DPP_instruction& aDPP = a->dpp();
DPP_instruction& bDPP = b->dpp();
return aDPP.pass_flags == bDPP.pass_flags &&
aDPP.dpp_ctrl == bDPP.dpp_ctrl &&
aDPP.bank_mask == bDPP.bank_mask &&
aDPP.row_mask == bDPP.row_mask &&
aDPP.bound_ctrl == bDPP.bound_ctrl &&
aDPP.abs[0] == bDPP.abs[0] &&
aDPP.abs[1] == bDPP.abs[1] &&
aDPP.neg[0] == bDPP.neg[0] &&
aDPP.neg[1] == bDPP.neg[1];
}
if (a->isSDWA()) {
SDWA_instruction* aSDWA = a->sdwa();
SDWA_instruction* bSDWA = b->sdwa();
return aSDWA->sel[0] == bSDWA->sel[0] &&
aSDWA->sel[1] == bSDWA->sel[1] &&
aSDWA->dst_sel == bSDWA->dst_sel &&
aSDWA->abs[0] == bSDWA->abs[0] &&
aSDWA->abs[1] == bSDWA->abs[1] &&
aSDWA->neg[0] == bSDWA->neg[0] &&
aSDWA->neg[1] == bSDWA->neg[1] &&
aSDWA->dst_preserve == bSDWA->dst_preserve &&
aSDWA->clamp == bSDWA->clamp &&
aSDWA->omod == bSDWA->omod;
SDWA_instruction& aSDWA = a->sdwa();
SDWA_instruction& bSDWA = b->sdwa();
return aSDWA.sel[0] == bSDWA.sel[0] &&
aSDWA.sel[1] == bSDWA.sel[1] &&
aSDWA.dst_sel == bSDWA.dst_sel &&
aSDWA.abs[0] == bSDWA.abs[0] &&
aSDWA.abs[1] == bSDWA.abs[1] &&
aSDWA.neg[0] == bSDWA.neg[0] &&
aSDWA.neg[1] == bSDWA.neg[1] &&
aSDWA.dst_preserve == bSDWA.dst_preserve &&
aSDWA.clamp == bSDWA.clamp &&
aSDWA.omod == bSDWA.omod;
}
switch (a->format) {
case Format::SOPK: {
if (a->opcode == aco_opcode::s_getreg_b32)
return false;
SOPK_instruction* aK = a->sopk();
SOPK_instruction* bK = b->sopk();
return aK->imm == bK->imm;
SOPK_instruction& aK = a->sopk();
SOPK_instruction& bK = b->sopk();
return aK.imm == bK.imm;
}
case Format::SMEM: {
SMEM_instruction* aS = a->smem();
SMEM_instruction* bS = b->smem();
SMEM_instruction& aS = a->smem();
SMEM_instruction& bS = b->smem();
/* isel shouldn't be creating situations where this assertion fails */
assert(aS->prevent_overflow == bS->prevent_overflow);
return aS->sync.can_reorder() && bS->sync.can_reorder() &&
aS->sync == bS->sync && aS->glc == bS->glc && aS->dlc == bS->dlc &&
aS->nv == bS->nv && aS->disable_wqm == bS->disable_wqm &&
aS->prevent_overflow == bS->prevent_overflow;
assert(aS.prevent_overflow == bS.prevent_overflow);
return aS.sync.can_reorder() && bS.sync.can_reorder() &&
aS.sync == bS.sync && aS.glc == bS.glc && aS.dlc == bS.dlc &&
aS.nv == bS.nv && aS.disable_wqm == bS.disable_wqm &&
aS.prevent_overflow == bS.prevent_overflow;
}
case Format::VINTRP: {
Interp_instruction* aI = a->vintrp();
Interp_instruction* bI = b->vintrp();
if (aI->attribute != bI->attribute)
Interp_instruction& aI = a->vintrp();
Interp_instruction& bI = b->vintrp();
if (aI.attribute != bI.attribute)
return false;
if (aI->component != bI->component)
if (aI.component != bI.component)
return false;
return true;
}
case Format::VOP3P: {
VOP3P_instruction* a3P = a->vop3p();
VOP3P_instruction* b3P = b->vop3p();
VOP3P_instruction& a3P = a->vop3p();
VOP3P_instruction& b3P = b->vop3p();
for (unsigned i = 0; i < 3; i++) {
if (a3P->neg_lo[i] != b3P->neg_lo[i] ||
a3P->neg_hi[i] != b3P->neg_hi[i])
if (a3P.neg_lo[i] != b3P.neg_lo[i] ||
a3P.neg_hi[i] != b3P.neg_hi[i])
return false;
}
return a3P->opsel_lo == b3P->opsel_lo &&
a3P->opsel_hi == b3P->opsel_hi &&
a3P->clamp == b3P->clamp;
return a3P.opsel_lo == b3P.opsel_lo &&
a3P.opsel_hi == b3P.opsel_hi &&
a3P.clamp == b3P.clamp;
}
case Format::PSEUDO_REDUCTION: {
Pseudo_reduction_instruction *aR = a->reduction();
Pseudo_reduction_instruction *bR = b->reduction();
return aR->pass_flags == bR->pass_flags &&
aR->reduce_op == bR->reduce_op &&
aR->cluster_size == bR->cluster_size;
Pseudo_reduction_instruction& aR = a->reduction();
Pseudo_reduction_instruction& bR = b->reduction();
return aR.pass_flags == bR.pass_flags &&
aR.reduce_op == bR.reduce_op &&
aR.cluster_size == bR.cluster_size;
}
case Format::MTBUF: {
MTBUF_instruction* aM = a->mtbuf();
MTBUF_instruction* bM = b->mtbuf();
return aM->sync.can_reorder() && bM->sync.can_reorder() &&
aM->sync == bM->sync &&
aM->dfmt == bM->dfmt &&
aM->nfmt == bM->nfmt &&
aM->offset == bM->offset &&
aM->offen == bM->offen &&
aM->idxen == bM->idxen &&
aM->glc == bM->glc &&
aM->dlc == bM->dlc &&
aM->slc == bM->slc &&
aM->tfe == bM->tfe &&
aM->disable_wqm == bM->disable_wqm;
MTBUF_instruction& aM = a->mtbuf();
MTBUF_instruction& bM = b->mtbuf();
return aM.sync.can_reorder() && bM.sync.can_reorder() &&
aM.sync == bM.sync &&
aM.dfmt == bM.dfmt &&
aM.nfmt == bM.nfmt &&
aM.offset == bM.offset &&
aM.offen == bM.offen &&
aM.idxen == bM.idxen &&
aM.glc == bM.glc &&
aM.dlc == bM.dlc &&
aM.slc == bM.slc &&
aM.tfe == bM.tfe &&
aM.disable_wqm == bM.disable_wqm;
}
case Format::MUBUF: {
MUBUF_instruction* aM = a->mubuf();
MUBUF_instruction* bM = b->mubuf();
return aM->sync.can_reorder() && bM->sync.can_reorder() &&
aM->sync == bM->sync &&
aM->offset == bM->offset &&
aM->offen == bM->offen &&
aM->idxen == bM->idxen &&
aM->glc == bM->glc &&
aM->dlc == bM->dlc &&
aM->slc == bM->slc &&
aM->tfe == bM->tfe &&
aM->lds == bM->lds &&
aM->disable_wqm == bM->disable_wqm;
MUBUF_instruction& aM = a->mubuf();
MUBUF_instruction& bM = b->mubuf();
return aM.sync.can_reorder() && bM.sync.can_reorder() &&
aM.sync == bM.sync &&
aM.offset == bM.offset &&
aM.offen == bM.offen &&
aM.idxen == bM.idxen &&
aM.glc == bM.glc &&
aM.dlc == bM.dlc &&
aM.slc == bM.slc &&
aM.tfe == bM.tfe &&
aM.lds == bM.lds &&
aM.disable_wqm == bM.disable_wqm;
}
/* we want to optimize these in NIR and don't hassle with load-store dependencies */
case Format::FLAT:
@ -308,31 +308,31 @@ struct InstrPred {
a->opcode != aco_opcode::ds_permute_b32 &&
a->opcode != aco_opcode::ds_swizzle_b32)
return false;
DS_instruction* aD = a->ds();
DS_instruction* bD = b->ds();
return aD->sync.can_reorder() && bD->sync.can_reorder() &&
aD->sync == bD->sync &&
aD->pass_flags == bD->pass_flags &&
aD->gds == bD->gds &&
aD->offset0 == bD->offset0 &&
aD->offset1 == bD->offset1;
DS_instruction& aD = a->ds();
DS_instruction& bD = b->ds();
return aD.sync.can_reorder() && bD.sync.can_reorder() &&
aD.sync == bD.sync &&
aD.pass_flags == bD.pass_flags &&
aD.gds == bD.gds &&
aD.offset0 == bD.offset0 &&
aD.offset1 == bD.offset1;
}
case Format::MIMG: {
MIMG_instruction* aM = a->mimg();
MIMG_instruction* bM = b->mimg();
return aM->sync.can_reorder() && bM->sync.can_reorder() &&
aM->sync == bM->sync &&
aM->dmask == bM->dmask &&
aM->unrm == bM->unrm &&
aM->glc == bM->glc &&
aM->slc == bM->slc &&
aM->tfe == bM->tfe &&
aM->da == bM->da &&
aM->lwe == bM->lwe &&
aM->r128 == bM->r128 &&
aM->a16 == bM->a16 &&
aM->d16 == bM->d16 &&
aM->disable_wqm == bM->disable_wqm;
MIMG_instruction& aM = a->mimg();
MIMG_instruction& bM = b->mimg();
return aM.sync.can_reorder() && bM.sync.can_reorder() &&
aM.sync == bM.sync &&
aM.dmask == bM.dmask &&
aM.unrm == bM.unrm &&
aM.glc == bM.glc &&
aM.slc == bM.slc &&
aM.tfe == bM.tfe &&
aM.da == bM.da &&
aM.lwe == bM.lwe &&
aM.r128 == bM.r128 &&
aM.a16 == bM.a16 &&
aM.d16 == bM.d16 &&
aM.disable_wqm == bM.disable_wqm;
}
default:
return true;

View file

@ -940,7 +940,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
can_use_mod = can_use_mod && instr_info.can_use_input_modifiers[(int)instr->opcode];
if (instr->isSDWA())
can_use_mod = can_use_mod && (instr->sdwa()->sel[i] & sdwa_asuint) == sdwa_udword;
can_use_mod = can_use_mod && (instr->sdwa().sel[i] & sdwa_asuint) == sdwa_udword;
else
can_use_mod = can_use_mod && (instr->isDPP() || can_use_VOP3(ctx, instr));
@ -949,11 +949,11 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
to_VOP3(ctx, instr);
instr->operands[i] = Operand(info.temp);
if (instr->isDPP())
instr->dpp()->abs[i] = true;
instr->dpp().abs[i] = true;
else if (instr->isSDWA())
instr->sdwa()->abs[i] = true;
instr->sdwa().abs[i] = true;
else
instr->vop3()->abs[i] = true;
instr->vop3().abs[i] = true;
}
if (info.is_neg() && instr->opcode == aco_opcode::v_add_f32) {
instr->opcode = i ? aco_opcode::v_sub_f32 : aco_opcode::v_subrev_f32;
@ -968,11 +968,11 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
to_VOP3(ctx, instr);
instr->operands[i].setTemp(info.temp);
if (instr->isDPP())
instr->dpp()->neg[i] = true;
instr->dpp().neg[i] = true;
else if (instr->isSDWA())
instr->sdwa()->neg[i] = true;
instr->sdwa().neg[i] = true;
else
instr->vop3()->neg[i] = true;
instr->vop3().neg[i] = true;
continue;
}
unsigned bits = get_operand_size(instr, i);
@ -999,7 +999,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
/* MUBUF: propagate constants and combine additions */
else if (instr->isMUBUF()) {
MUBUF_instruction *mubuf = instr->mubuf();
MUBUF_instruction& mubuf = instr->mubuf();
Temp base;
uint32_t offset;
while (info.is_temp())
@ -1011,29 +1011,29 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
* scratch accesses and other accesses and swizzling changing how
* addressing works significantly, this probably applies to swizzled
* MUBUF accesses. */
bool vaddr_prevent_overflow = mubuf->swizzled && ctx.program->chip_class < GFX9;
bool saddr_prevent_overflow = mubuf->swizzled;
bool vaddr_prevent_overflow = mubuf.swizzled && ctx.program->chip_class < GFX9;
bool saddr_prevent_overflow = mubuf.swizzled;
if (mubuf->offen && i == 1 && info.is_constant_or_literal(32) && mubuf->offset + info.val < 4096) {
assert(!mubuf->idxen);
if (mubuf.offen && i == 1 && info.is_constant_or_literal(32) && mubuf.offset + info.val < 4096) {
assert(!mubuf.idxen);
instr->operands[1] = Operand(v1);
mubuf->offset += info.val;
mubuf->offen = false;
mubuf.offset += info.val;
mubuf.offen = false;
continue;
} else if (i == 2 && info.is_constant_or_literal(32) && mubuf->offset + info.val < 4096) {
} else if (i == 2 && info.is_constant_or_literal(32) && mubuf.offset + info.val < 4096) {
instr->operands[2] = Operand((uint32_t) 0);
mubuf->offset += info.val;
mubuf.offset += info.val;
continue;
} else if (mubuf->offen && i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, vaddr_prevent_overflow) &&
base.regClass() == v1 && mubuf->offset + offset < 4096) {
assert(!mubuf->idxen);
} else if (mubuf.offen && i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, vaddr_prevent_overflow) &&
base.regClass() == v1 && mubuf.offset + offset < 4096) {
assert(!mubuf.idxen);
instr->operands[1].setTemp(base);
mubuf->offset += offset;
mubuf.offset += offset;
continue;
} else if (i == 2 && parse_base_offset(ctx, instr.get(), i, &base, &offset, saddr_prevent_overflow) &&
base.regClass() == s1 && mubuf->offset + offset < 4096) {
base.regClass() == s1 && mubuf.offset + offset < 4096) {
instr->operands[i].setTemp(base);
mubuf->offset += offset;
mubuf.offset += offset;
continue;
}
}
@ -1041,7 +1041,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
/* DS: combine additions */
else if (instr->isDS()) {
DS_instruction *ds = instr->ds();
DS_instruction& ds = instr->ds();
Temp base;
uint32_t offset;
bool has_usable_ds_offset = ctx.program->chip_class >= GFX7;
@ -1055,16 +1055,16 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
unsigned shifts = (instr->opcode == aco_opcode::ds_write2_b64 || instr->opcode == aco_opcode::ds_read2_b64) ? 3 : 2;
if ((offset & mask) == 0 &&
ds->offset0 + (offset >> shifts) <= 255 &&
ds->offset1 + (offset >> shifts) <= 255) {
ds.offset0 + (offset >> shifts) <= 255 &&
ds.offset1 + (offset >> shifts) <= 255) {
instr->operands[i].setTemp(base);
ds->offset0 += offset >> shifts;
ds->offset1 += offset >> shifts;
ds.offset0 += offset >> shifts;
ds.offset1 += offset >> shifts;
}
} else {
if (ds->offset0 + offset <= 65535) {
if (ds.offset0 + offset <= 65535) {
instr->operands[i].setTemp(base);
ds->offset0 += offset;
ds.offset0 += offset;
}
}
}
@ -1073,10 +1073,10 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
/* SMEM: propagate constants and combine additions */
else if (instr->isSMEM()) {
SMEM_instruction *smem = instr->smem();
SMEM_instruction& smem = instr->smem();
Temp base;
uint32_t offset;
bool prevent_overflow = smem->operands[0].size() > 2 || smem->prevent_overflow;
bool prevent_overflow = smem.operands[0].size() > 2 || smem.prevent_overflow;
if (i == 1 && info.is_constant_or_literal(32) &&
((ctx.program->chip_class == GFX6 && info.val <= 0x3FF) ||
(ctx.program->chip_class == GFX7 && info.val <= 0xFFFFFFFF) ||
@ -1084,31 +1084,30 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
instr->operands[i] = Operand(info.val);
continue;
} else if (i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, prevent_overflow) && base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->chip_class >= GFX9) {
bool soe = smem->operands.size() >= (!smem->definitions.empty() ? 3 : 4);
bool soe = smem.operands.size() >= (!smem.definitions.empty() ? 3 : 4);
if (soe &&
(!ctx.info[smem->operands.back().tempId()].is_constant_or_literal(32) ||
ctx.info[smem->operands.back().tempId()].val != 0)) {
(!ctx.info[smem.operands.back().tempId()].is_constant_or_literal(32) ||
ctx.info[smem.operands.back().tempId()].val != 0)) {
continue;
}
if (soe) {
smem->operands[1] = Operand(offset);
smem->operands.back() = Operand(base);
smem.operands[1] = Operand(offset);
smem.operands.back() = Operand(base);
} else {
SMEM_instruction *new_instr = create_instruction<SMEM_instruction>(smem->opcode, Format::SMEM, smem->operands.size() + 1, smem->definitions.size());
new_instr->operands[0] = smem->operands[0];
SMEM_instruction *new_instr = create_instruction<SMEM_instruction>(smem.opcode, Format::SMEM, smem.operands.size() + 1, smem.definitions.size());
new_instr->operands[0] = smem.operands[0];
new_instr->operands[1] = Operand(offset);
if (smem->definitions.empty())
new_instr->operands[2] = smem->operands[2];
if (smem.definitions.empty())
new_instr->operands[2] = smem.operands[2];
new_instr->operands.back() = Operand(base);
if (!smem->definitions.empty())
new_instr->definitions[0] = smem->definitions[0];
new_instr->sync = smem->sync;
new_instr->glc = smem->glc;
new_instr->dlc = smem->dlc;
new_instr->nv = smem->nv;
new_instr->disable_wqm = smem->disable_wqm;
if (!smem.definitions.empty())
new_instr->definitions[0] = smem.definitions[0];
new_instr->sync = smem.sync;
new_instr->glc = smem.glc;
new_instr->dlc = smem.dlc;
new_instr->nv = smem.nv;
new_instr->disable_wqm = smem.disable_wqm;
instr.reset(new_instr);
smem = instr->smem();
}
continue;
}
@ -1365,10 +1364,10 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
}
case aco_opcode::v_med3_f16:
case aco_opcode::v_med3_f32: { /* clamp */
VOP3_instruction* vop3 = instr->vop3();
if (vop3->abs[0] || vop3->abs[1] || vop3->abs[2] ||
vop3->neg[0] || vop3->neg[1] || vop3->neg[2] ||
vop3->omod != 0 || vop3->opsel != 0)
VOP3_instruction& vop3 = instr->vop3();
if (vop3.abs[0] || vop3.abs[1] || vop3.abs[2] ||
vop3.neg[0] || vop3.neg[1] || vop3.neg[2] ||
vop3.omod != 0 || vop3.opsel != 0)
break;
unsigned idx = 0;
@ -1682,12 +1681,12 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr)
return false;
if (op_instr[i]->isVOP3()) {
VOP3_instruction *vop3 = op_instr[i]->vop3();
if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2)
VOP3_instruction& vop3 = op_instr[i]->vop3();
if (vop3.neg[0] != vop3.neg[1] || vop3.abs[0] != vop3.abs[1] || vop3.opsel == 1 || vop3.opsel == 2)
return false;
neg[i] = vop3->neg[0];
abs[i] = vop3->abs[0];
opsel |= (vop3->opsel & 1) << i;
neg[i] = vop3.neg[0];
abs[i] = vop3.abs[0];
opsel |= (vop3.opsel & 1) << i;
} else if (op_instr[i]->isSDWA()) {
return false;
}
@ -1798,12 +1797,12 @@ bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr)
Instruction *new_instr;
if (cmp->isVOP3()) {
VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
VOP3_instruction *cmp_vop3 = cmp->vop3();
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
new_vop3->clamp = cmp_vop3->clamp;
new_vop3->omod = cmp_vop3->omod;
new_vop3->opsel = cmp_vop3->opsel;
VOP3_instruction& cmp_vop3 = cmp->vop3();
memcpy(new_vop3->abs, cmp_vop3.abs, sizeof(new_vop3->abs));
memcpy(new_vop3->neg, cmp_vop3.neg, sizeof(new_vop3->neg));
new_vop3->clamp = cmp_vop3.clamp;
new_vop3->omod = cmp_vop3.omod;
new_vop3->opsel = cmp_vop3.opsel;
new_instr = new_vop3;
} else {
new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
@ -1885,8 +1884,8 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in
return false;
if (nan_test->isVOP3()) {
VOP3_instruction *vop3 = nan_test->vop3();
if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2)
VOP3_instruction& vop3 = nan_test->vop3();
if (vop3.neg[0] != vop3.neg[1] || vop3.abs[0] != vop3.abs[1] || vop3.opsel == 1 || vop3.opsel == 2)
return false;
}
@ -1917,12 +1916,12 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in
Instruction *new_instr;
if (cmp->isVOP3()) {
VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
VOP3_instruction *cmp_vop3 = cmp->vop3();
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
new_vop3->clamp = cmp_vop3->clamp;
new_vop3->omod = cmp_vop3->omod;
new_vop3->opsel = cmp_vop3->opsel;
VOP3_instruction& cmp_vop3 = cmp->vop3();
memcpy(new_vop3->abs, cmp_vop3.abs, sizeof(new_vop3->abs));
memcpy(new_vop3->neg, cmp_vop3.neg, sizeof(new_vop3->neg));
new_vop3->clamp = cmp_vop3.clamp;
new_vop3->omod = cmp_vop3.omod;
new_vop3->opsel = cmp_vop3.opsel;
new_instr = new_vop3;
} else {
new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
@ -1966,24 +1965,24 @@ bool combine_inverse_comparison(opt_ctx &ctx, aco_ptr<Instruction>& instr)
Instruction *new_instr;
if (cmp->isVOP3()) {
VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_opcode, asVOP3(Format::VOPC), 2, 1);
VOP3_instruction *cmp_vop3 = cmp->vop3();
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
new_vop3->clamp = cmp_vop3->clamp;
new_vop3->omod = cmp_vop3->omod;
new_vop3->opsel = cmp_vop3->opsel;
VOP3_instruction& cmp_vop3 = cmp->vop3();
memcpy(new_vop3->abs, cmp_vop3.abs, sizeof(new_vop3->abs));
memcpy(new_vop3->neg, cmp_vop3.neg, sizeof(new_vop3->neg));
new_vop3->clamp = cmp_vop3.clamp;
new_vop3->omod = cmp_vop3.omod;
new_vop3->opsel = cmp_vop3.opsel;
new_instr = new_vop3;
} else if (cmp->isSDWA()) {
SDWA_instruction *new_sdwa = create_instruction<SDWA_instruction>(
new_opcode, (Format)((uint16_t)Format::SDWA | (uint16_t)Format::VOPC), 2, 1);
SDWA_instruction *cmp_sdwa = cmp->sdwa();
memcpy(new_sdwa->abs, cmp_sdwa->abs, sizeof(new_sdwa->abs));
memcpy(new_sdwa->sel, cmp_sdwa->sel, sizeof(new_sdwa->sel));
memcpy(new_sdwa->neg, cmp_sdwa->neg, sizeof(new_sdwa->neg));
new_sdwa->dst_sel = cmp_sdwa->dst_sel;
new_sdwa->dst_preserve = cmp_sdwa->dst_preserve;
new_sdwa->clamp = cmp_sdwa->clamp;
new_sdwa->omod = cmp_sdwa->omod;
SDWA_instruction& cmp_sdwa = cmp->sdwa();
memcpy(new_sdwa->abs, cmp_sdwa.abs, sizeof(new_sdwa->abs));
memcpy(new_sdwa->sel, cmp_sdwa.sel, sizeof(new_sdwa->sel));
memcpy(new_sdwa->neg, cmp_sdwa.neg, sizeof(new_sdwa->neg));
new_sdwa->dst_sel = cmp_sdwa.dst_sel;
new_sdwa->dst_preserve = cmp_sdwa.dst_preserve;
new_sdwa->clamp = cmp_sdwa.clamp;
new_sdwa->omod = cmp_sdwa.omod;
new_instr = new_sdwa;
} else {
new_instr = create_instruction<VOPC_instruction>(new_opcode, Format::VOPC, 2, 1);
@ -2019,8 +2018,8 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2,
if (fixed_to_exec(op2_instr->operands[0]) || fixed_to_exec(op2_instr->operands[1]))
return false;
VOP3_instruction *op1_vop3 = op1_instr->isVOP3() ? op1_instr->vop3() : NULL;
VOP3_instruction *op2_vop3 = op2_instr->isVOP3() ? op2_instr->vop3() : NULL;
VOP3_instruction *op1_vop3 = op1_instr->isVOP3() ? &op1_instr->vop3() : NULL;
VOP3_instruction *op2_vop3 = op2_instr->isVOP3() ? &op2_instr->vop3() : NULL;
if (op1_instr->isSDWA() || op2_instr->isSDWA())
return false;
@ -2641,11 +2640,11 @@ bool apply_omod_clamp(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
assert(!ctx.info[instr->definitions[0].tempId()].is_mad());
if (instr->isSDWA()) {
if (!apply_omod_clamp_helper(ctx, instr->sdwa(), def_info))
if (!apply_omod_clamp_helper(ctx, &instr->sdwa(), def_info))
return false;
} else {
to_VOP3(ctx, instr);
if (!apply_omod_clamp_helper(ctx, instr->vop3(), def_info))
if (!apply_omod_clamp_helper(ctx, &instr->vop3(), def_info))
return false;
}
@ -2767,7 +2766,7 @@ void propagate_swizzles(VOP3P_instruction* instr, uint8_t opsel_lo, uint8_t opse
void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
{
VOP3P_instruction* vop3p = instr->vop3p();
VOP3P_instruction* vop3p = &instr->vop3p();
/* apply clamp */
if (instr->opcode == aco_opcode::v_pk_mul_f16 &&
@ -2778,7 +2777,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
ssa_info& info = ctx.info[instr->operands[0].tempId()];
if (info.is_vop3p() && instr_info.can_use_output_modifiers[(int)info.instr->opcode]) {
VOP3P_instruction* candidate = ctx.info[instr->operands[0].tempId()].instr->vop3p();
VOP3P_instruction* candidate = &ctx.info[instr->operands[0].tempId()].instr->vop3p();
candidate->clamp = true;
propagate_swizzles(candidate, vop3p->opsel_lo, vop3p->opsel_hi);
std::swap(instr->definitions[0], candidate->definitions[0]);
@ -2804,7 +2803,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
if (!check_vop3_operands(ctx, 2, ops))
continue;
VOP3P_instruction* fneg = info.instr->vop3p();
VOP3P_instruction* fneg = &info.instr->vop3p();
if (fneg->clamp)
continue;
instr->operands[i] = fneg->operands[0];
@ -2849,7 +2848,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
continue;
/* no clamp allowed between mul and add */
if (info.instr->vop3p()->clamp)
if (info.instr->vop3p().clamp)
continue;
mul_instr = info.instr;
@ -2875,7 +2874,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
/* turn packed mul+add into v_pk_fma_f16 */
assert(mul_instr->isVOP3P());
aco_ptr<VOP3P_instruction> fma{create_instruction<VOP3P_instruction>(aco_opcode::v_pk_fma_f16, Format::VOP3P, 3, 1)};
VOP3P_instruction* mul = mul_instr->vop3p();
VOP3P_instruction* mul = &mul_instr->vop3p();
for (unsigned i = 0; i < 2; i++) {
fma->operands[i] = op[i];
fma->neg_lo[i] = mul->neg_lo[i];
@ -2944,7 +2943,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
if (mul_instr->operands[0].isLiteral())
return;
if (mul_instr->isVOP3() && mul_instr->vop3()->clamp)
if (mul_instr->isVOP3() && mul_instr->vop3().clamp)
return;
if (mul_instr->isSDWA())
return;
@ -2958,17 +2957,17 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
instr->operands[0] = mul_instr->operands[0];
instr->operands[1] = mul_instr->operands[1];
instr->definitions[0] = def;
VOP3_instruction* new_mul = instr->vop3();
VOP3_instruction& new_mul = instr->vop3();
if (mul_instr->isVOP3()) {
VOP3_instruction* mul = mul_instr->vop3();
new_mul->neg[0] = mul->neg[0] && !is_abs;
new_mul->neg[1] = mul->neg[1] && !is_abs;
new_mul->abs[0] = mul->abs[0] || is_abs;
new_mul->abs[1] = mul->abs[1] || is_abs;
new_mul->omod = mul->omod;
VOP3_instruction& mul = mul_instr->vop3();
new_mul.neg[0] = mul.neg[0] && !is_abs;
new_mul.neg[1] = mul.neg[1] && !is_abs;
new_mul.abs[0] = mul.abs[0] || is_abs;
new_mul.abs[1] = mul.abs[1] || is_abs;
new_mul.omod = mul.omod;
}
new_mul->neg[0] ^= true;
new_mul->clamp = false;
new_mul.neg[0] ^= true;
new_mul.clamp = false;
ctx.info[instr->definitions[0].tempId()].set_mul(instr.get());
return;
@ -3002,7 +3001,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
continue;
/* no clamp/omod allowed between mul and add */
if (info.instr->isVOP3() && (info.instr->vop3()->clamp || info.instr->vop3()->omod))
if (info.instr->isVOP3() && (info.instr->vop3().clamp || info.instr->vop3().omod))
continue;
Operand op[3] = {info.instr->operands[0], info.instr->operands[1], instr->operands[1 - i]};
@ -3033,28 +3032,28 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
bool clamp = false;
if (mul_instr->isVOP3()) {
VOP3_instruction* vop3 = mul_instr->vop3();
neg[0] = vop3->neg[0];
neg[1] = vop3->neg[1];
abs[0] = vop3->abs[0];
abs[1] = vop3->abs[1];
VOP3_instruction& vop3 = mul_instr->vop3();
neg[0] = vop3.neg[0];
neg[1] = vop3.neg[1];
abs[0] = vop3.abs[0];
abs[1] = vop3.abs[1];
}
if (instr->isVOP3()) {
VOP3_instruction* vop3 = instr->vop3();
neg[2] = vop3->neg[add_op_idx];
abs[2] = vop3->abs[add_op_idx];
omod = vop3->omod;
clamp = vop3->clamp;
VOP3_instruction& vop3 = instr->vop3();
neg[2] = vop3.neg[add_op_idx];
abs[2] = vop3.abs[add_op_idx];
omod = vop3.omod;
clamp = vop3.clamp;
/* abs of the multiplication result */
if (vop3->abs[1 - add_op_idx]) {
if (vop3.abs[1 - add_op_idx]) {
neg[0] = false;
neg[1] = false;
abs[0] = true;
abs[1] = true;
}
/* neg of the multiplication result */
neg[1] = neg[1] ^ vop3->neg[1 - add_op_idx];
neg[1] = neg[1] ^ vop3.neg[1 - add_op_idx];
}
if (instr->opcode == aco_opcode::v_sub_f32 || instr->opcode == aco_opcode::v_sub_f16)
neg[1 + add_op_idx] = neg[1 + add_op_idx] ^ true;

View file

@ -273,12 +273,12 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
{
switch (instr->format) {
case Format::SOPK: {
const SOPK_instruction* sopk = instr->sopk();
fprintf(output, " imm:%d", sopk->imm & 0x8000 ? (sopk->imm - 65536) : sopk->imm);
const SOPK_instruction& sopk = instr->sopk();
fprintf(output, " imm:%d", sopk.imm & 0x8000 ? (sopk.imm - 65536) : sopk.imm);
break;
}
case Format::SOPP: {
uint16_t imm = instr->sopp()->imm;
uint16_t imm = instr->sopp().imm;
switch (instr->opcode) {
case aco_opcode::s_waitcnt: {
/* we usually should check the chip class for vmcnt/lgkm, but
@ -340,74 +340,74 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
break;
}
}
if (instr->sopp()->block != -1)
fprintf(output, " block:BB%d", instr->sopp()->block);
if (instr->sopp().block != -1)
fprintf(output, " block:BB%d", instr->sopp().block);
break;
}
case Format::SMEM: {
const SMEM_instruction* smem = instr->smem();
if (smem->glc)
const SMEM_instruction& smem = instr->smem();
if (smem.glc)
fprintf(output, " glc");
if (smem->dlc)
if (smem.dlc)
fprintf(output, " dlc");
if (smem->nv)
if (smem.nv)
fprintf(output, " nv");
print_sync(smem->sync, output);
print_sync(smem.sync, output);
break;
}
case Format::VINTRP: {
const Interp_instruction* vintrp = instr->vintrp();
fprintf(output, " attr%d.%c", vintrp->attribute, "xyzw"[vintrp->component]);
const Interp_instruction& vintrp = instr->vintrp();
fprintf(output, " attr%d.%c", vintrp.attribute, "xyzw"[vintrp.component]);
break;
}
case Format::DS: {
const DS_instruction* ds = instr->ds();
if (ds->offset0)
fprintf(output, " offset0:%u", ds->offset0);
if (ds->offset1)
fprintf(output, " offset1:%u", ds->offset1);
if (ds->gds)
const DS_instruction& ds = instr->ds();
if (ds.offset0)
fprintf(output, " offset0:%u", ds.offset0);
if (ds.offset1)
fprintf(output, " offset1:%u", ds.offset1);
if (ds.gds)
fprintf(output, " gds");
print_sync(ds->sync, output);
print_sync(ds.sync, output);
break;
}
case Format::MUBUF: {
const MUBUF_instruction* mubuf = instr->mubuf();
if (mubuf->offset)
fprintf(output, " offset:%u", mubuf->offset);
if (mubuf->offen)
const MUBUF_instruction& mubuf = instr->mubuf();
if (mubuf.offset)
fprintf(output, " offset:%u", mubuf.offset);
if (mubuf.offen)
fprintf(output, " offen");
if (mubuf->idxen)
if (mubuf.idxen)
fprintf(output, " idxen");
if (mubuf->addr64)
if (mubuf.addr64)
fprintf(output, " addr64");
if (mubuf->glc)
if (mubuf.glc)
fprintf(output, " glc");
if (mubuf->dlc)
if (mubuf.dlc)
fprintf(output, " dlc");
if (mubuf->slc)
if (mubuf.slc)
fprintf(output, " slc");
if (mubuf->tfe)
if (mubuf.tfe)
fprintf(output, " tfe");
if (mubuf->lds)
if (mubuf.lds)
fprintf(output, " lds");
if (mubuf->disable_wqm)
if (mubuf.disable_wqm)
fprintf(output, " disable_wqm");
print_sync(mubuf->sync, output);
print_sync(mubuf.sync, output);
break;
}
case Format::MIMG: {
const MIMG_instruction* mimg = instr->mimg();
const MIMG_instruction& mimg = instr->mimg();
unsigned identity_dmask = !instr->definitions.empty() ?
(1 << instr->definitions[0].size()) - 1 :
0xf;
if ((mimg->dmask & identity_dmask) != identity_dmask)
if ((mimg.dmask & identity_dmask) != identity_dmask)
fprintf(output, " dmask:%s%s%s%s",
mimg->dmask & 0x1 ? "x" : "",
mimg->dmask & 0x2 ? "y" : "",
mimg->dmask & 0x4 ? "z" : "",
mimg->dmask & 0x8 ? "w" : "");
switch (mimg->dim) {
mimg.dmask & 0x1 ? "x" : "",
mimg.dmask & 0x2 ? "y" : "",
mimg.dmask & 0x4 ? "z" : "",
mimg.dmask & 0x8 ? "w" : "");
switch (mimg.dim) {
case ac_image_1d:
fprintf(output, " 1d");
break;
@ -433,104 +433,104 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
fprintf(output, " 2darraymsaa");
break;
}
if (mimg->unrm)
if (mimg.unrm)
fprintf(output, " unrm");
if (mimg->glc)
if (mimg.glc)
fprintf(output, " glc");
if (mimg->dlc)
if (mimg.dlc)
fprintf(output, " dlc");
if (mimg->slc)
if (mimg.slc)
fprintf(output, " slc");
if (mimg->tfe)
if (mimg.tfe)
fprintf(output, " tfe");
if (mimg->da)
if (mimg.da)
fprintf(output, " da");
if (mimg->lwe)
if (mimg.lwe)
fprintf(output, " lwe");
if (mimg->r128 || mimg->a16)
if (mimg.r128 || mimg.a16)
fprintf(output, " r128/a16");
if (mimg->d16)
if (mimg.d16)
fprintf(output, " d16");
if (mimg->disable_wqm)
if (mimg.disable_wqm)
fprintf(output, " disable_wqm");
print_sync(mimg->sync, output);
print_sync(mimg.sync, output);
break;
}
case Format::EXP: {
const Export_instruction* exp = instr->exp();
unsigned identity_mask = exp->compressed ? 0x5 : 0xf;
if ((exp->enabled_mask & identity_mask) != identity_mask)
const Export_instruction& exp = instr->exp();
unsigned identity_mask = exp.compressed ? 0x5 : 0xf;
if ((exp.enabled_mask & identity_mask) != identity_mask)
fprintf(output, " en:%c%c%c%c",
exp->enabled_mask & 0x1 ? 'r' : '*',
exp->enabled_mask & 0x2 ? 'g' : '*',
exp->enabled_mask & 0x4 ? 'b' : '*',
exp->enabled_mask & 0x8 ? 'a' : '*');
if (exp->compressed)
exp.enabled_mask & 0x1 ? 'r' : '*',
exp.enabled_mask & 0x2 ? 'g' : '*',
exp.enabled_mask & 0x4 ? 'b' : '*',
exp.enabled_mask & 0x8 ? 'a' : '*');
if (exp.compressed)
fprintf(output, " compr");
if (exp->done)
if (exp.done)
fprintf(output, " done");
if (exp->valid_mask)
if (exp.valid_mask)
fprintf(output, " vm");
if (exp->dest <= V_008DFC_SQ_EXP_MRT + 7)
fprintf(output, " mrt%d", exp->dest - V_008DFC_SQ_EXP_MRT);
else if (exp->dest == V_008DFC_SQ_EXP_MRTZ)
if (exp.dest <= V_008DFC_SQ_EXP_MRT + 7)
fprintf(output, " mrt%d", exp.dest - V_008DFC_SQ_EXP_MRT);
else if (exp.dest == V_008DFC_SQ_EXP_MRTZ)
fprintf(output, " mrtz");
else if (exp->dest == V_008DFC_SQ_EXP_NULL)
else if (exp.dest == V_008DFC_SQ_EXP_NULL)
fprintf(output, " null");
else if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= V_008DFC_SQ_EXP_POS + 3)
fprintf(output, " pos%d", exp->dest - V_008DFC_SQ_EXP_POS);
else if (exp->dest >= V_008DFC_SQ_EXP_PARAM && exp->dest <= V_008DFC_SQ_EXP_PARAM + 31)
fprintf(output, " param%d", exp->dest - V_008DFC_SQ_EXP_PARAM);
else if (exp.dest >= V_008DFC_SQ_EXP_POS && exp.dest <= V_008DFC_SQ_EXP_POS + 3)
fprintf(output, " pos%d", exp.dest - V_008DFC_SQ_EXP_POS);
else if (exp.dest >= V_008DFC_SQ_EXP_PARAM && exp.dest <= V_008DFC_SQ_EXP_PARAM + 31)
fprintf(output, " param%d", exp.dest - V_008DFC_SQ_EXP_PARAM);
break;
}
case Format::PSEUDO_BRANCH: {
const Pseudo_branch_instruction* branch = instr->branch();
const Pseudo_branch_instruction& branch = instr->branch();
/* Note: BB0 cannot be a branch target */
if (branch->target[0] != 0)
fprintf(output, " BB%d", branch->target[0]);
if (branch->target[1] != 0)
fprintf(output, ", BB%d", branch->target[1]);
if (branch.target[0] != 0)
fprintf(output, " BB%d", branch.target[0]);
if (branch.target[1] != 0)
fprintf(output, ", BB%d", branch.target[1]);
break;
}
case Format::PSEUDO_REDUCTION: {
const Pseudo_reduction_instruction* reduce = instr->reduction();
fprintf(output, " op:%s", reduce_ops[reduce->reduce_op]);
if (reduce->cluster_size)
fprintf(output, " cluster_size:%u", reduce->cluster_size);
const Pseudo_reduction_instruction& reduce = instr->reduction();
fprintf(output, " op:%s", reduce_ops[reduce.reduce_op]);
if (reduce.cluster_size)
fprintf(output, " cluster_size:%u", reduce.cluster_size);
break;
}
case Format::PSEUDO_BARRIER: {
const Pseudo_barrier_instruction* barrier = instr->barrier();
print_sync(barrier->sync, output);
print_scope(barrier->exec_scope, output, "exec_scope");
const Pseudo_barrier_instruction& barrier = instr->barrier();
print_sync(barrier.sync, output);
print_scope(barrier.exec_scope, output, "exec_scope");
break;
}
case Format::FLAT:
case Format::GLOBAL:
case Format::SCRATCH: {
const FLAT_instruction* flat = instr->flatlike();
if (flat->offset)
fprintf(output, " offset:%u", flat->offset);
if (flat->glc)
const FLAT_instruction& flat = instr->flatlike();
if (flat.offset)
fprintf(output, " offset:%u", flat.offset);
if (flat.glc)
fprintf(output, " glc");
if (flat->dlc)
if (flat.dlc)
fprintf(output, " dlc");
if (flat->slc)
if (flat.slc)
fprintf(output, " slc");
if (flat->lds)
if (flat.lds)
fprintf(output, " lds");
if (flat->nv)
if (flat.nv)
fprintf(output, " nv");
if (flat->disable_wqm)
if (flat.disable_wqm)
fprintf(output, " disable_wqm");
print_sync(flat->sync, output);
print_sync(flat.sync, output);
break;
}
case Format::MTBUF: {
const MTBUF_instruction* mtbuf = instr->mtbuf();
const MTBUF_instruction& mtbuf = instr->mtbuf();
fprintf(output, " dfmt:");
switch (mtbuf->dfmt) {
switch (mtbuf.dfmt) {
case V_008F0C_BUF_DATA_FORMAT_8: fprintf(output, "8"); break;
case V_008F0C_BUF_DATA_FORMAT_16: fprintf(output, "16"); break;
case V_008F0C_BUF_DATA_FORMAT_8_8: fprintf(output, "8_8"); break;
@ -548,7 +548,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
case V_008F0C_BUF_DATA_FORMAT_RESERVED_15: fprintf(output, "reserved15"); break;
}
fprintf(output, " nfmt:");
switch (mtbuf->nfmt) {
switch (mtbuf.nfmt) {
case V_008F0C_BUF_NUM_FORMAT_UNORM: fprintf(output, "unorm"); break;
case V_008F0C_BUF_NUM_FORMAT_SNORM: fprintf(output, "snorm"); break;
case V_008F0C_BUF_NUM_FORMAT_USCALED: fprintf(output, "uscaled"); break;
@ -558,27 +558,27 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
case V_008F0C_BUF_NUM_FORMAT_SNORM_OGL: fprintf(output, "snorm"); break;
case V_008F0C_BUF_NUM_FORMAT_FLOAT: fprintf(output, "float"); break;
}
if (mtbuf->offset)
fprintf(output, " offset:%u", mtbuf->offset);
if (mtbuf->offen)
if (mtbuf.offset)
fprintf(output, " offset:%u", mtbuf.offset);
if (mtbuf.offen)
fprintf(output, " offen");
if (mtbuf->idxen)
if (mtbuf.idxen)
fprintf(output, " idxen");
if (mtbuf->glc)
if (mtbuf.glc)
fprintf(output, " glc");
if (mtbuf->dlc)
if (mtbuf.dlc)
fprintf(output, " dlc");
if (mtbuf->slc)
if (mtbuf.slc)
fprintf(output, " slc");
if (mtbuf->tfe)
if (mtbuf.tfe)
fprintf(output, " tfe");
if (mtbuf->disable_wqm)
if (mtbuf.disable_wqm)
fprintf(output, " disable_wqm");
print_sync(mtbuf->sync, output);
print_sync(mtbuf.sync, output);
break;
}
case Format::VOP3P: {
if (instr->vop3p()->clamp)
if (instr->vop3p().clamp)
fprintf(output, " clamp");
break;
}
@ -587,8 +587,8 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
}
}
if (instr->isVOP3()) {
const VOP3_instruction* vop3 = instr->vop3();
switch (vop3->omod) {
const VOP3_instruction& vop3 = instr->vop3();
switch (vop3.omod) {
case 1:
fprintf(output, " *2");
break;
@ -599,50 +599,50 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
fprintf(output, " *0.5");
break;
}
if (vop3->clamp)
if (vop3.clamp)
fprintf(output, " clamp");
if (vop3->opsel & (1 << 3))
if (vop3.opsel & (1 << 3))
fprintf(output, " opsel_hi");
} else if (instr->isDPP()) {
const DPP_instruction* dpp = instr->dpp();
if (dpp->dpp_ctrl <= 0xff) {
const DPP_instruction& dpp = instr->dpp();
if (dpp.dpp_ctrl <= 0xff) {
fprintf(output, " quad_perm:[%d,%d,%d,%d]",
dpp->dpp_ctrl & 0x3, (dpp->dpp_ctrl >> 2) & 0x3,
(dpp->dpp_ctrl >> 4) & 0x3, (dpp->dpp_ctrl >> 6) & 0x3);
} else if (dpp->dpp_ctrl >= 0x101 && dpp->dpp_ctrl <= 0x10f) {
fprintf(output, " row_shl:%d", dpp->dpp_ctrl & 0xf);
} else if (dpp->dpp_ctrl >= 0x111 && dpp->dpp_ctrl <= 0x11f) {
fprintf(output, " row_shr:%d", dpp->dpp_ctrl & 0xf);
} else if (dpp->dpp_ctrl >= 0x121 && dpp->dpp_ctrl <= 0x12f) {
fprintf(output, " row_ror:%d", dpp->dpp_ctrl & 0xf);
} else if (dpp->dpp_ctrl == dpp_wf_sl1) {
dpp.dpp_ctrl & 0x3, (dpp.dpp_ctrl >> 2) & 0x3,
(dpp.dpp_ctrl >> 4) & 0x3, (dpp.dpp_ctrl >> 6) & 0x3);
} else if (dpp.dpp_ctrl >= 0x101 && dpp.dpp_ctrl <= 0x10f) {
fprintf(output, " row_shl:%d", dpp.dpp_ctrl & 0xf);
} else if (dpp.dpp_ctrl >= 0x111 && dpp.dpp_ctrl <= 0x11f) {
fprintf(output, " row_shr:%d", dpp.dpp_ctrl & 0xf);
} else if (dpp.dpp_ctrl >= 0x121 && dpp.dpp_ctrl <= 0x12f) {
fprintf(output, " row_ror:%d", dpp.dpp_ctrl & 0xf);
} else if (dpp.dpp_ctrl == dpp_wf_sl1) {
fprintf(output, " wave_shl:1");
} else if (dpp->dpp_ctrl == dpp_wf_rl1) {
} else if (dpp.dpp_ctrl == dpp_wf_rl1) {
fprintf(output, " wave_rol:1");
} else if (dpp->dpp_ctrl == dpp_wf_sr1) {
} else if (dpp.dpp_ctrl == dpp_wf_sr1) {
fprintf(output, " wave_shr:1");
} else if (dpp->dpp_ctrl == dpp_wf_rr1) {
} else if (dpp.dpp_ctrl == dpp_wf_rr1) {
fprintf(output, " wave_ror:1");
} else if (dpp->dpp_ctrl == dpp_row_mirror) {
} else if (dpp.dpp_ctrl == dpp_row_mirror) {
fprintf(output, " row_mirror");
} else if (dpp->dpp_ctrl == dpp_row_half_mirror) {
} else if (dpp.dpp_ctrl == dpp_row_half_mirror) {
fprintf(output, " row_half_mirror");
} else if (dpp->dpp_ctrl == dpp_row_bcast15) {
} else if (dpp.dpp_ctrl == dpp_row_bcast15) {
fprintf(output, " row_bcast:15");
} else if (dpp->dpp_ctrl == dpp_row_bcast31) {
} else if (dpp.dpp_ctrl == dpp_row_bcast31) {
fprintf(output, " row_bcast:31");
} else {
fprintf(output, " dpp_ctrl:0x%.3x", dpp->dpp_ctrl);
fprintf(output, " dpp_ctrl:0x%.3x", dpp.dpp_ctrl);
}
if (dpp->row_mask != 0xf)
fprintf(output, " row_mask:0x%.1x", dpp->row_mask);
if (dpp->bank_mask != 0xf)
fprintf(output, " bank_mask:0x%.1x", dpp->bank_mask);
if (dpp->bound_ctrl)
if (dpp.row_mask != 0xf)
fprintf(output, " row_mask:0x%.1x", dpp.row_mask);
if (dpp.bank_mask != 0xf)
fprintf(output, " bank_mask:0x%.1x", dpp.bank_mask);
if (dpp.bound_ctrl)
fprintf(output, " bound_ctrl:1");
} else if (instr->isSDWA()) {
const SDWA_instruction* sdwa = instr->sdwa();
switch (sdwa->omod) {
const SDWA_instruction& sdwa = instr->sdwa();
switch (sdwa.omod) {
case 1:
fprintf(output, " *2");
break;
@ -653,25 +653,25 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
fprintf(output, " *0.5");
break;
}
if (sdwa->clamp)
if (sdwa.clamp)
fprintf(output, " clamp");
switch (sdwa->dst_sel & sdwa_asuint) {
switch (sdwa.dst_sel & sdwa_asuint) {
case sdwa_udword:
break;
case sdwa_ubyte0:
case sdwa_ubyte1:
case sdwa_ubyte2:
case sdwa_ubyte3:
fprintf(output, " dst_sel:%sbyte%u", sdwa->dst_sel & sdwa_sext ? "s" : "u",
sdwa->dst_sel & sdwa_bytenum);
fprintf(output, " dst_sel:%sbyte%u", sdwa.dst_sel & sdwa_sext ? "s" : "u",
sdwa.dst_sel & sdwa_bytenum);
break;
case sdwa_uword0:
case sdwa_uword1:
fprintf(output, " dst_sel:%sword%u", sdwa->dst_sel & sdwa_sext ? "s" : "u",
sdwa->dst_sel & sdwa_wordnum);
fprintf(output, " dst_sel:%sword%u", sdwa.dst_sel & sdwa_sext ? "s" : "u",
sdwa.dst_sel & sdwa_wordnum);
break;
}
if (sdwa->dst_preserve)
if (sdwa.dst_preserve)
fprintf(output, " dst_preserve");
}
}
@ -693,28 +693,28 @@ void aco_print_instr(const Instruction *instr, FILE *output)
bool *const opsel = (bool *)alloca(instr->operands.size() * sizeof(bool));
uint8_t *const sel = (uint8_t *)alloca(instr->operands.size() * sizeof(uint8_t));
if (instr->isVOP3()) {
const VOP3_instruction* vop3 = instr->vop3();
const VOP3_instruction& vop3 = instr->vop3();
for (unsigned i = 0; i < instr->operands.size(); ++i) {
abs[i] = vop3->abs[i];
neg[i] = vop3->neg[i];
opsel[i] = vop3->opsel & (1 << i);
abs[i] = vop3.abs[i];
neg[i] = vop3.neg[i];
opsel[i] = vop3.opsel & (1 << i);
sel[i] = sdwa_udword;
}
} else if (instr->isDPP()) {
const DPP_instruction* dpp = instr->dpp();
const DPP_instruction& dpp = instr->dpp();
for (unsigned i = 0; i < instr->operands.size(); ++i) {
abs[i] = i < 2 ? dpp->abs[i] : false;
neg[i] = i < 2 ? dpp->neg[i] : false;
abs[i] = i < 2 ? dpp.abs[i] : false;
neg[i] = i < 2 ? dpp.neg[i] : false;
opsel[i] = false;
sel[i] = sdwa_udword;
}
} else if (instr->isSDWA()) {
const SDWA_instruction* sdwa = instr->sdwa();
const SDWA_instruction& sdwa = instr->sdwa();
for (unsigned i = 0; i < instr->operands.size(); ++i) {
abs[i] = i < 2 ? sdwa->abs[i] : false;
neg[i] = i < 2 ? sdwa->neg[i] : false;
abs[i] = i < 2 ? sdwa.abs[i] : false;
neg[i] = i < 2 ? sdwa.neg[i] : false;
opsel[i] = false;
sel[i] = i < 2 ? sdwa->sel[i] : sdwa_udword;
sel[i] = i < 2 ? sdwa.sel[i] : sdwa_udword;
}
} else {
for (unsigned i = 0; i < instr->operands.size(); ++i) {
@ -756,17 +756,17 @@ void aco_print_instr(const Instruction *instr, FILE *output)
fprintf(output, "|");
if (instr->isVOP3P()) {
const VOP3P_instruction* vop3 = instr->vop3p();
if ((vop3->opsel_lo & (1 << i)) || !(vop3->opsel_hi & (1 << i))) {
const VOP3P_instruction& vop3 = instr->vop3p();
if ((vop3.opsel_lo & (1 << i)) || !(vop3.opsel_hi & (1 << i))) {
fprintf(output, ".%c%c",
vop3->opsel_lo & (1 << i) ? 'y' : 'x',
vop3->opsel_hi & (1 << i) ? 'y' : 'x');
vop3.opsel_lo & (1 << i) ? 'y' : 'x',
vop3.opsel_hi & (1 << i) ? 'y' : 'x');
}
if (vop3->neg_lo[i] && vop3->neg_hi[i])
if (vop3.neg_lo[i] && vop3.neg_hi[i])
fprintf(output, "*[-1,-1]");
else if (vop3->neg_lo[i])
else if (vop3.neg_lo[i])
fprintf(output, "*[-1,1]");
else if (vop3->neg_hi[i])
else if (vop3.neg_hi[i])
fprintf(output, "*[1,-1]");
}
}

View file

@ -91,7 +91,7 @@ void setup_reduce_temp(Program* program)
if (instr->format != Format::PSEUDO_REDUCTION)
continue;
ReduceOp op = instr->reduction()->reduce_op;
ReduceOp op = instr->reduction().reduce_op;
reduceTmp_in_loop |= block.loop_nest_depth > 0;
if ((int)last_top_level_block_idx != inserted_at) {
@ -115,7 +115,7 @@ void setup_reduce_temp(Program* program)
}
/* same as before, except for the vector temporary instead of the reduce temporary */
unsigned cluster_size = instr->reduction()->cluster_size;
unsigned cluster_size = instr->reduction().cluster_size;
bool need_vtmp = op == imul32 || op == fadd64 || op == fmul64 ||
op == fmin64 || op == fmax64 || op == umin64 ||
op == umax64 || op == imin64 || op == imax64 ||

View file

@ -503,13 +503,13 @@ void add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx
update_phi_map(ctx, tmp.get(), instr.get());
return;
} else if (rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, idx, byte / 2)) {
instr->vop3()->opsel |= (byte / 2) << idx;
instr->vop3().opsel |= (byte / 2) << idx;
return;
} else if (instr->isVOP3P() && byte == 2) {
VOP3P_instruction* vop3p = instr->vop3p();
assert(!(vop3p->opsel_lo & (1 << idx)));
vop3p->opsel_lo |= 1 << idx;
vop3p->opsel_hi |= 1 << idx;
VOP3P_instruction& vop3p = instr->vop3p();
assert(!(vop3p.opsel_lo & (1 << idx)));
vop3p.opsel_lo |= 1 << idx;
vop3p.opsel_hi |= 1 << idx;
return;
}
@ -613,9 +613,9 @@ void add_subdword_definition(Program *program, aco_ptr<Instruction>& instr, unsi
convert_to_SDWA(chip, instr);
return;
} else if (reg.byte() && rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, -1, reg.byte() / 2)) {
VOP3_instruction *vop3 = instr->vop3();
VOP3_instruction& vop3 = instr->vop3();
if (reg.byte() == 2)
vop3->opsel |= (1 << 3); /* dst in high half */
vop3.opsel |= (1 << 3); /* dst in high half */
return;
}
@ -1569,7 +1569,7 @@ void handle_pseudo(ra_ctx& ctx,
return;
if (reg_file[scc]) {
instr->pseudo()->tmp_in_scc = true;
instr->pseudo().tmp_in_scc = true;
int reg = ctx.max_used_sgpr;
for (; reg >= 0 && reg_file[PhysReg{(unsigned)reg}]; reg--)
@ -1585,9 +1585,9 @@ void handle_pseudo(ra_ctx& ctx,
}
adjust_max_used_regs(ctx, s1, reg);
instr->pseudo()->scratch_sgpr = PhysReg{(unsigned)reg};
instr->pseudo().scratch_sgpr = PhysReg{(unsigned)reg};
} else {
instr->pseudo()->tmp_in_scc = false;
instr->pseudo().tmp_in_scc = false;
}
}
@ -2156,7 +2156,7 @@ void register_allocation(Program *program, std::vector<IDSet>& live_out_per_bloc
if (instr->isEXP() ||
(instr->isVMEM() && i == 3 && ctx.program->chip_class == GFX6) ||
(instr->isDS() && instr->ds()->gds)) {
(instr->isDS() && instr->ds().gds)) {
for (unsigned j = 0; j < operand.size(); j++)
ctx.war_hint.set(operand.physReg().reg() + j);
}

View file

@ -320,7 +320,7 @@ void MoveState::upwards_skip()
bool is_gs_or_done_sendmsg(const Instruction *instr)
{
if (instr->opcode == aco_opcode::s_sendmsg) {
uint16_t imm = instr->sopp()->imm;
uint16_t imm = instr->sopp().imm;
return (imm & sendmsg_id_mask) == _sendmsg_gs ||
(imm & sendmsg_id_mask) == _sendmsg_gs_done;
}
@ -330,7 +330,7 @@ bool is_gs_or_done_sendmsg(const Instruction *instr)
bool is_done_sendmsg(const Instruction *instr)
{
if (instr->opcode == aco_opcode::s_sendmsg)
return (instr->sopp()->imm & sendmsg_id_mask) == _sendmsg_gs_done;
return (instr->sopp().imm & sendmsg_id_mask) == _sendmsg_gs_done;
return false;
}
@ -380,14 +380,14 @@ void add_memory_event(memory_event_set *set, Instruction *instr, memory_sync_inf
{
set->has_control_barrier |= is_done_sendmsg(instr);
if (instr->opcode == aco_opcode::p_barrier) {
Pseudo_barrier_instruction *bar = instr->barrier();
if (bar->sync.semantics & semantic_acquire)
set->bar_acquire |= bar->sync.storage;
if (bar->sync.semantics & semantic_release)
set->bar_release |= bar->sync.storage;
set->bar_classes |= bar->sync.storage;
Pseudo_barrier_instruction& bar = instr->barrier();
if (bar.sync.semantics & semantic_acquire)
set->bar_acquire |= bar.sync.storage;
if (bar.sync.semantics & semantic_release)
set->bar_release |= bar.sync.storage;
set->bar_classes |= bar.sync.storage;
set->has_control_barrier |= bar->exec_scope > scope_invocation;
set->has_control_barrier |= bar.exec_scope > scope_invocation;
}
if (!sync->storage)
@ -857,7 +857,7 @@ void schedule_block(sched_ctx& ctx, Program *program, Block* block, live& live_v
Instruction* current = block->instructions[idx].get();
if (block->kind & block_kind_export_end && current->isEXP()) {
unsigned target = current->exp()->dest;
unsigned target = current->exp().dest;
if (target >= V_008DFC_SQ_EXP_POS && target < V_008DFC_SQ_EXP_PRIM) {
ctx.mv.current = current;
schedule_position_export(ctx, block, live_vars.register_demand[block->index], current, idx);

View file

@ -283,7 +283,7 @@ aco_ptr<Instruction> do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t
res.reset(create_instruction<Pseudo_instruction>(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
} else if (instr->isSOPK()) {
res.reset(create_instruction<SOPK_instruction>(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
res->sopk()->imm = instr->sopk()->imm;
res->sopk().imm = instr->sopk().imm;
}
for (unsigned i = 0; i < instr->operands.size(); i++) {
res->operands[i] = instr->operands[i];
@ -1589,11 +1589,11 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
bld.insert(split);
for (unsigned i = 0; i < temp.size(); i++) {
Instruction *instr = bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false, true);
instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private);
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
}
} else {
Instruction *instr = bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, temp, offset, false, true);
instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private);
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
}
} else {
ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
@ -1658,12 +1658,12 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
Temp tmp = bld.tmp(v1);
vec->operands[i] = Operand(tmp);
Instruction *instr = bld.mubuf(opcode, Definition(tmp), scratch_rsrc, Operand(v1), scratch_offset, offset + i * 4, false, true);
instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private);
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
}
bld.insert(vec);
} else {
Instruction *instr = bld.mubuf(opcode, def, scratch_rsrc, Operand(v1), scratch_offset, offset, false, true);
instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private);
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
}
} else {
uint32_t spill_slot = slots[spill_id];

View file

@ -178,10 +178,10 @@ void try_remove_invert_block(ssa_elimination_ctx& ctx, Block* block)
pred->linear_succs[0] = succ_idx;
ctx.program->blocks[succ_idx].linear_preds[i] = pred->index;
Pseudo_branch_instruction *branch = pred->instructions.back()->branch();
assert(branch->isBranch());
branch->target[0] = succ_idx;
branch->target[1] = succ_idx;
Pseudo_branch_instruction& branch = pred->instructions.back()->branch();
assert(branch.isBranch());
branch.target[0] = succ_idx;
branch.target[1] = succ_idx;
}
block->instructions.clear();
@ -196,17 +196,17 @@ void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block)
Block& pred = ctx.program->blocks[block->linear_preds[0]];
Block& succ = ctx.program->blocks[block->linear_succs[0]];
Pseudo_branch_instruction* branch = pred.instructions.back()->branch();
if (branch->opcode == aco_opcode::p_branch) {
branch->target[0] = succ.index;
branch->target[1] = succ.index;
} else if (branch->target[0] == block->index) {
branch->target[0] = succ.index;
} else if (branch->target[0] == succ.index) {
assert(branch->target[1] == block->index);
branch->target[1] = succ.index;
branch->opcode = aco_opcode::p_branch;
} else if (branch->target[1] == block->index) {
Pseudo_branch_instruction& branch = pred.instructions.back()->branch();
if (branch.opcode == aco_opcode::p_branch) {
branch.target[0] = succ.index;
branch.target[1] = succ.index;
} else if (branch.target[0] == block->index) {
branch.target[0] = succ.index;
} else if (branch.target[0] == succ.index) {
assert(branch.target[1] == block->index);
branch.target[1] = succ.index;
branch.opcode = aco_opcode::p_branch;
} else if (branch.target[1] == block->index) {
/* check if there is a fall-through path from block to succ */
bool falls_through = block->index < succ.index;
for (unsigned j = block->index + 1; falls_through && j < succ.index; j++) {
@ -215,35 +215,35 @@ void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block)
falls_through = false;
}
if (falls_through) {
branch->target[1] = succ.index;
branch.target[1] = succ.index;
} else {
/* check if there is a fall-through path for the alternative target */
if (block->index >= branch->target[0])
if (block->index >= branch.target[0])
return;
for (unsigned j = block->index + 1; j < branch->target[0]; j++) {
for (unsigned j = block->index + 1; j < branch.target[0]; j++) {
if (!ctx.program->blocks[j].instructions.empty())
return;
}
/* This is a (uniform) break or continue block. The branch condition has to be inverted. */
if (branch->opcode == aco_opcode::p_cbranch_z)
branch->opcode = aco_opcode::p_cbranch_nz;
else if (branch->opcode == aco_opcode::p_cbranch_nz)
branch->opcode = aco_opcode::p_cbranch_z;
if (branch.opcode == aco_opcode::p_cbranch_z)
branch.opcode = aco_opcode::p_cbranch_nz;
else if (branch.opcode == aco_opcode::p_cbranch_nz)
branch.opcode = aco_opcode::p_cbranch_z;
else
assert(false);
/* also invert the linear successors */
pred.linear_succs[0] = pred.linear_succs[1];
pred.linear_succs[1] = succ.index;
branch->target[1] = branch->target[0];
branch->target[0] = succ.index;
branch.target[1] = branch.target[0];
branch.target[0] = succ.index;
}
} else {
assert(false);
}
if (branch->target[0] == branch->target[1])
branch->opcode = aco_opcode::p_branch;
if (branch.target[0] == branch.target[1])
branch.opcode = aco_opcode::p_branch;
for (unsigned i = 0; i < pred.linear_succs.size(); i++)
if (pred.linear_succs[i] == block->index)

View file

@ -46,7 +46,7 @@ void collect_preasm_stats(Program *program)
program->statistics[statistic_instructions] += block.instructions.size();
for (aco_ptr<Instruction>& instr : block.instructions) {
if (instr->isSOPP() && instr->sopp()->block != -1)
if (instr->isSOPP() && instr->sopp().block != -1)
program->statistics[statistic_branches]++;
if (instr->opcode == aco_opcode::p_constaddr)

View file

@ -148,10 +148,10 @@ bool validate_ir(Program* program)
check(program->chip_class >= GFX8, "SDWA is GFX8+ only", instr.get());
SDWA_instruction *sdwa = instr->sdwa();
check(sdwa->omod == 0 || program->chip_class >= GFX9, "SDWA omod only supported on GFX9+", instr.get());
SDWA_instruction& sdwa = instr->sdwa();
check(sdwa.omod == 0 || program->chip_class >= GFX9, "SDWA omod only supported on GFX9+", instr.get());
if (base_format == Format::VOPC) {
check(sdwa->clamp == false || program->chip_class == GFX8, "SDWA VOPC clamp only supported on GFX8", instr.get());
check(sdwa.clamp == false || program->chip_class == GFX8, "SDWA VOPC clamp only supported on GFX8", instr.get());
check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
program->chip_class >= GFX9,
"SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
@ -183,21 +183,21 @@ bool validate_ir(Program* program)
}
if (instr->definitions[0].regClass().is_subdword())
check((sdwa->dst_sel & sdwa_asuint) == (sdwa_isra | instr->definitions[0].bytes()), "Unexpected SDWA sel for sub-dword definition", instr.get());
check((sdwa.dst_sel & sdwa_asuint) == (sdwa_isra | instr->definitions[0].bytes()), "Unexpected SDWA sel for sub-dword definition", instr.get());
}
/* check opsel */
if (instr->isVOP3()) {
VOP3_instruction *vop3 = instr->vop3();
check(vop3->opsel == 0 || program->chip_class >= GFX9, "Opsel is only supported on GFX9+", instr.get());
VOP3_instruction& vop3 = instr->vop3();
check(vop3.opsel == 0 || program->chip_class >= GFX9, "Opsel is only supported on GFX9+", instr.get());
for (unsigned i = 0; i < 3; i++) {
if (i >= instr->operands.size() ||
(instr->operands[i].hasRegClass() && instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed()))
check((vop3->opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get());
check((vop3.opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get());
}
if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())
check((vop3->opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition", instr.get());
check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition", instr.get());
}
/* check for undefs */
@ -377,7 +377,7 @@ bool validate_ir(Program* program)
for (const Operand &op : instr->operands)
check(op.regClass().type() == RegType::vgpr, "All operands of PSEUDO_REDUCTION instructions must be in VGPRs.", instr.get());
if (instr->opcode == aco_opcode::p_reduce && instr->reduction()->cluster_size == program->wave_size)
if (instr->opcode == aco_opcode::p_reduce && instr->reduction().cluster_size == program->wave_size)
check(instr->definitions[0].regClass().type() == RegType::sgpr, "The result of unclustered reductions must go into an SGPR.", instr.get());
else
check(instr->definitions[0].regClass().type() == RegType::vgpr, "The result of scans and clustered reductions must go into a VGPR.", instr.get());
@ -549,7 +549,7 @@ bool validate_subdword_operand(chip_class chip, const aco_ptr<Instruction>& inst
return byte == 0;
if (instr->isPseudo() && chip >= GFX8)
return true;
if (instr->isSDWA() && (instr->sdwa()->sel[index] & sdwa_asuint) == (sdwa_isra | op.bytes()))
if (instr->isSDWA() && (instr->sdwa().sel[index] & sdwa_asuint) == (sdwa_isra | op.bytes()))
return true;
if (byte == 2 && can_use_opsel(chip, instr->opcode, index, 1))
return true;
@ -599,7 +599,7 @@ bool validate_subdword_definition(chip_class chip, const aco_ptr<Instruction>& i
if (instr->isPseudo() && chip >= GFX8)
return true;
if (instr->isSDWA() && instr->sdwa()->dst_sel == (sdwa_isra | def.bytes()))
if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes()))
return true;
if (byte == 2 && can_use_opsel(chip, instr->opcode, -1, 1))
return true;
@ -630,7 +630,7 @@ unsigned get_subdword_bytes_written(Program *program, const aco_ptr<Instruction>
if (instr->isPseudo())
return chip >= GFX8 ? def.bytes() : def.size() * 4u;
if (instr->isSDWA() && instr->sdwa()->dst_sel == (sdwa_isra | def.bytes()))
if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes()))
return def.bytes();
switch (instr->opcode) {

View file

@ -735,7 +735,7 @@ BEGIN_TEST(optimize.add3)
//! v1: %res1 = v_add_u32 %a, %tmp1
//! p_unit_test 1, %res1
tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
tmp.instr->vop3()->clamp = true;
tmp.instr->vop3().clamp = true;
writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
//! v1: %tmp2 = v_add_u32 %b, %c
@ -743,7 +743,7 @@ BEGIN_TEST(optimize.add3)
//! p_unit_test 2, %res2
tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp);
tmp.instr->vop3()->clamp = true;
tmp.instr->vop3().clamp = true;
writeout(2, tmp);
finish_opt_test();

View file

@ -197,7 +197,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
Definition(v0_lo, v1),
Operand(v0_lo, v1b), Operand(v0_lo, v1b),
Operand(v0_lo, v1b), Operand(v0_lo, v1b));
pseudo->pseudo()->scratch_sgpr = m0;
pseudo->pseudo().scratch_sgpr = m0;
//~gfx[67]! p_unit_test 14
//~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]