mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-03 12:08:06 +02:00
aco: return references in instruction cast methods
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8595>
This commit is contained in:
parent
1d245cd18b
commit
e115b01948
20 changed files with 752 additions and 753 deletions
|
|
@ -109,7 +109,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
break;
|
||||
}
|
||||
case Format::SOPK: {
|
||||
SOPK_instruction *sopk = instr->sopk();
|
||||
SOPK_instruction& sopk = instr->sopk();
|
||||
|
||||
if (instr->opcode == aco_opcode::s_subvector_loop_begin) {
|
||||
assert(ctx.chip_class >= GFX10);
|
||||
|
|
@ -121,7 +121,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
/* Adjust s_subvector_loop_begin instruction to the address after the end */
|
||||
out[ctx.subvector_begin_pos] |= (out.size() - ctx.subvector_begin_pos);
|
||||
/* Adjust s_subvector_loop_end instruction to the address after the beginning */
|
||||
sopk->imm = (uint16_t)(ctx.subvector_begin_pos - (int)out.size());
|
||||
sopk.imm = (uint16_t)(ctx.subvector_begin_pos - (int)out.size());
|
||||
ctx.subvector_begin_pos = -1;
|
||||
}
|
||||
|
||||
|
|
@ -132,7 +132,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
instr->definitions[0].physReg() << 16 :
|
||||
!instr->operands.empty() && instr->operands[0].physReg() <= 127 ?
|
||||
instr->operands[0].physReg() << 16 : 0;
|
||||
encoding |= sopk->imm;
|
||||
encoding |= sopk.imm;
|
||||
out.push_back(encoding);
|
||||
break;
|
||||
}
|
||||
|
|
@ -157,19 +157,19 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
break;
|
||||
}
|
||||
case Format::SOPP: {
|
||||
SOPP_instruction* sopp = instr->sopp();
|
||||
SOPP_instruction& sopp = instr->sopp();
|
||||
uint32_t encoding = (0b101111111 << 23);
|
||||
encoding |= opcode << 16;
|
||||
encoding |= (uint16_t) sopp->imm;
|
||||
if (sopp->block != -1) {
|
||||
sopp->pass_flags = 0;
|
||||
ctx.branches.emplace_back(out.size(), sopp);
|
||||
encoding |= (uint16_t) sopp.imm;
|
||||
if (sopp.block != -1) {
|
||||
sopp.pass_flags = 0;
|
||||
ctx.branches.emplace_back(out.size(), &sopp);
|
||||
}
|
||||
out.push_back(encoding);
|
||||
break;
|
||||
}
|
||||
case Format::SMEM: {
|
||||
SMEM_instruction* smem = instr->smem();
|
||||
SMEM_instruction& smem = instr->smem();
|
||||
bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4);
|
||||
bool is_load = !instr->definitions.empty();
|
||||
uint32_t encoding = 0;
|
||||
|
|
@ -196,16 +196,16 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
|
||||
if (ctx.chip_class <= GFX9) {
|
||||
encoding = (0b110000 << 26);
|
||||
assert(!smem->dlc); /* Device-level coherent is not supported on GFX9 and lower */
|
||||
encoding |= smem->nv ? 1 << 15 : 0;
|
||||
assert(!smem.dlc); /* Device-level coherent is not supported on GFX9 and lower */
|
||||
encoding |= smem.nv ? 1 << 15 : 0;
|
||||
} else {
|
||||
encoding = (0b111101 << 26);
|
||||
assert(!smem->nv); /* Non-volatile is not supported on GFX10 */
|
||||
encoding |= smem->dlc ? 1 << 14 : 0;
|
||||
assert(!smem.nv); /* Non-volatile is not supported on GFX10 */
|
||||
encoding |= smem.dlc ? 1 << 14 : 0;
|
||||
}
|
||||
|
||||
encoding |= opcode << 18;
|
||||
encoding |= smem->glc ? 1 << 16 : 0;
|
||||
encoding |= smem.glc ? 1 << 16 : 0;
|
||||
|
||||
if (ctx.chip_class <= GFX9) {
|
||||
if (instr->operands.size() >= 2)
|
||||
|
|
@ -284,7 +284,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
break;
|
||||
}
|
||||
case Format::VINTRP: {
|
||||
Interp_instruction* interp = instr->vintrp();
|
||||
Interp_instruction& interp = instr->vintrp();
|
||||
uint32_t encoding = 0;
|
||||
|
||||
if (instr->opcode == aco_opcode::v_interp_p1ll_f16 ||
|
||||
|
|
@ -304,8 +304,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
out.push_back(encoding);
|
||||
|
||||
encoding = 0;
|
||||
encoding |= interp->attribute;
|
||||
encoding |= interp->component << 6;
|
||||
encoding |= interp.attribute;
|
||||
encoding |= interp.component << 6;
|
||||
encoding |= instr->operands[0].physReg() << 9;
|
||||
if (instr->opcode == aco_opcode::v_interp_p2_f16 ||
|
||||
instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||
|
||||
|
|
@ -323,8 +323,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
assert(encoding);
|
||||
encoding |= (0xFF & instr->definitions[0].physReg()) << 18;
|
||||
encoding |= opcode << 16;
|
||||
encoding |= interp->attribute << 10;
|
||||
encoding |= interp->component << 8;
|
||||
encoding |= interp.attribute << 10;
|
||||
encoding |= interp.component << 8;
|
||||
if (instr->opcode == aco_opcode::v_interp_mov_f32)
|
||||
encoding |= (0x3 & instr->operands[0].constantValue());
|
||||
else
|
||||
|
|
@ -334,17 +334,17 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
break;
|
||||
}
|
||||
case Format::DS: {
|
||||
DS_instruction* ds = instr->ds();
|
||||
DS_instruction& ds = instr->ds();
|
||||
uint32_t encoding = (0b110110 << 26);
|
||||
if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
|
||||
encoding |= opcode << 17;
|
||||
encoding |= (ds->gds ? 1 : 0) << 16;
|
||||
encoding |= (ds.gds ? 1 : 0) << 16;
|
||||
} else {
|
||||
encoding |= opcode << 18;
|
||||
encoding |= (ds->gds ? 1 : 0) << 17;
|
||||
encoding |= (ds.gds ? 1 : 0) << 17;
|
||||
}
|
||||
encoding |= ((0xFF & ds->offset1) << 8);
|
||||
encoding |= (0xFFFF & ds->offset0);
|
||||
encoding |= ((0xFF & ds.offset1) << 8);
|
||||
encoding |= (0xFFFF & ds.offset0);
|
||||
out.push_back(encoding);
|
||||
encoding = 0;
|
||||
unsigned reg = !instr->definitions.empty() ? instr->definitions[0].physReg() : 0;
|
||||
|
|
@ -358,30 +358,30 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
break;
|
||||
}
|
||||
case Format::MUBUF: {
|
||||
MUBUF_instruction* mubuf = instr->mubuf();
|
||||
MUBUF_instruction& mubuf = instr->mubuf();
|
||||
uint32_t encoding = (0b111000 << 26);
|
||||
encoding |= opcode << 18;
|
||||
encoding |= (mubuf->lds ? 1 : 0) << 16;
|
||||
encoding |= (mubuf->glc ? 1 : 0) << 14;
|
||||
encoding |= (mubuf->idxen ? 1 : 0) << 13;
|
||||
assert(!mubuf->addr64 || ctx.chip_class <= GFX7);
|
||||
encoding |= (mubuf.lds ? 1 : 0) << 16;
|
||||
encoding |= (mubuf.glc ? 1 : 0) << 14;
|
||||
encoding |= (mubuf.idxen ? 1 : 0) << 13;
|
||||
assert(!mubuf.addr64 || ctx.chip_class <= GFX7);
|
||||
if (ctx.chip_class == GFX6 || ctx.chip_class == GFX7)
|
||||
encoding |= (mubuf->addr64 ? 1 : 0) << 15;
|
||||
encoding |= (mubuf->offen ? 1 : 0) << 12;
|
||||
encoding |= (mubuf.addr64 ? 1 : 0) << 15;
|
||||
encoding |= (mubuf.offen ? 1 : 0) << 12;
|
||||
if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
|
||||
assert(!mubuf->dlc); /* Device-level coherent is not supported on GFX9 and lower */
|
||||
encoding |= (mubuf->slc ? 1 : 0) << 17;
|
||||
assert(!mubuf.dlc); /* Device-level coherent is not supported on GFX9 and lower */
|
||||
encoding |= (mubuf.slc ? 1 : 0) << 17;
|
||||
} else if (ctx.chip_class >= GFX10) {
|
||||
encoding |= (mubuf->dlc ? 1 : 0) << 15;
|
||||
encoding |= (mubuf.dlc ? 1 : 0) << 15;
|
||||
}
|
||||
encoding |= 0x0FFF & mubuf->offset;
|
||||
encoding |= 0x0FFF & mubuf.offset;
|
||||
out.push_back(encoding);
|
||||
encoding = 0;
|
||||
if (ctx.chip_class <= GFX7 || ctx.chip_class >= GFX10) {
|
||||
encoding |= (mubuf->slc ? 1 : 0) << 22;
|
||||
encoding |= (mubuf.slc ? 1 : 0) << 22;
|
||||
}
|
||||
encoding |= instr->operands[2].physReg() << 24;
|
||||
encoding |= (mubuf->tfe ? 1 : 0) << 23;
|
||||
encoding |= (mubuf.tfe ? 1 : 0) << 23;
|
||||
encoding |= (instr->operands[0].physReg() >> 2) << 16;
|
||||
unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg() : instr->definitions[0].physReg();
|
||||
encoding |= (0xFF & reg) << 8;
|
||||
|
|
@ -390,17 +390,17 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
break;
|
||||
}
|
||||
case Format::MTBUF: {
|
||||
MTBUF_instruction* mtbuf = instr->mtbuf();
|
||||
MTBUF_instruction& mtbuf = instr->mtbuf();
|
||||
|
||||
uint32_t img_format = ac_get_tbuffer_format(ctx.chip_class, mtbuf->dfmt, mtbuf->nfmt);
|
||||
uint32_t img_format = ac_get_tbuffer_format(ctx.chip_class, mtbuf.dfmt, mtbuf.nfmt);
|
||||
uint32_t encoding = (0b111010 << 26);
|
||||
assert(img_format <= 0x7F);
|
||||
assert(!mtbuf->dlc || ctx.chip_class >= GFX10);
|
||||
encoding |= (mtbuf->dlc ? 1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */
|
||||
encoding |= (mtbuf->glc ? 1 : 0) << 14;
|
||||
encoding |= (mtbuf->idxen ? 1 : 0) << 13;
|
||||
encoding |= (mtbuf->offen ? 1 : 0) << 12;
|
||||
encoding |= 0x0FFF & mtbuf->offset;
|
||||
assert(!mtbuf.dlc || ctx.chip_class >= GFX10);
|
||||
encoding |= (mtbuf.dlc ? 1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */
|
||||
encoding |= (mtbuf.glc ? 1 : 0) << 14;
|
||||
encoding |= (mtbuf.idxen ? 1 : 0) << 13;
|
||||
encoding |= (mtbuf.offen ? 1 : 0) << 12;
|
||||
encoding |= 0x0FFF & mtbuf.offset;
|
||||
encoding |= (img_format << 19); /* Handles both the GFX10 FORMAT and the old NFMT+DFMT */
|
||||
|
||||
if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
|
||||
|
|
@ -413,8 +413,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
encoding = 0;
|
||||
|
||||
encoding |= instr->operands[2].physReg() << 24;
|
||||
encoding |= (mtbuf->tfe ? 1 : 0) << 23;
|
||||
encoding |= (mtbuf->slc ? 1 : 0) << 22;
|
||||
encoding |= (mtbuf.tfe ? 1 : 0) << 23;
|
||||
encoding |= (mtbuf.slc ? 1 : 0) << 22;
|
||||
encoding |= (instr->operands[0].physReg() >> 2) << 16;
|
||||
unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg() : instr->definitions[0].physReg();
|
||||
encoding |= (0xFF & reg) << 8;
|
||||
|
|
@ -437,26 +437,26 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
assert(!use_nsa || ctx.chip_class >= GFX10);
|
||||
unsigned nsa_dwords = use_nsa ? DIV_ROUND_UP(addr_dwords - 1, 4) : 0;
|
||||
|
||||
MIMG_instruction* mimg = instr->mimg();
|
||||
MIMG_instruction& mimg = instr->mimg();
|
||||
uint32_t encoding = (0b111100 << 26);
|
||||
encoding |= mimg->slc ? 1 << 25 : 0;
|
||||
encoding |= mimg.slc ? 1 << 25 : 0;
|
||||
encoding |= opcode << 18;
|
||||
encoding |= mimg->lwe ? 1 << 17 : 0;
|
||||
encoding |= mimg->tfe ? 1 << 16 : 0;
|
||||
encoding |= mimg->glc ? 1 << 13 : 0;
|
||||
encoding |= mimg->unrm ? 1 << 12 : 0;
|
||||
encoding |= mimg.lwe ? 1 << 17 : 0;
|
||||
encoding |= mimg.tfe ? 1 << 16 : 0;
|
||||
encoding |= mimg.glc ? 1 << 13 : 0;
|
||||
encoding |= mimg.unrm ? 1 << 12 : 0;
|
||||
if (ctx.chip_class <= GFX9) {
|
||||
assert(!mimg->dlc); /* Device-level coherent is not supported on GFX9 and lower */
|
||||
assert(!mimg->r128);
|
||||
encoding |= mimg->a16 ? 1 << 15 : 0;
|
||||
encoding |= mimg->da ? 1 << 14 : 0;
|
||||
assert(!mimg.dlc); /* Device-level coherent is not supported on GFX9 and lower */
|
||||
assert(!mimg.r128);
|
||||
encoding |= mimg.a16 ? 1 << 15 : 0;
|
||||
encoding |= mimg.da ? 1 << 14 : 0;
|
||||
} else {
|
||||
encoding |= mimg->r128 ? 1 << 15 : 0; /* GFX10: A16 moved to 2nd word, R128 replaces it in 1st word */
|
||||
encoding |= mimg.r128 ? 1 << 15 : 0; /* GFX10: A16 moved to 2nd word, R128 replaces it in 1st word */
|
||||
encoding |= nsa_dwords << 1;
|
||||
encoding |= mimg->dim << 3; /* GFX10: dimensionality instead of declare array */
|
||||
encoding |= mimg->dlc ? 1 << 7 : 0;
|
||||
encoding |= mimg.dim << 3; /* GFX10: dimensionality instead of declare array */
|
||||
encoding |= mimg.dlc ? 1 << 7 : 0;
|
||||
}
|
||||
encoding |= (0xF & mimg->dmask) << 8;
|
||||
encoding |= (0xF & mimg.dmask) << 8;
|
||||
out.push_back(encoding);
|
||||
encoding = (0xFF & instr->operands[3].physReg()); /* VADDR */
|
||||
if (!instr->definitions.empty()) {
|
||||
|
|
@ -468,10 +468,10 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
if (!instr->operands[1].isUndefined())
|
||||
encoding |= (0x1F & (instr->operands[1].physReg() >> 2)) << 21; /* sampler */
|
||||
|
||||
assert(!mimg->d16 || ctx.chip_class >= GFX9);
|
||||
encoding |= mimg->d16 ? 1 << 15 : 0;
|
||||
assert(!mimg.d16 || ctx.chip_class >= GFX9);
|
||||
encoding |= mimg.d16 ? 1 << 15 : 0;
|
||||
if (ctx.chip_class >= GFX10) {
|
||||
encoding |= mimg->a16 ? 1 << 14 : 0; /* GFX10: A16 still exists, but is in a different place */
|
||||
encoding |= mimg.a16 ? 1 << 14 : 0; /* GFX10: A16 still exists, but is in a different place */
|
||||
}
|
||||
|
||||
out.push_back(encoding);
|
||||
|
|
@ -487,33 +487,33 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
case Format::FLAT:
|
||||
case Format::SCRATCH:
|
||||
case Format::GLOBAL: {
|
||||
FLAT_instruction *flat = instr->flatlike();
|
||||
FLAT_instruction& flat = instr->flatlike();
|
||||
uint32_t encoding = (0b110111 << 26);
|
||||
encoding |= opcode << 18;
|
||||
if (ctx.chip_class <= GFX9) {
|
||||
assert(flat->offset <= 0x1fff);
|
||||
encoding |= flat->offset & 0x1fff;
|
||||
assert(flat.offset <= 0x1fff);
|
||||
encoding |= flat.offset & 0x1fff;
|
||||
} else if (instr->isFlat()) {
|
||||
/* GFX10 has a 12-bit immediate OFFSET field,
|
||||
* but it has a hw bug: it ignores the offset, called FlatSegmentOffsetBug
|
||||
*/
|
||||
assert(flat->offset == 0);
|
||||
assert(flat.offset == 0);
|
||||
} else {
|
||||
assert(flat->offset <= 0xfff);
|
||||
encoding |= flat->offset & 0xfff;
|
||||
assert(flat.offset <= 0xfff);
|
||||
encoding |= flat.offset & 0xfff;
|
||||
}
|
||||
if (instr->isScratch())
|
||||
encoding |= 1 << 14;
|
||||
else if (instr->isGlobal())
|
||||
encoding |= 2 << 14;
|
||||
encoding |= flat->lds ? 1 << 13 : 0;
|
||||
encoding |= flat->glc ? 1 << 16 : 0;
|
||||
encoding |= flat->slc ? 1 << 17 : 0;
|
||||
encoding |= flat.lds ? 1 << 13 : 0;
|
||||
encoding |= flat.glc ? 1 << 16 : 0;
|
||||
encoding |= flat.slc ? 1 << 17 : 0;
|
||||
if (ctx.chip_class >= GFX10) {
|
||||
assert(!flat->nv);
|
||||
encoding |= flat->dlc ? 1 << 12 : 0;
|
||||
assert(!flat.nv);
|
||||
encoding |= flat.dlc ? 1 << 12 : 0;
|
||||
} else {
|
||||
assert(!flat->dlc);
|
||||
assert(!flat.dlc);
|
||||
}
|
||||
out.push_back(encoding);
|
||||
encoding = (0xFF & instr->operands[0].physReg());
|
||||
|
|
@ -531,12 +531,12 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
else
|
||||
encoding |= sgpr_null << 16;
|
||||
}
|
||||
encoding |= flat->nv ? 1 << 23 : 0;
|
||||
encoding |= flat.nv ? 1 << 23 : 0;
|
||||
out.push_back(encoding);
|
||||
break;
|
||||
}
|
||||
case Format::EXP: {
|
||||
Export_instruction* exp = instr->exp();
|
||||
Export_instruction& exp = instr->exp();
|
||||
uint32_t encoding;
|
||||
if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
|
||||
encoding = (0b110001 << 26);
|
||||
|
|
@ -544,16 +544,16 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
encoding = (0b111110 << 26);
|
||||
}
|
||||
|
||||
encoding |= exp->valid_mask ? 0b1 << 12 : 0;
|
||||
encoding |= exp->done ? 0b1 << 11 : 0;
|
||||
encoding |= exp->compressed ? 0b1 << 10 : 0;
|
||||
encoding |= exp->dest << 4;
|
||||
encoding |= exp->enabled_mask;
|
||||
encoding |= exp.valid_mask ? 0b1 << 12 : 0;
|
||||
encoding |= exp.done ? 0b1 << 11 : 0;
|
||||
encoding |= exp.compressed ? 0b1 << 10 : 0;
|
||||
encoding |= exp.dest << 4;
|
||||
encoding |= exp.enabled_mask;
|
||||
out.push_back(encoding);
|
||||
encoding = 0xFF & exp->operands[0].physReg();
|
||||
encoding |= (0xFF & exp->operands[1].physReg()) << 8;
|
||||
encoding |= (0xFF & exp->operands[2].physReg()) << 16;
|
||||
encoding |= (0xFF & exp->operands[3].physReg()) << 24;
|
||||
encoding = 0xFF & exp.operands[0].physReg();
|
||||
encoding |= (0xFF & exp.operands[1].physReg()) << 8;
|
||||
encoding |= (0xFF & exp.operands[2].physReg()) << 16;
|
||||
encoding |= (0xFF & exp.operands[3].physReg()) << 24;
|
||||
out.push_back(encoding);
|
||||
break;
|
||||
}
|
||||
|
|
@ -564,7 +564,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
break;
|
||||
default:
|
||||
if (instr->isVOP3()) {
|
||||
VOP3_instruction* vop3 = instr->vop3();
|
||||
VOP3_instruction& vop3 = instr->vop3();
|
||||
|
||||
if (instr->isVOP2()) {
|
||||
opcode = opcode + 0x100;
|
||||
|
|
@ -590,14 +590,14 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
|
||||
if (ctx.chip_class <= GFX7) {
|
||||
encoding |= opcode << 17;
|
||||
encoding |= (vop3->clamp ? 1 : 0) << 11;
|
||||
encoding |= (vop3.clamp ? 1 : 0) << 11;
|
||||
} else {
|
||||
encoding |= opcode << 16;
|
||||
encoding |= (vop3->clamp ? 1 : 0) << 15;
|
||||
encoding |= (vop3.clamp ? 1 : 0) << 15;
|
||||
}
|
||||
encoding |= vop3->opsel << 11;
|
||||
encoding |= vop3.opsel << 11;
|
||||
for (unsigned i = 0; i < 3; i++)
|
||||
encoding |= vop3->abs[i] << (8+i);
|
||||
encoding |= vop3.abs[i] << (8+i);
|
||||
if (instr->definitions.size() == 2)
|
||||
encoding |= instr->definitions[1].physReg() << 8;
|
||||
encoding |= (0xFF & instr->definitions[0].physReg());
|
||||
|
|
@ -609,13 +609,13 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
for (unsigned i = 0; i < instr->operands.size(); i++)
|
||||
encoding |= instr->operands[i].physReg() << (i * 9);
|
||||
}
|
||||
encoding |= vop3->omod << 27;
|
||||
encoding |= vop3.omod << 27;
|
||||
for (unsigned i = 0; i < 3; i++)
|
||||
encoding |= vop3->neg[i] << (29+i);
|
||||
encoding |= vop3.neg[i] << (29+i);
|
||||
out.push_back(encoding);
|
||||
|
||||
} else if (instr->isVOP3P()) {
|
||||
VOP3P_instruction* vop3 = instr->vop3p();
|
||||
VOP3P_instruction& vop3 = instr->vop3p();
|
||||
|
||||
uint32_t encoding;
|
||||
if (ctx.chip_class == GFX9) {
|
||||
|
|
@ -627,45 +627,45 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
}
|
||||
|
||||
encoding |= opcode << 16;
|
||||
encoding |= (vop3->clamp ? 1 : 0) << 15;
|
||||
encoding |= vop3->opsel_lo << 11;
|
||||
encoding |= ((vop3->opsel_hi & 0x4) ? 1 : 0) << 14;
|
||||
encoding |= (vop3.clamp ? 1 : 0) << 15;
|
||||
encoding |= vop3.opsel_lo << 11;
|
||||
encoding |= ((vop3.opsel_hi & 0x4) ? 1 : 0) << 14;
|
||||
for (unsigned i = 0; i < 3; i++)
|
||||
encoding |= vop3->neg_hi[i] << (8+i);
|
||||
encoding |= vop3.neg_hi[i] << (8+i);
|
||||
encoding |= (0xFF & instr->definitions[0].physReg());
|
||||
out.push_back(encoding);
|
||||
encoding = 0;
|
||||
for (unsigned i = 0; i < instr->operands.size(); i++)
|
||||
encoding |= instr->operands[i].physReg() << (i * 9);
|
||||
encoding |= (vop3->opsel_hi & 0x3) << 27;
|
||||
encoding |= (vop3.opsel_hi & 0x3) << 27;
|
||||
for (unsigned i = 0; i < 3; i++)
|
||||
encoding |= vop3->neg_lo[i] << (29+i);
|
||||
encoding |= vop3.neg_lo[i] << (29+i);
|
||||
out.push_back(encoding);
|
||||
|
||||
} else if (instr->isDPP()){
|
||||
assert(ctx.chip_class >= GFX8);
|
||||
DPP_instruction* dpp = instr->dpp();
|
||||
DPP_instruction& dpp = instr->dpp();
|
||||
|
||||
/* first emit the instruction without the DPP operand */
|
||||
Operand dpp_op = instr->operands[0];
|
||||
instr->operands[0] = Operand(PhysReg{250}, v1);
|
||||
instr->format = (Format) ((uint16_t) instr->format & ~(uint16_t)Format::DPP);
|
||||
emit_instruction(ctx, out, instr);
|
||||
uint32_t encoding = (0xF & dpp->row_mask) << 28;
|
||||
encoding |= (0xF & dpp->bank_mask) << 24;
|
||||
encoding |= dpp->abs[1] << 23;
|
||||
encoding |= dpp->neg[1] << 22;
|
||||
encoding |= dpp->abs[0] << 21;
|
||||
encoding |= dpp->neg[0] << 20;
|
||||
uint32_t encoding = (0xF & dpp.row_mask) << 28;
|
||||
encoding |= (0xF & dpp.bank_mask) << 24;
|
||||
encoding |= dpp.abs[1] << 23;
|
||||
encoding |= dpp.neg[1] << 22;
|
||||
encoding |= dpp.abs[0] << 21;
|
||||
encoding |= dpp.neg[0] << 20;
|
||||
if (ctx.chip_class >= GFX10)
|
||||
encoding |= 1 << 18; /* set Fetch Inactive to match GFX9 behaviour */
|
||||
encoding |= dpp->bound_ctrl << 19;
|
||||
encoding |= dpp->dpp_ctrl << 8;
|
||||
encoding |= dpp.bound_ctrl << 19;
|
||||
encoding |= dpp.dpp_ctrl << 8;
|
||||
encoding |= (0xFF) & dpp_op.physReg();
|
||||
out.push_back(encoding);
|
||||
return;
|
||||
} else if (instr->isSDWA()) {
|
||||
SDWA_instruction* sdwa = instr->sdwa();
|
||||
SDWA_instruction& sdwa = instr->sdwa();
|
||||
|
||||
/* first emit the instruction without the SDWA operand */
|
||||
Operand sdwa_op = instr->operands[0];
|
||||
|
|
@ -680,27 +680,27 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
encoding |= instr->definitions[0].physReg() << 8;
|
||||
encoding |= 1 << 15;
|
||||
}
|
||||
encoding |= (sdwa->clamp ? 1 : 0) << 13;
|
||||
encoding |= (sdwa.clamp ? 1 : 0) << 13;
|
||||
} else {
|
||||
encoding |= get_sdwa_sel(sdwa->dst_sel, instr->definitions[0].physReg()) << 8;
|
||||
uint32_t dst_u = sdwa->dst_sel & sdwa_sext ? 1 : 0;
|
||||
if (sdwa->dst_preserve || (sdwa->dst_sel & sdwa_isra))
|
||||
encoding |= get_sdwa_sel(sdwa.dst_sel, instr->definitions[0].physReg()) << 8;
|
||||
uint32_t dst_u = sdwa.dst_sel & sdwa_sext ? 1 : 0;
|
||||
if (sdwa.dst_preserve || (sdwa.dst_sel & sdwa_isra))
|
||||
dst_u = 2;
|
||||
encoding |= dst_u << 11;
|
||||
encoding |= (sdwa->clamp ? 1 : 0) << 13;
|
||||
encoding |= sdwa->omod << 14;
|
||||
encoding |= (sdwa.clamp ? 1 : 0) << 13;
|
||||
encoding |= sdwa.omod << 14;
|
||||
}
|
||||
|
||||
encoding |= get_sdwa_sel(sdwa->sel[0], sdwa_op.physReg()) << 16;
|
||||
encoding |= sdwa->sel[0] & sdwa_sext ? 1 << 19 : 0;
|
||||
encoding |= sdwa->abs[0] << 21;
|
||||
encoding |= sdwa->neg[0] << 20;
|
||||
encoding |= get_sdwa_sel(sdwa.sel[0], sdwa_op.physReg()) << 16;
|
||||
encoding |= sdwa.sel[0] & sdwa_sext ? 1 << 19 : 0;
|
||||
encoding |= sdwa.abs[0] << 21;
|
||||
encoding |= sdwa.neg[0] << 20;
|
||||
|
||||
if (instr->operands.size() >= 2) {
|
||||
encoding |= get_sdwa_sel(sdwa->sel[1], instr->operands[1].physReg()) << 24;
|
||||
encoding |= sdwa->sel[1] & sdwa_sext ? 1 << 27 : 0;
|
||||
encoding |= sdwa->abs[1] << 29;
|
||||
encoding |= sdwa->neg[1] << 28;
|
||||
encoding |= get_sdwa_sel(sdwa.sel[1], instr->operands[1].physReg()) << 24;
|
||||
encoding |= sdwa.sel[1] & sdwa_sext ? 1 << 27 : 0;
|
||||
encoding |= sdwa.abs[1] << 29;
|
||||
encoding |= sdwa.neg[1] << 28;
|
||||
}
|
||||
|
||||
encoding |= 0xFF & sdwa_op.physReg();
|
||||
|
|
@ -750,16 +750,16 @@ void fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
|
|||
while ( it != block.instructions.rend())
|
||||
{
|
||||
if ((*it)->isEXP()) {
|
||||
Export_instruction* exp = (*it)->exp();
|
||||
Export_instruction& exp = (*it)->exp();
|
||||
if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG) {
|
||||
if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= (V_008DFC_SQ_EXP_POS + 3)) {
|
||||
exp->done = true;
|
||||
if (exp.dest >= V_008DFC_SQ_EXP_POS && exp.dest <= (V_008DFC_SQ_EXP_POS + 3)) {
|
||||
exp.done = true;
|
||||
exported = true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
exp->done = true;
|
||||
exp->valid_mask = true;
|
||||
exp.done = true;
|
||||
exp.valid_mask = true;
|
||||
exported = true;
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -180,7 +180,7 @@ struct NOP_ctx_gfx10 {
|
|||
int get_wait_states(aco_ptr<Instruction>& instr)
|
||||
{
|
||||
if (instr->opcode == aco_opcode::s_nop)
|
||||
return instr->sopp()->imm + 1;
|
||||
return instr->sopp().imm + 1;
|
||||
else if (instr->opcode == aco_opcode::p_constaddr)
|
||||
return 3; /* lowered to 3 instructions in the assembler */
|
||||
else
|
||||
|
|
@ -351,7 +351,7 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c
|
|||
|
||||
if (instr->opcode == aco_opcode::s_sendmsg || instr->opcode == aco_opcode::s_ttracedata)
|
||||
NOPs = MAX2(NOPs, ctx.salu_wr_m0_then_gds_msg_ttrace);
|
||||
} else if (instr->isDS() && instr->ds()->gds) {
|
||||
} else if (instr->isDS() && instr->ds().gds) {
|
||||
NOPs = MAX2(NOPs, ctx.salu_wr_m0_then_gds_msg_ttrace);
|
||||
} else if (instr->isVALU() || instr->isVINTRP()) {
|
||||
for (Operand op : instr->operands) {
|
||||
|
|
@ -407,7 +407,7 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c
|
|||
|
||||
if (program->chip_class == GFX9) {
|
||||
bool lds_scratch_global = (instr->isScratch() || instr->isGlobal()) &&
|
||||
instr->flatlike()->lds;
|
||||
instr->flatlike().lds;
|
||||
if (instr->isVINTRP() ||
|
||||
instr->opcode == aco_opcode::ds_read_addtid_b32 ||
|
||||
instr->opcode == aco_opcode::ds_write_addtid_b32 ||
|
||||
|
|
@ -480,10 +480,10 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c
|
|||
ctx.salu_wr_m0_then_moverel = 1;
|
||||
}
|
||||
} else if (instr->opcode == aco_opcode::s_setreg_b32 || instr->opcode == aco_opcode::s_setreg_imm32_b32) {
|
||||
SOPK_instruction *sopk = instr->sopk();
|
||||
unsigned offset = (sopk->imm >> 6) & 0x1f;
|
||||
unsigned size = ((sopk->imm >> 11) & 0x1f) + 1;
|
||||
unsigned reg = sopk->imm & 0x3f;
|
||||
SOPK_instruction& sopk = instr->sopk();
|
||||
unsigned offset = (sopk.imm >> 6) & 0x1f;
|
||||
unsigned size = ((sopk.imm >> 11) & 0x1f) + 1;
|
||||
unsigned reg = sopk.imm & 0x3f;
|
||||
ctx.setreg_then_getsetreg = 2;
|
||||
|
||||
if (reg == 1 && offset >= 28 && size > (28 - offset))
|
||||
|
|
@ -603,13 +603,13 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10
|
|||
} else if (instr->isSALU() || instr->isSMEM()) {
|
||||
if (instr->opcode == aco_opcode::s_waitcnt) {
|
||||
/* Hazard is mitigated by "s_waitcnt vmcnt(0)" */
|
||||
uint16_t imm = instr->sopp()->imm;
|
||||
uint16_t imm = instr->sopp().imm;
|
||||
unsigned vmcnt = (imm & 0xF) | ((imm & (0x3 << 14)) >> 10);
|
||||
if (vmcnt == 0)
|
||||
ctx.sgprs_read_by_VMEM.reset();
|
||||
} else if (instr->opcode == aco_opcode::s_waitcnt_depctr) {
|
||||
/* Hazard is mitigated by a s_waitcnt_depctr with a magic imm */
|
||||
if (instr->sopp()->imm == 0xffe3)
|
||||
if (instr->sopp().imm == 0xffe3)
|
||||
ctx.sgprs_read_by_VMEM.reset();
|
||||
}
|
||||
|
||||
|
|
@ -667,7 +667,7 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10
|
|||
}
|
||||
} else if (instr->opcode == aco_opcode::s_waitcnt_depctr) {
|
||||
/* s_waitcnt_depctr can mitigate the problem if it has a magic imm */
|
||||
if ((instr->sopp()->imm & 0xfffe) == 0xfffe)
|
||||
if ((instr->sopp().imm & 0xfffe) == 0xfffe)
|
||||
ctx.has_nonVALU_exec_read = false;
|
||||
}
|
||||
|
||||
|
|
@ -694,12 +694,12 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10
|
|||
ctx.sgprs_read_by_SMEM.reset();
|
||||
} else {
|
||||
/* Reducing lgkmcnt count to 0 always mitigates the hazard. */
|
||||
const SOPP_instruction *sopp = instr->sopp();
|
||||
if (sopp->opcode == aco_opcode::s_waitcnt_lgkmcnt) {
|
||||
if (sopp->imm == 0 && sopp->definitions[0].physReg() == sgpr_null)
|
||||
const SOPP_instruction& sopp = instr->sopp();
|
||||
if (sopp.opcode == aco_opcode::s_waitcnt_lgkmcnt) {
|
||||
if (sopp.imm == 0 && sopp.definitions[0].physReg() == sgpr_null)
|
||||
ctx.sgprs_read_by_SMEM.reset();
|
||||
} else if (sopp->opcode == aco_opcode::s_waitcnt) {
|
||||
unsigned lgkm = (sopp->imm >> 8) & 0x3f;
|
||||
} else if (sopp.opcode == aco_opcode::s_waitcnt) {
|
||||
unsigned lgkm = (sopp.imm >> 8) & 0x3f;
|
||||
if (lgkm == 0)
|
||||
ctx.sgprs_read_by_SMEM.reset();
|
||||
}
|
||||
|
|
@ -724,8 +724,8 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10
|
|||
ctx.has_branch_after_DS = ctx.has_DS;
|
||||
} else if (instr->opcode == aco_opcode::s_waitcnt_vscnt) {
|
||||
/* Only s_waitcnt_vscnt can mitigate the hazard */
|
||||
const SOPK_instruction *sopk = instr->sopk();
|
||||
if (sopk->definitions[0].physReg() == sgpr_null && sopk->imm == 0)
|
||||
const SOPK_instruction& sopk = instr->sopk();
|
||||
if (sopk.definitions[0].physReg() == sgpr_null && sopk.imm == 0)
|
||||
ctx.has_VMEM = ctx.has_branch_after_VMEM = ctx.has_DS = ctx.has_branch_after_DS = false;
|
||||
}
|
||||
if ((ctx.has_VMEM && ctx.has_branch_after_DS) || (ctx.has_DS && ctx.has_branch_after_VMEM)) {
|
||||
|
|
|
|||
|
|
@ -98,13 +98,13 @@ struct exec_ctx {
|
|||
|
||||
bool needs_exact(aco_ptr<Instruction>& instr) {
|
||||
if (instr->isMUBUF()) {
|
||||
return instr->mubuf()->disable_wqm;
|
||||
return instr->mubuf().disable_wqm;
|
||||
} else if (instr->isMTBUF()) {
|
||||
return instr->mtbuf()->disable_wqm;
|
||||
return instr->mtbuf().disable_wqm;
|
||||
} else if (instr->isMIMG()) {
|
||||
return instr->mimg()->disable_wqm;
|
||||
return instr->mimg().disable_wqm;
|
||||
} else if (instr->isFlatLike()) {
|
||||
return instr->flatlike()->disable_wqm;
|
||||
return instr->flatlike().disable_wqm;
|
||||
} else {
|
||||
return instr->isEXP();
|
||||
}
|
||||
|
|
@ -908,12 +908,12 @@ void add_branch_code(exec_ctx& ctx, Block* block)
|
|||
}
|
||||
|
||||
if (block->kind & block_kind_uniform) {
|
||||
Pseudo_branch_instruction* branch = block->instructions.back()->branch();
|
||||
if (branch->opcode == aco_opcode::p_branch) {
|
||||
branch->target[0] = block->linear_succs[0];
|
||||
Pseudo_branch_instruction& branch = block->instructions.back()->branch();
|
||||
if (branch.opcode == aco_opcode::p_branch) {
|
||||
branch.target[0] = block->linear_succs[0];
|
||||
} else {
|
||||
branch->target[0] = block->linear_succs[1];
|
||||
branch->target[1] = block->linear_succs[0];
|
||||
branch.target[0] = block->linear_succs[1];
|
||||
branch.target[1] = block->linear_succs[0];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -422,7 +422,7 @@ wait_imm check_instr(Instruction* instr, wait_ctx& ctx)
|
|||
continue;
|
||||
|
||||
/* LDS reads and writes return in the order they were issued. same for GDS */
|
||||
if (instr->isDS() && (it->second.events & lgkm_events) == (instr->ds()->gds ? event_gds : event_lds))
|
||||
if (instr->isDS() && (it->second.events & lgkm_events) == (instr->ds().gds ? event_gds : event_lds))
|
||||
continue;
|
||||
|
||||
wait.combine(it->second.imm);
|
||||
|
|
@ -437,10 +437,10 @@ wait_imm parse_wait_instr(wait_ctx& ctx, Instruction *instr)
|
|||
if (instr->opcode == aco_opcode::s_waitcnt_vscnt &&
|
||||
instr->definitions[0].physReg() == sgpr_null) {
|
||||
wait_imm imm;
|
||||
imm.vs = std::min<uint8_t>(imm.vs, instr->sopk()->imm);
|
||||
imm.vs = std::min<uint8_t>(imm.vs, instr->sopk().imm);
|
||||
return imm;
|
||||
} else if (instr->opcode == aco_opcode::s_waitcnt) {
|
||||
return wait_imm(ctx.chip_class, instr->sopp()->imm);
|
||||
return wait_imm(ctx.chip_class, instr->sopp().imm);
|
||||
}
|
||||
return wait_imm();
|
||||
}
|
||||
|
|
@ -521,15 +521,15 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx, memory_sync_info sync_info)
|
|||
* TODO: Refine this when we have proper alias analysis.
|
||||
*/
|
||||
if (ctx.pending_s_buffer_store &&
|
||||
!instr->smem()->definitions.empty() &&
|
||||
!instr->smem()->sync.can_reorder()) {
|
||||
!instr->smem().definitions.empty() &&
|
||||
!instr->smem().sync.can_reorder()) {
|
||||
imm.lgkm = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx.program->early_rast && instr->opcode == aco_opcode::exp) {
|
||||
if (instr->exp()->dest >= V_008DFC_SQ_EXP_POS &&
|
||||
instr->exp()->dest < V_008DFC_SQ_EXP_PRIM) {
|
||||
if (instr->exp().dest >= V_008DFC_SQ_EXP_POS &&
|
||||
instr->exp().dest < V_008DFC_SQ_EXP_PRIM) {
|
||||
|
||||
/* With early_rast, the HW will start clipping and rasterization after the 1st DONE pos export.
|
||||
* Wait for all stores (and atomics) to complete, so PS can read them.
|
||||
|
|
@ -543,7 +543,7 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx, memory_sync_info sync_info)
|
|||
}
|
||||
|
||||
if (instr->opcode == aco_opcode::p_barrier)
|
||||
imm.combine(perform_barrier(ctx, instr->barrier()->sync, semantic_acqrel));
|
||||
imm.combine(perform_barrier(ctx, instr->barrier().sync, semantic_acqrel));
|
||||
else
|
||||
imm.combine(perform_barrier(ctx, sync_info, semantic_release));
|
||||
|
||||
|
|
@ -760,12 +760,12 @@ void gen(Instruction* instr, wait_ctx& ctx)
|
|||
{
|
||||
switch (instr->format) {
|
||||
case Format::EXP: {
|
||||
Export_instruction* exp_instr = instr->exp();
|
||||
Export_instruction& exp_instr = instr->exp();
|
||||
|
||||
wait_event ev;
|
||||
if (exp_instr->dest <= 9)
|
||||
if (exp_instr.dest <= 9)
|
||||
ev = event_exp_mrt_null;
|
||||
else if (exp_instr->dest <= 15)
|
||||
else if (exp_instr.dest <= 15)
|
||||
ev = event_exp_pos;
|
||||
else
|
||||
ev = event_exp_param;
|
||||
|
|
@ -774,10 +774,10 @@ void gen(Instruction* instr, wait_ctx& ctx)
|
|||
/* insert new entries for exported vgprs */
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
{
|
||||
if (exp_instr->enabled_mask & (1 << i)) {
|
||||
unsigned idx = exp_instr->compressed ? i >> 1 : i;
|
||||
assert(idx < exp_instr->operands.size());
|
||||
insert_wait_entry(ctx, exp_instr->operands[idx], ev);
|
||||
if (exp_instr.enabled_mask & (1 << i)) {
|
||||
unsigned idx = exp_instr.compressed ? i >> 1 : i;
|
||||
assert(idx < exp_instr.operands.size());
|
||||
insert_wait_entry(ctx, exp_instr.operands[idx], ev);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -785,38 +785,38 @@ void gen(Instruction* instr, wait_ctx& ctx)
|
|||
break;
|
||||
}
|
||||
case Format::FLAT: {
|
||||
FLAT_instruction *flat = instr->flat();
|
||||
FLAT_instruction& flat = instr->flat();
|
||||
if (ctx.chip_class < GFX10 && !instr->definitions.empty())
|
||||
update_counters_for_flat_load(ctx, flat->sync);
|
||||
update_counters_for_flat_load(ctx, flat.sync);
|
||||
else
|
||||
update_counters(ctx, event_flat, flat->sync);
|
||||
update_counters(ctx, event_flat, flat.sync);
|
||||
|
||||
if (!instr->definitions.empty())
|
||||
insert_wait_entry(ctx, instr->definitions[0], event_flat);
|
||||
break;
|
||||
}
|
||||
case Format::SMEM: {
|
||||
SMEM_instruction *smem = instr->smem();
|
||||
update_counters(ctx, event_smem, smem->sync);
|
||||
SMEM_instruction& smem = instr->smem();
|
||||
update_counters(ctx, event_smem, smem.sync);
|
||||
|
||||
if (!instr->definitions.empty())
|
||||
insert_wait_entry(ctx, instr->definitions[0], event_smem);
|
||||
else if (ctx.chip_class >= GFX10 &&
|
||||
!smem->sync.can_reorder())
|
||||
!smem.sync.can_reorder())
|
||||
ctx.pending_s_buffer_store = true;
|
||||
|
||||
break;
|
||||
}
|
||||
case Format::DS: {
|
||||
DS_instruction *ds = instr->ds();
|
||||
update_counters(ctx, ds->gds ? event_gds : event_lds, ds->sync);
|
||||
if (ds->gds)
|
||||
DS_instruction& ds = instr->ds();
|
||||
update_counters(ctx, ds.gds ? event_gds : event_lds, ds.sync);
|
||||
if (ds.gds)
|
||||
update_counters(ctx, event_gds_gpr_lock);
|
||||
|
||||
if (!instr->definitions.empty())
|
||||
insert_wait_entry(ctx, instr->definitions[0], ds->gds ? event_gds : event_lds);
|
||||
insert_wait_entry(ctx, instr->definitions[0], ds.gds ? event_gds : event_lds);
|
||||
|
||||
if (ds->gds) {
|
||||
if (ds.gds) {
|
||||
for (const Operand& op : instr->operands)
|
||||
insert_wait_entry(ctx, op, event_gds_gpr_lock);
|
||||
insert_wait_entry(ctx, exec, s2, event_gds_gpr_lock, false);
|
||||
|
|
|
|||
|
|
@ -1224,7 +1224,7 @@ Temp emit_floor_f64(isel_context *ctx, Builder& bld, Definition dst, Temp val)
|
|||
Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1);
|
||||
|
||||
Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src0, v);
|
||||
add->vop3()->neg[1] = true;
|
||||
add->vop3().neg[1] = true;
|
||||
|
||||
return add->definitions[0].getTemp();
|
||||
}
|
||||
|
|
@ -1692,7 +1692,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
|||
std::swap(src0, src1);
|
||||
add_instr = bld.vop2_e64(aco_opcode::v_add_u16, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
|
||||
}
|
||||
add_instr->vop3()->clamp = 1;
|
||||
add_instr->vop3().clamp = 1;
|
||||
} else if (dst.regClass() == v1) {
|
||||
if (ctx->options->chip_class >= GFX9) {
|
||||
aco_ptr<VOP3_instruction> add{create_instruction<VOP3_instruction>(aco_opcode::v_add_u32, asVOP3(Format::VOP2), 2, 1)};
|
||||
|
|
@ -1944,9 +1944,9 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
|||
case nir_op_fsub: {
|
||||
if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
|
||||
Instruction* add = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_f16, dst);
|
||||
VOP3P_instruction* sub = add->vop3p();
|
||||
sub->neg_lo[1] = true;
|
||||
sub->neg_hi[1] = true;
|
||||
VOP3P_instruction& sub = add->vop3p();
|
||||
sub.neg_lo[1] = true;
|
||||
sub.neg_hi[1] = true;
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -1965,7 +1965,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
|||
} else if (dst.regClass() == v2) {
|
||||
Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst),
|
||||
as_vgpr(ctx, src0), as_vgpr(ctx, src1));
|
||||
add->vop3()->neg[1] = true;
|
||||
add->vop3().neg[1] = true;
|
||||
} else {
|
||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||
}
|
||||
|
|
@ -2101,7 +2101,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
|||
Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
|
||||
Instruction* vop3p = bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand(uint16_t(0x3C00)),
|
||||
instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1);
|
||||
vop3p->vop3p()->clamp = true;
|
||||
vop3p->vop3p().clamp = true;
|
||||
emit_split_vector(ctx, dst, 2);
|
||||
break;
|
||||
}
|
||||
|
|
@ -2114,7 +2114,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
|||
// TODO: confirm that this holds under any circumstances
|
||||
} else if (dst.regClass() == v2) {
|
||||
Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src, Operand(0u));
|
||||
add->vop3()->clamp = true;
|
||||
add->vop3().clamp = true;
|
||||
} else {
|
||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||
}
|
||||
|
|
@ -2253,12 +2253,12 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
|||
Temp bfi = bld.vop3(aco_opcode::v_bfi_b32, bld.def(v1), bitmask, bld.copy(bld.def(v1), Operand(0x43300000u)), as_vgpr(ctx, src0_hi));
|
||||
Temp tmp = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), src0, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi));
|
||||
Instruction *sub = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), tmp, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi));
|
||||
sub->vop3()->neg[1] = true;
|
||||
sub->vop3().neg[1] = true;
|
||||
tmp = sub->definitions[0].getTemp();
|
||||
|
||||
Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(-1u), Operand(0x432fffffu));
|
||||
Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.hint_vcc(bld.def(bld.lm)), src0, v);
|
||||
vop3->vop3()->abs[0] = true;
|
||||
vop3->vop3().abs[0] = true;
|
||||
Temp cond = vop3->definitions[0].getTemp();
|
||||
|
||||
Temp tmp_lo = bld.tmp(v1), tmp_hi = bld.tmp(v1);
|
||||
|
|
@ -2924,7 +2924,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
|||
f32 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16);
|
||||
Temp smallest = bld.copy(bld.def(s1), Operand(0x38800000u));
|
||||
Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_nlt_f32, bld.hint_vcc(bld.def(bld.lm)), f32, smallest);
|
||||
vop3->vop3()->abs[0] = true;
|
||||
vop3->vop3().abs[0] = true;
|
||||
cmp_res = vop3->definitions[0].getTemp();
|
||||
}
|
||||
|
||||
|
|
@ -3515,7 +3515,7 @@ Temp lds_load_callback(Builder& bld, const LoadEmitInfo &info,
|
|||
instr = bld.ds(op, Definition(val), offset, m, const_offset, const_offset + 1);
|
||||
else
|
||||
instr = bld.ds(op, Definition(val), offset, m, const_offset);
|
||||
instr->ds()->sync = info.sync;
|
||||
instr->ds().sync = info.sync;
|
||||
|
||||
if (size < 4)
|
||||
val = bld.pseudo(aco_opcode::p_extract_vector, bld.def(RegClass::get(RegType::vgpr, size)), val, Operand(0u));
|
||||
|
|
@ -3931,7 +3931,7 @@ void store_lds(isel_context *ctx, unsigned elem_size_bytes, Temp data, uint32_t
|
|||
} else {
|
||||
instr = bld.ds(op, address_offset, split_data, m, inline_offset);
|
||||
}
|
||||
instr->ds()->sync = memory_sync_info(storage_shared);
|
||||
instr->ds().sync = memory_sync_info(storage_shared);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -4094,7 +4094,7 @@ void emit_single_mubuf_store(isel_context *ctx, Temp descriptor, Temp voffset, T
|
|||
/* idxen*/ false, /* addr64 */ false, /* disable_wqm */ false, /* glc */ true,
|
||||
/* dlc*/ false, /* slc */ slc);
|
||||
|
||||
r.instr->mubuf()->sync = sync;
|
||||
r.instr->mubuf().sync = sync;
|
||||
}
|
||||
|
||||
void store_vmem_mubuf(isel_context *ctx, Temp src, Temp descriptor, Temp voffset, Temp soffset,
|
||||
|
|
@ -5503,7 +5503,7 @@ void visit_load_push_constant(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
unreachable("unimplemented or forbidden load_push_constant.");
|
||||
}
|
||||
|
||||
bld.smem(op, Definition(vec), ptr, index).instr->smem()->prevent_overflow = true;
|
||||
bld.smem(op, Definition(vec), ptr, index).instr->smem().prevent_overflow = true;
|
||||
|
||||
if (!aligned) {
|
||||
Operand byte_offset = index_cv ? Operand((offset + index_cv->u32) % 4) : Operand(index);
|
||||
|
|
@ -7147,7 +7147,7 @@ void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
|
|||
for (unsigned i = 0; i < write_count; i++) {
|
||||
aco_opcode op = get_buffer_store_op(write_datas[i].bytes());
|
||||
Instruction *mubuf = bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset, write_datas[i], offsets[i], true, true);
|
||||
mubuf->mubuf()->sync = memory_sync_info(storage_scratch, semantic_private);
|
||||
mubuf->mubuf().sync = memory_sync_info(storage_scratch, semantic_private);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -140,19 +140,19 @@ memory_sync_info get_sync_info(const Instruction* instr)
|
|||
{
|
||||
switch (instr->format) {
|
||||
case Format::SMEM:
|
||||
return instr->smem()->sync;
|
||||
return instr->smem().sync;
|
||||
case Format::MUBUF:
|
||||
return instr->mubuf()->sync;
|
||||
return instr->mubuf().sync;
|
||||
case Format::MIMG:
|
||||
return instr->mimg()->sync;
|
||||
return instr->mimg().sync;
|
||||
case Format::MTBUF:
|
||||
return instr->mtbuf()->sync;
|
||||
return instr->mtbuf().sync;
|
||||
case Format::FLAT:
|
||||
case Format::GLOBAL:
|
||||
case Format::SCRATCH:
|
||||
return instr->flatlike()->sync;
|
||||
return instr->flatlike().sync;
|
||||
case Format::DS:
|
||||
return instr->ds()->sync;
|
||||
return instr->ds().sync;
|
||||
default:
|
||||
return memory_sync_info();
|
||||
}
|
||||
|
|
@ -170,12 +170,12 @@ bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr)
|
|||
return true;
|
||||
|
||||
if (instr->isVOP3()) {
|
||||
VOP3_instruction *vop3 = instr->vop3();
|
||||
VOP3_instruction& vop3 = instr->vop3();
|
||||
if (instr->format == Format::VOP3)
|
||||
return false;
|
||||
if (vop3->clamp && instr->format == asVOP3(Format::VOPC) && chip != GFX8)
|
||||
if (vop3.clamp && instr->format == asVOP3(Format::VOPC) && chip != GFX8)
|
||||
return false;
|
||||
if (vop3->omod && chip < GFX9)
|
||||
if (vop3.omod && chip < GFX9)
|
||||
return false;
|
||||
|
||||
//TODO: return true if we know we will use vcc
|
||||
|
|
@ -232,14 +232,14 @@ aco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& inst
|
|||
std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin());
|
||||
std::copy(tmp->definitions.cbegin(), tmp->definitions.cend(), instr->definitions.begin());
|
||||
|
||||
SDWA_instruction *sdwa = instr->sdwa();
|
||||
SDWA_instruction& sdwa = instr->sdwa();
|
||||
|
||||
if (tmp->isVOP3()) {
|
||||
VOP3_instruction *vop3 = tmp->vop3();
|
||||
memcpy(sdwa->neg, vop3->neg, sizeof(sdwa->neg));
|
||||
memcpy(sdwa->abs, vop3->abs, sizeof(sdwa->abs));
|
||||
sdwa->omod = vop3->omod;
|
||||
sdwa->clamp = vop3->clamp;
|
||||
VOP3_instruction& vop3 = tmp->vop3();
|
||||
memcpy(sdwa.neg, vop3.neg, sizeof(sdwa.neg));
|
||||
memcpy(sdwa.abs, vop3.abs, sizeof(sdwa.abs));
|
||||
sdwa.omod = vop3.omod;
|
||||
sdwa.clamp = vop3.clamp;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < instr->operands.size(); i++) {
|
||||
|
|
@ -249,27 +249,27 @@ aco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& inst
|
|||
|
||||
switch (instr->operands[i].bytes()) {
|
||||
case 1:
|
||||
sdwa->sel[i] = sdwa_ubyte;
|
||||
sdwa.sel[i] = sdwa_ubyte;
|
||||
break;
|
||||
case 2:
|
||||
sdwa->sel[i] = sdwa_uword;
|
||||
sdwa.sel[i] = sdwa_uword;
|
||||
break;
|
||||
case 4:
|
||||
sdwa->sel[i] = sdwa_udword;
|
||||
sdwa.sel[i] = sdwa_udword;
|
||||
break;
|
||||
}
|
||||
}
|
||||
switch (instr->definitions[0].bytes()) {
|
||||
case 1:
|
||||
sdwa->dst_sel = sdwa_ubyte;
|
||||
sdwa->dst_preserve = true;
|
||||
sdwa.dst_sel = sdwa_ubyte;
|
||||
sdwa.dst_preserve = true;
|
||||
break;
|
||||
case 2:
|
||||
sdwa->dst_sel = sdwa_uword;
|
||||
sdwa->dst_preserve = true;
|
||||
sdwa.dst_sel = sdwa_uword;
|
||||
sdwa.dst_preserve = true;
|
||||
break;
|
||||
case 4:
|
||||
sdwa->dst_sel = sdwa_udword;
|
||||
sdwa.dst_sel = sdwa_udword;
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1030,93 +1030,93 @@ struct Instruction {
|
|||
return false;
|
||||
}
|
||||
|
||||
Pseudo_instruction *pseudo() noexcept {assert(isPseudo()); return (Pseudo_instruction *)this;}
|
||||
const Pseudo_instruction *pseudo() const noexcept {assert(isPseudo()); return (Pseudo_instruction *)this;}
|
||||
Pseudo_instruction& pseudo() noexcept {assert(isPseudo()); return *(Pseudo_instruction *)this;}
|
||||
const Pseudo_instruction& pseudo() const noexcept {assert(isPseudo()); return *(Pseudo_instruction *)this;}
|
||||
constexpr bool isPseudo() const noexcept {return format == Format::PSEUDO;}
|
||||
SOP1_instruction *sop1() noexcept {assert(isSOP1()); return (SOP1_instruction *)this;}
|
||||
const SOP1_instruction *sop1() const noexcept {assert(isSOP1()); return (SOP1_instruction *)this;}
|
||||
SOP1_instruction& sop1() noexcept {assert(isSOP1()); return *(SOP1_instruction *)this;}
|
||||
const SOP1_instruction& sop1() const noexcept {assert(isSOP1()); return *(SOP1_instruction *)this;}
|
||||
constexpr bool isSOP1() const noexcept {return format == Format::SOP1;}
|
||||
SOP2_instruction *sop2() noexcept {assert(isSOP2()); return (SOP2_instruction *)this;}
|
||||
const SOP2_instruction *sop2() const noexcept {assert(isSOP2()); return (SOP2_instruction *)this;}
|
||||
SOP2_instruction& sop2() noexcept {assert(isSOP2()); return *(SOP2_instruction *)this;}
|
||||
const SOP2_instruction& sop2() const noexcept {assert(isSOP2()); return *(SOP2_instruction *)this;}
|
||||
constexpr bool isSOP2() const noexcept {return format == Format::SOP2;}
|
||||
SOPK_instruction *sopk() noexcept {assert(isSOPK()); return (SOPK_instruction *)this;}
|
||||
const SOPK_instruction *sopk() const noexcept {assert(isSOPK()); return (SOPK_instruction *)this;}
|
||||
SOPK_instruction& sopk() noexcept {assert(isSOPK()); return *(SOPK_instruction *)this;}
|
||||
const SOPK_instruction& sopk() const noexcept {assert(isSOPK()); return *(SOPK_instruction *)this;}
|
||||
constexpr bool isSOPK() const noexcept {return format == Format::SOPK;}
|
||||
SOPP_instruction *sopp() noexcept {assert(isSOPP()); return (SOPP_instruction *)this;}
|
||||
const SOPP_instruction *sopp() const noexcept {assert(isSOPP()); return (SOPP_instruction *)this;}
|
||||
SOPP_instruction& sopp() noexcept {assert(isSOPP()); return *(SOPP_instruction *)this;}
|
||||
const SOPP_instruction& sopp() const noexcept {assert(isSOPP()); return *(SOPP_instruction *)this;}
|
||||
constexpr bool isSOPP() const noexcept {return format == Format::SOPP;}
|
||||
SOPC_instruction *sopc() noexcept {assert(isSOPC()); return (SOPC_instruction *)this;}
|
||||
const SOPC_instruction *sopc() const noexcept {assert(isSOPC()); return (SOPC_instruction *)this;}
|
||||
SOPC_instruction& sopc() noexcept {assert(isSOPC()); return *(SOPC_instruction *)this;}
|
||||
const SOPC_instruction& sopc() const noexcept {assert(isSOPC()); return *(SOPC_instruction *)this;}
|
||||
constexpr bool isSOPC() const noexcept {return format == Format::SOPC;}
|
||||
SMEM_instruction *smem() noexcept {assert(isSMEM()); return (SMEM_instruction *)this;}
|
||||
const SMEM_instruction *smem() const noexcept {assert(isSMEM()); return (SMEM_instruction *)this;}
|
||||
SMEM_instruction& smem() noexcept {assert(isSMEM()); return *(SMEM_instruction *)this;}
|
||||
const SMEM_instruction& smem() const noexcept {assert(isSMEM()); return *(SMEM_instruction *)this;}
|
||||
constexpr bool isSMEM() const noexcept {return format == Format::SMEM;}
|
||||
DS_instruction *ds() noexcept {assert(isDS()); return (DS_instruction *)this;}
|
||||
const DS_instruction *ds() const noexcept {assert(isDS()); return (DS_instruction *)this;}
|
||||
DS_instruction& ds() noexcept {assert(isDS()); return *(DS_instruction *)this;}
|
||||
const DS_instruction& ds() const noexcept {assert(isDS()); return *(DS_instruction *)this;}
|
||||
constexpr bool isDS() const noexcept {return format == Format::DS;}
|
||||
MTBUF_instruction *mtbuf() noexcept {assert(isMTBUF()); return (MTBUF_instruction *)this;}
|
||||
const MTBUF_instruction *mtbuf() const noexcept {assert(isMTBUF()); return (MTBUF_instruction *)this;}
|
||||
MTBUF_instruction& mtbuf() noexcept {assert(isMTBUF()); return *(MTBUF_instruction *)this;}
|
||||
const MTBUF_instruction& mtbuf() const noexcept {assert(isMTBUF()); return *(MTBUF_instruction *)this;}
|
||||
constexpr bool isMTBUF() const noexcept {return format == Format::MTBUF;}
|
||||
MUBUF_instruction *mubuf() noexcept {assert(isMUBUF()); return (MUBUF_instruction *)this;}
|
||||
const MUBUF_instruction *mubuf() const noexcept {assert(isMUBUF()); return (MUBUF_instruction *)this;}
|
||||
MUBUF_instruction& mubuf() noexcept {assert(isMUBUF()); return *(MUBUF_instruction *)this;}
|
||||
const MUBUF_instruction& mubuf() const noexcept {assert(isMUBUF()); return *(MUBUF_instruction *)this;}
|
||||
constexpr bool isMUBUF() const noexcept {return format == Format::MUBUF;}
|
||||
MIMG_instruction *mimg() noexcept {assert(isMIMG()); return (MIMG_instruction *)this;}
|
||||
const MIMG_instruction *mimg() const noexcept {assert(isMIMG()); return (MIMG_instruction *)this;}
|
||||
MIMG_instruction& mimg() noexcept {assert(isMIMG()); return *(MIMG_instruction *)this;}
|
||||
const MIMG_instruction& mimg() const noexcept {assert(isMIMG()); return *(MIMG_instruction *)this;}
|
||||
constexpr bool isMIMG() const noexcept {return format == Format::MIMG;}
|
||||
Export_instruction *exp() noexcept {assert(isEXP()); return (Export_instruction *)this;}
|
||||
const Export_instruction *exp() const noexcept {assert(isEXP()); return (Export_instruction *)this;}
|
||||
Export_instruction& exp() noexcept {assert(isEXP()); return *(Export_instruction *)this;}
|
||||
const Export_instruction& exp() const noexcept {assert(isEXP()); return *(Export_instruction *)this;}
|
||||
constexpr bool isEXP() const noexcept {return format == Format::EXP;}
|
||||
FLAT_instruction *flat() noexcept {assert(isFlat()); return (FLAT_instruction *)this;}
|
||||
const FLAT_instruction *flat() const noexcept {assert(isFlat()); return (FLAT_instruction *)this;}
|
||||
FLAT_instruction& flat() noexcept {assert(isFlat()); return *(FLAT_instruction *)this;}
|
||||
const FLAT_instruction& flat() const noexcept {assert(isFlat()); return *(FLAT_instruction *)this;}
|
||||
constexpr bool isFlat() const noexcept {return format == Format::FLAT;}
|
||||
FLAT_instruction *global() noexcept {assert(isGlobal()); return (FLAT_instruction *)this;}
|
||||
const FLAT_instruction *global() const noexcept {assert(isGlobal()); return (FLAT_instruction *)this;}
|
||||
FLAT_instruction& global() noexcept {assert(isGlobal()); return *(FLAT_instruction *)this;}
|
||||
const FLAT_instruction& global() const noexcept {assert(isGlobal()); return *(FLAT_instruction *)this;}
|
||||
constexpr bool isGlobal() const noexcept {return format == Format::GLOBAL;}
|
||||
FLAT_instruction *scratch() noexcept {assert(isScratch()); return (FLAT_instruction *)this;}
|
||||
const FLAT_instruction *scratch() const noexcept {assert(isScratch()); return (FLAT_instruction *)this;}
|
||||
FLAT_instruction& scratch() noexcept {assert(isScratch()); return *(FLAT_instruction *)this;}
|
||||
const FLAT_instruction& scratch() const noexcept {assert(isScratch()); return *(FLAT_instruction *)this;}
|
||||
constexpr bool isScratch() const noexcept {return format == Format::SCRATCH;}
|
||||
Pseudo_branch_instruction *branch() noexcept {assert(isBranch()); return (Pseudo_branch_instruction *)this;}
|
||||
const Pseudo_branch_instruction *branch() const noexcept {assert(isBranch()); return (Pseudo_branch_instruction *)this;}
|
||||
Pseudo_branch_instruction& branch() noexcept {assert(isBranch()); return *(Pseudo_branch_instruction *)this;}
|
||||
const Pseudo_branch_instruction& branch() const noexcept {assert(isBranch()); return *(Pseudo_branch_instruction *)this;}
|
||||
constexpr bool isBranch() const noexcept {return format == Format::PSEUDO_BRANCH;}
|
||||
Pseudo_barrier_instruction *barrier() noexcept {assert(isBarrier()); return (Pseudo_barrier_instruction *)this;}
|
||||
const Pseudo_barrier_instruction *barrier() const noexcept {assert(isBarrier()); return (Pseudo_barrier_instruction *)this;}
|
||||
Pseudo_barrier_instruction& barrier() noexcept {assert(isBarrier()); return *(Pseudo_barrier_instruction *)this;}
|
||||
const Pseudo_barrier_instruction& barrier() const noexcept {assert(isBarrier()); return *(Pseudo_barrier_instruction *)this;}
|
||||
constexpr bool isBarrier() const noexcept {return format == Format::PSEUDO_BARRIER;}
|
||||
Pseudo_reduction_instruction *reduction() noexcept {assert(isReduction()); return (Pseudo_reduction_instruction *)this;}
|
||||
const Pseudo_reduction_instruction *reduction() const noexcept {assert(isReduction()); return (Pseudo_reduction_instruction *)this;}
|
||||
Pseudo_reduction_instruction& reduction() noexcept {assert(isReduction()); return *(Pseudo_reduction_instruction *)this;}
|
||||
const Pseudo_reduction_instruction& reduction() const noexcept {assert(isReduction()); return *(Pseudo_reduction_instruction *)this;}
|
||||
constexpr bool isReduction() const noexcept {return format == Format::PSEUDO_REDUCTION;}
|
||||
VOP3P_instruction *vop3p() noexcept {assert(isVOP3P()); return (VOP3P_instruction *)this;}
|
||||
const VOP3P_instruction *vop3p() const noexcept {assert(isVOP3P()); return (VOP3P_instruction *)this;}
|
||||
VOP3P_instruction& vop3p() noexcept {assert(isVOP3P()); return *(VOP3P_instruction *)this;}
|
||||
const VOP3P_instruction& vop3p() const noexcept {assert(isVOP3P()); return *(VOP3P_instruction *)this;}
|
||||
constexpr bool isVOP3P() const noexcept {return format == Format::VOP3P;}
|
||||
VOP1_instruction *vop1() noexcept {assert(isVOP1()); return (VOP1_instruction *)this;}
|
||||
const VOP1_instruction *vop1() const noexcept {assert(isVOP1()); return (VOP1_instruction *)this;}
|
||||
VOP1_instruction& vop1() noexcept {assert(isVOP1()); return *(VOP1_instruction *)this;}
|
||||
const VOP1_instruction& vop1() const noexcept {assert(isVOP1()); return *(VOP1_instruction *)this;}
|
||||
constexpr bool isVOP1() const noexcept {return (uint16_t)format & (uint16_t)Format::VOP1;}
|
||||
VOP2_instruction *vop2() noexcept {assert(isVOP2()); return (VOP2_instruction *)this;}
|
||||
const VOP2_instruction *vop2() const noexcept {assert(isVOP2()); return (VOP2_instruction *)this;}
|
||||
VOP2_instruction& vop2() noexcept {assert(isVOP2()); return *(VOP2_instruction *)this;}
|
||||
const VOP2_instruction& vop2() const noexcept {assert(isVOP2()); return *(VOP2_instruction *)this;}
|
||||
constexpr bool isVOP2() const noexcept {return (uint16_t)format & (uint16_t)Format::VOP2;}
|
||||
VOPC_instruction *vopc() noexcept {assert(isVOPC()); return (VOPC_instruction *)this;}
|
||||
const VOPC_instruction *vopc() const noexcept {assert(isVOPC()); return (VOPC_instruction *)this;}
|
||||
VOPC_instruction& vopc() noexcept {assert(isVOPC()); return *(VOPC_instruction *)this;}
|
||||
const VOPC_instruction& vopc() const noexcept {assert(isVOPC()); return *(VOPC_instruction *)this;}
|
||||
constexpr bool isVOPC() const noexcept {return (uint16_t)format & (uint16_t)Format::VOPC;}
|
||||
VOP3_instruction *vop3() noexcept {assert(isVOP3()); return (VOP3_instruction *)this;}
|
||||
const VOP3_instruction *vop3() const noexcept {assert(isVOP3()); return (VOP3_instruction *)this;}
|
||||
VOP3_instruction& vop3() noexcept {assert(isVOP3()); return *(VOP3_instruction *)this;}
|
||||
const VOP3_instruction& vop3() const noexcept {assert(isVOP3()); return *(VOP3_instruction *)this;}
|
||||
constexpr bool isVOP3() const noexcept {return (uint16_t)format & (uint16_t)Format::VOP3;}
|
||||
Interp_instruction *vintrp() noexcept {assert(isVINTRP()); return (Interp_instruction *)this;}
|
||||
const Interp_instruction *vintrp() const noexcept {assert(isVINTRP()); return (Interp_instruction *)this;}
|
||||
Interp_instruction& vintrp() noexcept {assert(isVINTRP()); return *(Interp_instruction *)this;}
|
||||
const Interp_instruction& vintrp() const noexcept {assert(isVINTRP()); return *(Interp_instruction *)this;}
|
||||
constexpr bool isVINTRP() const noexcept {return (uint16_t)format & (uint16_t)Format::VINTRP;}
|
||||
DPP_instruction *dpp() noexcept {assert(isDPP()); return (DPP_instruction *)this;}
|
||||
const DPP_instruction *dpp() const noexcept {assert(isDPP()); return (DPP_instruction *)this;}
|
||||
DPP_instruction& dpp() noexcept {assert(isDPP()); return *(DPP_instruction *)this;}
|
||||
const DPP_instruction& dpp() const noexcept {assert(isDPP()); return *(DPP_instruction *)this;}
|
||||
constexpr bool isDPP() const noexcept {return (uint16_t)format & (uint16_t)Format::DPP;}
|
||||
SDWA_instruction *sdwa() noexcept {assert(isSDWA()); return (SDWA_instruction *)this;}
|
||||
const SDWA_instruction *sdwa() const noexcept {assert(isSDWA()); return (SDWA_instruction *)this;}
|
||||
SDWA_instruction& sdwa() noexcept {assert(isSDWA()); return *(SDWA_instruction *)this;}
|
||||
const SDWA_instruction& sdwa() const noexcept {assert(isSDWA()); return *(SDWA_instruction *)this;}
|
||||
constexpr bool isSDWA() const noexcept {return (uint16_t)format & (uint16_t)Format::SDWA;}
|
||||
|
||||
FLAT_instruction *flatlike()
|
||||
FLAT_instruction& flatlike()
|
||||
{
|
||||
return (FLAT_instruction *)this;
|
||||
return *(FLAT_instruction *)this;
|
||||
}
|
||||
|
||||
const FLAT_instruction *flatlike() const
|
||||
const FLAT_instruction& flatlike() const
|
||||
{
|
||||
return (FLAT_instruction *)this;
|
||||
return *(FLAT_instruction *)this;
|
||||
}
|
||||
|
||||
constexpr bool isFlatLike() const noexcept
|
||||
|
|
@ -1524,23 +1524,23 @@ constexpr bool Instruction::usesModifiers() const noexcept
|
|||
return true;
|
||||
|
||||
if (isVOP3P()) {
|
||||
const VOP3P_instruction *vop3p = this->vop3p();
|
||||
const VOP3P_instruction& vop3p = this->vop3p();
|
||||
for (unsigned i = 0; i < operands.size(); i++) {
|
||||
if (vop3p->neg_lo[i] || vop3p->neg_hi[i])
|
||||
if (vop3p.neg_lo[i] || vop3p.neg_hi[i])
|
||||
return true;
|
||||
|
||||
/* opsel_hi must be 1 to not be considered a modifier - even for constants */
|
||||
if (!(vop3p->opsel_hi & (1 << i)))
|
||||
if (!(vop3p.opsel_hi & (1 << i)))
|
||||
return true;
|
||||
}
|
||||
return vop3p->opsel_lo || vop3p->clamp;
|
||||
return vop3p.opsel_lo || vop3p.clamp;
|
||||
} else if (isVOP3()) {
|
||||
const VOP3_instruction *vop3 = this->vop3();
|
||||
const VOP3_instruction& vop3 = this->vop3();
|
||||
for (unsigned i = 0; i < operands.size(); i++) {
|
||||
if (vop3->abs[i] || vop3->neg[i])
|
||||
if (vop3.abs[i] || vop3.neg[i])
|
||||
return true;
|
||||
}
|
||||
return vop3->opsel || vop3->clamp || vop3->omod;
|
||||
return vop3.opsel || vop3.clamp || vop3.omod;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -646,7 +646,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
|
|||
Definition(PhysReg{vtmp+i}, v1),
|
||||
Operand(PhysReg{tmp+i}, v1),
|
||||
Operand(0xffffffffu), Operand(0xffffffffu)).instr;
|
||||
perm->vop3()->opsel = 1; /* FI (Fetch Inactive) */
|
||||
perm->vop3().opsel = 1; /* FI (Fetch Inactive) */
|
||||
}
|
||||
bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(UINT64_MAX));
|
||||
|
||||
|
|
@ -757,7 +757,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
|
|||
Definition(PhysReg{vtmp+i}, v1),
|
||||
Operand(PhysReg{tmp+i}, v1),
|
||||
Operand(0xffffffffu), Operand(0xffffffffu)).instr;
|
||||
perm->vop3()->opsel = 1; /* FI (Fetch Inactive) */
|
||||
perm->vop3().opsel = 1; /* FI (Fetch Inactive) */
|
||||
}
|
||||
emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
|
||||
|
||||
|
|
@ -1052,12 +1052,12 @@ void copy_constant(lower_context *ctx, Builder& bld, Definition dst, Operand op)
|
|||
if (dst.physReg().byte() == 2) {
|
||||
Operand def_lo(dst.physReg().advance(-2), v2b);
|
||||
Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, def_lo, op);
|
||||
instr->vop3()->opsel = 0;
|
||||
instr->vop3().opsel = 0;
|
||||
} else {
|
||||
assert(dst.physReg().byte() == 0);
|
||||
Operand def_hi(dst.physReg().advance(2), v2b);
|
||||
Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, op, def_hi);
|
||||
instr->vop3()->opsel = 2;
|
||||
instr->vop3().opsel = 2;
|
||||
}
|
||||
} else {
|
||||
uint32_t offset = dst.physReg().byte() * 8u;
|
||||
|
|
@ -1251,7 +1251,7 @@ void do_pack_2x16(lower_context *ctx, Builder& bld, Definition def, Operand lo,
|
|||
if (can_use_pack) {
|
||||
Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, def, lo, hi);
|
||||
/* opsel: 0 = select low half, 1 = select high half. [0] = src0, [1] = src1 */
|
||||
instr->vop3()->opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1);
|
||||
instr->vop3().opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -1810,7 +1810,7 @@ void lower_to_hw_instr(Program* program)
|
|||
aco_ptr<Instruction>& instr = block->instructions[instr_idx];
|
||||
aco_ptr<Instruction> mov;
|
||||
if (instr->isPseudo() && instr->opcode != aco_opcode::p_unit_test) {
|
||||
Pseudo_instruction *pi = instr->pseudo();
|
||||
Pseudo_instruction *pi = &instr->pseudo();
|
||||
|
||||
switch (instr->opcode)
|
||||
{
|
||||
|
|
@ -1897,7 +1897,7 @@ void lower_to_hw_instr(Program* program)
|
|||
instr2->opcode == aco_opcode::p_logical_end)
|
||||
continue;
|
||||
else if (instr2->opcode == aco_opcode::exp &&
|
||||
instr2->exp()->dest == null_exp_dest)
|
||||
instr2->exp().dest == null_exp_dest)
|
||||
continue;
|
||||
else if (instr2->opcode == aco_opcode::p_parallelcopy &&
|
||||
instr2->definitions[0].isFixed() &&
|
||||
|
|
@ -1983,7 +1983,7 @@ void lower_to_hw_instr(Program* program)
|
|||
break;
|
||||
}
|
||||
} else if (instr->isBranch()) {
|
||||
Pseudo_branch_instruction* branch = instr->branch();
|
||||
Pseudo_branch_instruction* branch = &instr->branch();
|
||||
uint32_t target = branch->target[0];
|
||||
|
||||
/* check if all blocks from current to target are empty */
|
||||
|
|
@ -2055,20 +2055,20 @@ void lower_to_hw_instr(Program* program)
|
|||
}
|
||||
|
||||
} else if (instr->isReduction()) {
|
||||
Pseudo_reduction_instruction* reduce = instr->reduction();
|
||||
emit_reduction(&ctx, reduce->opcode, reduce->reduce_op, reduce->cluster_size,
|
||||
reduce->operands[1].physReg(), // tmp
|
||||
reduce->definitions[1].physReg(), // stmp
|
||||
reduce->operands[2].physReg(), // vtmp
|
||||
reduce->definitions[2].physReg(), // sitmp
|
||||
reduce->operands[0], reduce->definitions[0]);
|
||||
Pseudo_reduction_instruction& reduce = instr->reduction();
|
||||
emit_reduction(&ctx, reduce.opcode, reduce.reduce_op, reduce.cluster_size,
|
||||
reduce.operands[1].physReg(), // tmp
|
||||
reduce.definitions[1].physReg(), // stmp
|
||||
reduce.operands[2].physReg(), // vtmp
|
||||
reduce.definitions[2].physReg(), // sitmp
|
||||
reduce.operands[0], reduce.definitions[0]);
|
||||
} else if (instr->isBarrier()) {
|
||||
Pseudo_barrier_instruction* barrier = instr->barrier();
|
||||
Pseudo_barrier_instruction& barrier = instr->barrier();
|
||||
|
||||
/* Anything larger than a workgroup isn't possible. Anything
|
||||
* smaller requires no instructions and this pseudo instruction
|
||||
* would only be included to control optimizations. */
|
||||
bool emit_s_barrier = barrier->exec_scope == scope_workgroup &&
|
||||
bool emit_s_barrier = barrier.exec_scope == scope_workgroup &&
|
||||
program->workgroup_size > program->wave_size;
|
||||
|
||||
bld.insert(std::move(instr));
|
||||
|
|
|
|||
|
|
@ -178,121 +178,121 @@ struct InstrPred {
|
|||
return false;
|
||||
|
||||
if (a->isVOP3()) {
|
||||
VOP3_instruction* a3 = a->vop3();
|
||||
VOP3_instruction* b3 = b->vop3();
|
||||
VOP3_instruction& a3 = a->vop3();
|
||||
VOP3_instruction& b3 = b->vop3();
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
if (a3->abs[i] != b3->abs[i] ||
|
||||
a3->neg[i] != b3->neg[i])
|
||||
if (a3.abs[i] != b3.abs[i] ||
|
||||
a3.neg[i] != b3.neg[i])
|
||||
return false;
|
||||
}
|
||||
return a3->clamp == b3->clamp &&
|
||||
a3->omod == b3->omod &&
|
||||
a3->opsel == b3->opsel;
|
||||
return a3.clamp == b3.clamp &&
|
||||
a3.omod == b3.omod &&
|
||||
a3.opsel == b3.opsel;
|
||||
}
|
||||
if (a->isDPP()) {
|
||||
DPP_instruction* aDPP = a->dpp();
|
||||
DPP_instruction* bDPP = b->dpp();
|
||||
return aDPP->pass_flags == bDPP->pass_flags &&
|
||||
aDPP->dpp_ctrl == bDPP->dpp_ctrl &&
|
||||
aDPP->bank_mask == bDPP->bank_mask &&
|
||||
aDPP->row_mask == bDPP->row_mask &&
|
||||
aDPP->bound_ctrl == bDPP->bound_ctrl &&
|
||||
aDPP->abs[0] == bDPP->abs[0] &&
|
||||
aDPP->abs[1] == bDPP->abs[1] &&
|
||||
aDPP->neg[0] == bDPP->neg[0] &&
|
||||
aDPP->neg[1] == bDPP->neg[1];
|
||||
DPP_instruction& aDPP = a->dpp();
|
||||
DPP_instruction& bDPP = b->dpp();
|
||||
return aDPP.pass_flags == bDPP.pass_flags &&
|
||||
aDPP.dpp_ctrl == bDPP.dpp_ctrl &&
|
||||
aDPP.bank_mask == bDPP.bank_mask &&
|
||||
aDPP.row_mask == bDPP.row_mask &&
|
||||
aDPP.bound_ctrl == bDPP.bound_ctrl &&
|
||||
aDPP.abs[0] == bDPP.abs[0] &&
|
||||
aDPP.abs[1] == bDPP.abs[1] &&
|
||||
aDPP.neg[0] == bDPP.neg[0] &&
|
||||
aDPP.neg[1] == bDPP.neg[1];
|
||||
}
|
||||
if (a->isSDWA()) {
|
||||
SDWA_instruction* aSDWA = a->sdwa();
|
||||
SDWA_instruction* bSDWA = b->sdwa();
|
||||
return aSDWA->sel[0] == bSDWA->sel[0] &&
|
||||
aSDWA->sel[1] == bSDWA->sel[1] &&
|
||||
aSDWA->dst_sel == bSDWA->dst_sel &&
|
||||
aSDWA->abs[0] == bSDWA->abs[0] &&
|
||||
aSDWA->abs[1] == bSDWA->abs[1] &&
|
||||
aSDWA->neg[0] == bSDWA->neg[0] &&
|
||||
aSDWA->neg[1] == bSDWA->neg[1] &&
|
||||
aSDWA->dst_preserve == bSDWA->dst_preserve &&
|
||||
aSDWA->clamp == bSDWA->clamp &&
|
||||
aSDWA->omod == bSDWA->omod;
|
||||
SDWA_instruction& aSDWA = a->sdwa();
|
||||
SDWA_instruction& bSDWA = b->sdwa();
|
||||
return aSDWA.sel[0] == bSDWA.sel[0] &&
|
||||
aSDWA.sel[1] == bSDWA.sel[1] &&
|
||||
aSDWA.dst_sel == bSDWA.dst_sel &&
|
||||
aSDWA.abs[0] == bSDWA.abs[0] &&
|
||||
aSDWA.abs[1] == bSDWA.abs[1] &&
|
||||
aSDWA.neg[0] == bSDWA.neg[0] &&
|
||||
aSDWA.neg[1] == bSDWA.neg[1] &&
|
||||
aSDWA.dst_preserve == bSDWA.dst_preserve &&
|
||||
aSDWA.clamp == bSDWA.clamp &&
|
||||
aSDWA.omod == bSDWA.omod;
|
||||
}
|
||||
|
||||
switch (a->format) {
|
||||
case Format::SOPK: {
|
||||
if (a->opcode == aco_opcode::s_getreg_b32)
|
||||
return false;
|
||||
SOPK_instruction* aK = a->sopk();
|
||||
SOPK_instruction* bK = b->sopk();
|
||||
return aK->imm == bK->imm;
|
||||
SOPK_instruction& aK = a->sopk();
|
||||
SOPK_instruction& bK = b->sopk();
|
||||
return aK.imm == bK.imm;
|
||||
}
|
||||
case Format::SMEM: {
|
||||
SMEM_instruction* aS = a->smem();
|
||||
SMEM_instruction* bS = b->smem();
|
||||
SMEM_instruction& aS = a->smem();
|
||||
SMEM_instruction& bS = b->smem();
|
||||
/* isel shouldn't be creating situations where this assertion fails */
|
||||
assert(aS->prevent_overflow == bS->prevent_overflow);
|
||||
return aS->sync.can_reorder() && bS->sync.can_reorder() &&
|
||||
aS->sync == bS->sync && aS->glc == bS->glc && aS->dlc == bS->dlc &&
|
||||
aS->nv == bS->nv && aS->disable_wqm == bS->disable_wqm &&
|
||||
aS->prevent_overflow == bS->prevent_overflow;
|
||||
assert(aS.prevent_overflow == bS.prevent_overflow);
|
||||
return aS.sync.can_reorder() && bS.sync.can_reorder() &&
|
||||
aS.sync == bS.sync && aS.glc == bS.glc && aS.dlc == bS.dlc &&
|
||||
aS.nv == bS.nv && aS.disable_wqm == bS.disable_wqm &&
|
||||
aS.prevent_overflow == bS.prevent_overflow;
|
||||
}
|
||||
case Format::VINTRP: {
|
||||
Interp_instruction* aI = a->vintrp();
|
||||
Interp_instruction* bI = b->vintrp();
|
||||
if (aI->attribute != bI->attribute)
|
||||
Interp_instruction& aI = a->vintrp();
|
||||
Interp_instruction& bI = b->vintrp();
|
||||
if (aI.attribute != bI.attribute)
|
||||
return false;
|
||||
if (aI->component != bI->component)
|
||||
if (aI.component != bI.component)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
case Format::VOP3P: {
|
||||
VOP3P_instruction* a3P = a->vop3p();
|
||||
VOP3P_instruction* b3P = b->vop3p();
|
||||
VOP3P_instruction& a3P = a->vop3p();
|
||||
VOP3P_instruction& b3P = b->vop3p();
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
if (a3P->neg_lo[i] != b3P->neg_lo[i] ||
|
||||
a3P->neg_hi[i] != b3P->neg_hi[i])
|
||||
if (a3P.neg_lo[i] != b3P.neg_lo[i] ||
|
||||
a3P.neg_hi[i] != b3P.neg_hi[i])
|
||||
return false;
|
||||
}
|
||||
return a3P->opsel_lo == b3P->opsel_lo &&
|
||||
a3P->opsel_hi == b3P->opsel_hi &&
|
||||
a3P->clamp == b3P->clamp;
|
||||
return a3P.opsel_lo == b3P.opsel_lo &&
|
||||
a3P.opsel_hi == b3P.opsel_hi &&
|
||||
a3P.clamp == b3P.clamp;
|
||||
}
|
||||
case Format::PSEUDO_REDUCTION: {
|
||||
Pseudo_reduction_instruction *aR = a->reduction();
|
||||
Pseudo_reduction_instruction *bR = b->reduction();
|
||||
return aR->pass_flags == bR->pass_flags &&
|
||||
aR->reduce_op == bR->reduce_op &&
|
||||
aR->cluster_size == bR->cluster_size;
|
||||
Pseudo_reduction_instruction& aR = a->reduction();
|
||||
Pseudo_reduction_instruction& bR = b->reduction();
|
||||
return aR.pass_flags == bR.pass_flags &&
|
||||
aR.reduce_op == bR.reduce_op &&
|
||||
aR.cluster_size == bR.cluster_size;
|
||||
}
|
||||
case Format::MTBUF: {
|
||||
MTBUF_instruction* aM = a->mtbuf();
|
||||
MTBUF_instruction* bM = b->mtbuf();
|
||||
return aM->sync.can_reorder() && bM->sync.can_reorder() &&
|
||||
aM->sync == bM->sync &&
|
||||
aM->dfmt == bM->dfmt &&
|
||||
aM->nfmt == bM->nfmt &&
|
||||
aM->offset == bM->offset &&
|
||||
aM->offen == bM->offen &&
|
||||
aM->idxen == bM->idxen &&
|
||||
aM->glc == bM->glc &&
|
||||
aM->dlc == bM->dlc &&
|
||||
aM->slc == bM->slc &&
|
||||
aM->tfe == bM->tfe &&
|
||||
aM->disable_wqm == bM->disable_wqm;
|
||||
MTBUF_instruction& aM = a->mtbuf();
|
||||
MTBUF_instruction& bM = b->mtbuf();
|
||||
return aM.sync.can_reorder() && bM.sync.can_reorder() &&
|
||||
aM.sync == bM.sync &&
|
||||
aM.dfmt == bM.dfmt &&
|
||||
aM.nfmt == bM.nfmt &&
|
||||
aM.offset == bM.offset &&
|
||||
aM.offen == bM.offen &&
|
||||
aM.idxen == bM.idxen &&
|
||||
aM.glc == bM.glc &&
|
||||
aM.dlc == bM.dlc &&
|
||||
aM.slc == bM.slc &&
|
||||
aM.tfe == bM.tfe &&
|
||||
aM.disable_wqm == bM.disable_wqm;
|
||||
}
|
||||
case Format::MUBUF: {
|
||||
MUBUF_instruction* aM = a->mubuf();
|
||||
MUBUF_instruction* bM = b->mubuf();
|
||||
return aM->sync.can_reorder() && bM->sync.can_reorder() &&
|
||||
aM->sync == bM->sync &&
|
||||
aM->offset == bM->offset &&
|
||||
aM->offen == bM->offen &&
|
||||
aM->idxen == bM->idxen &&
|
||||
aM->glc == bM->glc &&
|
||||
aM->dlc == bM->dlc &&
|
||||
aM->slc == bM->slc &&
|
||||
aM->tfe == bM->tfe &&
|
||||
aM->lds == bM->lds &&
|
||||
aM->disable_wqm == bM->disable_wqm;
|
||||
MUBUF_instruction& aM = a->mubuf();
|
||||
MUBUF_instruction& bM = b->mubuf();
|
||||
return aM.sync.can_reorder() && bM.sync.can_reorder() &&
|
||||
aM.sync == bM.sync &&
|
||||
aM.offset == bM.offset &&
|
||||
aM.offen == bM.offen &&
|
||||
aM.idxen == bM.idxen &&
|
||||
aM.glc == bM.glc &&
|
||||
aM.dlc == bM.dlc &&
|
||||
aM.slc == bM.slc &&
|
||||
aM.tfe == bM.tfe &&
|
||||
aM.lds == bM.lds &&
|
||||
aM.disable_wqm == bM.disable_wqm;
|
||||
}
|
||||
/* we want to optimize these in NIR and don't hassle with load-store dependencies */
|
||||
case Format::FLAT:
|
||||
|
|
@ -308,31 +308,31 @@ struct InstrPred {
|
|||
a->opcode != aco_opcode::ds_permute_b32 &&
|
||||
a->opcode != aco_opcode::ds_swizzle_b32)
|
||||
return false;
|
||||
DS_instruction* aD = a->ds();
|
||||
DS_instruction* bD = b->ds();
|
||||
return aD->sync.can_reorder() && bD->sync.can_reorder() &&
|
||||
aD->sync == bD->sync &&
|
||||
aD->pass_flags == bD->pass_flags &&
|
||||
aD->gds == bD->gds &&
|
||||
aD->offset0 == bD->offset0 &&
|
||||
aD->offset1 == bD->offset1;
|
||||
DS_instruction& aD = a->ds();
|
||||
DS_instruction& bD = b->ds();
|
||||
return aD.sync.can_reorder() && bD.sync.can_reorder() &&
|
||||
aD.sync == bD.sync &&
|
||||
aD.pass_flags == bD.pass_flags &&
|
||||
aD.gds == bD.gds &&
|
||||
aD.offset0 == bD.offset0 &&
|
||||
aD.offset1 == bD.offset1;
|
||||
}
|
||||
case Format::MIMG: {
|
||||
MIMG_instruction* aM = a->mimg();
|
||||
MIMG_instruction* bM = b->mimg();
|
||||
return aM->sync.can_reorder() && bM->sync.can_reorder() &&
|
||||
aM->sync == bM->sync &&
|
||||
aM->dmask == bM->dmask &&
|
||||
aM->unrm == bM->unrm &&
|
||||
aM->glc == bM->glc &&
|
||||
aM->slc == bM->slc &&
|
||||
aM->tfe == bM->tfe &&
|
||||
aM->da == bM->da &&
|
||||
aM->lwe == bM->lwe &&
|
||||
aM->r128 == bM->r128 &&
|
||||
aM->a16 == bM->a16 &&
|
||||
aM->d16 == bM->d16 &&
|
||||
aM->disable_wqm == bM->disable_wqm;
|
||||
MIMG_instruction& aM = a->mimg();
|
||||
MIMG_instruction& bM = b->mimg();
|
||||
return aM.sync.can_reorder() && bM.sync.can_reorder() &&
|
||||
aM.sync == bM.sync &&
|
||||
aM.dmask == bM.dmask &&
|
||||
aM.unrm == bM.unrm &&
|
||||
aM.glc == bM.glc &&
|
||||
aM.slc == bM.slc &&
|
||||
aM.tfe == bM.tfe &&
|
||||
aM.da == bM.da &&
|
||||
aM.lwe == bM.lwe &&
|
||||
aM.r128 == bM.r128 &&
|
||||
aM.a16 == bM.a16 &&
|
||||
aM.d16 == bM.d16 &&
|
||||
aM.disable_wqm == bM.disable_wqm;
|
||||
}
|
||||
default:
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -940,7 +940,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
|||
can_use_mod = can_use_mod && instr_info.can_use_input_modifiers[(int)instr->opcode];
|
||||
|
||||
if (instr->isSDWA())
|
||||
can_use_mod = can_use_mod && (instr->sdwa()->sel[i] & sdwa_asuint) == sdwa_udword;
|
||||
can_use_mod = can_use_mod && (instr->sdwa().sel[i] & sdwa_asuint) == sdwa_udword;
|
||||
else
|
||||
can_use_mod = can_use_mod && (instr->isDPP() || can_use_VOP3(ctx, instr));
|
||||
|
||||
|
|
@ -949,11 +949,11 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
|||
to_VOP3(ctx, instr);
|
||||
instr->operands[i] = Operand(info.temp);
|
||||
if (instr->isDPP())
|
||||
instr->dpp()->abs[i] = true;
|
||||
instr->dpp().abs[i] = true;
|
||||
else if (instr->isSDWA())
|
||||
instr->sdwa()->abs[i] = true;
|
||||
instr->sdwa().abs[i] = true;
|
||||
else
|
||||
instr->vop3()->abs[i] = true;
|
||||
instr->vop3().abs[i] = true;
|
||||
}
|
||||
if (info.is_neg() && instr->opcode == aco_opcode::v_add_f32) {
|
||||
instr->opcode = i ? aco_opcode::v_sub_f32 : aco_opcode::v_subrev_f32;
|
||||
|
|
@ -968,11 +968,11 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
|||
to_VOP3(ctx, instr);
|
||||
instr->operands[i].setTemp(info.temp);
|
||||
if (instr->isDPP())
|
||||
instr->dpp()->neg[i] = true;
|
||||
instr->dpp().neg[i] = true;
|
||||
else if (instr->isSDWA())
|
||||
instr->sdwa()->neg[i] = true;
|
||||
instr->sdwa().neg[i] = true;
|
||||
else
|
||||
instr->vop3()->neg[i] = true;
|
||||
instr->vop3().neg[i] = true;
|
||||
continue;
|
||||
}
|
||||
unsigned bits = get_operand_size(instr, i);
|
||||
|
|
@ -999,7 +999,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
|||
|
||||
/* MUBUF: propagate constants and combine additions */
|
||||
else if (instr->isMUBUF()) {
|
||||
MUBUF_instruction *mubuf = instr->mubuf();
|
||||
MUBUF_instruction& mubuf = instr->mubuf();
|
||||
Temp base;
|
||||
uint32_t offset;
|
||||
while (info.is_temp())
|
||||
|
|
@ -1011,29 +1011,29 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
|||
* scratch accesses and other accesses and swizzling changing how
|
||||
* addressing works significantly, this probably applies to swizzled
|
||||
* MUBUF accesses. */
|
||||
bool vaddr_prevent_overflow = mubuf->swizzled && ctx.program->chip_class < GFX9;
|
||||
bool saddr_prevent_overflow = mubuf->swizzled;
|
||||
bool vaddr_prevent_overflow = mubuf.swizzled && ctx.program->chip_class < GFX9;
|
||||
bool saddr_prevent_overflow = mubuf.swizzled;
|
||||
|
||||
if (mubuf->offen && i == 1 && info.is_constant_or_literal(32) && mubuf->offset + info.val < 4096) {
|
||||
assert(!mubuf->idxen);
|
||||
if (mubuf.offen && i == 1 && info.is_constant_or_literal(32) && mubuf.offset + info.val < 4096) {
|
||||
assert(!mubuf.idxen);
|
||||
instr->operands[1] = Operand(v1);
|
||||
mubuf->offset += info.val;
|
||||
mubuf->offen = false;
|
||||
mubuf.offset += info.val;
|
||||
mubuf.offen = false;
|
||||
continue;
|
||||
} else if (i == 2 && info.is_constant_or_literal(32) && mubuf->offset + info.val < 4096) {
|
||||
} else if (i == 2 && info.is_constant_or_literal(32) && mubuf.offset + info.val < 4096) {
|
||||
instr->operands[2] = Operand((uint32_t) 0);
|
||||
mubuf->offset += info.val;
|
||||
mubuf.offset += info.val;
|
||||
continue;
|
||||
} else if (mubuf->offen && i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, vaddr_prevent_overflow) &&
|
||||
base.regClass() == v1 && mubuf->offset + offset < 4096) {
|
||||
assert(!mubuf->idxen);
|
||||
} else if (mubuf.offen && i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, vaddr_prevent_overflow) &&
|
||||
base.regClass() == v1 && mubuf.offset + offset < 4096) {
|
||||
assert(!mubuf.idxen);
|
||||
instr->operands[1].setTemp(base);
|
||||
mubuf->offset += offset;
|
||||
mubuf.offset += offset;
|
||||
continue;
|
||||
} else if (i == 2 && parse_base_offset(ctx, instr.get(), i, &base, &offset, saddr_prevent_overflow) &&
|
||||
base.regClass() == s1 && mubuf->offset + offset < 4096) {
|
||||
base.regClass() == s1 && mubuf.offset + offset < 4096) {
|
||||
instr->operands[i].setTemp(base);
|
||||
mubuf->offset += offset;
|
||||
mubuf.offset += offset;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
|
@ -1041,7 +1041,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
|||
/* DS: combine additions */
|
||||
else if (instr->isDS()) {
|
||||
|
||||
DS_instruction *ds = instr->ds();
|
||||
DS_instruction& ds = instr->ds();
|
||||
Temp base;
|
||||
uint32_t offset;
|
||||
bool has_usable_ds_offset = ctx.program->chip_class >= GFX7;
|
||||
|
|
@ -1055,16 +1055,16 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
|||
unsigned shifts = (instr->opcode == aco_opcode::ds_write2_b64 || instr->opcode == aco_opcode::ds_read2_b64) ? 3 : 2;
|
||||
|
||||
if ((offset & mask) == 0 &&
|
||||
ds->offset0 + (offset >> shifts) <= 255 &&
|
||||
ds->offset1 + (offset >> shifts) <= 255) {
|
||||
ds.offset0 + (offset >> shifts) <= 255 &&
|
||||
ds.offset1 + (offset >> shifts) <= 255) {
|
||||
instr->operands[i].setTemp(base);
|
||||
ds->offset0 += offset >> shifts;
|
||||
ds->offset1 += offset >> shifts;
|
||||
ds.offset0 += offset >> shifts;
|
||||
ds.offset1 += offset >> shifts;
|
||||
}
|
||||
} else {
|
||||
if (ds->offset0 + offset <= 65535) {
|
||||
if (ds.offset0 + offset <= 65535) {
|
||||
instr->operands[i].setTemp(base);
|
||||
ds->offset0 += offset;
|
||||
ds.offset0 += offset;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1073,10 +1073,10 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
|||
/* SMEM: propagate constants and combine additions */
|
||||
else if (instr->isSMEM()) {
|
||||
|
||||
SMEM_instruction *smem = instr->smem();
|
||||
SMEM_instruction& smem = instr->smem();
|
||||
Temp base;
|
||||
uint32_t offset;
|
||||
bool prevent_overflow = smem->operands[0].size() > 2 || smem->prevent_overflow;
|
||||
bool prevent_overflow = smem.operands[0].size() > 2 || smem.prevent_overflow;
|
||||
if (i == 1 && info.is_constant_or_literal(32) &&
|
||||
((ctx.program->chip_class == GFX6 && info.val <= 0x3FF) ||
|
||||
(ctx.program->chip_class == GFX7 && info.val <= 0xFFFFFFFF) ||
|
||||
|
|
@ -1084,31 +1084,30 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
|||
instr->operands[i] = Operand(info.val);
|
||||
continue;
|
||||
} else if (i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, prevent_overflow) && base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->chip_class >= GFX9) {
|
||||
bool soe = smem->operands.size() >= (!smem->definitions.empty() ? 3 : 4);
|
||||
bool soe = smem.operands.size() >= (!smem.definitions.empty() ? 3 : 4);
|
||||
if (soe &&
|
||||
(!ctx.info[smem->operands.back().tempId()].is_constant_or_literal(32) ||
|
||||
ctx.info[smem->operands.back().tempId()].val != 0)) {
|
||||
(!ctx.info[smem.operands.back().tempId()].is_constant_or_literal(32) ||
|
||||
ctx.info[smem.operands.back().tempId()].val != 0)) {
|
||||
continue;
|
||||
}
|
||||
if (soe) {
|
||||
smem->operands[1] = Operand(offset);
|
||||
smem->operands.back() = Operand(base);
|
||||
smem.operands[1] = Operand(offset);
|
||||
smem.operands.back() = Operand(base);
|
||||
} else {
|
||||
SMEM_instruction *new_instr = create_instruction<SMEM_instruction>(smem->opcode, Format::SMEM, smem->operands.size() + 1, smem->definitions.size());
|
||||
new_instr->operands[0] = smem->operands[0];
|
||||
SMEM_instruction *new_instr = create_instruction<SMEM_instruction>(smem.opcode, Format::SMEM, smem.operands.size() + 1, smem.definitions.size());
|
||||
new_instr->operands[0] = smem.operands[0];
|
||||
new_instr->operands[1] = Operand(offset);
|
||||
if (smem->definitions.empty())
|
||||
new_instr->operands[2] = smem->operands[2];
|
||||
if (smem.definitions.empty())
|
||||
new_instr->operands[2] = smem.operands[2];
|
||||
new_instr->operands.back() = Operand(base);
|
||||
if (!smem->definitions.empty())
|
||||
new_instr->definitions[0] = smem->definitions[0];
|
||||
new_instr->sync = smem->sync;
|
||||
new_instr->glc = smem->glc;
|
||||
new_instr->dlc = smem->dlc;
|
||||
new_instr->nv = smem->nv;
|
||||
new_instr->disable_wqm = smem->disable_wqm;
|
||||
if (!smem.definitions.empty())
|
||||
new_instr->definitions[0] = smem.definitions[0];
|
||||
new_instr->sync = smem.sync;
|
||||
new_instr->glc = smem.glc;
|
||||
new_instr->dlc = smem.dlc;
|
||||
new_instr->nv = smem.nv;
|
||||
new_instr->disable_wqm = smem.disable_wqm;
|
||||
instr.reset(new_instr);
|
||||
smem = instr->smem();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
|
@ -1365,10 +1364,10 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
|||
}
|
||||
case aco_opcode::v_med3_f16:
|
||||
case aco_opcode::v_med3_f32: { /* clamp */
|
||||
VOP3_instruction* vop3 = instr->vop3();
|
||||
if (vop3->abs[0] || vop3->abs[1] || vop3->abs[2] ||
|
||||
vop3->neg[0] || vop3->neg[1] || vop3->neg[2] ||
|
||||
vop3->omod != 0 || vop3->opsel != 0)
|
||||
VOP3_instruction& vop3 = instr->vop3();
|
||||
if (vop3.abs[0] || vop3.abs[1] || vop3.abs[2] ||
|
||||
vop3.neg[0] || vop3.neg[1] || vop3.neg[2] ||
|
||||
vop3.omod != 0 || vop3.opsel != 0)
|
||||
break;
|
||||
|
||||
unsigned idx = 0;
|
||||
|
|
@ -1682,12 +1681,12 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
|||
return false;
|
||||
|
||||
if (op_instr[i]->isVOP3()) {
|
||||
VOP3_instruction *vop3 = op_instr[i]->vop3();
|
||||
if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2)
|
||||
VOP3_instruction& vop3 = op_instr[i]->vop3();
|
||||
if (vop3.neg[0] != vop3.neg[1] || vop3.abs[0] != vop3.abs[1] || vop3.opsel == 1 || vop3.opsel == 2)
|
||||
return false;
|
||||
neg[i] = vop3->neg[0];
|
||||
abs[i] = vop3->abs[0];
|
||||
opsel |= (vop3->opsel & 1) << i;
|
||||
neg[i] = vop3.neg[0];
|
||||
abs[i] = vop3.abs[0];
|
||||
opsel |= (vop3.opsel & 1) << i;
|
||||
} else if (op_instr[i]->isSDWA()) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1798,12 +1797,12 @@ bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
|||
Instruction *new_instr;
|
||||
if (cmp->isVOP3()) {
|
||||
VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
|
||||
VOP3_instruction *cmp_vop3 = cmp->vop3();
|
||||
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
|
||||
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
|
||||
new_vop3->clamp = cmp_vop3->clamp;
|
||||
new_vop3->omod = cmp_vop3->omod;
|
||||
new_vop3->opsel = cmp_vop3->opsel;
|
||||
VOP3_instruction& cmp_vop3 = cmp->vop3();
|
||||
memcpy(new_vop3->abs, cmp_vop3.abs, sizeof(new_vop3->abs));
|
||||
memcpy(new_vop3->neg, cmp_vop3.neg, sizeof(new_vop3->neg));
|
||||
new_vop3->clamp = cmp_vop3.clamp;
|
||||
new_vop3->omod = cmp_vop3.omod;
|
||||
new_vop3->opsel = cmp_vop3.opsel;
|
||||
new_instr = new_vop3;
|
||||
} else {
|
||||
new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
|
||||
|
|
@ -1885,8 +1884,8 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in
|
|||
return false;
|
||||
|
||||
if (nan_test->isVOP3()) {
|
||||
VOP3_instruction *vop3 = nan_test->vop3();
|
||||
if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2)
|
||||
VOP3_instruction& vop3 = nan_test->vop3();
|
||||
if (vop3.neg[0] != vop3.neg[1] || vop3.abs[0] != vop3.abs[1] || vop3.opsel == 1 || vop3.opsel == 2)
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -1917,12 +1916,12 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in
|
|||
Instruction *new_instr;
|
||||
if (cmp->isVOP3()) {
|
||||
VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
|
||||
VOP3_instruction *cmp_vop3 = cmp->vop3();
|
||||
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
|
||||
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
|
||||
new_vop3->clamp = cmp_vop3->clamp;
|
||||
new_vop3->omod = cmp_vop3->omod;
|
||||
new_vop3->opsel = cmp_vop3->opsel;
|
||||
VOP3_instruction& cmp_vop3 = cmp->vop3();
|
||||
memcpy(new_vop3->abs, cmp_vop3.abs, sizeof(new_vop3->abs));
|
||||
memcpy(new_vop3->neg, cmp_vop3.neg, sizeof(new_vop3->neg));
|
||||
new_vop3->clamp = cmp_vop3.clamp;
|
||||
new_vop3->omod = cmp_vop3.omod;
|
||||
new_vop3->opsel = cmp_vop3.opsel;
|
||||
new_instr = new_vop3;
|
||||
} else {
|
||||
new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
|
||||
|
|
@ -1966,24 +1965,24 @@ bool combine_inverse_comparison(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
|||
Instruction *new_instr;
|
||||
if (cmp->isVOP3()) {
|
||||
VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_opcode, asVOP3(Format::VOPC), 2, 1);
|
||||
VOP3_instruction *cmp_vop3 = cmp->vop3();
|
||||
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
|
||||
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
|
||||
new_vop3->clamp = cmp_vop3->clamp;
|
||||
new_vop3->omod = cmp_vop3->omod;
|
||||
new_vop3->opsel = cmp_vop3->opsel;
|
||||
VOP3_instruction& cmp_vop3 = cmp->vop3();
|
||||
memcpy(new_vop3->abs, cmp_vop3.abs, sizeof(new_vop3->abs));
|
||||
memcpy(new_vop3->neg, cmp_vop3.neg, sizeof(new_vop3->neg));
|
||||
new_vop3->clamp = cmp_vop3.clamp;
|
||||
new_vop3->omod = cmp_vop3.omod;
|
||||
new_vop3->opsel = cmp_vop3.opsel;
|
||||
new_instr = new_vop3;
|
||||
} else if (cmp->isSDWA()) {
|
||||
SDWA_instruction *new_sdwa = create_instruction<SDWA_instruction>(
|
||||
new_opcode, (Format)((uint16_t)Format::SDWA | (uint16_t)Format::VOPC), 2, 1);
|
||||
SDWA_instruction *cmp_sdwa = cmp->sdwa();
|
||||
memcpy(new_sdwa->abs, cmp_sdwa->abs, sizeof(new_sdwa->abs));
|
||||
memcpy(new_sdwa->sel, cmp_sdwa->sel, sizeof(new_sdwa->sel));
|
||||
memcpy(new_sdwa->neg, cmp_sdwa->neg, sizeof(new_sdwa->neg));
|
||||
new_sdwa->dst_sel = cmp_sdwa->dst_sel;
|
||||
new_sdwa->dst_preserve = cmp_sdwa->dst_preserve;
|
||||
new_sdwa->clamp = cmp_sdwa->clamp;
|
||||
new_sdwa->omod = cmp_sdwa->omod;
|
||||
SDWA_instruction& cmp_sdwa = cmp->sdwa();
|
||||
memcpy(new_sdwa->abs, cmp_sdwa.abs, sizeof(new_sdwa->abs));
|
||||
memcpy(new_sdwa->sel, cmp_sdwa.sel, sizeof(new_sdwa->sel));
|
||||
memcpy(new_sdwa->neg, cmp_sdwa.neg, sizeof(new_sdwa->neg));
|
||||
new_sdwa->dst_sel = cmp_sdwa.dst_sel;
|
||||
new_sdwa->dst_preserve = cmp_sdwa.dst_preserve;
|
||||
new_sdwa->clamp = cmp_sdwa.clamp;
|
||||
new_sdwa->omod = cmp_sdwa.omod;
|
||||
new_instr = new_sdwa;
|
||||
} else {
|
||||
new_instr = create_instruction<VOPC_instruction>(new_opcode, Format::VOPC, 2, 1);
|
||||
|
|
@ -2019,8 +2018,8 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2,
|
|||
if (fixed_to_exec(op2_instr->operands[0]) || fixed_to_exec(op2_instr->operands[1]))
|
||||
return false;
|
||||
|
||||
VOP3_instruction *op1_vop3 = op1_instr->isVOP3() ? op1_instr->vop3() : NULL;
|
||||
VOP3_instruction *op2_vop3 = op2_instr->isVOP3() ? op2_instr->vop3() : NULL;
|
||||
VOP3_instruction *op1_vop3 = op1_instr->isVOP3() ? &op1_instr->vop3() : NULL;
|
||||
VOP3_instruction *op2_vop3 = op2_instr->isVOP3() ? &op2_instr->vop3() : NULL;
|
||||
|
||||
if (op1_instr->isSDWA() || op2_instr->isSDWA())
|
||||
return false;
|
||||
|
|
@ -2641,11 +2640,11 @@ bool apply_omod_clamp(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
|||
assert(!ctx.info[instr->definitions[0].tempId()].is_mad());
|
||||
|
||||
if (instr->isSDWA()) {
|
||||
if (!apply_omod_clamp_helper(ctx, instr->sdwa(), def_info))
|
||||
if (!apply_omod_clamp_helper(ctx, &instr->sdwa(), def_info))
|
||||
return false;
|
||||
} else {
|
||||
to_VOP3(ctx, instr);
|
||||
if (!apply_omod_clamp_helper(ctx, instr->vop3(), def_info))
|
||||
if (!apply_omod_clamp_helper(ctx, &instr->vop3(), def_info))
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -2767,7 +2766,7 @@ void propagate_swizzles(VOP3P_instruction* instr, uint8_t opsel_lo, uint8_t opse
|
|||
|
||||
void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
VOP3P_instruction* vop3p = instr->vop3p();
|
||||
VOP3P_instruction* vop3p = &instr->vop3p();
|
||||
|
||||
/* apply clamp */
|
||||
if (instr->opcode == aco_opcode::v_pk_mul_f16 &&
|
||||
|
|
@ -2778,7 +2777,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
|||
|
||||
ssa_info& info = ctx.info[instr->operands[0].tempId()];
|
||||
if (info.is_vop3p() && instr_info.can_use_output_modifiers[(int)info.instr->opcode]) {
|
||||
VOP3P_instruction* candidate = ctx.info[instr->operands[0].tempId()].instr->vop3p();
|
||||
VOP3P_instruction* candidate = &ctx.info[instr->operands[0].tempId()].instr->vop3p();
|
||||
candidate->clamp = true;
|
||||
propagate_swizzles(candidate, vop3p->opsel_lo, vop3p->opsel_hi);
|
||||
std::swap(instr->definitions[0], candidate->definitions[0]);
|
||||
|
|
@ -2804,7 +2803,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
|||
if (!check_vop3_operands(ctx, 2, ops))
|
||||
continue;
|
||||
|
||||
VOP3P_instruction* fneg = info.instr->vop3p();
|
||||
VOP3P_instruction* fneg = &info.instr->vop3p();
|
||||
if (fneg->clamp)
|
||||
continue;
|
||||
instr->operands[i] = fneg->operands[0];
|
||||
|
|
@ -2849,7 +2848,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
|||
continue;
|
||||
|
||||
/* no clamp allowed between mul and add */
|
||||
if (info.instr->vop3p()->clamp)
|
||||
if (info.instr->vop3p().clamp)
|
||||
continue;
|
||||
|
||||
mul_instr = info.instr;
|
||||
|
|
@ -2875,7 +2874,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
|||
/* turn packed mul+add into v_pk_fma_f16 */
|
||||
assert(mul_instr->isVOP3P());
|
||||
aco_ptr<VOP3P_instruction> fma{create_instruction<VOP3P_instruction>(aco_opcode::v_pk_fma_f16, Format::VOP3P, 3, 1)};
|
||||
VOP3P_instruction* mul = mul_instr->vop3p();
|
||||
VOP3P_instruction* mul = &mul_instr->vop3p();
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
fma->operands[i] = op[i];
|
||||
fma->neg_lo[i] = mul->neg_lo[i];
|
||||
|
|
@ -2944,7 +2943,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
|
|||
|
||||
if (mul_instr->operands[0].isLiteral())
|
||||
return;
|
||||
if (mul_instr->isVOP3() && mul_instr->vop3()->clamp)
|
||||
if (mul_instr->isVOP3() && mul_instr->vop3().clamp)
|
||||
return;
|
||||
if (mul_instr->isSDWA())
|
||||
return;
|
||||
|
|
@ -2958,17 +2957,17 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
|
|||
instr->operands[0] = mul_instr->operands[0];
|
||||
instr->operands[1] = mul_instr->operands[1];
|
||||
instr->definitions[0] = def;
|
||||
VOP3_instruction* new_mul = instr->vop3();
|
||||
VOP3_instruction& new_mul = instr->vop3();
|
||||
if (mul_instr->isVOP3()) {
|
||||
VOP3_instruction* mul = mul_instr->vop3();
|
||||
new_mul->neg[0] = mul->neg[0] && !is_abs;
|
||||
new_mul->neg[1] = mul->neg[1] && !is_abs;
|
||||
new_mul->abs[0] = mul->abs[0] || is_abs;
|
||||
new_mul->abs[1] = mul->abs[1] || is_abs;
|
||||
new_mul->omod = mul->omod;
|
||||
VOP3_instruction& mul = mul_instr->vop3();
|
||||
new_mul.neg[0] = mul.neg[0] && !is_abs;
|
||||
new_mul.neg[1] = mul.neg[1] && !is_abs;
|
||||
new_mul.abs[0] = mul.abs[0] || is_abs;
|
||||
new_mul.abs[1] = mul.abs[1] || is_abs;
|
||||
new_mul.omod = mul.omod;
|
||||
}
|
||||
new_mul->neg[0] ^= true;
|
||||
new_mul->clamp = false;
|
||||
new_mul.neg[0] ^= true;
|
||||
new_mul.clamp = false;
|
||||
|
||||
ctx.info[instr->definitions[0].tempId()].set_mul(instr.get());
|
||||
return;
|
||||
|
|
@ -3002,7 +3001,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
|
|||
continue;
|
||||
|
||||
/* no clamp/omod allowed between mul and add */
|
||||
if (info.instr->isVOP3() && (info.instr->vop3()->clamp || info.instr->vop3()->omod))
|
||||
if (info.instr->isVOP3() && (info.instr->vop3().clamp || info.instr->vop3().omod))
|
||||
continue;
|
||||
|
||||
Operand op[3] = {info.instr->operands[0], info.instr->operands[1], instr->operands[1 - i]};
|
||||
|
|
@ -3033,28 +3032,28 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
|
|||
bool clamp = false;
|
||||
|
||||
if (mul_instr->isVOP3()) {
|
||||
VOP3_instruction* vop3 = mul_instr->vop3();
|
||||
neg[0] = vop3->neg[0];
|
||||
neg[1] = vop3->neg[1];
|
||||
abs[0] = vop3->abs[0];
|
||||
abs[1] = vop3->abs[1];
|
||||
VOP3_instruction& vop3 = mul_instr->vop3();
|
||||
neg[0] = vop3.neg[0];
|
||||
neg[1] = vop3.neg[1];
|
||||
abs[0] = vop3.abs[0];
|
||||
abs[1] = vop3.abs[1];
|
||||
}
|
||||
|
||||
if (instr->isVOP3()) {
|
||||
VOP3_instruction* vop3 = instr->vop3();
|
||||
neg[2] = vop3->neg[add_op_idx];
|
||||
abs[2] = vop3->abs[add_op_idx];
|
||||
omod = vop3->omod;
|
||||
clamp = vop3->clamp;
|
||||
VOP3_instruction& vop3 = instr->vop3();
|
||||
neg[2] = vop3.neg[add_op_idx];
|
||||
abs[2] = vop3.abs[add_op_idx];
|
||||
omod = vop3.omod;
|
||||
clamp = vop3.clamp;
|
||||
/* abs of the multiplication result */
|
||||
if (vop3->abs[1 - add_op_idx]) {
|
||||
if (vop3.abs[1 - add_op_idx]) {
|
||||
neg[0] = false;
|
||||
neg[1] = false;
|
||||
abs[0] = true;
|
||||
abs[1] = true;
|
||||
}
|
||||
/* neg of the multiplication result */
|
||||
neg[1] = neg[1] ^ vop3->neg[1 - add_op_idx];
|
||||
neg[1] = neg[1] ^ vop3.neg[1 - add_op_idx];
|
||||
}
|
||||
if (instr->opcode == aco_opcode::v_sub_f32 || instr->opcode == aco_opcode::v_sub_f16)
|
||||
neg[1 + add_op_idx] = neg[1 + add_op_idx] ^ true;
|
||||
|
|
|
|||
|
|
@ -273,12 +273,12 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
|
|||
{
|
||||
switch (instr->format) {
|
||||
case Format::SOPK: {
|
||||
const SOPK_instruction* sopk = instr->sopk();
|
||||
fprintf(output, " imm:%d", sopk->imm & 0x8000 ? (sopk->imm - 65536) : sopk->imm);
|
||||
const SOPK_instruction& sopk = instr->sopk();
|
||||
fprintf(output, " imm:%d", sopk.imm & 0x8000 ? (sopk.imm - 65536) : sopk.imm);
|
||||
break;
|
||||
}
|
||||
case Format::SOPP: {
|
||||
uint16_t imm = instr->sopp()->imm;
|
||||
uint16_t imm = instr->sopp().imm;
|
||||
switch (instr->opcode) {
|
||||
case aco_opcode::s_waitcnt: {
|
||||
/* we usually should check the chip class for vmcnt/lgkm, but
|
||||
|
|
@ -340,74 +340,74 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
|
|||
break;
|
||||
}
|
||||
}
|
||||
if (instr->sopp()->block != -1)
|
||||
fprintf(output, " block:BB%d", instr->sopp()->block);
|
||||
if (instr->sopp().block != -1)
|
||||
fprintf(output, " block:BB%d", instr->sopp().block);
|
||||
break;
|
||||
}
|
||||
case Format::SMEM: {
|
||||
const SMEM_instruction* smem = instr->smem();
|
||||
if (smem->glc)
|
||||
const SMEM_instruction& smem = instr->smem();
|
||||
if (smem.glc)
|
||||
fprintf(output, " glc");
|
||||
if (smem->dlc)
|
||||
if (smem.dlc)
|
||||
fprintf(output, " dlc");
|
||||
if (smem->nv)
|
||||
if (smem.nv)
|
||||
fprintf(output, " nv");
|
||||
print_sync(smem->sync, output);
|
||||
print_sync(smem.sync, output);
|
||||
break;
|
||||
}
|
||||
case Format::VINTRP: {
|
||||
const Interp_instruction* vintrp = instr->vintrp();
|
||||
fprintf(output, " attr%d.%c", vintrp->attribute, "xyzw"[vintrp->component]);
|
||||
const Interp_instruction& vintrp = instr->vintrp();
|
||||
fprintf(output, " attr%d.%c", vintrp.attribute, "xyzw"[vintrp.component]);
|
||||
break;
|
||||
}
|
||||
case Format::DS: {
|
||||
const DS_instruction* ds = instr->ds();
|
||||
if (ds->offset0)
|
||||
fprintf(output, " offset0:%u", ds->offset0);
|
||||
if (ds->offset1)
|
||||
fprintf(output, " offset1:%u", ds->offset1);
|
||||
if (ds->gds)
|
||||
const DS_instruction& ds = instr->ds();
|
||||
if (ds.offset0)
|
||||
fprintf(output, " offset0:%u", ds.offset0);
|
||||
if (ds.offset1)
|
||||
fprintf(output, " offset1:%u", ds.offset1);
|
||||
if (ds.gds)
|
||||
fprintf(output, " gds");
|
||||
print_sync(ds->sync, output);
|
||||
print_sync(ds.sync, output);
|
||||
break;
|
||||
}
|
||||
case Format::MUBUF: {
|
||||
const MUBUF_instruction* mubuf = instr->mubuf();
|
||||
if (mubuf->offset)
|
||||
fprintf(output, " offset:%u", mubuf->offset);
|
||||
if (mubuf->offen)
|
||||
const MUBUF_instruction& mubuf = instr->mubuf();
|
||||
if (mubuf.offset)
|
||||
fprintf(output, " offset:%u", mubuf.offset);
|
||||
if (mubuf.offen)
|
||||
fprintf(output, " offen");
|
||||
if (mubuf->idxen)
|
||||
if (mubuf.idxen)
|
||||
fprintf(output, " idxen");
|
||||
if (mubuf->addr64)
|
||||
if (mubuf.addr64)
|
||||
fprintf(output, " addr64");
|
||||
if (mubuf->glc)
|
||||
if (mubuf.glc)
|
||||
fprintf(output, " glc");
|
||||
if (mubuf->dlc)
|
||||
if (mubuf.dlc)
|
||||
fprintf(output, " dlc");
|
||||
if (mubuf->slc)
|
||||
if (mubuf.slc)
|
||||
fprintf(output, " slc");
|
||||
if (mubuf->tfe)
|
||||
if (mubuf.tfe)
|
||||
fprintf(output, " tfe");
|
||||
if (mubuf->lds)
|
||||
if (mubuf.lds)
|
||||
fprintf(output, " lds");
|
||||
if (mubuf->disable_wqm)
|
||||
if (mubuf.disable_wqm)
|
||||
fprintf(output, " disable_wqm");
|
||||
print_sync(mubuf->sync, output);
|
||||
print_sync(mubuf.sync, output);
|
||||
break;
|
||||
}
|
||||
case Format::MIMG: {
|
||||
const MIMG_instruction* mimg = instr->mimg();
|
||||
const MIMG_instruction& mimg = instr->mimg();
|
||||
unsigned identity_dmask = !instr->definitions.empty() ?
|
||||
(1 << instr->definitions[0].size()) - 1 :
|
||||
0xf;
|
||||
if ((mimg->dmask & identity_dmask) != identity_dmask)
|
||||
if ((mimg.dmask & identity_dmask) != identity_dmask)
|
||||
fprintf(output, " dmask:%s%s%s%s",
|
||||
mimg->dmask & 0x1 ? "x" : "",
|
||||
mimg->dmask & 0x2 ? "y" : "",
|
||||
mimg->dmask & 0x4 ? "z" : "",
|
||||
mimg->dmask & 0x8 ? "w" : "");
|
||||
switch (mimg->dim) {
|
||||
mimg.dmask & 0x1 ? "x" : "",
|
||||
mimg.dmask & 0x2 ? "y" : "",
|
||||
mimg.dmask & 0x4 ? "z" : "",
|
||||
mimg.dmask & 0x8 ? "w" : "");
|
||||
switch (mimg.dim) {
|
||||
case ac_image_1d:
|
||||
fprintf(output, " 1d");
|
||||
break;
|
||||
|
|
@ -433,104 +433,104 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
|
|||
fprintf(output, " 2darraymsaa");
|
||||
break;
|
||||
}
|
||||
if (mimg->unrm)
|
||||
if (mimg.unrm)
|
||||
fprintf(output, " unrm");
|
||||
if (mimg->glc)
|
||||
if (mimg.glc)
|
||||
fprintf(output, " glc");
|
||||
if (mimg->dlc)
|
||||
if (mimg.dlc)
|
||||
fprintf(output, " dlc");
|
||||
if (mimg->slc)
|
||||
if (mimg.slc)
|
||||
fprintf(output, " slc");
|
||||
if (mimg->tfe)
|
||||
if (mimg.tfe)
|
||||
fprintf(output, " tfe");
|
||||
if (mimg->da)
|
||||
if (mimg.da)
|
||||
fprintf(output, " da");
|
||||
if (mimg->lwe)
|
||||
if (mimg.lwe)
|
||||
fprintf(output, " lwe");
|
||||
if (mimg->r128 || mimg->a16)
|
||||
if (mimg.r128 || mimg.a16)
|
||||
fprintf(output, " r128/a16");
|
||||
if (mimg->d16)
|
||||
if (mimg.d16)
|
||||
fprintf(output, " d16");
|
||||
if (mimg->disable_wqm)
|
||||
if (mimg.disable_wqm)
|
||||
fprintf(output, " disable_wqm");
|
||||
print_sync(mimg->sync, output);
|
||||
print_sync(mimg.sync, output);
|
||||
break;
|
||||
}
|
||||
case Format::EXP: {
|
||||
const Export_instruction* exp = instr->exp();
|
||||
unsigned identity_mask = exp->compressed ? 0x5 : 0xf;
|
||||
if ((exp->enabled_mask & identity_mask) != identity_mask)
|
||||
const Export_instruction& exp = instr->exp();
|
||||
unsigned identity_mask = exp.compressed ? 0x5 : 0xf;
|
||||
if ((exp.enabled_mask & identity_mask) != identity_mask)
|
||||
fprintf(output, " en:%c%c%c%c",
|
||||
exp->enabled_mask & 0x1 ? 'r' : '*',
|
||||
exp->enabled_mask & 0x2 ? 'g' : '*',
|
||||
exp->enabled_mask & 0x4 ? 'b' : '*',
|
||||
exp->enabled_mask & 0x8 ? 'a' : '*');
|
||||
if (exp->compressed)
|
||||
exp.enabled_mask & 0x1 ? 'r' : '*',
|
||||
exp.enabled_mask & 0x2 ? 'g' : '*',
|
||||
exp.enabled_mask & 0x4 ? 'b' : '*',
|
||||
exp.enabled_mask & 0x8 ? 'a' : '*');
|
||||
if (exp.compressed)
|
||||
fprintf(output, " compr");
|
||||
if (exp->done)
|
||||
if (exp.done)
|
||||
fprintf(output, " done");
|
||||
if (exp->valid_mask)
|
||||
if (exp.valid_mask)
|
||||
fprintf(output, " vm");
|
||||
|
||||
if (exp->dest <= V_008DFC_SQ_EXP_MRT + 7)
|
||||
fprintf(output, " mrt%d", exp->dest - V_008DFC_SQ_EXP_MRT);
|
||||
else if (exp->dest == V_008DFC_SQ_EXP_MRTZ)
|
||||
if (exp.dest <= V_008DFC_SQ_EXP_MRT + 7)
|
||||
fprintf(output, " mrt%d", exp.dest - V_008DFC_SQ_EXP_MRT);
|
||||
else if (exp.dest == V_008DFC_SQ_EXP_MRTZ)
|
||||
fprintf(output, " mrtz");
|
||||
else if (exp->dest == V_008DFC_SQ_EXP_NULL)
|
||||
else if (exp.dest == V_008DFC_SQ_EXP_NULL)
|
||||
fprintf(output, " null");
|
||||
else if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= V_008DFC_SQ_EXP_POS + 3)
|
||||
fprintf(output, " pos%d", exp->dest - V_008DFC_SQ_EXP_POS);
|
||||
else if (exp->dest >= V_008DFC_SQ_EXP_PARAM && exp->dest <= V_008DFC_SQ_EXP_PARAM + 31)
|
||||
fprintf(output, " param%d", exp->dest - V_008DFC_SQ_EXP_PARAM);
|
||||
else if (exp.dest >= V_008DFC_SQ_EXP_POS && exp.dest <= V_008DFC_SQ_EXP_POS + 3)
|
||||
fprintf(output, " pos%d", exp.dest - V_008DFC_SQ_EXP_POS);
|
||||
else if (exp.dest >= V_008DFC_SQ_EXP_PARAM && exp.dest <= V_008DFC_SQ_EXP_PARAM + 31)
|
||||
fprintf(output, " param%d", exp.dest - V_008DFC_SQ_EXP_PARAM);
|
||||
break;
|
||||
}
|
||||
case Format::PSEUDO_BRANCH: {
|
||||
const Pseudo_branch_instruction* branch = instr->branch();
|
||||
const Pseudo_branch_instruction& branch = instr->branch();
|
||||
/* Note: BB0 cannot be a branch target */
|
||||
if (branch->target[0] != 0)
|
||||
fprintf(output, " BB%d", branch->target[0]);
|
||||
if (branch->target[1] != 0)
|
||||
fprintf(output, ", BB%d", branch->target[1]);
|
||||
if (branch.target[0] != 0)
|
||||
fprintf(output, " BB%d", branch.target[0]);
|
||||
if (branch.target[1] != 0)
|
||||
fprintf(output, ", BB%d", branch.target[1]);
|
||||
break;
|
||||
}
|
||||
case Format::PSEUDO_REDUCTION: {
|
||||
const Pseudo_reduction_instruction* reduce = instr->reduction();
|
||||
fprintf(output, " op:%s", reduce_ops[reduce->reduce_op]);
|
||||
if (reduce->cluster_size)
|
||||
fprintf(output, " cluster_size:%u", reduce->cluster_size);
|
||||
const Pseudo_reduction_instruction& reduce = instr->reduction();
|
||||
fprintf(output, " op:%s", reduce_ops[reduce.reduce_op]);
|
||||
if (reduce.cluster_size)
|
||||
fprintf(output, " cluster_size:%u", reduce.cluster_size);
|
||||
break;
|
||||
}
|
||||
case Format::PSEUDO_BARRIER: {
|
||||
const Pseudo_barrier_instruction* barrier = instr->barrier();
|
||||
print_sync(barrier->sync, output);
|
||||
print_scope(barrier->exec_scope, output, "exec_scope");
|
||||
const Pseudo_barrier_instruction& barrier = instr->barrier();
|
||||
print_sync(barrier.sync, output);
|
||||
print_scope(barrier.exec_scope, output, "exec_scope");
|
||||
break;
|
||||
}
|
||||
case Format::FLAT:
|
||||
case Format::GLOBAL:
|
||||
case Format::SCRATCH: {
|
||||
const FLAT_instruction* flat = instr->flatlike();
|
||||
if (flat->offset)
|
||||
fprintf(output, " offset:%u", flat->offset);
|
||||
if (flat->glc)
|
||||
const FLAT_instruction& flat = instr->flatlike();
|
||||
if (flat.offset)
|
||||
fprintf(output, " offset:%u", flat.offset);
|
||||
if (flat.glc)
|
||||
fprintf(output, " glc");
|
||||
if (flat->dlc)
|
||||
if (flat.dlc)
|
||||
fprintf(output, " dlc");
|
||||
if (flat->slc)
|
||||
if (flat.slc)
|
||||
fprintf(output, " slc");
|
||||
if (flat->lds)
|
||||
if (flat.lds)
|
||||
fprintf(output, " lds");
|
||||
if (flat->nv)
|
||||
if (flat.nv)
|
||||
fprintf(output, " nv");
|
||||
if (flat->disable_wqm)
|
||||
if (flat.disable_wqm)
|
||||
fprintf(output, " disable_wqm");
|
||||
print_sync(flat->sync, output);
|
||||
print_sync(flat.sync, output);
|
||||
break;
|
||||
}
|
||||
case Format::MTBUF: {
|
||||
const MTBUF_instruction* mtbuf = instr->mtbuf();
|
||||
const MTBUF_instruction& mtbuf = instr->mtbuf();
|
||||
fprintf(output, " dfmt:");
|
||||
switch (mtbuf->dfmt) {
|
||||
switch (mtbuf.dfmt) {
|
||||
case V_008F0C_BUF_DATA_FORMAT_8: fprintf(output, "8"); break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_16: fprintf(output, "16"); break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_8_8: fprintf(output, "8_8"); break;
|
||||
|
|
@ -548,7 +548,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
|
|||
case V_008F0C_BUF_DATA_FORMAT_RESERVED_15: fprintf(output, "reserved15"); break;
|
||||
}
|
||||
fprintf(output, " nfmt:");
|
||||
switch (mtbuf->nfmt) {
|
||||
switch (mtbuf.nfmt) {
|
||||
case V_008F0C_BUF_NUM_FORMAT_UNORM: fprintf(output, "unorm"); break;
|
||||
case V_008F0C_BUF_NUM_FORMAT_SNORM: fprintf(output, "snorm"); break;
|
||||
case V_008F0C_BUF_NUM_FORMAT_USCALED: fprintf(output, "uscaled"); break;
|
||||
|
|
@ -558,27 +558,27 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
|
|||
case V_008F0C_BUF_NUM_FORMAT_SNORM_OGL: fprintf(output, "snorm"); break;
|
||||
case V_008F0C_BUF_NUM_FORMAT_FLOAT: fprintf(output, "float"); break;
|
||||
}
|
||||
if (mtbuf->offset)
|
||||
fprintf(output, " offset:%u", mtbuf->offset);
|
||||
if (mtbuf->offen)
|
||||
if (mtbuf.offset)
|
||||
fprintf(output, " offset:%u", mtbuf.offset);
|
||||
if (mtbuf.offen)
|
||||
fprintf(output, " offen");
|
||||
if (mtbuf->idxen)
|
||||
if (mtbuf.idxen)
|
||||
fprintf(output, " idxen");
|
||||
if (mtbuf->glc)
|
||||
if (mtbuf.glc)
|
||||
fprintf(output, " glc");
|
||||
if (mtbuf->dlc)
|
||||
if (mtbuf.dlc)
|
||||
fprintf(output, " dlc");
|
||||
if (mtbuf->slc)
|
||||
if (mtbuf.slc)
|
||||
fprintf(output, " slc");
|
||||
if (mtbuf->tfe)
|
||||
if (mtbuf.tfe)
|
||||
fprintf(output, " tfe");
|
||||
if (mtbuf->disable_wqm)
|
||||
if (mtbuf.disable_wqm)
|
||||
fprintf(output, " disable_wqm");
|
||||
print_sync(mtbuf->sync, output);
|
||||
print_sync(mtbuf.sync, output);
|
||||
break;
|
||||
}
|
||||
case Format::VOP3P: {
|
||||
if (instr->vop3p()->clamp)
|
||||
if (instr->vop3p().clamp)
|
||||
fprintf(output, " clamp");
|
||||
break;
|
||||
}
|
||||
|
|
@ -587,8 +587,8 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
|
|||
}
|
||||
}
|
||||
if (instr->isVOP3()) {
|
||||
const VOP3_instruction* vop3 = instr->vop3();
|
||||
switch (vop3->omod) {
|
||||
const VOP3_instruction& vop3 = instr->vop3();
|
||||
switch (vop3.omod) {
|
||||
case 1:
|
||||
fprintf(output, " *2");
|
||||
break;
|
||||
|
|
@ -599,50 +599,50 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
|
|||
fprintf(output, " *0.5");
|
||||
break;
|
||||
}
|
||||
if (vop3->clamp)
|
||||
if (vop3.clamp)
|
||||
fprintf(output, " clamp");
|
||||
if (vop3->opsel & (1 << 3))
|
||||
if (vop3.opsel & (1 << 3))
|
||||
fprintf(output, " opsel_hi");
|
||||
} else if (instr->isDPP()) {
|
||||
const DPP_instruction* dpp = instr->dpp();
|
||||
if (dpp->dpp_ctrl <= 0xff) {
|
||||
const DPP_instruction& dpp = instr->dpp();
|
||||
if (dpp.dpp_ctrl <= 0xff) {
|
||||
fprintf(output, " quad_perm:[%d,%d,%d,%d]",
|
||||
dpp->dpp_ctrl & 0x3, (dpp->dpp_ctrl >> 2) & 0x3,
|
||||
(dpp->dpp_ctrl >> 4) & 0x3, (dpp->dpp_ctrl >> 6) & 0x3);
|
||||
} else if (dpp->dpp_ctrl >= 0x101 && dpp->dpp_ctrl <= 0x10f) {
|
||||
fprintf(output, " row_shl:%d", dpp->dpp_ctrl & 0xf);
|
||||
} else if (dpp->dpp_ctrl >= 0x111 && dpp->dpp_ctrl <= 0x11f) {
|
||||
fprintf(output, " row_shr:%d", dpp->dpp_ctrl & 0xf);
|
||||
} else if (dpp->dpp_ctrl >= 0x121 && dpp->dpp_ctrl <= 0x12f) {
|
||||
fprintf(output, " row_ror:%d", dpp->dpp_ctrl & 0xf);
|
||||
} else if (dpp->dpp_ctrl == dpp_wf_sl1) {
|
||||
dpp.dpp_ctrl & 0x3, (dpp.dpp_ctrl >> 2) & 0x3,
|
||||
(dpp.dpp_ctrl >> 4) & 0x3, (dpp.dpp_ctrl >> 6) & 0x3);
|
||||
} else if (dpp.dpp_ctrl >= 0x101 && dpp.dpp_ctrl <= 0x10f) {
|
||||
fprintf(output, " row_shl:%d", dpp.dpp_ctrl & 0xf);
|
||||
} else if (dpp.dpp_ctrl >= 0x111 && dpp.dpp_ctrl <= 0x11f) {
|
||||
fprintf(output, " row_shr:%d", dpp.dpp_ctrl & 0xf);
|
||||
} else if (dpp.dpp_ctrl >= 0x121 && dpp.dpp_ctrl <= 0x12f) {
|
||||
fprintf(output, " row_ror:%d", dpp.dpp_ctrl & 0xf);
|
||||
} else if (dpp.dpp_ctrl == dpp_wf_sl1) {
|
||||
fprintf(output, " wave_shl:1");
|
||||
} else if (dpp->dpp_ctrl == dpp_wf_rl1) {
|
||||
} else if (dpp.dpp_ctrl == dpp_wf_rl1) {
|
||||
fprintf(output, " wave_rol:1");
|
||||
} else if (dpp->dpp_ctrl == dpp_wf_sr1) {
|
||||
} else if (dpp.dpp_ctrl == dpp_wf_sr1) {
|
||||
fprintf(output, " wave_shr:1");
|
||||
} else if (dpp->dpp_ctrl == dpp_wf_rr1) {
|
||||
} else if (dpp.dpp_ctrl == dpp_wf_rr1) {
|
||||
fprintf(output, " wave_ror:1");
|
||||
} else if (dpp->dpp_ctrl == dpp_row_mirror) {
|
||||
} else if (dpp.dpp_ctrl == dpp_row_mirror) {
|
||||
fprintf(output, " row_mirror");
|
||||
} else if (dpp->dpp_ctrl == dpp_row_half_mirror) {
|
||||
} else if (dpp.dpp_ctrl == dpp_row_half_mirror) {
|
||||
fprintf(output, " row_half_mirror");
|
||||
} else if (dpp->dpp_ctrl == dpp_row_bcast15) {
|
||||
} else if (dpp.dpp_ctrl == dpp_row_bcast15) {
|
||||
fprintf(output, " row_bcast:15");
|
||||
} else if (dpp->dpp_ctrl == dpp_row_bcast31) {
|
||||
} else if (dpp.dpp_ctrl == dpp_row_bcast31) {
|
||||
fprintf(output, " row_bcast:31");
|
||||
} else {
|
||||
fprintf(output, " dpp_ctrl:0x%.3x", dpp->dpp_ctrl);
|
||||
fprintf(output, " dpp_ctrl:0x%.3x", dpp.dpp_ctrl);
|
||||
}
|
||||
if (dpp->row_mask != 0xf)
|
||||
fprintf(output, " row_mask:0x%.1x", dpp->row_mask);
|
||||
if (dpp->bank_mask != 0xf)
|
||||
fprintf(output, " bank_mask:0x%.1x", dpp->bank_mask);
|
||||
if (dpp->bound_ctrl)
|
||||
if (dpp.row_mask != 0xf)
|
||||
fprintf(output, " row_mask:0x%.1x", dpp.row_mask);
|
||||
if (dpp.bank_mask != 0xf)
|
||||
fprintf(output, " bank_mask:0x%.1x", dpp.bank_mask);
|
||||
if (dpp.bound_ctrl)
|
||||
fprintf(output, " bound_ctrl:1");
|
||||
} else if (instr->isSDWA()) {
|
||||
const SDWA_instruction* sdwa = instr->sdwa();
|
||||
switch (sdwa->omod) {
|
||||
const SDWA_instruction& sdwa = instr->sdwa();
|
||||
switch (sdwa.omod) {
|
||||
case 1:
|
||||
fprintf(output, " *2");
|
||||
break;
|
||||
|
|
@ -653,25 +653,25 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
|
|||
fprintf(output, " *0.5");
|
||||
break;
|
||||
}
|
||||
if (sdwa->clamp)
|
||||
if (sdwa.clamp)
|
||||
fprintf(output, " clamp");
|
||||
switch (sdwa->dst_sel & sdwa_asuint) {
|
||||
switch (sdwa.dst_sel & sdwa_asuint) {
|
||||
case sdwa_udword:
|
||||
break;
|
||||
case sdwa_ubyte0:
|
||||
case sdwa_ubyte1:
|
||||
case sdwa_ubyte2:
|
||||
case sdwa_ubyte3:
|
||||
fprintf(output, " dst_sel:%sbyte%u", sdwa->dst_sel & sdwa_sext ? "s" : "u",
|
||||
sdwa->dst_sel & sdwa_bytenum);
|
||||
fprintf(output, " dst_sel:%sbyte%u", sdwa.dst_sel & sdwa_sext ? "s" : "u",
|
||||
sdwa.dst_sel & sdwa_bytenum);
|
||||
break;
|
||||
case sdwa_uword0:
|
||||
case sdwa_uword1:
|
||||
fprintf(output, " dst_sel:%sword%u", sdwa->dst_sel & sdwa_sext ? "s" : "u",
|
||||
sdwa->dst_sel & sdwa_wordnum);
|
||||
fprintf(output, " dst_sel:%sword%u", sdwa.dst_sel & sdwa_sext ? "s" : "u",
|
||||
sdwa.dst_sel & sdwa_wordnum);
|
||||
break;
|
||||
}
|
||||
if (sdwa->dst_preserve)
|
||||
if (sdwa.dst_preserve)
|
||||
fprintf(output, " dst_preserve");
|
||||
}
|
||||
}
|
||||
|
|
@ -693,28 +693,28 @@ void aco_print_instr(const Instruction *instr, FILE *output)
|
|||
bool *const opsel = (bool *)alloca(instr->operands.size() * sizeof(bool));
|
||||
uint8_t *const sel = (uint8_t *)alloca(instr->operands.size() * sizeof(uint8_t));
|
||||
if (instr->isVOP3()) {
|
||||
const VOP3_instruction* vop3 = instr->vop3();
|
||||
const VOP3_instruction& vop3 = instr->vop3();
|
||||
for (unsigned i = 0; i < instr->operands.size(); ++i) {
|
||||
abs[i] = vop3->abs[i];
|
||||
neg[i] = vop3->neg[i];
|
||||
opsel[i] = vop3->opsel & (1 << i);
|
||||
abs[i] = vop3.abs[i];
|
||||
neg[i] = vop3.neg[i];
|
||||
opsel[i] = vop3.opsel & (1 << i);
|
||||
sel[i] = sdwa_udword;
|
||||
}
|
||||
} else if (instr->isDPP()) {
|
||||
const DPP_instruction* dpp = instr->dpp();
|
||||
const DPP_instruction& dpp = instr->dpp();
|
||||
for (unsigned i = 0; i < instr->operands.size(); ++i) {
|
||||
abs[i] = i < 2 ? dpp->abs[i] : false;
|
||||
neg[i] = i < 2 ? dpp->neg[i] : false;
|
||||
abs[i] = i < 2 ? dpp.abs[i] : false;
|
||||
neg[i] = i < 2 ? dpp.neg[i] : false;
|
||||
opsel[i] = false;
|
||||
sel[i] = sdwa_udword;
|
||||
}
|
||||
} else if (instr->isSDWA()) {
|
||||
const SDWA_instruction* sdwa = instr->sdwa();
|
||||
const SDWA_instruction& sdwa = instr->sdwa();
|
||||
for (unsigned i = 0; i < instr->operands.size(); ++i) {
|
||||
abs[i] = i < 2 ? sdwa->abs[i] : false;
|
||||
neg[i] = i < 2 ? sdwa->neg[i] : false;
|
||||
abs[i] = i < 2 ? sdwa.abs[i] : false;
|
||||
neg[i] = i < 2 ? sdwa.neg[i] : false;
|
||||
opsel[i] = false;
|
||||
sel[i] = i < 2 ? sdwa->sel[i] : sdwa_udword;
|
||||
sel[i] = i < 2 ? sdwa.sel[i] : sdwa_udword;
|
||||
}
|
||||
} else {
|
||||
for (unsigned i = 0; i < instr->operands.size(); ++i) {
|
||||
|
|
@ -756,17 +756,17 @@ void aco_print_instr(const Instruction *instr, FILE *output)
|
|||
fprintf(output, "|");
|
||||
|
||||
if (instr->isVOP3P()) {
|
||||
const VOP3P_instruction* vop3 = instr->vop3p();
|
||||
if ((vop3->opsel_lo & (1 << i)) || !(vop3->opsel_hi & (1 << i))) {
|
||||
const VOP3P_instruction& vop3 = instr->vop3p();
|
||||
if ((vop3.opsel_lo & (1 << i)) || !(vop3.opsel_hi & (1 << i))) {
|
||||
fprintf(output, ".%c%c",
|
||||
vop3->opsel_lo & (1 << i) ? 'y' : 'x',
|
||||
vop3->opsel_hi & (1 << i) ? 'y' : 'x');
|
||||
vop3.opsel_lo & (1 << i) ? 'y' : 'x',
|
||||
vop3.opsel_hi & (1 << i) ? 'y' : 'x');
|
||||
}
|
||||
if (vop3->neg_lo[i] && vop3->neg_hi[i])
|
||||
if (vop3.neg_lo[i] && vop3.neg_hi[i])
|
||||
fprintf(output, "*[-1,-1]");
|
||||
else if (vop3->neg_lo[i])
|
||||
else if (vop3.neg_lo[i])
|
||||
fprintf(output, "*[-1,1]");
|
||||
else if (vop3->neg_hi[i])
|
||||
else if (vop3.neg_hi[i])
|
||||
fprintf(output, "*[1,-1]");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ void setup_reduce_temp(Program* program)
|
|||
if (instr->format != Format::PSEUDO_REDUCTION)
|
||||
continue;
|
||||
|
||||
ReduceOp op = instr->reduction()->reduce_op;
|
||||
ReduceOp op = instr->reduction().reduce_op;
|
||||
reduceTmp_in_loop |= block.loop_nest_depth > 0;
|
||||
|
||||
if ((int)last_top_level_block_idx != inserted_at) {
|
||||
|
|
@ -115,7 +115,7 @@ void setup_reduce_temp(Program* program)
|
|||
}
|
||||
|
||||
/* same as before, except for the vector temporary instead of the reduce temporary */
|
||||
unsigned cluster_size = instr->reduction()->cluster_size;
|
||||
unsigned cluster_size = instr->reduction().cluster_size;
|
||||
bool need_vtmp = op == imul32 || op == fadd64 || op == fmul64 ||
|
||||
op == fmin64 || op == fmax64 || op == umin64 ||
|
||||
op == umax64 || op == imin64 || op == imax64 ||
|
||||
|
|
|
|||
|
|
@ -503,13 +503,13 @@ void add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx
|
|||
update_phi_map(ctx, tmp.get(), instr.get());
|
||||
return;
|
||||
} else if (rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, idx, byte / 2)) {
|
||||
instr->vop3()->opsel |= (byte / 2) << idx;
|
||||
instr->vop3().opsel |= (byte / 2) << idx;
|
||||
return;
|
||||
} else if (instr->isVOP3P() && byte == 2) {
|
||||
VOP3P_instruction* vop3p = instr->vop3p();
|
||||
assert(!(vop3p->opsel_lo & (1 << idx)));
|
||||
vop3p->opsel_lo |= 1 << idx;
|
||||
vop3p->opsel_hi |= 1 << idx;
|
||||
VOP3P_instruction& vop3p = instr->vop3p();
|
||||
assert(!(vop3p.opsel_lo & (1 << idx)));
|
||||
vop3p.opsel_lo |= 1 << idx;
|
||||
vop3p.opsel_hi |= 1 << idx;
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -613,9 +613,9 @@ void add_subdword_definition(Program *program, aco_ptr<Instruction>& instr, unsi
|
|||
convert_to_SDWA(chip, instr);
|
||||
return;
|
||||
} else if (reg.byte() && rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, -1, reg.byte() / 2)) {
|
||||
VOP3_instruction *vop3 = instr->vop3();
|
||||
VOP3_instruction& vop3 = instr->vop3();
|
||||
if (reg.byte() == 2)
|
||||
vop3->opsel |= (1 << 3); /* dst in high half */
|
||||
vop3.opsel |= (1 << 3); /* dst in high half */
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -1569,7 +1569,7 @@ void handle_pseudo(ra_ctx& ctx,
|
|||
return;
|
||||
|
||||
if (reg_file[scc]) {
|
||||
instr->pseudo()->tmp_in_scc = true;
|
||||
instr->pseudo().tmp_in_scc = true;
|
||||
|
||||
int reg = ctx.max_used_sgpr;
|
||||
for (; reg >= 0 && reg_file[PhysReg{(unsigned)reg}]; reg--)
|
||||
|
|
@ -1585,9 +1585,9 @@ void handle_pseudo(ra_ctx& ctx,
|
|||
}
|
||||
|
||||
adjust_max_used_regs(ctx, s1, reg);
|
||||
instr->pseudo()->scratch_sgpr = PhysReg{(unsigned)reg};
|
||||
instr->pseudo().scratch_sgpr = PhysReg{(unsigned)reg};
|
||||
} else {
|
||||
instr->pseudo()->tmp_in_scc = false;
|
||||
instr->pseudo().tmp_in_scc = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2156,7 +2156,7 @@ void register_allocation(Program *program, std::vector<IDSet>& live_out_per_bloc
|
|||
|
||||
if (instr->isEXP() ||
|
||||
(instr->isVMEM() && i == 3 && ctx.program->chip_class == GFX6) ||
|
||||
(instr->isDS() && instr->ds()->gds)) {
|
||||
(instr->isDS() && instr->ds().gds)) {
|
||||
for (unsigned j = 0; j < operand.size(); j++)
|
||||
ctx.war_hint.set(operand.physReg().reg() + j);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -320,7 +320,7 @@ void MoveState::upwards_skip()
|
|||
bool is_gs_or_done_sendmsg(const Instruction *instr)
|
||||
{
|
||||
if (instr->opcode == aco_opcode::s_sendmsg) {
|
||||
uint16_t imm = instr->sopp()->imm;
|
||||
uint16_t imm = instr->sopp().imm;
|
||||
return (imm & sendmsg_id_mask) == _sendmsg_gs ||
|
||||
(imm & sendmsg_id_mask) == _sendmsg_gs_done;
|
||||
}
|
||||
|
|
@ -330,7 +330,7 @@ bool is_gs_or_done_sendmsg(const Instruction *instr)
|
|||
bool is_done_sendmsg(const Instruction *instr)
|
||||
{
|
||||
if (instr->opcode == aco_opcode::s_sendmsg)
|
||||
return (instr->sopp()->imm & sendmsg_id_mask) == _sendmsg_gs_done;
|
||||
return (instr->sopp().imm & sendmsg_id_mask) == _sendmsg_gs_done;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -380,14 +380,14 @@ void add_memory_event(memory_event_set *set, Instruction *instr, memory_sync_inf
|
|||
{
|
||||
set->has_control_barrier |= is_done_sendmsg(instr);
|
||||
if (instr->opcode == aco_opcode::p_barrier) {
|
||||
Pseudo_barrier_instruction *bar = instr->barrier();
|
||||
if (bar->sync.semantics & semantic_acquire)
|
||||
set->bar_acquire |= bar->sync.storage;
|
||||
if (bar->sync.semantics & semantic_release)
|
||||
set->bar_release |= bar->sync.storage;
|
||||
set->bar_classes |= bar->sync.storage;
|
||||
Pseudo_barrier_instruction& bar = instr->barrier();
|
||||
if (bar.sync.semantics & semantic_acquire)
|
||||
set->bar_acquire |= bar.sync.storage;
|
||||
if (bar.sync.semantics & semantic_release)
|
||||
set->bar_release |= bar.sync.storage;
|
||||
set->bar_classes |= bar.sync.storage;
|
||||
|
||||
set->has_control_barrier |= bar->exec_scope > scope_invocation;
|
||||
set->has_control_barrier |= bar.exec_scope > scope_invocation;
|
||||
}
|
||||
|
||||
if (!sync->storage)
|
||||
|
|
@ -857,7 +857,7 @@ void schedule_block(sched_ctx& ctx, Program *program, Block* block, live& live_v
|
|||
Instruction* current = block->instructions[idx].get();
|
||||
|
||||
if (block->kind & block_kind_export_end && current->isEXP()) {
|
||||
unsigned target = current->exp()->dest;
|
||||
unsigned target = current->exp().dest;
|
||||
if (target >= V_008DFC_SQ_EXP_POS && target < V_008DFC_SQ_EXP_PRIM) {
|
||||
ctx.mv.current = current;
|
||||
schedule_position_export(ctx, block, live_vars.register_demand[block->index], current, idx);
|
||||
|
|
|
|||
|
|
@ -283,7 +283,7 @@ aco_ptr<Instruction> do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t
|
|||
res.reset(create_instruction<Pseudo_instruction>(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
|
||||
} else if (instr->isSOPK()) {
|
||||
res.reset(create_instruction<SOPK_instruction>(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
|
||||
res->sopk()->imm = instr->sopk()->imm;
|
||||
res->sopk().imm = instr->sopk().imm;
|
||||
}
|
||||
for (unsigned i = 0; i < instr->operands.size(); i++) {
|
||||
res->operands[i] = instr->operands[i];
|
||||
|
|
@ -1589,11 +1589,11 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
|||
bld.insert(split);
|
||||
for (unsigned i = 0; i < temp.size(); i++) {
|
||||
Instruction *instr = bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false, true);
|
||||
instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
||||
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
||||
}
|
||||
} else {
|
||||
Instruction *instr = bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, temp, offset, false, true);
|
||||
instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
||||
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
||||
}
|
||||
} else {
|
||||
ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
|
||||
|
|
@ -1658,12 +1658,12 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
|||
Temp tmp = bld.tmp(v1);
|
||||
vec->operands[i] = Operand(tmp);
|
||||
Instruction *instr = bld.mubuf(opcode, Definition(tmp), scratch_rsrc, Operand(v1), scratch_offset, offset + i * 4, false, true);
|
||||
instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
||||
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
||||
}
|
||||
bld.insert(vec);
|
||||
} else {
|
||||
Instruction *instr = bld.mubuf(opcode, def, scratch_rsrc, Operand(v1), scratch_offset, offset, false, true);
|
||||
instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
||||
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
||||
}
|
||||
} else {
|
||||
uint32_t spill_slot = slots[spill_id];
|
||||
|
|
|
|||
|
|
@ -178,10 +178,10 @@ void try_remove_invert_block(ssa_elimination_ctx& ctx, Block* block)
|
|||
pred->linear_succs[0] = succ_idx;
|
||||
ctx.program->blocks[succ_idx].linear_preds[i] = pred->index;
|
||||
|
||||
Pseudo_branch_instruction *branch = pred->instructions.back()->branch();
|
||||
assert(branch->isBranch());
|
||||
branch->target[0] = succ_idx;
|
||||
branch->target[1] = succ_idx;
|
||||
Pseudo_branch_instruction& branch = pred->instructions.back()->branch();
|
||||
assert(branch.isBranch());
|
||||
branch.target[0] = succ_idx;
|
||||
branch.target[1] = succ_idx;
|
||||
}
|
||||
|
||||
block->instructions.clear();
|
||||
|
|
@ -196,17 +196,17 @@ void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block)
|
|||
|
||||
Block& pred = ctx.program->blocks[block->linear_preds[0]];
|
||||
Block& succ = ctx.program->blocks[block->linear_succs[0]];
|
||||
Pseudo_branch_instruction* branch = pred.instructions.back()->branch();
|
||||
if (branch->opcode == aco_opcode::p_branch) {
|
||||
branch->target[0] = succ.index;
|
||||
branch->target[1] = succ.index;
|
||||
} else if (branch->target[0] == block->index) {
|
||||
branch->target[0] = succ.index;
|
||||
} else if (branch->target[0] == succ.index) {
|
||||
assert(branch->target[1] == block->index);
|
||||
branch->target[1] = succ.index;
|
||||
branch->opcode = aco_opcode::p_branch;
|
||||
} else if (branch->target[1] == block->index) {
|
||||
Pseudo_branch_instruction& branch = pred.instructions.back()->branch();
|
||||
if (branch.opcode == aco_opcode::p_branch) {
|
||||
branch.target[0] = succ.index;
|
||||
branch.target[1] = succ.index;
|
||||
} else if (branch.target[0] == block->index) {
|
||||
branch.target[0] = succ.index;
|
||||
} else if (branch.target[0] == succ.index) {
|
||||
assert(branch.target[1] == block->index);
|
||||
branch.target[1] = succ.index;
|
||||
branch.opcode = aco_opcode::p_branch;
|
||||
} else if (branch.target[1] == block->index) {
|
||||
/* check if there is a fall-through path from block to succ */
|
||||
bool falls_through = block->index < succ.index;
|
||||
for (unsigned j = block->index + 1; falls_through && j < succ.index; j++) {
|
||||
|
|
@ -215,35 +215,35 @@ void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block)
|
|||
falls_through = false;
|
||||
}
|
||||
if (falls_through) {
|
||||
branch->target[1] = succ.index;
|
||||
branch.target[1] = succ.index;
|
||||
} else {
|
||||
/* check if there is a fall-through path for the alternative target */
|
||||
if (block->index >= branch->target[0])
|
||||
if (block->index >= branch.target[0])
|
||||
return;
|
||||
for (unsigned j = block->index + 1; j < branch->target[0]; j++) {
|
||||
for (unsigned j = block->index + 1; j < branch.target[0]; j++) {
|
||||
if (!ctx.program->blocks[j].instructions.empty())
|
||||
return;
|
||||
}
|
||||
|
||||
/* This is a (uniform) break or continue block. The branch condition has to be inverted. */
|
||||
if (branch->opcode == aco_opcode::p_cbranch_z)
|
||||
branch->opcode = aco_opcode::p_cbranch_nz;
|
||||
else if (branch->opcode == aco_opcode::p_cbranch_nz)
|
||||
branch->opcode = aco_opcode::p_cbranch_z;
|
||||
if (branch.opcode == aco_opcode::p_cbranch_z)
|
||||
branch.opcode = aco_opcode::p_cbranch_nz;
|
||||
else if (branch.opcode == aco_opcode::p_cbranch_nz)
|
||||
branch.opcode = aco_opcode::p_cbranch_z;
|
||||
else
|
||||
assert(false);
|
||||
/* also invert the linear successors */
|
||||
pred.linear_succs[0] = pred.linear_succs[1];
|
||||
pred.linear_succs[1] = succ.index;
|
||||
branch->target[1] = branch->target[0];
|
||||
branch->target[0] = succ.index;
|
||||
branch.target[1] = branch.target[0];
|
||||
branch.target[0] = succ.index;
|
||||
}
|
||||
} else {
|
||||
assert(false);
|
||||
}
|
||||
|
||||
if (branch->target[0] == branch->target[1])
|
||||
branch->opcode = aco_opcode::p_branch;
|
||||
if (branch.target[0] == branch.target[1])
|
||||
branch.opcode = aco_opcode::p_branch;
|
||||
|
||||
for (unsigned i = 0; i < pred.linear_succs.size(); i++)
|
||||
if (pred.linear_succs[i] == block->index)
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ void collect_preasm_stats(Program *program)
|
|||
program->statistics[statistic_instructions] += block.instructions.size();
|
||||
|
||||
for (aco_ptr<Instruction>& instr : block.instructions) {
|
||||
if (instr->isSOPP() && instr->sopp()->block != -1)
|
||||
if (instr->isSOPP() && instr->sopp().block != -1)
|
||||
program->statistics[statistic_branches]++;
|
||||
|
||||
if (instr->opcode == aco_opcode::p_constaddr)
|
||||
|
|
|
|||
|
|
@ -148,10 +148,10 @@ bool validate_ir(Program* program)
|
|||
|
||||
check(program->chip_class >= GFX8, "SDWA is GFX8+ only", instr.get());
|
||||
|
||||
SDWA_instruction *sdwa = instr->sdwa();
|
||||
check(sdwa->omod == 0 || program->chip_class >= GFX9, "SDWA omod only supported on GFX9+", instr.get());
|
||||
SDWA_instruction& sdwa = instr->sdwa();
|
||||
check(sdwa.omod == 0 || program->chip_class >= GFX9, "SDWA omod only supported on GFX9+", instr.get());
|
||||
if (base_format == Format::VOPC) {
|
||||
check(sdwa->clamp == false || program->chip_class == GFX8, "SDWA VOPC clamp only supported on GFX8", instr.get());
|
||||
check(sdwa.clamp == false || program->chip_class == GFX8, "SDWA VOPC clamp only supported on GFX8", instr.get());
|
||||
check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
|
||||
program->chip_class >= GFX9,
|
||||
"SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
|
||||
|
|
@ -183,21 +183,21 @@ bool validate_ir(Program* program)
|
|||
}
|
||||
|
||||
if (instr->definitions[0].regClass().is_subdword())
|
||||
check((sdwa->dst_sel & sdwa_asuint) == (sdwa_isra | instr->definitions[0].bytes()), "Unexpected SDWA sel for sub-dword definition", instr.get());
|
||||
check((sdwa.dst_sel & sdwa_asuint) == (sdwa_isra | instr->definitions[0].bytes()), "Unexpected SDWA sel for sub-dword definition", instr.get());
|
||||
}
|
||||
|
||||
/* check opsel */
|
||||
if (instr->isVOP3()) {
|
||||
VOP3_instruction *vop3 = instr->vop3();
|
||||
check(vop3->opsel == 0 || program->chip_class >= GFX9, "Opsel is only supported on GFX9+", instr.get());
|
||||
VOP3_instruction& vop3 = instr->vop3();
|
||||
check(vop3.opsel == 0 || program->chip_class >= GFX9, "Opsel is only supported on GFX9+", instr.get());
|
||||
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
if (i >= instr->operands.size() ||
|
||||
(instr->operands[i].hasRegClass() && instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed()))
|
||||
check((vop3->opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get());
|
||||
check((vop3.opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get());
|
||||
}
|
||||
if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())
|
||||
check((vop3->opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition", instr.get());
|
||||
check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition", instr.get());
|
||||
}
|
||||
|
||||
/* check for undefs */
|
||||
|
|
@ -377,7 +377,7 @@ bool validate_ir(Program* program)
|
|||
for (const Operand &op : instr->operands)
|
||||
check(op.regClass().type() == RegType::vgpr, "All operands of PSEUDO_REDUCTION instructions must be in VGPRs.", instr.get());
|
||||
|
||||
if (instr->opcode == aco_opcode::p_reduce && instr->reduction()->cluster_size == program->wave_size)
|
||||
if (instr->opcode == aco_opcode::p_reduce && instr->reduction().cluster_size == program->wave_size)
|
||||
check(instr->definitions[0].regClass().type() == RegType::sgpr, "The result of unclustered reductions must go into an SGPR.", instr.get());
|
||||
else
|
||||
check(instr->definitions[0].regClass().type() == RegType::vgpr, "The result of scans and clustered reductions must go into a VGPR.", instr.get());
|
||||
|
|
@ -549,7 +549,7 @@ bool validate_subdword_operand(chip_class chip, const aco_ptr<Instruction>& inst
|
|||
return byte == 0;
|
||||
if (instr->isPseudo() && chip >= GFX8)
|
||||
return true;
|
||||
if (instr->isSDWA() && (instr->sdwa()->sel[index] & sdwa_asuint) == (sdwa_isra | op.bytes()))
|
||||
if (instr->isSDWA() && (instr->sdwa().sel[index] & sdwa_asuint) == (sdwa_isra | op.bytes()))
|
||||
return true;
|
||||
if (byte == 2 && can_use_opsel(chip, instr->opcode, index, 1))
|
||||
return true;
|
||||
|
|
@ -599,7 +599,7 @@ bool validate_subdword_definition(chip_class chip, const aco_ptr<Instruction>& i
|
|||
|
||||
if (instr->isPseudo() && chip >= GFX8)
|
||||
return true;
|
||||
if (instr->isSDWA() && instr->sdwa()->dst_sel == (sdwa_isra | def.bytes()))
|
||||
if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes()))
|
||||
return true;
|
||||
if (byte == 2 && can_use_opsel(chip, instr->opcode, -1, 1))
|
||||
return true;
|
||||
|
|
@ -630,7 +630,7 @@ unsigned get_subdword_bytes_written(Program *program, const aco_ptr<Instruction>
|
|||
|
||||
if (instr->isPseudo())
|
||||
return chip >= GFX8 ? def.bytes() : def.size() * 4u;
|
||||
if (instr->isSDWA() && instr->sdwa()->dst_sel == (sdwa_isra | def.bytes()))
|
||||
if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes()))
|
||||
return def.bytes();
|
||||
|
||||
switch (instr->opcode) {
|
||||
|
|
|
|||
|
|
@ -735,7 +735,7 @@ BEGIN_TEST(optimize.add3)
|
|||
//! v1: %res1 = v_add_u32 %a, %tmp1
|
||||
//! p_unit_test 1, %res1
|
||||
tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
|
||||
tmp.instr->vop3()->clamp = true;
|
||||
tmp.instr->vop3().clamp = true;
|
||||
writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
|
||||
|
||||
//! v1: %tmp2 = v_add_u32 %b, %c
|
||||
|
|
@ -743,7 +743,7 @@ BEGIN_TEST(optimize.add3)
|
|||
//! p_unit_test 2, %res2
|
||||
tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
|
||||
tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp);
|
||||
tmp.instr->vop3()->clamp = true;
|
||||
tmp.instr->vop3().clamp = true;
|
||||
writeout(2, tmp);
|
||||
|
||||
finish_opt_test();
|
||||
|
|
|
|||
|
|
@ -197,7 +197,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
|||
Definition(v0_lo, v1),
|
||||
Operand(v0_lo, v1b), Operand(v0_lo, v1b),
|
||||
Operand(v0_lo, v1b), Operand(v0_lo, v1b));
|
||||
pseudo->pseudo()->scratch_sgpr = m0;
|
||||
pseudo->pseudo().scratch_sgpr = m0;
|
||||
|
||||
//~gfx[67]! p_unit_test 14
|
||||
//~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue