aco: change return type of create_instruction() to Instruction*

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28370>
This commit is contained in:
Daniel Schürmann 2024-03-25 12:05:50 +01:00 committed by Marge Bot
parent cd62f97719
commit 9b0ebcc39b
17 changed files with 298 additions and 297 deletions

View file

@ -611,9 +611,9 @@ handle_instruction_gfx6(State& state, NOP_ctx_gfx6& ctx, aco_ptr<Instruction>& i
// TODO: try to schedule the NOP-causing instruction up to reduce the number of stall cycles
if (NOPs) {
/* create NOP */
aco_ptr<SALU_instruction> nop{
aco_ptr<Instruction> nop{
create_instruction<SALU_instruction>(aco_opcode::s_nop, Format::SOPP, 0, 0)};
nop->imm = NOPs - 1;
nop->salu().imm = NOPs - 1;
new_instructions.emplace_back(std::move(nop));
}

View file

@ -237,7 +237,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
/* create ssa names for outer exec masks */
if (info.has_discard && preds.size() > 1) {
aco_ptr<Pseudo_instruction> phi;
aco_ptr<Instruction> phi;
for (int i = 0; i < info.num_exec_masks - 1; i++) {
phi.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi,
Format::PSEUDO, preds.size(), 1));
@ -251,7 +251,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
if (info.has_divergent_continue) {
/* create ssa name for loop active mask */
aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> phi{create_instruction<Pseudo_instruction>(
aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
phi->definitions[0] = bld.def(bld.lm);
phi->operands[0] = get_exec_op(ctx.info[preds[0]].exec.back().first);
@ -312,7 +312,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
ctx.info[idx].exec.emplace_back(same, type);
} else {
/* create phi for loop footer */
aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> phi{create_instruction<Pseudo_instruction>(
aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
phi->definitions[0] = bld.def(bld.lm);
for (unsigned i = 0; i < phi->operands.size(); i++)

View file

@ -993,17 +993,17 @@ emit_waitcnt(wait_ctx& ctx, std::vector<aco_ptr<Instruction>>& instructions, wai
{
if (imm.vs != wait_imm::unset_counter) {
assert(ctx.gfx_level >= GFX10);
SALU_instruction* waitcnt_vs =
Instruction* waitcnt_vs =
create_instruction<SALU_instruction>(aco_opcode::s_waitcnt_vscnt, Format::SOPK, 1, 0);
waitcnt_vs->operands[0] = Operand(sgpr_null, s1);
waitcnt_vs->imm = imm.vs;
waitcnt_vs->salu().imm = imm.vs;
instructions.emplace_back(waitcnt_vs);
imm.vs = wait_imm::unset_counter;
}
if (!imm.empty()) {
SALU_instruction* waitcnt =
Instruction* waitcnt =
create_instruction<SALU_instruction>(aco_opcode::s_waitcnt, Format::SOPP, 0, 0);
waitcnt->imm = imm.pack(ctx.gfx_level);
waitcnt->salu().imm = imm.pack(ctx.gfx_level);
instructions.emplace_back(waitcnt);
}
imm = wait_imm();
@ -1030,9 +1030,9 @@ emit_delay_alu(wait_ctx& ctx, std::vector<aco_ptr<Instruction>>& instructions,
imm |= ((uint32_t)alu_delay_wait::SALU_CYCLE_1 + cycles - 1) << (imm ? 7 : 0);
}
SALU_instruction* inst =
Instruction* inst =
create_instruction<SALU_instruction>(aco_opcode::s_delay_alu, Format::SOPP, 0, 0);
inst->imm = imm;
inst->salu().imm = imm;
inst->pass_flags = (delay.valu_cycles | (delay.trans_cycles << 16));
instructions.emplace_back(inst);
delay = alu_delay_info();

View file

@ -380,7 +380,7 @@ emit_split_vector(isel_context* ctx, Temp vec_src, unsigned num_components)
} else {
rc = RegClass(vec_src.type(), vec_src.size() / num_components);
}
aco_ptr<Pseudo_instruction> split{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> split{create_instruction<Pseudo_instruction>(
aco_opcode::p_split_vector, Format::PSEUDO, 1, num_components)};
split->operands[0] = Operand(vec_src);
std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
@ -432,7 +432,7 @@ expand_vector(isel_context* ctx, Temp vec_src, Temp dst, unsigned num_components
if (zero_padding)
padding = bld.copy(bld.def(dst_rc), Operand::zero(component_bytes));
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)};
vec->definitions[0] = Definition(dst);
unsigned k = 0;
@ -553,7 +553,7 @@ byte_align_vector(isel_context* ctx, Temp vec, Operand offset, Temp dst, unsigne
if (dst.type() == RegType::vgpr) {
/* if dst is vgpr - split the src and create a shrunk version according to the mask. */
num_components = dst.bytes() / component_size;
aco_ptr<Pseudo_instruction> create_vec{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> create_vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)};
for (unsigned i = 0; i < num_components; i++)
create_vec->operands[i] = Operand(elems[i]);
@ -749,7 +749,7 @@ get_alu_src(struct isel_context* ctx, nir_alu_src src, unsigned size = 1)
} else {
assert(size <= 4);
std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
aco_ptr<Pseudo_instruction> vec_instr{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> vec_instr{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, size, 1)};
for (unsigned i = 0; i < size; ++i) {
elems[i] = emit_extract_vector(ctx, vec, src.swizzle[i], elem_rc);
@ -823,7 +823,7 @@ void
emit_sop2_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst,
bool writes_scc, uint8_t uses_ub = 0)
{
aco_ptr<SALU_instruction> sop2{
aco_ptr<Instruction> sop2{
create_instruction<SALU_instruction>(op, Format::SOP2, 2, writes_scc ? 2 : 1)};
sop2->operands[0] = Operand(get_alu_src(ctx, instr->src[0]));
sop2->operands[1] = Operand(get_alu_src(ctx, instr->src[1]));
@ -1407,7 +1407,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
elems[i] = get_alu_src(ctx, instr->src[i]);
if (instr->def.bit_size >= 32 || dst.type() == RegType::vgpr) {
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, instr->def.num_components, 1)};
RegClass elem_rc = RegClass::get(RegType::vgpr, instr->def.bit_size / 8u);
for (unsigned i = 0; i < num; ++i) {
@ -1484,7 +1484,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
if (dst.size() == 1)
bld.copy(Definition(dst), packed[0]);
else {
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
vec->definitions[0] = Definition(dst);
for (unsigned i = 0; i < dst.size(); ++i)
@ -3954,7 +3954,7 @@ visit_load_const(isel_context* ctx, nir_load_const_instr* instr)
bld.copy(Definition(dst), Operand::c32(instr->value[0].u32));
} else {
assert(dst.size() != 1);
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
if (instr->def.bit_size == 64)
for (unsigned i = 0; i < dst.size(); i++)
@ -3978,7 +3978,7 @@ emit_readfirstlane(isel_context* ctx, Temp src, Temp dst)
} else if (src.size() == 1) {
bld.vop1(aco_opcode::v_readfirstlane_b32, Definition(dst), src);
} else {
aco_ptr<Pseudo_instruction> split{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> split{create_instruction<Pseudo_instruction>(
aco_opcode::p_split_vector, Format::PSEUDO, 1, src.size())};
split->operands[0] = Operand(src);
@ -3990,7 +3990,7 @@ emit_readfirstlane(isel_context* ctx, Temp src, Temp dst)
Instruction* split_raw = split.get();
ctx->block->instructions.emplace_back(std::move(split));
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, src.size(), 1)};
vec->definitions[0] = Definition(dst);
for (unsigned i = 0; i < src.size(); i++) {
@ -4246,7 +4246,7 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
tmp[num_tmps++] = vals[i++];
}
if (num_tmps > 1) {
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, num_tmps, 1)};
for (unsigned j = 0; j < num_tmps; j++)
vec->operands[j] = Operand(tmp[j]);
@ -4272,7 +4272,7 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
allocated_vec[components_split++] = tmp[0];
} else {
assert(tmp_size % elem_rc.bytes() == 0);
aco_ptr<Pseudo_instruction> split{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> split{create_instruction<Pseudo_instruction>(
aco_opcode::p_split_vector, Format::PSEUDO, 1, tmp_size / elem_rc.bytes())};
for (auto& def : split->definitions) {
Temp component = bld.tmp(elem_rc);
@ -4305,7 +4305,7 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
int padding_bytes =
MAX2((int)info.dst.bytes() - int(allocated_vec[0].bytes() * info.num_components), 0);
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, info.num_components + !!padding_bytes, 1)};
for (unsigned i = 0; i < info.num_components; i++)
vec->operands[i] = Operand(allocated_vec[i]);
@ -4440,7 +4440,7 @@ smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned
op = buffer ? aco_opcode::s_buffer_load_dwordx16 : aco_opcode::s_load_dwordx16;
}
aco_ptr<SMEM_instruction> load{create_instruction<SMEM_instruction>(op, Format::SMEM, 2, 1)};
aco_ptr<Instruction> load{create_instruction<SMEM_instruction>(op, Format::SMEM, 2, 1)};
if (buffer) {
if (const_offset)
offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset,
@ -4460,9 +4460,10 @@ smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned
RegClass rc(RegType::sgpr, DIV_ROUND_UP(bytes_needed, 4u));
Temp val = dst_hint.id() && dst_hint.regClass() == rc ? dst_hint : bld.tmp(rc);
load->definitions[0] = Definition(val);
load->glc = info.glc;
load->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
load->sync = info.sync;
load->smem().glc = info.glc;
load->smem().dlc =
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
load->smem().sync = info.sync;
bld.insert(std::move(load));
return val;
}
@ -4514,18 +4515,19 @@ mubuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne
bytes_size = 16;
op = aco_opcode::buffer_load_dwordx4;
}
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
aco_ptr<Instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
mubuf->operands[0] = Operand(info.resource);
mubuf->operands[1] = vaddr;
mubuf->operands[2] = soffset;
mubuf->offen = offen;
mubuf->idxen = idxen;
mubuf->glc = info.glc;
mubuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
mubuf->slc = info.slc;
mubuf->sync = info.sync;
mubuf->offset = const_offset;
mubuf->swizzled = info.swizzle_component_size != 0;
mubuf->mubuf().offen = offen;
mubuf->mubuf().idxen = idxen;
mubuf->mubuf().glc = info.glc;
mubuf->mubuf().dlc =
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
mubuf->mubuf().slc = info.slc;
mubuf->mubuf().sync = info.sync;
mubuf->mubuf().offset = const_offset;
mubuf->mubuf().swizzled = info.swizzle_component_size != 0;
RegClass rc = RegClass::get(RegType::vgpr, bytes_size);
Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
mubuf->definitions[0] = Definition(val);
@ -4581,17 +4583,18 @@ mubuf_load_format_callback(Builder& bld, const LoadEmitInfo& info, Temp offset,
}
}
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
aco_ptr<Instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
mubuf->operands[0] = Operand(info.resource);
mubuf->operands[1] = vaddr;
mubuf->operands[2] = soffset;
mubuf->offen = offen;
mubuf->idxen = idxen;
mubuf->glc = info.glc;
mubuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
mubuf->slc = info.slc;
mubuf->sync = info.sync;
mubuf->offset = const_offset;
mubuf->mubuf().offen = offen;
mubuf->mubuf().idxen = idxen;
mubuf->mubuf().glc = info.glc;
mubuf->mubuf().dlc =
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
mubuf->mubuf().slc = info.slc;
mubuf->mubuf().sync = info.sync;
mubuf->mubuf().offset = const_offset;
RegClass rc = RegClass::get(RegType::vgpr, bytes_needed);
Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
mubuf->definitions[0] = Definition(val);
@ -4629,11 +4632,11 @@ scratch_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsig
}
RegClass rc = RegClass::get(RegType::vgpr, bytes_size);
Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
aco_ptr<FLAT_instruction> flat{create_instruction<FLAT_instruction>(op, Format::SCRATCH, 2, 1)};
aco_ptr<Instruction> flat{create_instruction<FLAT_instruction>(op, Format::SCRATCH, 2, 1)};
flat->operands[0] = offset.regClass() == s1 ? Operand(v1) : Operand(offset);
flat->operands[1] = offset.regClass() == s1 ? Operand(offset) : Operand(s1);
flat->sync = info.sync;
flat->offset = const_offset;
flat->scratch().sync = info.sync;
flat->scratch().offset = const_offset;
flat->definitions[0] = Definition(val);
bld.insert(std::move(flat));
@ -4793,21 +4796,20 @@ global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsign
RegClass rc = RegClass::get(RegType::vgpr, bytes_size);
Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
if (use_mubuf) {
aco_ptr<MUBUF_instruction> mubuf{
create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
aco_ptr<Instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
mubuf->operands[0] = Operand(get_gfx6_global_rsrc(bld, addr));
mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
mubuf->operands[2] = Operand(offset);
mubuf->glc = info.glc;
mubuf->dlc = false;
mubuf->offset = const_offset;
mubuf->addr64 = addr.type() == RegType::vgpr;
mubuf->disable_wqm = false;
mubuf->sync = info.sync;
mubuf->mubuf().glc = info.glc;
mubuf->mubuf().dlc = false;
mubuf->mubuf().offset = const_offset;
mubuf->mubuf().addr64 = addr.type() == RegType::vgpr;
mubuf->mubuf().disable_wqm = false;
mubuf->mubuf().sync = info.sync;
mubuf->definitions[0] = Definition(val);
bld.insert(std::move(mubuf));
} else {
aco_ptr<FLAT_instruction> flat{
aco_ptr<Instruction> flat{
create_instruction<FLAT_instruction>(op, global ? Format::GLOBAL : Format::FLAT, 2, 1)};
if (addr.regClass() == s2) {
assert(global && offset.id() && offset.type() == RegType::vgpr);
@ -4818,12 +4820,12 @@ global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsign
flat->operands[0] = Operand(addr);
flat->operands[1] = Operand(s1);
}
flat->glc = info.glc;
flat->dlc =
flat->flatlike().glc = info.glc;
flat->flatlike().dlc =
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
flat->sync = info.sync;
flat->flatlike().sync = info.sync;
assert(global || !const_offset);
flat->offset = const_offset;
flat->flatlike().offset = const_offset;
flat->definitions[0] = Definition(val);
bld.insert(std::move(flat));
}
@ -5178,7 +5180,7 @@ create_vec_from_array(isel_context* ctx, Temp arr[], unsigned cnt, RegType reg_t
dst = bld.tmp(RegClass(reg_type, cnt * dword_size));
std::array<Temp, NIR_MAX_VEC_COMPONENTS> allocated_vec;
aco_ptr<Pseudo_instruction> instr{
aco_ptr<Instruction> instr{
create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, cnt, 1)};
instr->definitions[0] = Definition(dst);
@ -5553,7 +5555,7 @@ emit_load_frag_coord(isel_context* ctx, Temp dst, unsigned num_components)
{
Builder bld(ctx->program, ctx->block);
aco_ptr<Pseudo_instruction> vec(create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> vec(create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1));
for (unsigned i = 0; i < num_components; i++) {
if (ctx->args->frag_pos[i].used)
@ -5617,7 +5619,7 @@ visit_load_interpolated_input(isel_context* ctx, nir_intrinsic_instr* instr)
if (instr->def.num_components == 1) {
emit_interp_instr(ctx, idx, component, coords, dst, prim_mask);
} else {
aco_ptr<Pseudo_instruction> vec(create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> vec(create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, instr->def.num_components, 1));
for (unsigned i = 0; i < instr->def.num_components; i++) {
Temp tmp = ctx->program->allocateTmp(instr->def.bit_size == 16 ? v2b : v1);
@ -5709,19 +5711,20 @@ mtbuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne
abort();
}
aco_ptr<MTBUF_instruction> mtbuf{create_instruction<MTBUF_instruction>(op, Format::MTBUF, 3, 1)};
aco_ptr<Instruction> mtbuf{create_instruction<MTBUF_instruction>(op, Format::MTBUF, 3, 1)};
mtbuf->operands[0] = Operand(info.resource);
mtbuf->operands[1] = vaddr;
mtbuf->operands[2] = soffset;
mtbuf->offen = offen;
mtbuf->idxen = idxen;
mtbuf->glc = info.glc;
mtbuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
mtbuf->slc = info.slc;
mtbuf->sync = info.sync;
mtbuf->offset = const_offset;
mtbuf->dfmt = fetch_fmt & 0xf;
mtbuf->nfmt = fetch_fmt >> 4;
mtbuf->mtbuf().offen = offen;
mtbuf->mtbuf().idxen = idxen;
mtbuf->mtbuf().glc = info.glc;
mtbuf->mtbuf().dlc =
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
mtbuf->mtbuf().slc = info.slc;
mtbuf->mtbuf().sync = info.sync;
mtbuf->mtbuf().offset = const_offset;
mtbuf->mtbuf().dfmt = fetch_fmt & 0xf;
mtbuf->mtbuf().nfmt = fetch_fmt >> 4;
RegClass rc = RegClass::get(RegType::vgpr, bytes_size);
Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
mtbuf->definitions[0] = Definition(val);
@ -5757,7 +5760,7 @@ visit_load_fs_input(isel_context* ctx, nir_intrinsic_instr* instr)
unsigned num_components = instr->def.num_components;
if (instr->def.bit_size == 64)
num_components *= 2;
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)};
for (unsigned i = 0; i < num_components; i++) {
unsigned chan_component = (component + i) % 4;
@ -5876,7 +5879,7 @@ visit_load_push_constant(isel_context* ctx, nir_intrinsic_instr* instr)
if ((ctx->args->inline_push_const_mask | mask) == ctx->args->inline_push_const_mask &&
start + count <= (sizeof(ctx->args->inline_push_const_mask) * 8u)) {
std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, count, 1)};
unsigned arg_index =
util_bitcount64(ctx->args->inline_push_const_mask & BITFIELD64_MASK(start));
@ -6066,7 +6069,7 @@ emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::v
if (nsa_size < coords.size()) {
Temp coord = coords[nsa_size];
if (coords.size() - nsa_size > 1) {
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, coords.size() - nsa_size, 1)};
unsigned coord_size = 0;
@ -6088,7 +6091,7 @@ emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::v
bool has_dst = dst.id() != 0;
aco_ptr<MIMG_instruction> mimg{
aco_ptr<Instruction> mimg{
create_instruction<MIMG_instruction>(op, Format::MIMG, 3 + coords.size(), has_dst)};
if (has_dst)
mimg->definitions[0] = Definition(dst);
@ -6100,11 +6103,9 @@ emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::v
if (coords[i].regClass().is_linear_vgpr())
mimg->operands[3 + i].setLateKill(true);
}
mimg->strict_wqm = strict_wqm;
mimg->mimg().strict_wqm = strict_wqm;
MIMG_instruction* res = mimg.get();
bld.insert(std::move(mimg));
return res;
return &bld.insert(std::move(mimg))->mimg();
}
void
@ -6257,8 +6258,8 @@ emit_tfe_init(Builder& bld, Temp dst)
{
Temp tmp = bld.tmp(dst.regClass());
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector,
Format::PSEUDO, dst.size(), 1)};
for (unsigned i = 0; i < dst.size(); i++)
vec->operands[i] = Operand::zero();
vec->definitions[0] = Definition(tmp);
@ -6332,19 +6333,19 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
default: unreachable(">4 channel buffer image load");
}
}
aco_ptr<MUBUF_instruction> load{
aco_ptr<Instruction> load{
create_instruction<MUBUF_instruction>(opcode, Format::MUBUF, 3 + is_sparse, 1)};
load->operands[0] = Operand(resource);
load->operands[1] = Operand(vindex);
load->operands[2] = Operand::c32(0);
load->definitions[0] = Definition(tmp);
load->idxen = true;
load->glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT);
load->dlc =
load->glc && (ctx->options->gfx_level == GFX10 || ctx->options->gfx_level == GFX10_3);
load->sync = sync;
load->tfe = is_sparse;
if (load->tfe)
load->mubuf().idxen = true;
load->mubuf().glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT);
load->mubuf().dlc = load->mubuf().glc &&
(ctx->options->gfx_level == GFX10 || ctx->options->gfx_level == GFX10_3);
load->mubuf().sync = sync;
load->mubuf().tfe = is_sparse;
if (load->mubuf().tfe)
load->operands[3] = emit_tfe_init(bld, tmp);
ctx->block->instructions.emplace_back(std::move(load));
} else {
@ -6446,7 +6447,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
if (dmask_count == 1) {
data = emit_extract_vector(ctx, data, ffs(dmask) - 1, rc);
} else {
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, dmask_count, 1)};
uint32_t index = 0;
u_foreach_bit (bit, dmask) {
@ -6480,17 +6481,17 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
default: unreachable(">4 channel buffer image store");
}
}
aco_ptr<MUBUF_instruction> store{
aco_ptr<Instruction> store{
create_instruction<MUBUF_instruction>(opcode, Format::MUBUF, 4, 0)};
store->operands[0] = Operand(rsrc);
store->operands[1] = Operand(vindex);
store->operands[2] = Operand::c32(0);
store->operands[3] = Operand(data);
store->idxen = true;
store->glc = glc;
store->dlc = false;
store->disable_wqm = true;
store->sync = sync;
store->mubuf().idxen = true;
store->mubuf().glc = glc;
store->mubuf().dlc = false;
store->mubuf().disable_wqm = true;
store->mubuf().sync = sync;
ctx->program->needs_exact = true;
ctx->block->instructions.emplace_back(std::move(store));
return;
@ -6634,7 +6635,7 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
// assert(ctx->options->gfx_level < GFX9 && "GFX9 stride size workaround not yet
// implemented.");
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(
aco_ptr<Instruction> mubuf{create_instruction<MUBUF_instruction>(
is_64bit ? buf_op64 : buf_op, Format::MUBUF, 4, return_previous ? 1 : 0)};
mubuf->operands[0] = Operand(resource);
mubuf->operands[1] = Operand(vindex);
@ -6644,12 +6645,12 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
if (return_previous)
mubuf->definitions[0] = def;
mubuf->offset = 0;
mubuf->idxen = true;
mubuf->glc = return_previous;
mubuf->dlc = false; /* Not needed for atomics */
mubuf->disable_wqm = true;
mubuf->sync = sync;
mubuf->mubuf().offset = 0;
mubuf->mubuf().idxen = true;
mubuf->mubuf().glc = return_previous;
mubuf->mubuf().dlc = false; /* Not needed for atomics */
mubuf->mubuf().disable_wqm = true;
mubuf->mubuf().sync = sync;
ctx->program->needs_exact = true;
ctx->block->instructions.emplace_back(std::move(mubuf));
if (return_previous && cmpswap)
@ -6728,18 +6729,17 @@ visit_store_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
for (unsigned i = 0; i < write_count; i++) {
aco_opcode op = get_buffer_store_op(write_datas[i].bytes());
aco_ptr<MUBUF_instruction> store{
create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, 0)};
aco_ptr<Instruction> store{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, 0)};
store->operands[0] = Operand(rsrc);
store->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1);
store->operands[2] = offset.type() == RegType::sgpr ? Operand(offset) : Operand::c32(0);
store->operands[3] = Operand(write_datas[i]);
store->offset = offsets[i];
store->offen = (offset.type() == RegType::vgpr);
store->glc = glc;
store->dlc = false;
store->disable_wqm = true;
store->sync = sync;
store->mubuf().offset = offsets[i];
store->mubuf().offen = (offset.type() == RegType::vgpr);
store->mubuf().glc = glc;
store->mubuf().dlc = false;
store->mubuf().disable_wqm = true;
store->mubuf().sync = sync;
ctx->program->needs_exact = true;
ctx->block->instructions.emplace_back(std::move(store));
}
@ -6767,7 +6767,7 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
Temp dst = get_ssa_temp(ctx, &instr->def);
aco_opcode op = instr->def.bit_size == 32 ? op32 : op64;
aco_ptr<MUBUF_instruction> mubuf{
aco_ptr<Instruction> mubuf{
create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, return_previous ? 1 : 0)};
mubuf->operands[0] = Operand(rsrc);
mubuf->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1);
@ -6777,12 +6777,12 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
if (return_previous)
mubuf->definitions[0] = def;
mubuf->offset = 0;
mubuf->offen = (offset.type() == RegType::vgpr);
mubuf->glc = return_previous;
mubuf->dlc = false; /* Not needed for atomics */
mubuf->disable_wqm = true;
mubuf->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
mubuf->mubuf().offset = 0;
mubuf->mubuf().offen = (offset.type() == RegType::vgpr);
mubuf->mubuf().glc = return_previous;
mubuf->mubuf().dlc = false; /* Not needed for atomics */
mubuf->mubuf().disable_wqm = true;
mubuf->mubuf().sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
ctx->program->needs_exact = true;
ctx->block->instructions.emplace_back(std::move(mubuf));
if (return_previous && cmpswap)
@ -6901,7 +6901,7 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
default: unreachable("store_global not implemented for this size.");
}
aco_ptr<FLAT_instruction> flat{
aco_ptr<Instruction> flat{
create_instruction<FLAT_instruction>(op, global ? Format::GLOBAL : Format::FLAT, 3, 0)};
if (write_address.regClass() == s2) {
assert(global && write_offset.id() && write_offset.type() == RegType::vgpr);
@ -6913,12 +6913,12 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
flat->operands[1] = Operand(s1);
}
flat->operands[2] = Operand(write_datas[i]);
flat->glc = glc;
flat->dlc = false;
flat->flatlike().glc = glc;
flat->flatlike().dlc = false;
assert(global || !write_const_offset);
flat->offset = write_const_offset;
flat->disable_wqm = true;
flat->sync = sync;
flat->flatlike().offset = write_const_offset;
flat->flatlike().disable_wqm = true;
flat->flatlike().sync = sync;
ctx->program->needs_exact = true;
ctx->block->instructions.emplace_back(std::move(flat));
} else {
@ -6928,19 +6928,18 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
Temp rsrc = get_gfx6_global_rsrc(bld, write_address);
aco_ptr<MUBUF_instruction> mubuf{
create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, 0)};
aco_ptr<Instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, 0)};
mubuf->operands[0] = Operand(rsrc);
mubuf->operands[1] =
write_address.type() == RegType::vgpr ? Operand(write_address) : Operand(v1);
mubuf->operands[2] = Operand(write_offset);
mubuf->operands[3] = Operand(write_datas[i]);
mubuf->glc = glc;
mubuf->dlc = false;
mubuf->offset = write_const_offset;
mubuf->addr64 = write_address.type() == RegType::vgpr;
mubuf->disable_wqm = true;
mubuf->sync = sync;
mubuf->mubuf().glc = glc;
mubuf->mubuf().dlc = false;
mubuf->mubuf().offset = write_const_offset;
mubuf->mubuf().addr64 = write_address.type() == RegType::vgpr;
mubuf->mubuf().disable_wqm = true;
mubuf->mubuf().sync = sync;
ctx->program->needs_exact = true;
ctx->block->instructions.emplace_back(std::move(mubuf));
}
@ -7029,7 +7028,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
}
aco_opcode op = instr->def.bit_size == 32 ? op32 : op64;
aco_ptr<FLAT_instruction> flat{create_instruction<FLAT_instruction>(
aco_ptr<Instruction> flat{create_instruction<FLAT_instruction>(
op, global ? Format::GLOBAL : Format::FLAT, 3, return_previous ? 1 : 0)};
if (addr.regClass() == s2) {
assert(global && offset.id() && offset.type() == RegType::vgpr);
@ -7043,12 +7042,12 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
flat->operands[2] = Operand(data);
if (return_previous)
flat->definitions[0] = Definition(dst);
flat->glc = return_previous;
flat->dlc = false; /* Not needed for atomics */
flat->flatlike().glc = return_previous;
flat->flatlike().dlc = false; /* Not needed for atomics */
assert(global || !const_offset);
flat->offset = const_offset;
flat->disable_wqm = true;
flat->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
flat->flatlike().offset = const_offset;
flat->flatlike().disable_wqm = true;
flat->flatlike().sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
ctx->program->needs_exact = true;
ctx->block->instructions.emplace_back(std::move(flat));
} else {
@ -7061,7 +7060,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
aco_opcode op = instr->def.bit_size == 32 ? op32 : op64;
aco_ptr<MUBUF_instruction> mubuf{
aco_ptr<Instruction> mubuf{
create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, return_previous ? 1 : 0)};
mubuf->operands[0] = Operand(rsrc);
mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
@ -7071,12 +7070,12 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
if (return_previous)
mubuf->definitions[0] = def;
mubuf->glc = return_previous;
mubuf->dlc = false;
mubuf->offset = const_offset;
mubuf->addr64 = addr.type() == RegType::vgpr;
mubuf->disable_wqm = true;
mubuf->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
mubuf->mubuf().glc = return_previous;
mubuf->mubuf().dlc = false;
mubuf->mubuf().offset = const_offset;
mubuf->mubuf().addr64 = addr.type() == RegType::vgpr;
mubuf->mubuf().disable_wqm = true;
mubuf->mubuf().sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
ctx->program->needs_exact = true;
ctx->block->instructions.emplace_back(std::move(mubuf));
if (return_previous && cmpswap)
@ -7473,7 +7472,7 @@ visit_shared_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
offset = 0;
}
aco_ptr<DS_instruction> ds;
aco_ptr<Instruction> ds;
ds.reset(
create_instruction<DS_instruction>(op, Format::DS, num_operands, return_previous ? 1 : 0));
ds->operands[0] = Operand(address);
@ -7485,10 +7484,10 @@ visit_shared_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
std::swap(ds->operands[1], ds->operands[2]);
}
ds->operands[num_operands - 1] = m;
ds->offset0 = offset;
ds->ds().offset0 = offset;
if (return_previous)
ds->definitions[0] = Definition(get_ssa_temp(ctx, &instr->def));
ds->sync = memory_sync_info(storage_shared, semantic_atomicrmw);
ds->ds().sync = memory_sync_info(storage_shared, semantic_atomicrmw);
if (m.isUndefined())
ds->operands.pop_back();
@ -7916,7 +7915,7 @@ emit_reduction_instr(isel_context* ctx, aco_opcode aco_op, ReduceOp op, unsigned
if (clobber_vcc)
defs[num_defs++] = bld.def(bld.lm, vcc);
Pseudo_reduction_instruction* reduce = create_instruction<Pseudo_reduction_instruction>(
Instruction* reduce = create_instruction<Pseudo_reduction_instruction>(
aco_op, Format::PSEUDO_REDUCTION, 3, num_defs);
reduce->operands[0] = Operand(src);
/* setup_reduce_temp will update these undef operands if needed */
@ -7924,8 +7923,8 @@ emit_reduction_instr(isel_context* ctx, aco_opcode aco_op, ReduceOp op, unsigned
reduce->operands[2] = Operand(v1.as_linear());
std::copy(defs, defs + num_defs, reduce->definitions.begin());
reduce->reduce_op = op;
reduce->cluster_size = cluster_size;
reduce->reduction().reduce_op = op;
reduce->reduction().cluster_size = cluster_size;
bld.insert(std::move(reduce));
return dst.getTemp();
@ -8111,7 +8110,7 @@ create_fs_dual_src_export_gfx11(isel_context* ctx, const struct aco_export_mrt*
{
Builder bld(ctx->program, ctx->block);
aco_ptr<Pseudo_instruction> exp{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> exp{create_instruction<Pseudo_instruction>(
aco_opcode::p_dual_src_export_gfx11, Format::PSEUDO, 8, 6)};
for (unsigned i = 0; i < 4; i++) {
exp->operands[i] = mrt0 ? mrt0->out[i] : Operand(v1);
@ -9149,7 +9148,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m, offset0, offset1, true);
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile);
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, instr->num_components, 1)};
unsigned write_mask = nir_intrinsic_write_mask(instr);
@ -9209,36 +9208,37 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
const bool row_en = instr->intrinsic == nir_intrinsic_export_row_amd;
aco_ptr<Export_instruction> exp{
aco_ptr<Instruction> exp{
create_instruction<Export_instruction>(aco_opcode::exp, Format::EXP, 4 + row_en, 0)};
exp->dest = target;
exp->enabled_mask = write_mask;
exp->compressed = flags & AC_EXP_FLAG_COMPRESSED;
exp->exp().dest = target;
exp->exp().enabled_mask = write_mask;
exp->exp().compressed = flags & AC_EXP_FLAG_COMPRESSED;
/* ACO may reorder position/mrt export instructions, then mark done for last
* export instruction. So don't respect the nir AC_EXP_FLAG_DONE for position/mrt
* exports here and leave it to ACO.
*/
if (target == V_008DFC_SQ_EXP_PRIM)
exp->done = flags & AC_EXP_FLAG_DONE;
exp->exp().done = flags & AC_EXP_FLAG_DONE;
else
exp->done = false;
exp->exp().done = false;
/* ACO may reorder mrt export instructions, then mark valid mask for last
* export instruction. So don't respect the nir AC_EXP_FLAG_VALID_MASK for mrt
* exports here and leave it to ACO.
*/
if (target > V_008DFC_SQ_EXP_NULL)
exp->valid_mask = flags & AC_EXP_FLAG_VALID_MASK;
exp->exp().valid_mask = flags & AC_EXP_FLAG_VALID_MASK;
else
exp->valid_mask = false;
exp->exp().valid_mask = false;
exp->row_en = row_en;
exp->exp().row_en = row_en;
/* Compressed export uses two bits for a channel. */
uint32_t channel_mask =
exp->compressed ? (write_mask & 0x3 ? 1 : 0) | (write_mask & 0xc ? 2 : 0) : write_mask;
uint32_t channel_mask = exp->exp().compressed
? (write_mask & 0x3 ? 1 : 0) | (write_mask & 0xc ? 2 : 0)
: write_mask;
Temp value = get_ssa_temp(ctx, instr->src[0].ssa);
for (unsigned i = 0; i < 4; i++) {
@ -9287,7 +9287,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
if (it != ctx->allocated_vec.end())
num_src = src.bytes() / it->second[0].bytes();
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_start_linear_vgpr, Format::PSEUDO, num_src + !!begin_size, 1)};
if (begin_size)
@ -9730,15 +9730,15 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
}
}
aco_ptr<MUBUF_instruction> mubuf{
aco_ptr<Instruction> mubuf{
create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3 + instr->is_sparse, 1)};
mubuf->operands[0] = Operand(resource);
mubuf->operands[1] = Operand(coords[0]);
mubuf->operands[2] = Operand::c32(0);
mubuf->definitions[0] = Definition(tmp_dst);
mubuf->idxen = true;
mubuf->tfe = instr->is_sparse;
if (mubuf->tfe)
mubuf->mubuf().idxen = true;
mubuf->mubuf().tfe = instr->is_sparse;
if (mubuf->mubuf().tfe)
mubuf->operands[3] = emit_tfe_init(bld, tmp_dst);
ctx->block->instructions.emplace_back(std::move(mubuf));
@ -10008,7 +10008,7 @@ get_phi_operand(isel_context* ctx, nir_def* ssa, RegClass rc, bool logical)
void
visit_phi(isel_context* ctx, nir_phi_instr* instr)
{
aco_ptr<Pseudo_instruction> phi;
aco_ptr<Instruction> phi;
Temp dst = get_ssa_temp(ctx, &instr->def);
assert(instr->def.bit_size != 1 || dst.regClass() == ctx->program->lane_mask);
@ -10111,7 +10111,7 @@ visit_undef(isel_context* ctx, nir_undef_instr* instr)
if (dst.size() == 1) {
Builder(ctx->program, ctx->block).copy(Definition(dst), Operand::zero());
} else {
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
for (unsigned i = 0; i < dst.size(); i++)
vec->operands[i] = Operand::zero();
@ -10210,7 +10210,7 @@ end_loop(isel_context* ctx, loop_context* lc)
/* trim linear phis in loop header */
for (auto&& instr : loop_entry->instructions) {
if (instr->opcode == aco_opcode::p_linear_phi) {
aco_ptr<Pseudo_instruction> new_phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, loop_entry->linear_predecessors.size(), 1)};
aco_ptr<Instruction> new_phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, loop_entry->linear_predecessors.size(), 1)};
new_phi->definitions[0] = instr->definitions[0];
for (unsigned i = 0; i < new_phi->operands.size(); i++)
new_phi->operands[i] = instr->operands[i];
@ -10484,13 +10484,14 @@ begin_divergent_if_then(isel_context* ctx, if_context* ic, Temp cond,
/* branch to linear then block */
assert(cond.regClass() == ctx->program->lane_mask);
aco_ptr<Pseudo_branch_instruction> branch;
aco_ptr<Instruction> branch;
branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_z,
Format::PSEUDO_BRANCH, 1, 1));
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
branch->operands[0] = Operand(cond);
branch->selection_control_remove = sel_ctrl == nir_selection_control_flatten ||
sel_ctrl == nir_selection_control_divergent_always_taken;
branch->branch().selection_control_remove =
sel_ctrl == nir_selection_control_flatten ||
sel_ctrl == nir_selection_control_divergent_always_taken;
ctx->block->instructions.push_back(std::move(branch));
ic->BB_if_idx = ctx->block->index;
@ -10528,7 +10529,7 @@ begin_divergent_if_else(isel_context* ctx, if_context* ic,
Block* BB_then_logical = ctx->block;
append_logical_end(BB_then_logical);
/* branch from logical then block to invert block */
aco_ptr<Pseudo_branch_instruction> branch;
aco_ptr<Instruction> branch;
branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch,
Format::PSEUDO_BRANCH, 0, 1));
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
@ -10561,8 +10562,9 @@ begin_divergent_if_else(isel_context* ctx, if_context* ic,
branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch,
Format::PSEUDO_BRANCH, 0, 1));
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
branch->selection_control_remove = sel_ctrl == nir_selection_control_flatten ||
sel_ctrl == nir_selection_control_divergent_always_taken;
branch->branch().selection_control_remove =
sel_ctrl == nir_selection_control_flatten ||
sel_ctrl == nir_selection_control_divergent_always_taken;
ctx->block->instructions.push_back(std::move(branch));
ic->exec_potentially_empty_discard_old |= ctx->cf_info.exec_potentially_empty_discard;
@ -10593,7 +10595,7 @@ end_divergent_if(isel_context* ctx, if_context* ic)
append_logical_end(BB_else_logical);
/* branch from logical else block to endif block */
aco_ptr<Pseudo_branch_instruction> branch;
aco_ptr<Instruction> branch;
branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch,
Format::PSEUDO_BRANCH, 0, 1));
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
@ -10650,7 +10652,7 @@ begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond)
append_logical_end(ctx->block);
ctx->block->kind |= block_kind_uniform;
aco_ptr<Pseudo_branch_instruction> branch;
aco_ptr<Instruction> branch;
aco_opcode branch_opcode = aco_opcode::p_cbranch_z;
branch.reset(
create_instruction<Pseudo_branch_instruction>(branch_opcode, Format::PSEUDO_BRANCH, 1, 1));
@ -10687,7 +10689,7 @@ begin_uniform_if_else(isel_context* ctx, if_context* ic)
if (!ic->uniform_has_then_branch) {
append_logical_end(BB_then);
/* branch from then block to endif block */
aco_ptr<Pseudo_branch_instruction> branch;
aco_ptr<Instruction> branch;
branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch,
Format::PSEUDO_BRANCH, 0, 1));
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
@ -10719,7 +10721,7 @@ end_uniform_if(isel_context* ctx, if_context* ic)
if (!ctx->cf_info.has_branch) {
append_logical_end(BB_else);
/* branch from then block to endif block */
aco_ptr<Pseudo_branch_instruction> branch;
aco_ptr<Instruction> branch;
branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch,
Format::PSEUDO_BRANCH, 0, 1));
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
@ -10747,7 +10749,7 @@ visit_if(isel_context* ctx, nir_if* if_stmt)
{
Temp cond = get_ssa_temp(ctx, if_stmt->condition.ssa);
Builder bld(ctx->program, ctx->block);
aco_ptr<Pseudo_branch_instruction> branch;
aco_ptr<Instruction> branch;
if_context ic;
if (!nir_src_is_divergent(if_stmt->condition)) { /* uniform condition */
@ -11138,7 +11140,7 @@ create_fs_jump_to_epilog(isel_context* ctx)
Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.epilog_pc));
aco_ptr<Pseudo_instruction> jump{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> jump{create_instruction<Pseudo_instruction>(
aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + exports.size(), 0)};
jump->operands[0] = Operand(continue_pc);
for (unsigned i = 0; i < exports.size(); i++) {
@ -11192,8 +11194,8 @@ passthrough_all_args(isel_context* ctx, std::vector<Operand>& regs)
static void
build_end_with_regs(isel_context* ctx, std::vector<Operand>& regs)
{
aco_ptr<Pseudo_instruction> end{create_instruction<Pseudo_instruction>(
aco_opcode::p_end_with_regs, Format::PSEUDO, regs.size(), 0)};
aco_ptr<Instruction> end{create_instruction<Pseudo_instruction>(aco_opcode::p_end_with_regs,
Format::PSEUDO, regs.size(), 0)};
for (unsigned i = 0; i < regs.size(); i++)
end->operands[i] = regs[i];
@ -11240,7 +11242,7 @@ create_tcs_jump_to_epilog(isel_context* ctx)
Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.epilog_pc));
aco_ptr<Pseudo_instruction> jump{
aco_ptr<Instruction> jump{
create_instruction<Pseudo_instruction>(aco_opcode::p_jump_to_epilog, Format::PSEUDO, 14, 0)};
jump->operands[0] = Operand(continue_pc);
jump->operands[1] = ring_offsets;
@ -11374,7 +11376,7 @@ create_fs_end_for_epilog(isel_context* ctx)
ctx->program->needs_exact = true;
}
Pseudo_instruction*
Instruction*
add_startpgm(struct isel_context* ctx)
{
unsigned def_count = 0;
@ -11388,7 +11390,7 @@ add_startpgm(struct isel_context* ctx)
def_count++;
}
Pseudo_instruction* startpgm =
Instruction* startpgm =
create_instruction<Pseudo_instruction>(aco_opcode::p_startpgm, Format::PSEUDO, 0, def_count);
ctx->block->instructions.emplace_back(startpgm);
for (unsigned i = 0, arg = 0; i < ctx->args->arg_count; i++) {
@ -11478,7 +11480,7 @@ fix_ls_vgpr_init_bug(isel_context* ctx)
}
void
split_arguments(isel_context* ctx, Pseudo_instruction* startpgm)
split_arguments(isel_context* ctx, Instruction* startpgm)
{
/* Split all arguments except for the first (ring_offsets) and the last
* (exec) so that the dead channels don't stay live throughout the program.
@ -11645,7 +11647,7 @@ insert_rt_jump_next(isel_context& ctx, const struct ac_shader_args* args)
for (unsigned i = 0; i < ctx.args->arg_count; i++)
src_count += !!BITSET_TEST(ctx.output_args, i);
Pseudo_instruction* ret =
Instruction* ret =
create_instruction<Pseudo_instruction>(aco_opcode::p_return, Format::PSEUDO, src_count, 0);
ctx.block->instructions.emplace_back(ret);
@ -11682,7 +11684,7 @@ select_program_rt(isel_context& ctx, unsigned shader_count, struct nir_shader* c
init_context(&ctx, nir);
setup_fp_mode(&ctx, nir);
Pseudo_instruction* startpgm = add_startpgm(&ctx);
Instruction* startpgm = add_startpgm(&ctx);
append_logical_start(ctx.block);
split_arguments(&ctx, startpgm);
visit_cf_list(&ctx, &nir_shader_get_entrypoint(nir)->body);
@ -11839,7 +11841,7 @@ create_merged_jump_to_epilog(isel_context* ctx)
Temp continue_pc =
convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.next_stage_pc));
aco_ptr<Pseudo_instruction> jump{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> jump{create_instruction<Pseudo_instruction>(
aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + regs.size(), 0)};
jump->operands[0] = Operand(continue_pc);
for (unsigned i = 0; i < regs.size(); i++) {
@ -11884,7 +11886,7 @@ select_shader(isel_context& ctx, nir_shader* nir, const bool need_startpgm, cons
if (need_startpgm) {
/* Needs to be after init_context() for FS. */
Pseudo_instruction* startpgm = add_startpgm(&ctx);
Instruction* startpgm = add_startpgm(&ctx);
append_logical_start(ctx.block);
if (ctx.options->has_ls_vgpr_init_bug && ctx.stage == vertex_tess_control_hs &&

View file

@ -1671,7 +1671,7 @@ struct instr_deleter_functor {
template <typename T> using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
template <typename T>
T*
Instruction*
create_instruction(aco_opcode opcode, Format format, uint32_t num_operands,
uint32_t num_definitions)
{
@ -1679,7 +1679,7 @@ create_instruction(aco_opcode opcode, Format format, uint32_t num_operands,
sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
void* data = instruction_buffer->allocate(size, alignof(uint32_t));
memset(data, 0, size);
T* inst = (T*)data;
Instruction* inst = (Instruction*)data;
inst->opcode = opcode;
inst->format = format;

View file

@ -108,8 +108,8 @@ get_output(Program* program, unsigned block_idx, ssa_state* state)
}
/* create phi */
aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(
aco_opcode::p_linear_phi, Format::PSEUDO, num_preds, 1)};
aco_ptr<Instruction> phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi,
Format::PSEUDO, num_preds, 1)};
for (unsigned i = 0; i < num_preds; i++)
phi->operands[i] = state->outputs[block.linear_preds[i]];
phi->definitions[0] = Definition(output.getTemp());
@ -347,8 +347,8 @@ lower_divergent_bool_phi(Program* program, ssa_state* state, Block* block,
unsigned num_preds = block->linear_preds.size();
if (phi->operands.size() != num_preds) {
Pseudo_instruction* new_phi{create_instruction<Pseudo_instruction>(
aco_opcode::p_linear_phi, Format::PSEUDO, num_preds, 1)};
Instruction* new_phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi,
Format::PSEUDO, num_preds, 1)};
new_phi->definitions[0] = phi->definitions[0];
phi.reset(new_phi);
} else {

View file

@ -424,8 +424,8 @@ emit_copies_block(Builder& bld, std::map<uint32_t, ltg_node>& ltg, RegType type)
// TODO: this should be restricted to a feasible number of registers
// and otherwise use a temporary to avoid having to reload more (spilled)
// variables than we have registers.
aco_ptr<Pseudo_instruction> copy{create_instruction<Pseudo_instruction>(
aco_opcode::p_parallelcopy, Format::PSEUDO, num, num)};
aco_ptr<Instruction> copy{create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy,
Format::PSEUDO, num, num)};
it = ltg.begin();
for (unsigned i = 0; i < num; i++) {
while (it->second.cp.def.regClass().type() != type)

View file

@ -601,13 +601,13 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
if (src.regClass() == v1b) {
if (ctx->program->gfx_level >= GFX8 && ctx->program->gfx_level < GFX11) {
aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(
aco_ptr<Instruction> sdwa{create_instruction<SDWA_instruction>(
aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
sdwa->definitions[0] = Definition(PhysReg{tmp}, v1);
bool sext = reduce_op == imin8 || reduce_op == imax8;
sdwa->sel[0] = SubdwordSel(1, 0, sext);
sdwa->dst_sel = SubdwordSel::dword;
sdwa->sdwa().sel[0] = SubdwordSel(1, 0, sext);
sdwa->sdwa().dst_sel = SubdwordSel::dword;
bld.insert(std::move(sdwa));
} else {
aco_opcode opcode;
@ -624,13 +624,13 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
bool is_add_cmp = reduce_op == iadd16 || reduce_op == imax16 || reduce_op == imin16 ||
reduce_op == umin16 || reduce_op == umax16;
if (ctx->program->gfx_level >= GFX10 && ctx->program->gfx_level < GFX11 && is_add_cmp) {
aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(
aco_ptr<Instruction> sdwa{create_instruction<SDWA_instruction>(
aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
sdwa->definitions[0] = Definition(PhysReg{tmp}, v1);
bool sext = reduce_op == imin16 || reduce_op == imax16 || reduce_op == iadd16;
sdwa->sel[0] = SubdwordSel(2, 0, sext);
sdwa->dst_sel = SubdwordSel::dword;
sdwa->sdwa().sel[0] = SubdwordSel(2, 0, sext);
sdwa->sdwa().dst_sel = SubdwordSel::dword;
bld.insert(std::move(sdwa));
} else if (ctx->program->gfx_level <= GFX7 ||
(ctx->program->gfx_level >= GFX11 && is_add_cmp)) {
@ -2259,7 +2259,7 @@ lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
instr->mimg().strict_wqm = false;
if ((3 + num_vaddr) > instr->operands.size()) {
MIMG_instruction* new_instr = create_instruction<MIMG_instruction>(
Instruction* new_instr = create_instruction<MIMG_instruction>(
instr->opcode, Format::MIMG, 3 + num_vaddr, instr->definitions.size());
std::copy(instr->definitions.cbegin(), instr->definitions.cend(),
new_instr->definitions.begin());

View file

@ -905,7 +905,7 @@ smem_combine(opt_ctx& ctx, aco_ptr<Instruction>& instr)
smem.operands.back() = Operand(base);
}
} else {
SMEM_instruction* new_instr = create_instruction<SMEM_instruction>(
Instruction* new_instr = create_instruction<SMEM_instruction>(
smem.opcode, Format::SMEM, smem.operands.size() + 1, smem.definitions.size());
new_instr->operands[0] = smem.operands[0];
new_instr->operands[1] = Operand::c32(offset);
@ -914,11 +914,11 @@ smem_combine(opt_ctx& ctx, aco_ptr<Instruction>& instr)
new_instr->operands.back() = Operand(base);
if (!smem.definitions.empty())
new_instr->definitions[0] = smem.definitions[0];
new_instr->sync = smem.sync;
new_instr->glc = smem.glc;
new_instr->dlc = smem.dlc;
new_instr->nv = smem.nv;
new_instr->disable_wqm = smem.disable_wqm;
new_instr->smem().sync = smem.sync;
new_instr->smem().glc = smem.glc;
new_instr->smem().dlc = smem.dlc;
new_instr->smem().nv = smem.nv;
new_instr->smem().disable_wqm = smem.disable_wqm;
instr.reset(new_instr);
}
}
@ -2312,10 +2312,10 @@ combine_ordering_test(opt_ctx& ctx, aco_ptr<Instruction>& instr)
case 64: new_op = is_or ? aco_opcode::v_cmp_u_f64 : aco_opcode::v_cmp_o_f64; break;
}
bool needs_vop3 = num_sgprs > 1 || (opsel[0] && op[0].type() != RegType::vgpr);
VALU_instruction* new_instr = create_instruction<VALU_instruction>(
Instruction* new_instr = create_instruction<VALU_instruction>(
new_op, needs_vop3 ? asVOP3(Format::VOPC) : Format::VOPC, 2, 1);
new_instr->opsel = opsel;
new_instr->valu().opsel = opsel;
new_instr->operands[0] = copy_operand(ctx, Operand(op[0]));
new_instr->operands[1] = copy_operand(ctx, Operand(op[1]));
new_instr->definitions[0] = instr->definitions[0];
@ -2381,13 +2381,13 @@ combine_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr)
return false;
aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
VALU_instruction* new_instr = create_instruction<VALU_instruction>(
Instruction* new_instr = create_instruction<VALU_instruction>(
new_op, cmp->isVOP3() ? asVOP3(Format::VOPC) : Format::VOPC, 2, 1);
new_instr->neg = cmp_valu.neg;
new_instr->abs = cmp_valu.abs;
new_instr->clamp = cmp_valu.clamp;
new_instr->omod = cmp_valu.omod;
new_instr->opsel = cmp_valu.opsel;
new_instr->valu().neg = cmp_valu.neg;
new_instr->valu().abs = cmp_valu.abs;
new_instr->valu().clamp = cmp_valu.clamp;
new_instr->valu().omod = cmp_valu.omod;
new_instr->valu().opsel = cmp_valu.opsel;
new_instr->operands[0] = copy_operand(ctx, cmp->operands[0]);
new_instr->operands[1] = copy_operand(ctx, cmp->operands[1]);
new_instr->definitions[0] = instr->definitions[0];
@ -2701,12 +2701,12 @@ create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr<Instruction>& instr
Operand operands[3], uint8_t neg, uint8_t abs, uint8_t opsel, bool clamp,
unsigned omod)
{
VALU_instruction* new_instr = create_instruction<VALU_instruction>(opcode, Format::VOP3, 3, 1);
new_instr->neg = neg;
new_instr->abs = abs;
new_instr->clamp = clamp;
new_instr->omod = omod;
new_instr->opsel = opsel;
Instruction* new_instr = create_instruction<VALU_instruction>(opcode, Format::VOP3, 3, 1);
new_instr->valu().neg = neg;
new_instr->valu().abs = abs;
new_instr->valu().clamp = clamp;
new_instr->valu().omod = omod;
new_instr->valu().opsel = opsel;
new_instr->operands[0] = operands[0];
new_instr->operands[1] = operands[1];
new_instr->operands[2] = operands[2];
@ -3746,7 +3746,7 @@ combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr, bool is_sub)
ctx.uses[instr->operands[i].tempId()]--;
aco_opcode mad_op = is_sub ? aco_opcode::v_mad_i32_i24 : aco_opcode::v_mad_u32_u24;
aco_ptr<VALU_instruction> new_instr{
aco_ptr<Instruction> new_instr{
create_instruction<VALU_instruction>(mad_op, Format::VOP3, 3, 1)};
for (unsigned op_idx = 0; op_idx < 3; ++op_idx)
new_instr->operands[op_idx] = ops[op_idx];
@ -3930,23 +3930,23 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
/* turn mul + packed add into v_pk_fma_f16 */
aco_opcode mad = fadd ? aco_opcode::v_pk_fma_f16 : aco_opcode::v_pk_mad_u16;
aco_ptr<VALU_instruction> fma{create_instruction<VALU_instruction>(mad, Format::VOP3P, 3, 1)};
aco_ptr<Instruction> fma{create_instruction<VALU_instruction>(mad, Format::VOP3P, 3, 1)};
fma->operands[0] = copy_operand(ctx, mul_instr->operands[0]);
fma->operands[1] = copy_operand(ctx, mul_instr->operands[1]);
fma->operands[2] = instr->operands[add_op_idx];
fma->clamp = vop3p->clamp;
fma->neg_lo = mul_neg_lo;
fma->neg_hi = mul_neg_hi;
fma->opsel_lo = mul_opsel_lo;
fma->opsel_hi = mul_opsel_hi;
propagate_swizzles(fma.get(), vop3p->opsel_lo[1 - add_op_idx],
fma->valu().clamp = vop3p->clamp;
fma->valu().neg_lo = mul_neg_lo;
fma->valu().neg_hi = mul_neg_hi;
fma->valu().opsel_lo = mul_opsel_lo;
fma->valu().opsel_hi = mul_opsel_hi;
propagate_swizzles(&fma->valu(), vop3p->opsel_lo[1 - add_op_idx],
vop3p->opsel_hi[1 - add_op_idx]);
fma->opsel_lo[2] = vop3p->opsel_lo[add_op_idx];
fma->opsel_hi[2] = vop3p->opsel_hi[add_op_idx];
fma->neg_lo[2] = vop3p->neg_lo[add_op_idx];
fma->neg_hi[2] = vop3p->neg_hi[add_op_idx];
fma->neg_lo[1] = fma->neg_lo[1] ^ vop3p->neg_lo[1 - add_op_idx];
fma->neg_hi[1] = fma->neg_hi[1] ^ vop3p->neg_hi[1 - add_op_idx];
fma->valu().opsel_lo[2] = vop3p->opsel_lo[add_op_idx];
fma->valu().opsel_hi[2] = vop3p->opsel_hi[add_op_idx];
fma->valu().neg_lo[2] = vop3p->neg_lo[add_op_idx];
fma->valu().neg_hi[2] = vop3p->neg_hi[add_op_idx];
fma->valu().neg_lo[1] = fma->valu().neg_lo[1] ^ vop3p->neg_lo[1 - add_op_idx];
fma->valu().neg_hi[1] = fma->valu().neg_hi[1] ^ vop3p->neg_hi[1 - add_op_idx];
fma->definitions[0] = instr->definitions[0];
fma->pass_flags = instr->pass_flags;
instr = std::move(fma);
@ -3995,26 +3995,26 @@ to_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
bool is_add = instr->opcode != aco_opcode::v_mul_f32;
aco_ptr<VALU_instruction> vop3p{
aco_ptr<Instruction> vop3p{
create_instruction<VALU_instruction>(aco_opcode::v_fma_mix_f32, Format::VOP3P, 3, 1)};
for (unsigned i = 0; i < instr->operands.size(); i++) {
vop3p->operands[is_add + i] = instr->operands[i];
vop3p->neg_lo[is_add + i] = instr->valu().neg[i];
vop3p->neg_hi[is_add + i] = instr->valu().abs[i];
vop3p->valu().neg_lo[is_add + i] = instr->valu().neg[i];
vop3p->valu().neg_hi[is_add + i] = instr->valu().abs[i];
}
if (instr->opcode == aco_opcode::v_mul_f32) {
vop3p->operands[2] = Operand::zero();
vop3p->neg_lo[2] = true;
vop3p->valu().neg_lo[2] = true;
} else if (is_add) {
vop3p->operands[0] = Operand::c32(0x3f800000);
if (instr->opcode == aco_opcode::v_sub_f32)
vop3p->neg_lo[2] ^= true;
vop3p->valu().neg_lo[2] ^= true;
else if (instr->opcode == aco_opcode::v_subrev_f32)
vop3p->neg_lo[1] ^= true;
vop3p->valu().neg_lo[1] ^= true;
}
vop3p->definitions[0] = instr->definitions[0];
vop3p->clamp = instr->valu().clamp;
vop3p->valu().clamp = instr->valu().clamp;
vop3p->pass_flags = instr->pass_flags;
instr = std::move(vop3p);
@ -4418,7 +4418,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
neg[2 - add_op_idx] = neg[2 - add_op_idx] ^ true;
aco_ptr<Instruction> add_instr = std::move(instr);
aco_ptr<VALU_instruction> mad;
aco_ptr<Instruction> mad;
if (add_instr->isVOP3P() || mul_instr->isVOP3P()) {
assert(!omod);
assert(!opsel);
@ -4448,14 +4448,14 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
for (unsigned i = 0; i < 3; i++) {
mad->operands[i] = op[i];
mad->neg[i] = neg[i];
mad->abs[i] = abs[i];
mad->valu().neg[i] = neg[i];
mad->valu().abs[i] = abs[i];
}
mad->omod = omod;
mad->clamp = clamp;
mad->opsel_lo = opsel_lo;
mad->opsel_hi = opsel_hi;
mad->opsel = opsel;
mad->valu().omod = omod;
mad->valu().clamp = clamp;
mad->valu().opsel_lo = opsel_lo;
mad->valu().opsel_hi = opsel_hi;
mad->valu().opsel = opsel;
mad->definitions[0] = add_instr->definitions[0];
mad->definitions[0].setPrecise(add_instr->definitions[0].isPrecise() ||
mul_instr->definitions[0].isPrecise());
@ -4481,7 +4481,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
ctx.uses[instr->operands[i].tempId()]--;
ctx.uses[ctx.info[instr->operands[i].tempId()].temp.id()]++;
aco_ptr<VALU_instruction> new_instr{
aco_ptr<Instruction> new_instr{
create_instruction<VALU_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1)};
new_instr->operands[0] = Operand::zero();
new_instr->operands[1] = instr->operands[!i];
@ -4805,7 +4805,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (op.isTemp())
ctx.uses[op.tempId()]++;
aco_ptr<Pseudo_instruction> extract{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> extract{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, 1, 1)};
extract->operands[0] = op;
extract->definitions[0] = instr->definitions[idx];
@ -4818,7 +4818,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (!done && num_used == 1 &&
instr->operands[0].bytes() % instr->definitions[idx].bytes() == 0 &&
split_offset % instr->definitions[idx].bytes() == 0) {
aco_ptr<Pseudo_instruction> extract{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> extract{create_instruction<Pseudo_instruction>(
aco_opcode::p_extract_vector, Format::PSEUDO, 2, 1)};
extract->operands[0] = instr->operands[0];
extract->operands[1] =

View file

@ -109,7 +109,7 @@ setup_reduce_temp(Program* program)
if ((int)last_top_level_block_idx != inserted_at) {
reduceTmp = program->allocateTmp(reduceTmp.regClass());
aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> create{create_instruction<Pseudo_instruction>(
aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
create->definitions[0] = Definition(reduceTmp);
/* find the right place to insert this definition */
@ -154,7 +154,7 @@ setup_reduce_temp(Program* program)
if (need_vtmp && (int)last_top_level_block_idx != vtmp_inserted_at) {
vtmp = program->allocateTmp(vtmp.regClass());
aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> create{create_instruction<Pseudo_instruction>(
aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
create->definitions[0] = Definition(vtmp);
if (last_top_level_block_idx == block.index) {

View file

@ -2885,7 +2885,7 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vector<std::pair<Operand, Definiti
if (parallelcopy.empty())
return;
aco_ptr<Pseudo_instruction> pc;
aco_ptr<Instruction> pc;
pc.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO,
parallelcopy.size(), parallelcopy.size()));
bool linear_vgpr = false;
@ -2935,8 +2935,8 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vector<std::pair<Operand, Definiti
handle_pseudo(ctx, tmp_file, pc.get());
} else {
pc->needs_scratch_reg = sgpr_operands_alias_defs || linear_vgpr;
pc->tmp_in_scc = false;
pc->pseudo().needs_scratch_reg = sgpr_operands_alias_defs || linear_vgpr;
pc->pseudo().tmp_in_scc = false;
}
instructions.emplace_back(std::move(pc));

View file

@ -686,9 +686,8 @@ create_vopd_instruction(const SchedILPContext& ctx, unsigned idx)
get_vopd_opcode_operands(x, x_info, swap_x, &x_op, &num_operands, operands);
get_vopd_opcode_operands(y, y_info, swap_y, &y_op, &num_operands, operands + num_operands);
VOPD_instruction* instr =
create_instruction<VOPD_instruction>(x_op, Format::VOPD, num_operands, 2);
instr->opy = y_op;
Instruction* instr = create_instruction<VOPD_instruction>(x_op, Format::VOPD, num_operands, 2);
instr->vopd().opy = y_op;
instr->definitions[0] = x->definitions[0];
instr->definitions[1] = y->definitions[0];
std::copy(operands, operands + num_operands, instr->operands.begin());

View file

@ -376,7 +376,7 @@ do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t spill_id)
res->definitions[0] = Definition(new_name);
return res;
} else {
aco_ptr<Pseudo_instruction> reload{
aco_ptr<Instruction> reload{
create_instruction<Pseudo_instruction>(aco_opcode::p_reload, Format::PSEUDO, 1, 1)};
reload->operands[0] = Operand::c32(spill_id);
reload->definitions[0] = Definition(new_name);
@ -845,7 +845,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx])
ctx.add_interference(def_spill_id, pair.second);
aco_ptr<Pseudo_instruction> spill{
aco_ptr<Instruction> spill{
create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
spill->operands[0] = spill_op;
spill->operands[1] = Operand::c32(def_spill_id);
@ -915,7 +915,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
ctx.renames[pred_idx].erase(rename_it);
}
aco_ptr<Pseudo_instruction> spill{
aco_ptr<Instruction> spill{
create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
spill->operands[0] = Operand(var);
spill->operands[1] = Operand::c32(pair.second);
@ -1054,7 +1054,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
if (!is_same) {
/* the variable was renamed differently in the predecessors: we have to create a phi */
aco_opcode opcode = pair.first.is_linear() ? aco_opcode::p_linear_phi : aco_opcode::p_phi;
aco_ptr<Pseudo_instruction> phi{
aco_ptr<Instruction> phi{
create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, preds.size(), 1)};
rename = ctx.program->allocateTmp(pair.first.regClass());
for (unsigned i = 0; i < phi->operands.size(); i++) {
@ -1229,7 +1229,7 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand s
}
/* add spill to new instructions */
aco_ptr<Pseudo_instruction> spill{
aco_ptr<Instruction> spill{
create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
spill->operands[0] = Operand(to_spill);
spill->operands[1] = Operand::c32(spill_id);
@ -1757,7 +1757,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) {
Temp linear_vgpr = ctx.program->allocateTmp(v1.as_linear());
vgpr_spill_temps[spill_slot / ctx.wave_size] = linear_vgpr;
aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> create{create_instruction<Pseudo_instruction>(
aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
create->definitions[0] = Definition(linear_vgpr);
/* find the right place to insert this definition */
@ -1774,7 +1774,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
}
/* spill sgpr: just add the vgpr temp to operands */
Pseudo_instruction* spill =
Instruction* spill =
create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 3, 0);
spill->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
spill->operands[0].setLateKill(true);
@ -1798,7 +1798,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) {
Temp linear_vgpr = ctx.program->allocateTmp(v1.as_linear());
vgpr_spill_temps[spill_slot / ctx.wave_size] = linear_vgpr;
aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(
aco_ptr<Instruction> create{create_instruction<Pseudo_instruction>(
aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
create->definitions[0] = Definition(linear_vgpr);
/* find the right place to insert this definition */
@ -1815,8 +1815,8 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
}
/* reload sgpr: just add the vgpr temp to operands */
Pseudo_instruction* reload = create_instruction<Pseudo_instruction>(
aco_opcode::p_reload, Format::PSEUDO, 2, 1);
Instruction* reload = create_instruction<Pseudo_instruction>(aco_opcode::p_reload,
Format::PSEUDO, 2, 1);
reload->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
reload->operands[0].setLateKill(true);
reload->operands[1] = Operand::c32(spill_slot % ctx.wave_size);

View file

@ -97,7 +97,7 @@ insert_parallelcopies(ssa_elimination_ctx& ctx)
}
std::vector<aco_ptr<Instruction>>::iterator it = std::next(block.instructions.begin(), idx);
aco_ptr<Pseudo_instruction> pc{
aco_ptr<Instruction> pc{
create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO,
logical_phi_info.size(), logical_phi_info.size())};
unsigned i = 0;
@ -107,7 +107,7 @@ insert_parallelcopies(ssa_elimination_ctx& ctx)
i++;
}
/* this shouldn't be needed since we're only copying vgprs */
pc->tmp_in_scc = false;
pc->pseudo().tmp_in_scc = false;
block.instructions.insert(it, std::move(pc));
}
@ -122,7 +122,7 @@ insert_parallelcopies(ssa_elimination_ctx& ctx)
--it;
assert((*it)->isBranch());
PhysReg scratch_sgpr = (*it)->definitions[0].physReg();
aco_ptr<Pseudo_instruction> pc{
aco_ptr<Instruction> pc{
create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO,
linear_phi_info.size(), linear_phi_info.size())};
unsigned i = 0;
@ -131,9 +131,9 @@ insert_parallelcopies(ssa_elimination_ctx& ctx)
pc->operands[i] = phi_info.op;
i++;
}
pc->tmp_in_scc = block.scc_live_out;
pc->scratch_sgpr = scratch_sgpr;
pc->needs_scratch_reg = true;
pc->pseudo().tmp_in_scc = block.scc_live_out;
pc->pseudo().scratch_sgpr = scratch_sgpr;
pc->pseudo().needs_scratch_reg = true;
block.instructions.insert(it, std::move(pc));
}
}

View file

@ -269,7 +269,7 @@ BEGIN_TEST(assembler.v_add3)
//~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080
//~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080
aco_ptr<VALU_instruction> add3{
aco_ptr<Instruction> add3{
create_instruction<VALU_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
add3->operands[0] = Operand::zero();
add3->operands[1] = Operand::zero();
@ -288,13 +288,13 @@ BEGIN_TEST(assembler.v_add3_clamp)
//~gfx9>> integer addition + clamp ; d1ff8000 02010080
//~gfx10>> integer addition + clamp ; d76d8000 02010080
aco_ptr<VALU_instruction> add3{
aco_ptr<Instruction> add3{
create_instruction<VALU_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
add3->operands[0] = Operand::zero();
add3->operands[1] = Operand::zero();
add3->operands[2] = Operand::zero();
add3->definitions[0] = Definition(PhysReg(0), v1);
add3->clamp = 1;
add3->valu().clamp = 1;
bld.insert(std::move(add3));
finish_assembler_test();

View file

@ -69,7 +69,7 @@ create_global()
static void
create_mimg(bool nsa, Temp desc = Temp(0, s8))
{
aco_ptr<MIMG_instruction> mimg{
aco_ptr<Instruction> mimg{
create_instruction<MIMG_instruction>(aco_opcode::image_sample, Format::MIMG, 5, 1)};
mimg->definitions[0] = Definition(PhysReg(256), v1);
mimg->operands[0] = Operand(desc);
@ -78,8 +78,8 @@ create_mimg(bool nsa, Temp desc = Temp(0, s8))
mimg->operands[2] = Operand(v1);
for (unsigned i = 0; i < 2; i++)
mimg->operands[3 + i] = Operand(PhysReg(256 + (nsa ? i * 2 : i)), v1);
mimg->dmask = 0x1;
mimg->dim = ac_image_2d;
mimg->mimg().dmask = 0x1;
mimg->mimg().dim = ac_image_2d;
bld.insert(std::move(mimg));
}

View file

@ -42,7 +42,7 @@ create_mubuf_store(PhysReg src = PhysReg(256))
void
create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords)
{
aco_ptr<MIMG_instruction> mimg{
aco_ptr<Instruction> mimg{
create_instruction<MIMG_instruction>(aco_opcode::image_sample, Format::MIMG, 3 + addrs, 1)};
mimg->definitions[0] = Definition(PhysReg(256), v1);
mimg->operands[0] = Operand(PhysReg(0), s8);
@ -50,8 +50,8 @@ create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords)
mimg->operands[2] = Operand(v1);
for (unsigned i = 0; i < addrs; i++)
mimg->operands[3 + i] = Operand(PhysReg(256 + (nsa ? i * 2 : i)), v1);
mimg->dmask = 0x1;
mimg->dim = ac_image_2d;
mimg->mimg().dmask = 0x1;
mimg->mimg().dim = ac_image_2d;
assert(get_mimg_nsa_dwords(mimg.get()) + 2 == instr_dwords);