diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index f0500e1bbc0..ed6dfa3728d 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -611,9 +611,9 @@ handle_instruction_gfx6(State& state, NOP_ctx_gfx6& ctx, aco_ptr& i // TODO: try to schedule the NOP-causing instruction up to reduce the number of stall cycles if (NOPs) { /* create NOP */ - aco_ptr nop{ + aco_ptr nop{ create_instruction(aco_opcode::s_nop, Format::SOPP, 0, 0)}; - nop->imm = NOPs - 1; + nop->salu().imm = NOPs - 1; new_instructions.emplace_back(std::move(nop)); } diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp index a4ba5783b7c..84c4d7c97bc 100644 --- a/src/amd/compiler/aco_insert_exec_mask.cpp +++ b/src/amd/compiler/aco_insert_exec_mask.cpp @@ -237,7 +237,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector> /* create ssa names for outer exec masks */ if (info.has_discard && preds.size() > 1) { - aco_ptr phi; + aco_ptr phi; for (int i = 0; i < info.num_exec_masks - 1; i++) { phi.reset(create_instruction(aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)); @@ -251,7 +251,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector> if (info.has_divergent_continue) { /* create ssa name for loop active mask */ - aco_ptr phi{create_instruction( + aco_ptr phi{create_instruction( aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)}; phi->definitions[0] = bld.def(bld.lm); phi->operands[0] = get_exec_op(ctx.info[preds[0]].exec.back().first); @@ -312,7 +312,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector> ctx.info[idx].exec.emplace_back(same, type); } else { /* create phi for loop footer */ - aco_ptr phi{create_instruction( + aco_ptr phi{create_instruction( aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)}; phi->definitions[0] = bld.def(bld.lm); for (unsigned i = 0; i < phi->operands.size(); i++) diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index b1f4722cc73..c1e20459b93 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -993,17 +993,17 @@ emit_waitcnt(wait_ctx& ctx, std::vector>& instructions, wai { if (imm.vs != wait_imm::unset_counter) { assert(ctx.gfx_level >= GFX10); - SALU_instruction* waitcnt_vs = + Instruction* waitcnt_vs = create_instruction(aco_opcode::s_waitcnt_vscnt, Format::SOPK, 1, 0); waitcnt_vs->operands[0] = Operand(sgpr_null, s1); - waitcnt_vs->imm = imm.vs; + waitcnt_vs->salu().imm = imm.vs; instructions.emplace_back(waitcnt_vs); imm.vs = wait_imm::unset_counter; } if (!imm.empty()) { - SALU_instruction* waitcnt = + Instruction* waitcnt = create_instruction(aco_opcode::s_waitcnt, Format::SOPP, 0, 0); - waitcnt->imm = imm.pack(ctx.gfx_level); + waitcnt->salu().imm = imm.pack(ctx.gfx_level); instructions.emplace_back(waitcnt); } imm = wait_imm(); @@ -1030,9 +1030,9 @@ emit_delay_alu(wait_ctx& ctx, std::vector>& instructions, imm |= ((uint32_t)alu_delay_wait::SALU_CYCLE_1 + cycles - 1) << (imm ? 7 : 0); } - SALU_instruction* inst = + Instruction* inst = create_instruction(aco_opcode::s_delay_alu, Format::SOPP, 0, 0); - inst->imm = imm; + inst->salu().imm = imm; inst->pass_flags = (delay.valu_cycles | (delay.trans_cycles << 16)); instructions.emplace_back(inst); delay = alu_delay_info(); diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 1460a27dfdd..a6370885e44 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -380,7 +380,7 @@ emit_split_vector(isel_context* ctx, Temp vec_src, unsigned num_components) } else { rc = RegClass(vec_src.type(), vec_src.size() / num_components); } - aco_ptr split{create_instruction( + aco_ptr split{create_instruction( aco_opcode::p_split_vector, Format::PSEUDO, 1, num_components)}; split->operands[0] = Operand(vec_src); std::array elems; @@ -432,7 +432,7 @@ expand_vector(isel_context* ctx, Temp vec_src, Temp dst, unsigned num_components if (zero_padding) padding = bld.copy(bld.def(dst_rc), Operand::zero(component_bytes)); - aco_ptr vec{create_instruction( + aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)}; vec->definitions[0] = Definition(dst); unsigned k = 0; @@ -553,7 +553,7 @@ byte_align_vector(isel_context* ctx, Temp vec, Operand offset, Temp dst, unsigne if (dst.type() == RegType::vgpr) { /* if dst is vgpr - split the src and create a shrunk version according to the mask. */ num_components = dst.bytes() / component_size; - aco_ptr create_vec{create_instruction( + aco_ptr create_vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)}; for (unsigned i = 0; i < num_components; i++) create_vec->operands[i] = Operand(elems[i]); @@ -749,7 +749,7 @@ get_alu_src(struct isel_context* ctx, nir_alu_src src, unsigned size = 1) } else { assert(size <= 4); std::array elems; - aco_ptr vec_instr{create_instruction( + aco_ptr vec_instr{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, size, 1)}; for (unsigned i = 0; i < size; ++i) { elems[i] = emit_extract_vector(ctx, vec, src.swizzle[i], elem_rc); @@ -823,7 +823,7 @@ void emit_sop2_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst, bool writes_scc, uint8_t uses_ub = 0) { - aco_ptr sop2{ + aco_ptr sop2{ create_instruction(op, Format::SOP2, 2, writes_scc ? 2 : 1)}; sop2->operands[0] = Operand(get_alu_src(ctx, instr->src[0])); sop2->operands[1] = Operand(get_alu_src(ctx, instr->src[1])); @@ -1407,7 +1407,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) elems[i] = get_alu_src(ctx, instr->src[i]); if (instr->def.bit_size >= 32 || dst.type() == RegType::vgpr) { - aco_ptr vec{create_instruction( + aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, instr->def.num_components, 1)}; RegClass elem_rc = RegClass::get(RegType::vgpr, instr->def.bit_size / 8u); for (unsigned i = 0; i < num; ++i) { @@ -1484,7 +1484,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) if (dst.size() == 1) bld.copy(Definition(dst), packed[0]); else { - aco_ptr vec{create_instruction( + aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)}; vec->definitions[0] = Definition(dst); for (unsigned i = 0; i < dst.size(); ++i) @@ -3954,7 +3954,7 @@ visit_load_const(isel_context* ctx, nir_load_const_instr* instr) bld.copy(Definition(dst), Operand::c32(instr->value[0].u32)); } else { assert(dst.size() != 1); - aco_ptr vec{create_instruction( + aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)}; if (instr->def.bit_size == 64) for (unsigned i = 0; i < dst.size(); i++) @@ -3978,7 +3978,7 @@ emit_readfirstlane(isel_context* ctx, Temp src, Temp dst) } else if (src.size() == 1) { bld.vop1(aco_opcode::v_readfirstlane_b32, Definition(dst), src); } else { - aco_ptr split{create_instruction( + aco_ptr split{create_instruction( aco_opcode::p_split_vector, Format::PSEUDO, 1, src.size())}; split->operands[0] = Operand(src); @@ -3990,7 +3990,7 @@ emit_readfirstlane(isel_context* ctx, Temp src, Temp dst) Instruction* split_raw = split.get(); ctx->block->instructions.emplace_back(std::move(split)); - aco_ptr vec{create_instruction( + aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, src.size(), 1)}; vec->definitions[0] = Definition(dst); for (unsigned i = 0; i < src.size(); i++) { @@ -4246,7 +4246,7 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info, tmp[num_tmps++] = vals[i++]; } if (num_tmps > 1) { - aco_ptr vec{create_instruction( + aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, num_tmps, 1)}; for (unsigned j = 0; j < num_tmps; j++) vec->operands[j] = Operand(tmp[j]); @@ -4272,7 +4272,7 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info, allocated_vec[components_split++] = tmp[0]; } else { assert(tmp_size % elem_rc.bytes() == 0); - aco_ptr split{create_instruction( + aco_ptr split{create_instruction( aco_opcode::p_split_vector, Format::PSEUDO, 1, tmp_size / elem_rc.bytes())}; for (auto& def : split->definitions) { Temp component = bld.tmp(elem_rc); @@ -4305,7 +4305,7 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info, int padding_bytes = MAX2((int)info.dst.bytes() - int(allocated_vec[0].bytes() * info.num_components), 0); - aco_ptr vec{create_instruction( + aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, info.num_components + !!padding_bytes, 1)}; for (unsigned i = 0; i < info.num_components; i++) vec->operands[i] = Operand(allocated_vec[i]); @@ -4440,7 +4440,7 @@ smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned op = buffer ? aco_opcode::s_buffer_load_dwordx16 : aco_opcode::s_load_dwordx16; } - aco_ptr load{create_instruction(op, Format::SMEM, 2, 1)}; + aco_ptr load{create_instruction(op, Format::SMEM, 2, 1)}; if (buffer) { if (const_offset) offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset, @@ -4460,9 +4460,10 @@ smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned RegClass rc(RegType::sgpr, DIV_ROUND_UP(bytes_needed, 4u)); Temp val = dst_hint.id() && dst_hint.regClass() == rc ? dst_hint : bld.tmp(rc); load->definitions[0] = Definition(val); - load->glc = info.glc; - load->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3); - load->sync = info.sync; + load->smem().glc = info.glc; + load->smem().dlc = + info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3); + load->smem().sync = info.sync; bld.insert(std::move(load)); return val; } @@ -4514,18 +4515,19 @@ mubuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne bytes_size = 16; op = aco_opcode::buffer_load_dwordx4; } - aco_ptr mubuf{create_instruction(op, Format::MUBUF, 3, 1)}; + aco_ptr mubuf{create_instruction(op, Format::MUBUF, 3, 1)}; mubuf->operands[0] = Operand(info.resource); mubuf->operands[1] = vaddr; mubuf->operands[2] = soffset; - mubuf->offen = offen; - mubuf->idxen = idxen; - mubuf->glc = info.glc; - mubuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3); - mubuf->slc = info.slc; - mubuf->sync = info.sync; - mubuf->offset = const_offset; - mubuf->swizzled = info.swizzle_component_size != 0; + mubuf->mubuf().offen = offen; + mubuf->mubuf().idxen = idxen; + mubuf->mubuf().glc = info.glc; + mubuf->mubuf().dlc = + info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3); + mubuf->mubuf().slc = info.slc; + mubuf->mubuf().sync = info.sync; + mubuf->mubuf().offset = const_offset; + mubuf->mubuf().swizzled = info.swizzle_component_size != 0; RegClass rc = RegClass::get(RegType::vgpr, bytes_size); Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc); mubuf->definitions[0] = Definition(val); @@ -4581,17 +4583,18 @@ mubuf_load_format_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, } } - aco_ptr mubuf{create_instruction(op, Format::MUBUF, 3, 1)}; + aco_ptr mubuf{create_instruction(op, Format::MUBUF, 3, 1)}; mubuf->operands[0] = Operand(info.resource); mubuf->operands[1] = vaddr; mubuf->operands[2] = soffset; - mubuf->offen = offen; - mubuf->idxen = idxen; - mubuf->glc = info.glc; - mubuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3); - mubuf->slc = info.slc; - mubuf->sync = info.sync; - mubuf->offset = const_offset; + mubuf->mubuf().offen = offen; + mubuf->mubuf().idxen = idxen; + mubuf->mubuf().glc = info.glc; + mubuf->mubuf().dlc = + info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3); + mubuf->mubuf().slc = info.slc; + mubuf->mubuf().sync = info.sync; + mubuf->mubuf().offset = const_offset; RegClass rc = RegClass::get(RegType::vgpr, bytes_needed); Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc); mubuf->definitions[0] = Definition(val); @@ -4629,11 +4632,11 @@ scratch_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsig } RegClass rc = RegClass::get(RegType::vgpr, bytes_size); Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc); - aco_ptr flat{create_instruction(op, Format::SCRATCH, 2, 1)}; + aco_ptr flat{create_instruction(op, Format::SCRATCH, 2, 1)}; flat->operands[0] = offset.regClass() == s1 ? Operand(v1) : Operand(offset); flat->operands[1] = offset.regClass() == s1 ? Operand(offset) : Operand(s1); - flat->sync = info.sync; - flat->offset = const_offset; + flat->scratch().sync = info.sync; + flat->scratch().offset = const_offset; flat->definitions[0] = Definition(val); bld.insert(std::move(flat)); @@ -4793,21 +4796,20 @@ global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsign RegClass rc = RegClass::get(RegType::vgpr, bytes_size); Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc); if (use_mubuf) { - aco_ptr mubuf{ - create_instruction(op, Format::MUBUF, 3, 1)}; + aco_ptr mubuf{create_instruction(op, Format::MUBUF, 3, 1)}; mubuf->operands[0] = Operand(get_gfx6_global_rsrc(bld, addr)); mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1); mubuf->operands[2] = Operand(offset); - mubuf->glc = info.glc; - mubuf->dlc = false; - mubuf->offset = const_offset; - mubuf->addr64 = addr.type() == RegType::vgpr; - mubuf->disable_wqm = false; - mubuf->sync = info.sync; + mubuf->mubuf().glc = info.glc; + mubuf->mubuf().dlc = false; + mubuf->mubuf().offset = const_offset; + mubuf->mubuf().addr64 = addr.type() == RegType::vgpr; + mubuf->mubuf().disable_wqm = false; + mubuf->mubuf().sync = info.sync; mubuf->definitions[0] = Definition(val); bld.insert(std::move(mubuf)); } else { - aco_ptr flat{ + aco_ptr flat{ create_instruction(op, global ? Format::GLOBAL : Format::FLAT, 2, 1)}; if (addr.regClass() == s2) { assert(global && offset.id() && offset.type() == RegType::vgpr); @@ -4818,12 +4820,12 @@ global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsign flat->operands[0] = Operand(addr); flat->operands[1] = Operand(s1); } - flat->glc = info.glc; - flat->dlc = + flat->flatlike().glc = info.glc; + flat->flatlike().dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3); - flat->sync = info.sync; + flat->flatlike().sync = info.sync; assert(global || !const_offset); - flat->offset = const_offset; + flat->flatlike().offset = const_offset; flat->definitions[0] = Definition(val); bld.insert(std::move(flat)); } @@ -5178,7 +5180,7 @@ create_vec_from_array(isel_context* ctx, Temp arr[], unsigned cnt, RegType reg_t dst = bld.tmp(RegClass(reg_type, cnt * dword_size)); std::array allocated_vec; - aco_ptr instr{ + aco_ptr instr{ create_instruction(aco_opcode::p_create_vector, Format::PSEUDO, cnt, 1)}; instr->definitions[0] = Definition(dst); @@ -5553,7 +5555,7 @@ emit_load_frag_coord(isel_context* ctx, Temp dst, unsigned num_components) { Builder bld(ctx->program, ctx->block); - aco_ptr vec(create_instruction( + aco_ptr vec(create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)); for (unsigned i = 0; i < num_components; i++) { if (ctx->args->frag_pos[i].used) @@ -5617,7 +5619,7 @@ visit_load_interpolated_input(isel_context* ctx, nir_intrinsic_instr* instr) if (instr->def.num_components == 1) { emit_interp_instr(ctx, idx, component, coords, dst, prim_mask); } else { - aco_ptr vec(create_instruction( + aco_ptr vec(create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, instr->def.num_components, 1)); for (unsigned i = 0; i < instr->def.num_components; i++) { Temp tmp = ctx->program->allocateTmp(instr->def.bit_size == 16 ? v2b : v1); @@ -5709,19 +5711,20 @@ mtbuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne abort(); } - aco_ptr mtbuf{create_instruction(op, Format::MTBUF, 3, 1)}; + aco_ptr mtbuf{create_instruction(op, Format::MTBUF, 3, 1)}; mtbuf->operands[0] = Operand(info.resource); mtbuf->operands[1] = vaddr; mtbuf->operands[2] = soffset; - mtbuf->offen = offen; - mtbuf->idxen = idxen; - mtbuf->glc = info.glc; - mtbuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3); - mtbuf->slc = info.slc; - mtbuf->sync = info.sync; - mtbuf->offset = const_offset; - mtbuf->dfmt = fetch_fmt & 0xf; - mtbuf->nfmt = fetch_fmt >> 4; + mtbuf->mtbuf().offen = offen; + mtbuf->mtbuf().idxen = idxen; + mtbuf->mtbuf().glc = info.glc; + mtbuf->mtbuf().dlc = + info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3); + mtbuf->mtbuf().slc = info.slc; + mtbuf->mtbuf().sync = info.sync; + mtbuf->mtbuf().offset = const_offset; + mtbuf->mtbuf().dfmt = fetch_fmt & 0xf; + mtbuf->mtbuf().nfmt = fetch_fmt >> 4; RegClass rc = RegClass::get(RegType::vgpr, bytes_size); Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc); mtbuf->definitions[0] = Definition(val); @@ -5757,7 +5760,7 @@ visit_load_fs_input(isel_context* ctx, nir_intrinsic_instr* instr) unsigned num_components = instr->def.num_components; if (instr->def.bit_size == 64) num_components *= 2; - aco_ptr vec{create_instruction( + aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)}; for (unsigned i = 0; i < num_components; i++) { unsigned chan_component = (component + i) % 4; @@ -5876,7 +5879,7 @@ visit_load_push_constant(isel_context* ctx, nir_intrinsic_instr* instr) if ((ctx->args->inline_push_const_mask | mask) == ctx->args->inline_push_const_mask && start + count <= (sizeof(ctx->args->inline_push_const_mask) * 8u)) { std::array elems; - aco_ptr vec{create_instruction( + aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, count, 1)}; unsigned arg_index = util_bitcount64(ctx->args->inline_push_const_mask & BITFIELD64_MASK(start)); @@ -6066,7 +6069,7 @@ emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::v if (nsa_size < coords.size()) { Temp coord = coords[nsa_size]; if (coords.size() - nsa_size > 1) { - aco_ptr vec{create_instruction( + aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, coords.size() - nsa_size, 1)}; unsigned coord_size = 0; @@ -6088,7 +6091,7 @@ emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::v bool has_dst = dst.id() != 0; - aco_ptr mimg{ + aco_ptr mimg{ create_instruction(op, Format::MIMG, 3 + coords.size(), has_dst)}; if (has_dst) mimg->definitions[0] = Definition(dst); @@ -6100,11 +6103,9 @@ emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::v if (coords[i].regClass().is_linear_vgpr()) mimg->operands[3 + i].setLateKill(true); } - mimg->strict_wqm = strict_wqm; + mimg->mimg().strict_wqm = strict_wqm; - MIMG_instruction* res = mimg.get(); - bld.insert(std::move(mimg)); - return res; + return &bld.insert(std::move(mimg))->mimg(); } void @@ -6257,8 +6258,8 @@ emit_tfe_init(Builder& bld, Temp dst) { Temp tmp = bld.tmp(dst.regClass()); - aco_ptr vec{create_instruction( - aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)}; + aco_ptr vec{create_instruction(aco_opcode::p_create_vector, + Format::PSEUDO, dst.size(), 1)}; for (unsigned i = 0; i < dst.size(); i++) vec->operands[i] = Operand::zero(); vec->definitions[0] = Definition(tmp); @@ -6332,19 +6333,19 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr) default: unreachable(">4 channel buffer image load"); } } - aco_ptr load{ + aco_ptr load{ create_instruction(opcode, Format::MUBUF, 3 + is_sparse, 1)}; load->operands[0] = Operand(resource); load->operands[1] = Operand(vindex); load->operands[2] = Operand::c32(0); load->definitions[0] = Definition(tmp); - load->idxen = true; - load->glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT); - load->dlc = - load->glc && (ctx->options->gfx_level == GFX10 || ctx->options->gfx_level == GFX10_3); - load->sync = sync; - load->tfe = is_sparse; - if (load->tfe) + load->mubuf().idxen = true; + load->mubuf().glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT); + load->mubuf().dlc = load->mubuf().glc && + (ctx->options->gfx_level == GFX10 || ctx->options->gfx_level == GFX10_3); + load->mubuf().sync = sync; + load->mubuf().tfe = is_sparse; + if (load->mubuf().tfe) load->operands[3] = emit_tfe_init(bld, tmp); ctx->block->instructions.emplace_back(std::move(load)); } else { @@ -6446,7 +6447,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr) if (dmask_count == 1) { data = emit_extract_vector(ctx, data, ffs(dmask) - 1, rc); } else { - aco_ptr vec{create_instruction( + aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, dmask_count, 1)}; uint32_t index = 0; u_foreach_bit (bit, dmask) { @@ -6480,17 +6481,17 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr) default: unreachable(">4 channel buffer image store"); } } - aco_ptr store{ + aco_ptr store{ create_instruction(opcode, Format::MUBUF, 4, 0)}; store->operands[0] = Operand(rsrc); store->operands[1] = Operand(vindex); store->operands[2] = Operand::c32(0); store->operands[3] = Operand(data); - store->idxen = true; - store->glc = glc; - store->dlc = false; - store->disable_wqm = true; - store->sync = sync; + store->mubuf().idxen = true; + store->mubuf().glc = glc; + store->mubuf().dlc = false; + store->mubuf().disable_wqm = true; + store->mubuf().sync = sync; ctx->program->needs_exact = true; ctx->block->instructions.emplace_back(std::move(store)); return; @@ -6634,7 +6635,7 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr) Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); // assert(ctx->options->gfx_level < GFX9 && "GFX9 stride size workaround not yet // implemented."); - aco_ptr mubuf{create_instruction( + aco_ptr mubuf{create_instruction( is_64bit ? buf_op64 : buf_op, Format::MUBUF, 4, return_previous ? 1 : 0)}; mubuf->operands[0] = Operand(resource); mubuf->operands[1] = Operand(vindex); @@ -6644,12 +6645,12 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr) return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition(); if (return_previous) mubuf->definitions[0] = def; - mubuf->offset = 0; - mubuf->idxen = true; - mubuf->glc = return_previous; - mubuf->dlc = false; /* Not needed for atomics */ - mubuf->disable_wqm = true; - mubuf->sync = sync; + mubuf->mubuf().offset = 0; + mubuf->mubuf().idxen = true; + mubuf->mubuf().glc = return_previous; + mubuf->mubuf().dlc = false; /* Not needed for atomics */ + mubuf->mubuf().disable_wqm = true; + mubuf->mubuf().sync = sync; ctx->program->needs_exact = true; ctx->block->instructions.emplace_back(std::move(mubuf)); if (return_previous && cmpswap) @@ -6728,18 +6729,17 @@ visit_store_ssbo(isel_context* ctx, nir_intrinsic_instr* instr) for (unsigned i = 0; i < write_count; i++) { aco_opcode op = get_buffer_store_op(write_datas[i].bytes()); - aco_ptr store{ - create_instruction(op, Format::MUBUF, 4, 0)}; + aco_ptr store{create_instruction(op, Format::MUBUF, 4, 0)}; store->operands[0] = Operand(rsrc); store->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1); store->operands[2] = offset.type() == RegType::sgpr ? Operand(offset) : Operand::c32(0); store->operands[3] = Operand(write_datas[i]); - store->offset = offsets[i]; - store->offen = (offset.type() == RegType::vgpr); - store->glc = glc; - store->dlc = false; - store->disable_wqm = true; - store->sync = sync; + store->mubuf().offset = offsets[i]; + store->mubuf().offen = (offset.type() == RegType::vgpr); + store->mubuf().glc = glc; + store->mubuf().dlc = false; + store->mubuf().disable_wqm = true; + store->mubuf().sync = sync; ctx->program->needs_exact = true; ctx->block->instructions.emplace_back(std::move(store)); } @@ -6767,7 +6767,7 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr) Temp dst = get_ssa_temp(ctx, &instr->def); aco_opcode op = instr->def.bit_size == 32 ? op32 : op64; - aco_ptr mubuf{ + aco_ptr mubuf{ create_instruction(op, Format::MUBUF, 4, return_previous ? 1 : 0)}; mubuf->operands[0] = Operand(rsrc); mubuf->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1); @@ -6777,12 +6777,12 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr) return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition(); if (return_previous) mubuf->definitions[0] = def; - mubuf->offset = 0; - mubuf->offen = (offset.type() == RegType::vgpr); - mubuf->glc = return_previous; - mubuf->dlc = false; /* Not needed for atomics */ - mubuf->disable_wqm = true; - mubuf->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw); + mubuf->mubuf().offset = 0; + mubuf->mubuf().offen = (offset.type() == RegType::vgpr); + mubuf->mubuf().glc = return_previous; + mubuf->mubuf().dlc = false; /* Not needed for atomics */ + mubuf->mubuf().disable_wqm = true; + mubuf->mubuf().sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw); ctx->program->needs_exact = true; ctx->block->instructions.emplace_back(std::move(mubuf)); if (return_previous && cmpswap) @@ -6901,7 +6901,7 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr) default: unreachable("store_global not implemented for this size."); } - aco_ptr flat{ + aco_ptr flat{ create_instruction(op, global ? Format::GLOBAL : Format::FLAT, 3, 0)}; if (write_address.regClass() == s2) { assert(global && write_offset.id() && write_offset.type() == RegType::vgpr); @@ -6913,12 +6913,12 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr) flat->operands[1] = Operand(s1); } flat->operands[2] = Operand(write_datas[i]); - flat->glc = glc; - flat->dlc = false; + flat->flatlike().glc = glc; + flat->flatlike().dlc = false; assert(global || !write_const_offset); - flat->offset = write_const_offset; - flat->disable_wqm = true; - flat->sync = sync; + flat->flatlike().offset = write_const_offset; + flat->flatlike().disable_wqm = true; + flat->flatlike().sync = sync; ctx->program->needs_exact = true; ctx->block->instructions.emplace_back(std::move(flat)); } else { @@ -6928,19 +6928,18 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr) Temp rsrc = get_gfx6_global_rsrc(bld, write_address); - aco_ptr mubuf{ - create_instruction(op, Format::MUBUF, 4, 0)}; + aco_ptr mubuf{create_instruction(op, Format::MUBUF, 4, 0)}; mubuf->operands[0] = Operand(rsrc); mubuf->operands[1] = write_address.type() == RegType::vgpr ? Operand(write_address) : Operand(v1); mubuf->operands[2] = Operand(write_offset); mubuf->operands[3] = Operand(write_datas[i]); - mubuf->glc = glc; - mubuf->dlc = false; - mubuf->offset = write_const_offset; - mubuf->addr64 = write_address.type() == RegType::vgpr; - mubuf->disable_wqm = true; - mubuf->sync = sync; + mubuf->mubuf().glc = glc; + mubuf->mubuf().dlc = false; + mubuf->mubuf().offset = write_const_offset; + mubuf->mubuf().addr64 = write_address.type() == RegType::vgpr; + mubuf->mubuf().disable_wqm = true; + mubuf->mubuf().sync = sync; ctx->program->needs_exact = true; ctx->block->instructions.emplace_back(std::move(mubuf)); } @@ -7029,7 +7028,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr) } aco_opcode op = instr->def.bit_size == 32 ? op32 : op64; - aco_ptr flat{create_instruction( + aco_ptr flat{create_instruction( op, global ? Format::GLOBAL : Format::FLAT, 3, return_previous ? 1 : 0)}; if (addr.regClass() == s2) { assert(global && offset.id() && offset.type() == RegType::vgpr); @@ -7043,12 +7042,12 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr) flat->operands[2] = Operand(data); if (return_previous) flat->definitions[0] = Definition(dst); - flat->glc = return_previous; - flat->dlc = false; /* Not needed for atomics */ + flat->flatlike().glc = return_previous; + flat->flatlike().dlc = false; /* Not needed for atomics */ assert(global || !const_offset); - flat->offset = const_offset; - flat->disable_wqm = true; - flat->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw); + flat->flatlike().offset = const_offset; + flat->flatlike().disable_wqm = true; + flat->flatlike().sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw); ctx->program->needs_exact = true; ctx->block->instructions.emplace_back(std::move(flat)); } else { @@ -7061,7 +7060,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr) aco_opcode op = instr->def.bit_size == 32 ? op32 : op64; - aco_ptr mubuf{ + aco_ptr mubuf{ create_instruction(op, Format::MUBUF, 4, return_previous ? 1 : 0)}; mubuf->operands[0] = Operand(rsrc); mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1); @@ -7071,12 +7070,12 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr) return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition(); if (return_previous) mubuf->definitions[0] = def; - mubuf->glc = return_previous; - mubuf->dlc = false; - mubuf->offset = const_offset; - mubuf->addr64 = addr.type() == RegType::vgpr; - mubuf->disable_wqm = true; - mubuf->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw); + mubuf->mubuf().glc = return_previous; + mubuf->mubuf().dlc = false; + mubuf->mubuf().offset = const_offset; + mubuf->mubuf().addr64 = addr.type() == RegType::vgpr; + mubuf->mubuf().disable_wqm = true; + mubuf->mubuf().sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw); ctx->program->needs_exact = true; ctx->block->instructions.emplace_back(std::move(mubuf)); if (return_previous && cmpswap) @@ -7473,7 +7472,7 @@ visit_shared_atomic(isel_context* ctx, nir_intrinsic_instr* instr) offset = 0; } - aco_ptr ds; + aco_ptr ds; ds.reset( create_instruction(op, Format::DS, num_operands, return_previous ? 1 : 0)); ds->operands[0] = Operand(address); @@ -7485,10 +7484,10 @@ visit_shared_atomic(isel_context* ctx, nir_intrinsic_instr* instr) std::swap(ds->operands[1], ds->operands[2]); } ds->operands[num_operands - 1] = m; - ds->offset0 = offset; + ds->ds().offset0 = offset; if (return_previous) ds->definitions[0] = Definition(get_ssa_temp(ctx, &instr->def)); - ds->sync = memory_sync_info(storage_shared, semantic_atomicrmw); + ds->ds().sync = memory_sync_info(storage_shared, semantic_atomicrmw); if (m.isUndefined()) ds->operands.pop_back(); @@ -7916,7 +7915,7 @@ emit_reduction_instr(isel_context* ctx, aco_opcode aco_op, ReduceOp op, unsigned if (clobber_vcc) defs[num_defs++] = bld.def(bld.lm, vcc); - Pseudo_reduction_instruction* reduce = create_instruction( + Instruction* reduce = create_instruction( aco_op, Format::PSEUDO_REDUCTION, 3, num_defs); reduce->operands[0] = Operand(src); /* setup_reduce_temp will update these undef operands if needed */ @@ -7924,8 +7923,8 @@ emit_reduction_instr(isel_context* ctx, aco_opcode aco_op, ReduceOp op, unsigned reduce->operands[2] = Operand(v1.as_linear()); std::copy(defs, defs + num_defs, reduce->definitions.begin()); - reduce->reduce_op = op; - reduce->cluster_size = cluster_size; + reduce->reduction().reduce_op = op; + reduce->reduction().cluster_size = cluster_size; bld.insert(std::move(reduce)); return dst.getTemp(); @@ -8111,7 +8110,7 @@ create_fs_dual_src_export_gfx11(isel_context* ctx, const struct aco_export_mrt* { Builder bld(ctx->program, ctx->block); - aco_ptr exp{create_instruction( + aco_ptr exp{create_instruction( aco_opcode::p_dual_src_export_gfx11, Format::PSEUDO, 8, 6)}; for (unsigned i = 0; i < 4; i++) { exp->operands[i] = mrt0 ? mrt0->out[i] : Operand(v1); @@ -9149,7 +9148,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m, offset0, offset1, true); ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile); - aco_ptr vec{create_instruction( + aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, instr->num_components, 1)}; unsigned write_mask = nir_intrinsic_write_mask(instr); @@ -9209,36 +9208,37 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) const bool row_en = instr->intrinsic == nir_intrinsic_export_row_amd; - aco_ptr exp{ + aco_ptr exp{ create_instruction(aco_opcode::exp, Format::EXP, 4 + row_en, 0)}; - exp->dest = target; - exp->enabled_mask = write_mask; - exp->compressed = flags & AC_EXP_FLAG_COMPRESSED; + exp->exp().dest = target; + exp->exp().enabled_mask = write_mask; + exp->exp().compressed = flags & AC_EXP_FLAG_COMPRESSED; /* ACO may reorder position/mrt export instructions, then mark done for last * export instruction. So don't respect the nir AC_EXP_FLAG_DONE for position/mrt * exports here and leave it to ACO. */ if (target == V_008DFC_SQ_EXP_PRIM) - exp->done = flags & AC_EXP_FLAG_DONE; + exp->exp().done = flags & AC_EXP_FLAG_DONE; else - exp->done = false; + exp->exp().done = false; /* ACO may reorder mrt export instructions, then mark valid mask for last * export instruction. So don't respect the nir AC_EXP_FLAG_VALID_MASK for mrt * exports here and leave it to ACO. */ if (target > V_008DFC_SQ_EXP_NULL) - exp->valid_mask = flags & AC_EXP_FLAG_VALID_MASK; + exp->exp().valid_mask = flags & AC_EXP_FLAG_VALID_MASK; else - exp->valid_mask = false; + exp->exp().valid_mask = false; - exp->row_en = row_en; + exp->exp().row_en = row_en; /* Compressed export uses two bits for a channel. */ - uint32_t channel_mask = - exp->compressed ? (write_mask & 0x3 ? 1 : 0) | (write_mask & 0xc ? 2 : 0) : write_mask; + uint32_t channel_mask = exp->exp().compressed + ? (write_mask & 0x3 ? 1 : 0) | (write_mask & 0xc ? 2 : 0) + : write_mask; Temp value = get_ssa_temp(ctx, instr->src[0].ssa); for (unsigned i = 0; i < 4; i++) { @@ -9287,7 +9287,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) if (it != ctx->allocated_vec.end()) num_src = src.bytes() / it->second[0].bytes(); - aco_ptr vec{create_instruction( + aco_ptr vec{create_instruction( aco_opcode::p_start_linear_vgpr, Format::PSEUDO, num_src + !!begin_size, 1)}; if (begin_size) @@ -9730,15 +9730,15 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) } } - aco_ptr mubuf{ + aco_ptr mubuf{ create_instruction(op, Format::MUBUF, 3 + instr->is_sparse, 1)}; mubuf->operands[0] = Operand(resource); mubuf->operands[1] = Operand(coords[0]); mubuf->operands[2] = Operand::c32(0); mubuf->definitions[0] = Definition(tmp_dst); - mubuf->idxen = true; - mubuf->tfe = instr->is_sparse; - if (mubuf->tfe) + mubuf->mubuf().idxen = true; + mubuf->mubuf().tfe = instr->is_sparse; + if (mubuf->mubuf().tfe) mubuf->operands[3] = emit_tfe_init(bld, tmp_dst); ctx->block->instructions.emplace_back(std::move(mubuf)); @@ -10008,7 +10008,7 @@ get_phi_operand(isel_context* ctx, nir_def* ssa, RegClass rc, bool logical) void visit_phi(isel_context* ctx, nir_phi_instr* instr) { - aco_ptr phi; + aco_ptr phi; Temp dst = get_ssa_temp(ctx, &instr->def); assert(instr->def.bit_size != 1 || dst.regClass() == ctx->program->lane_mask); @@ -10111,7 +10111,7 @@ visit_undef(isel_context* ctx, nir_undef_instr* instr) if (dst.size() == 1) { Builder(ctx->program, ctx->block).copy(Definition(dst), Operand::zero()); } else { - aco_ptr vec{create_instruction( + aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)}; for (unsigned i = 0; i < dst.size(); i++) vec->operands[i] = Operand::zero(); @@ -10210,7 +10210,7 @@ end_loop(isel_context* ctx, loop_context* lc) /* trim linear phis in loop header */ for (auto&& instr : loop_entry->instructions) { if (instr->opcode == aco_opcode::p_linear_phi) { - aco_ptr new_phi{create_instruction(aco_opcode::p_linear_phi, Format::PSEUDO, loop_entry->linear_predecessors.size(), 1)}; + aco_ptr new_phi{create_instruction(aco_opcode::p_linear_phi, Format::PSEUDO, loop_entry->linear_predecessors.size(), 1)}; new_phi->definitions[0] = instr->definitions[0]; for (unsigned i = 0; i < new_phi->operands.size(); i++) new_phi->operands[i] = instr->operands[i]; @@ -10484,13 +10484,14 @@ begin_divergent_if_then(isel_context* ctx, if_context* ic, Temp cond, /* branch to linear then block */ assert(cond.regClass() == ctx->program->lane_mask); - aco_ptr branch; + aco_ptr branch; branch.reset(create_instruction(aco_opcode::p_cbranch_z, Format::PSEUDO_BRANCH, 1, 1)); branch->definitions[0] = Definition(ctx->program->allocateTmp(s2)); branch->operands[0] = Operand(cond); - branch->selection_control_remove = sel_ctrl == nir_selection_control_flatten || - sel_ctrl == nir_selection_control_divergent_always_taken; + branch->branch().selection_control_remove = + sel_ctrl == nir_selection_control_flatten || + sel_ctrl == nir_selection_control_divergent_always_taken; ctx->block->instructions.push_back(std::move(branch)); ic->BB_if_idx = ctx->block->index; @@ -10528,7 +10529,7 @@ begin_divergent_if_else(isel_context* ctx, if_context* ic, Block* BB_then_logical = ctx->block; append_logical_end(BB_then_logical); /* branch from logical then block to invert block */ - aco_ptr branch; + aco_ptr branch; branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); branch->definitions[0] = Definition(ctx->program->allocateTmp(s2)); @@ -10561,8 +10562,9 @@ begin_divergent_if_else(isel_context* ctx, if_context* ic, branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); branch->definitions[0] = Definition(ctx->program->allocateTmp(s2)); - branch->selection_control_remove = sel_ctrl == nir_selection_control_flatten || - sel_ctrl == nir_selection_control_divergent_always_taken; + branch->branch().selection_control_remove = + sel_ctrl == nir_selection_control_flatten || + sel_ctrl == nir_selection_control_divergent_always_taken; ctx->block->instructions.push_back(std::move(branch)); ic->exec_potentially_empty_discard_old |= ctx->cf_info.exec_potentially_empty_discard; @@ -10593,7 +10595,7 @@ end_divergent_if(isel_context* ctx, if_context* ic) append_logical_end(BB_else_logical); /* branch from logical else block to endif block */ - aco_ptr branch; + aco_ptr branch; branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); branch->definitions[0] = Definition(ctx->program->allocateTmp(s2)); @@ -10650,7 +10652,7 @@ begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond) append_logical_end(ctx->block); ctx->block->kind |= block_kind_uniform; - aco_ptr branch; + aco_ptr branch; aco_opcode branch_opcode = aco_opcode::p_cbranch_z; branch.reset( create_instruction(branch_opcode, Format::PSEUDO_BRANCH, 1, 1)); @@ -10687,7 +10689,7 @@ begin_uniform_if_else(isel_context* ctx, if_context* ic) if (!ic->uniform_has_then_branch) { append_logical_end(BB_then); /* branch from then block to endif block */ - aco_ptr branch; + aco_ptr branch; branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); branch->definitions[0] = Definition(ctx->program->allocateTmp(s2)); @@ -10719,7 +10721,7 @@ end_uniform_if(isel_context* ctx, if_context* ic) if (!ctx->cf_info.has_branch) { append_logical_end(BB_else); /* branch from then block to endif block */ - aco_ptr branch; + aco_ptr branch; branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); branch->definitions[0] = Definition(ctx->program->allocateTmp(s2)); @@ -10747,7 +10749,7 @@ visit_if(isel_context* ctx, nir_if* if_stmt) { Temp cond = get_ssa_temp(ctx, if_stmt->condition.ssa); Builder bld(ctx->program, ctx->block); - aco_ptr branch; + aco_ptr branch; if_context ic; if (!nir_src_is_divergent(if_stmt->condition)) { /* uniform condition */ @@ -11138,7 +11140,7 @@ create_fs_jump_to_epilog(isel_context* ctx) Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.epilog_pc)); - aco_ptr jump{create_instruction( + aco_ptr jump{create_instruction( aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + exports.size(), 0)}; jump->operands[0] = Operand(continue_pc); for (unsigned i = 0; i < exports.size(); i++) { @@ -11192,8 +11194,8 @@ passthrough_all_args(isel_context* ctx, std::vector& regs) static void build_end_with_regs(isel_context* ctx, std::vector& regs) { - aco_ptr end{create_instruction( - aco_opcode::p_end_with_regs, Format::PSEUDO, regs.size(), 0)}; + aco_ptr end{create_instruction(aco_opcode::p_end_with_regs, + Format::PSEUDO, regs.size(), 0)}; for (unsigned i = 0; i < regs.size(); i++) end->operands[i] = regs[i]; @@ -11240,7 +11242,7 @@ create_tcs_jump_to_epilog(isel_context* ctx) Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.epilog_pc)); - aco_ptr jump{ + aco_ptr jump{ create_instruction(aco_opcode::p_jump_to_epilog, Format::PSEUDO, 14, 0)}; jump->operands[0] = Operand(continue_pc); jump->operands[1] = ring_offsets; @@ -11374,7 +11376,7 @@ create_fs_end_for_epilog(isel_context* ctx) ctx->program->needs_exact = true; } -Pseudo_instruction* +Instruction* add_startpgm(struct isel_context* ctx) { unsigned def_count = 0; @@ -11388,7 +11390,7 @@ add_startpgm(struct isel_context* ctx) def_count++; } - Pseudo_instruction* startpgm = + Instruction* startpgm = create_instruction(aco_opcode::p_startpgm, Format::PSEUDO, 0, def_count); ctx->block->instructions.emplace_back(startpgm); for (unsigned i = 0, arg = 0; i < ctx->args->arg_count; i++) { @@ -11478,7 +11480,7 @@ fix_ls_vgpr_init_bug(isel_context* ctx) } void -split_arguments(isel_context* ctx, Pseudo_instruction* startpgm) +split_arguments(isel_context* ctx, Instruction* startpgm) { /* Split all arguments except for the first (ring_offsets) and the last * (exec) so that the dead channels don't stay live throughout the program. @@ -11645,7 +11647,7 @@ insert_rt_jump_next(isel_context& ctx, const struct ac_shader_args* args) for (unsigned i = 0; i < ctx.args->arg_count; i++) src_count += !!BITSET_TEST(ctx.output_args, i); - Pseudo_instruction* ret = + Instruction* ret = create_instruction(aco_opcode::p_return, Format::PSEUDO, src_count, 0); ctx.block->instructions.emplace_back(ret); @@ -11682,7 +11684,7 @@ select_program_rt(isel_context& ctx, unsigned shader_count, struct nir_shader* c init_context(&ctx, nir); setup_fp_mode(&ctx, nir); - Pseudo_instruction* startpgm = add_startpgm(&ctx); + Instruction* startpgm = add_startpgm(&ctx); append_logical_start(ctx.block); split_arguments(&ctx, startpgm); visit_cf_list(&ctx, &nir_shader_get_entrypoint(nir)->body); @@ -11839,7 +11841,7 @@ create_merged_jump_to_epilog(isel_context* ctx) Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.next_stage_pc)); - aco_ptr jump{create_instruction( + aco_ptr jump{create_instruction( aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + regs.size(), 0)}; jump->operands[0] = Operand(continue_pc); for (unsigned i = 0; i < regs.size(); i++) { @@ -11884,7 +11886,7 @@ select_shader(isel_context& ctx, nir_shader* nir, const bool need_startpgm, cons if (need_startpgm) { /* Needs to be after init_context() for FS. */ - Pseudo_instruction* startpgm = add_startpgm(&ctx); + Instruction* startpgm = add_startpgm(&ctx); append_logical_start(ctx.block); if (ctx.options->has_ls_vgpr_init_bug && ctx.stage == vertex_tess_control_hs && diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index ccbdf64a426..0b1791139c7 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1671,7 +1671,7 @@ struct instr_deleter_functor { template using aco_ptr = std::unique_ptr; template -T* +Instruction* create_instruction(aco_opcode opcode, Format format, uint32_t num_operands, uint32_t num_definitions) { @@ -1679,7 +1679,7 @@ create_instruction(aco_opcode opcode, Format format, uint32_t num_operands, sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition); void* data = instruction_buffer->allocate(size, alignof(uint32_t)); memset(data, 0, size); - T* inst = (T*)data; + Instruction* inst = (Instruction*)data; inst->opcode = opcode; inst->format = format; diff --git a/src/amd/compiler/aco_lower_phis.cpp b/src/amd/compiler/aco_lower_phis.cpp index bd803c68b5f..3948b8ddd49 100644 --- a/src/amd/compiler/aco_lower_phis.cpp +++ b/src/amd/compiler/aco_lower_phis.cpp @@ -108,8 +108,8 @@ get_output(Program* program, unsigned block_idx, ssa_state* state) } /* create phi */ - aco_ptr phi{create_instruction( - aco_opcode::p_linear_phi, Format::PSEUDO, num_preds, 1)}; + aco_ptr phi{create_instruction(aco_opcode::p_linear_phi, + Format::PSEUDO, num_preds, 1)}; for (unsigned i = 0; i < num_preds; i++) phi->operands[i] = state->outputs[block.linear_preds[i]]; phi->definitions[0] = Definition(output.getTemp()); @@ -347,8 +347,8 @@ lower_divergent_bool_phi(Program* program, ssa_state* state, Block* block, unsigned num_preds = block->linear_preds.size(); if (phi->operands.size() != num_preds) { - Pseudo_instruction* new_phi{create_instruction( - aco_opcode::p_linear_phi, Format::PSEUDO, num_preds, 1)}; + Instruction* new_phi{create_instruction(aco_opcode::p_linear_phi, + Format::PSEUDO, num_preds, 1)}; new_phi->definitions[0] = phi->definitions[0]; phi.reset(new_phi); } else { diff --git a/src/amd/compiler/aco_lower_to_cssa.cpp b/src/amd/compiler/aco_lower_to_cssa.cpp index 65fcdac40b9..62e02b6d71f 100644 --- a/src/amd/compiler/aco_lower_to_cssa.cpp +++ b/src/amd/compiler/aco_lower_to_cssa.cpp @@ -424,8 +424,8 @@ emit_copies_block(Builder& bld, std::map& ltg, RegType type) // TODO: this should be restricted to a feasible number of registers // and otherwise use a temporary to avoid having to reload more (spilled) // variables than we have registers. - aco_ptr copy{create_instruction( - aco_opcode::p_parallelcopy, Format::PSEUDO, num, num)}; + aco_ptr copy{create_instruction(aco_opcode::p_parallelcopy, + Format::PSEUDO, num, num)}; it = ltg.begin(); for (unsigned i = 0; i < num; i++) { while (it->second.cp.def.regClass().type() != type) diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 7d63bab1c1e..6de7d449d23 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -601,13 +601,13 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c if (src.regClass() == v1b) { if (ctx->program->gfx_level >= GFX8 && ctx->program->gfx_level < GFX11) { - aco_ptr sdwa{create_instruction( + aco_ptr sdwa{create_instruction( aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)}; sdwa->operands[0] = Operand(PhysReg{tmp}, v1); sdwa->definitions[0] = Definition(PhysReg{tmp}, v1); bool sext = reduce_op == imin8 || reduce_op == imax8; - sdwa->sel[0] = SubdwordSel(1, 0, sext); - sdwa->dst_sel = SubdwordSel::dword; + sdwa->sdwa().sel[0] = SubdwordSel(1, 0, sext); + sdwa->sdwa().dst_sel = SubdwordSel::dword; bld.insert(std::move(sdwa)); } else { aco_opcode opcode; @@ -624,13 +624,13 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c bool is_add_cmp = reduce_op == iadd16 || reduce_op == imax16 || reduce_op == imin16 || reduce_op == umin16 || reduce_op == umax16; if (ctx->program->gfx_level >= GFX10 && ctx->program->gfx_level < GFX11 && is_add_cmp) { - aco_ptr sdwa{create_instruction( + aco_ptr sdwa{create_instruction( aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)}; sdwa->operands[0] = Operand(PhysReg{tmp}, v1); sdwa->definitions[0] = Definition(PhysReg{tmp}, v1); bool sext = reduce_op == imin16 || reduce_op == imax16 || reduce_op == iadd16; - sdwa->sel[0] = SubdwordSel(2, 0, sext); - sdwa->dst_sel = SubdwordSel::dword; + sdwa->sdwa().sel[0] = SubdwordSel(2, 0, sext); + sdwa->sdwa().dst_sel = SubdwordSel::dword; bld.insert(std::move(sdwa)); } else if (ctx->program->gfx_level <= GFX7 || (ctx->program->gfx_level >= GFX11 && is_add_cmp)) { @@ -2259,7 +2259,7 @@ lower_image_sample(lower_context* ctx, aco_ptr& instr) instr->mimg().strict_wqm = false; if ((3 + num_vaddr) > instr->operands.size()) { - MIMG_instruction* new_instr = create_instruction( + Instruction* new_instr = create_instruction( instr->opcode, Format::MIMG, 3 + num_vaddr, instr->definitions.size()); std::copy(instr->definitions.cbegin(), instr->definitions.cend(), new_instr->definitions.begin()); diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 59f134f976c..725b3dd5082 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -905,7 +905,7 @@ smem_combine(opt_ctx& ctx, aco_ptr& instr) smem.operands.back() = Operand(base); } } else { - SMEM_instruction* new_instr = create_instruction( + Instruction* new_instr = create_instruction( smem.opcode, Format::SMEM, smem.operands.size() + 1, smem.definitions.size()); new_instr->operands[0] = smem.operands[0]; new_instr->operands[1] = Operand::c32(offset); @@ -914,11 +914,11 @@ smem_combine(opt_ctx& ctx, aco_ptr& instr) new_instr->operands.back() = Operand(base); if (!smem.definitions.empty()) new_instr->definitions[0] = smem.definitions[0]; - new_instr->sync = smem.sync; - new_instr->glc = smem.glc; - new_instr->dlc = smem.dlc; - new_instr->nv = smem.nv; - new_instr->disable_wqm = smem.disable_wqm; + new_instr->smem().sync = smem.sync; + new_instr->smem().glc = smem.glc; + new_instr->smem().dlc = smem.dlc; + new_instr->smem().nv = smem.nv; + new_instr->smem().disable_wqm = smem.disable_wqm; instr.reset(new_instr); } } @@ -2312,10 +2312,10 @@ combine_ordering_test(opt_ctx& ctx, aco_ptr& instr) case 64: new_op = is_or ? aco_opcode::v_cmp_u_f64 : aco_opcode::v_cmp_o_f64; break; } bool needs_vop3 = num_sgprs > 1 || (opsel[0] && op[0].type() != RegType::vgpr); - VALU_instruction* new_instr = create_instruction( + Instruction* new_instr = create_instruction( new_op, needs_vop3 ? asVOP3(Format::VOPC) : Format::VOPC, 2, 1); - new_instr->opsel = opsel; + new_instr->valu().opsel = opsel; new_instr->operands[0] = copy_operand(ctx, Operand(op[0])); new_instr->operands[1] = copy_operand(ctx, Operand(op[1])); new_instr->definitions[0] = instr->definitions[0]; @@ -2381,13 +2381,13 @@ combine_comparison_ordering(opt_ctx& ctx, aco_ptr& instr) return false; aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode); - VALU_instruction* new_instr = create_instruction( + Instruction* new_instr = create_instruction( new_op, cmp->isVOP3() ? asVOP3(Format::VOPC) : Format::VOPC, 2, 1); - new_instr->neg = cmp_valu.neg; - new_instr->abs = cmp_valu.abs; - new_instr->clamp = cmp_valu.clamp; - new_instr->omod = cmp_valu.omod; - new_instr->opsel = cmp_valu.opsel; + new_instr->valu().neg = cmp_valu.neg; + new_instr->valu().abs = cmp_valu.abs; + new_instr->valu().clamp = cmp_valu.clamp; + new_instr->valu().omod = cmp_valu.omod; + new_instr->valu().opsel = cmp_valu.opsel; new_instr->operands[0] = copy_operand(ctx, cmp->operands[0]); new_instr->operands[1] = copy_operand(ctx, cmp->operands[1]); new_instr->definitions[0] = instr->definitions[0]; @@ -2701,12 +2701,12 @@ create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr& instr Operand operands[3], uint8_t neg, uint8_t abs, uint8_t opsel, bool clamp, unsigned omod) { - VALU_instruction* new_instr = create_instruction(opcode, Format::VOP3, 3, 1); - new_instr->neg = neg; - new_instr->abs = abs; - new_instr->clamp = clamp; - new_instr->omod = omod; - new_instr->opsel = opsel; + Instruction* new_instr = create_instruction(opcode, Format::VOP3, 3, 1); + new_instr->valu().neg = neg; + new_instr->valu().abs = abs; + new_instr->valu().clamp = clamp; + new_instr->valu().omod = omod; + new_instr->valu().opsel = opsel; new_instr->operands[0] = operands[0]; new_instr->operands[1] = operands[1]; new_instr->operands[2] = operands[2]; @@ -3746,7 +3746,7 @@ combine_add_lshl(opt_ctx& ctx, aco_ptr& instr, bool is_sub) ctx.uses[instr->operands[i].tempId()]--; aco_opcode mad_op = is_sub ? aco_opcode::v_mad_i32_i24 : aco_opcode::v_mad_u32_u24; - aco_ptr new_instr{ + aco_ptr new_instr{ create_instruction(mad_op, Format::VOP3, 3, 1)}; for (unsigned op_idx = 0; op_idx < 3; ++op_idx) new_instr->operands[op_idx] = ops[op_idx]; @@ -3930,23 +3930,23 @@ combine_vop3p(opt_ctx& ctx, aco_ptr& instr) /* turn mul + packed add into v_pk_fma_f16 */ aco_opcode mad = fadd ? aco_opcode::v_pk_fma_f16 : aco_opcode::v_pk_mad_u16; - aco_ptr fma{create_instruction(mad, Format::VOP3P, 3, 1)}; + aco_ptr fma{create_instruction(mad, Format::VOP3P, 3, 1)}; fma->operands[0] = copy_operand(ctx, mul_instr->operands[0]); fma->operands[1] = copy_operand(ctx, mul_instr->operands[1]); fma->operands[2] = instr->operands[add_op_idx]; - fma->clamp = vop3p->clamp; - fma->neg_lo = mul_neg_lo; - fma->neg_hi = mul_neg_hi; - fma->opsel_lo = mul_opsel_lo; - fma->opsel_hi = mul_opsel_hi; - propagate_swizzles(fma.get(), vop3p->opsel_lo[1 - add_op_idx], + fma->valu().clamp = vop3p->clamp; + fma->valu().neg_lo = mul_neg_lo; + fma->valu().neg_hi = mul_neg_hi; + fma->valu().opsel_lo = mul_opsel_lo; + fma->valu().opsel_hi = mul_opsel_hi; + propagate_swizzles(&fma->valu(), vop3p->opsel_lo[1 - add_op_idx], vop3p->opsel_hi[1 - add_op_idx]); - fma->opsel_lo[2] = vop3p->opsel_lo[add_op_idx]; - fma->opsel_hi[2] = vop3p->opsel_hi[add_op_idx]; - fma->neg_lo[2] = vop3p->neg_lo[add_op_idx]; - fma->neg_hi[2] = vop3p->neg_hi[add_op_idx]; - fma->neg_lo[1] = fma->neg_lo[1] ^ vop3p->neg_lo[1 - add_op_idx]; - fma->neg_hi[1] = fma->neg_hi[1] ^ vop3p->neg_hi[1 - add_op_idx]; + fma->valu().opsel_lo[2] = vop3p->opsel_lo[add_op_idx]; + fma->valu().opsel_hi[2] = vop3p->opsel_hi[add_op_idx]; + fma->valu().neg_lo[2] = vop3p->neg_lo[add_op_idx]; + fma->valu().neg_hi[2] = vop3p->neg_hi[add_op_idx]; + fma->valu().neg_lo[1] = fma->valu().neg_lo[1] ^ vop3p->neg_lo[1 - add_op_idx]; + fma->valu().neg_hi[1] = fma->valu().neg_hi[1] ^ vop3p->neg_hi[1 - add_op_idx]; fma->definitions[0] = instr->definitions[0]; fma->pass_flags = instr->pass_flags; instr = std::move(fma); @@ -3995,26 +3995,26 @@ to_mad_mix(opt_ctx& ctx, aco_ptr& instr) bool is_add = instr->opcode != aco_opcode::v_mul_f32; - aco_ptr vop3p{ + aco_ptr vop3p{ create_instruction(aco_opcode::v_fma_mix_f32, Format::VOP3P, 3, 1)}; for (unsigned i = 0; i < instr->operands.size(); i++) { vop3p->operands[is_add + i] = instr->operands[i]; - vop3p->neg_lo[is_add + i] = instr->valu().neg[i]; - vop3p->neg_hi[is_add + i] = instr->valu().abs[i]; + vop3p->valu().neg_lo[is_add + i] = instr->valu().neg[i]; + vop3p->valu().neg_hi[is_add + i] = instr->valu().abs[i]; } if (instr->opcode == aco_opcode::v_mul_f32) { vop3p->operands[2] = Operand::zero(); - vop3p->neg_lo[2] = true; + vop3p->valu().neg_lo[2] = true; } else if (is_add) { vop3p->operands[0] = Operand::c32(0x3f800000); if (instr->opcode == aco_opcode::v_sub_f32) - vop3p->neg_lo[2] ^= true; + vop3p->valu().neg_lo[2] ^= true; else if (instr->opcode == aco_opcode::v_subrev_f32) - vop3p->neg_lo[1] ^= true; + vop3p->valu().neg_lo[1] ^= true; } vop3p->definitions[0] = instr->definitions[0]; - vop3p->clamp = instr->valu().clamp; + vop3p->valu().clamp = instr->valu().clamp; vop3p->pass_flags = instr->pass_flags; instr = std::move(vop3p); @@ -4418,7 +4418,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr& instr) neg[2 - add_op_idx] = neg[2 - add_op_idx] ^ true; aco_ptr add_instr = std::move(instr); - aco_ptr mad; + aco_ptr mad; if (add_instr->isVOP3P() || mul_instr->isVOP3P()) { assert(!omod); assert(!opsel); @@ -4448,14 +4448,14 @@ combine_instruction(opt_ctx& ctx, aco_ptr& instr) for (unsigned i = 0; i < 3; i++) { mad->operands[i] = op[i]; - mad->neg[i] = neg[i]; - mad->abs[i] = abs[i]; + mad->valu().neg[i] = neg[i]; + mad->valu().abs[i] = abs[i]; } - mad->omod = omod; - mad->clamp = clamp; - mad->opsel_lo = opsel_lo; - mad->opsel_hi = opsel_hi; - mad->opsel = opsel; + mad->valu().omod = omod; + mad->valu().clamp = clamp; + mad->valu().opsel_lo = opsel_lo; + mad->valu().opsel_hi = opsel_hi; + mad->valu().opsel = opsel; mad->definitions[0] = add_instr->definitions[0]; mad->definitions[0].setPrecise(add_instr->definitions[0].isPrecise() || mul_instr->definitions[0].isPrecise()); @@ -4481,7 +4481,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr& instr) ctx.uses[instr->operands[i].tempId()]--; ctx.uses[ctx.info[instr->operands[i].tempId()].temp.id()]++; - aco_ptr new_instr{ + aco_ptr new_instr{ create_instruction(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1)}; new_instr->operands[0] = Operand::zero(); new_instr->operands[1] = instr->operands[!i]; @@ -4805,7 +4805,7 @@ select_instruction(opt_ctx& ctx, aco_ptr& instr) if (op.isTemp()) ctx.uses[op.tempId()]++; - aco_ptr extract{create_instruction( + aco_ptr extract{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, 1, 1)}; extract->operands[0] = op; extract->definitions[0] = instr->definitions[idx]; @@ -4818,7 +4818,7 @@ select_instruction(opt_ctx& ctx, aco_ptr& instr) if (!done && num_used == 1 && instr->operands[0].bytes() % instr->definitions[idx].bytes() == 0 && split_offset % instr->definitions[idx].bytes() == 0) { - aco_ptr extract{create_instruction( + aco_ptr extract{create_instruction( aco_opcode::p_extract_vector, Format::PSEUDO, 2, 1)}; extract->operands[0] = instr->operands[0]; extract->operands[1] = diff --git a/src/amd/compiler/aco_reduce_assign.cpp b/src/amd/compiler/aco_reduce_assign.cpp index bffeb81c35c..25f017e516d 100644 --- a/src/amd/compiler/aco_reduce_assign.cpp +++ b/src/amd/compiler/aco_reduce_assign.cpp @@ -109,7 +109,7 @@ setup_reduce_temp(Program* program) if ((int)last_top_level_block_idx != inserted_at) { reduceTmp = program->allocateTmp(reduceTmp.regClass()); - aco_ptr create{create_instruction( + aco_ptr create{create_instruction( aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)}; create->definitions[0] = Definition(reduceTmp); /* find the right place to insert this definition */ @@ -154,7 +154,7 @@ setup_reduce_temp(Program* program) if (need_vtmp && (int)last_top_level_block_idx != vtmp_inserted_at) { vtmp = program->allocateTmp(vtmp.regClass()); - aco_ptr create{create_instruction( + aco_ptr create{create_instruction( aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)}; create->definitions[0] = Definition(vtmp); if (last_top_level_block_idx == block.index) { diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 697a226e5bf..cc47153c426 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2885,7 +2885,7 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vector pc; + aco_ptr pc; pc.reset(create_instruction(aco_opcode::p_parallelcopy, Format::PSEUDO, parallelcopy.size(), parallelcopy.size())); bool linear_vgpr = false; @@ -2935,8 +2935,8 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vectorneeds_scratch_reg = sgpr_operands_alias_defs || linear_vgpr; - pc->tmp_in_scc = false; + pc->pseudo().needs_scratch_reg = sgpr_operands_alias_defs || linear_vgpr; + pc->pseudo().tmp_in_scc = false; } instructions.emplace_back(std::move(pc)); diff --git a/src/amd/compiler/aco_scheduler_ilp.cpp b/src/amd/compiler/aco_scheduler_ilp.cpp index eb513b5c3dd..6ba24168e7b 100644 --- a/src/amd/compiler/aco_scheduler_ilp.cpp +++ b/src/amd/compiler/aco_scheduler_ilp.cpp @@ -686,9 +686,8 @@ create_vopd_instruction(const SchedILPContext& ctx, unsigned idx) get_vopd_opcode_operands(x, x_info, swap_x, &x_op, &num_operands, operands); get_vopd_opcode_operands(y, y_info, swap_y, &y_op, &num_operands, operands + num_operands); - VOPD_instruction* instr = - create_instruction(x_op, Format::VOPD, num_operands, 2); - instr->opy = y_op; + Instruction* instr = create_instruction(x_op, Format::VOPD, num_operands, 2); + instr->vopd().opy = y_op; instr->definitions[0] = x->definitions[0]; instr->definitions[1] = y->definitions[0]; std::copy(operands, operands + num_operands, instr->operands.begin()); diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp index 35a4b6d435d..5bc360068de 100644 --- a/src/amd/compiler/aco_spill.cpp +++ b/src/amd/compiler/aco_spill.cpp @@ -376,7 +376,7 @@ do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t spill_id) res->definitions[0] = Definition(new_name); return res; } else { - aco_ptr reload{ + aco_ptr reload{ create_instruction(aco_opcode::p_reload, Format::PSEUDO, 1, 1)}; reload->operands[0] = Operand::c32(spill_id); reload->definitions[0] = Definition(new_name); @@ -845,7 +845,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) for (std::pair pair : ctx.spills_exit[pred_idx]) ctx.add_interference(def_spill_id, pair.second); - aco_ptr spill{ + aco_ptr spill{ create_instruction(aco_opcode::p_spill, Format::PSEUDO, 2, 0)}; spill->operands[0] = spill_op; spill->operands[1] = Operand::c32(def_spill_id); @@ -915,7 +915,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) ctx.renames[pred_idx].erase(rename_it); } - aco_ptr spill{ + aco_ptr spill{ create_instruction(aco_opcode::p_spill, Format::PSEUDO, 2, 0)}; spill->operands[0] = Operand(var); spill->operands[1] = Operand::c32(pair.second); @@ -1054,7 +1054,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) if (!is_same) { /* the variable was renamed differently in the predecessors: we have to create a phi */ aco_opcode opcode = pair.first.is_linear() ? aco_opcode::p_linear_phi : aco_opcode::p_phi; - aco_ptr phi{ + aco_ptr phi{ create_instruction(opcode, Format::PSEUDO, preds.size(), 1)}; rename = ctx.program->allocateTmp(pair.first.regClass()); for (unsigned i = 0; i < phi->operands.size(); i++) { @@ -1229,7 +1229,7 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand s } /* add spill to new instructions */ - aco_ptr spill{ + aco_ptr spill{ create_instruction(aco_opcode::p_spill, Format::PSEUDO, 2, 0)}; spill->operands[0] = Operand(to_spill); spill->operands[1] = Operand::c32(spill_id); @@ -1757,7 +1757,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) { Temp linear_vgpr = ctx.program->allocateTmp(v1.as_linear()); vgpr_spill_temps[spill_slot / ctx.wave_size] = linear_vgpr; - aco_ptr create{create_instruction( + aco_ptr create{create_instruction( aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)}; create->definitions[0] = Definition(linear_vgpr); /* find the right place to insert this definition */ @@ -1774,7 +1774,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) } /* spill sgpr: just add the vgpr temp to operands */ - Pseudo_instruction* spill = + Instruction* spill = create_instruction(aco_opcode::p_spill, Format::PSEUDO, 3, 0); spill->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]); spill->operands[0].setLateKill(true); @@ -1798,7 +1798,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) { Temp linear_vgpr = ctx.program->allocateTmp(v1.as_linear()); vgpr_spill_temps[spill_slot / ctx.wave_size] = linear_vgpr; - aco_ptr create{create_instruction( + aco_ptr create{create_instruction( aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)}; create->definitions[0] = Definition(linear_vgpr); /* find the right place to insert this definition */ @@ -1815,8 +1815,8 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) } /* reload sgpr: just add the vgpr temp to operands */ - Pseudo_instruction* reload = create_instruction( - aco_opcode::p_reload, Format::PSEUDO, 2, 1); + Instruction* reload = create_instruction(aco_opcode::p_reload, + Format::PSEUDO, 2, 1); reload->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]); reload->operands[0].setLateKill(true); reload->operands[1] = Operand::c32(spill_slot % ctx.wave_size); diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp index 43ccb5c250e..2f194e8b79c 100644 --- a/src/amd/compiler/aco_ssa_elimination.cpp +++ b/src/amd/compiler/aco_ssa_elimination.cpp @@ -97,7 +97,7 @@ insert_parallelcopies(ssa_elimination_ctx& ctx) } std::vector>::iterator it = std::next(block.instructions.begin(), idx); - aco_ptr pc{ + aco_ptr pc{ create_instruction(aco_opcode::p_parallelcopy, Format::PSEUDO, logical_phi_info.size(), logical_phi_info.size())}; unsigned i = 0; @@ -107,7 +107,7 @@ insert_parallelcopies(ssa_elimination_ctx& ctx) i++; } /* this shouldn't be needed since we're only copying vgprs */ - pc->tmp_in_scc = false; + pc->pseudo().tmp_in_scc = false; block.instructions.insert(it, std::move(pc)); } @@ -122,7 +122,7 @@ insert_parallelcopies(ssa_elimination_ctx& ctx) --it; assert((*it)->isBranch()); PhysReg scratch_sgpr = (*it)->definitions[0].physReg(); - aco_ptr pc{ + aco_ptr pc{ create_instruction(aco_opcode::p_parallelcopy, Format::PSEUDO, linear_phi_info.size(), linear_phi_info.size())}; unsigned i = 0; @@ -131,9 +131,9 @@ insert_parallelcopies(ssa_elimination_ctx& ctx) pc->operands[i] = phi_info.op; i++; } - pc->tmp_in_scc = block.scc_live_out; - pc->scratch_sgpr = scratch_sgpr; - pc->needs_scratch_reg = true; + pc->pseudo().tmp_in_scc = block.scc_live_out; + pc->pseudo().scratch_sgpr = scratch_sgpr; + pc->pseudo().needs_scratch_reg = true; block.instructions.insert(it, std::move(pc)); } } diff --git a/src/amd/compiler/tests/test_assembler.cpp b/src/amd/compiler/tests/test_assembler.cpp index f47d354701c..5f2f0de17f2 100644 --- a/src/amd/compiler/tests/test_assembler.cpp +++ b/src/amd/compiler/tests/test_assembler.cpp @@ -269,7 +269,7 @@ BEGIN_TEST(assembler.v_add3) //~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080 //~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080 - aco_ptr add3{ + aco_ptr add3{ create_instruction(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)}; add3->operands[0] = Operand::zero(); add3->operands[1] = Operand::zero(); @@ -288,13 +288,13 @@ BEGIN_TEST(assembler.v_add3_clamp) //~gfx9>> integer addition + clamp ; d1ff8000 02010080 //~gfx10>> integer addition + clamp ; d76d8000 02010080 - aco_ptr add3{ + aco_ptr add3{ create_instruction(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)}; add3->operands[0] = Operand::zero(); add3->operands[1] = Operand::zero(); add3->operands[2] = Operand::zero(); add3->definitions[0] = Definition(PhysReg(0), v1); - add3->clamp = 1; + add3->valu().clamp = 1; bld.insert(std::move(add3)); finish_assembler_test(); diff --git a/src/amd/compiler/tests/test_hard_clause.cpp b/src/amd/compiler/tests/test_hard_clause.cpp index 436a71984ff..8319a87c0d3 100644 --- a/src/amd/compiler/tests/test_hard_clause.cpp +++ b/src/amd/compiler/tests/test_hard_clause.cpp @@ -69,7 +69,7 @@ create_global() static void create_mimg(bool nsa, Temp desc = Temp(0, s8)) { - aco_ptr mimg{ + aco_ptr mimg{ create_instruction(aco_opcode::image_sample, Format::MIMG, 5, 1)}; mimg->definitions[0] = Definition(PhysReg(256), v1); mimg->operands[0] = Operand(desc); @@ -78,8 +78,8 @@ create_mimg(bool nsa, Temp desc = Temp(0, s8)) mimg->operands[2] = Operand(v1); for (unsigned i = 0; i < 2; i++) mimg->operands[3 + i] = Operand(PhysReg(256 + (nsa ? i * 2 : i)), v1); - mimg->dmask = 0x1; - mimg->dim = ac_image_2d; + mimg->mimg().dmask = 0x1; + mimg->mimg().dim = ac_image_2d; bld.insert(std::move(mimg)); } diff --git a/src/amd/compiler/tests/test_insert_nops.cpp b/src/amd/compiler/tests/test_insert_nops.cpp index 8ac8766bbb8..c155b8d77a3 100644 --- a/src/amd/compiler/tests/test_insert_nops.cpp +++ b/src/amd/compiler/tests/test_insert_nops.cpp @@ -42,7 +42,7 @@ create_mubuf_store(PhysReg src = PhysReg(256)) void create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords) { - aco_ptr mimg{ + aco_ptr mimg{ create_instruction(aco_opcode::image_sample, Format::MIMG, 3 + addrs, 1)}; mimg->definitions[0] = Definition(PhysReg(256), v1); mimg->operands[0] = Operand(PhysReg(0), s8); @@ -50,8 +50,8 @@ create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords) mimg->operands[2] = Operand(v1); for (unsigned i = 0; i < addrs; i++) mimg->operands[3 + i] = Operand(PhysReg(256 + (nsa ? i * 2 : i)), v1); - mimg->dmask = 0x1; - mimg->dim = ac_image_2d; + mimg->mimg().dmask = 0x1; + mimg->mimg().dim = ac_image_2d; assert(get_mimg_nsa_dwords(mimg.get()) + 2 == instr_dwords);