diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index f0500e1bbc0..ed6dfa3728d 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -611,9 +611,9 @@ handle_instruction_gfx6(State& state, NOP_ctx_gfx6& ctx, aco_ptr<Instruction>& i
    // TODO: try to schedule the NOP-causing instruction up to reduce the number of stall cycles
    if (NOPs) {
       /* create NOP */
-      aco_ptr<SALU_instruction> nop{
+      aco_ptr<Instruction> nop{
          create_instruction<SALU_instruction>(aco_opcode::s_nop, Format::SOPP, 0, 0)};
-      nop->imm = NOPs - 1;
+      nop->salu().imm = NOPs - 1;
       new_instructions.emplace_back(std::move(nop));
    }
 
diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index a4ba5783b7c..84c4d7c97bc 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -237,7 +237,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
 
       /* create ssa names for outer exec masks */
       if (info.has_discard && preds.size() > 1) {
-         aco_ptr<Pseudo_instruction> phi;
+         aco_ptr<Instruction> phi;
          for (int i = 0; i < info.num_exec_masks - 1; i++) {
             phi.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi,
                                                              Format::PSEUDO, preds.size(), 1));
@@ -251,7 +251,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
 
       if (info.has_divergent_continue) {
          /* create ssa name for loop active mask */
-         aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(
+         aco_ptr<Instruction> phi{create_instruction<Pseudo_instruction>(
             aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
          phi->definitions[0] = bld.def(bld.lm);
          phi->operands[0] = get_exec_op(ctx.info[preds[0]].exec.back().first);
@@ -312,7 +312,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
             ctx.info[idx].exec.emplace_back(same, type);
          } else {
             /* create phi for loop footer */
-            aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(
+            aco_ptr<Instruction> phi{create_instruction<Pseudo_instruction>(
                aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
             phi->definitions[0] = bld.def(bld.lm);
             for (unsigned i = 0; i < phi->operands.size(); i++)
diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp
index b1f4722cc73..c1e20459b93 100644
--- a/src/amd/compiler/aco_insert_waitcnt.cpp
+++ b/src/amd/compiler/aco_insert_waitcnt.cpp
@@ -993,17 +993,17 @@ emit_waitcnt(wait_ctx& ctx, std::vector<aco_ptr<Instruction>>& instructions, wai
 {
    if (imm.vs != wait_imm::unset_counter) {
       assert(ctx.gfx_level >= GFX10);
-      SALU_instruction* waitcnt_vs =
+      Instruction* waitcnt_vs =
          create_instruction<SALU_instruction>(aco_opcode::s_waitcnt_vscnt, Format::SOPK, 1, 0);
       waitcnt_vs->operands[0] = Operand(sgpr_null, s1);
-      waitcnt_vs->imm = imm.vs;
+      waitcnt_vs->salu().imm = imm.vs;
       instructions.emplace_back(waitcnt_vs);
       imm.vs = wait_imm::unset_counter;
    }
    if (!imm.empty()) {
-      SALU_instruction* waitcnt =
+      Instruction* waitcnt =
          create_instruction<SALU_instruction>(aco_opcode::s_waitcnt, Format::SOPP, 0, 0);
-      waitcnt->imm = imm.pack(ctx.gfx_level);
+      waitcnt->salu().imm = imm.pack(ctx.gfx_level);
       instructions.emplace_back(waitcnt);
    }
    imm = wait_imm();
@@ -1030,9 +1030,9 @@ emit_delay_alu(wait_ctx& ctx, std::vector<aco_ptr<Instruction>>& instructions,
       imm |= ((uint32_t)alu_delay_wait::SALU_CYCLE_1 + cycles - 1) << (imm ? 7 : 0);
    }
 
-   SALU_instruction* inst =
+   Instruction* inst =
       create_instruction<SALU_instruction>(aco_opcode::s_delay_alu, Format::SOPP, 0, 0);
-   inst->imm = imm;
+   inst->salu().imm = imm;
    inst->pass_flags = (delay.valu_cycles | (delay.trans_cycles << 16));
    instructions.emplace_back(inst);
    delay = alu_delay_info();
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 1460a27dfdd..a6370885e44 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -380,7 +380,7 @@ emit_split_vector(isel_context* ctx, Temp vec_src, unsigned num_components)
    } else {
       rc = RegClass(vec_src.type(), vec_src.size() / num_components);
    }
-   aco_ptr<Pseudo_instruction> split{create_instruction<Pseudo_instruction>(
+   aco_ptr<Instruction> split{create_instruction<Pseudo_instruction>(
       aco_opcode::p_split_vector, Format::PSEUDO, 1, num_components)};
    split->operands[0] = Operand(vec_src);
    std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
@@ -432,7 +432,7 @@ expand_vector(isel_context* ctx, Temp vec_src, Temp dst, unsigned num_components
    if (zero_padding)
       padding = bld.copy(bld.def(dst_rc), Operand::zero(component_bytes));
 
-   aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
+   aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
       aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)};
    vec->definitions[0] = Definition(dst);
    unsigned k = 0;
@@ -553,7 +553,7 @@ byte_align_vector(isel_context* ctx, Temp vec, Operand offset, Temp dst, unsigne
    if (dst.type() == RegType::vgpr) {
       /* if dst is vgpr - split the src and create a shrunk version according to the mask. */
       num_components = dst.bytes() / component_size;
-      aco_ptr<Pseudo_instruction> create_vec{create_instruction<Pseudo_instruction>(
+      aco_ptr<Instruction> create_vec{create_instruction<Pseudo_instruction>(
          aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)};
       for (unsigned i = 0; i < num_components; i++)
          create_vec->operands[i] = Operand(elems[i]);
@@ -749,7 +749,7 @@ get_alu_src(struct isel_context* ctx, nir_alu_src src, unsigned size = 1)
    } else {
       assert(size <= 4);
       std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
-      aco_ptr<Pseudo_instruction> vec_instr{create_instruction<Pseudo_instruction>(
+      aco_ptr<Instruction> vec_instr{create_instruction<Pseudo_instruction>(
          aco_opcode::p_create_vector, Format::PSEUDO, size, 1)};
       for (unsigned i = 0; i < size; ++i) {
          elems[i] = emit_extract_vector(ctx, vec, src.swizzle[i], elem_rc);
@@ -823,7 +823,7 @@ void
 emit_sop2_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst,
                       bool writes_scc, uint8_t uses_ub = 0)
 {
-   aco_ptr<SALU_instruction> sop2{
+   aco_ptr<Instruction> sop2{
       create_instruction<SALU_instruction>(op, Format::SOP2, 2, writes_scc ? 2 : 1)};
    sop2->operands[0] = Operand(get_alu_src(ctx, instr->src[0]));
    sop2->operands[1] = Operand(get_alu_src(ctx, instr->src[1]));
@@ -1407,7 +1407,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
          elems[i] = get_alu_src(ctx, instr->src[i]);
 
       if (instr->def.bit_size >= 32 || dst.type() == RegType::vgpr) {
-         aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
+         aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
             aco_opcode::p_create_vector, Format::PSEUDO, instr->def.num_components, 1)};
          RegClass elem_rc = RegClass::get(RegType::vgpr, instr->def.bit_size / 8u);
          for (unsigned i = 0; i < num; ++i) {
@@ -1484,7 +1484,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
          if (dst.size() == 1)
             bld.copy(Definition(dst), packed[0]);
          else {
-            aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
+            aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
                aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
             vec->definitions[0] = Definition(dst);
             for (unsigned i = 0; i < dst.size(); ++i)
@@ -3954,7 +3954,7 @@ visit_load_const(isel_context* ctx, nir_load_const_instr* instr)
       bld.copy(Definition(dst), Operand::c32(instr->value[0].u32));
    } else {
       assert(dst.size() != 1);
-      aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
+      aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
          aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
       if (instr->def.bit_size == 64)
          for (unsigned i = 0; i < dst.size(); i++)
@@ -3978,7 +3978,7 @@ emit_readfirstlane(isel_context* ctx, Temp src, Temp dst)
    } else if (src.size() == 1) {
       bld.vop1(aco_opcode::v_readfirstlane_b32, Definition(dst), src);
    } else {
-      aco_ptr<Pseudo_instruction> split{create_instruction<Pseudo_instruction>(
+      aco_ptr<Instruction> split{create_instruction<Pseudo_instruction>(
          aco_opcode::p_split_vector, Format::PSEUDO, 1, src.size())};
       split->operands[0] = Operand(src);
 
@@ -3990,7 +3990,7 @@ emit_readfirstlane(isel_context* ctx, Temp src, Temp dst)
       Instruction* split_raw = split.get();
       ctx->block->instructions.emplace_back(std::move(split));
 
-      aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
+      aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
          aco_opcode::p_create_vector, Format::PSEUDO, src.size(), 1)};
       vec->definitions[0] = Definition(dst);
       for (unsigned i = 0; i < src.size(); i++) {
@@ -4246,7 +4246,7 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
          tmp[num_tmps++] = vals[i++];
       }
       if (num_tmps > 1) {
-         aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
+         aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
             aco_opcode::p_create_vector, Format::PSEUDO, num_tmps, 1)};
          for (unsigned j = 0; j < num_tmps; j++)
             vec->operands[j] = Operand(tmp[j]);
@@ -4272,7 +4272,7 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
          allocated_vec[components_split++] = tmp[0];
       } else {
          assert(tmp_size % elem_rc.bytes() == 0);
-         aco_ptr<Pseudo_instruction> split{create_instruction<Pseudo_instruction>(
+         aco_ptr<Instruction> split{create_instruction<Pseudo_instruction>(
             aco_opcode::p_split_vector, Format::PSEUDO, 1, tmp_size / elem_rc.bytes())};
          for (auto& def : split->definitions) {
             Temp component = bld.tmp(elem_rc);
@@ -4305,7 +4305,7 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
    int padding_bytes =
       MAX2((int)info.dst.bytes() - int(allocated_vec[0].bytes() * info.num_components), 0);
 
-   aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
+   aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
       aco_opcode::p_create_vector, Format::PSEUDO, info.num_components + !!padding_bytes, 1)};
    for (unsigned i = 0; i < info.num_components; i++)
       vec->operands[i] = Operand(allocated_vec[i]);
@@ -4440,7 +4440,7 @@ smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned
       op = buffer ? aco_opcode::s_buffer_load_dwordx16 : aco_opcode::s_load_dwordx16;
    }
 
-   aco_ptr<SMEM_instruction> load{create_instruction<SMEM_instruction>(op, Format::SMEM, 2, 1)};
+   aco_ptr<Instruction> load{create_instruction<SMEM_instruction>(op, Format::SMEM, 2, 1)};
    if (buffer) {
       if (const_offset)
          offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset,
@@ -4460,9 +4460,10 @@ smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned
    RegClass rc(RegType::sgpr, DIV_ROUND_UP(bytes_needed, 4u));
    Temp val = dst_hint.id() && dst_hint.regClass() == rc ? dst_hint : bld.tmp(rc);
    load->definitions[0] = Definition(val);
-   load->glc = info.glc;
-   load->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
-   load->sync = info.sync;
+   load->smem().glc = info.glc;
+   load->smem().dlc =
+      info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
+   load->smem().sync = info.sync;
    bld.insert(std::move(load));
    return val;
 }
@@ -4514,18 +4515,19 @@ mubuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne
       bytes_size = 16;
       op = aco_opcode::buffer_load_dwordx4;
    }
-   aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
+   aco_ptr<Instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
    mubuf->operands[0] = Operand(info.resource);
    mubuf->operands[1] = vaddr;
    mubuf->operands[2] = soffset;
-   mubuf->offen = offen;
-   mubuf->idxen = idxen;
-   mubuf->glc = info.glc;
-   mubuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
-   mubuf->slc = info.slc;
-   mubuf->sync = info.sync;
-   mubuf->offset = const_offset;
-   mubuf->swizzled = info.swizzle_component_size != 0;
+   mubuf->mubuf().offen = offen;
+   mubuf->mubuf().idxen = idxen;
+   mubuf->mubuf().glc = info.glc;
+   mubuf->mubuf().dlc =
+      info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
+   mubuf->mubuf().slc = info.slc;
+   mubuf->mubuf().sync = info.sync;
+   mubuf->mubuf().offset = const_offset;
+   mubuf->mubuf().swizzled = info.swizzle_component_size != 0;
    RegClass rc = RegClass::get(RegType::vgpr, bytes_size);
    Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
    mubuf->definitions[0] = Definition(val);
@@ -4581,17 +4583,18 @@ mubuf_load_format_callback(Builder& bld, const LoadEmitInfo& info, Temp offset,
       }
    }
 
-   aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
+   aco_ptr<Instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
    mubuf->operands[0] = Operand(info.resource);
    mubuf->operands[1] = vaddr;
    mubuf->operands[2] = soffset;
-   mubuf->offen = offen;
-   mubuf->idxen = idxen;
-   mubuf->glc = info.glc;
-   mubuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
-   mubuf->slc = info.slc;
-   mubuf->sync = info.sync;
-   mubuf->offset = const_offset;
+   mubuf->mubuf().offen = offen;
+   mubuf->mubuf().idxen = idxen;
+   mubuf->mubuf().glc = info.glc;
+   mubuf->mubuf().dlc =
+      info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
+   mubuf->mubuf().slc = info.slc;
+   mubuf->mubuf().sync = info.sync;
+   mubuf->mubuf().offset = const_offset;
    RegClass rc = RegClass::get(RegType::vgpr, bytes_needed);
    Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
    mubuf->definitions[0] = Definition(val);
@@ -4629,11 +4632,11 @@ scratch_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsig
    }
    RegClass rc = RegClass::get(RegType::vgpr, bytes_size);
    Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
-   aco_ptr<FLAT_instruction> flat{create_instruction<FLAT_instruction>(op, Format::SCRATCH, 2, 1)};
+   aco_ptr<Instruction> flat{create_instruction<FLAT_instruction>(op, Format::SCRATCH, 2, 1)};
    flat->operands[0] = offset.regClass() == s1 ? Operand(v1) : Operand(offset);
    flat->operands[1] = offset.regClass() == s1 ? Operand(offset) : Operand(s1);
-   flat->sync = info.sync;
-   flat->offset = const_offset;
+   flat->scratch().sync = info.sync;
+   flat->scratch().offset = const_offset;
    flat->definitions[0] = Definition(val);
    bld.insert(std::move(flat));
 
@@ -4793,21 +4796,20 @@ global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsign
    RegClass rc = RegClass::get(RegType::vgpr, bytes_size);
    Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
    if (use_mubuf) {
-      aco_ptr<MUBUF_instruction> mubuf{
-         create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
+      aco_ptr<Instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
       mubuf->operands[0] = Operand(get_gfx6_global_rsrc(bld, addr));
       mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
       mubuf->operands[2] = Operand(offset);
-      mubuf->glc = info.glc;
-      mubuf->dlc = false;
-      mubuf->offset = const_offset;
-      mubuf->addr64 = addr.type() == RegType::vgpr;
-      mubuf->disable_wqm = false;
-      mubuf->sync = info.sync;
+      mubuf->mubuf().glc = info.glc;
+      mubuf->mubuf().dlc = false;
+      mubuf->mubuf().offset = const_offset;
+      mubuf->mubuf().addr64 = addr.type() == RegType::vgpr;
+      mubuf->mubuf().disable_wqm = false;
+      mubuf->mubuf().sync = info.sync;
       mubuf->definitions[0] = Definition(val);
       bld.insert(std::move(mubuf));
    } else {
-      aco_ptr<FLAT_instruction> flat{
+      aco_ptr<Instruction> flat{
          create_instruction<FLAT_instruction>(op, global ? Format::GLOBAL : Format::FLAT, 2, 1)};
       if (addr.regClass() == s2) {
          assert(global && offset.id() && offset.type() == RegType::vgpr);
@@ -4818,12 +4820,12 @@ global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsign
          flat->operands[0] = Operand(addr);
          flat->operands[1] = Operand(s1);
       }
-      flat->glc = info.glc;
-      flat->dlc =
+      flat->flatlike().glc = info.glc;
+      flat->flatlike().dlc =
          info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
-      flat->sync = info.sync;
+      flat->flatlike().sync = info.sync;
       assert(global || !const_offset);
-      flat->offset = const_offset;
+      flat->flatlike().offset = const_offset;
       flat->definitions[0] = Definition(val);
       bld.insert(std::move(flat));
    }
@@ -5178,7 +5180,7 @@ create_vec_from_array(isel_context* ctx, Temp arr[], unsigned cnt, RegType reg_t
       dst = bld.tmp(RegClass(reg_type, cnt * dword_size));
 
    std::array<Temp, NIR_MAX_VEC_COMPONENTS> allocated_vec;
-   aco_ptr<Pseudo_instruction> instr{
+   aco_ptr<Instruction> instr{
       create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, cnt, 1)};
    instr->definitions[0] = Definition(dst);
 
@@ -5553,7 +5555,7 @@ emit_load_frag_coord(isel_context* ctx, Temp dst, unsigned num_components)
 {
    Builder bld(ctx->program, ctx->block);
 
-   aco_ptr<Pseudo_instruction> vec(create_instruction<Pseudo_instruction>(
+   aco_ptr<Instruction> vec(create_instruction<Pseudo_instruction>(
       aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1));
    for (unsigned i = 0; i < num_components; i++) {
       if (ctx->args->frag_pos[i].used)
@@ -5617,7 +5619,7 @@ visit_load_interpolated_input(isel_context* ctx, nir_intrinsic_instr* instr)
    if (instr->def.num_components == 1) {
       emit_interp_instr(ctx, idx, component, coords, dst, prim_mask);
    } else {
-      aco_ptr<Pseudo_instruction> vec(create_instruction<Pseudo_instruction>(
+      aco_ptr<Instruction> vec(create_instruction<Pseudo_instruction>(
          aco_opcode::p_create_vector, Format::PSEUDO, instr->def.num_components, 1));
       for (unsigned i = 0; i < instr->def.num_components; i++) {
          Temp tmp = ctx->program->allocateTmp(instr->def.bit_size == 16 ? v2b : v1);
@@ -5709,19 +5711,20 @@ mtbuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne
       abort();
    }
 
-   aco_ptr<MTBUF_instruction> mtbuf{create_instruction<MTBUF_instruction>(op, Format::MTBUF, 3, 1)};
+   aco_ptr<Instruction> mtbuf{create_instruction<MTBUF_instruction>(op, Format::MTBUF, 3, 1)};
    mtbuf->operands[0] = Operand(info.resource);
    mtbuf->operands[1] = vaddr;
    mtbuf->operands[2] = soffset;
-   mtbuf->offen = offen;
-   mtbuf->idxen = idxen;
-   mtbuf->glc = info.glc;
-   mtbuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
-   mtbuf->slc = info.slc;
-   mtbuf->sync = info.sync;
-   mtbuf->offset = const_offset;
-   mtbuf->dfmt = fetch_fmt & 0xf;
-   mtbuf->nfmt = fetch_fmt >> 4;
+   mtbuf->mtbuf().offen = offen;
+   mtbuf->mtbuf().idxen = idxen;
+   mtbuf->mtbuf().glc = info.glc;
+   mtbuf->mtbuf().dlc =
+      info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
+   mtbuf->mtbuf().slc = info.slc;
+   mtbuf->mtbuf().sync = info.sync;
+   mtbuf->mtbuf().offset = const_offset;
+   mtbuf->mtbuf().dfmt = fetch_fmt & 0xf;
+   mtbuf->mtbuf().nfmt = fetch_fmt >> 4;
    RegClass rc = RegClass::get(RegType::vgpr, bytes_size);
    Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
    mtbuf->definitions[0] = Definition(val);
@@ -5757,7 +5760,7 @@ visit_load_fs_input(isel_context* ctx, nir_intrinsic_instr* instr)
       unsigned num_components = instr->def.num_components;
       if (instr->def.bit_size == 64)
          num_components *= 2;
-      aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
+      aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
          aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)};
       for (unsigned i = 0; i < num_components; i++) {
          unsigned chan_component = (component + i) % 4;
@@ -5876,7 +5879,7 @@ visit_load_push_constant(isel_context* ctx, nir_intrinsic_instr* instr)
       if ((ctx->args->inline_push_const_mask | mask) == ctx->args->inline_push_const_mask &&
           start + count <= (sizeof(ctx->args->inline_push_const_mask) * 8u)) {
          std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
-         aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
+         aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
             aco_opcode::p_create_vector, Format::PSEUDO, count, 1)};
          unsigned arg_index =
             util_bitcount64(ctx->args->inline_push_const_mask & BITFIELD64_MASK(start));
@@ -6066,7 +6069,7 @@ emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::v
    if (nsa_size < coords.size()) {
       Temp coord = coords[nsa_size];
       if (coords.size() - nsa_size > 1) {
-         aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
+         aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
             aco_opcode::p_create_vector, Format::PSEUDO, coords.size() - nsa_size, 1)};
 
          unsigned coord_size = 0;
@@ -6088,7 +6091,7 @@ emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::v
 
    bool has_dst = dst.id() != 0;
 
-   aco_ptr<MIMG_instruction> mimg{
+   aco_ptr<Instruction> mimg{
       create_instruction<MIMG_instruction>(op, Format::MIMG, 3 + coords.size(), has_dst)};
    if (has_dst)
       mimg->definitions[0] = Definition(dst);
@@ -6100,11 +6103,9 @@ emit_mimg(Builder& bld, aco_opcode op, Temp dst, Temp rsrc, Operand samp, std::v
       if (coords[i].regClass().is_linear_vgpr())
          mimg->operands[3 + i].setLateKill(true);
    }
-   mimg->strict_wqm = strict_wqm;
+   mimg->mimg().strict_wqm = strict_wqm;
 
-   MIMG_instruction* res = mimg.get();
-   bld.insert(std::move(mimg));
-   return res;
+   return &bld.insert(std::move(mimg))->mimg();
 }
 
 void
@@ -6257,8 +6258,8 @@ emit_tfe_init(Builder& bld, Temp dst)
 {
    Temp tmp = bld.tmp(dst.regClass());
 
-   aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
-      aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
+   aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector,
+                                                                   Format::PSEUDO, dst.size(), 1)};
    for (unsigned i = 0; i < dst.size(); i++)
       vec->operands[i] = Operand::zero();
    vec->definitions[0] = Definition(tmp);
@@ -6332,19 +6333,19 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
          default: unreachable(">4 channel buffer image load");
          }
       }
-      aco_ptr<MUBUF_instruction> load{
+      aco_ptr<Instruction> load{
          create_instruction<MUBUF_instruction>(opcode, Format::MUBUF, 3 + is_sparse, 1)};
       load->operands[0] = Operand(resource);
       load->operands[1] = Operand(vindex);
       load->operands[2] = Operand::c32(0);
       load->definitions[0] = Definition(tmp);
-      load->idxen = true;
-      load->glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT);
-      load->dlc =
-         load->glc && (ctx->options->gfx_level == GFX10 || ctx->options->gfx_level == GFX10_3);
-      load->sync = sync;
-      load->tfe = is_sparse;
-      if (load->tfe)
+      load->mubuf().idxen = true;
+      load->mubuf().glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT);
+      load->mubuf().dlc = load->mubuf().glc &&
+                          (ctx->options->gfx_level == GFX10 || ctx->options->gfx_level == GFX10_3);
+      load->mubuf().sync = sync;
+      load->mubuf().tfe = is_sparse;
+      if (load->mubuf().tfe)
          load->operands[3] = emit_tfe_init(bld, tmp);
       ctx->block->instructions.emplace_back(std::move(load));
    } else {
@@ -6446,7 +6447,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
          if (dmask_count == 1) {
             data = emit_extract_vector(ctx, data, ffs(dmask) - 1, rc);
          } else {
-            aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
+            aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
                aco_opcode::p_create_vector, Format::PSEUDO, dmask_count, 1)};
             uint32_t index = 0;
             u_foreach_bit (bit, dmask) {
@@ -6480,17 +6481,17 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
          default: unreachable(">4 channel buffer image store");
          }
       }
-      aco_ptr<MUBUF_instruction> store{
+      aco_ptr<Instruction> store{
          create_instruction<MUBUF_instruction>(opcode, Format::MUBUF, 4, 0)};
       store->operands[0] = Operand(rsrc);
       store->operands[1] = Operand(vindex);
       store->operands[2] = Operand::c32(0);
       store->operands[3] = Operand(data);
-      store->idxen = true;
-      store->glc = glc;
-      store->dlc = false;
-      store->disable_wqm = true;
-      store->sync = sync;
+      store->mubuf().idxen = true;
+      store->mubuf().glc = glc;
+      store->mubuf().dlc = false;
+      store->mubuf().disable_wqm = true;
+      store->mubuf().sync = sync;
       ctx->program->needs_exact = true;
       ctx->block->instructions.emplace_back(std::move(store));
       return;
@@ -6634,7 +6635,7 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
       Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
       // assert(ctx->options->gfx_level < GFX9 && "GFX9 stride size workaround not yet
       // implemented.");
-      aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(
+      aco_ptr<Instruction> mubuf{create_instruction<MUBUF_instruction>(
          is_64bit ? buf_op64 : buf_op, Format::MUBUF, 4, return_previous ? 1 : 0)};
       mubuf->operands[0] = Operand(resource);
       mubuf->operands[1] = Operand(vindex);
@@ -6644,12 +6645,12 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
          return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
       if (return_previous)
          mubuf->definitions[0] = def;
-      mubuf->offset = 0;
-      mubuf->idxen = true;
-      mubuf->glc = return_previous;
-      mubuf->dlc = false; /* Not needed for atomics */
-      mubuf->disable_wqm = true;
-      mubuf->sync = sync;
+      mubuf->mubuf().offset = 0;
+      mubuf->mubuf().idxen = true;
+      mubuf->mubuf().glc = return_previous;
+      mubuf->mubuf().dlc = false; /* Not needed for atomics */
+      mubuf->mubuf().disable_wqm = true;
+      mubuf->mubuf().sync = sync;
       ctx->program->needs_exact = true;
       ctx->block->instructions.emplace_back(std::move(mubuf));
       if (return_previous && cmpswap)
@@ -6728,18 +6729,17 @@ visit_store_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
    for (unsigned i = 0; i < write_count; i++) {
       aco_opcode op = get_buffer_store_op(write_datas[i].bytes());
 
-      aco_ptr<MUBUF_instruction> store{
-         create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, 0)};
+      aco_ptr<Instruction> store{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, 0)};
       store->operands[0] = Operand(rsrc);
       store->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1);
       store->operands[2] = offset.type() == RegType::sgpr ? Operand(offset) : Operand::c32(0);
       store->operands[3] = Operand(write_datas[i]);
-      store->offset = offsets[i];
-      store->offen = (offset.type() == RegType::vgpr);
-      store->glc = glc;
-      store->dlc = false;
-      store->disable_wqm = true;
-      store->sync = sync;
+      store->mubuf().offset = offsets[i];
+      store->mubuf().offen = (offset.type() == RegType::vgpr);
+      store->mubuf().glc = glc;
+      store->mubuf().dlc = false;
+      store->mubuf().disable_wqm = true;
+      store->mubuf().sync = sync;
       ctx->program->needs_exact = true;
       ctx->block->instructions.emplace_back(std::move(store));
    }
@@ -6767,7 +6767,7 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
    Temp dst = get_ssa_temp(ctx, &instr->def);
 
    aco_opcode op = instr->def.bit_size == 32 ? op32 : op64;
-   aco_ptr<MUBUF_instruction> mubuf{
+   aco_ptr<Instruction> mubuf{
       create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, return_previous ? 1 : 0)};
    mubuf->operands[0] = Operand(rsrc);
    mubuf->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1);
@@ -6777,12 +6777,12 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
       return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
    if (return_previous)
       mubuf->definitions[0] = def;
-   mubuf->offset = 0;
-   mubuf->offen = (offset.type() == RegType::vgpr);
-   mubuf->glc = return_previous;
-   mubuf->dlc = false; /* Not needed for atomics */
-   mubuf->disable_wqm = true;
-   mubuf->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
+   mubuf->mubuf().offset = 0;
+   mubuf->mubuf().offen = (offset.type() == RegType::vgpr);
+   mubuf->mubuf().glc = return_previous;
+   mubuf->mubuf().dlc = false; /* Not needed for atomics */
+   mubuf->mubuf().disable_wqm = true;
+   mubuf->mubuf().sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
    ctx->program->needs_exact = true;
    ctx->block->instructions.emplace_back(std::move(mubuf));
    if (return_previous && cmpswap)
@@ -6901,7 +6901,7 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
          default: unreachable("store_global not implemented for this size.");
          }
 
-         aco_ptr<FLAT_instruction> flat{
+         aco_ptr<Instruction> flat{
             create_instruction<FLAT_instruction>(op, global ? Format::GLOBAL : Format::FLAT, 3, 0)};
          if (write_address.regClass() == s2) {
             assert(global && write_offset.id() && write_offset.type() == RegType::vgpr);
@@ -6913,12 +6913,12 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
             flat->operands[1] = Operand(s1);
          }
          flat->operands[2] = Operand(write_datas[i]);
-         flat->glc = glc;
-         flat->dlc = false;
+         flat->flatlike().glc = glc;
+         flat->flatlike().dlc = false;
          assert(global || !write_const_offset);
-         flat->offset = write_const_offset;
-         flat->disable_wqm = true;
-         flat->sync = sync;
+         flat->flatlike().offset = write_const_offset;
+         flat->flatlike().disable_wqm = true;
+         flat->flatlike().sync = sync;
          ctx->program->needs_exact = true;
          ctx->block->instructions.emplace_back(std::move(flat));
       } else {
@@ -6928,19 +6928,18 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
 
          Temp rsrc = get_gfx6_global_rsrc(bld, write_address);
 
-         aco_ptr<MUBUF_instruction> mubuf{
-            create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, 0)};
+         aco_ptr<Instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, 0)};
          mubuf->operands[0] = Operand(rsrc);
          mubuf->operands[1] =
             write_address.type() == RegType::vgpr ? Operand(write_address) : Operand(v1);
          mubuf->operands[2] = Operand(write_offset);
          mubuf->operands[3] = Operand(write_datas[i]);
-         mubuf->glc = glc;
-         mubuf->dlc = false;
-         mubuf->offset = write_const_offset;
-         mubuf->addr64 = write_address.type() == RegType::vgpr;
-         mubuf->disable_wqm = true;
-         mubuf->sync = sync;
+         mubuf->mubuf().glc = glc;
+         mubuf->mubuf().dlc = false;
+         mubuf->mubuf().offset = write_const_offset;
+         mubuf->mubuf().addr64 = write_address.type() == RegType::vgpr;
+         mubuf->mubuf().disable_wqm = true;
+         mubuf->mubuf().sync = sync;
          ctx->program->needs_exact = true;
          ctx->block->instructions.emplace_back(std::move(mubuf));
       }
@@ -7029,7 +7028,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
       }
 
       aco_opcode op = instr->def.bit_size == 32 ? op32 : op64;
-      aco_ptr<FLAT_instruction> flat{create_instruction<FLAT_instruction>(
+      aco_ptr<Instruction> flat{create_instruction<FLAT_instruction>(
          op, global ? Format::GLOBAL : Format::FLAT, 3, return_previous ? 1 : 0)};
       if (addr.regClass() == s2) {
          assert(global && offset.id() && offset.type() == RegType::vgpr);
@@ -7043,12 +7042,12 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
       flat->operands[2] = Operand(data);
       if (return_previous)
          flat->definitions[0] = Definition(dst);
-      flat->glc = return_previous;
-      flat->dlc = false; /* Not needed for atomics */
+      flat->flatlike().glc = return_previous;
+      flat->flatlike().dlc = false; /* Not needed for atomics */
       assert(global || !const_offset);
-      flat->offset = const_offset;
-      flat->disable_wqm = true;
-      flat->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
+      flat->flatlike().offset = const_offset;
+      flat->flatlike().disable_wqm = true;
+      flat->flatlike().sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
       ctx->program->needs_exact = true;
       ctx->block->instructions.emplace_back(std::move(flat));
    } else {
@@ -7061,7 +7060,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
 
       aco_opcode op = instr->def.bit_size == 32 ? op32 : op64;
 
-      aco_ptr<MUBUF_instruction> mubuf{
+      aco_ptr<Instruction> mubuf{
          create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, return_previous ? 1 : 0)};
       mubuf->operands[0] = Operand(rsrc);
       mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
@@ -7071,12 +7070,12 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
          return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
       if (return_previous)
          mubuf->definitions[0] = def;
-      mubuf->glc = return_previous;
-      mubuf->dlc = false;
-      mubuf->offset = const_offset;
-      mubuf->addr64 = addr.type() == RegType::vgpr;
-      mubuf->disable_wqm = true;
-      mubuf->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
+      mubuf->mubuf().glc = return_previous;
+      mubuf->mubuf().dlc = false;
+      mubuf->mubuf().offset = const_offset;
+      mubuf->mubuf().addr64 = addr.type() == RegType::vgpr;
+      mubuf->mubuf().disable_wqm = true;
+      mubuf->mubuf().sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
       ctx->program->needs_exact = true;
       ctx->block->instructions.emplace_back(std::move(mubuf));
       if (return_previous && cmpswap)
@@ -7473,7 +7472,7 @@ visit_shared_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
       offset = 0;
    }
 
-   aco_ptr<DS_instruction> ds;
+   aco_ptr<Instruction> ds;
    ds.reset(
       create_instruction<DS_instruction>(op, Format::DS, num_operands, return_previous ? 1 : 0));
    ds->operands[0] = Operand(address);
@@ -7485,10 +7484,10 @@ visit_shared_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
          std::swap(ds->operands[1], ds->operands[2]);
    }
    ds->operands[num_operands - 1] = m;
-   ds->offset0 = offset;
+   ds->ds().offset0 = offset;
    if (return_previous)
       ds->definitions[0] = Definition(get_ssa_temp(ctx, &instr->def));
-   ds->sync = memory_sync_info(storage_shared, semantic_atomicrmw);
+   ds->ds().sync = memory_sync_info(storage_shared, semantic_atomicrmw);
 
    if (m.isUndefined())
       ds->operands.pop_back();
@@ -7916,7 +7915,7 @@ emit_reduction_instr(isel_context* ctx, aco_opcode aco_op, ReduceOp op, unsigned
    if (clobber_vcc)
       defs[num_defs++] = bld.def(bld.lm, vcc);
 
-   Pseudo_reduction_instruction* reduce = create_instruction<Pseudo_reduction_instruction>(
+   Instruction* reduce = create_instruction<Pseudo_reduction_instruction>(
       aco_op, Format::PSEUDO_REDUCTION, 3, num_defs);
    reduce->operands[0] = Operand(src);
    /* setup_reduce_temp will update these undef operands if needed */
@@ -7924,8 +7923,8 @@ emit_reduction_instr(isel_context* ctx, aco_opcode aco_op, ReduceOp op, unsigned
    reduce->operands[2] = Operand(v1.as_linear());
    std::copy(defs, defs + num_defs, reduce->definitions.begin());
 
-   reduce->reduce_op = op;
-   reduce->cluster_size = cluster_size;
+   reduce->reduction().reduce_op = op;
+   reduce->reduction().cluster_size = cluster_size;
    bld.insert(std::move(reduce));
 
    return dst.getTemp();
@@ -8111,7 +8110,7 @@ create_fs_dual_src_export_gfx11(isel_context* ctx, const struct aco_export_mrt*
 {
    Builder bld(ctx->program, ctx->block);
 
-   aco_ptr<Pseudo_instruction> exp{create_instruction<Pseudo_instruction>(
+   aco_ptr<Instruction> exp{create_instruction<Pseudo_instruction>(
       aco_opcode::p_dual_src_export_gfx11, Format::PSEUDO, 8, 6)};
    for (unsigned i = 0; i < 4; i++) {
       exp->operands[i] = mrt0 ? mrt0->out[i] : Operand(v1);
@@ -9149,7 +9148,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
          bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m, offset0, offset1, true);
       ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile);
 
-      aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
+      aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
          aco_opcode::p_create_vector, Format::PSEUDO, instr->num_components, 1)};
       unsigned write_mask = nir_intrinsic_write_mask(instr);
 
@@ -9209,36 +9208,37 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
 
       const bool row_en = instr->intrinsic == nir_intrinsic_export_row_amd;
 
-      aco_ptr<Export_instruction> exp{
+      aco_ptr<Instruction> exp{
          create_instruction<Export_instruction>(aco_opcode::exp, Format::EXP, 4 + row_en, 0)};
 
-      exp->dest = target;
-      exp->enabled_mask = write_mask;
-      exp->compressed = flags & AC_EXP_FLAG_COMPRESSED;
+      exp->exp().dest = target;
+      exp->exp().enabled_mask = write_mask;
+      exp->exp().compressed = flags & AC_EXP_FLAG_COMPRESSED;
 
       /* ACO may reorder position/mrt export instructions, then mark done for last
        * export instruction. So don't respect the nir AC_EXP_FLAG_DONE for position/mrt
        * exports here and leave it to ACO.
        */
       if (target == V_008DFC_SQ_EXP_PRIM)
-         exp->done = flags & AC_EXP_FLAG_DONE;
+         exp->exp().done = flags & AC_EXP_FLAG_DONE;
       else
-         exp->done = false;
+         exp->exp().done = false;
 
       /* ACO may reorder mrt export instructions, then mark valid mask for last
        * export instruction. So don't respect the nir AC_EXP_FLAG_VALID_MASK for mrt
        * exports here and leave it to ACO.
        */
       if (target > V_008DFC_SQ_EXP_NULL)
-         exp->valid_mask = flags & AC_EXP_FLAG_VALID_MASK;
+         exp->exp().valid_mask = flags & AC_EXP_FLAG_VALID_MASK;
       else
-         exp->valid_mask = false;
+         exp->exp().valid_mask = false;
 
-      exp->row_en = row_en;
+      exp->exp().row_en = row_en;
 
       /* Compressed export uses two bits for a channel. */
-      uint32_t channel_mask =
-         exp->compressed ? (write_mask & 0x3 ? 1 : 0) | (write_mask & 0xc ? 2 : 0) : write_mask;
+      uint32_t channel_mask = exp->exp().compressed
+                                 ? (write_mask & 0x3 ? 1 : 0) | (write_mask & 0xc ? 2 : 0)
+                                 : write_mask;
 
       Temp value = get_ssa_temp(ctx, instr->src[0].ssa);
       for (unsigned i = 0; i < 4; i++) {
@@ -9287,7 +9287,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
       if (it != ctx->allocated_vec.end())
          num_src = src.bytes() / it->second[0].bytes();
 
-      aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
+      aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
          aco_opcode::p_start_linear_vgpr, Format::PSEUDO, num_src + !!begin_size, 1)};
 
       if (begin_size)
@@ -9730,15 +9730,15 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
          }
       }
 
-      aco_ptr<MUBUF_instruction> mubuf{
+      aco_ptr<Instruction> mubuf{
          create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3 + instr->is_sparse, 1)};
       mubuf->operands[0] = Operand(resource);
       mubuf->operands[1] = Operand(coords[0]);
       mubuf->operands[2] = Operand::c32(0);
       mubuf->definitions[0] = Definition(tmp_dst);
-      mubuf->idxen = true;
-      mubuf->tfe = instr->is_sparse;
-      if (mubuf->tfe)
+      mubuf->mubuf().idxen = true;
+      mubuf->mubuf().tfe = instr->is_sparse;
+      if (mubuf->mubuf().tfe)
          mubuf->operands[3] = emit_tfe_init(bld, tmp_dst);
       ctx->block->instructions.emplace_back(std::move(mubuf));
 
@@ -10008,7 +10008,7 @@ get_phi_operand(isel_context* ctx, nir_def* ssa, RegClass rc, bool logical)
 void
 visit_phi(isel_context* ctx, nir_phi_instr* instr)
 {
-   aco_ptr<Pseudo_instruction> phi;
+   aco_ptr<Instruction> phi;
    Temp dst = get_ssa_temp(ctx, &instr->def);
    assert(instr->def.bit_size != 1 || dst.regClass() == ctx->program->lane_mask);
 
@@ -10111,7 +10111,7 @@ visit_undef(isel_context* ctx, nir_undef_instr* instr)
    if (dst.size() == 1) {
       Builder(ctx->program, ctx->block).copy(Definition(dst), Operand::zero());
    } else {
-      aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
+      aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
          aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
       for (unsigned i = 0; i < dst.size(); i++)
          vec->operands[i] = Operand::zero();
@@ -10210,7 +10210,7 @@ end_loop(isel_context* ctx, loop_context* lc)
    /* trim linear phis in loop header */
    for (auto&& instr : loop_entry->instructions) {
       if (instr->opcode == aco_opcode::p_linear_phi) {
-         aco_ptr<Pseudo_instruction> new_phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, loop_entry->linear_predecessors.size(), 1)};
+         aco_ptr<Instruction> new_phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, loop_entry->linear_predecessors.size(), 1)};
          new_phi->definitions[0] = instr->definitions[0];
          for (unsigned i = 0; i < new_phi->operands.size(); i++)
             new_phi->operands[i] = instr->operands[i];
@@ -10484,13 +10484,14 @@ begin_divergent_if_then(isel_context* ctx, if_context* ic, Temp cond,
 
    /* branch to linear then block */
    assert(cond.regClass() == ctx->program->lane_mask);
-   aco_ptr<Pseudo_branch_instruction> branch;
+   aco_ptr<Instruction> branch;
    branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_z,
                                                               Format::PSEUDO_BRANCH, 1, 1));
    branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
    branch->operands[0] = Operand(cond);
-   branch->selection_control_remove = sel_ctrl == nir_selection_control_flatten ||
-                                      sel_ctrl == nir_selection_control_divergent_always_taken;
+   branch->branch().selection_control_remove =
+      sel_ctrl == nir_selection_control_flatten ||
+      sel_ctrl == nir_selection_control_divergent_always_taken;
    ctx->block->instructions.push_back(std::move(branch));
 
    ic->BB_if_idx = ctx->block->index;
@@ -10528,7 +10529,7 @@ begin_divergent_if_else(isel_context* ctx, if_context* ic,
    Block* BB_then_logical = ctx->block;
    append_logical_end(BB_then_logical);
    /* branch from logical then block to invert block */
-   aco_ptr<Pseudo_branch_instruction> branch;
+   aco_ptr<Instruction> branch;
    branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch,
                                                               Format::PSEUDO_BRANCH, 0, 1));
    branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
@@ -10561,8 +10562,9 @@ begin_divergent_if_else(isel_context* ctx, if_context* ic,
    branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch,
                                                               Format::PSEUDO_BRANCH, 0, 1));
    branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
-   branch->selection_control_remove = sel_ctrl == nir_selection_control_flatten ||
-                                      sel_ctrl == nir_selection_control_divergent_always_taken;
+   branch->branch().selection_control_remove =
+      sel_ctrl == nir_selection_control_flatten ||
+      sel_ctrl == nir_selection_control_divergent_always_taken;
    ctx->block->instructions.push_back(std::move(branch));
 
    ic->exec_potentially_empty_discard_old |= ctx->cf_info.exec_potentially_empty_discard;
@@ -10593,7 +10595,7 @@ end_divergent_if(isel_context* ctx, if_context* ic)
    append_logical_end(BB_else_logical);
 
    /* branch from logical else block to endif block */
-   aco_ptr<Pseudo_branch_instruction> branch;
+   aco_ptr<Instruction> branch;
    branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch,
                                                               Format::PSEUDO_BRANCH, 0, 1));
    branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
@@ -10650,7 +10652,7 @@ begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond)
    append_logical_end(ctx->block);
    ctx->block->kind |= block_kind_uniform;
 
-   aco_ptr<Pseudo_branch_instruction> branch;
+   aco_ptr<Instruction> branch;
    aco_opcode branch_opcode = aco_opcode::p_cbranch_z;
    branch.reset(
       create_instruction<Pseudo_branch_instruction>(branch_opcode, Format::PSEUDO_BRANCH, 1, 1));
@@ -10687,7 +10689,7 @@ begin_uniform_if_else(isel_context* ctx, if_context* ic)
    if (!ic->uniform_has_then_branch) {
       append_logical_end(BB_then);
       /* branch from then block to endif block */
-      aco_ptr<Pseudo_branch_instruction> branch;
+      aco_ptr<Instruction> branch;
       branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch,
                                                                  Format::PSEUDO_BRANCH, 0, 1));
       branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
@@ -10719,7 +10721,7 @@ end_uniform_if(isel_context* ctx, if_context* ic)
    if (!ctx->cf_info.has_branch) {
       append_logical_end(BB_else);
       /* branch from then block to endif block */
-      aco_ptr<Pseudo_branch_instruction> branch;
+      aco_ptr<Instruction> branch;
       branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch,
                                                                  Format::PSEUDO_BRANCH, 0, 1));
       branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
@@ -10747,7 +10749,7 @@ visit_if(isel_context* ctx, nir_if* if_stmt)
 {
    Temp cond = get_ssa_temp(ctx, if_stmt->condition.ssa);
    Builder bld(ctx->program, ctx->block);
-   aco_ptr<Pseudo_branch_instruction> branch;
+   aco_ptr<Instruction> branch;
    if_context ic;
 
    if (!nir_src_is_divergent(if_stmt->condition)) { /* uniform condition */
@@ -11138,7 +11140,7 @@ create_fs_jump_to_epilog(isel_context* ctx)
 
    Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.epilog_pc));
 
-   aco_ptr<Pseudo_instruction> jump{create_instruction<Pseudo_instruction>(
+   aco_ptr<Instruction> jump{create_instruction<Pseudo_instruction>(
       aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + exports.size(), 0)};
    jump->operands[0] = Operand(continue_pc);
    for (unsigned i = 0; i < exports.size(); i++) {
@@ -11192,8 +11194,8 @@ passthrough_all_args(isel_context* ctx, std::vector<Operand>& regs)
 static void
 build_end_with_regs(isel_context* ctx, std::vector<Operand>& regs)
 {
-   aco_ptr<Pseudo_instruction> end{create_instruction<Pseudo_instruction>(
-      aco_opcode::p_end_with_regs, Format::PSEUDO, regs.size(), 0)};
+   aco_ptr<Instruction> end{create_instruction<Pseudo_instruction>(aco_opcode::p_end_with_regs,
+                                                                   Format::PSEUDO, regs.size(), 0)};
 
    for (unsigned i = 0; i < regs.size(); i++)
       end->operands[i] = regs[i];
@@ -11240,7 +11242,7 @@ create_tcs_jump_to_epilog(isel_context* ctx)
 
    Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.epilog_pc));
 
-   aco_ptr<Pseudo_instruction> jump{
+   aco_ptr<Instruction> jump{
       create_instruction<Pseudo_instruction>(aco_opcode::p_jump_to_epilog, Format::PSEUDO, 14, 0)};
    jump->operands[0] = Operand(continue_pc);
    jump->operands[1] = ring_offsets;
@@ -11374,7 +11376,7 @@ create_fs_end_for_epilog(isel_context* ctx)
    ctx->program->needs_exact = true;
 }
 
-Pseudo_instruction*
+Instruction*
 add_startpgm(struct isel_context* ctx)
 {
    unsigned def_count = 0;
@@ -11388,7 +11390,7 @@ add_startpgm(struct isel_context* ctx)
          def_count++;
    }
 
-   Pseudo_instruction* startpgm =
+   Instruction* startpgm =
       create_instruction<Pseudo_instruction>(aco_opcode::p_startpgm, Format::PSEUDO, 0, def_count);
    ctx->block->instructions.emplace_back(startpgm);
    for (unsigned i = 0, arg = 0; i < ctx->args->arg_count; i++) {
@@ -11478,7 +11480,7 @@ fix_ls_vgpr_init_bug(isel_context* ctx)
 }
 
 void
-split_arguments(isel_context* ctx, Pseudo_instruction* startpgm)
+split_arguments(isel_context* ctx, Instruction* startpgm)
 {
    /* Split all arguments except for the first (ring_offsets) and the last
     * (exec) so that the dead channels don't stay live throughout the program.
@@ -11645,7 +11647,7 @@ insert_rt_jump_next(isel_context& ctx, const struct ac_shader_args* args)
    for (unsigned i = 0; i < ctx.args->arg_count; i++)
       src_count += !!BITSET_TEST(ctx.output_args, i);
 
-   Pseudo_instruction* ret =
+   Instruction* ret =
       create_instruction<Pseudo_instruction>(aco_opcode::p_return, Format::PSEUDO, src_count, 0);
    ctx.block->instructions.emplace_back(ret);
 
@@ -11682,7 +11684,7 @@ select_program_rt(isel_context& ctx, unsigned shader_count, struct nir_shader* c
       init_context(&ctx, nir);
       setup_fp_mode(&ctx, nir);
 
-      Pseudo_instruction* startpgm = add_startpgm(&ctx);
+      Instruction* startpgm = add_startpgm(&ctx);
       append_logical_start(ctx.block);
       split_arguments(&ctx, startpgm);
       visit_cf_list(&ctx, &nir_shader_get_entrypoint(nir)->body);
@@ -11839,7 +11841,7 @@ create_merged_jump_to_epilog(isel_context* ctx)
    Temp continue_pc =
       convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.next_stage_pc));
 
-   aco_ptr<Pseudo_instruction> jump{create_instruction<Pseudo_instruction>(
+   aco_ptr<Instruction> jump{create_instruction<Pseudo_instruction>(
       aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + regs.size(), 0)};
    jump->operands[0] = Operand(continue_pc);
    for (unsigned i = 0; i < regs.size(); i++) {
@@ -11884,7 +11886,7 @@ select_shader(isel_context& ctx, nir_shader* nir, const bool need_startpgm, cons
 
    if (need_startpgm) {
       /* Needs to be after init_context() for FS. */
-      Pseudo_instruction* startpgm = add_startpgm(&ctx);
+      Instruction* startpgm = add_startpgm(&ctx);
       append_logical_start(ctx.block);
 
       if (ctx.options->has_ls_vgpr_init_bug && ctx.stage == vertex_tess_control_hs &&
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index ccbdf64a426..0b1791139c7 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1671,7 +1671,7 @@ struct instr_deleter_functor {
 template <typename T> using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
 
 template <typename T>
-T*
+Instruction*
 create_instruction(aco_opcode opcode, Format format, uint32_t num_operands,
                    uint32_t num_definitions)
 {
@@ -1679,7 +1679,7 @@ create_instruction(aco_opcode opcode, Format format, uint32_t num_operands,
       sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
    void* data = instruction_buffer->allocate(size, alignof(uint32_t));
    memset(data, 0, size);
-   T* inst = (T*)data;
+   Instruction* inst = (Instruction*)data;
 
    inst->opcode = opcode;
    inst->format = format;
diff --git a/src/amd/compiler/aco_lower_phis.cpp b/src/amd/compiler/aco_lower_phis.cpp
index bd803c68b5f..3948b8ddd49 100644
--- a/src/amd/compiler/aco_lower_phis.cpp
+++ b/src/amd/compiler/aco_lower_phis.cpp
@@ -108,8 +108,8 @@ get_output(Program* program, unsigned block_idx, ssa_state* state)
    }
 
    /* create phi */
-   aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(
-      aco_opcode::p_linear_phi, Format::PSEUDO, num_preds, 1)};
+   aco_ptr<Instruction> phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi,
+                                                                   Format::PSEUDO, num_preds, 1)};
    for (unsigned i = 0; i < num_preds; i++)
       phi->operands[i] = state->outputs[block.linear_preds[i]];
    phi->definitions[0] = Definition(output.getTemp());
@@ -347,8 +347,8 @@ lower_divergent_bool_phi(Program* program, ssa_state* state, Block* block,
 
    unsigned num_preds = block->linear_preds.size();
    if (phi->operands.size() != num_preds) {
-      Pseudo_instruction* new_phi{create_instruction<Pseudo_instruction>(
-         aco_opcode::p_linear_phi, Format::PSEUDO, num_preds, 1)};
+      Instruction* new_phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi,
+                                                                  Format::PSEUDO, num_preds, 1)};
       new_phi->definitions[0] = phi->definitions[0];
       phi.reset(new_phi);
    } else {
diff --git a/src/amd/compiler/aco_lower_to_cssa.cpp b/src/amd/compiler/aco_lower_to_cssa.cpp
index 65fcdac40b9..62e02b6d71f 100644
--- a/src/amd/compiler/aco_lower_to_cssa.cpp
+++ b/src/amd/compiler/aco_lower_to_cssa.cpp
@@ -424,8 +424,8 @@ emit_copies_block(Builder& bld, std::map<uint32_t, ltg_node>& ltg, RegType type)
       // TODO: this should be restricted to a feasible number of registers
       // and otherwise use a temporary to avoid having to reload more (spilled)
       // variables than we have registers.
-      aco_ptr<Pseudo_instruction> copy{create_instruction<Pseudo_instruction>(
-         aco_opcode::p_parallelcopy, Format::PSEUDO, num, num)};
+      aco_ptr<Instruction> copy{create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy,
+                                                                       Format::PSEUDO, num, num)};
       it = ltg.begin();
       for (unsigned i = 0; i < num; i++) {
          while (it->second.cp.def.regClass().type() != type)
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 7d63bab1c1e..6de7d449d23 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -601,13 +601,13 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
 
    if (src.regClass() == v1b) {
       if (ctx->program->gfx_level >= GFX8 && ctx->program->gfx_level < GFX11) {
-         aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(
+         aco_ptr<Instruction> sdwa{create_instruction<SDWA_instruction>(
             aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
          sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
          sdwa->definitions[0] = Definition(PhysReg{tmp}, v1);
          bool sext = reduce_op == imin8 || reduce_op == imax8;
-         sdwa->sel[0] = SubdwordSel(1, 0, sext);
-         sdwa->dst_sel = SubdwordSel::dword;
+         sdwa->sdwa().sel[0] = SubdwordSel(1, 0, sext);
+         sdwa->sdwa().dst_sel = SubdwordSel::dword;
          bld.insert(std::move(sdwa));
       } else {
          aco_opcode opcode;
@@ -624,13 +624,13 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
       bool is_add_cmp = reduce_op == iadd16 || reduce_op == imax16 || reduce_op == imin16 ||
                         reduce_op == umin16 || reduce_op == umax16;
       if (ctx->program->gfx_level >= GFX10 && ctx->program->gfx_level < GFX11 && is_add_cmp) {
-         aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(
+         aco_ptr<Instruction> sdwa{create_instruction<SDWA_instruction>(
             aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
          sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
          sdwa->definitions[0] = Definition(PhysReg{tmp}, v1);
          bool sext = reduce_op == imin16 || reduce_op == imax16 || reduce_op == iadd16;
-         sdwa->sel[0] = SubdwordSel(2, 0, sext);
-         sdwa->dst_sel = SubdwordSel::dword;
+         sdwa->sdwa().sel[0] = SubdwordSel(2, 0, sext);
+         sdwa->sdwa().dst_sel = SubdwordSel::dword;
          bld.insert(std::move(sdwa));
       } else if (ctx->program->gfx_level <= GFX7 ||
                  (ctx->program->gfx_level >= GFX11 && is_add_cmp)) {
@@ -2259,7 +2259,7 @@ lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
    instr->mimg().strict_wqm = false;
 
    if ((3 + num_vaddr) > instr->operands.size()) {
-      MIMG_instruction* new_instr = create_instruction<MIMG_instruction>(
+      Instruction* new_instr = create_instruction<MIMG_instruction>(
          instr->opcode, Format::MIMG, 3 + num_vaddr, instr->definitions.size());
       std::copy(instr->definitions.cbegin(), instr->definitions.cend(),
                 new_instr->definitions.begin());
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 59f134f976c..725b3dd5082 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -905,7 +905,7 @@ smem_combine(opt_ctx& ctx, aco_ptr<Instruction>& instr)
                smem.operands.back() = Operand(base);
             }
          } else {
-            SMEM_instruction* new_instr = create_instruction<SMEM_instruction>(
+            Instruction* new_instr = create_instruction<SMEM_instruction>(
                smem.opcode, Format::SMEM, smem.operands.size() + 1, smem.definitions.size());
             new_instr->operands[0] = smem.operands[0];
             new_instr->operands[1] = Operand::c32(offset);
@@ -914,11 +914,11 @@ smem_combine(opt_ctx& ctx, aco_ptr<Instruction>& instr)
             new_instr->operands.back() = Operand(base);
             if (!smem.definitions.empty())
                new_instr->definitions[0] = smem.definitions[0];
-            new_instr->sync = smem.sync;
-            new_instr->glc = smem.glc;
-            new_instr->dlc = smem.dlc;
-            new_instr->nv = smem.nv;
-            new_instr->disable_wqm = smem.disable_wqm;
+            new_instr->smem().sync = smem.sync;
+            new_instr->smem().glc = smem.glc;
+            new_instr->smem().dlc = smem.dlc;
+            new_instr->smem().nv = smem.nv;
+            new_instr->smem().disable_wqm = smem.disable_wqm;
             instr.reset(new_instr);
          }
       }
@@ -2312,10 +2312,10 @@ combine_ordering_test(opt_ctx& ctx, aco_ptr<Instruction>& instr)
    case 64: new_op = is_or ? aco_opcode::v_cmp_u_f64 : aco_opcode::v_cmp_o_f64; break;
    }
    bool needs_vop3 = num_sgprs > 1 || (opsel[0] && op[0].type() != RegType::vgpr);
-   VALU_instruction* new_instr = create_instruction<VALU_instruction>(
+   Instruction* new_instr = create_instruction<VALU_instruction>(
       new_op, needs_vop3 ? asVOP3(Format::VOPC) : Format::VOPC, 2, 1);
 
-   new_instr->opsel = opsel;
+   new_instr->valu().opsel = opsel;
    new_instr->operands[0] = copy_operand(ctx, Operand(op[0]));
    new_instr->operands[1] = copy_operand(ctx, Operand(op[1]));
    new_instr->definitions[0] = instr->definitions[0];
@@ -2381,13 +2381,13 @@ combine_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr)
       return false;
 
    aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
-   VALU_instruction* new_instr = create_instruction<VALU_instruction>(
+   Instruction* new_instr = create_instruction<VALU_instruction>(
       new_op, cmp->isVOP3() ? asVOP3(Format::VOPC) : Format::VOPC, 2, 1);
-   new_instr->neg = cmp_valu.neg;
-   new_instr->abs = cmp_valu.abs;
-   new_instr->clamp = cmp_valu.clamp;
-   new_instr->omod = cmp_valu.omod;
-   new_instr->opsel = cmp_valu.opsel;
+   new_instr->valu().neg = cmp_valu.neg;
+   new_instr->valu().abs = cmp_valu.abs;
+   new_instr->valu().clamp = cmp_valu.clamp;
+   new_instr->valu().omod = cmp_valu.omod;
+   new_instr->valu().opsel = cmp_valu.opsel;
    new_instr->operands[0] = copy_operand(ctx, cmp->operands[0]);
    new_instr->operands[1] = copy_operand(ctx, cmp->operands[1]);
    new_instr->definitions[0] = instr->definitions[0];
@@ -2701,12 +2701,12 @@ create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr<Instruction>& instr
                     Operand operands[3], uint8_t neg, uint8_t abs, uint8_t opsel, bool clamp,
                     unsigned omod)
 {
-   VALU_instruction* new_instr = create_instruction<VALU_instruction>(opcode, Format::VOP3, 3, 1);
-   new_instr->neg = neg;
-   new_instr->abs = abs;
-   new_instr->clamp = clamp;
-   new_instr->omod = omod;
-   new_instr->opsel = opsel;
+   Instruction* new_instr = create_instruction<VALU_instruction>(opcode, Format::VOP3, 3, 1);
+   new_instr->valu().neg = neg;
+   new_instr->valu().abs = abs;
+   new_instr->valu().clamp = clamp;
+   new_instr->valu().omod = omod;
+   new_instr->valu().opsel = opsel;
    new_instr->operands[0] = operands[0];
    new_instr->operands[1] = operands[1];
    new_instr->operands[2] = operands[2];
@@ -3746,7 +3746,7 @@ combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr, bool is_sub)
          ctx.uses[instr->operands[i].tempId()]--;
 
          aco_opcode mad_op = is_sub ? aco_opcode::v_mad_i32_i24 : aco_opcode::v_mad_u32_u24;
-         aco_ptr<VALU_instruction> new_instr{
+         aco_ptr<Instruction> new_instr{
             create_instruction<VALU_instruction>(mad_op, Format::VOP3, 3, 1)};
          for (unsigned op_idx = 0; op_idx < 3; ++op_idx)
             new_instr->operands[op_idx] = ops[op_idx];
@@ -3930,23 +3930,23 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
 
       /* turn mul + packed add into v_pk_fma_f16 */
       aco_opcode mad = fadd ? aco_opcode::v_pk_fma_f16 : aco_opcode::v_pk_mad_u16;
-      aco_ptr<VALU_instruction> fma{create_instruction<VALU_instruction>(mad, Format::VOP3P, 3, 1)};
+      aco_ptr<Instruction> fma{create_instruction<VALU_instruction>(mad, Format::VOP3P, 3, 1)};
       fma->operands[0] = copy_operand(ctx, mul_instr->operands[0]);
       fma->operands[1] = copy_operand(ctx, mul_instr->operands[1]);
       fma->operands[2] = instr->operands[add_op_idx];
-      fma->clamp = vop3p->clamp;
-      fma->neg_lo = mul_neg_lo;
-      fma->neg_hi = mul_neg_hi;
-      fma->opsel_lo = mul_opsel_lo;
-      fma->opsel_hi = mul_opsel_hi;
-      propagate_swizzles(fma.get(), vop3p->opsel_lo[1 - add_op_idx],
+      fma->valu().clamp = vop3p->clamp;
+      fma->valu().neg_lo = mul_neg_lo;
+      fma->valu().neg_hi = mul_neg_hi;
+      fma->valu().opsel_lo = mul_opsel_lo;
+      fma->valu().opsel_hi = mul_opsel_hi;
+      propagate_swizzles(&fma->valu(), vop3p->opsel_lo[1 - add_op_idx],
                          vop3p->opsel_hi[1 - add_op_idx]);
-      fma->opsel_lo[2] = vop3p->opsel_lo[add_op_idx];
-      fma->opsel_hi[2] = vop3p->opsel_hi[add_op_idx];
-      fma->neg_lo[2] = vop3p->neg_lo[add_op_idx];
-      fma->neg_hi[2] = vop3p->neg_hi[add_op_idx];
-      fma->neg_lo[1] = fma->neg_lo[1] ^ vop3p->neg_lo[1 - add_op_idx];
-      fma->neg_hi[1] = fma->neg_hi[1] ^ vop3p->neg_hi[1 - add_op_idx];
+      fma->valu().opsel_lo[2] = vop3p->opsel_lo[add_op_idx];
+      fma->valu().opsel_hi[2] = vop3p->opsel_hi[add_op_idx];
+      fma->valu().neg_lo[2] = vop3p->neg_lo[add_op_idx];
+      fma->valu().neg_hi[2] = vop3p->neg_hi[add_op_idx];
+      fma->valu().neg_lo[1] = fma->valu().neg_lo[1] ^ vop3p->neg_lo[1 - add_op_idx];
+      fma->valu().neg_hi[1] = fma->valu().neg_hi[1] ^ vop3p->neg_hi[1 - add_op_idx];
       fma->definitions[0] = instr->definitions[0];
       fma->pass_flags = instr->pass_flags;
       instr = std::move(fma);
@@ -3995,26 +3995,26 @@ to_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
 
    bool is_add = instr->opcode != aco_opcode::v_mul_f32;
 
-   aco_ptr<VALU_instruction> vop3p{
+   aco_ptr<Instruction> vop3p{
       create_instruction<VALU_instruction>(aco_opcode::v_fma_mix_f32, Format::VOP3P, 3, 1)};
 
    for (unsigned i = 0; i < instr->operands.size(); i++) {
       vop3p->operands[is_add + i] = instr->operands[i];
-      vop3p->neg_lo[is_add + i] = instr->valu().neg[i];
-      vop3p->neg_hi[is_add + i] = instr->valu().abs[i];
+      vop3p->valu().neg_lo[is_add + i] = instr->valu().neg[i];
+      vop3p->valu().neg_hi[is_add + i] = instr->valu().abs[i];
    }
    if (instr->opcode == aco_opcode::v_mul_f32) {
       vop3p->operands[2] = Operand::zero();
-      vop3p->neg_lo[2] = true;
+      vop3p->valu().neg_lo[2] = true;
    } else if (is_add) {
       vop3p->operands[0] = Operand::c32(0x3f800000);
       if (instr->opcode == aco_opcode::v_sub_f32)
-         vop3p->neg_lo[2] ^= true;
+         vop3p->valu().neg_lo[2] ^= true;
       else if (instr->opcode == aco_opcode::v_subrev_f32)
-         vop3p->neg_lo[1] ^= true;
+         vop3p->valu().neg_lo[1] ^= true;
    }
    vop3p->definitions[0] = instr->definitions[0];
-   vop3p->clamp = instr->valu().clamp;
+   vop3p->valu().clamp = instr->valu().clamp;
    vop3p->pass_flags = instr->pass_flags;
    instr = std::move(vop3p);
 
@@ -4418,7 +4418,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
             neg[2 - add_op_idx] = neg[2 - add_op_idx] ^ true;
 
          aco_ptr<Instruction> add_instr = std::move(instr);
-         aco_ptr<VALU_instruction> mad;
+         aco_ptr<Instruction> mad;
          if (add_instr->isVOP3P() || mul_instr->isVOP3P()) {
             assert(!omod);
             assert(!opsel);
@@ -4448,14 +4448,14 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
 
          for (unsigned i = 0; i < 3; i++) {
             mad->operands[i] = op[i];
-            mad->neg[i] = neg[i];
-            mad->abs[i] = abs[i];
+            mad->valu().neg[i] = neg[i];
+            mad->valu().abs[i] = abs[i];
          }
-         mad->omod = omod;
-         mad->clamp = clamp;
-         mad->opsel_lo = opsel_lo;
-         mad->opsel_hi = opsel_hi;
-         mad->opsel = opsel;
+         mad->valu().omod = omod;
+         mad->valu().clamp = clamp;
+         mad->valu().opsel_lo = opsel_lo;
+         mad->valu().opsel_hi = opsel_hi;
+         mad->valu().opsel = opsel;
          mad->definitions[0] = add_instr->definitions[0];
          mad->definitions[0].setPrecise(add_instr->definitions[0].isPrecise() ||
                                         mul_instr->definitions[0].isPrecise());
@@ -4481,7 +4481,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
             ctx.uses[instr->operands[i].tempId()]--;
             ctx.uses[ctx.info[instr->operands[i].tempId()].temp.id()]++;
 
-            aco_ptr<VALU_instruction> new_instr{
+            aco_ptr<Instruction> new_instr{
                create_instruction<VALU_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1)};
             new_instr->operands[0] = Operand::zero();
             new_instr->operands[1] = instr->operands[!i];
@@ -4805,7 +4805,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
             if (op.isTemp())
                ctx.uses[op.tempId()]++;
 
-            aco_ptr<Pseudo_instruction> extract{create_instruction<Pseudo_instruction>(
+            aco_ptr<Instruction> extract{create_instruction<Pseudo_instruction>(
                aco_opcode::p_create_vector, Format::PSEUDO, 1, 1)};
             extract->operands[0] = op;
             extract->definitions[0] = instr->definitions[idx];
@@ -4818,7 +4818,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
       if (!done && num_used == 1 &&
           instr->operands[0].bytes() % instr->definitions[idx].bytes() == 0 &&
           split_offset % instr->definitions[idx].bytes() == 0) {
-         aco_ptr<Pseudo_instruction> extract{create_instruction<Pseudo_instruction>(
+         aco_ptr<Instruction> extract{create_instruction<Pseudo_instruction>(
             aco_opcode::p_extract_vector, Format::PSEUDO, 2, 1)};
          extract->operands[0] = instr->operands[0];
          extract->operands[1] =
diff --git a/src/amd/compiler/aco_reduce_assign.cpp b/src/amd/compiler/aco_reduce_assign.cpp
index bffeb81c35c..25f017e516d 100644
--- a/src/amd/compiler/aco_reduce_assign.cpp
+++ b/src/amd/compiler/aco_reduce_assign.cpp
@@ -109,7 +109,7 @@ setup_reduce_temp(Program* program)
 
          if ((int)last_top_level_block_idx != inserted_at) {
             reduceTmp = program->allocateTmp(reduceTmp.regClass());
-            aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(
+            aco_ptr<Instruction> create{create_instruction<Pseudo_instruction>(
                aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
             create->definitions[0] = Definition(reduceTmp);
             /* find the right place to insert this definition */
@@ -154,7 +154,7 @@ setup_reduce_temp(Program* program)
 
          if (need_vtmp && (int)last_top_level_block_idx != vtmp_inserted_at) {
             vtmp = program->allocateTmp(vtmp.regClass());
-            aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(
+            aco_ptr<Instruction> create{create_instruction<Pseudo_instruction>(
                aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
             create->definitions[0] = Definition(vtmp);
             if (last_top_level_block_idx == block.index) {
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 697a226e5bf..cc47153c426 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -2885,7 +2885,7 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vector<std::pair<Operand, Definiti
    if (parallelcopy.empty())
       return;
 
-   aco_ptr<Pseudo_instruction> pc;
+   aco_ptr<Instruction> pc;
    pc.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO,
                                                    parallelcopy.size(), parallelcopy.size()));
    bool linear_vgpr = false;
@@ -2935,8 +2935,8 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vector<std::pair<Operand, Definiti
 
       handle_pseudo(ctx, tmp_file, pc.get());
    } else {
-      pc->needs_scratch_reg = sgpr_operands_alias_defs || linear_vgpr;
-      pc->tmp_in_scc = false;
+      pc->pseudo().needs_scratch_reg = sgpr_operands_alias_defs || linear_vgpr;
+      pc->pseudo().tmp_in_scc = false;
    }
 
    instructions.emplace_back(std::move(pc));
diff --git a/src/amd/compiler/aco_scheduler_ilp.cpp b/src/amd/compiler/aco_scheduler_ilp.cpp
index eb513b5c3dd..6ba24168e7b 100644
--- a/src/amd/compiler/aco_scheduler_ilp.cpp
+++ b/src/amd/compiler/aco_scheduler_ilp.cpp
@@ -686,9 +686,8 @@ create_vopd_instruction(const SchedILPContext& ctx, unsigned idx)
    get_vopd_opcode_operands(x, x_info, swap_x, &x_op, &num_operands, operands);
    get_vopd_opcode_operands(y, y_info, swap_y, &y_op, &num_operands, operands + num_operands);
 
-   VOPD_instruction* instr =
-      create_instruction<VOPD_instruction>(x_op, Format::VOPD, num_operands, 2);
-   instr->opy = y_op;
+   Instruction* instr = create_instruction<VOPD_instruction>(x_op, Format::VOPD, num_operands, 2);
+   instr->vopd().opy = y_op;
    instr->definitions[0] = x->definitions[0];
    instr->definitions[1] = y->definitions[0];
    std::copy(operands, operands + num_operands, instr->operands.begin());
diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp
index 35a4b6d435d..5bc360068de 100644
--- a/src/amd/compiler/aco_spill.cpp
+++ b/src/amd/compiler/aco_spill.cpp
@@ -376,7 +376,7 @@ do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t spill_id)
       res->definitions[0] = Definition(new_name);
       return res;
    } else {
-      aco_ptr<Pseudo_instruction> reload{
+      aco_ptr<Instruction> reload{
          create_instruction<Pseudo_instruction>(aco_opcode::p_reload, Format::PSEUDO, 1, 1)};
       reload->operands[0] = Operand::c32(spill_id);
       reload->definitions[0] = Definition(new_name);
@@ -845,7 +845,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
          for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx])
             ctx.add_interference(def_spill_id, pair.second);
 
-         aco_ptr<Pseudo_instruction> spill{
+         aco_ptr<Instruction> spill{
             create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
          spill->operands[0] = spill_op;
          spill->operands[1] = Operand::c32(def_spill_id);
@@ -915,7 +915,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
             ctx.renames[pred_idx].erase(rename_it);
          }
 
-         aco_ptr<Pseudo_instruction> spill{
+         aco_ptr<Instruction> spill{
             create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
          spill->operands[0] = Operand(var);
          spill->operands[1] = Operand::c32(pair.second);
@@ -1054,7 +1054,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
       if (!is_same) {
          /* the variable was renamed differently in the predecessors: we have to create a phi */
          aco_opcode opcode = pair.first.is_linear() ? aco_opcode::p_linear_phi : aco_opcode::p_phi;
-         aco_ptr<Pseudo_instruction> phi{
+         aco_ptr<Instruction> phi{
             create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, preds.size(), 1)};
          rename = ctx.program->allocateTmp(pair.first.regClass());
          for (unsigned i = 0; i < phi->operands.size(); i++) {
@@ -1229,7 +1229,7 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand s
             }
 
             /* add spill to new instructions */
-            aco_ptr<Pseudo_instruction> spill{
+            aco_ptr<Instruction> spill{
                create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
             spill->operands[0] = Operand(to_spill);
             spill->operands[1] = Operand::c32(spill_id);
@@ -1757,7 +1757,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
                if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) {
                   Temp linear_vgpr = ctx.program->allocateTmp(v1.as_linear());
                   vgpr_spill_temps[spill_slot / ctx.wave_size] = linear_vgpr;
-                  aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(
+                  aco_ptr<Instruction> create{create_instruction<Pseudo_instruction>(
                      aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
                   create->definitions[0] = Definition(linear_vgpr);
                   /* find the right place to insert this definition */
@@ -1774,7 +1774,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
                }
 
                /* spill sgpr: just add the vgpr temp to operands */
-               Pseudo_instruction* spill =
+               Instruction* spill =
                   create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 3, 0);
                spill->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
                spill->operands[0].setLateKill(true);
@@ -1798,7 +1798,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
                if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) {
                   Temp linear_vgpr = ctx.program->allocateTmp(v1.as_linear());
                   vgpr_spill_temps[spill_slot / ctx.wave_size] = linear_vgpr;
-                  aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(
+                  aco_ptr<Instruction> create{create_instruction<Pseudo_instruction>(
                      aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
                   create->definitions[0] = Definition(linear_vgpr);
                   /* find the right place to insert this definition */
@@ -1815,8 +1815,8 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
                }
 
                /* reload sgpr: just add the vgpr temp to operands */
-               Pseudo_instruction* reload = create_instruction<Pseudo_instruction>(
-                  aco_opcode::p_reload, Format::PSEUDO, 2, 1);
+               Instruction* reload = create_instruction<Pseudo_instruction>(aco_opcode::p_reload,
+                                                                            Format::PSEUDO, 2, 1);
                reload->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
                reload->operands[0].setLateKill(true);
                reload->operands[1] = Operand::c32(spill_slot % ctx.wave_size);
diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp
index 43ccb5c250e..2f194e8b79c 100644
--- a/src/amd/compiler/aco_ssa_elimination.cpp
+++ b/src/amd/compiler/aco_ssa_elimination.cpp
@@ -97,7 +97,7 @@ insert_parallelcopies(ssa_elimination_ctx& ctx)
       }
 
       std::vector<aco_ptr<Instruction>>::iterator it = std::next(block.instructions.begin(), idx);
-      aco_ptr<Pseudo_instruction> pc{
+      aco_ptr<Instruction> pc{
          create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO,
                                                 logical_phi_info.size(), logical_phi_info.size())};
       unsigned i = 0;
@@ -107,7 +107,7 @@ insert_parallelcopies(ssa_elimination_ctx& ctx)
          i++;
       }
       /* this shouldn't be needed since we're only copying vgprs */
-      pc->tmp_in_scc = false;
+      pc->pseudo().tmp_in_scc = false;
       block.instructions.insert(it, std::move(pc));
    }
 
@@ -122,7 +122,7 @@ insert_parallelcopies(ssa_elimination_ctx& ctx)
       --it;
       assert((*it)->isBranch());
       PhysReg scratch_sgpr = (*it)->definitions[0].physReg();
-      aco_ptr<Pseudo_instruction> pc{
+      aco_ptr<Instruction> pc{
          create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO,
                                                 linear_phi_info.size(), linear_phi_info.size())};
       unsigned i = 0;
@@ -131,9 +131,9 @@ insert_parallelcopies(ssa_elimination_ctx& ctx)
          pc->operands[i] = phi_info.op;
          i++;
       }
-      pc->tmp_in_scc = block.scc_live_out;
-      pc->scratch_sgpr = scratch_sgpr;
-      pc->needs_scratch_reg = true;
+      pc->pseudo().tmp_in_scc = block.scc_live_out;
+      pc->pseudo().scratch_sgpr = scratch_sgpr;
+      pc->pseudo().needs_scratch_reg = true;
       block.instructions.insert(it, std::move(pc));
    }
 }
diff --git a/src/amd/compiler/tests/test_assembler.cpp b/src/amd/compiler/tests/test_assembler.cpp
index f47d354701c..5f2f0de17f2 100644
--- a/src/amd/compiler/tests/test_assembler.cpp
+++ b/src/amd/compiler/tests/test_assembler.cpp
@@ -269,7 +269,7 @@ BEGIN_TEST(assembler.v_add3)
 
       //~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080
       //~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080
-      aco_ptr<VALU_instruction> add3{
+      aco_ptr<Instruction> add3{
          create_instruction<VALU_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
       add3->operands[0] = Operand::zero();
       add3->operands[1] = Operand::zero();
@@ -288,13 +288,13 @@ BEGIN_TEST(assembler.v_add3_clamp)
 
       //~gfx9>> integer addition + clamp ; d1ff8000 02010080
       //~gfx10>> integer addition + clamp ; d76d8000 02010080
-      aco_ptr<VALU_instruction> add3{
+      aco_ptr<Instruction> add3{
          create_instruction<VALU_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
       add3->operands[0] = Operand::zero();
       add3->operands[1] = Operand::zero();
       add3->operands[2] = Operand::zero();
       add3->definitions[0] = Definition(PhysReg(0), v1);
-      add3->clamp = 1;
+      add3->valu().clamp = 1;
       bld.insert(std::move(add3));
 
       finish_assembler_test();
diff --git a/src/amd/compiler/tests/test_hard_clause.cpp b/src/amd/compiler/tests/test_hard_clause.cpp
index 436a71984ff..8319a87c0d3 100644
--- a/src/amd/compiler/tests/test_hard_clause.cpp
+++ b/src/amd/compiler/tests/test_hard_clause.cpp
@@ -69,7 +69,7 @@ create_global()
 static void
 create_mimg(bool nsa, Temp desc = Temp(0, s8))
 {
-   aco_ptr<MIMG_instruction> mimg{
+   aco_ptr<Instruction> mimg{
       create_instruction<MIMG_instruction>(aco_opcode::image_sample, Format::MIMG, 5, 1)};
    mimg->definitions[0] = Definition(PhysReg(256), v1);
    mimg->operands[0] = Operand(desc);
@@ -78,8 +78,8 @@ create_mimg(bool nsa, Temp desc = Temp(0, s8))
    mimg->operands[2] = Operand(v1);
    for (unsigned i = 0; i < 2; i++)
       mimg->operands[3 + i] = Operand(PhysReg(256 + (nsa ? i * 2 : i)), v1);
-   mimg->dmask = 0x1;
-   mimg->dim = ac_image_2d;
+   mimg->mimg().dmask = 0x1;
+   mimg->mimg().dim = ac_image_2d;
 
    bld.insert(std::move(mimg));
 }
diff --git a/src/amd/compiler/tests/test_insert_nops.cpp b/src/amd/compiler/tests/test_insert_nops.cpp
index 8ac8766bbb8..c155b8d77a3 100644
--- a/src/amd/compiler/tests/test_insert_nops.cpp
+++ b/src/amd/compiler/tests/test_insert_nops.cpp
@@ -42,7 +42,7 @@ create_mubuf_store(PhysReg src = PhysReg(256))
 void
 create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords)
 {
-   aco_ptr<MIMG_instruction> mimg{
+   aco_ptr<Instruction> mimg{
       create_instruction<MIMG_instruction>(aco_opcode::image_sample, Format::MIMG, 3 + addrs, 1)};
    mimg->definitions[0] = Definition(PhysReg(256), v1);
    mimg->operands[0] = Operand(PhysReg(0), s8);
@@ -50,8 +50,8 @@ create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords)
    mimg->operands[2] = Operand(v1);
    for (unsigned i = 0; i < addrs; i++)
       mimg->operands[3 + i] = Operand(PhysReg(256 + (nsa ? i * 2 : i)), v1);
-   mimg->dmask = 0x1;
-   mimg->dim = ac_image_2d;
+   mimg->mimg().dmask = 0x1;
+   mimg->mimg().dim = ac_image_2d;
 
    assert(get_mimg_nsa_dwords(mimg.get()) + 2 == instr_dwords);