diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index 203e8719595..b135de6afd2 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -126,11 +126,14 @@ template <typename T>
 uint32_t
 get_gfx12_cpol(const T& instr)
 {
+   bool glc = instr.cache.value & ac_glc;
+   bool slc = instr.cache.value & ac_slc;
+   bool dlc = instr.cache.value & ac_dlc;
    if (instr_info.is_atomic[(int)instr.opcode]) {
-      return (instr.glc ? 1 /*TH_ATOMIC_RETURN*/ : 0) << 2;
+      return (glc ? 1 /*TH_ATOMIC_RETURN*/ : 0) << 2;
    } else {
-      return (instr.definitions.empty() || instr.glc || instr.slc || instr.dlc) ? 3 /*SCOPE_SYS*/
-             : 0 /*SCOPE_CU*/;
+      return (instr.definitions.empty() || glc || slc || dlc) ? 3 /*SCOPE_SYS*/
+             : 0 /*SCOPE_CU*/;
    }
 }
 
@@ -228,6 +231,8 @@ emit_smem_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
 {
    uint32_t opcode = ctx.opcode[(int)instr->opcode];
    SMEM_instruction& smem = instr->smem();
+   bool glc = smem.cache.value & ac_glc;
+   bool dlc = smem.cache.value & ac_dlc;
 
    bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4);
    bool is_load = !instr->definitions.empty();
@@ -258,22 +263,21 @@ emit_smem_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
 
    if (ctx.gfx_level <= GFX9) {
       encoding = (0b110000 << 26);
-      assert(!smem.dlc); /* Device-level coherent is not supported on GFX9 and lower */
-      encoding |= smem.nv ? 1 << 15 : 0;
+      assert(!dlc); /* Device-level coherent is not supported on GFX9 and lower */
+      /* We don't use the NV bit. */
    } else {
       encoding = (0b111101 << 26);
-      assert(!smem.nv); /* Non-volatile is not supported on GFX10 */
       if (ctx.gfx_level <= GFX11_5)
-         encoding |= smem.dlc ? 1 << (ctx.gfx_level >= GFX11 ? 13 : 14) : 0;
+         encoding |= dlc ? 1 << (ctx.gfx_level >= GFX11 ? 13 : 14) : 0;
    }
 
    if (ctx.gfx_level <= GFX11_5) {
       encoding |= opcode << 18;
-      encoding |= smem.glc ? 1 << (ctx.gfx_level >= GFX11 ? 14 : 16) : 0;
+      encoding |= glc ? 1 << (ctx.gfx_level >= GFX11 ? 14 : 16) : 0;
    } else {
       encoding |= opcode << 13;
       if (is_load)
-         encoding |= ((smem.glc || smem.dlc) ? 3 /*SCOPE_SYS*/ : 0 /*SCOPE_CU*/) << 21;
+         encoding |= ((glc || dlc) ? 3 /*SCOPE_SYS*/ : 0 /*SCOPE_CU*/) << 21;
    }
 
    if (ctx.gfx_level <= GFX9) {
@@ -536,6 +540,9 @@ emit_mubuf_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction
 {
    uint32_t opcode = ctx.opcode[(int)instr->opcode];
    MUBUF_instruction& mubuf = instr->mubuf();
+   bool glc = mubuf.cache.value & ac_glc;
+   bool slc = mubuf.cache.value & ac_slc;
+   bool dlc = mubuf.cache.value & ac_dlc;
 
    uint32_t encoding = (0b111000 << 26);
    if (ctx.gfx_level >= GFX11 && mubuf.lds) /* GFX11 has separate opcodes for LDS loads */
@@ -543,7 +550,7 @@ emit_mubuf_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction
    else
       encoding |= (mubuf.lds ? 1 : 0) << 16;
    encoding |= opcode << 18;
-   encoding |= (mubuf.glc ? 1 : 0) << 14;
+   encoding |= (glc ? 1 : 0) << 14;
    if (ctx.gfx_level <= GFX10_3)
       encoding |= (mubuf.idxen ? 1 : 0) << 13;
    assert(!mubuf.addr64 || ctx.gfx_level <= GFX7);
@@ -552,19 +559,19 @@ emit_mubuf_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction
    if (ctx.gfx_level <= GFX10_3)
       encoding |= (mubuf.offen ? 1 : 0) << 12;
    if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) {
-      assert(!mubuf.dlc); /* Device-level coherent is not supported on GFX9 and lower */
-      encoding |= (mubuf.slc ? 1 : 0) << 17;
+      assert(!dlc); /* Device-level coherent is not supported on GFX9 and lower */
+      encoding |= (slc ? 1 : 0) << 17;
    } else if (ctx.gfx_level >= GFX11) {
-      encoding |= (mubuf.slc ? 1 : 0) << 12;
-      encoding |= (mubuf.dlc ? 1 : 0) << 13;
+      encoding |= (slc ? 1 : 0) << 12;
+      encoding |= (dlc ? 1 : 0) << 13;
    } else if (ctx.gfx_level >= GFX10) {
-      encoding |= (mubuf.dlc ? 1 : 0) << 15;
+      encoding |= (dlc ? 1 : 0) << 15;
    }
    encoding |= 0x0FFF & mubuf.offset;
    out.push_back(encoding);
    encoding = 0;
    if (ctx.gfx_level <= GFX7 || (ctx.gfx_level >= GFX10 && ctx.gfx_level <= GFX10_3)) {
-      encoding |= (mubuf.slc ? 1 : 0) << 22;
+      encoding |= (slc ? 1 : 0) << 22;
    }
    encoding |= reg(ctx, instr->operands[2]) << 24;
    if (ctx.gfx_level >= GFX11) {
@@ -625,24 +632,27 @@ emit_mtbuf_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction
 {
    uint32_t opcode = ctx.opcode[(int)instr->opcode];
    MTBUF_instruction& mtbuf = instr->mtbuf();
+   bool glc = mtbuf.cache.value & ac_glc;
+   bool slc = mtbuf.cache.value & ac_slc;
+   bool dlc = mtbuf.cache.value & ac_dlc;
 
    uint32_t img_format = ac_get_tbuffer_format(ctx.gfx_level, mtbuf.dfmt, mtbuf.nfmt);
 
    uint32_t encoding = (0b111010 << 26);
    assert(img_format <= 0x7F);
-   assert(!mtbuf.dlc || ctx.gfx_level >= GFX10);
+   assert(!dlc || ctx.gfx_level >= GFX10);
 
    if (ctx.gfx_level >= GFX11) {
-      encoding |= (mtbuf.slc ? 1 : 0) << 12;
-      encoding |= (mtbuf.dlc ? 1 : 0) << 13;
+      encoding |= (slc ? 1 : 0) << 12;
+      encoding |= (dlc ? 1 : 0) << 13;
    } else {
       /* DLC bit replaces one bit of the OPCODE on GFX10 */
-      encoding |= (mtbuf.dlc ? 1 : 0) << 15;
+      encoding |= (dlc ? 1 : 0) << 15;
    }
 
    if (ctx.gfx_level <= GFX10_3) {
       encoding |= (mtbuf.idxen ? 1 : 0) << 13;
       encoding |= (mtbuf.offen ? 1 : 0) << 12;
    }
-   encoding |= (mtbuf.glc ? 1 : 0) << 14;
+   encoding |= (glc ? 1 : 0) << 14;
 
    encoding |= 0x0FFF & mtbuf.offset;
    encoding |= (img_format << 19); /* Handles both the GFX10 FORMAT and the old NFMT+DFMT */
@@ -662,7 +672,7 @@ emit_mtbuf_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction
       encoding |= (mtbuf.idxen ? 1 : 0) << 23;
    } else {
       encoding |= (mtbuf.tfe ? 1 : 0) << 23;
-      encoding |= (mtbuf.slc ? 1 : 0) << 22;
+      encoding |= (slc ? 1 : 0) << 22;
    }
    encoding |= (reg(ctx, instr->operands[0]) >> 2) << 16;
    if (instr->operands.size() > 3)
@@ -721,6 +731,9 @@ emit_mimg_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
 {
    uint32_t opcode = ctx.opcode[(int)instr->opcode];
    MIMG_instruction& mimg = instr->mimg();
+   bool glc = mimg.cache.value & ac_glc;
+   bool slc = mimg.cache.value & ac_slc;
+   bool dlc = mimg.cache.value & ac_dlc;
 
    unsigned nsa_dwords = get_mimg_nsa_dwords(instr);
    assert(!nsa_dwords || ctx.gfx_level >= GFX10);
@@ -732,23 +745,23 @@ emit_mimg_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
       encoding |= mimg.dim << 2;
       encoding |= mimg.unrm ? 1 << 7 : 0;
       encoding |= (0xF & mimg.dmask) << 8;
-      encoding |= mimg.slc ? 1 << 12 : 0;
-      encoding |= mimg.dlc ? 1 << 13 : 0;
-      encoding |= mimg.glc ? 1 << 14 : 0;
+      encoding |= slc ? 1 << 12 : 0;
+      encoding |= dlc ? 1 << 13 : 0;
+      encoding |= glc ? 1 << 14 : 0;
       encoding |= mimg.r128 ? 1 << 15 : 0;
       encoding |= mimg.a16 ? 1 << 16 : 0;
       encoding |= mimg.d16 ? 1 << 17 : 0;
       encoding |= (opcode & 0xFF) << 18;
    } else {
-      encoding |= mimg.slc ? 1 << 25 : 0;
+      encoding |= slc ? 1 << 25 : 0;
       encoding |= (opcode & 0x7f) << 18;
       encoding |= (opcode >> 7) & 1;
       encoding |= mimg.lwe ? 1 << 17 : 0;
       encoding |= mimg.tfe ? 1 << 16 : 0;
-      encoding |= mimg.glc ? 1 << 13 : 0;
+      encoding |= glc ? 1 << 13 : 0;
       encoding |= mimg.unrm ? 1 << 12 : 0;
       if (ctx.gfx_level <= GFX9) {
-         assert(!mimg.dlc); /* Device-level coherent is not supported on GFX9 and lower */
+         assert(!dlc); /* Device-level coherent is not supported on GFX9 and lower */
          assert(!mimg.r128);
          encoding |= mimg.a16 ? 1 << 15 : 0;
          encoding |= mimg.da ? 1 << 14 : 0;
@@ -757,7 +770,7 @@ emit_mimg_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
                      : 0; /* GFX10: A16 moved to 2nd word, R128 replaces it in 1st word */
          encoding |= nsa_dwords << 1;
          encoding |= mimg.dim << 3; /* GFX10: dimensionality instead of declare array */
-         encoding |= mimg.dlc ? 1 << 7 : 0;
+         encoding |= dlc ? 1 << 7 : 0;
       }
       encoding |= (0xF & mimg.dmask) << 8;
    }
@@ -856,6 +869,9 @@ emit_flatlike_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruct
 {
    uint32_t opcode = ctx.opcode[(int)instr->opcode];
    FLAT_instruction& flat = instr->flatlike();
+   bool glc = flat.cache.value & ac_glc;
+   bool slc = flat.cache.value & ac_slc;
+   bool dlc = flat.cache.value & ac_dlc;
 
    uint32_t encoding = (0b110111 << 26);
    encoding |= opcode << 18;
@@ -879,13 +895,13 @@ emit_flatlike_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruct
    else if (instr->isGlobal())
       encoding |= 2 << (ctx.gfx_level >= GFX11 ? 16 : 14);
    encoding |= flat.lds ? 1 << 13 : 0;
-   encoding |= flat.glc ? 1 << (ctx.gfx_level >= GFX11 ? 14 : 16) : 0;
-   encoding |= flat.slc ? 1 << (ctx.gfx_level >= GFX11 ? 15 : 17) : 0;
+   encoding |= glc ? 1 << (ctx.gfx_level >= GFX11 ? 14 : 16) : 0;
+   encoding |= slc ? 1 << (ctx.gfx_level >= GFX11 ? 15 : 17) : 0;
    if (ctx.gfx_level >= GFX10) {
      assert(!flat.nv);
-      encoding |= flat.dlc ? 1 << (ctx.gfx_level >= GFX11 ? 13 : 12) : 0;
+      encoding |= dlc ? 1 << (ctx.gfx_level >= GFX11 ? 13 : 12) : 0;
    } else {
-      assert(!flat.dlc);
+      assert(!dlc);
    }
    out.push_back(encoding);
    encoding = reg(ctx, instr->operands[0], 8);
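
Context for the assembler hunks above: they read the legacy GFX6-style bits out of the new combined cache field instead of per-format booleans. A minimal sketch of the flag values this assumes (the authoritative definitions live in ac_shader_util.h; the exact bit positions below are illustrative, not copied from that header):

   /* Assumed GFX6-style cache-policy bits carried in ac_hw_cache_flags::value.
    * Bit positions are illustrative; see ac_shader_util.h for the real enum. */
   enum {
      ac_glc      = 1 << 0, /* globally coherent */
      ac_slc      = 1 << 1, /* system level coherent */
      ac_dlc      = 1 << 2, /* GFX10-GFX10.3: device level coherent */
      ac_swizzled = 1 << 3, /* buffer access uses swizzled addressing */
   };
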
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 2de459a79e5..f1dde7a0171 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -4423,6 +4423,35 @@ lds_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned
 
 const EmitLoadParameters lds_load_params{lds_load_callback, false, true, UINT32_MAX};
 
+ac_hw_cache_flags
+get_gfx6_cache_flags(bool glc, bool slc, bool dlc)
+{
+   uint8_t value = 0;
+   value |= glc ? ac_glc : 0;
+   value |= slc ? ac_slc : 0;
+   value |= dlc ? ac_dlc : 0;
+   return ac_hw_cache_flags{value};
+}
+
+ac_hw_cache_flags
+get_load_cache_flags(Builder& bld, bool glc, bool slc)
+{
+   bool dlc = glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
+   return get_gfx6_cache_flags(glc, slc, dlc);
+}
+
+ac_hw_cache_flags
+get_store_cache_flags(Builder& bld, bool glc, bool slc)
+{
+   return get_gfx6_cache_flags(glc, slc, false);
+}
+
+ac_hw_cache_flags
+get_atomic_cache_flags(Builder& bld, bool return_previous)
+{
+   return get_gfx6_cache_flags(return_previous, false, false);
+}
+
 Temp
 smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
                    unsigned align, unsigned const_offset, Temp dst_hint)
@@ -4478,9 +4507,7 @@ smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned
    RegClass rc(RegType::sgpr, DIV_ROUND_UP(bytes_needed, 4u));
    Temp val = dst_hint.id() && dst_hint.regClass() == rc ? dst_hint : bld.tmp(rc);
    load->definitions[0] = Definition(val);
-   load->smem().glc = info.glc;
-   load->smem().dlc =
-      info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
+   load->smem().cache = get_load_cache_flags(bld, info.glc, false);
    load->smem().sync = info.sync;
    bld.insert(std::move(load));
    return val;
@@ -4539,13 +4566,11 @@ mubuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne
    mubuf->operands[2] = soffset;
    mubuf->mubuf().offen = offen;
    mubuf->mubuf().idxen = idxen;
-   mubuf->mubuf().glc = info.glc;
-   mubuf->mubuf().dlc =
-      info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
-   mubuf->mubuf().slc = info.slc;
+   mubuf->mubuf().cache = get_load_cache_flags(bld, info.glc, info.slc);
+   if (info.swizzle_component_size != 0)
+      mubuf->mubuf().cache.value |= ac_swizzled;
    mubuf->mubuf().sync = info.sync;
    mubuf->mubuf().offset = const_offset;
-   mubuf->mubuf().swizzled = info.swizzle_component_size != 0;
    RegClass rc = RegClass::get(RegType::vgpr, bytes_size);
    Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
    mubuf->definitions[0] = Definition(val);
@@ -4607,10 +4632,7 @@ mubuf_load_format_callback(Builder& bld, const LoadEmitInfo& info, Temp offset,
    mubuf->operands[2] = soffset;
    mubuf->mubuf().offen = offen;
    mubuf->mubuf().idxen = idxen;
-   mubuf->mubuf().glc = info.glc;
-   mubuf->mubuf().dlc =
-      info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
-   mubuf->mubuf().slc = info.slc;
+   mubuf->mubuf().cache = get_load_cache_flags(bld, info.glc, info.slc);
    mubuf->mubuf().sync = info.sync;
    mubuf->mubuf().offset = const_offset;
    RegClass rc = RegClass::get(RegType::vgpr, bytes_needed);
@@ -4818,8 +4840,7 @@ global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsign
       mubuf->operands[0] = Operand(get_gfx6_global_rsrc(bld, addr));
       mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
       mubuf->operands[2] = Operand(offset);
-      mubuf->mubuf().glc = info.glc;
-      mubuf->mubuf().dlc = false;
+      mubuf->mubuf().cache = get_load_cache_flags(bld, info.glc, false);
       mubuf->mubuf().offset = const_offset;
       mubuf->mubuf().addr64 = addr.type() == RegType::vgpr;
       mubuf->mubuf().disable_wqm = false;
@@ -4838,9 +4859,7 @@ global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsign
          flat->operands[0] = Operand(addr);
         flat->operands[1] = Operand(s1);
      }
-      flat->flatlike().glc = info.glc;
-      flat->flatlike().dlc =
-         info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
+      flat->flatlike().cache = get_load_cache_flags(bld, info.glc, false);
      flat->flatlike().sync = info.sync;
      assert(global || !const_offset);
      flat->flatlike().offset = const_offset;
@@ -5673,10 +5692,7 @@ mtbuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne
    mtbuf->operands[2] = soffset;
    mtbuf->mtbuf().offen = offen;
    mtbuf->mtbuf().idxen = idxen;
-   mtbuf->mtbuf().glc = info.glc;
-   mtbuf->mtbuf().dlc =
-      info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
-   mtbuf->mtbuf().slc = info.slc;
+   mtbuf->mtbuf().cache = get_load_cache_flags(bld, info.glc, info.slc);
    mtbuf->mtbuf().sync = info.sync;
    mtbuf->mtbuf().offset = const_offset;
    mtbuf->mtbuf().dfmt = fetch_fmt & 0xf;
@@ -6220,6 +6236,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
 
    memory_sync_info sync = get_memory_sync_info(instr, storage_image, 0);
    unsigned access = nir_intrinsic_access(instr);
+   bool glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT);
 
    unsigned result_size = instr->def.num_components - is_sparse;
    unsigned expand_mask = nir_def_components_read(&instr->def) & u_bit_consecutive(0, result_size);
@@ -6275,9 +6292,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
       load->operands[2] = Operand::c32(0);
       load->definitions[0] = Definition(tmp);
       load->mubuf().idxen = true;
-      load->mubuf().glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT);
-      load->mubuf().dlc = load->mubuf().glc &&
-                          (ctx->options->gfx_level == GFX10 || ctx->options->gfx_level == GFX10_3);
+      load->mubuf().cache = get_load_cache_flags(bld, glc, false);
       load->mubuf().sync = sync;
       load->mubuf().tfe = is_sparse;
       if (load->mubuf().tfe)
@@ -6296,9 +6311,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
       Operand vdata = is_sparse ? emit_tfe_init(bld, tmp) : Operand(v1);
       MIMG_instruction* load =
          emit_mimg(bld, opcode, tmp, resource, Operand(s4), coords, vdata);
-      load->glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT) ? 1 : 0;
-      load->dlc =
-         load->glc && (ctx->options->gfx_level == GFX10 || ctx->options->gfx_level == GFX10_3);
+      load->cache = get_load_cache_flags(bld, glc, false);
       load->a16 = instr->src[1].ssa->bit_size == 16;
       load->d16 = d16;
       load->dmask = dmask;
@@ -6422,8 +6435,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
       store->operands[2] = Operand::c32(0);
       store->operands[3] = Operand(data);
       store->mubuf().idxen = true;
-      store->mubuf().glc = glc;
-      store->mubuf().dlc = false;
+      store->mubuf().cache = get_store_cache_flags(bld, glc, false);
       store->mubuf().disable_wqm = true;
       store->mubuf().sync = sync;
       ctx->program->needs_exact = true;
@@ -6440,8 +6452,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
 
       MIMG_instruction* store =
          emit_mimg(bld, opcode, Temp(0, v1), resource, Operand(s4), coords, Operand(data));
-      store->glc = glc;
-      store->dlc = false;
+      store->cache = get_store_cache_flags(bld, glc, false);
       store->a16 = instr->src[1].ssa->bit_size == 16;
       store->d16 = d16;
       store->dmask = dmask;
@@ -6581,8 +6592,7 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
       mubuf->definitions[0] = def;
       mubuf->mubuf().offset = 0;
       mubuf->mubuf().idxen = true;
-      mubuf->mubuf().glc = return_previous;
-      mubuf->mubuf().dlc = false; /* Not needed for atomics */
+      mubuf->mubuf().cache = get_atomic_cache_flags(bld, return_previous);
       mubuf->mubuf().disable_wqm = true;
       mubuf->mubuf().sync = sync;
       ctx->program->needs_exact = true;
@@ -6597,8 +6607,7 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
    Temp tmp = return_previous ? (cmpswap ? bld.tmp(data.regClass()) : dst) : Temp(0, v1);
    MIMG_instruction* mimg =
       emit_mimg(bld, image_op, tmp, resource, Operand(s4), coords, Operand(data));
-   mimg->glc = return_previous;
-   mimg->dlc = false; /* Not needed for atomics */
+   mimg->cache = get_atomic_cache_flags(bld, return_previous);
    mimg->dmask = (1 << data.size()) - 1;
    mimg->a16 = instr->src[1].ssa->bit_size == 16;
    mimg->unrm = true;
@@ -6670,8 +6679,8 @@ visit_store_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
       store->operands[3] = Operand(write_datas[i]);
       store->mubuf().offset = offsets[i];
       store->mubuf().offen = (offset.type() == RegType::vgpr);
-      store->mubuf().glc = glc || (ctx->program->gfx_level == GFX6 && write_datas[i].bytes() < 4);
-      store->mubuf().dlc = false;
+      store->mubuf().cache = get_store_cache_flags(
+         bld, glc || (ctx->program->gfx_level == GFX6 && write_datas[i].bytes() < 4), false);
       store->mubuf().disable_wqm = true;
       store->mubuf().sync = sync;
       ctx->program->needs_exact = true;
@@ -6712,8 +6721,7 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
      mubuf->definitions[0] = def;
   mubuf->mubuf().offset = 0;
   mubuf->mubuf().offen = (offset.type() == RegType::vgpr);
-   mubuf->mubuf().glc = return_previous;
-   mubuf->mubuf().dlc = false; /* Not needed for atomics */
+   mubuf->mubuf().cache = get_atomic_cache_flags(bld, return_previous);
   mubuf->mubuf().disable_wqm = true;
   mubuf->mubuf().sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
   ctx->program->needs_exact = true;
@@ -6846,8 +6854,7 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
         flat->operands[1] = Operand(s1);
      }
      flat->operands[2] = Operand(write_datas[i]);
-      flat->flatlike().glc = glc;
-      flat->flatlike().dlc = false;
+      flat->flatlike().cache = get_store_cache_flags(bld, glc, false);
      assert(global || !write_const_offset);
      flat->flatlike().offset = write_const_offset;
      flat->flatlike().disable_wqm = true;
@@ -6867,8 +6874,8 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
            write_address.type() == RegType::vgpr ? Operand(write_address) : Operand(v1);
         mubuf->operands[2] = Operand(write_offset);
         mubuf->operands[3] = Operand(write_datas[i]);
-         mubuf->mubuf().glc = glc || write_datas[i].bytes() < 4;
-         mubuf->mubuf().dlc = false;
+         mubuf->mubuf().cache =
+            get_store_cache_flags(bld, glc || write_datas[i].bytes() < 4, false);
         mubuf->mubuf().offset = write_const_offset;
         mubuf->mubuf().addr64 = write_address.type() == RegType::vgpr;
         mubuf->mubuf().disable_wqm = true;
@@ -6980,8 +6987,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
      flat->operands[2] = Operand(data);
      if (return_previous)
         flat->definitions[0] = Definition(dst);
-      flat->flatlike().glc = return_previous;
-      flat->flatlike().dlc = false; /* Not needed for atomics */
+      flat->flatlike().cache = get_atomic_cache_flags(bld, return_previous);
      assert(global || !const_offset);
      flat->flatlike().offset = const_offset;
      flat->flatlike().disable_wqm = true;
@@ -7007,8 +7013,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
         return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
      if (return_previous)
         mubuf->definitions[0] = def;
-      mubuf->mubuf().glc = return_previous;
-      mubuf->mubuf().dlc = false;
+      mubuf->mubuf().cache = get_atomic_cache_flags(bld, return_previous);
      mubuf->mubuf().offset = const_offset;
      mubuf->mubuf().addr64 = addr.type() == RegType::vgpr;
      mubuf->mubuf().disable_wqm = true;
@@ -7167,6 +7172,9 @@ visit_store_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
      bool glc = nir_intrinsic_access(intrin) & ACCESS_COHERENT;
      glc |= ctx->program->gfx_level == GFX6 && write_datas[i].bytes() < 4;
      glc &= ctx->program->gfx_level < GFX11;
+      ac_hw_cache_flags cache = get_store_cache_flags(bld, glc, slc);
+      if (swizzled)
+         cache.value |= ac_swizzled;
 
      Operand vaddr_op(v1);
      if (offen && idxen)
@@ -7177,9 +7185,8 @@ visit_store_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
         vaddr_op = Operand(idx);
 
      Instruction* mubuf = bld.mubuf(op, Operand(descriptor), vaddr_op, s_offset,
-                                     Operand(write_datas[i]), const_offset, offen, swizzled, idxen,
-                                     /* addr64 */ false, /* disable_wqm */ false, glc,
-                                     /* dlc */ false, slc)
+                                     Operand(write_datas[i]), const_offset, offen, idxen,
+                                     /* addr64 */ false, /* disable_wqm */ false, cache)
                              .instr;
      mubuf->mubuf().sync = sync;
   }
@@ -7637,9 +7644,11 @@ visit_store_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
      for (unsigned i = 0; i < write_count; i++) {
         aco_opcode op = get_buffer_store_op(write_datas[i].bytes());
         Instruction* mubuf = bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset,
-                                        write_datas[i], offsets[i], true, true);
+                                        write_datas[i], offsets[i], true);
         mubuf->mubuf().sync = memory_sync_info(storage_scratch, semantic_private);
-         mubuf->mubuf().glc = ctx->program->gfx_level == GFX6 && write_datas[i].bytes() < 4;
+         bool glc = ctx->program->gfx_level == GFX6 && write_datas[i].bytes() < 4;
+         mubuf->mubuf().cache = get_store_cache_flags(bld, glc, false);
+         mubuf->mubuf().cache.value |= ac_swizzled;
      }
   }
}
@@ -12098,9 +12107,12 @@ select_trap_handler_shader(Program* program, struct nir_shader* shader, ac_shade
    bld.smem(aco_opcode::s_load_dwordx4, Definition(PhysReg{ttmp4}, s4),
             Operand(PhysReg{tma}, s2), Operand::zero());
 
+   ac_hw_cache_flags cache_glc;
+   cache_glc.value = ac_glc;
+
    /* Store TTMP0-TTMP1. */
    bld.smem(aco_opcode::s_buffer_store_dwordx2, Operand(PhysReg{ttmp4}, s4), Operand::zero(),
-            Operand(PhysReg{ttmp0}, s2), memory_sync_info(), true);
+            Operand(PhysReg{ttmp0}, s2), memory_sync_info(), cache_glc);
 
    uint32_t hw_regs_idx[] = {
       2, /* HW_REG_STATUS */
@@ -12116,7 +12128,8 @@ select_trap_handler_shader(Program* program, struct nir_shader* shader, ac_shade
                ((20 - 1) << 11) | hw_regs_idx[i]);
 
       bld.smem(aco_opcode::s_buffer_store_dword, Operand(PhysReg{ttmp4}, s4),
-               Operand::c32(8u + i * 4), Operand(PhysReg{ttmp8}, s1), memory_sync_info(), true);
+               Operand::c32(8u + i * 4), Operand(PhysReg{ttmp8}, s1), memory_sync_info(),
+               cache_glc);
    }
 
    program->config->float_mode = program->blocks[0].fp_mode.val;
@@ -12632,18 +12645,18 @@ load_unaligned_vs_attrib(Builder& bld, PhysReg dst, Operand desc, Operand index,
    PhysReg scratch(load.scratch);
    if (load.d16) {
       bld.mubuf(aco_opcode::buffer_load_ubyte_d16, Definition(dst, v1), desc, index,
-                Operand::c32(0u), offset, false, false, true);
+                Operand::c32(0u), offset, false, true);
       bld.mubuf(aco_opcode::buffer_load_ubyte_d16_hi, Definition(dst, v1), desc, index,
-                Operand::c32(0u), offset + 2, false, false, true);
+                Operand::c32(0u), offset + 2, false, true);
       bld.mubuf(aco_opcode::buffer_load_ubyte_d16, Definition(scratch, v1), desc, index,
-                Operand::c32(0u), offset + 1, false, false, true);
+                Operand::c32(0u), offset + 1, false, true);
       bld.mubuf(aco_opcode::buffer_load_ubyte_d16_hi, Definition(scratch, v1), desc, index,
-                Operand::c32(0u), offset + 3, false, false, true);
+                Operand::c32(0u), offset + 3, false, true);
    } else {
      for (unsigned i = 0; i < size; i++) {
         Definition def(i ? scratch.advance(i * 4 - 4) : dst, v1);
         bld.mubuf(aco_opcode::buffer_load_ubyte, def, desc, index, Operand::c32(0u), offset + i,
-                   false, false, true);
+                   false, true);
      }
   }
@@ -12835,7 +12848,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
            i += slots;
         } else {
            bld.mubuf(aco_opcode::buffer_load_format_xyzw, Definition(dest, v4),
-                      Operand(cur_desc, s4), fetch_index, Operand::c32(0u), 0u, false, false, true);
+                      Operand(cur_desc, s4), fetch_index, Operand::c32(0u), 0u, false, true);
            loc++;
            i++;
         }
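
The four get_*_cache_flags() helpers added at the top of aco_instruction_selection.cpp centralize the GFX10/GFX10.3 rule that coherent loads also need DLC, which every load path previously open-coded. A hypothetical call site (bld and mubuf stand in for whatever the surrounding function provides):

   /* Sketch: a coherent, non-slc buffer load. On GFX10/GFX10.3 the helper
    * sets ac_dlc automatically, so the gfx_level check no longer has to be
    * repeated at each call site. */
   ac_hw_cache_flags cache = get_load_cache_flags(bld, /*glc=*/true, /*slc=*/false);
   assert(bld.program->gfx_level != GFX10 || (cache.value & ac_dlc));
   mubuf->mubuf().cache = cache;
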
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index bbac03eb410..aa3c4bb996c 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -15,6 +15,7 @@
 #include "ac_binary.h"
 #include "ac_hw_stage.h"
+#include "ac_shader_util.h"
 #include "amd_family.h"
 
 #include <algorithm>
 #include <bitset>
@@ -1309,11 +1310,7 @@ static_assert(sizeof(SALU_instruction) == sizeof(Instruction) + 4, "Unexpected p
  */
 struct SMEM_instruction : public Instruction {
    memory_sync_info sync;
-   bool glc : 1; /* VI+: globally coherent */
-   bool dlc : 1; /* NAVI: device level coherent */
-   bool nv : 1;  /* VEGA only: Non-volatile */
-   bool disable_wqm : 1;
-   uint8_t padding : 4;
+   ac_hw_cache_flags cache;
 };
 static_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
 
@@ -1492,19 +1489,16 @@ static_assert(sizeof(LDSDIR_instruction) == sizeof(Instruction) + 8, "Unexpected
  */
 struct MUBUF_instruction : public Instruction {
    memory_sync_info sync;
+   ac_hw_cache_flags cache;
    bool offen : 1;  /* Supply an offset from VGPR (VADDR) */
    bool idxen : 1;  /* Supply an index from VGPR (VADDR) */
    bool addr64 : 1; /* SI, CIK: Address size is 64-bit */
-   bool glc : 1;    /* globally coherent */
-   bool dlc : 1;    /* NAVI: device level coherent */
-   bool slc : 1;    /* system level coherent */
    bool tfe : 1;    /* texture fail enable */
    bool lds : 1;    /* Return read-data to LDS instead of VGPRs */
-   uint16_t disable_wqm : 1; /* Require an exec mask without helper invocations */
-   uint16_t offset : 12;     /* Unsigned byte offset - 12 bit */
-   uint16_t swizzled : 1;
-   uint16_t padding0 : 2;
-   uint16_t padding1;
+   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
+   uint8_t padding0 : 2;
+   uint8_t padding1;
+   uint16_t offset; /* Unsigned byte offset - 12 bit */
 };
 static_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
 
@@ -1518,16 +1512,14 @@ static_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 8, "Unexpected
  */
 struct MTBUF_instruction : public Instruction {
    memory_sync_info sync;
+   ac_hw_cache_flags cache;
    uint8_t dfmt : 4;         /* Data Format of data in memory buffer */
    uint8_t nfmt : 3;         /* Numeric format of data in memory */
    bool offen : 1;           /* Supply an offset from VGPR (VADDR) */
-   uint16_t idxen : 1;       /* Supply an index from VGPR (VADDR) */
-   uint16_t glc : 1;         /* globally coherent */
-   uint16_t dlc : 1;         /* NAVI: device level coherent */
-   uint16_t slc : 1;         /* system level coherent */
-   uint16_t tfe : 1;         /* texture fail enable */
-   uint16_t disable_wqm : 1; /* Require an exec mask without helper invocations */
-   uint16_t padding : 10;
+   bool idxen : 1;           /* Supply an index from VGPR (VADDR) */
+   bool tfe : 1;             /* texture fail enable */
+   bool disable_wqm : 1;     /* Require an exec mask without helper invocations */
+   uint8_t padding : 5;
    uint16_t offset; /* Unsigned byte offset - 12 bit */
 };
 static_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
@@ -1543,12 +1535,10 @@ static_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 8, "Unexpected
  */
 struct MIMG_instruction : public Instruction {
    memory_sync_info sync;
+   ac_hw_cache_flags cache;
    uint8_t dmask;    /* Data VGPR enable mask */
    uint8_t dim : 3;  /* NAVI: dimensionality */
    bool unrm : 1;    /* Force address to be un-normalized */
-   bool dlc : 1;     /* NAVI: device level coherent */
-   bool glc : 1;     /* globally coherent */
-   bool slc : 1;     /* system level coherent */
    bool tfe : 1;     /* texture fail enable */
    bool da : 1;      /* declare an array */
    bool lwe : 1;     /* LOD warning enable */
@@ -1557,9 +1547,8 @@ struct MIMG_instruction : public Instruction {
    bool d16 : 1;         /* Convert 32-bit data to 16-bit data */
    bool disable_wqm : 1; /* Require an exec mask without helper invocations */
    bool strict_wqm : 1;  /* VADDR is a linear VGPR and additional VGPRs may be copied into it */
-   uint8_t padding0 : 1;
+   uint8_t padding0 : 4;
    uint8_t padding1;
-   uint8_t padding2;
 };
 static_assert(sizeof(MIMG_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
 
@@ -1572,15 +1561,13 @@ static_assert(sizeof(MIMG_instruction) == sizeof(Instruction) + 8, "Unexpected p
  */
 struct FLAT_instruction : public Instruction {
    memory_sync_info sync;
-   bool slc : 1; /* system level coherent */
-   bool glc : 1; /* globally coherent */
-   bool dlc : 1; /* NAVI: device level coherent */
+   ac_hw_cache_flags cache;
    bool lds : 1;
    bool nv : 1;
    bool disable_wqm : 1; /* Require an exec mask without helper invocations */
-   uint8_t padding0 : 2;
+   uint8_t padding0 : 5;
+   uint8_t padding1;
    int16_t offset; /* Vega/Navi only */
-   uint16_t padding1;
 };
 static_assert(sizeof(FLAT_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py
index 9f23d595d78..816c59464dd 100644
--- a/src/amd/compiler/aco_opcodes.py
+++ b/src/amd/compiler/aco_opcodes.py
@@ -105,9 +105,7 @@ class Format(IntEnum):
          return [('uint32_t', 'imm', '0')]
       elif self == Format.SMEM:
          return [('memory_sync_info', 'sync', 'memory_sync_info()'),
-                 ('bool', 'glc', 'false'),
-                 ('bool', 'dlc', 'false'),
-                 ('bool', 'nv', 'false')]
+                 ('ac_hw_cache_flags', 'cache', '{{0, 0, 0, 0, 0}}')]
       elif self == Format.DS:
          return [('uint16_t', 'offset0', '0'),
                  ('uint8_t', 'offset1', '0'),
@@ -125,20 +123,15 @@ class Format(IntEnum):
                  ('bool', 'offen', None),
                  ('bool', 'idxen', 'false'),
                  ('bool', 'disable_wqm', 'false'),
-                 ('bool', 'glc', 'false'),
-                 ('bool', 'dlc', 'false'),
-                 ('bool', 'slc', 'false'),
+                 ('ac_hw_cache_flags', 'cache', '{{0, 0, 0, 0, 0}}'),
                  ('bool', 'tfe', 'false')]
       elif self == Format.MUBUF:
          return [('unsigned', 'offset', None),
                  ('bool', 'offen', None),
-                 ('bool', 'swizzled', 'false'),
                  ('bool', 'idxen', 'false'),
                  ('bool', 'addr64', 'false'),
                  ('bool', 'disable_wqm', 'false'),
-                 ('bool', 'glc', 'false'),
-                 ('bool', 'dlc', 'false'),
-                 ('bool', 'slc', 'false'),
+                 ('ac_hw_cache_flags', 'cache', '{{0, 0, 0, 0, 0}}'),
                  ('bool', 'tfe', 'false'),
                  ('bool', 'lds', 'false')]
       elif self == Format.MIMG:
@@ -146,9 +139,7 @@ class Format(IntEnum):
                  ('bool', 'da', 'false'),
                  ('bool', 'unrm', 'false'),
                  ('bool', 'disable_wqm', 'false'),
-                 ('bool', 'glc', 'false'),
-                 ('bool', 'dlc', 'false'),
-                 ('bool', 'slc', 'false'),
+                 ('ac_hw_cache_flags', 'cache', '{{0, 0, 0, 0, 0}}'),
                  ('bool', 'tfe', 'false'),
                  ('bool', 'lwe', 'false'),
                  ('bool', 'r128', 'false'),
@@ -195,8 +186,7 @@ class Format(IntEnum):
      elif self in [Format.FLAT, Format.GLOBAL, Format.SCRATCH]:
         return [('int16_t', 'offset', 0),
                 ('memory_sync_info', 'sync', 'memory_sync_info()'),
-                 ('bool', 'glc', 'false'),
-                 ('bool', 'slc', 'false'),
+                 ('ac_hw_cache_flags', 'cache', '{{0, 0, 0, 0, 0}}'),
                 ('bool', 'lds', 'false'),
                 ('bool', 'nv', 'false')]
      else:
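
Since aco_opcodes.py generates the Builder helpers, swapping the glc/dlc/slc (and, for MUBUF, swizzled) parameters for a single cache parameter changes every generated signature; '{{0, 0, 0, 0, 0}}' is the zero default for the new field. Schematically, a store now looks like the visit_store_buffer call earlier in this patch (operand names are placeholders):

   /* Sketch of a generated-builder call after the Format.MUBUF change: one
    * trailing ac_hw_cache_flags argument replaces the three coherence bools. */
   ac_hw_cache_flags cache;
   cache.value = ac_glc | ac_slc;
   bld.mubuf(op, Operand(descriptor), vaddr_op, s_offset, Operand(write_datas[i]),
             const_offset, offen, idxen, /* addr64 */ false, /* disable_wqm */ false,
             cache);
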
diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp
index 4f46f386d69..e040221be61 100644
--- a/src/amd/compiler/aco_opt_value_numbering.cpp
+++ b/src/amd/compiler/aco_opt_value_numbering.cpp
@@ -164,8 +164,7 @@ struct InstrPred {
       case Format::SMEM: {
          SMEM_instruction& aS = a->smem();
          SMEM_instruction& bS = b->smem();
-         return aS.sync == bS.sync && aS.glc == bS.glc && aS.dlc == bS.dlc && aS.nv == bS.nv &&
-                aS.disable_wqm == bS.disable_wqm;
+         return aS.sync == bS.sync && aS.cache.value == bS.cache.value;
      }
      case Format::VINTRP: {
         VINTRP_instruction& aI = a->vintrp();
@@ -203,21 +202,21 @@ struct InstrPred {
         MTBUF_instruction& bM = b->mtbuf();
         return aM.sync == bM.sync && aM.dfmt == bM.dfmt && aM.nfmt == bM.nfmt &&
                aM.offset == bM.offset && aM.offen == bM.offen && aM.idxen == bM.idxen &&
-                aM.glc == bM.glc && aM.dlc == bM.dlc && aM.slc == bM.slc && aM.tfe == bM.tfe &&
+                aM.cache.value == bM.cache.value && aM.tfe == bM.tfe &&
                aM.disable_wqm == bM.disable_wqm;
      }
      case Format::MUBUF: {
         MUBUF_instruction& aM = a->mubuf();
         MUBUF_instruction& bM = b->mubuf();
         return aM.sync == bM.sync && aM.offset == bM.offset && aM.offen == bM.offen &&
-                aM.idxen == bM.idxen && aM.glc == bM.glc && aM.dlc == bM.dlc && aM.slc == bM.slc &&
-                aM.tfe == bM.tfe && aM.lds == bM.lds && aM.disable_wqm == bM.disable_wqm;
+                aM.idxen == bM.idxen && aM.cache.value == bM.cache.value && aM.tfe == bM.tfe &&
+                aM.lds == bM.lds && aM.disable_wqm == bM.disable_wqm;
      }
      case Format::MIMG: {
         MIMG_instruction& aM = a->mimg();
         MIMG_instruction& bM = b->mimg();
         return aM.sync == bM.sync && aM.dmask == bM.dmask && aM.unrm == bM.unrm &&
-                aM.glc == bM.glc && aM.slc == bM.slc && aM.tfe == bM.tfe && aM.da == bM.da &&
+                aM.cache.value == bM.cache.value && aM.tfe == bM.tfe && aM.da == bM.da &&
                aM.lwe == bM.lwe && aM.r128 == bM.r128 && aM.a16 == bM.a16 && aM.d16 == bM.d16 &&
                aM.disable_wqm == bM.disable_wqm;
      }
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index ecce23e8bf0..61e3d605745 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -860,10 +860,7 @@ smem_combine(opt_ctx& ctx, aco_ptr<Instruction>& instr)
      if (!smem.definitions.empty())
         new_instr->definitions[0] = smem.definitions[0];
      new_instr->smem().sync = smem.sync;
-      new_instr->smem().glc = smem.glc;
-      new_instr->smem().dlc = smem.dlc;
-      new_instr->smem().nv = smem.nv;
-      new_instr->smem().disable_wqm = smem.disable_wqm;
+      new_instr->smem().cache = smem.cache;
      instr.reset(new_instr);
   }
}
@@ -1429,13 +1426,14 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
         while (info.is_temp())
            info = ctx.info[info.temp.id()];
 
+         bool swizzled = mubuf.cache.value & ac_swizzled;
         /* According to AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(), vaddr
          * overflow for scratch accesses works only on GFX9+ and saddr overflow
          * never works. Since swizzling is the only thing that separates
          * scratch accesses and other accesses and swizzling changing how
          * addressing works significantly, this probably applies to swizzled
          * MUBUF accesses. */
-         bool vaddr_prevent_overflow = mubuf.swizzled && ctx.program->gfx_level < GFX9;
+         bool vaddr_prevent_overflow = swizzled && ctx.program->gfx_level < GFX9;
 
         if (mubuf.offen && mubuf.idxen && i == 1 && info.is_vec() &&
             info.instr->operands.size() == 2 && info.instr->operands[0].isTemp() &&
@@ -1465,7 +1463,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
            mubuf.offset += offset;
            continue;
         } else if (i == 2 && parse_base_offset(ctx, instr.get(), i, &base, &offset, true) &&
-                    base.regClass() == s1 && mubuf.offset + offset < 4096 && !mubuf.swizzled) {
+                    base.regClass() == s1 && mubuf.offset + offset < 4096 && !swizzled) {
            instr->operands[i].setTemp(base);
            mubuf.offset += offset;
            continue;
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp
index e34abb9b5fd..dfd86114998 100644
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -262,6 +262,20 @@ print_sync(memory_sync_info sync, FILE* output)
    print_scope(sync.scope, output);
 }
 
+template <typename T>
+static void
+print_cache_flags(enum amd_gfx_level gfx_level, const T& instr, FILE* output)
+{
+   if (instr.cache.value & ac_glc)
+      fprintf(output, " glc");
+   if (instr.cache.value & ac_slc)
+      fprintf(output, " slc");
+   if (instr.cache.value & ac_dlc)
+      fprintf(output, " dlc");
+   if (instr.cache.value & ac_swizzled)
+      fprintf(output, " swizzled");
+}
+
 static void
 print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* output)
 {
@@ -428,12 +442,7 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
      }
      case Format::SMEM: {
         const SMEM_instruction& smem = instr->smem();
-         if (smem.glc)
-            fprintf(output, " glc");
-         if (smem.dlc)
-            fprintf(output, " dlc");
-         if (smem.nv)
-            fprintf(output, " nv");
+         print_cache_flags(gfx_level, smem, output);
         print_sync(smem.sync, output);
         break;
      }
@@ -482,12 +491,7 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
         fprintf(output, " idxen");
      if (mubuf.addr64)
         fprintf(output, " addr64");
-      if (mubuf.glc)
-         fprintf(output, " glc");
-      if (mubuf.dlc)
-         fprintf(output, " dlc");
-      if (mubuf.slc)
-         fprintf(output, " slc");
+      print_cache_flags(gfx_level, mubuf, output);
      if (mubuf.tfe)
         fprintf(output, " tfe");
      if (mubuf.lds)
@@ -517,12 +521,7 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
      }
      if (mimg.unrm)
         fprintf(output, " unrm");
-      if (mimg.glc)
-         fprintf(output, " glc");
-      if (mimg.dlc)
-         fprintf(output, " dlc");
-      if (mimg.slc)
-         fprintf(output, " slc");
+      print_cache_flags(gfx_level, mimg, output);
      if (mimg.tfe)
         fprintf(output, " tfe");
      if (mimg.da)
@@ -594,12 +593,7 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
      const FLAT_instruction& flat = instr->flatlike();
      if (flat.offset)
         fprintf(output, " offset:%d", flat.offset);
-      if (flat.glc)
-         fprintf(output, " glc");
-      if (flat.dlc)
-         fprintf(output, " dlc");
-      if (flat.slc)
-         fprintf(output, " slc");
+      print_cache_flags(gfx_level, flat, output);
      if (flat.lds)
         fprintf(output, " lds");
      if (flat.nv)
@@ -646,12 +640,7 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
         fprintf(output, " offen");
      if (mtbuf.idxen)
         fprintf(output, " idxen");
-      if (mtbuf.glc)
-         fprintf(output, " glc");
-      if (mtbuf.dlc)
-         fprintf(output, " dlc");
-      if (mtbuf.slc)
-         fprintf(output, " slc");
+      print_cache_flags(gfx_level, mtbuf, output);
      if (mtbuf.tfe)
         fprintf(output, " tfe");
      if (mtbuf.disable_wqm)
diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp
index 10b8992578d..37d6b7408bb 100644
--- a/src/amd/compiler/aco_spill.cpp
+++ b/src/amd/compiler/aco_spill.cpp
@@ -1324,8 +1324,9 @@ spill_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& inst
                     offset, memory_sync_info(storage_vgpr_spill, semantic_private));
         } else {
            Instruction* instr = bld.mubuf(aco_opcode::buffer_store_dword, ctx.scratch_rsrc,
-                                           Operand(v1), scratch_offset, elem, offset, false, true);
+                                           Operand(v1), scratch_offset, elem, offset, false);
            instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
+            instr->mubuf().cache.value = ac_swizzled;
         }
      }
   } else if (ctx.program->gfx_level >= GFX9) {
@@ -1333,8 +1334,9 @@ spill_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& inst
               memory_sync_info(storage_vgpr_spill, semantic_private));
   } else {
      Instruction* instr = bld.mubuf(aco_opcode::buffer_store_dword, ctx.scratch_rsrc, Operand(v1),
-                                     scratch_offset, temp, offset, false, true);
+                                     scratch_offset, temp, offset, false);
      instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
+      instr->mubuf().cache.value = ac_swizzled;
   }
 
@@ -1366,8 +1368,9 @@ reload_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& ins
         } else {
            Instruction* instr =
               bld.mubuf(aco_opcode::buffer_load_dword, Definition(tmp), ctx.scratch_rsrc,
-                         Operand(v1), scratch_offset, offset, false, true);
+                         Operand(v1), scratch_offset, offset, false);
            instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
+            instr->mubuf().cache.value = ac_swizzled;
         }
      }
      bld.insert(vec);
@@ -1376,8 +1379,9 @@ reload_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& ins
                  memory_sync_info(storage_vgpr_spill, semantic_private));
      } else {
         Instruction* instr = bld.mubuf(aco_opcode::buffer_load_dword, def, ctx.scratch_rsrc,
-                                        Operand(v1), scratch_offset, offset, false, true);
+                                        Operand(v1), scratch_offset, offset, false);
         instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
+         instr->mubuf().cache.value = ac_swizzled;
      }
   }
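
One subtle effect of the value-numbering hunks above: the old MUBUF comparison never looked at the separate swizzled bit, while the single cache.value test now also covers ac_swizzled, so CSE additionally distinguishes swizzled from linear accesses. An illustrative check (a and b stand for two candidate instructions):

   /* Two otherwise-identical buffer loads differing only in the swizzle
    * flag now compare unequal, so value numbering keeps both. */
   a->mubuf().cache.value = ac_glc;
   b->mubuf().cache.value = ac_glc | ac_swizzled;
   assert(a->mubuf().cache.value != b->mubuf().cache.value);
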
diff --git a/src/amd/compiler/tests/test_assembler.cpp b/src/amd/compiler/tests/test_assembler.cpp
index 57a6b3c5d87..604c91acc68 100644
--- a/src/amd/compiler/tests/test_assembler.cpp
+++ b/src/amd/compiler/tests/test_assembler.cpp
@@ -411,13 +411,19 @@ BEGIN_TEST(assembler.smem)
    //! s_load_b32 s4, s[16:17], s8 offset:0x2a ; f4000108 1000002a
    bld.smem(aco_opcode::s_load_dword, dst, op_s2, Operand::c32(42), op_s1);
 
+   ac_hw_cache_flags cache_coherent;
+   ac_hw_cache_flags cache_non_temporal;
+   cache_coherent.value = ac_glc;
+   cache_non_temporal.value = ac_dlc;
+
    //~gfx11! s_buffer_load_b32 s4, s[32:35], s8 glc ; f4204110 10000000
    //~gfx12! s_buffer_load_b32 s4, s[32:35], s8 offset:0x0 scope:SCOPE_SYS ; f4620110 10000000
-   bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().glc = true;
+   bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().cache = cache_coherent;
 
    //~gfx11! s_buffer_load_b32 s4, s[32:35], s8 dlc ; f4202110 10000000
    //~gfx12! (then repeated 1 times)
-   bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().dlc = true;
+   bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().cache =
+      cache_non_temporal;
 
    finish_assembler_test();
 }
@@ -482,22 +488,31 @@ BEGIN_TEST(assembler.mubuf)
    bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), op_s1, 84, false);
 
    /* Various flags */
+   ac_hw_cache_flags cache_coherent;
+   ac_hw_cache_flags cache_sys_coherent;
+   ac_hw_cache_flags cache_non_temporal;
+   ac_hw_cache_flags cache_atomic_rtn;
+   cache_coherent.value = ac_glc;
+   cache_sys_coherent.value = ac_slc;
+   cache_non_temporal.value = ac_dlc;
+   cache_atomic_rtn.value = ac_glc;
+
    //~gfx11! buffer_load_b32 v42, off, s[32:35], 0 glc ; e0504000 80082a80
    //~gfx12! buffer_load_b32 v42, off, s[32:35], null scope:SCOPE_SYS ; c405007c 008c402a 00000000
    bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
       ->mubuf()
-      .glc = true;
+      .cache = cache_coherent;
 
    //~gfx11! buffer_load_b32 v42, off, s[32:35], 0 dlc ; e0502000 80082a80
    //~gfx12! (then repeated 2 times)
    bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
       ->mubuf()
-      .dlc = true;
+      .cache = cache_non_temporal;
 
    //~gfx11! buffer_load_b32 v42, off, s[32:35], 0 slc ; e0501000 80082a80
    bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
      ->mubuf()
-      .slc = true;
+      .cache = cache_sys_coherent;
 
    //; if llvm_ver >= 16 and variant == 'gfx11':
    //;    insert_pattern('buffer_load_b32 v[42:43], off, s[32:35], 0 tfe ; e0500000 80282a80')
@@ -562,7 +577,7 @@ BEGIN_TEST(assembler.mubuf)
    bld.mubuf(aco_opcode::buffer_atomic_add, Definition(op_v1.physReg(), v1), op_s4, Operand(v1),
             Operand::zero(), op_v1, 0, false)
      ->mubuf()
-      .glc = true;
+      .cache = cache_atomic_rtn;
 
    finish_assembler_test();
 }
@@ -632,25 +647,32 @@ BEGIN_TEST(assembler.mtbuf)
             false);
 
    /* Various flags */
+   ac_hw_cache_flags cache_coherent;
+   ac_hw_cache_flags cache_sys_coherent;
+   ac_hw_cache_flags cache_non_temporal;
+   cache_coherent.value = ac_glc;
+   cache_sys_coherent.value = ac_slc;
+   cache_non_temporal.value = ac_dlc;
+
    //~gfx11! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] glc ; e9904000 80082a80
    //~gfx12! tbuffer_load_format_x v42, off, s[32:35], null format:[BUF_FMT_32_32_FLOAT] scope:SCOPE_SYS ; c420007c 190c402a 00000080
    bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
             nfmt, 0, false)
      ->mtbuf()
-      .glc = true;
+      .cache = cache_coherent;
 
    //~gfx11! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] dlc ; e9902000 80082a80
    //~gfx12! (then repeated 2 times)
    bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
             nfmt, 0, false)
      ->mtbuf()
-      .dlc = true;
+      .cache = cache_non_temporal;
 
    //~gfx11! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] slc ; e9901000 80082a80
    bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
             nfmt, 0, false)
      ->mtbuf()
-      .slc = true;
+      .cache = cache_sys_coherent;
 
    //; if llvm_ver >= 16 and variant == 'gfx11':
    //;    insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] ; e9900000 80282a80')
@@ -718,19 +740,28 @@ BEGIN_TEST(assembler.mimg)
       0x1;
 
    /* Various flags */
+   ac_hw_cache_flags cache_coherent;
+   ac_hw_cache_flags cache_sys_coherent;
+   ac_hw_cache_flags cache_non_temporal;
+   ac_hw_cache_flags cache_atomic_rtn;
+   cache_coherent.value = ac_glc;
+   cache_sys_coherent.value = ac_slc;
+   cache_non_temporal.value = ac_dlc;
+   cache_atomic_rtn.value = ac_glc;
+
    //~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D dlc ; f06c2f00 2010540a
    //~gfx12! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D scope:SCOPE_SYS ; e7c6c000 100c8054 0000000a
-   bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().dlc =
-      true;
+   bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().cache =
+      cache_non_temporal;
 
    //~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D glc ; f06c4f00 2010540a
    //~gfx12! (then repeated 2 times)
-   bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().glc =
-      true;
+   bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().cache =
+      cache_coherent;
 
    //~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D slc ; f06c1f00 2010540a
-   bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().slc =
-      true;
+   bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().cache =
+      cache_sys_coherent;
 
    //~gfx11! image_sample v[84:88], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; f06c0f00 2030540a
    //~gfx12! image_sample v[84:88], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; e7c6c008 10008054 0000000a
@@ -799,7 +830,7 @@ BEGIN_TEST(assembler.mimg)
    //~gfx11! image_atomic_add v10, v20, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D glc ; f0304f04 00100a14
    //~gfx12! image_atomic_add_uint v10, [v20, v21, v0, v0], s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D th:TH_ATOMIC_RETURN ; d3c30001 0010800a 00001514
    bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4),
-            op_v1, op_v2, 0xf, false, false, false, true)
+            op_v1, op_v2, 0xf, false, false, false, cache_atomic_rtn)
      ->mimg()
      .dim = ac_image_2d;
 
@@ -876,16 +907,28 @@ BEGIN_TEST(assembler.flat)
    bld.global(aco_opcode::global_load_dword, dst_v1, op_v2, Operand(s1), 84);
 
    /* Various flags */
+   ac_hw_cache_flags cache_coherent;
+   ac_hw_cache_flags cache_sys_coherent;
+   ac_hw_cache_flags cache_non_temporal;
+   ac_hw_cache_flags cache_atomic_rtn;
+   cache_coherent.value = ac_glc;
+   cache_sys_coherent.value = ac_slc;
+   cache_non_temporal.value = ac_dlc;
+   cache_atomic_rtn.value = ac_glc;
+
    //~gfx11! flat_load_b32 v42, v[20:21] slc ; dc508000 2a7c0014
    //~gfx12! flat_load_b32 v42, v[20:21] scope:SCOPE_SYS ; ec05007c 000c002a 00000014
-   bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().slc = true;
+   bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().cache =
+      cache_sys_coherent;
 
    //~gfx11! flat_load_b32 v42, v[20:21] glc ; dc504000 2a7c0014
    //~gfx12! (then repeated 2 times)
-   bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().glc = true;
+   bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().cache =
+      cache_coherent;
 
    //~gfx11! flat_load_b32 v42, v[20:21] dlc ; dc502000 2a7c0014
-   bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().dlc = true;
+   bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().cache =
+      cache_non_temporal;
 
    /* Stores */
    //~gfx11! flat_store_b32 v[20:21], v10 ; dc680000 007c0a14
@@ -895,8 +938,8 @@ BEGIN_TEST(assembler.flat)
    /* Atomic with return */
    //~gfx11! global_atomic_add_u32 v42, v[20:21], v10, off glc ; dcd64000 2a7c0a14
    //~gfx12! global_atomic_add_u32 v42, v[20:21], v10, off th:TH_ATOMIC_RETURN ; ee0d407c 0510002a 00000014
-   bld.global(aco_opcode::global_atomic_add, dst_v1, op_v2, Operand(s1), op_v1)->global().glc =
-      true;
+   bld.global(aco_opcode::global_atomic_add, dst_v1, op_v2, Operand(s1), op_v1)->global().cache =
+      cache_atomic_rtn;
 
    finish_assembler_test();
 }
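
The gfx12 expectations in these tests ("scope:SCOPE_SYS", "th:TH_ATOMIC_RETURN") follow from get_gfx12_cpol() in the assembler hunk at the top of this patch: any legacy glc/slc/dlc bit promotes a non-atomic access to SCOPE_SYS, and glc on an atomic becomes TH_ATOMIC_RETURN. A worked example mirroring that mapping (not a separate implementation):

   /* For a non-atomic GFX12 load: any legacy coherence bit -> SCOPE_SYS (3),
    * otherwise SCOPE_CU (0), as in get_gfx12_cpol() above. */
   ac_hw_cache_flags cache;
   cache.value = ac_dlc; /* e.g. the dlc test cases */
   unsigned scope = (cache.value & (ac_glc | ac_slc | ac_dlc)) ? 3 /*SCOPE_SYS*/ : 0 /*SCOPE_CU*/;
   assert(scope == 3); /* printed as scope:SCOPE_SYS */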