mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-06 08:50:09 +01:00
aco: use ac_hw_cache_flags
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29243>
This commit is contained in:
parent
cdaf269924
commit
b41f0f6cc1
9 changed files with 244 additions and 205 deletions
|
|
@ -126,11 +126,14 @@ template <typename T>
|
|||
uint32_t
|
||||
get_gfx12_cpol(const T& instr)
|
||||
{
|
||||
bool glc = instr.cache.value & ac_glc;
|
||||
bool slc = instr.cache.value & ac_slc;
|
||||
bool dlc = instr.cache.value & ac_dlc;
|
||||
if (instr_info.is_atomic[(int)instr.opcode]) {
|
||||
return (instr.glc ? 1 /*TH_ATOMIC_RETURN*/ : 0) << 2;
|
||||
return (glc ? 1 /*TH_ATOMIC_RETURN*/ : 0) << 2;
|
||||
} else {
|
||||
return (instr.definitions.empty() || instr.glc || instr.slc || instr.dlc) ? 3 /*SCOPE_SYS*/
|
||||
: 0 /*SCOPE_CU*/;
|
||||
return (instr.definitions.empty() || glc || slc || dlc) ? 3 /*SCOPE_SYS*/
|
||||
: 0 /*SCOPE_CU*/;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -228,6 +231,8 @@ emit_smem_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
{
|
||||
uint32_t opcode = ctx.opcode[(int)instr->opcode];
|
||||
SMEM_instruction& smem = instr->smem();
|
||||
bool glc = smem.cache.value & ac_glc;
|
||||
bool dlc = smem.cache.value & ac_dlc;
|
||||
|
||||
bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4);
|
||||
bool is_load = !instr->definitions.empty();
|
||||
|
|
@ -258,22 +263,21 @@ emit_smem_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
|
||||
if (ctx.gfx_level <= GFX9) {
|
||||
encoding = (0b110000 << 26);
|
||||
assert(!smem.dlc); /* Device-level coherent is not supported on GFX9 and lower */
|
||||
encoding |= smem.nv ? 1 << 15 : 0;
|
||||
assert(!dlc); /* Device-level coherent is not supported on GFX9 and lower */
|
||||
/* We don't use the NV bit. */
|
||||
} else {
|
||||
encoding = (0b111101 << 26);
|
||||
assert(!smem.nv); /* Non-volatile is not supported on GFX10 */
|
||||
if (ctx.gfx_level <= GFX11_5)
|
||||
encoding |= smem.dlc ? 1 << (ctx.gfx_level >= GFX11 ? 13 : 14) : 0;
|
||||
encoding |= dlc ? 1 << (ctx.gfx_level >= GFX11 ? 13 : 14) : 0;
|
||||
}
|
||||
|
||||
if (ctx.gfx_level <= GFX11_5) {
|
||||
encoding |= opcode << 18;
|
||||
encoding |= smem.glc ? 1 << (ctx.gfx_level >= GFX11 ? 14 : 16) : 0;
|
||||
encoding |= glc ? 1 << (ctx.gfx_level >= GFX11 ? 14 : 16) : 0;
|
||||
} else {
|
||||
encoding |= opcode << 13;
|
||||
if (is_load)
|
||||
encoding |= ((smem.glc || smem.dlc) ? 3 /*SCOPE_SYS*/ : 0 /*SCOPE_CU*/) << 21;
|
||||
encoding |= ((glc || dlc) ? 3 /*SCOPE_SYS*/ : 0 /*SCOPE_CU*/) << 21;
|
||||
}
|
||||
|
||||
if (ctx.gfx_level <= GFX9) {
|
||||
|
|
@ -536,6 +540,9 @@ emit_mubuf_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction
|
|||
{
|
||||
uint32_t opcode = ctx.opcode[(int)instr->opcode];
|
||||
MUBUF_instruction& mubuf = instr->mubuf();
|
||||
bool glc = mubuf.cache.value & ac_glc;
|
||||
bool slc = mubuf.cache.value & ac_slc;
|
||||
bool dlc = mubuf.cache.value & ac_dlc;
|
||||
|
||||
uint32_t encoding = (0b111000 << 26);
|
||||
if (ctx.gfx_level >= GFX11 && mubuf.lds) /* GFX11 has separate opcodes for LDS loads */
|
||||
|
|
@ -543,7 +550,7 @@ emit_mubuf_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction
|
|||
else
|
||||
encoding |= (mubuf.lds ? 1 : 0) << 16;
|
||||
encoding |= opcode << 18;
|
||||
encoding |= (mubuf.glc ? 1 : 0) << 14;
|
||||
encoding |= (glc ? 1 : 0) << 14;
|
||||
if (ctx.gfx_level <= GFX10_3)
|
||||
encoding |= (mubuf.idxen ? 1 : 0) << 13;
|
||||
assert(!mubuf.addr64 || ctx.gfx_level <= GFX7);
|
||||
|
|
@ -552,19 +559,19 @@ emit_mubuf_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction
|
|||
if (ctx.gfx_level <= GFX10_3)
|
||||
encoding |= (mubuf.offen ? 1 : 0) << 12;
|
||||
if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) {
|
||||
assert(!mubuf.dlc); /* Device-level coherent is not supported on GFX9 and lower */
|
||||
encoding |= (mubuf.slc ? 1 : 0) << 17;
|
||||
assert(!dlc); /* Device-level coherent is not supported on GFX9 and lower */
|
||||
encoding |= (slc ? 1 : 0) << 17;
|
||||
} else if (ctx.gfx_level >= GFX11) {
|
||||
encoding |= (mubuf.slc ? 1 : 0) << 12;
|
||||
encoding |= (mubuf.dlc ? 1 : 0) << 13;
|
||||
encoding |= (slc ? 1 : 0) << 12;
|
||||
encoding |= (dlc ? 1 : 0) << 13;
|
||||
} else if (ctx.gfx_level >= GFX10) {
|
||||
encoding |= (mubuf.dlc ? 1 : 0) << 15;
|
||||
encoding |= (dlc ? 1 : 0) << 15;
|
||||
}
|
||||
encoding |= 0x0FFF & mubuf.offset;
|
||||
out.push_back(encoding);
|
||||
encoding = 0;
|
||||
if (ctx.gfx_level <= GFX7 || (ctx.gfx_level >= GFX10 && ctx.gfx_level <= GFX10_3)) {
|
||||
encoding |= (mubuf.slc ? 1 : 0) << 22;
|
||||
encoding |= (slc ? 1 : 0) << 22;
|
||||
}
|
||||
encoding |= reg(ctx, instr->operands[2]) << 24;
|
||||
if (ctx.gfx_level >= GFX11) {
|
||||
|
|
@ -625,24 +632,27 @@ emit_mtbuf_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction
|
|||
{
|
||||
uint32_t opcode = ctx.opcode[(int)instr->opcode];
|
||||
MTBUF_instruction& mtbuf = instr->mtbuf();
|
||||
bool glc = mtbuf.cache.value & ac_glc;
|
||||
bool slc = mtbuf.cache.value & ac_slc;
|
||||
bool dlc = mtbuf.cache.value & ac_dlc;
|
||||
|
||||
uint32_t img_format = ac_get_tbuffer_format(ctx.gfx_level, mtbuf.dfmt, mtbuf.nfmt);
|
||||
|
||||
uint32_t encoding = (0b111010 << 26);
|
||||
assert(img_format <= 0x7F);
|
||||
assert(!mtbuf.dlc || ctx.gfx_level >= GFX10);
|
||||
assert(!dlc || ctx.gfx_level >= GFX10);
|
||||
if (ctx.gfx_level >= GFX11) {
|
||||
encoding |= (mtbuf.slc ? 1 : 0) << 12;
|
||||
encoding |= (mtbuf.dlc ? 1 : 0) << 13;
|
||||
encoding |= (slc ? 1 : 0) << 12;
|
||||
encoding |= (dlc ? 1 : 0) << 13;
|
||||
} else {
|
||||
/* DLC bit replaces one bit of the OPCODE on GFX10 */
|
||||
encoding |= (mtbuf.dlc ? 1 : 0) << 15;
|
||||
encoding |= (dlc ? 1 : 0) << 15;
|
||||
}
|
||||
if (ctx.gfx_level <= GFX10_3) {
|
||||
encoding |= (mtbuf.idxen ? 1 : 0) << 13;
|
||||
encoding |= (mtbuf.offen ? 1 : 0) << 12;
|
||||
}
|
||||
encoding |= (mtbuf.glc ? 1 : 0) << 14;
|
||||
encoding |= (glc ? 1 : 0) << 14;
|
||||
encoding |= 0x0FFF & mtbuf.offset;
|
||||
encoding |= (img_format << 19); /* Handles both the GFX10 FORMAT and the old NFMT+DFMT */
|
||||
|
||||
|
|
@ -662,7 +672,7 @@ emit_mtbuf_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction
|
|||
encoding |= (mtbuf.idxen ? 1 : 0) << 23;
|
||||
} else {
|
||||
encoding |= (mtbuf.tfe ? 1 : 0) << 23;
|
||||
encoding |= (mtbuf.slc ? 1 : 0) << 22;
|
||||
encoding |= (slc ? 1 : 0) << 22;
|
||||
}
|
||||
encoding |= (reg(ctx, instr->operands[0]) >> 2) << 16;
|
||||
if (instr->operands.size() > 3)
|
||||
|
|
@ -721,6 +731,9 @@ emit_mimg_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
{
|
||||
uint32_t opcode = ctx.opcode[(int)instr->opcode];
|
||||
MIMG_instruction& mimg = instr->mimg();
|
||||
bool glc = mimg.cache.value & ac_glc;
|
||||
bool slc = mimg.cache.value & ac_slc;
|
||||
bool dlc = mimg.cache.value & ac_dlc;
|
||||
|
||||
unsigned nsa_dwords = get_mimg_nsa_dwords(instr);
|
||||
assert(!nsa_dwords || ctx.gfx_level >= GFX10);
|
||||
|
|
@ -732,23 +745,23 @@ emit_mimg_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
encoding |= mimg.dim << 2;
|
||||
encoding |= mimg.unrm ? 1 << 7 : 0;
|
||||
encoding |= (0xF & mimg.dmask) << 8;
|
||||
encoding |= mimg.slc ? 1 << 12 : 0;
|
||||
encoding |= mimg.dlc ? 1 << 13 : 0;
|
||||
encoding |= mimg.glc ? 1 << 14 : 0;
|
||||
encoding |= slc ? 1 << 12 : 0;
|
||||
encoding |= dlc ? 1 << 13 : 0;
|
||||
encoding |= glc ? 1 << 14 : 0;
|
||||
encoding |= mimg.r128 ? 1 << 15 : 0;
|
||||
encoding |= mimg.a16 ? 1 << 16 : 0;
|
||||
encoding |= mimg.d16 ? 1 << 17 : 0;
|
||||
encoding |= (opcode & 0xFF) << 18;
|
||||
} else {
|
||||
encoding |= mimg.slc ? 1 << 25 : 0;
|
||||
encoding |= slc ? 1 << 25 : 0;
|
||||
encoding |= (opcode & 0x7f) << 18;
|
||||
encoding |= (opcode >> 7) & 1;
|
||||
encoding |= mimg.lwe ? 1 << 17 : 0;
|
||||
encoding |= mimg.tfe ? 1 << 16 : 0;
|
||||
encoding |= mimg.glc ? 1 << 13 : 0;
|
||||
encoding |= glc ? 1 << 13 : 0;
|
||||
encoding |= mimg.unrm ? 1 << 12 : 0;
|
||||
if (ctx.gfx_level <= GFX9) {
|
||||
assert(!mimg.dlc); /* Device-level coherent is not supported on GFX9 and lower */
|
||||
assert(!dlc); /* Device-level coherent is not supported on GFX9 and lower */
|
||||
assert(!mimg.r128);
|
||||
encoding |= mimg.a16 ? 1 << 15 : 0;
|
||||
encoding |= mimg.da ? 1 << 14 : 0;
|
||||
|
|
@ -757,7 +770,7 @@ emit_mimg_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
: 0; /* GFX10: A16 moved to 2nd word, R128 replaces it in 1st word */
|
||||
encoding |= nsa_dwords << 1;
|
||||
encoding |= mimg.dim << 3; /* GFX10: dimensionality instead of declare array */
|
||||
encoding |= mimg.dlc ? 1 << 7 : 0;
|
||||
encoding |= dlc ? 1 << 7 : 0;
|
||||
}
|
||||
encoding |= (0xF & mimg.dmask) << 8;
|
||||
}
|
||||
|
|
@ -856,6 +869,9 @@ emit_flatlike_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruct
|
|||
{
|
||||
uint32_t opcode = ctx.opcode[(int)instr->opcode];
|
||||
FLAT_instruction& flat = instr->flatlike();
|
||||
bool glc = flat.cache.value & ac_glc;
|
||||
bool slc = flat.cache.value & ac_slc;
|
||||
bool dlc = flat.cache.value & ac_dlc;
|
||||
|
||||
uint32_t encoding = (0b110111 << 26);
|
||||
encoding |= opcode << 18;
|
||||
|
|
@ -879,13 +895,13 @@ emit_flatlike_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruct
|
|||
else if (instr->isGlobal())
|
||||
encoding |= 2 << (ctx.gfx_level >= GFX11 ? 16 : 14);
|
||||
encoding |= flat.lds ? 1 << 13 : 0;
|
||||
encoding |= flat.glc ? 1 << (ctx.gfx_level >= GFX11 ? 14 : 16) : 0;
|
||||
encoding |= flat.slc ? 1 << (ctx.gfx_level >= GFX11 ? 15 : 17) : 0;
|
||||
encoding |= glc ? 1 << (ctx.gfx_level >= GFX11 ? 14 : 16) : 0;
|
||||
encoding |= slc ? 1 << (ctx.gfx_level >= GFX11 ? 15 : 17) : 0;
|
||||
if (ctx.gfx_level >= GFX10) {
|
||||
assert(!flat.nv);
|
||||
encoding |= flat.dlc ? 1 << (ctx.gfx_level >= GFX11 ? 13 : 12) : 0;
|
||||
encoding |= dlc ? 1 << (ctx.gfx_level >= GFX11 ? 13 : 12) : 0;
|
||||
} else {
|
||||
assert(!flat.dlc);
|
||||
assert(!dlc);
|
||||
}
|
||||
out.push_back(encoding);
|
||||
encoding = reg(ctx, instr->operands[0], 8);
|
||||
|
|
|
|||
|
|
@ -4423,6 +4423,35 @@ lds_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned
|
|||
|
||||
const EmitLoadParameters lds_load_params{lds_load_callback, false, true, UINT32_MAX};
|
||||
|
||||
/* Packs three boolean coherency bits into an ac_hw_cache_flags value.
 * Each argument that is true ORs the matching constant (ac_glc, ac_slc,
 * ac_dlc) into an 8-bit value; arguments that are false contribute nothing.
 * The result is brace-initialized from that single byte.
 */
ac_hw_cache_flags
|
||||
get_gfx6_cache_flags(bool glc, bool slc, bool dlc)
|
||||
{
|
||||
uint8_t value = 0;
|
||||
value |= glc ? ac_glc : 0;
|
||||
value |= slc ? ac_slc : 0;
|
||||
value |= dlc ? ac_dlc : 0;
|
||||
return ac_hw_cache_flags{value};
|
||||
}
|
||||
|
||||
/* Builds the cache flags for a load.
 * glc and slc are forwarded unchanged. dlc is derived: it is set only when
 * glc is requested and bld.program->gfx_level is GFX10 or GFX10_3; on every
 * other level it stays clear.
 */
ac_hw_cache_flags
|
||||
get_load_cache_flags(Builder& bld, bool glc, bool slc)
|
||||
{
|
||||
bool dlc = glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
return get_gfx6_cache_flags(glc, slc, dlc);
|
||||
}
|
||||
|
||||
/* Builds the cache flags for a store.
 * glc and slc are forwarded unchanged and dlc is always left clear.
 * bld is not read by this function.
 */
ac_hw_cache_flags
|
||||
get_store_cache_flags(Builder& bld, bool glc, bool slc)
|
||||
{
|
||||
return get_gfx6_cache_flags(glc, slc, false);
|
||||
}
|
||||
|
||||
/* Builds the cache flags for an atomic operation.
 * glc is set exactly when return_previous is true; slc and dlc are always
 * left clear. bld is not read by this function.
 */
ac_hw_cache_flags
|
||||
get_atomic_cache_flags(Builder& bld, bool return_previous)
|
||||
{
|
||||
return get_gfx6_cache_flags(return_previous, false, false);
|
||||
}
|
||||
|
||||
Temp
|
||||
smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
|
||||
unsigned align, unsigned const_offset, Temp dst_hint)
|
||||
|
|
@ -4478,9 +4507,7 @@ smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned
|
|||
RegClass rc(RegType::sgpr, DIV_ROUND_UP(bytes_needed, 4u));
|
||||
Temp val = dst_hint.id() && dst_hint.regClass() == rc ? dst_hint : bld.tmp(rc);
|
||||
load->definitions[0] = Definition(val);
|
||||
load->smem().glc = info.glc;
|
||||
load->smem().dlc =
|
||||
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
load->smem().cache = get_load_cache_flags(bld, info.glc, false);
|
||||
load->smem().sync = info.sync;
|
||||
bld.insert(std::move(load));
|
||||
return val;
|
||||
|
|
@ -4539,13 +4566,11 @@ mubuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne
|
|||
mubuf->operands[2] = soffset;
|
||||
mubuf->mubuf().offen = offen;
|
||||
mubuf->mubuf().idxen = idxen;
|
||||
mubuf->mubuf().glc = info.glc;
|
||||
mubuf->mubuf().dlc =
|
||||
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
mubuf->mubuf().slc = info.slc;
|
||||
mubuf->mubuf().cache = get_load_cache_flags(bld, info.glc, info.slc);
|
||||
if (info.swizzle_component_size != 0)
|
||||
mubuf->mubuf().cache.value |= ac_swizzled;
|
||||
mubuf->mubuf().sync = info.sync;
|
||||
mubuf->mubuf().offset = const_offset;
|
||||
mubuf->mubuf().swizzled = info.swizzle_component_size != 0;
|
||||
RegClass rc = RegClass::get(RegType::vgpr, bytes_size);
|
||||
Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
|
||||
mubuf->definitions[0] = Definition(val);
|
||||
|
|
@ -4607,10 +4632,7 @@ mubuf_load_format_callback(Builder& bld, const LoadEmitInfo& info, Temp offset,
|
|||
mubuf->operands[2] = soffset;
|
||||
mubuf->mubuf().offen = offen;
|
||||
mubuf->mubuf().idxen = idxen;
|
||||
mubuf->mubuf().glc = info.glc;
|
||||
mubuf->mubuf().dlc =
|
||||
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
mubuf->mubuf().slc = info.slc;
|
||||
mubuf->mubuf().cache = get_load_cache_flags(bld, info.glc, info.slc);
|
||||
mubuf->mubuf().sync = info.sync;
|
||||
mubuf->mubuf().offset = const_offset;
|
||||
RegClass rc = RegClass::get(RegType::vgpr, bytes_needed);
|
||||
|
|
@ -4818,8 +4840,7 @@ global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsign
|
|||
mubuf->operands[0] = Operand(get_gfx6_global_rsrc(bld, addr));
|
||||
mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
|
||||
mubuf->operands[2] = Operand(offset);
|
||||
mubuf->mubuf().glc = info.glc;
|
||||
mubuf->mubuf().dlc = false;
|
||||
mubuf->mubuf().cache = get_load_cache_flags(bld, info.glc, false);
|
||||
mubuf->mubuf().offset = const_offset;
|
||||
mubuf->mubuf().addr64 = addr.type() == RegType::vgpr;
|
||||
mubuf->mubuf().disable_wqm = false;
|
||||
|
|
@ -4838,9 +4859,7 @@ global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsign
|
|||
flat->operands[0] = Operand(addr);
|
||||
flat->operands[1] = Operand(s1);
|
||||
}
|
||||
flat->flatlike().glc = info.glc;
|
||||
flat->flatlike().dlc =
|
||||
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
flat->flatlike().cache = get_load_cache_flags(bld, info.glc, false);
|
||||
flat->flatlike().sync = info.sync;
|
||||
assert(global || !const_offset);
|
||||
flat->flatlike().offset = const_offset;
|
||||
|
|
@ -5673,10 +5692,7 @@ mtbuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne
|
|||
mtbuf->operands[2] = soffset;
|
||||
mtbuf->mtbuf().offen = offen;
|
||||
mtbuf->mtbuf().idxen = idxen;
|
||||
mtbuf->mtbuf().glc = info.glc;
|
||||
mtbuf->mtbuf().dlc =
|
||||
info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
|
||||
mtbuf->mtbuf().slc = info.slc;
|
||||
mtbuf->mtbuf().cache = get_load_cache_flags(bld, info.glc, info.slc);
|
||||
mtbuf->mtbuf().sync = info.sync;
|
||||
mtbuf->mtbuf().offset = const_offset;
|
||||
mtbuf->mtbuf().dfmt = fetch_fmt & 0xf;
|
||||
|
|
@ -6220,6 +6236,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
|
||||
memory_sync_info sync = get_memory_sync_info(instr, storage_image, 0);
|
||||
unsigned access = nir_intrinsic_access(instr);
|
||||
bool glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT);
|
||||
|
||||
unsigned result_size = instr->def.num_components - is_sparse;
|
||||
unsigned expand_mask = nir_def_components_read(&instr->def) & u_bit_consecutive(0, result_size);
|
||||
|
|
@ -6275,9 +6292,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
load->operands[2] = Operand::c32(0);
|
||||
load->definitions[0] = Definition(tmp);
|
||||
load->mubuf().idxen = true;
|
||||
load->mubuf().glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT);
|
||||
load->mubuf().dlc = load->mubuf().glc &&
|
||||
(ctx->options->gfx_level == GFX10 || ctx->options->gfx_level == GFX10_3);
|
||||
load->mubuf().cache = get_load_cache_flags(bld, glc, false);
|
||||
load->mubuf().sync = sync;
|
||||
load->mubuf().tfe = is_sparse;
|
||||
if (load->mubuf().tfe)
|
||||
|
|
@ -6296,9 +6311,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
|
||||
Operand vdata = is_sparse ? emit_tfe_init(bld, tmp) : Operand(v1);
|
||||
MIMG_instruction* load = emit_mimg(bld, opcode, tmp, resource, Operand(s4), coords, vdata);
|
||||
load->glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT) ? 1 : 0;
|
||||
load->dlc =
|
||||
load->glc && (ctx->options->gfx_level == GFX10 || ctx->options->gfx_level == GFX10_3);
|
||||
load->cache = get_load_cache_flags(bld, glc, false);
|
||||
load->a16 = instr->src[1].ssa->bit_size == 16;
|
||||
load->d16 = d16;
|
||||
load->dmask = dmask;
|
||||
|
|
@ -6422,8 +6435,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
store->operands[2] = Operand::c32(0);
|
||||
store->operands[3] = Operand(data);
|
||||
store->mubuf().idxen = true;
|
||||
store->mubuf().glc = glc;
|
||||
store->mubuf().dlc = false;
|
||||
store->mubuf().cache = get_store_cache_flags(bld, glc, false);
|
||||
store->mubuf().disable_wqm = true;
|
||||
store->mubuf().sync = sync;
|
||||
ctx->program->needs_exact = true;
|
||||
|
|
@ -6440,8 +6452,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
|
||||
MIMG_instruction* store =
|
||||
emit_mimg(bld, opcode, Temp(0, v1), resource, Operand(s4), coords, Operand(data));
|
||||
store->glc = glc;
|
||||
store->dlc = false;
|
||||
store->cache = get_store_cache_flags(bld, glc, false);
|
||||
store->a16 = instr->src[1].ssa->bit_size == 16;
|
||||
store->d16 = d16;
|
||||
store->dmask = dmask;
|
||||
|
|
@ -6581,8 +6592,7 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
mubuf->definitions[0] = def;
|
||||
mubuf->mubuf().offset = 0;
|
||||
mubuf->mubuf().idxen = true;
|
||||
mubuf->mubuf().glc = return_previous;
|
||||
mubuf->mubuf().dlc = false; /* Not needed for atomics */
|
||||
mubuf->mubuf().cache = get_atomic_cache_flags(bld, return_previous);
|
||||
mubuf->mubuf().disable_wqm = true;
|
||||
mubuf->mubuf().sync = sync;
|
||||
ctx->program->needs_exact = true;
|
||||
|
|
@ -6597,8 +6607,7 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
Temp tmp = return_previous ? (cmpswap ? bld.tmp(data.regClass()) : dst) : Temp(0, v1);
|
||||
MIMG_instruction* mimg =
|
||||
emit_mimg(bld, image_op, tmp, resource, Operand(s4), coords, Operand(data));
|
||||
mimg->glc = return_previous;
|
||||
mimg->dlc = false; /* Not needed for atomics */
|
||||
mimg->cache = get_atomic_cache_flags(bld, return_previous);
|
||||
mimg->dmask = (1 << data.size()) - 1;
|
||||
mimg->a16 = instr->src[1].ssa->bit_size == 16;
|
||||
mimg->unrm = true;
|
||||
|
|
@ -6670,8 +6679,8 @@ visit_store_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
store->operands[3] = Operand(write_datas[i]);
|
||||
store->mubuf().offset = offsets[i];
|
||||
store->mubuf().offen = (offset.type() == RegType::vgpr);
|
||||
store->mubuf().glc = glc || (ctx->program->gfx_level == GFX6 && write_datas[i].bytes() < 4);
|
||||
store->mubuf().dlc = false;
|
||||
store->mubuf().cache = get_store_cache_flags(
|
||||
bld, glc || (ctx->program->gfx_level == GFX6 && write_datas[i].bytes() < 4), false);
|
||||
store->mubuf().disable_wqm = true;
|
||||
store->mubuf().sync = sync;
|
||||
ctx->program->needs_exact = true;
|
||||
|
|
@ -6712,8 +6721,7 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
mubuf->definitions[0] = def;
|
||||
mubuf->mubuf().offset = 0;
|
||||
mubuf->mubuf().offen = (offset.type() == RegType::vgpr);
|
||||
mubuf->mubuf().glc = return_previous;
|
||||
mubuf->mubuf().dlc = false; /* Not needed for atomics */
|
||||
mubuf->mubuf().cache = get_atomic_cache_flags(bld, return_previous);
|
||||
mubuf->mubuf().disable_wqm = true;
|
||||
mubuf->mubuf().sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
|
||||
ctx->program->needs_exact = true;
|
||||
|
|
@ -6846,8 +6854,7 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
flat->operands[1] = Operand(s1);
|
||||
}
|
||||
flat->operands[2] = Operand(write_datas[i]);
|
||||
flat->flatlike().glc = glc;
|
||||
flat->flatlike().dlc = false;
|
||||
flat->flatlike().cache = get_store_cache_flags(bld, glc, false);
|
||||
assert(global || !write_const_offset);
|
||||
flat->flatlike().offset = write_const_offset;
|
||||
flat->flatlike().disable_wqm = true;
|
||||
|
|
@ -6867,8 +6874,8 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
write_address.type() == RegType::vgpr ? Operand(write_address) : Operand(v1);
|
||||
mubuf->operands[2] = Operand(write_offset);
|
||||
mubuf->operands[3] = Operand(write_datas[i]);
|
||||
mubuf->mubuf().glc = glc || write_datas[i].bytes() < 4;
|
||||
mubuf->mubuf().dlc = false;
|
||||
mubuf->mubuf().cache =
|
||||
get_store_cache_flags(bld, glc || write_datas[i].bytes() < 4, false);
|
||||
mubuf->mubuf().offset = write_const_offset;
|
||||
mubuf->mubuf().addr64 = write_address.type() == RegType::vgpr;
|
||||
mubuf->mubuf().disable_wqm = true;
|
||||
|
|
@ -6980,8 +6987,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
flat->operands[2] = Operand(data);
|
||||
if (return_previous)
|
||||
flat->definitions[0] = Definition(dst);
|
||||
flat->flatlike().glc = return_previous;
|
||||
flat->flatlike().dlc = false; /* Not needed for atomics */
|
||||
flat->flatlike().cache = get_atomic_cache_flags(bld, return_previous);
|
||||
assert(global || !const_offset);
|
||||
flat->flatlike().offset = const_offset;
|
||||
flat->flatlike().disable_wqm = true;
|
||||
|
|
@ -7007,8 +7013,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
|
||||
if (return_previous)
|
||||
mubuf->definitions[0] = def;
|
||||
mubuf->mubuf().glc = return_previous;
|
||||
mubuf->mubuf().dlc = false;
|
||||
mubuf->mubuf().cache = get_atomic_cache_flags(bld, return_previous);
|
||||
mubuf->mubuf().offset = const_offset;
|
||||
mubuf->mubuf().addr64 = addr.type() == RegType::vgpr;
|
||||
mubuf->mubuf().disable_wqm = true;
|
||||
|
|
@ -7167,6 +7172,9 @@ visit_store_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
|
|||
bool glc = nir_intrinsic_access(intrin) & ACCESS_COHERENT;
|
||||
glc |= ctx->program->gfx_level == GFX6 && write_datas[i].bytes() < 4;
|
||||
glc &= ctx->program->gfx_level < GFX11;
|
||||
ac_hw_cache_flags cache = get_store_cache_flags(bld, glc, slc);
|
||||
if (swizzled)
|
||||
cache.value |= ac_swizzled;
|
||||
|
||||
Operand vaddr_op(v1);
|
||||
if (offen && idxen)
|
||||
|
|
@ -7177,9 +7185,8 @@ visit_store_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
|
|||
vaddr_op = Operand(idx);
|
||||
|
||||
Instruction* mubuf = bld.mubuf(op, Operand(descriptor), vaddr_op, s_offset,
|
||||
Operand(write_datas[i]), const_offset, offen, swizzled, idxen,
|
||||
/* addr64 */ false, /* disable_wqm */ false, glc,
|
||||
/* dlc */ false, slc)
|
||||
Operand(write_datas[i]), const_offset, offen, idxen,
|
||||
/* addr64 */ false, /* disable_wqm */ false, cache)
|
||||
.instr;
|
||||
mubuf->mubuf().sync = sync;
|
||||
}
|
||||
|
|
@ -7637,9 +7644,11 @@ visit_store_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
for (unsigned i = 0; i < write_count; i++) {
|
||||
aco_opcode op = get_buffer_store_op(write_datas[i].bytes());
|
||||
Instruction* mubuf = bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset,
|
||||
write_datas[i], offsets[i], true, true);
|
||||
write_datas[i], offsets[i], true);
|
||||
mubuf->mubuf().sync = memory_sync_info(storage_scratch, semantic_private);
|
||||
mubuf->mubuf().glc = ctx->program->gfx_level == GFX6 && write_datas[i].bytes() < 4;
|
||||
bool glc = ctx->program->gfx_level == GFX6 && write_datas[i].bytes() < 4;
|
||||
mubuf->mubuf().cache = get_store_cache_flags(bld, glc, false);
|
||||
mubuf->mubuf().cache.value |= ac_swizzled;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -12098,9 +12107,12 @@ select_trap_handler_shader(Program* program, struct nir_shader* shader, ac_shade
|
|||
bld.smem(aco_opcode::s_load_dwordx4, Definition(PhysReg{ttmp4}, s4), Operand(PhysReg{tma}, s2),
|
||||
Operand::zero());
|
||||
|
||||
ac_hw_cache_flags cache_glc;
|
||||
cache_glc.value = ac_glc;
|
||||
|
||||
/* Store TTMP0-TTMP1. */
|
||||
bld.smem(aco_opcode::s_buffer_store_dwordx2, Operand(PhysReg{ttmp4}, s4), Operand::zero(),
|
||||
Operand(PhysReg{ttmp0}, s2), memory_sync_info(), true);
|
||||
Operand(PhysReg{ttmp0}, s2), memory_sync_info(), cache_glc);
|
||||
|
||||
uint32_t hw_regs_idx[] = {
|
||||
2, /* HW_REG_STATUS */
|
||||
|
|
@ -12116,7 +12128,8 @@ select_trap_handler_shader(Program* program, struct nir_shader* shader, ac_shade
|
|||
((20 - 1) << 11) | hw_regs_idx[i]);
|
||||
|
||||
bld.smem(aco_opcode::s_buffer_store_dword, Operand(PhysReg{ttmp4}, s4),
|
||||
Operand::c32(8u + i * 4), Operand(PhysReg{ttmp8}, s1), memory_sync_info(), true);
|
||||
Operand::c32(8u + i * 4), Operand(PhysReg{ttmp8}, s1), memory_sync_info(),
|
||||
cache_glc);
|
||||
}
|
||||
|
||||
program->config->float_mode = program->blocks[0].fp_mode.val;
|
||||
|
|
@ -12632,18 +12645,18 @@ load_unaligned_vs_attrib(Builder& bld, PhysReg dst, Operand desc, Operand index,
|
|||
PhysReg scratch(load.scratch);
|
||||
if (load.d16) {
|
||||
bld.mubuf(aco_opcode::buffer_load_ubyte_d16, Definition(dst, v1), desc, index,
|
||||
Operand::c32(0u), offset, false, false, true);
|
||||
Operand::c32(0u), offset, false, true);
|
||||
bld.mubuf(aco_opcode::buffer_load_ubyte_d16_hi, Definition(dst, v1), desc, index,
|
||||
Operand::c32(0u), offset + 2, false, false, true);
|
||||
Operand::c32(0u), offset + 2, false, true);
|
||||
bld.mubuf(aco_opcode::buffer_load_ubyte_d16, Definition(scratch, v1), desc, index,
|
||||
Operand::c32(0u), offset + 1, false, false, true);
|
||||
Operand::c32(0u), offset + 1, false, true);
|
||||
bld.mubuf(aco_opcode::buffer_load_ubyte_d16_hi, Definition(scratch, v1), desc, index,
|
||||
Operand::c32(0u), offset + 3, false, false, true);
|
||||
Operand::c32(0u), offset + 3, false, true);
|
||||
} else {
|
||||
for (unsigned i = 0; i < size; i++) {
|
||||
Definition def(i ? scratch.advance(i * 4 - 4) : dst, v1);
|
||||
bld.mubuf(aco_opcode::buffer_load_ubyte, def, desc, index, Operand::c32(0u), offset + i,
|
||||
false, false, true);
|
||||
false, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -12835,7 +12848,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
|
|||
i += slots;
|
||||
} else {
|
||||
bld.mubuf(aco_opcode::buffer_load_format_xyzw, Definition(dest, v4),
|
||||
Operand(cur_desc, s4), fetch_index, Operand::c32(0u), 0u, false, false, true);
|
||||
Operand(cur_desc, s4), fetch_index, Operand::c32(0u), 0u, false, true);
|
||||
loc++;
|
||||
i++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@
|
|||
|
||||
#include "ac_binary.h"
|
||||
#include "ac_hw_stage.h"
|
||||
#include "ac_shader_util.h"
|
||||
#include "amd_family.h"
|
||||
#include <algorithm>
|
||||
#include <bitset>
|
||||
|
|
@ -1309,11 +1310,7 @@ static_assert(sizeof(SALU_instruction) == sizeof(Instruction) + 4, "Unexpected p
|
|||
*/
|
||||
struct SMEM_instruction : public Instruction {
|
||||
memory_sync_info sync;
|
||||
bool glc : 1; /* VI+: globally coherent */
|
||||
bool dlc : 1; /* NAVI: device level coherent */
|
||||
bool nv : 1; /* VEGA only: Non-volatile */
|
||||
bool disable_wqm : 1;
|
||||
uint8_t padding : 4;
|
||||
ac_hw_cache_flags cache;
|
||||
};
|
||||
static_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
|
||||
|
||||
|
|
@ -1492,19 +1489,16 @@ static_assert(sizeof(LDSDIR_instruction) == sizeof(Instruction) + 8, "Unexpected
|
|||
*/
|
||||
struct MUBUF_instruction : public Instruction {
|
||||
memory_sync_info sync;
|
||||
ac_hw_cache_flags cache;
|
||||
bool offen : 1; /* Supply an offset from VGPR (VADDR) */
|
||||
bool idxen : 1; /* Supply an index from VGPR (VADDR) */
|
||||
bool addr64 : 1; /* SI, CIK: Address size is 64-bit */
|
||||
bool glc : 1; /* globally coherent */
|
||||
bool dlc : 1; /* NAVI: device level coherent */
|
||||
bool slc : 1; /* system level coherent */
|
||||
bool tfe : 1; /* texture fail enable */
|
||||
bool lds : 1; /* Return read-data to LDS instead of VGPRs */
|
||||
uint16_t disable_wqm : 1; /* Require an exec mask without helper invocations */
|
||||
uint16_t offset : 12; /* Unsigned byte offset - 12 bit */
|
||||
uint16_t swizzled : 1;
|
||||
uint16_t padding0 : 2;
|
||||
uint16_t padding1;
|
||||
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
|
||||
uint8_t padding0 : 2;
|
||||
uint8_t padding1;
|
||||
uint16_t offset; /* Unsigned byte offset - 12 bit */
|
||||
};
|
||||
static_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
|
||||
|
||||
|
|
@ -1518,16 +1512,14 @@ static_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 8, "Unexpected
|
|||
*/
|
||||
struct MTBUF_instruction : public Instruction {
|
||||
memory_sync_info sync;
|
||||
ac_hw_cache_flags cache;
|
||||
uint8_t dfmt : 4; /* Data Format of data in memory buffer */
|
||||
uint8_t nfmt : 3; /* Numeric format of data in memory */
|
||||
bool offen : 1; /* Supply an offset from VGPR (VADDR) */
|
||||
uint16_t idxen : 1; /* Supply an index from VGPR (VADDR) */
|
||||
uint16_t glc : 1; /* globally coherent */
|
||||
uint16_t dlc : 1; /* NAVI: device level coherent */
|
||||
uint16_t slc : 1; /* system level coherent */
|
||||
uint16_t tfe : 1; /* texture fail enable */
|
||||
uint16_t disable_wqm : 1; /* Require an exec mask without helper invocations */
|
||||
uint16_t padding : 10;
|
||||
bool idxen : 1; /* Supply an index from VGPR (VADDR) */
|
||||
bool tfe : 1; /* texture fail enable */
|
||||
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
|
||||
uint8_t padding : 5;
|
||||
uint16_t offset; /* Unsigned byte offset - 12 bit */
|
||||
};
|
||||
static_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
|
||||
|
|
@ -1543,12 +1535,10 @@ static_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 8, "Unexpected
|
|||
*/
|
||||
struct MIMG_instruction : public Instruction {
|
||||
memory_sync_info sync;
|
||||
ac_hw_cache_flags cache;
|
||||
uint8_t dmask; /* Data VGPR enable mask */
|
||||
uint8_t dim : 3; /* NAVI: dimensionality */
|
||||
bool unrm : 1; /* Force address to be un-normalized */
|
||||
bool dlc : 1; /* NAVI: device level coherent */
|
||||
bool glc : 1; /* globally coherent */
|
||||
bool slc : 1; /* system level coherent */
|
||||
bool tfe : 1; /* texture fail enable */
|
||||
bool da : 1; /* declare an array */
|
||||
bool lwe : 1; /* LOD warning enable */
|
||||
|
|
@ -1557,9 +1547,8 @@ struct MIMG_instruction : public Instruction {
|
|||
bool d16 : 1; /* Convert 32-bit data to 16-bit data */
|
||||
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
|
||||
bool strict_wqm : 1; /* VADDR is a linear VGPR and additional VGPRs may be copied into it */
|
||||
uint8_t padding0 : 1;
|
||||
uint8_t padding0 : 4;
|
||||
uint8_t padding1;
|
||||
uint8_t padding2;
|
||||
};
|
||||
static_assert(sizeof(MIMG_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
|
||||
|
||||
|
|
@ -1572,15 +1561,13 @@ static_assert(sizeof(MIMG_instruction) == sizeof(Instruction) + 8, "Unexpected p
|
|||
*/
|
||||
struct FLAT_instruction : public Instruction {
|
||||
memory_sync_info sync;
|
||||
bool slc : 1; /* system level coherent */
|
||||
bool glc : 1; /* globally coherent */
|
||||
bool dlc : 1; /* NAVI: device level coherent */
|
||||
ac_hw_cache_flags cache;
|
||||
bool lds : 1;
|
||||
bool nv : 1;
|
||||
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
|
||||
uint8_t padding0 : 2;
|
||||
uint8_t padding0 : 5;
|
||||
uint8_t padding1;
|
||||
int16_t offset; /* Vega/Navi only */
|
||||
uint16_t padding1;
|
||||
};
|
||||
static_assert(sizeof(FLAT_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
|
||||
|
||||
|
|
|
|||
|
|
@ -105,9 +105,7 @@ class Format(IntEnum):
|
|||
return [('uint32_t', 'imm', '0')]
|
||||
elif self == Format.SMEM:
|
||||
return [('memory_sync_info', 'sync', 'memory_sync_info()'),
|
||||
('bool', 'glc', 'false'),
|
||||
('bool', 'dlc', 'false'),
|
||||
('bool', 'nv', 'false')]
|
||||
('ac_hw_cache_flags', 'cache', '{{0, 0, 0, 0, 0}}')]
|
||||
elif self == Format.DS:
|
||||
return [('uint16_t', 'offset0', '0'),
|
||||
('uint8_t', 'offset1', '0'),
|
||||
|
|
@ -125,20 +123,15 @@ class Format(IntEnum):
|
|||
('bool', 'offen', None),
|
||||
('bool', 'idxen', 'false'),
|
||||
('bool', 'disable_wqm', 'false'),
|
||||
('bool', 'glc', 'false'),
|
||||
('bool', 'dlc', 'false'),
|
||||
('bool', 'slc', 'false'),
|
||||
('ac_hw_cache_flags', 'cache', '{{0, 0, 0, 0, 0}}'),
|
||||
('bool', 'tfe', 'false')]
|
||||
elif self == Format.MUBUF:
|
||||
return [('unsigned', 'offset', None),
|
||||
('bool', 'offen', None),
|
||||
('bool', 'swizzled', 'false'),
|
||||
('bool', 'idxen', 'false'),
|
||||
('bool', 'addr64', 'false'),
|
||||
('bool', 'disable_wqm', 'false'),
|
||||
('bool', 'glc', 'false'),
|
||||
('bool', 'dlc', 'false'),
|
||||
('bool', 'slc', 'false'),
|
||||
('ac_hw_cache_flags', 'cache', '{{0, 0, 0, 0, 0}}'),
|
||||
('bool', 'tfe', 'false'),
|
||||
('bool', 'lds', 'false')]
|
||||
elif self == Format.MIMG:
|
||||
|
|
@ -146,9 +139,7 @@ class Format(IntEnum):
|
|||
('bool', 'da', 'false'),
|
||||
('bool', 'unrm', 'false'),
|
||||
('bool', 'disable_wqm', 'false'),
|
||||
('bool', 'glc', 'false'),
|
||||
('bool', 'dlc', 'false'),
|
||||
('bool', 'slc', 'false'),
|
||||
('ac_hw_cache_flags', 'cache', '{{0, 0, 0, 0, 0}}'),
|
||||
('bool', 'tfe', 'false'),
|
||||
('bool', 'lwe', 'false'),
|
||||
('bool', 'r128', 'false'),
|
||||
|
|
@ -195,8 +186,7 @@ class Format(IntEnum):
|
|||
elif self in [Format.FLAT, Format.GLOBAL, Format.SCRATCH]:
|
||||
return [('int16_t', 'offset', 0),
|
||||
('memory_sync_info', 'sync', 'memory_sync_info()'),
|
||||
('bool', 'glc', 'false'),
|
||||
('bool', 'slc', 'false'),
|
||||
('ac_hw_cache_flags', 'cache', '{{0, 0, 0, 0, 0}}'),
|
||||
('bool', 'lds', 'false'),
|
||||
('bool', 'nv', 'false')]
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -164,8 +164,7 @@ struct InstrPred {
|
|||
case Format::SMEM: {
|
||||
SMEM_instruction& aS = a->smem();
|
||||
SMEM_instruction& bS = b->smem();
|
||||
return aS.sync == bS.sync && aS.glc == bS.glc && aS.dlc == bS.dlc && aS.nv == bS.nv &&
|
||||
aS.disable_wqm == bS.disable_wqm;
|
||||
return aS.sync == bS.sync && aS.cache.value == bS.cache.value;
|
||||
}
|
||||
case Format::VINTRP: {
|
||||
VINTRP_instruction& aI = a->vintrp();
|
||||
|
|
@ -203,21 +202,21 @@ struct InstrPred {
|
|||
MTBUF_instruction& bM = b->mtbuf();
|
||||
return aM.sync == bM.sync && aM.dfmt == bM.dfmt && aM.nfmt == bM.nfmt &&
|
||||
aM.offset == bM.offset && aM.offen == bM.offen && aM.idxen == bM.idxen &&
|
||||
aM.glc == bM.glc && aM.dlc == bM.dlc && aM.slc == bM.slc && aM.tfe == bM.tfe &&
|
||||
aM.cache.value == bM.cache.value && aM.tfe == bM.tfe &&
|
||||
aM.disable_wqm == bM.disable_wqm;
|
||||
}
|
||||
case Format::MUBUF: {
|
||||
MUBUF_instruction& aM = a->mubuf();
|
||||
MUBUF_instruction& bM = b->mubuf();
|
||||
return aM.sync == bM.sync && aM.offset == bM.offset && aM.offen == bM.offen &&
|
||||
aM.idxen == bM.idxen && aM.glc == bM.glc && aM.dlc == bM.dlc && aM.slc == bM.slc &&
|
||||
aM.tfe == bM.tfe && aM.lds == bM.lds && aM.disable_wqm == bM.disable_wqm;
|
||||
aM.idxen == bM.idxen && aM.cache.value == bM.cache.value && aM.tfe == bM.tfe &&
|
||||
aM.lds == bM.lds && aM.disable_wqm == bM.disable_wqm;
|
||||
}
|
||||
case Format::MIMG: {
|
||||
MIMG_instruction& aM = a->mimg();
|
||||
MIMG_instruction& bM = b->mimg();
|
||||
return aM.sync == bM.sync && aM.dmask == bM.dmask && aM.unrm == bM.unrm &&
|
||||
aM.glc == bM.glc && aM.slc == bM.slc && aM.tfe == bM.tfe && aM.da == bM.da &&
|
||||
aM.cache.value == bM.cache.value && aM.tfe == bM.tfe && aM.da == bM.da &&
|
||||
aM.lwe == bM.lwe && aM.r128 == bM.r128 && aM.a16 == bM.a16 && aM.d16 == bM.d16 &&
|
||||
aM.disable_wqm == bM.disable_wqm;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -860,10 +860,7 @@ smem_combine(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (!smem.definitions.empty())
|
||||
new_instr->definitions[0] = smem.definitions[0];
|
||||
new_instr->smem().sync = smem.sync;
|
||||
new_instr->smem().glc = smem.glc;
|
||||
new_instr->smem().dlc = smem.dlc;
|
||||
new_instr->smem().nv = smem.nv;
|
||||
new_instr->smem().disable_wqm = smem.disable_wqm;
|
||||
new_instr->smem().cache = smem.cache;
|
||||
instr.reset(new_instr);
|
||||
}
|
||||
}
|
||||
|
|
@ -1429,13 +1426,14 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
while (info.is_temp())
|
||||
info = ctx.info[info.temp.id()];
|
||||
|
||||
bool swizzled = mubuf.cache.value & ac_swizzled;
|
||||
/* According to AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(), vaddr
|
||||
* overflow for scratch accesses works only on GFX9+ and saddr overflow
|
||||
* never works. Since swizzling is the only thing that separates
|
||||
* scratch accesses and other accesses and swizzling changing how
|
||||
* addressing works significantly, this probably applies to swizzled
|
||||
* MUBUF accesses. */
|
||||
bool vaddr_prevent_overflow = mubuf.swizzled && ctx.program->gfx_level < GFX9;
|
||||
bool vaddr_prevent_overflow = swizzled && ctx.program->gfx_level < GFX9;
|
||||
|
||||
if (mubuf.offen && mubuf.idxen && i == 1 && info.is_vec() &&
|
||||
info.instr->operands.size() == 2 && info.instr->operands[0].isTemp() &&
|
||||
|
|
@ -1465,7 +1463,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
mubuf.offset += offset;
|
||||
continue;
|
||||
} else if (i == 2 && parse_base_offset(ctx, instr.get(), i, &base, &offset, true) &&
|
||||
base.regClass() == s1 && mubuf.offset + offset < 4096 && !mubuf.swizzled) {
|
||||
base.regClass() == s1 && mubuf.offset + offset < 4096 && !swizzled) {
|
||||
instr->operands[i].setTemp(base);
|
||||
mubuf.offset += offset;
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -262,6 +262,20 @@ print_sync(memory_sync_info sync, FILE* output)
|
|||
print_scope(sync.scope, output);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void
|
||||
print_cache_flags(enum amd_gfx_level gfx_level, const T& instr, FILE* output)
|
||||
{
|
||||
if (instr.cache.value & ac_glc)
|
||||
fprintf(output, " glc");
|
||||
if (instr.cache.value & ac_slc)
|
||||
fprintf(output, " slc");
|
||||
if (instr.cache.value & ac_dlc)
|
||||
fprintf(output, " dlc");
|
||||
if (instr.cache.value & ac_swizzled)
|
||||
fprintf(output, " swizzled");
|
||||
}
|
||||
|
||||
static void
|
||||
print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* output)
|
||||
{
|
||||
|
|
@ -428,12 +442,7 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
|
|||
}
|
||||
case Format::SMEM: {
|
||||
const SMEM_instruction& smem = instr->smem();
|
||||
if (smem.glc)
|
||||
fprintf(output, " glc");
|
||||
if (smem.dlc)
|
||||
fprintf(output, " dlc");
|
||||
if (smem.nv)
|
||||
fprintf(output, " nv");
|
||||
print_cache_flags(gfx_level, smem, output);
|
||||
print_sync(smem.sync, output);
|
||||
break;
|
||||
}
|
||||
|
|
@ -482,12 +491,7 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
|
|||
fprintf(output, " idxen");
|
||||
if (mubuf.addr64)
|
||||
fprintf(output, " addr64");
|
||||
if (mubuf.glc)
|
||||
fprintf(output, " glc");
|
||||
if (mubuf.dlc)
|
||||
fprintf(output, " dlc");
|
||||
if (mubuf.slc)
|
||||
fprintf(output, " slc");
|
||||
print_cache_flags(gfx_level, mubuf, output);
|
||||
if (mubuf.tfe)
|
||||
fprintf(output, " tfe");
|
||||
if (mubuf.lds)
|
||||
|
|
@ -517,12 +521,7 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
|
|||
}
|
||||
if (mimg.unrm)
|
||||
fprintf(output, " unrm");
|
||||
if (mimg.glc)
|
||||
fprintf(output, " glc");
|
||||
if (mimg.dlc)
|
||||
fprintf(output, " dlc");
|
||||
if (mimg.slc)
|
||||
fprintf(output, " slc");
|
||||
print_cache_flags(gfx_level, mimg, output);
|
||||
if (mimg.tfe)
|
||||
fprintf(output, " tfe");
|
||||
if (mimg.da)
|
||||
|
|
@ -594,12 +593,7 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
|
|||
const FLAT_instruction& flat = instr->flatlike();
|
||||
if (flat.offset)
|
||||
fprintf(output, " offset:%d", flat.offset);
|
||||
if (flat.glc)
|
||||
fprintf(output, " glc");
|
||||
if (flat.dlc)
|
||||
fprintf(output, " dlc");
|
||||
if (flat.slc)
|
||||
fprintf(output, " slc");
|
||||
print_cache_flags(gfx_level, flat, output);
|
||||
if (flat.lds)
|
||||
fprintf(output, " lds");
|
||||
if (flat.nv)
|
||||
|
|
@ -646,12 +640,7 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
|
|||
fprintf(output, " offen");
|
||||
if (mtbuf.idxen)
|
||||
fprintf(output, " idxen");
|
||||
if (mtbuf.glc)
|
||||
fprintf(output, " glc");
|
||||
if (mtbuf.dlc)
|
||||
fprintf(output, " dlc");
|
||||
if (mtbuf.slc)
|
||||
fprintf(output, " slc");
|
||||
print_cache_flags(gfx_level, mtbuf, output);
|
||||
if (mtbuf.tfe)
|
||||
fprintf(output, " tfe");
|
||||
if (mtbuf.disable_wqm)
|
||||
|
|
|
|||
|
|
@ -1324,8 +1324,9 @@ spill_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& inst
|
|||
offset, memory_sync_info(storage_vgpr_spill, semantic_private));
|
||||
} else {
|
||||
Instruction* instr = bld.mubuf(aco_opcode::buffer_store_dword, ctx.scratch_rsrc,
|
||||
Operand(v1), scratch_offset, elem, offset, false, true);
|
||||
Operand(v1), scratch_offset, elem, offset, false);
|
||||
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
||||
instr->mubuf().cache.value = ac_swizzled;
|
||||
}
|
||||
}
|
||||
} else if (ctx.program->gfx_level >= GFX9) {
|
||||
|
|
@ -1333,8 +1334,9 @@ spill_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& inst
|
|||
memory_sync_info(storage_vgpr_spill, semantic_private));
|
||||
} else {
|
||||
Instruction* instr = bld.mubuf(aco_opcode::buffer_store_dword, ctx.scratch_rsrc, Operand(v1),
|
||||
scratch_offset, temp, offset, false, true);
|
||||
scratch_offset, temp, offset, false);
|
||||
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
||||
instr->mubuf().cache.value = ac_swizzled;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1366,8 +1368,9 @@ reload_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& ins
|
|||
} else {
|
||||
Instruction* instr =
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, Definition(tmp), ctx.scratch_rsrc,
|
||||
Operand(v1), scratch_offset, offset, false, true);
|
||||
Operand(v1), scratch_offset, offset, false);
|
||||
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
||||
instr->mubuf().cache.value = ac_swizzled;
|
||||
}
|
||||
}
|
||||
bld.insert(vec);
|
||||
|
|
@ -1376,8 +1379,9 @@ reload_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& ins
|
|||
memory_sync_info(storage_vgpr_spill, semantic_private));
|
||||
} else {
|
||||
Instruction* instr = bld.mubuf(aco_opcode::buffer_load_dword, def, ctx.scratch_rsrc,
|
||||
Operand(v1), scratch_offset, offset, false, true);
|
||||
Operand(v1), scratch_offset, offset, false);
|
||||
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
||||
instr->mubuf().cache.value = ac_swizzled;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -411,13 +411,19 @@ BEGIN_TEST(assembler.smem)
|
|||
//! s_load_b32 s4, s[16:17], s8 offset:0x2a ; f4000108 1000002a
|
||||
bld.smem(aco_opcode::s_load_dword, dst, op_s2, Operand::c32(42), op_s1);
|
||||
|
||||
ac_hw_cache_flags cache_coherent;
|
||||
ac_hw_cache_flags cache_non_temporal;
|
||||
cache_coherent.value = ac_glc;
|
||||
cache_non_temporal.value = ac_dlc;
|
||||
|
||||
//~gfx11! s_buffer_load_b32 s4, s[32:35], s8 glc ; f4204110 10000000
|
||||
//~gfx12! s_buffer_load_b32 s4, s[32:35], s8 offset:0x0 scope:SCOPE_SYS ; f4620110 10000000
|
||||
bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().glc = true;
|
||||
bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().cache = cache_coherent;
|
||||
|
||||
//~gfx11! s_buffer_load_b32 s4, s[32:35], s8 dlc ; f4202110 10000000
|
||||
//~gfx12! (then repeated 1 times)
|
||||
bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().dlc = true;
|
||||
bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().cache =
|
||||
cache_non_temporal;
|
||||
|
||||
finish_assembler_test();
|
||||
}
|
||||
|
|
@ -482,22 +488,31 @@ BEGIN_TEST(assembler.mubuf)
|
|||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), op_s1, 84, false);
|
||||
|
||||
/* Various flags */
|
||||
ac_hw_cache_flags cache_coherent;
|
||||
ac_hw_cache_flags cache_sys_coherent;
|
||||
ac_hw_cache_flags cache_non_temporal;
|
||||
ac_hw_cache_flags cache_atomic_rtn;
|
||||
cache_coherent.value = ac_glc;
|
||||
cache_sys_coherent.value = ac_slc;
|
||||
cache_non_temporal.value = ac_dlc;
|
||||
cache_atomic_rtn.value = ac_glc;
|
||||
|
||||
//~gfx11! buffer_load_b32 v42, off, s[32:35], 0 glc ; e0504000 80082a80
|
||||
//~gfx12! buffer_load_b32 v42, off, s[32:35], null scope:SCOPE_SYS ; c405007c 008c402a 00000000
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
|
||||
->mubuf()
|
||||
.glc = true;
|
||||
.cache = cache_coherent;
|
||||
|
||||
//~gfx11! buffer_load_b32 v42, off, s[32:35], 0 dlc ; e0502000 80082a80
|
||||
//~gfx12! (then repeated 2 times)
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
|
||||
->mubuf()
|
||||
.dlc = true;
|
||||
.cache = cache_non_temporal;
|
||||
|
||||
//~gfx11! buffer_load_b32 v42, off, s[32:35], 0 slc ; e0501000 80082a80
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
|
||||
->mubuf()
|
||||
.slc = true;
|
||||
.cache = cache_sys_coherent;
|
||||
|
||||
//; if llvm_ver >= 16 and variant == 'gfx11':
|
||||
//; insert_pattern('buffer_load_b32 v[42:43], off, s[32:35], 0 tfe ; e0500000 80282a80')
|
||||
|
|
@ -562,7 +577,7 @@ BEGIN_TEST(assembler.mubuf)
|
|||
bld.mubuf(aco_opcode::buffer_atomic_add, Definition(op_v1.physReg(), v1), op_s4, Operand(v1),
|
||||
Operand::zero(), op_v1, 0, false)
|
||||
->mubuf()
|
||||
.glc = true;
|
||||
.cache = cache_atomic_rtn;
|
||||
|
||||
finish_assembler_test();
|
||||
}
|
||||
|
|
@ -632,25 +647,32 @@ BEGIN_TEST(assembler.mtbuf)
|
|||
false);
|
||||
|
||||
/* Various flags */
|
||||
ac_hw_cache_flags cache_coherent;
|
||||
ac_hw_cache_flags cache_sys_coherent;
|
||||
ac_hw_cache_flags cache_non_temporal;
|
||||
cache_coherent.value = ac_glc;
|
||||
cache_sys_coherent.value = ac_slc;
|
||||
cache_non_temporal.value = ac_dlc;
|
||||
|
||||
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] glc ; e9904000 80082a80
|
||||
//~gfx12! tbuffer_load_format_x v42, off, s[32:35], null format:[BUF_FMT_32_32_FLOAT] scope:SCOPE_SYS ; c420007c 190c402a 00000080
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
|
||||
nfmt, 0, false)
|
||||
->mtbuf()
|
||||
.glc = true;
|
||||
.cache = cache_coherent;
|
||||
|
||||
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] dlc ; e9902000 80082a80
|
||||
//~gfx12! (then repeated 2 times)
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
|
||||
nfmt, 0, false)
|
||||
->mtbuf()
|
||||
.dlc = true;
|
||||
.cache = cache_non_temporal;
|
||||
|
||||
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] slc ; e9901000 80082a80
|
||||
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
|
||||
nfmt, 0, false)
|
||||
->mtbuf()
|
||||
.slc = true;
|
||||
.cache = cache_sys_coherent;
|
||||
|
||||
//; if llvm_ver >= 16 and variant == 'gfx11':
|
||||
//; insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] ; e9900000 80282a80')
|
||||
|
|
@ -718,19 +740,28 @@ BEGIN_TEST(assembler.mimg)
|
|||
0x1;
|
||||
|
||||
/* Various flags */
|
||||
ac_hw_cache_flags cache_coherent;
|
||||
ac_hw_cache_flags cache_sys_coherent;
|
||||
ac_hw_cache_flags cache_non_temporal;
|
||||
ac_hw_cache_flags cache_atomic_rtn;
|
||||
cache_coherent.value = ac_glc;
|
||||
cache_sys_coherent.value = ac_slc;
|
||||
cache_non_temporal.value = ac_dlc;
|
||||
cache_atomic_rtn.value = ac_glc;
|
||||
|
||||
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D dlc ; f06c2f00 2010540a
|
||||
//~gfx12! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D scope:SCOPE_SYS ; e7c6c000 100c8054 0000000a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().dlc =
|
||||
true;
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().cache =
|
||||
cache_non_temporal;
|
||||
|
||||
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D glc ; f06c4f00 2010540a
|
||||
//~gfx12! (then repeated 2 times)
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().glc =
|
||||
true;
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().cache =
|
||||
cache_coherent;
|
||||
|
||||
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D slc ; f06c1f00 2010540a
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().slc =
|
||||
true;
|
||||
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().cache =
|
||||
cache_sys_coherent;
|
||||
|
||||
//~gfx11! image_sample v[84:88], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; f06c0f00 2030540a
|
||||
//~gfx12! image_sample v[84:88], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; e7c6c008 10008054 0000000a
|
||||
|
|
@ -799,7 +830,7 @@ BEGIN_TEST(assembler.mimg)
|
|||
//~gfx11! image_atomic_add v10, v20, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D glc ; f0304f04 00100a14
|
||||
//~gfx12! image_atomic_add_uint v10, [v20, v21, v0, v0], s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D th:TH_ATOMIC_RETURN ; d3c30001 0010800a 00001514
|
||||
bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4),
|
||||
op_v1, op_v2, 0xf, false, false, false, true)
|
||||
op_v1, op_v2, 0xf, false, false, false, cache_atomic_rtn)
|
||||
->mimg()
|
||||
.dim = ac_image_2d;
|
||||
|
||||
|
|
@ -876,16 +907,28 @@ BEGIN_TEST(assembler.flat)
|
|||
bld.global(aco_opcode::global_load_dword, dst_v1, op_v2, Operand(s1), 84);
|
||||
|
||||
/* Various flags */
|
||||
ac_hw_cache_flags cache_coherent;
|
||||
ac_hw_cache_flags cache_sys_coherent;
|
||||
ac_hw_cache_flags cache_non_temporal;
|
||||
ac_hw_cache_flags cache_atomic_rtn;
|
||||
cache_coherent.value = ac_glc;
|
||||
cache_sys_coherent.value = ac_slc;
|
||||
cache_non_temporal.value = ac_dlc;
|
||||
cache_atomic_rtn.value = ac_glc;
|
||||
|
||||
//~gfx11! flat_load_b32 v42, v[20:21] slc ; dc508000 2a7c0014
|
||||
//~gfx12! flat_load_b32 v42, v[20:21] scope:SCOPE_SYS ; ec05007c 000c002a 00000014
|
||||
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().slc = true;
|
||||
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().cache =
|
||||
cache_sys_coherent;
|
||||
|
||||
//~gfx11! flat_load_b32 v42, v[20:21] glc ; dc504000 2a7c0014
|
||||
//~gfx12! (then repeated 2 times)
|
||||
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().glc = true;
|
||||
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().cache =
|
||||
cache_coherent;
|
||||
|
||||
//~gfx11! flat_load_b32 v42, v[20:21] dlc ; dc502000 2a7c0014
|
||||
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().dlc = true;
|
||||
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().cache =
|
||||
cache_non_temporal;
|
||||
|
||||
/* Stores */
|
||||
//~gfx11! flat_store_b32 v[20:21], v10 ; dc680000 007c0a14
|
||||
|
|
@ -895,8 +938,8 @@ BEGIN_TEST(assembler.flat)
|
|||
/* Atomic with return */
|
||||
//~gfx11! global_atomic_add_u32 v42, v[20:21], v10, off glc ; dcd64000 2a7c0a14
|
||||
//~gfx12! global_atomic_add_u32 v42, v[20:21], v10, off th:TH_ATOMIC_RETURN ; ee0d407c 0510002a 00000014
|
||||
bld.global(aco_opcode::global_atomic_add, dst_v1, op_v2, Operand(s1), op_v1)->global().glc =
|
||||
true;
|
||||
bld.global(aco_opcode::global_atomic_add, dst_v1, op_v2, Operand(s1), op_v1)->global().cache =
|
||||
cache_atomic_rtn;
|
||||
|
||||
finish_assembler_test();
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue