amd: stop using custom gl_access_qualifier for access type

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36764>
This commit is contained in:
Georg Lehmann 2025-08-13 18:22:12 +02:00 committed by Marge Bot
parent f17cb6b714
commit 9ed94371f7
8 changed files with 99 additions and 112 deletions

View file

@ -1080,22 +1080,18 @@ void ac_get_scratch_tmpring_size(const struct radeon_info *info, unsigned num_sc
S_0286E8_WAVESIZE(bytes_per_wave >> info->scratch_wavesize_granularity_shift);
}
/* Convert chip-agnostic memory access flags into hw-specific cache flags.
*
* "access" must be a result of ac_nir_get_mem_access_flags() with the appropriate ACCESS_TYPE_*
* flags set.
*/
/* Convert chip-agnostic memory access flags into hw-specific cache flags. */
union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
enum gl_access_qualifier access)
enum gl_access_qualifier access,
enum ac_access_type type)
{
union ac_hw_cache_flags result;
result.value = 0;
assert(util_bitcount(access & (ACCESS_TYPE_LOAD | ACCESS_TYPE_STORE |
ACCESS_TYPE_ATOMIC)) == 1);
assert(!(access & ACCESS_SMEM_AMD) || access & ACCESS_TYPE_LOAD);
bool is_store = type == ac_access_type_store || type == ac_access_type_store_subdword;
assert(!(access & ACCESS_SMEM_AMD) || type == ac_access_type_load);
assert(!(access & ACCESS_IS_SWIZZLED_AMD) || !(access & ACCESS_SMEM_AMD));
assert(!(access & ACCESS_MAY_STORE_SUBDWORD) || access & ACCESS_TYPE_STORE);
bool scope_is_device = access & (ACCESS_COHERENT | ACCESS_VOLATILE);
@ -1111,11 +1107,11 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
}
if (access & ACCESS_NON_TEMPORAL) {
if (access & ACCESS_TYPE_LOAD) {
if (type == ac_access_type_load) {
/* Don't use non_temporal for SMEM because it can't set regular_temporal for MALL. */
if (!(access & ACCESS_SMEM_AMD))
result.gfx12.temporal_hint = gfx12_load_near_non_temporal_far_regular_temporal;
} else if (access & ACCESS_TYPE_STORE) {
} else if (is_store) {
result.gfx12.temporal_hint = gfx12_store_near_non_temporal_far_regular_temporal;
} else {
result.gfx12.temporal_hint = gfx12_atomic_non_temporal;
@ -1130,7 +1126,7 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
*
* GL0 doesn't have a non-temporal flag, so you always get LRU caching in CU scope.
*/
if (access & ACCESS_TYPE_LOAD && scope_is_device)
if (type == ac_access_type_load && scope_is_device)
result.value |= ac_glc;
if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_SMEM_AMD))
@ -1162,8 +1158,8 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
* "stream" allows write combining in GL2. "coherent bypass" doesn't.
* "non-coherent bypass" doesn't guarantee ordering with any coherent stores.
*/
if (scope_is_device && !(access & ACCESS_TYPE_ATOMIC))
result.value |= ac_glc | (access & ACCESS_TYPE_LOAD ? ac_dlc : 0);
if (scope_is_device && type != ac_access_type_atomic)
result.value |= ac_glc | (type == ac_access_type_load ? ac_dlc : 0);
if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_SMEM_AMD))
result.value |= ac_slc;
@ -1187,7 +1183,7 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
* SMEM loads:
* GLC means device scope (available on GFX8+)
*/
if (scope_is_device && !(access & ACCESS_TYPE_ATOMIC)) {
if (scope_is_device && type != ac_access_type_atomic) {
/* SMEM doesn't support the device scope on GFX6-7. */
assert(gfx_level >= GFX8 || !(access & ACCESS_SMEM_AMD));
result.value |= ac_glc;
@ -1199,7 +1195,7 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
/* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All store opcodes not
* aligned to a dword are affected.
*/
if (gfx_level == GFX6 && access & ACCESS_MAY_STORE_SUBDWORD)
if (gfx_level == GFX6 && type == ac_access_type_store_subdword)
result.value |= ac_glc;
}

View file

@ -36,14 +36,12 @@ extern "C" {
/* An extension of gl_access_qualifier describing other aspects of memory operations
* for code generation.
*/
enum {
/* Only one of LOAD/STORE/ATOMIC can be set. */
ACCESS_TYPE_LOAD = BITFIELD_BIT(27),
ACCESS_TYPE_STORE = BITFIELD_BIT(28),
ACCESS_TYPE_ATOMIC = BITFIELD_BIT(29),
enum ac_access_type {
ac_access_type_load,
ac_access_type_store,
/* Whether a store offset or size alignment is less than 4. */
ACCESS_MAY_STORE_SUBDWORD = BITFIELD_BIT(31),
ac_access_type_store_subdword,
ac_access_type_atomic,
};
/* GFX6-11. The meaning of these enums is different between chips. They match LLVM definitions,
@ -307,7 +305,8 @@ ac_ngg_get_scratch_lds_size(mesa_shader_stage stage,
bool compact_primitives);
union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
enum gl_access_qualifier access);
enum gl_access_qualifier access,
enum ac_access_type type);
unsigned ac_get_all_edge_flag_bits(enum amd_gfx_level gfx_level);

View file

@ -660,37 +660,24 @@ bool ac_nir_scalarize_overfetching_loads_callback(const nir_instr *instr, const
return used_load_size < align_load_store_size(gfx_level, load_size, uses_smem, is_shared);
}
/* Get chip-agnostic memory instruction access flags (as opposed to chip-specific GLC/DLC/SLC)
* from a NIR memory intrinsic.
*/
enum gl_access_qualifier ac_nir_get_mem_access_flags(const nir_intrinsic_instr *instr)
/* Determine if the store can be subdword (for the GFX6 TC L1 bug workaround) */
bool ac_nir_store_may_be_subdword(const nir_intrinsic_instr *instr)
{
enum gl_access_qualifier access =
nir_intrinsic_has_access(instr) ? nir_intrinsic_access(instr) : 0;
assert(!nir_intrinsic_infos[instr->intrinsic].has_dest);
switch (instr->intrinsic) {
case nir_intrinsic_store_ssbo:
case nir_intrinsic_store_buffer_amd:
case nir_intrinsic_store_global:
case nir_intrinsic_store_global_amd:
return (nir_intrinsic_has_align_offset(instr) && nir_intrinsic_align(instr) % 4 != 0) ||
((instr->src[0].ssa->bit_size / 8) * instr->src[0].ssa->num_components) % 4 != 0;
/* Determine ACCESS_MAY_STORE_SUBDWORD. (for the GFX6 TC L1 bug workaround) */
if (!nir_intrinsic_infos[instr->intrinsic].has_dest) {
switch (instr->intrinsic) {
case nir_intrinsic_bindless_image_store:
access |= ACCESS_MAY_STORE_SUBDWORD;
break;
case nir_intrinsic_store_ssbo:
case nir_intrinsic_store_buffer_amd:
case nir_intrinsic_store_global:
case nir_intrinsic_store_global_amd:
if (access & ACCESS_USES_FORMAT_AMD ||
(nir_intrinsic_has_align_offset(instr) && nir_intrinsic_align(instr) % 4 != 0) ||
((instr->src[0].ssa->bit_size / 8) * instr->src[0].ssa->num_components) % 4 != 0)
access |= ACCESS_MAY_STORE_SUBDWORD;
break;
default:
UNREACHABLE("unexpected store instruction");
}
default:
UNREACHABLE("unexpected store instruction");
}
return access;
return false;
}
/**

View file

@ -431,8 +431,8 @@ ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigne
bool
ac_nir_scalarize_overfetching_loads_callback(const nir_instr *instr, const void *data);
enum gl_access_qualifier
ac_nir_get_mem_access_flags(const nir_intrinsic_instr *instr);
bool
ac_nir_store_may_be_subdword(const nir_intrinsic_instr *instr);
uint8_t
ac_nir_lower_phis_to_scalar_cb(const nir_instr *instr, const void *_);

View file

@ -1673,15 +1673,15 @@ visit_load_per_vertex_input(isel_context* ctx, nir_intrinsic_instr* instr)
}
ac_hw_cache_flags
get_cache_flags(isel_context* ctx, unsigned access)
get_cache_flags(isel_context* ctx, unsigned access, enum ac_access_type type)
{
return ac_get_hw_cache_flags(ctx->program->gfx_level, (gl_access_qualifier)access);
return ac_get_hw_cache_flags(ctx->program->gfx_level, (gl_access_qualifier)access, type);
}
ac_hw_cache_flags
get_atomic_cache_flags(isel_context* ctx, bool return_previous)
{
ac_hw_cache_flags cache = get_cache_flags(ctx, ACCESS_TYPE_ATOMIC);
ac_hw_cache_flags cache = get_cache_flags(ctx, 0, ac_access_type_atomic);
if (return_previous && ctx->program->gfx_level >= GFX12)
cache.gfx12.temporal_hint |= gfx12_atomic_return;
else if (return_previous)
@ -1710,7 +1710,7 @@ load_buffer(isel_context* ctx, unsigned num_components, unsigned component_size,
}
LoadEmitInfo info = {Operand(offset), dst, num_components, component_size, rsrc};
info.cache = get_cache_flags(ctx, access | ACCESS_TYPE_LOAD);
info.cache = get_cache_flags(ctx, access, ac_access_type_load);
info.sync = sync;
info.align_mul = align_mul;
info.align_offset = align_offset;
@ -2069,7 +2069,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
load->operands[2] = Operand::c32(0);
load->definitions[0] = Definition(tmp);
load->mubuf().idxen = true;
load->mubuf().cache = get_cache_flags(ctx, nir_intrinsic_access(instr) | ACCESS_TYPE_LOAD);
load->mubuf().cache = get_cache_flags(ctx, nir_intrinsic_access(instr), ac_access_type_load);
load->mubuf().sync = sync;
load->mubuf().tfe = is_sparse;
if (load->mubuf().tfe)
@ -2089,7 +2089,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
Operand vdata = is_sparse ? emit_tfe_init(bld, tmp) : Operand(v1);
MIMG_instruction* load =
emit_mimg(bld, opcode, {tmp}, resource, Operand(s4), coords, false, vdata);
load->cache = get_cache_flags(ctx, nir_intrinsic_access(instr) | ACCESS_TYPE_LOAD);
load->cache = get_cache_flags(ctx, nir_intrinsic_access(instr), ac_access_type_load);
load->a16 = instr->src[1].ssa->bit_size == 16;
load->d16 = d16;
load->dmask = dmask;
@ -2137,8 +2137,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
memory_sync_info sync = get_memory_sync_info(instr, storage_image, 0);
unsigned access = nir_intrinsic_access(instr);
ac_hw_cache_flags cache =
get_cache_flags(ctx, access | ACCESS_TYPE_STORE | ACCESS_MAY_STORE_SUBDWORD);
ac_hw_cache_flags cache = get_cache_flags(ctx, access, ac_access_type_store_subdword);
uint32_t dmask = BITFIELD_MASK(num_components);
if (instr->src[3].ssa->bit_size == 32 || instr->src[3].ssa->bit_size == 16) {
@ -2446,9 +2445,10 @@ visit_store_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
for (unsigned i = 0; i < write_count; i++) {
aco_opcode op = get_buffer_store_op(write_datas[i].bytes());
unsigned access = nir_intrinsic_access(instr) | ACCESS_TYPE_STORE;
unsigned access = nir_intrinsic_access(instr);
enum ac_access_type type = ac_access_type_store;
if (write_datas[i].bytes() < 4)
access |= ACCESS_MAY_STORE_SUBDWORD;
type = ac_access_type_store_subdword;
aco_ptr<Instruction> store{create_instruction(op, Format::MUBUF, 6, 0)};
store->operands[0] = Operand(rsrc);
@ -2459,7 +2459,7 @@ visit_store_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
store->operands[5] = Operand();
store->mubuf().offset = offsets[i];
store->mubuf().offen = (offset.type() == RegType::vgpr);
store->mubuf().cache = get_cache_flags(ctx, access);
store->mubuf().cache = get_cache_flags(ctx, access, type);
store->mubuf().disable_wqm = true;
store->mubuf().sync = sync;
ctx->program->needs_exact = true;
@ -2534,6 +2534,7 @@ visit_load_global(isel_context* ctx, nir_intrinsic_instr* instr)
Builder bld(ctx->program, ctx->block);
unsigned num_components = instr->num_components;
unsigned component_size = instr->def.bit_size / 8;
unsigned access = nir_intrinsic_access(instr);
Temp addr, offset;
uint32_t const_offset;
@ -2550,21 +2551,19 @@ visit_load_global(isel_context* ctx, nir_intrinsic_instr* instr)
info.align_offset = nir_intrinsic_align_offset(instr);
info.sync = get_memory_sync_info(instr, storage_buffer, 0);
info.offset_src = &instr->src[1];
info.cache = get_cache_flags(ctx, access, ac_access_type_load);
unsigned access = nir_intrinsic_access(instr) | ACCESS_TYPE_LOAD;
if (access & ACCESS_SMEM_AMD) {
assert(component_size >= 4 ||
(num_components * component_size <= 2 && ctx->program->gfx_level >= GFX12));
if (info.resource.id())
info.resource = bld.as_uniform(info.resource);
info.offset = Operand(bld.as_uniform(info.offset));
info.cache = get_cache_flags(ctx, access);
EmitLoadParameters params = smem_load_params;
params.max_const_offset = ctx->program->dev.smem_offset_max;
emit_load(ctx, bld, info, params);
} else {
EmitLoadParameters params = global_load_params;
info.cache = get_cache_flags(ctx, access);
emit_load(ctx, bld, info, params);
}
}
@ -2596,9 +2595,10 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
Format format = lower_global_address(ctx, bld, offsets[i], &write_address,
&write_const_offset, &write_offset, &instr->src[2]);
unsigned access = nir_intrinsic_access(instr) | ACCESS_TYPE_STORE;
unsigned access = nir_intrinsic_access(instr);
enum ac_access_type type = ac_access_type_store;
if (write_datas[i].bytes() < 4)
access |= ACCESS_MAY_STORE_SUBDWORD;
type = ac_access_type_store_subdword;
if (format != Format::MUBUF) {
bool global = format == Format::GLOBAL;
@ -2632,7 +2632,7 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
flat->operands[2] = Operand(write_datas[i]);
flat->operands[3] = Operand();
flat->operands[4] = Operand();
flat->flatlike().cache = get_cache_flags(ctx, access);
flat->flatlike().cache = get_cache_flags(ctx, access, type);
assert(global || !write_const_offset);
flat->flatlike().offset = write_const_offset;
flat->flatlike().disable_wqm = true;
@ -2660,7 +2660,7 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
mubuf->operands[4] = Operand();
mubuf->operands[5] = Operand();
mubuf->mubuf().offen = write_offset.type() == RegType::vgpr;
mubuf->mubuf().cache = get_cache_flags(ctx, access);
mubuf->mubuf().cache = get_cache_flags(ctx, access, type);
mubuf->mubuf().offset = write_const_offset;
mubuf->mubuf().addr64 = write_address.type() == RegType::vgpr;
mubuf->mubuf().disable_wqm = true;
@ -2858,7 +2858,8 @@ visit_load_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
s_offset_zero ? Temp(0, s1) : bld.as_uniform(get_ssa_temp(ctx, intrin->src[2].ssa));
Temp idx = idxen ? as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[3].ssa)) : Temp();
ac_hw_cache_flags cache = get_cache_flags(ctx, nir_intrinsic_access(intrin) | ACCESS_TYPE_LOAD);
ac_hw_cache_flags cache =
get_cache_flags(ctx, nir_intrinsic_access(intrin), ac_access_type_load);
unsigned const_offset = nir_intrinsic_base(intrin);
unsigned elem_size_bytes = intrin->def.bit_size / 8u;
@ -2979,9 +2980,10 @@ visit_store_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
vaddr_op = Operand(idx);
unsigned access = nir_intrinsic_access(intrin);
enum ac_access_type type = ac_access_type_store;
if (write_datas[i].bytes() < 4)
access |= ACCESS_MAY_STORE_SUBDWORD;
ac_hw_cache_flags cache = get_cache_flags(ctx, access | ACCESS_TYPE_STORE);
type = ac_access_type_store_subdword;
ac_hw_cache_flags cache = get_cache_flags(ctx, access, type);
Instruction* mubuf = bld.mubuf(op, Operand(descriptor), vaddr_op, s_offset,
Operand(write_datas[i]), const_offset, offen, idxen,
@ -3360,7 +3362,7 @@ visit_load_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
LoadEmitInfo info = {Operand(v1), dst, instr->def.num_components, instr->def.bit_size / 8u};
info.align_mul = nir_intrinsic_align_mul(instr);
info.align_offset = nir_intrinsic_align_offset(instr);
info.cache = get_cache_flags(ctx, ACCESS_TYPE_LOAD | ACCESS_IS_SWIZZLED_AMD);
info.cache = get_cache_flags(ctx, ACCESS_IS_SWIZZLED_AMD, ac_access_type_load);
info.swizzle_component_size = ctx->program->gfx_level <= GFX8 ? 4 : 0;
info.sync = memory_sync_info(storage_scratch, semantic_private);
if (ctx->program->gfx_level >= GFX9) {
@ -3464,9 +3466,9 @@ visit_store_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
Instruction* mubuf = bld.mubuf(op, rsrc, offset, ctx->program->scratch_offsets.back(),
write_datas[i], offsets[i], true);
mubuf->mubuf().sync = memory_sync_info(storage_scratch, semantic_private);
unsigned access = ACCESS_TYPE_STORE | ACCESS_IS_SWIZZLED_AMD |
(write_datas[i].bytes() < 4 ? ACCESS_MAY_STORE_SUBDWORD : 0);
mubuf->mubuf().cache = get_cache_flags(ctx, access);
enum ac_access_type type =
write_datas[i].bytes() < 4 ? ac_access_type_store_subdword : ac_access_type_store;
mubuf->mubuf().cache = get_cache_flags(ctx, ACCESS_IS_SWIZZLED_AMD, type);
}
}
}

View file

@ -855,16 +855,18 @@ LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx, struct ac_llvm_p
return ac_build_load_custom(ctx, ptr.t, ptr.v, index, true, true, true);
}
static unsigned get_cache_flags(struct ac_llvm_context *ctx, enum gl_access_qualifier access)
static unsigned get_cache_flags(struct ac_llvm_context *ctx, enum gl_access_qualifier access,
enum ac_access_type type)
{
return ac_get_hw_cache_flags(ctx->gfx_level, access).value;
return ac_get_hw_cache_flags(ctx->gfx_level, access, type).value;
}
static void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
LLVMValueRef data, LLVMValueRef vindex,
LLVMValueRef voffset, LLVMValueRef soffset,
enum gl_access_qualifier access, bool use_format)
enum gl_access_qualifier access, bool may_subdword, bool use_format)
{
enum ac_access_type type = may_subdword ? ac_access_type_store_subdword : ac_access_type_store;
LLVMValueRef args[6];
int idx = 0;
args[idx++] = data;
@ -873,7 +875,7 @@ static void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueR
args[idx++] = vindex ? vindex : ctx->i32_0;
args[idx++] = voffset ? voffset : ctx->i32_0;
args[idx++] = soffset ? soffset : ctx->i32_0;
args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | ACCESS_TYPE_STORE), 0);
args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access, type), 0);
const char *indexing_kind = vindex ? "struct" : "raw";
char name[256], type_name[8];
@ -890,15 +892,16 @@ static void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueR
}
void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef data,
LLVMValueRef vindex, LLVMValueRef voffset, enum gl_access_qualifier access)
LLVMValueRef vindex, LLVMValueRef voffset, enum gl_access_qualifier access,
bool may_subdword)
{
ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL, access, true);
ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL, access, may_subdword, true);
}
/* buffer_store_dword(,x2,x3,x4) <- the suffix is selected by the type of vdata. */
void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset,
enum gl_access_qualifier access)
enum gl_access_qualifier access, bool may_subdword)
{
unsigned num_channels = ac_get_llvm_num_components(vdata);
@ -914,13 +917,13 @@ void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
voffset2 = LLVMBuildAdd(ctx->builder, voffset ? voffset : ctx->i32_0,
LLVMConstInt(ctx->i32, 8, 0), "");
ac_build_buffer_store_dword(ctx, rsrc, v01, vindex, voffset, soffset, access);
ac_build_buffer_store_dword(ctx, rsrc, v[2], vindex, voffset2, soffset, access);
ac_build_buffer_store_dword(ctx, rsrc, v01, vindex, voffset, soffset, access, may_subdword);
ac_build_buffer_store_dword(ctx, rsrc, v[2], vindex, voffset2, soffset, access, may_subdword);
return;
}
ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata), vindex, voffset, soffset,
access, false);
access, may_subdword, false);
}
static LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
@ -936,7 +939,7 @@ static LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, LLV
args[idx++] = vindex;
args[idx++] = voffset ? voffset : ctx->i32_0;
args[idx++] = soffset ? soffset : ctx->i32_0;
args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | ACCESS_TYPE_LOAD), 0);
args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access, ac_access_type_load), 0);
unsigned func =
!ac_has_vec3_support(ctx->gfx_level, use_format) && num_channels == 3 ? 4 : num_channels;
const char *indexing_kind = vindex ? "struct" : "raw";
@ -990,8 +993,7 @@ LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc
LLVMValueRef args[3] = {
rsrc,
offset,
LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | ACCESS_TYPE_LOAD |
ACCESS_SMEM_AMD), 0),
LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | ACCESS_SMEM_AMD, ac_access_type_load), 0),
};
result[i] = ac_build_intrinsic(ctx, name, channel_type, args, 3, AC_ATTR_INVARIANT_LOAD);
}
@ -1028,7 +1030,7 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueR
assert(!d16);
union ac_hw_cache_flags cache_flags =
ac_get_hw_cache_flags(ctx->gfx_level, access | ACCESS_TYPE_LOAD);
ac_get_hw_cache_flags(ctx->gfx_level, access, ac_access_type_load);
char code[1024];
/* The definition in the assembly and the one in the constraint string
@ -1115,7 +1117,7 @@ static LLVMValueRef ac_build_tbuffer_load(struct ac_llvm_context *ctx, LLVMValue
args[idx++] = voffset ? voffset : ctx->i32_0;
args[idx++] = soffset ? soffset : ctx->i32_0;
args[idx++] = LLVMConstInt(ctx->i32, tbuffer_format, 0);
args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | ACCESS_TYPE_LOAD), 0);
args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access, ac_access_type_load), 0);
const char *indexing_kind = vindex ? "struct" : "raw";
char name[256], type_name[8];
@ -1224,7 +1226,7 @@ void ac_build_buffer_store_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
{
vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, "");
ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, access, false);
ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, access, true, false);
}
void ac_build_buffer_store_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
@ -1232,7 +1234,7 @@ void ac_build_buffer_store_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
{
vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i8, "");
ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, access, false);
ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, access, true, false);
}
/**
@ -1723,9 +1725,9 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_
args[num_args++] = a->tfe ? ctx->i32_1 : ctx->i32_0; /* texfailctrl */
args[num_args++] = LLVMConstInt(
ctx->i32, get_cache_flags(ctx,
a->access |
(atomic ? ACCESS_TYPE_ATOMIC :
load ? ACCESS_TYPE_LOAD : ACCESS_TYPE_STORE)),
a->access,
(atomic ? ac_access_type_atomic :
load ? ac_access_type_load : ac_access_type_store_subdword)),
false);
const char *name;

View file

@ -232,10 +232,10 @@ LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx, struct ac_llvm_p
void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset,
enum gl_access_qualifier access);
enum gl_access_qualifier access, bool may_subdword);
void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef data,
LLVMValueRef vindex, LLVMValueRef voffset, enum gl_access_qualifier access);
LLVMValueRef vindex, LLVMValueRef voffset, enum gl_access_qualifier access, bool may_subdword);
LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, int num_channels,
LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset,

View file

@ -1574,7 +1574,8 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in
LLVMValueRef src_data = get_src(ctx, instr->src[0]);
int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
unsigned writemask = nir_intrinsic_write_mask(instr);
enum gl_access_qualifier access = ac_nir_get_mem_access_flags(instr);
enum gl_access_qualifier access = nir_intrinsic_access(instr);
bool may_subdword = ac_nir_store_may_be_subdword(instr);
struct waterfall_context wctx;
LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]);
@ -1655,7 +1656,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in
data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, "");
ac_build_buffer_store_dword(&ctx->ac, rsrc, data, NULL, offset,
ctx->ac.i32_0, access);
ctx->ac.i32_0, access, may_subdword);
}
}
@ -1797,7 +1798,7 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, nir_intrinsic_
unsigned cache_flags =
ac_get_hw_cache_flags(ctx->ac.gfx_level,
ac_nir_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
nir_intrinsic_access(instr), ac_access_type_atomic).value;
params[arg_count++] = data;
params[arg_count++] = descriptor;
@ -1825,7 +1826,7 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, nir_intrinsic_
int elem_size_bytes = instr->def.bit_size / 8;
int num_components = instr->num_components;
enum gl_access_qualifier access = ac_nir_get_mem_access_flags(instr);
enum gl_access_qualifier access = nir_intrinsic_access(instr);
LLVMValueRef offset = get_src(ctx, instr->src[1]);
LLVMValueRef rsrc = ctx->abi->load_ssbo ?
@ -2230,7 +2231,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intri
struct ac_image_args args = {0};
args.access = ac_nir_get_mem_access_flags(instr);
args.access = nir_intrinsic_access(instr);
args.tfe = instr->intrinsic == nir_intrinsic_bindless_image_sparse_load;
if (dim == GLSL_SAMPLER_DIM_BUF) {
@ -2316,7 +2317,7 @@ static void visit_image_store(struct ac_nir_context *ctx, const nir_intrinsic_in
LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
struct ac_image_args args = {0};
args.access = ac_nir_get_mem_access_flags(instr);
args.access = nir_intrinsic_access(instr);
LLVMValueRef src = get_src(ctx, instr->src[3]);
if (instr->src[3].ssa->bit_size == 64) {
@ -2338,7 +2339,7 @@ static void visit_image_store(struct ac_nir_context *ctx, const nir_intrinsic_in
vindex =
LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, "");
ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex, ctx->ac.i32_0, args.access);
ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex, ctx->ac.i32_0, args.access, true);
} else {
bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;
@ -2447,7 +2448,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_int
char type[8];
unsigned cache_flags =
ac_get_hw_cache_flags(ctx->ac.gfx_level,
ac_nir_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
nir_intrinsic_access(instr), ac_access_type_atomic).value;
params[param_count++] = ctx->ac.i32_0; /* soffset */
params[param_count++] = LLVMConstInt(ctx->ac.i32, cache_flags, 0);
@ -2471,7 +2472,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_int
get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
args.dim = ac_get_image_dim(ctx->ac.gfx_level, dim, is_array);
args.a16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.coords[0])) == 16;
args.access = ac_nir_get_mem_access_flags(instr);
args.access = nir_intrinsic_access(instr);
result = ac_build_image_opcode(&ctx->ac, &args);
}
@ -3085,7 +3086,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
unsigned num_components = instr->def.num_components;
unsigned const_offset = nir_intrinsic_base(instr);
bool reorder = nir_intrinsic_can_reorder(instr);
enum gl_access_qualifier access = ac_nir_get_mem_access_flags(instr);
enum gl_access_qualifier access = nir_intrinsic_access(instr);
bool uses_format = access & ACCESS_USES_FORMAT_AMD;
LLVMValueRef voffset = LLVMBuildAdd(ctx->ac.builder, addr_voffset,
@ -3099,7 +3100,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
result = ac_to_integer(&ctx->ac, result);
} else if (instr->intrinsic == nir_intrinsic_store_buffer_amd && uses_format) {
assert(instr->src[0].ssa->bit_size == 16 || instr->src[0].ssa->bit_size == 32);
ac_build_buffer_store_format(&ctx->ac, descriptor, store_data, vidx, voffset, access);
ac_build_buffer_store_format(&ctx->ac, descriptor, store_data, vidx, voffset, access, true);
} else if (instr->intrinsic == nir_intrinsic_load_buffer_amd ||
instr->intrinsic == nir_intrinsic_load_typed_buffer_amd) {
/* LLVM is unable to select instructions for larger than 32-bit channel types.
@ -3152,7 +3153,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
LLVMValueRef data = extract_vector_range(&ctx->ac, store_data, start, count);
ac_build_buffer_store_dword(&ctx->ac, descriptor, data, vidx, voffset, addr_soffset,
access);
access, ac_nir_store_may_be_subdword(instr));
}
}
break;