amd: stop using custom gl_access_qualifier for access type
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36764>
parent f17cb6b714
commit 9ed94371f7

8 changed files with 99 additions and 112 deletions
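In short: rather than encoding the access type in reserved gl_access_qualifier bits (ACCESS_TYPE_*), callers now pass it as a separate enum ac_access_type parameter. A minimal before/after sketch of a call site (names as in the hunks below; the surrounding variables are placeholders):

    /* before: access type ORed into the gl_access_qualifier bitfield */
    cache = ac_get_hw_cache_flags(gfx_level, access | ACCESS_TYPE_LOAD);

    /* after: access type carried as its own parameter */
    cache = ac_get_hw_cache_flags(gfx_level, access, ac_access_type_load);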
@@ -1080,22 +1080,18 @@ void ac_get_scratch_tmpring_size(const struct radeon_info *info, unsigned num_sc
       S_0286E8_WAVESIZE(bytes_per_wave >> info->scratch_wavesize_granularity_shift);
 }
 
-/* Convert chip-agnostic memory access flags into hw-specific cache flags.
- *
- * "access" must be a result of ac_nir_get_mem_access_flags() with the appropriate ACCESS_TYPE_*
- * flags set.
- */
+/* Convert chip-agnostic memory access flags into hw-specific cache flags. */
 union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
-                                              enum gl_access_qualifier access)
+                                              enum gl_access_qualifier access,
+                                              enum ac_access_type type)
 {
    union ac_hw_cache_flags result;
    result.value = 0;
 
-   assert(util_bitcount(access & (ACCESS_TYPE_LOAD | ACCESS_TYPE_STORE |
-                                  ACCESS_TYPE_ATOMIC)) == 1);
-   assert(!(access & ACCESS_SMEM_AMD) || access & ACCESS_TYPE_LOAD);
+   bool is_store = type == ac_access_type_store || type == ac_access_type_store_subdword;
+
+   assert(!(access & ACCESS_SMEM_AMD) || type == ac_access_type_load);
    assert(!(access & ACCESS_IS_SWIZZLED_AMD) || !(access & ACCESS_SMEM_AMD));
-   assert(!(access & ACCESS_MAY_STORE_SUBDWORD) || access & ACCESS_TYPE_STORE);
 
    bool scope_is_device = access & (ACCESS_COHERENT | ACCESS_VOLATILE);
@@ -1111,11 +1107,11 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
    }
 
    if (access & ACCESS_NON_TEMPORAL) {
-      if (access & ACCESS_TYPE_LOAD) {
+      if (type == ac_access_type_load) {
          /* Don't use non_temporal for SMEM because it can't set regular_temporal for MALL. */
         if (!(access & ACCESS_SMEM_AMD))
            result.gfx12.temporal_hint = gfx12_load_near_non_temporal_far_regular_temporal;
-      } else if (access & ACCESS_TYPE_STORE) {
+      } else if (is_store) {
         result.gfx12.temporal_hint = gfx12_store_near_non_temporal_far_regular_temporal;
      } else {
         result.gfx12.temporal_hint = gfx12_atomic_non_temporal;
@@ -1130,7 +1126,7 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
    *
    * GL0 doesn't have a non-temporal flag, so you always get LRU caching in CU scope.
    */
-   if (access & ACCESS_TYPE_LOAD && scope_is_device)
+   if (type == ac_access_type_load && scope_is_device)
      result.value |= ac_glc;
 
    if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_SMEM_AMD))
@@ -1162,8 +1158,8 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
    * "stream" allows write combining in GL2. "coherent bypass" doesn't.
    * "non-coherent bypass" doesn't guarantee ordering with any coherent stores.
    */
-   if (scope_is_device && !(access & ACCESS_TYPE_ATOMIC))
-      result.value |= ac_glc | (access & ACCESS_TYPE_LOAD ? ac_dlc : 0);
+   if (scope_is_device && type != ac_access_type_atomic)
+      result.value |= ac_glc | (type == ac_access_type_load ? ac_dlc : 0);
 
    if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_SMEM_AMD))
      result.value |= ac_slc;
@@ -1187,7 +1183,7 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
    * SMEM loads:
    *    GLC means device scope (available on GFX8+)
    */
-   if (scope_is_device && !(access & ACCESS_TYPE_ATOMIC)) {
+   if (scope_is_device && type != ac_access_type_atomic) {
      /* SMEM doesn't support the device scope on GFX6-7. */
      assert(gfx_level >= GFX8 || !(access & ACCESS_SMEM_AMD));
      result.value |= ac_glc;
@@ -1199,7 +1195,7 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
    /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All store opcodes not
    * aligned to a dword are affected.
    */
-   if (gfx_level == GFX6 && access & ACCESS_MAY_STORE_SUBDWORD)
+   if (gfx_level == GFX6 && type == ac_access_type_store_subdword)
      result.value |= ac_glc;
 }
@@ -36,14 +36,12 @@ extern "C" {
 /* An extension of gl_access_qualifier describing other aspects of memory operations
  * for code generation.
  */
-enum {
-   /* Only one of LOAD/STORE/ATOMIC can be set. */
-   ACCESS_TYPE_LOAD = BITFIELD_BIT(27),
-   ACCESS_TYPE_STORE = BITFIELD_BIT(28),
-   ACCESS_TYPE_ATOMIC = BITFIELD_BIT(29),
-
+enum ac_access_type {
+   ac_access_type_load,
+   ac_access_type_store,
    /* Whether a store offset or size alignment is less than 4. */
-   ACCESS_MAY_STORE_SUBDWORD = BITFIELD_BIT(31),
+   ac_access_type_store_subdword,
+   ac_access_type_atomic,
 };
 
 /* GFX6-11. The meaning of these enums is different between chips. They match LLVM definitions,
@@ -307,7 +305,8 @@ ac_ngg_get_scratch_lds_size(mesa_shader_stage stage,
                             bool compact_primitives);
 
 union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
-                                              enum gl_access_qualifier access);
+                                              enum gl_access_qualifier access,
+                                              enum ac_access_type type);
 
 unsigned ac_get_all_edge_flag_bits(enum amd_gfx_level gfx_level);
@@ -660,37 +660,24 @@ bool ac_nir_scalarize_overfetching_loads_callback(const nir_instr *instr, const
    return used_load_size < align_load_store_size(gfx_level, load_size, uses_smem, is_shared);
 }
 
-/* Get chip-agnostic memory instruction access flags (as opposed to chip-specific GLC/DLC/SLC)
- * from a NIR memory intrinsic.
- */
-enum gl_access_qualifier ac_nir_get_mem_access_flags(const nir_intrinsic_instr *instr)
+/* Determine if the store can be subdword (for the GFX6 TC L1 bug workaround) */
+bool ac_nir_store_may_be_subdword(const nir_intrinsic_instr *instr)
 {
-   enum gl_access_qualifier access =
-      nir_intrinsic_has_access(instr) ? nir_intrinsic_access(instr) : 0;
+   assert(!nir_intrinsic_infos[instr->intrinsic].has_dest);
 
-   /* Determine ACCESS_MAY_STORE_SUBDWORD. (for the GFX6 TC L1 bug workaround) */
-   if (!nir_intrinsic_infos[instr->intrinsic].has_dest) {
-      switch (instr->intrinsic) {
-      case nir_intrinsic_bindless_image_store:
-         access |= ACCESS_MAY_STORE_SUBDWORD;
-         break;
-
-      case nir_intrinsic_store_ssbo:
-      case nir_intrinsic_store_buffer_amd:
-      case nir_intrinsic_store_global:
-      case nir_intrinsic_store_global_amd:
-         if (access & ACCESS_USES_FORMAT_AMD ||
-             (nir_intrinsic_has_align_offset(instr) && nir_intrinsic_align(instr) % 4 != 0) ||
-             ((instr->src[0].ssa->bit_size / 8) * instr->src[0].ssa->num_components) % 4 != 0)
-            access |= ACCESS_MAY_STORE_SUBDWORD;
-         break;
-
-      default:
-         UNREACHABLE("unexpected store instruction");
-      }
-   }
+   switch (instr->intrinsic) {
+   case nir_intrinsic_store_ssbo:
+   case nir_intrinsic_store_buffer_amd:
+   case nir_intrinsic_store_global:
+   case nir_intrinsic_store_global_amd:
+      return (nir_intrinsic_has_align_offset(instr) && nir_intrinsic_align(instr) % 4 != 0) ||
+             ((instr->src[0].ssa->bit_size / 8) * instr->src[0].ssa->num_components) % 4 != 0;
+
+   default:
+      UNREACHABLE("unexpected store instruction");
+   }
 
-   return access;
+   return false;
 }
 
 /**
@@ -431,8 +431,8 @@ ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigne
 bool
 ac_nir_scalarize_overfetching_loads_callback(const nir_instr *instr, const void *data);
 
-enum gl_access_qualifier
-ac_nir_get_mem_access_flags(const nir_intrinsic_instr *instr);
+bool
+ac_nir_store_may_be_subdword(const nir_intrinsic_instr *instr);
 
 uint8_t
 ac_nir_lower_phis_to_scalar_cb(const nir_instr *instr, const void *_);
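Stores now pick between ac_access_type_store and ac_access_type_store_subdword at each call site. A sketch of the pattern the ACO hunks below apply to split writes (variable names taken from those hunks):

    enum ac_access_type type = ac_access_type_store;
    if (write_datas[i].bytes() < 4)
       type = ac_access_type_store_subdword;
    store->mubuf().cache = get_cache_flags(ctx, nir_intrinsic_access(instr), type);

The LLVM backend instead calls ac_nir_store_may_be_subdword() on the NIR intrinsic and threads the resulting may_subdword flag through its store builders (see the ac_build_buffer_store_* hunks further down).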
@@ -1673,15 +1673,15 @@ visit_load_per_vertex_input(isel_context* ctx, nir_intrinsic_instr* instr)
 }
 
 ac_hw_cache_flags
-get_cache_flags(isel_context* ctx, unsigned access)
+get_cache_flags(isel_context* ctx, unsigned access, enum ac_access_type type)
 {
-   return ac_get_hw_cache_flags(ctx->program->gfx_level, (gl_access_qualifier)access);
+   return ac_get_hw_cache_flags(ctx->program->gfx_level, (gl_access_qualifier)access, type);
 }
 
 ac_hw_cache_flags
 get_atomic_cache_flags(isel_context* ctx, bool return_previous)
 {
-   ac_hw_cache_flags cache = get_cache_flags(ctx, ACCESS_TYPE_ATOMIC);
+   ac_hw_cache_flags cache = get_cache_flags(ctx, 0, ac_access_type_atomic);
    if (return_previous && ctx->program->gfx_level >= GFX12)
       cache.gfx12.temporal_hint |= gfx12_atomic_return;
    else if (return_previous)
@@ -1710,7 +1710,7 @@ load_buffer(isel_context* ctx, unsigned num_components, unsigned component_size,
    }
 
    LoadEmitInfo info = {Operand(offset), dst, num_components, component_size, rsrc};
-   info.cache = get_cache_flags(ctx, access | ACCESS_TYPE_LOAD);
+   info.cache = get_cache_flags(ctx, access, ac_access_type_load);
    info.sync = sync;
    info.align_mul = align_mul;
    info.align_offset = align_offset;
@@ -2069,7 +2069,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
       load->operands[2] = Operand::c32(0);
       load->definitions[0] = Definition(tmp);
       load->mubuf().idxen = true;
-      load->mubuf().cache = get_cache_flags(ctx, nir_intrinsic_access(instr) | ACCESS_TYPE_LOAD);
+      load->mubuf().cache = get_cache_flags(ctx, nir_intrinsic_access(instr), ac_access_type_load);
       load->mubuf().sync = sync;
       load->mubuf().tfe = is_sparse;
       if (load->mubuf().tfe)
@@ -2089,7 +2089,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
       Operand vdata = is_sparse ? emit_tfe_init(bld, tmp) : Operand(v1);
       MIMG_instruction* load =
          emit_mimg(bld, opcode, {tmp}, resource, Operand(s4), coords, false, vdata);
-      load->cache = get_cache_flags(ctx, nir_intrinsic_access(instr) | ACCESS_TYPE_LOAD);
+      load->cache = get_cache_flags(ctx, nir_intrinsic_access(instr), ac_access_type_load);
       load->a16 = instr->src[1].ssa->bit_size == 16;
       load->d16 = d16;
       load->dmask = dmask;
@@ -2137,8 +2137,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
 
    memory_sync_info sync = get_memory_sync_info(instr, storage_image, 0);
    unsigned access = nir_intrinsic_access(instr);
-   ac_hw_cache_flags cache =
-      get_cache_flags(ctx, access | ACCESS_TYPE_STORE | ACCESS_MAY_STORE_SUBDWORD);
+   ac_hw_cache_flags cache = get_cache_flags(ctx, access, ac_access_type_store_subdword);
 
    uint32_t dmask = BITFIELD_MASK(num_components);
    if (instr->src[3].ssa->bit_size == 32 || instr->src[3].ssa->bit_size == 16) {
@@ -2446,9 +2445,10 @@ visit_store_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
 
    for (unsigned i = 0; i < write_count; i++) {
       aco_opcode op = get_buffer_store_op(write_datas[i].bytes());
-      unsigned access = nir_intrinsic_access(instr) | ACCESS_TYPE_STORE;
+      unsigned access = nir_intrinsic_access(instr);
+      enum ac_access_type type = ac_access_type_store;
       if (write_datas[i].bytes() < 4)
-         access |= ACCESS_MAY_STORE_SUBDWORD;
+         type = ac_access_type_store_subdword;
 
       aco_ptr<Instruction> store{create_instruction(op, Format::MUBUF, 6, 0)};
       store->operands[0] = Operand(rsrc);
@@ -2459,7 +2459,7 @@ visit_store_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
       store->operands[5] = Operand();
       store->mubuf().offset = offsets[i];
       store->mubuf().offen = (offset.type() == RegType::vgpr);
-      store->mubuf().cache = get_cache_flags(ctx, access);
+      store->mubuf().cache = get_cache_flags(ctx, access, type);
       store->mubuf().disable_wqm = true;
       store->mubuf().sync = sync;
       ctx->program->needs_exact = true;
@@ -2534,6 +2534,7 @@ visit_load_global(isel_context* ctx, nir_intrinsic_instr* instr)
    Builder bld(ctx->program, ctx->block);
    unsigned num_components = instr->num_components;
    unsigned component_size = instr->def.bit_size / 8;
+   unsigned access = nir_intrinsic_access(instr);
 
    Temp addr, offset;
    uint32_t const_offset;
@@ -2550,21 +2551,19 @@ visit_load_global(isel_context* ctx, nir_intrinsic_instr* instr)
    info.align_offset = nir_intrinsic_align_offset(instr);
    info.sync = get_memory_sync_info(instr, storage_buffer, 0);
    info.offset_src = &instr->src[1];
+   info.cache = get_cache_flags(ctx, access, ac_access_type_load);
 
-   unsigned access = nir_intrinsic_access(instr) | ACCESS_TYPE_LOAD;
    if (access & ACCESS_SMEM_AMD) {
       assert(component_size >= 4 ||
              (num_components * component_size <= 2 && ctx->program->gfx_level >= GFX12));
       if (info.resource.id())
          info.resource = bld.as_uniform(info.resource);
       info.offset = Operand(bld.as_uniform(info.offset));
-      info.cache = get_cache_flags(ctx, access);
       EmitLoadParameters params = smem_load_params;
       params.max_const_offset = ctx->program->dev.smem_offset_max;
       emit_load(ctx, bld, info, params);
    } else {
       EmitLoadParameters params = global_load_params;
-      info.cache = get_cache_flags(ctx, access);
       emit_load(ctx, bld, info, params);
    }
 }
@@ -2596,9 +2595,10 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
       Format format = lower_global_address(ctx, bld, offsets[i], &write_address,
                                            &write_const_offset, &write_offset, &instr->src[2]);
 
-      unsigned access = nir_intrinsic_access(instr) | ACCESS_TYPE_STORE;
+      unsigned access = nir_intrinsic_access(instr);
+      enum ac_access_type type = ac_access_type_store;
       if (write_datas[i].bytes() < 4)
-         access |= ACCESS_MAY_STORE_SUBDWORD;
+         type = ac_access_type_store_subdword;
 
       if (format != Format::MUBUF) {
          bool global = format == Format::GLOBAL;
@@ -2632,7 +2632,7 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
         flat->operands[2] = Operand(write_datas[i]);
         flat->operands[3] = Operand();
         flat->operands[4] = Operand();
-        flat->flatlike().cache = get_cache_flags(ctx, access);
+        flat->flatlike().cache = get_cache_flags(ctx, access, type);
         assert(global || !write_const_offset);
         flat->flatlike().offset = write_const_offset;
         flat->flatlike().disable_wqm = true;
@@ -2660,7 +2660,7 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
         mubuf->operands[4] = Operand();
         mubuf->operands[5] = Operand();
         mubuf->mubuf().offen = write_offset.type() == RegType::vgpr;
-        mubuf->mubuf().cache = get_cache_flags(ctx, access);
+        mubuf->mubuf().cache = get_cache_flags(ctx, access, type);
         mubuf->mubuf().offset = write_const_offset;
         mubuf->mubuf().addr64 = write_address.type() == RegType::vgpr;
         mubuf->mubuf().disable_wqm = true;
@@ -2858,7 +2858,8 @@ visit_load_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
       s_offset_zero ? Temp(0, s1) : bld.as_uniform(get_ssa_temp(ctx, intrin->src[2].ssa));
    Temp idx = idxen ? as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[3].ssa)) : Temp();
 
-   ac_hw_cache_flags cache = get_cache_flags(ctx, nir_intrinsic_access(intrin) | ACCESS_TYPE_LOAD);
+   ac_hw_cache_flags cache =
+      get_cache_flags(ctx, nir_intrinsic_access(intrin), ac_access_type_load);
 
    unsigned const_offset = nir_intrinsic_base(intrin);
    unsigned elem_size_bytes = intrin->def.bit_size / 8u;
@@ -2979,9 +2980,10 @@ visit_store_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
         vaddr_op = Operand(idx);
 
      unsigned access = nir_intrinsic_access(intrin);
+      enum ac_access_type type = ac_access_type_store;
      if (write_datas[i].bytes() < 4)
-         access |= ACCESS_MAY_STORE_SUBDWORD;
-      ac_hw_cache_flags cache = get_cache_flags(ctx, access | ACCESS_TYPE_STORE);
+         type = ac_access_type_store_subdword;
+      ac_hw_cache_flags cache = get_cache_flags(ctx, access, type);
 
      Instruction* mubuf = bld.mubuf(op, Operand(descriptor), vaddr_op, s_offset,
                                     Operand(write_datas[i]), const_offset, offen, idxen,
@@ -3360,7 +3362,7 @@ visit_load_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
    LoadEmitInfo info = {Operand(v1), dst, instr->def.num_components, instr->def.bit_size / 8u};
    info.align_mul = nir_intrinsic_align_mul(instr);
    info.align_offset = nir_intrinsic_align_offset(instr);
-   info.cache = get_cache_flags(ctx, ACCESS_TYPE_LOAD | ACCESS_IS_SWIZZLED_AMD);
+   info.cache = get_cache_flags(ctx, ACCESS_IS_SWIZZLED_AMD, ac_access_type_load);
    info.swizzle_component_size = ctx->program->gfx_level <= GFX8 ? 4 : 0;
    info.sync = memory_sync_info(storage_scratch, semantic_private);
    if (ctx->program->gfx_level >= GFX9) {
@@ -3464,9 +3466,9 @@ visit_store_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
      Instruction* mubuf = bld.mubuf(op, rsrc, offset, ctx->program->scratch_offsets.back(),
                                     write_datas[i], offsets[i], true);
      mubuf->mubuf().sync = memory_sync_info(storage_scratch, semantic_private);
-      unsigned access = ACCESS_TYPE_STORE | ACCESS_IS_SWIZZLED_AMD |
-                        (write_datas[i].bytes() < 4 ? ACCESS_MAY_STORE_SUBDWORD : 0);
-      mubuf->mubuf().cache = get_cache_flags(ctx, access);
+      enum ac_access_type type =
+         write_datas[i].bytes() < 4 ? ac_access_type_store_subdword : ac_access_type_store;
+      mubuf->mubuf().cache = get_cache_flags(ctx, ACCESS_IS_SWIZZLED_AMD, type);
      }
    }
 }
@@ -855,16 +855,18 @@ LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx, struct ac_llvm_p
    return ac_build_load_custom(ctx, ptr.t, ptr.v, index, true, true, true);
 }
 
-static unsigned get_cache_flags(struct ac_llvm_context *ctx, enum gl_access_qualifier access)
+static unsigned get_cache_flags(struct ac_llvm_context *ctx, enum gl_access_qualifier access,
+                                enum ac_access_type type)
 {
-   return ac_get_hw_cache_flags(ctx->gfx_level, access).value;
+   return ac_get_hw_cache_flags(ctx->gfx_level, access, type).value;
 }
 
 static void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
                                          LLVMValueRef data, LLVMValueRef vindex,
                                          LLVMValueRef voffset, LLVMValueRef soffset,
-                                         enum gl_access_qualifier access, bool use_format)
+                                         enum gl_access_qualifier access, bool may_subdword, bool use_format)
 {
+   enum ac_access_type type = may_subdword ? ac_access_type_store_subdword : ac_access_type_store;
    LLVMValueRef args[6];
    int idx = 0;
    args[idx++] = data;
@@ -873,7 +875,7 @@ static void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueR
    args[idx++] = vindex ? vindex : ctx->i32_0;
    args[idx++] = voffset ? voffset : ctx->i32_0;
    args[idx++] = soffset ? soffset : ctx->i32_0;
-   args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | ACCESS_TYPE_STORE), 0);
+   args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access, type), 0);
    const char *indexing_kind = vindex ? "struct" : "raw";
    char name[256], type_name[8];
 
@@ -890,15 +892,16 @@ static void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueR
 }
 
 void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef data,
-                                  LLVMValueRef vindex, LLVMValueRef voffset, enum gl_access_qualifier access)
+                                  LLVMValueRef vindex, LLVMValueRef voffset, enum gl_access_qualifier access,
+                                  bool may_subdword)
 {
-   ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL, access, true);
+   ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL, access, may_subdword, true);
 }
 
 /* buffer_store_dword(,x2,x3,x4) <- the suffix is selected by the type of vdata. */
 void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
                                  LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset,
-                                 enum gl_access_qualifier access)
+                                 enum gl_access_qualifier access, bool may_subdword)
 {
    unsigned num_channels = ac_get_llvm_num_components(vdata);
 
@@ -914,13 +917,13 @@ void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
       voffset2 = LLVMBuildAdd(ctx->builder, voffset ? voffset : ctx->i32_0,
                               LLVMConstInt(ctx->i32, 8, 0), "");
 
-      ac_build_buffer_store_dword(ctx, rsrc, v01, vindex, voffset, soffset, access);
-      ac_build_buffer_store_dword(ctx, rsrc, v[2], vindex, voffset2, soffset, access);
+      ac_build_buffer_store_dword(ctx, rsrc, v01, vindex, voffset, soffset, access, may_subdword);
+      ac_build_buffer_store_dword(ctx, rsrc, v[2], vindex, voffset2, soffset, access, may_subdword);
       return;
    }
 
    ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata), vindex, voffset, soffset,
-                                access, false);
+                                access, may_subdword, false);
 }
 
 static LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
@@ -936,7 +939,7 @@ static LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, LLV
    args[idx++] = vindex;
    args[idx++] = voffset ? voffset : ctx->i32_0;
    args[idx++] = soffset ? soffset : ctx->i32_0;
-   args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | ACCESS_TYPE_LOAD), 0);
+   args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access, ac_access_type_load), 0);
    unsigned func =
       !ac_has_vec3_support(ctx->gfx_level, use_format) && num_channels == 3 ? 4 : num_channels;
    const char *indexing_kind = vindex ? "struct" : "raw";
@@ -990,8 +993,7 @@ LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc
       LLVMValueRef args[3] = {
          rsrc,
         offset,
-         LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | ACCESS_TYPE_LOAD |
-                                                ACCESS_SMEM_AMD), 0),
+         LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | ACCESS_SMEM_AMD, ac_access_type_load), 0),
      };
      result[i] = ac_build_intrinsic(ctx, name, channel_type, args, 3, AC_ATTR_INVARIANT_LOAD);
    }
@@ -1028,7 +1030,7 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueR
      assert(!d16);
 
      union ac_hw_cache_flags cache_flags =
-         ac_get_hw_cache_flags(ctx->gfx_level, access | ACCESS_TYPE_LOAD);
+         ac_get_hw_cache_flags(ctx->gfx_level, access, ac_access_type_load);
      char code[1024];
 
      /* The definition in the assembly and the one in the constraint string
@@ -1115,7 +1117,7 @@ static LLVMValueRef ac_build_tbuffer_load(struct ac_llvm_context *ctx, LLVMValue
    args[idx++] = voffset ? voffset : ctx->i32_0;
    args[idx++] = soffset ? soffset : ctx->i32_0;
    args[idx++] = LLVMConstInt(ctx->i32, tbuffer_format, 0);
-   args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | ACCESS_TYPE_LOAD), 0);
+   args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access, ac_access_type_load), 0);
    const char *indexing_kind = vindex ? "struct" : "raw";
    char name[256], type_name[8];
 
@@ -1224,7 +1226,7 @@ void ac_build_buffer_store_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
 {
    vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, "");
 
-   ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, access, false);
+   ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, access, true, false);
 }
 
 void ac_build_buffer_store_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
@@ -1232,7 +1234,7 @@ void ac_build_buffer_store_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
 {
    vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i8, "");
 
-   ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, access, false);
+   ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, access, true, false);
 }
 
 /**
@@ -1723,9 +1725,9 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_
    args[num_args++] = a->tfe ? ctx->i32_1 : ctx->i32_0; /* texfailctrl */
    args[num_args++] = LLVMConstInt(
       ctx->i32, get_cache_flags(ctx,
-                                a->access |
-                                (atomic ? ACCESS_TYPE_ATOMIC :
-                                 load ? ACCESS_TYPE_LOAD : ACCESS_TYPE_STORE)),
+                                a->access,
+                                (atomic ? ac_access_type_atomic :
+                                 load ? ac_access_type_load : ac_access_type_store_subdword)),
       false);
 
    const char *name;
@@ -232,10 +232,10 @@ LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx, struct ac_llvm_p
 
 void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
                                  LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset,
-                                 enum gl_access_qualifier access);
+                                 enum gl_access_qualifier access, bool may_subdword);
 
 void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef data,
-                                  LLVMValueRef vindex, LLVMValueRef voffset, enum gl_access_qualifier access);
+                                  LLVMValueRef vindex, LLVMValueRef voffset, enum gl_access_qualifier access, bool may_subdword);
 
 LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, int num_channels,
                                   LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset,
@@ -1574,7 +1574,8 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in
    LLVMValueRef src_data = get_src(ctx, instr->src[0]);
    int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
    unsigned writemask = nir_intrinsic_write_mask(instr);
-   enum gl_access_qualifier access = ac_nir_get_mem_access_flags(instr);
+   enum gl_access_qualifier access = nir_intrinsic_access(instr);
+   bool may_subdword = ac_nir_store_may_be_subdword(instr);
 
    struct waterfall_context wctx;
    LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]);
@@ -1655,7 +1656,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in
         data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, "");
 
         ac_build_buffer_store_dword(&ctx->ac, rsrc, data, NULL, offset,
-                                     ctx->ac.i32_0, access);
+                                     ctx->ac.i32_0, access, may_subdword);
      }
    }
 
@@ -1797,7 +1798,7 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, nir_intrinsic_
 
    unsigned cache_flags =
       ac_get_hw_cache_flags(ctx->ac.gfx_level,
-                            ac_nir_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
+                            nir_intrinsic_access(instr), ac_access_type_atomic).value;
 
    params[arg_count++] = data;
    params[arg_count++] = descriptor;
@@ -1825,7 +1826,7 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, nir_intrinsic_
 
    int elem_size_bytes = instr->def.bit_size / 8;
    int num_components = instr->num_components;
-   enum gl_access_qualifier access = ac_nir_get_mem_access_flags(instr);
+   enum gl_access_qualifier access = nir_intrinsic_access(instr);
 
    LLVMValueRef offset = get_src(ctx, instr->src[1]);
    LLVMValueRef rsrc = ctx->abi->load_ssbo ?
@@ -2230,7 +2231,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intri
 
    struct ac_image_args args = {0};
 
-   args.access = ac_nir_get_mem_access_flags(instr);
+   args.access = nir_intrinsic_access(instr);
    args.tfe = instr->intrinsic == nir_intrinsic_bindless_image_sparse_load;
 
    if (dim == GLSL_SAMPLER_DIM_BUF) {
@@ -2316,7 +2317,7 @@ static void visit_image_store(struct ac_nir_context *ctx, const nir_intrinsic_in
    LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
 
    struct ac_image_args args = {0};
-   args.access = ac_nir_get_mem_access_flags(instr);
+   args.access = nir_intrinsic_access(instr);
 
    LLVMValueRef src = get_src(ctx, instr->src[3]);
    if (instr->src[3].ssa->bit_size == 64) {
@@ -2338,7 +2339,7 @@ static void visit_image_store(struct ac_nir_context *ctx, const nir_intrinsic_in
      vindex =
         LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, "");
 
-      ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex, ctx->ac.i32_0, args.access);
+      ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex, ctx->ac.i32_0, args.access, true);
    } else {
      bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;
 
@@ -2447,7 +2448,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_int
      char type[8];
      unsigned cache_flags =
         ac_get_hw_cache_flags(ctx->ac.gfx_level,
-                               ac_nir_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
+                               nir_intrinsic_access(instr), ac_access_type_atomic).value;
 
      params[param_count++] = ctx->ac.i32_0; /* soffset */
      params[param_count++] = LLVMConstInt(ctx->ac.i32, cache_flags, 0);
@@ -2471,7 +2472,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_int
      get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
      args.dim = ac_get_image_dim(ctx->ac.gfx_level, dim, is_array);
      args.a16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.coords[0])) == 16;
-      args.access = ac_nir_get_mem_access_flags(instr);
+      args.access = nir_intrinsic_access(instr);
 
      result = ac_build_image_opcode(&ctx->ac, &args);
    }
@@ -3085,7 +3086,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
      unsigned num_components = instr->def.num_components;
      unsigned const_offset = nir_intrinsic_base(instr);
      bool reorder = nir_intrinsic_can_reorder(instr);
-      enum gl_access_qualifier access = ac_nir_get_mem_access_flags(instr);
+      enum gl_access_qualifier access = nir_intrinsic_access(instr);
      bool uses_format = access & ACCESS_USES_FORMAT_AMD;
 
      LLVMValueRef voffset = LLVMBuildAdd(ctx->ac.builder, addr_voffset,
|
@ -3099,7 +3100,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
|||
result = ac_to_integer(&ctx->ac, result);
|
||||
} else if (instr->intrinsic == nir_intrinsic_store_buffer_amd && uses_format) {
|
||||
assert(instr->src[0].ssa->bit_size == 16 || instr->src[0].ssa->bit_size == 32);
|
||||
ac_build_buffer_store_format(&ctx->ac, descriptor, store_data, vidx, voffset, access);
|
||||
ac_build_buffer_store_format(&ctx->ac, descriptor, store_data, vidx, voffset, access, true);
|
||||
} else if (instr->intrinsic == nir_intrinsic_load_buffer_amd ||
|
||||
instr->intrinsic == nir_intrinsic_load_typed_buffer_amd) {
|
||||
/* LLVM is unable to select instructions for larger than 32-bit channel types.
|
||||
|
|
@ -3152,7 +3153,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
|||
|
||||
LLVMValueRef data = extract_vector_range(&ctx->ac, store_data, start, count);
|
||||
ac_build_buffer_store_dword(&ctx->ac, descriptor, data, vidx, voffset, addr_soffset,
|
||||
access);
|
||||
access, ac_nir_store_may_be_subdword(instr));
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
|
|
|||
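For reference, a usage sketch of the reworked store builder (argument values are placeholders; the signature is the one declared in the @@ -232,10 header hunk above): a caller that knows the store writes whole, dword-aligned dwords may pass false and skip the GFX6 TC L1 workaround, while anything that might be subdword-sized or subdword-aligned should pass true.

    /* may_subdword=false: data size and offset are known to be dword-aligned,
     * so no GFX6 subdword hazard applies. */
    ac_build_buffer_store_dword(ctx, rsrc, vdata, vindex, voffset, soffset,
                                access, false);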