ac/llvm: rewrite and unify how GLC, DLC, SLC are set
Use ACCESS_* flags at call sites instead of GLC/DLC/SLC. The ACCESS_* flags are extended to describe other aspects of memory instructions, such as load/store/atomic/SMEM. A new function then converts the access flags into GLC, DLC, and SLC. The new functions are also usable by ACO.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22770>
parent 968db0208d
commit f98871608c

6 changed files with 270 additions and 123 deletions
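The gist of the change, as a minimal sketch (a hypothetical call site, not code from this commit; `instr` and `gfx_level` are assumed to be in scope): chip-agnostic ACCESS_* flags are derived once from the NIR intrinsic, the caller ORs in the access type, and a single helper converts them to hardware cache bits.

/* Hypothetical call site illustrating the new two-step pattern. */
enum gl_access_qualifier access = ac_get_mem_access_flags(instr);
unsigned cache_flags =
   ac_get_hw_cache_flags(gfx_level, access | ACCESS_TYPE_LOAD).value;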
@@ -1014,3 +1014,143 @@ void ac_get_scratch_tmpring_size(const struct radeon_info *info,
    *tmpring_size = S_0286E8_WAVES(max_scratch_waves) |
                    S_0286E8_WAVESIZE(*max_seen_bytes_per_wave >> size_shift);
 }
+
+/* Get chip-agnostic memory instruction access flags (as opposed to chip-specific GLC/DLC/SLC)
+ * from a NIR memory intrinsic.
+ */
+enum gl_access_qualifier ac_get_mem_access_flags(const nir_intrinsic_instr *instr)
+{
+   enum gl_access_qualifier access =
+      nir_intrinsic_has_access(instr) ? nir_intrinsic_access(instr) : 0;
+
+   /* Determine ACCESS_MAY_STORE_SUBDWORD. (for the GFX6 TC L1 bug workaround) */
+   if (!nir_intrinsic_infos[instr->intrinsic].has_dest) {
+      switch (instr->intrinsic) {
+      case nir_intrinsic_bindless_image_store:
+         access |= ACCESS_MAY_STORE_SUBDWORD;
+         break;
+
+      case nir_intrinsic_store_ssbo:
+      case nir_intrinsic_store_buffer_amd:
+      case nir_intrinsic_store_global:
+      case nir_intrinsic_store_global_amd:
+         if (access & ACCESS_USES_FORMAT_AMD ||
+             (nir_intrinsic_has_align_offset(instr) && nir_intrinsic_align(instr) % 4 != 0) ||
+             ((instr->src[0].ssa->bit_size / 8) * instr->src[0].ssa->num_components) % 4 != 0)
+            access |= ACCESS_MAY_STORE_SUBDWORD;
+         break;
+
+      default:
+         unreachable("unexpected store instruction");
+      }
+   }
+
+   return access;
+}
+
+/* Convert chip-agnostic memory access flags into hw-specific cache flags.
+ *
+ * "access" must be a result of ac_get_mem_access_flags() with the appropriate ACCESS_TYPE_*
+ * flags set.
+ */
+union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
+                                              enum gl_access_qualifier access)
+{
+   union ac_hw_cache_flags result;
+   result.value = 0;
+
+   assert(util_bitcount(access & (ACCESS_TYPE_LOAD | ACCESS_TYPE_STORE |
+                                  ACCESS_TYPE_ATOMIC)) == 1);
+   assert(!(access & ACCESS_TYPE_SMEM) || access & ACCESS_TYPE_LOAD);
+   assert(!(access & ACCESS_IS_SWIZZLED_AMD) || !(access & ACCESS_TYPE_SMEM));
+   assert(!(access & ACCESS_MAY_STORE_SUBDWORD) || access & ACCESS_TYPE_STORE);
+
+   bool scope_is_device = access & (ACCESS_COHERENT | ACCESS_VOLATILE);
+
+   if (gfx_level >= GFX11) {
+      /* GFX11 simplified it and exposes what is actually useful.
+       *
+       * GLC means device scope for loads only. (stores and atomics are always device scope)
+       * SLC means non-temporal for GL1 and GL2 caches. (GL1 = hit-evict, GL2 = stream, unavailable in SMEM)
+       * DLC means non-temporal for MALL. (noalloc, i.e. coherent bypass)
+       *
+       * GL0 doesn't have a non-temporal flag, so you always get LRU caching in CU scope.
+       */
+      if (access & ACCESS_TYPE_LOAD && scope_is_device)
+         result.value |= ac_glc;
+
+      if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_TYPE_SMEM))
+         result.value |= ac_slc;
+   } else if (gfx_level >= GFX10) {
+      /* GFX10-10.3:
+       *
+       * VMEM and SMEM loads (SMEM only supports the first four):
+       * !GLC && !DLC && !SLC means CU scope <== use for normal loads with CU scope
+       *  GLC && !DLC && !SLC means SA scope
+       * !GLC &&  DLC && !SLC means CU scope, GL1 bypass
+       *  GLC &&  DLC && !SLC means device scope <== use for normal loads with device scope
+       * !GLC && !DLC &&  SLC means CU scope, non-temporal (GL0 = GL1 = hit-evict, GL2 = stream) <== use for non-temporal loads with CU scope
+       *  GLC && !DLC &&  SLC means SA scope, non-temporal (GL1 = hit-evict, GL2 = stream)
+       * !GLC &&  DLC &&  SLC means CU scope, GL0 non-temporal, GL1-GL2 coherent bypass (GL0 = hit-evict, GL1 = bypass, GL2 = noalloc)
+       *  GLC &&  DLC &&  SLC means device scope, GL2 coherent bypass (noalloc) <== use for non-temporal loads with device scope
+       *
+       * VMEM stores/atomics (stores are CU scope only if they overwrite the whole cache line,
+       * atomics are always device scope, GL1 is always bypassed):
+       * !GLC && !DLC && !SLC means CU scope <== use for normal stores with CU scope
+       *  GLC && !DLC && !SLC means device scope <== use for normal stores with device scope
+       * !GLC &&  DLC && !SLC means CU scope, GL2 non-coherent bypass
+       *  GLC &&  DLC && !SLC means device scope, GL2 non-coherent bypass
+       * !GLC && !DLC &&  SLC means CU scope, GL2 non-temporal (stream) <== use for non-temporal stores with CU scope
+       *  GLC && !DLC &&  SLC means device scope, GL2 non-temporal (stream) <== use for non-temporal stores with device scope
+       * !GLC &&  DLC &&  SLC means CU scope, GL2 coherent bypass (noalloc)
+       *  GLC &&  DLC &&  SLC means device scope, GL2 coherent bypass (noalloc)
+       *
+       * "stream" allows write combining in GL2. "coherent bypass" doesn't.
+       * "non-coherent bypass" doesn't guarantee ordering with any coherent stores.
+       */
+      if (scope_is_device && !(access & ACCESS_TYPE_ATOMIC))
+         result.value |= ac_glc | (access & ACCESS_TYPE_LOAD ? ac_dlc : 0);
+
+      if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_TYPE_SMEM))
+         result.value |= ac_slc;
+   } else {
+      /* GFX6-GFX9:
+       *
+       * VMEM loads:
+       * !GLC && !SLC means CU scope
+       *  GLC && !SLC means (GFX6: device scope, GFX7-9: device scope [*])
+       * !GLC &&  SLC means (GFX6: CU scope, GFX7: device scope, GFX8-9: CU scope), GL2 non-temporal (stream)
+       *  GLC &&  SLC means device scope, GL2 non-temporal (stream)
+       *
+       * VMEM stores (atomics don't have [*]):
+       * !GLC && !SLC means (GFX6: CU scope, GFX7-9: device scope [*])
+       *  GLC && !SLC means (GFX6-7: device scope, GFX8-9: device scope [*])
+       * !GLC &&  SLC means (GFX6: CU scope, GFX7-9: device scope [*]), GL2 non-temporal (stream)
+       *  GLC &&  SLC means device scope, GL2 non-temporal (stream)
+       *
+       * [*] data can be cached in GL1 for future CU scope
+       *
+       * SMEM loads:
+       *  GLC means device scope (available on GFX8+)
+       */
+      if (scope_is_device && !(access & ACCESS_TYPE_ATOMIC)) {
+         /* SMEM doesn't support the device scope on GFX6-7. */
+         assert(gfx_level >= GFX8 || !(access & ACCESS_TYPE_SMEM));
+         result.value |= ac_glc;
+      }
+
+      if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_TYPE_SMEM))
+         result.value |= ac_slc;
+
+      /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All store opcodes not
+       * aligned to a dword are affected.
+       */
+      if (gfx_level == GFX6 && access & ACCESS_MAY_STORE_SUBDWORD)
+         result.value |= ac_glc;
+   }
+
+   if (access & ACCESS_IS_SWIZZLED_AMD)
+      result.value |= ac_swizzled;
+
+   return result;
+}
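The comment tables above translate directly into bit patterns. As a hedged sanity check (not part of the commit), this is what the GFX10 table yields for a device-scope, non-temporal VMEM load:

/* Expected per the GFX10 table: GLC+DLC (device scope) plus SLC (GL2 stream). */
enum gl_access_qualifier access =
   ACCESS_COHERENT | ACCESS_NON_TEMPORAL | ACCESS_TYPE_LOAD;
union ac_hw_cache_flags hw = ac_get_hw_cache_flags(GFX10, access);
assert(hw.value == (ac_glc | ac_dlc | ac_slc));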
@@ -46,6 +46,41 @@ extern "C" {
 #define AC_SENDMSG_GS_OP_EMIT (2 << 4)
 #define AC_SENDMSG_GS_OP_EMIT_CUT (3 << 4)
 
+/* An extension of gl_access_qualifier describing other aspects of memory operations
+ * for code generation.
+ */
+enum {
+   /* Only one of LOAD/STORE/ATOMIC can be set. */
+   ACCESS_TYPE_LOAD = BITFIELD_BIT(27),
+   ACCESS_TYPE_STORE = BITFIELD_BIT(28),
+   ACCESS_TYPE_ATOMIC = BITFIELD_BIT(29),
+
+   /* This access is expected to use an SMEM instruction if source operands are non-divergent.
+    * Only loads can set this.
+    */
+   ACCESS_TYPE_SMEM = BITFIELD_BIT(30),
+
+   /* Whether a store offset or size alignment is less than 4. */
+   ACCESS_MAY_STORE_SUBDWORD = BITFIELD_BIT(31),
+};
+
+/* The meaning of these enums is different between chips. They match LLVM definitions,
+ * but they can also be used by ACO. Use ac_get_hw_cache_flags to get these.
+ */
+enum ac_cache_flags
+{
+   ac_glc = BITFIELD_BIT(0),
+   ac_slc = BITFIELD_BIT(1),
+   ac_dlc = BITFIELD_BIT(2),
+   ac_swizzled = BITFIELD_BIT(3),
+};
+
+union ac_hw_cache_flags
+{
+   /* NOTE: This will contain more fields in the future. */
+   enum ac_cache_flags value;
+};
+
 enum ac_image_dim
 {
    ac_image_1d,
@@ -199,6 +234,11 @@ ac_ngg_get_scratch_lds_size(gl_shader_stage stage,
                             bool streamout_enabled,
                             bool can_cull);
 
+enum gl_access_qualifier ac_get_mem_access_flags(const nir_intrinsic_instr *instr);
+
+union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
+                                              enum gl_access_qualifier access);
+
 #ifdef __cplusplus
 }
 #endif
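Because the ACCESS_TYPE_* bits extend gl_access_qualifier in otherwise-unused high bits (27-31), call sites can OR them into NIR-derived flags without collisions. A small sketch (assumed values, again not from the commit) for a coherent SMEM load:

/* On GFX8+, a device-scope SMEM load maps to GLC per the GFX6-9 rules. */
enum gl_access_qualifier access =
   ACCESS_COHERENT | ACCESS_TYPE_LOAD | ACCESS_TYPE_SMEM;
union ac_hw_cache_flags hw = ac_get_hw_cache_flags(GFX8, access);
assert(hw.value == ac_glc);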
@@ -1221,23 +1221,15 @@ LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
    return ac_build_load_custom(ctx, ptr.t, ptr.v, index, true, true, false);
 }
 
-static unsigned get_load_cache_policy(struct ac_llvm_context *ctx, unsigned cache_policy)
+static unsigned get_cache_flags(struct ac_llvm_context *ctx, enum gl_access_qualifier access)
 {
-   return cache_policy |
-          (ctx->gfx_level >= GFX10 && ctx->gfx_level < GFX11 && cache_policy & ac_glc ? ac_dlc : 0);
-}
-
-static unsigned get_store_cache_policy(struct ac_llvm_context *ctx, unsigned cache_policy)
-{
-   if (ctx->gfx_level >= GFX11)
-      cache_policy &= ~ac_glc; /* GLC has no effect on stores */
-   return cache_policy;
+   return ac_get_hw_cache_flags(ctx->gfx_level, access).value;
 }
 
 static void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
                                          LLVMValueRef data, LLVMValueRef vindex,
                                          LLVMValueRef voffset, LLVMValueRef soffset,
-                                         unsigned cache_policy, bool use_format)
+                                         enum gl_access_qualifier access, bool use_format)
 {
    LLVMValueRef args[6];
    int idx = 0;
@@ -1247,7 +1239,7 @@ static void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueR
    args[idx++] = vindex ? vindex : ctx->i32_0;
    args[idx++] = voffset ? voffset : ctx->i32_0;
    args[idx++] = soffset ? soffset : ctx->i32_0;
-   args[idx++] = LLVMConstInt(ctx->i32, get_store_cache_policy(ctx, cache_policy), 0);
+   args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | ACCESS_TYPE_STORE), 0);
    const char *indexing_kind = vindex ? "struct" : "raw";
    char name[256], type_name[8];
 
@@ -1264,15 +1256,15 @@ static void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueR
 }
 
 void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef data,
-                                  LLVMValueRef vindex, LLVMValueRef voffset, unsigned cache_policy)
+                                  LLVMValueRef vindex, LLVMValueRef voffset, enum gl_access_qualifier access)
 {
-   ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL, cache_policy, true);
+   ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL, access, true);
 }
 
 /* buffer_store_dword(,x2,x3,x4) <- the suffix is selected by the type of vdata. */
 void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
                                  LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset,
-                                 unsigned cache_policy)
+                                 enum gl_access_qualifier access)
 {
    unsigned num_channels = ac_get_llvm_num_components(vdata);
 
@@ -1288,19 +1280,19 @@ void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
       voffset2 = LLVMBuildAdd(ctx->builder, voffset ? voffset : ctx->i32_0,
                               LLVMConstInt(ctx->i32, 8, 0), "");
 
-      ac_build_buffer_store_dword(ctx, rsrc, v01, vindex, voffset, soffset, cache_policy);
-      ac_build_buffer_store_dword(ctx, rsrc, v[2], vindex, voffset2, soffset, cache_policy);
+      ac_build_buffer_store_dword(ctx, rsrc, v01, vindex, voffset, soffset, access);
+      ac_build_buffer_store_dword(ctx, rsrc, v[2], vindex, voffset2, soffset, access);
       return;
    }
 
    ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata), vindex, voffset, soffset,
-                                cache_policy, false);
+                                access, false);
 }
 
 static LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
                                                 LLVMValueRef vindex, LLVMValueRef voffset,
                                                 LLVMValueRef soffset, unsigned num_channels,
-                                                LLVMTypeRef channel_type, unsigned cache_policy,
+                                                LLVMTypeRef channel_type, enum gl_access_qualifier access,
                                                 bool can_speculate, bool use_format)
 {
    LLVMValueRef args[5];
@@ -1310,7 +1302,7 @@ static LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, LLV
    args[idx++] = vindex;
    args[idx++] = voffset ? voffset : ctx->i32_0;
    args[idx++] = soffset ? soffset : ctx->i32_0;
-   args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
+   args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | ACCESS_TYPE_LOAD), 0);
    unsigned func =
       !ac_has_vec3_support(ctx->gfx_level, use_format) && num_channels == 3 ? 4 : num_channels;
    const char *indexing_kind = vindex ? "struct" : "raw";
@@ -1339,11 +1331,10 @@ static LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, LLV
 
 LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, int num_channels,
                                   LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset,
-                                  LLVMTypeRef channel_type, unsigned cache_policy,
+                                  LLVMTypeRef channel_type, enum gl_access_qualifier access,
                                   bool can_speculate, bool allow_smem)
 {
-   if (allow_smem && !(cache_policy & ac_slc) &&
-       (!(cache_policy & ac_glc) || ctx->gfx_level >= GFX8)) {
+   if (allow_smem && (!(access & ACCESS_COHERENT) || ctx->gfx_level >= GFX8)) {
       assert(vindex == NULL);
 
       LLVMValueRef result[32];
@@ -1365,7 +1356,8 @@ LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc
       LLVMValueRef args[3] = {
          rsrc,
         offset,
-         LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0),
+         LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | ACCESS_TYPE_LOAD |
+                                                ACCESS_TYPE_SMEM), 0),
       };
       result[i] = ac_build_intrinsic(ctx, name, channel_type, args, 3, AC_ATTR_INVARIANT_LOAD);
    }
@@ -1386,7 +1378,7 @@ LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc
                                LLVMConstInt(ctx->i32, i * ac_get_type_size(channel_type), 0), "");
       LLVMValueRef item =
          ac_build_buffer_load_common(ctx, rsrc, vindex, fetch_voffset, soffset, fetch_num_channels,
-                                     channel_type, cache_policy, can_speculate, false);
+                                     channel_type, access, can_speculate, false);
       result = ac_build_concat(ctx, result, item);
    }
 
@@ -1395,13 +1387,13 @@ LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc
 
 LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
                                          LLVMValueRef vindex, LLVMValueRef voffset,
-                                         unsigned num_channels, unsigned cache_policy,
+                                         unsigned num_channels, enum gl_access_qualifier access,
                                          bool can_speculate, bool d16, bool tfe)
 {
    if (tfe) {
       assert(!d16);
 
-      cache_policy = get_load_cache_policy(ctx, cache_policy);
+      unsigned cache_flags = get_cache_flags(ctx, access | ACCESS_TYPE_LOAD);
 
       char code[256];
       /* The definition in the assembly and the one in the constraint string
@@ -1415,9 +1407,9 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueR
              "v_mov_b32 v4, 0\n"
              "buffer_load_format_xyzw v[0:3], $1, $2, 0, idxen offen %s %s tfe %s\n"
              "s_waitcnt vmcnt(0)",
-             cache_policy & ac_glc ? "glc" : "",
-             cache_policy & ac_slc ? "slc" : "",
-             cache_policy & ac_dlc ? "dlc" : "");
+             cache_flags & ac_glc ? "glc" : "",
+             cache_flags & ac_slc ? "slc" : "",
+             cache_flags & ac_dlc ? "dlc" : "");
 
       LLVMTypeRef param_types[] = {ctx->v2i32, ctx->v4i32};
       LLVMTypeRef calltype = LLVMFunctionType(LLVMVectorType(ctx->f32, 5), param_types, 2, false);
@@ -1435,7 +1427,7 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueR
    }
 
    return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0,
-                                      num_channels, d16 ? ctx->f16 : ctx->f32, cache_policy,
+                                      num_channels, d16 ? ctx->f16 : ctx->f32, access,
                                       can_speculate, true);
 }
 
@@ -1443,7 +1435,7 @@ static LLVMValueRef ac_build_tbuffer_load(struct ac_llvm_context *ctx, LLVMValue
                                           LLVMValueRef vindex, LLVMValueRef voffset,
                                           LLVMValueRef soffset, unsigned num_channels,
                                           unsigned tbuffer_format, LLVMTypeRef channel_type,
-                                          unsigned cache_policy, bool can_speculate)
+                                          enum gl_access_qualifier access, bool can_speculate)
 {
    LLVMValueRef args[6];
    int idx = 0;
@@ -1453,7 +1445,7 @@ static LLVMValueRef ac_build_tbuffer_load(struct ac_llvm_context *ctx, LLVMValue
    args[idx++] = voffset ? voffset : ctx->i32_0;
    args[idx++] = soffset ? soffset : ctx->i32_0;
    args[idx++] = LLVMConstInt(ctx->i32, tbuffer_format, 0);
-   args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
+   args[idx++] = LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | ACCESS_TYPE_LOAD), 0);
    const char *indexing_kind = vindex ? "struct" : "raw";
    char name[256], type_name[8];
 
@@ -1474,7 +1466,7 @@ LLVMValueRef ac_build_safe_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRe
                                         unsigned align_offset,
                                         unsigned align_mul,
                                         unsigned num_channels,
-                                        unsigned cache_policy,
+                                        enum gl_access_qualifier access,
                                         bool can_speculate)
 {
    const unsigned max_channels = vtx_info->num_channels;
@@ -1503,7 +1495,7 @@ LLVMValueRef ac_build_safe_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRe
       LLVMValueRef item =
          ac_build_tbuffer_load(ctx, rsrc, vidx, fetch_voffset, soffset,
                                fetch_num_channels, fetch_format, channel_type,
-                               cache_policy, can_speculate);
+                               access, can_speculate);
       result = ac_build_concat(ctx, result, item);
    }
 
@@ -1513,35 +1505,35 @@ LLVMValueRef ac_build_safe_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRe
 
 LLVMValueRef ac_build_buffer_load_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
                                         LLVMValueRef voffset, LLVMValueRef soffset,
-                                        unsigned cache_policy)
+                                        enum gl_access_qualifier access)
 {
    return ac_build_buffer_load_common(ctx, rsrc, NULL, voffset, soffset, 1, ctx->i16,
-                                      cache_policy, false, false);
+                                      access, false, false);
 }
 
 LLVMValueRef ac_build_buffer_load_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
                                        LLVMValueRef voffset, LLVMValueRef soffset,
-                                       unsigned cache_policy)
+                                       enum gl_access_qualifier access)
 {
-   return ac_build_buffer_load_common(ctx, rsrc, NULL, voffset, soffset, 1, ctx->i8, cache_policy,
+   return ac_build_buffer_load_common(ctx, rsrc, NULL, voffset, soffset, 1, ctx->i8, access,
                                       false, false);
 }
 
 void ac_build_buffer_store_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
                                  LLVMValueRef vdata, LLVMValueRef voffset, LLVMValueRef soffset,
-                                 unsigned cache_policy)
+                                 enum gl_access_qualifier access)
 {
    vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, "");
 
-   ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, cache_policy, false);
+   ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, access, false);
 }
 
 void ac_build_buffer_store_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
-                                LLVMValueRef voffset, LLVMValueRef soffset, unsigned cache_policy)
+                                LLVMValueRef voffset, LLVMValueRef soffset, enum gl_access_qualifier access)
 {
    vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i8, "");
 
-   ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, cache_policy, false);
+   ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, access, false);
 }
 
 /**
@@ -2025,7 +2017,11 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_
 
    args[num_args++] = a->tfe ? ctx->i32_1 : ctx->i32_0; /* texfailctrl */
    args[num_args++] = LLVMConstInt(
-      ctx->i32, load ? get_load_cache_policy(ctx, a->cache_policy) : a->cache_policy, false);
+      ctx->i32, get_cache_flags(ctx,
+                                a->access |
+                                (atomic ? ACCESS_TYPE_ATOMIC :
+                                 load ? ACCESS_TYPE_LOAD : ACCESS_TYPE_STORE)),
+      false);
 
    const char *name;
    const char *atomic_subop = "";
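With the old get_load_cache_policy/get_store_cache_policy pair collapsed into a single get_cache_flags, the LLVM build helpers take NIR-style access flags directly and OR in the access type themselves. A hypothetical caller (rsrc and voffset assumed to exist; not from this commit) now looks like:

/* Plain reorderable load with no special cache behavior. */
LLVMValueRef value =
   ac_build_buffer_load(&ctx->ac, rsrc, 4, NULL, voffset, ctx->ac.i32_0,
                        ctx->ac.f32, (enum gl_access_qualifier)0,
                        true /* can_speculate */, true /* allow_smem */);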
@@ -281,28 +281,28 @@ LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
 
 void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
                                  LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset,
-                                 unsigned cache_policy);
+                                 enum gl_access_qualifier access);
 
 void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef data,
-                                  LLVMValueRef vindex, LLVMValueRef voffset, unsigned cache_policy);
+                                  LLVMValueRef vindex, LLVMValueRef voffset, enum gl_access_qualifier access);
 
 LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, int num_channels,
                                   LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset,
-                                  LLVMTypeRef channel_type, unsigned cache_policy,
+                                  LLVMTypeRef channel_type, enum gl_access_qualifier access,
                                   bool can_speculate, bool allow_smem);
 
 LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
                                          LLVMValueRef vindex, LLVMValueRef voffset,
-                                         unsigned num_channels, unsigned cache_policy,
+                                         unsigned num_channels, enum gl_access_qualifier access,
                                          bool can_speculate, bool d16, bool tfe);
 
 LLVMValueRef ac_build_buffer_load_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
                                         LLVMValueRef voffset, LLVMValueRef soffset,
-                                        unsigned cache_policy);
+                                        enum gl_access_qualifier access);
 
 LLVMValueRef ac_build_buffer_load_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
                                        LLVMValueRef voffset, LLVMValueRef soffset,
-                                       unsigned cache_policy);
+                                       enum gl_access_qualifier access);
 
 LLVMValueRef ac_build_safe_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
                                         LLVMValueRef vindex, LLVMValueRef voffset,
@@ -312,15 +312,15 @@ LLVMValueRef ac_build_safe_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRe
                                         unsigned align_offset,
                                         unsigned align_mul,
                                         unsigned num_channels,
-                                        unsigned cache_policy,
+                                        enum gl_access_qualifier access,
                                         bool can_speculate);
 
 void ac_build_buffer_store_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
                                  LLVMValueRef vdata, LLVMValueRef voffset, LLVMValueRef soffset,
-                                 unsigned cache_policy);
+                                 enum gl_access_qualifier access);
 
 void ac_build_buffer_store_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
-                                LLVMValueRef voffset, LLVMValueRef soffset, unsigned cache_policy);
+                                LLVMValueRef voffset, LLVMValueRef soffset, enum gl_access_qualifier access);
 
 void ac_set_range_metadata(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned lo,
                            unsigned hi);
@@ -391,21 +391,12 @@ enum ac_atomic_op
    ac_atomic_fmax,
 };
 
-/* These cache policy bits match the definitions used by the LLVM intrinsics. */
-enum ac_image_cache_policy
-{
-   ac_glc = 1 << 0,      /* per-CU cache control */
-   ac_slc = 1 << 1,      /* global L2 cache control */
-   ac_dlc = 1 << 2,      /* per-shader-array cache control */
-   ac_swizzled = 1 << 3, /* the access is swizzled, disabling load/store merging */
-};
-
 struct ac_image_args {
    enum ac_image_opcode opcode;
    enum ac_atomic_op atomic; /* for the ac_image_atomic opcode */
    enum ac_image_dim dim;
+   enum gl_access_qualifier access;
    unsigned dmask : 4;
-   unsigned cache_policy : 3;
    bool unorm : 1;
    bool level_zero : 1;
    bool d16 : 1; /* GFX8+: data and return values are 16-bit */
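Since ac_image_args loses its cache_policy bitfield, image call sites fill in the access field instead. A minimal sketch (field values hypothetical; `instr` is the NIR intrinsic assumed to be in scope):

struct ac_image_args args = {0};
args.opcode = ac_image_load;
args.dim = ac_image_2d;
args.access = ac_get_mem_access_flags(instr); /* NIR-derived ACCESS_* flags */
args.dmask = 0xf;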
@@ -1804,26 +1804,6 @@ static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueR
    }
 }
 
-static unsigned get_cache_policy(struct ac_nir_context *ctx, enum gl_access_qualifier access,
-                                 bool may_store_unaligned)
-{
-   unsigned cache_policy = 0;
-
-   /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All
-    * store opcodes not aligned to a dword are affected. The only way to
-    * get unaligned stores is through shader images.
-    */
-   if (((may_store_unaligned && ctx->ac.gfx_level == GFX6) ||
-        access & (ACCESS_COHERENT | ACCESS_VOLATILE))) {
-      cache_policy |= ac_glc;
-   }
-
-   if (access & ACCESS_NON_TEMPORAL)
-      cache_policy |= ac_slc | ac_glc;
-
-   return cache_policy;
-}
-
 static LLVMValueRef enter_waterfall_ssbo(struct ac_nir_context *ctx, struct waterfall_context *wctx,
                                          const nir_intrinsic_instr *instr, nir_src src)
 {
@@ -1841,8 +1821,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in
    LLVMValueRef src_data = get_src(ctx, instr->src[0]);
    int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
    unsigned writemask = nir_intrinsic_write_mask(instr);
-   enum gl_access_qualifier access = nir_intrinsic_access(instr);
-   unsigned cache_policy = get_cache_policy(ctx, access, false);
+   enum gl_access_qualifier access = ac_get_mem_access_flags(instr);
 
    struct waterfall_context wctx;
    LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]);
@@ -1897,9 +1876,9 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in
                              LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), "");
 
       if (num_bytes == 1) {
-         ac_build_buffer_store_byte(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy);
+         ac_build_buffer_store_byte(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, access);
       } else if (num_bytes == 2) {
-         ac_build_buffer_store_short(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy);
+         ac_build_buffer_store_short(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, access);
       } else {
          switch (num_bytes) {
          case 16: /* v4f32 */
@@ -1920,7 +1899,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in
          data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, "");
 
          ac_build_buffer_store_dword(&ctx->ac, rsrc, data, NULL, offset,
-                                     ctx->ac.i32_0, cache_policy);
+                                     ctx->ac.i32_0, access);
       }
    }
 
@@ -2066,11 +2045,16 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, nir_intrinsic_
       data = ac_to_float(&ctx->ac, data);
       return_type = LLVMTypeOf(data);
    }
 
+   unsigned cache_flags =
+      ac_get_hw_cache_flags(ctx->ac.gfx_level,
+                            ac_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
+
    params[arg_count++] = data;
    params[arg_count++] = descriptor;
    params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
    params[arg_count++] = ctx->ac.i32_0;               /* soffset */
-   params[arg_count++] = ctx->ac.i32_0;               /* slc */
+   params[arg_count++] = LLVMConstInt(ctx->ac.i32, cache_flags, 0);
 
    ac_build_type_name_for_intr(return_type, type, sizeof(type));
    snprintf(name, sizeof(name), "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type);
@@ -2095,8 +2079,7 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, nir_intrinsic_
 
    int elem_size_bytes = instr->dest.ssa.bit_size / 8;
    int num_components = instr->num_components;
-   enum gl_access_qualifier access = nir_intrinsic_access(instr);
-   unsigned cache_policy = get_cache_policy(ctx, access, false);
+   enum gl_access_qualifier access = ac_get_mem_access_flags(instr);
 
    LLVMValueRef offset = get_src(ctx, instr->src[1]);
    LLVMValueRef rsrc = ctx->abi->load_ssbo ?
@@ -2122,16 +2105,16 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, nir_intrinsic_
 
       if (load_bytes == 1) {
          ret = ac_build_buffer_load_byte(&ctx->ac, rsrc, voffset, ctx->ac.i32_0,
-                                         cache_policy);
+                                         access);
       } else if (load_bytes == 2) {
          ret = ac_build_buffer_load_short(&ctx->ac, rsrc, voffset, ctx->ac.i32_0,
-                                          cache_policy);
+                                          access);
       } else {
         int num_channels = util_next_power_of_two(load_bytes) / 4;
         bool can_speculate = access & ACCESS_CAN_REORDER;
 
         ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels, vindex, voffset, ctx->ac.i32_0,
-                                   ctx->ac.f32, cache_policy, can_speculate, false);
+                                   ctx->ac.f32, access, can_speculate, false);
       }
 
       LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
@@ -2507,7 +2490,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intri
 
    struct ac_image_args args = {0};
 
-   args.cache_policy = get_cache_policy(ctx, access, false);
+   args.access = ac_get_mem_access_flags(instr);
    args.tfe = instr->intrinsic == nir_intrinsic_bindless_image_sparse_load;
 
    if (dim == GLSL_SAMPLER_DIM_BUF) {
@@ -2523,7 +2506,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intri
      assert(instr->dest.is_ssa);
      bool can_speculate = access & ACCESS_CAN_REORDER;
      res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, ctx->ac.i32_0, num_channels,
-                                       args.cache_policy, can_speculate,
+                                       args.access, can_speculate,
                                        instr->dest.ssa.bit_size == 16,
                                        args.tfe);
      res = ac_build_expand(&ctx->ac, res, num_channels, args.tfe ? 5 : 4);
@@ -2588,14 +2571,13 @@ static void visit_image_store(struct ac_nir_context *ctx, const nir_intrinsic_in
    }
 
    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
-   enum gl_access_qualifier access = nir_intrinsic_access(instr);
    bool is_array = nir_intrinsic_image_array(instr);
 
    struct waterfall_context wctx;
    LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
 
    struct ac_image_args args = {0};
-   args.cache_policy = get_cache_policy(ctx, access, true);
+   args.access = ac_get_mem_access_flags(instr);
 
    LLVMValueRef src = get_src(ctx, instr->src[3]);
    if (instr->src[3].ssa->bit_size == 64) {
@@ -2617,7 +2599,7 @@ static void visit_image_store(struct ac_nir_context *ctx, const nir_intrinsic_in
       vindex =
          LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, "");
 
-      ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex, ctx->ac.i32_0, args.cache_policy);
+      ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex, ctx->ac.i32_0, args.access);
    } else {
       bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;
 
@@ -2730,9 +2712,12 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_int
    } else {
       LLVMTypeRef data_type = LLVMTypeOf(params[0]);
       char type[8];
+      unsigned cache_flags =
+         ac_get_hw_cache_flags(ctx->ac.gfx_level,
+                               ac_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
 
       params[param_count++] = ctx->ac.i32_0; /* soffset */
-      params[param_count++] = ctx->ac.i32_0; /* slc */
+      params[param_count++] = LLVMConstInt(ctx->ac.i32, cache_flags, 0);
 
       ac_build_type_name_for_intr(data_type, type, sizeof(type));
       length = snprintf(intrinsic_name, sizeof(intrinsic_name),
@@ -2752,6 +2737,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_int
       args.resource = ctx->abi->load_sampler_desc(ctx->abi, dynamic_index, AC_DESC_IMAGE);
       get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
       args.dim = ac_get_image_dim(ctx->ac.gfx_level, dim, is_array);
+      args.access = ac_get_mem_access_flags(instr);
 
       result = ac_build_image_opcode(&ctx->ac, &args);
    }
@@ -3805,19 +3791,9 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
       LLVMValueRef vidx = idxen ? get_src(ctx, instr->src[src_base + 3]) : NULL;
       unsigned num_components = instr->dest.ssa.num_components;
       unsigned const_offset = nir_intrinsic_base(instr);
-      bool swizzled = nir_intrinsic_access(instr) & ACCESS_IS_SWIZZLED_AMD;
       bool reorder = nir_intrinsic_can_reorder(instr);
-      bool coherent = nir_intrinsic_access(instr) & ACCESS_COHERENT;
-      bool slc = nir_intrinsic_access(instr) & ACCESS_NON_TEMPORAL;
-      bool uses_format = nir_intrinsic_access(instr) & ACCESS_USES_FORMAT_AMD;
-
-      enum ac_image_cache_policy cache_policy = 0;
-      if (swizzled)
-         cache_policy |= ac_swizzled;
-      if (slc)
-         cache_policy |= ac_slc;
-      if (coherent)
-         cache_policy |= ac_glc;
+      enum gl_access_qualifier access = ac_get_mem_access_flags(instr);
+      bool uses_format = access & ACCESS_USES_FORMAT_AMD;
 
       LLVMValueRef voffset = LLVMBuildAdd(ctx->ac.builder, addr_voffset,
                                           LLVMConstInt(ctx->ac.i32, const_offset, 0), "");
@@ -3825,12 +3801,12 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
      if (instr->intrinsic == nir_intrinsic_load_buffer_amd && uses_format) {
         assert(instr->dest.ssa.bit_size == 16 || instr->dest.ssa.bit_size == 32);
        result = ac_build_buffer_load_format(&ctx->ac, descriptor, vidx, voffset, num_components,
-                                            cache_policy, reorder,
+                                            access, reorder,
                                             instr->dest.ssa.bit_size == 16, false);
        result = ac_to_integer(&ctx->ac, result);
      } else if (instr->intrinsic == nir_intrinsic_store_buffer_amd && uses_format) {
        assert(instr->src[0].ssa->bit_size == 16 || instr->src[0].ssa->bit_size == 32);
-       ac_build_buffer_store_format(&ctx->ac, descriptor, store_data, vidx, voffset, cache_policy);
+       ac_build_buffer_store_format(&ctx->ac, descriptor, store_data, vidx, voffset, access);
      } else if (instr->intrinsic == nir_intrinsic_load_buffer_amd ||
                 instr->intrinsic == nir_intrinsic_load_typed_buffer_amd) {
        /* LLVM is unable to select instructions for larger than 32-bit channel types.
@@ -3843,7 +3819,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
 
        if (instr->intrinsic == nir_intrinsic_load_buffer_amd) {
           result = ac_build_buffer_load(&ctx->ac, descriptor, fetch_num_components, vidx, voffset,
-                                        addr_soffset, channel_type, cache_policy, reorder, false);
+                                        addr_soffset, channel_type, access, reorder, false);
        } else {
           const unsigned align_offset = nir_intrinsic_align_offset(instr);
           const unsigned align_mul = nir_intrinsic_align_mul(instr);
@@ -3854,7 +3830,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
           result =
              ac_build_safe_tbuffer_load(&ctx->ac, descriptor, vidx, addr_voffset, addr_soffset,
                                         channel_type, vtx_info, const_offset, align_offset,
-                                        align_mul, fetch_num_components, cache_policy, reorder);
+                                        align_mul, fetch_num_components, access, reorder);
        }
 
        /* Trim to needed vector components. */
@@ -3884,7 +3860,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
 
           LLVMValueRef data = extract_vector_range(&ctx->ac, store_data, start, count);
           ac_build_buffer_store_dword(&ctx->ac, descriptor, data, vidx, voffset, addr_soffset,
-                                      cache_policy);
+                                      access);
        }
      }
      break;
@@ -3933,12 +3909,15 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
       LLVMValueRef data = get_src(ctx, instr->src[1]);
       unsigned base = nir_intrinsic_base(instr);
       LLVMTypeRef return_type = LLVMTypeOf(data);
+      unsigned cache_flags =
+         ac_get_hw_cache_flags(ctx->ac.gfx_level,
+                               ac_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
 
       LLVMValueRef args[] = {
          data, desc,
         LLVMConstInt(ctx->ac.i32, base, false),
         ctx->ac.i32_0, /* soffset */
-        ctx->ac.i32_0, /* cachepolicy */
+        LLVMConstInt(ctx->ac.i32, cache_flags, 0),
       };
 
       char name[64], type[8];
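The hand-rolled swizzled/coherent/slc mapping removed from visit_intrinsic above is now implied by the flags themselves. A hedged check (assumed values, not from the commit) of the unified path for a swizzled, non-temporal buffer store on GFX9:

enum gl_access_qualifier access =
   ACCESS_NON_TEMPORAL | ACCESS_IS_SWIZZLED_AMD | ACCESS_TYPE_STORE;
union ac_hw_cache_flags hw = ac_get_hw_cache_flags(GFX9, access);
assert(hw.value == (ac_slc | ac_swizzled)); /* no GLC: neither coherent nor volatile */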
@@ -372,7 +372,8 @@ static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader
       ac_build_ifcc(&ctx->ac,
                     LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, rel_patch_id, ctx->ac.i32_0, ""), 6504);
       ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->ac.i32, 0x80000000, 0),
-                                  NULL, LLVMConstInt(ctx->ac.i32, offset, 0), tf_base, ac_glc);
+                                  NULL, LLVMConstInt(ctx->ac.i32, offset, 0), tf_base,
+                                  ACCESS_COHERENT);
       ac_build_endif(&ctx->ac, 6504);
       offset += 4;
    }
@@ -381,13 +382,13 @@ static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader
    ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, NULL,
                                LLVMBuildAdd(ctx->ac.builder, byteoffset,
                                             LLVMConstInt(ctx->ac.i32, offset, 0), ""),
-                               tf_base, ac_glc);
+                               tf_base, ACCESS_COHERENT);
    offset += 16;
    if (vec1)
       ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, NULL,
                                   LLVMBuildAdd(ctx->ac.builder, byteoffset,
                                                LLVMConstInt(ctx->ac.i32, offset, 0), ""),
-                                  tf_base, ac_glc);
+                                  tf_base, ACCESS_COHERENT);
 
    /* Store the tess factors into the offchip buffer if TES reads them. */
    if (shader->key.ge.part.tcs.epilog.tes_reads_tess_factors) {
@@ -405,7 +406,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader
       outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_comps);
 
       ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, NULL, tf_outer_offset,
-                                  base, ac_glc);
+                                  base, ACCESS_COHERENT);
       if (inner_comps) {
          param_inner = si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER);
         tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
@@ -413,7 +414,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader
 
          inner_vec = ac_build_gather_values(&ctx->ac, inner, inner_comps);
          ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, NULL,
-                                     tf_inner_offset, base, ac_glc);
+                                     tf_inner_offset, base, ACCESS_COHERENT);
       }
    }
 