ac/nir: Move ac_nir_get_mem_access_flags to ac_nir.c

And change its name to indicate that it is NIR specific.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32966>
This commit is contained in:
Timur Kristóf 2025-01-09 15:57:25 -06:00
parent ad5c0b7103
commit cc0166462e
5 changed files with 45 additions and 44 deletions

View file

@ -2343,3 +2343,36 @@ ac_nir_lower_bit_size_callback(const nir_instr *instr, void *data)
return 0;
}
/* Get chip-agnostic memory instruction access flags (as opposed to chip-specific GLC/DLC/SLC)
* from a NIR memory intrinsic.
*/
enum gl_access_qualifier ac_nir_get_mem_access_flags(const nir_intrinsic_instr *instr)
{
enum gl_access_qualifier access =
nir_intrinsic_has_access(instr) ? nir_intrinsic_access(instr) : 0;
/* Determine ACCESS_MAY_STORE_SUBDWORD. (for the GFX6 TC L1 bug workaround) */
if (!nir_intrinsic_infos[instr->intrinsic].has_dest) {
switch (instr->intrinsic) {
case nir_intrinsic_bindless_image_store:
access |= ACCESS_MAY_STORE_SUBDWORD;
break;
case nir_intrinsic_store_ssbo:
case nir_intrinsic_store_buffer_amd:
case nir_intrinsic_store_global:
case nir_intrinsic_store_global_amd:
if (access & ACCESS_USES_FORMAT_AMD ||
(nir_intrinsic_has_align_offset(instr) && nir_intrinsic_align(instr) % 4 != 0) ||
((instr->src[0].ssa->bit_size / 8) * instr->src[0].ssa->num_components) % 4 != 0)
access |= ACCESS_MAY_STORE_SUBDWORD;
break;
default:
unreachable("unexpected store instruction");
}
}
return access;
}

View file

@ -372,6 +372,9 @@ ac_nir_optimize_uniform_atomics(nir_shader *nir);
unsigned
ac_nir_lower_bit_size_callback(const nir_instr *instr, void *data);
/* Return the chip-agnostic access flags of a NIR memory intrinsic: its own
 * access qualifiers (0 if it has none), plus ACCESS_MAY_STORE_SUBDWORD for
 * destination-less store intrinsics that may write less than a whole dword.
 */
enum gl_access_qualifier
ac_nir_get_mem_access_flags(const nir_intrinsic_instr *instr);
#ifdef __cplusplus
}
#endif

View file

@ -1399,42 +1399,9 @@ void ac_get_scratch_tmpring_size(const struct radeon_info *info,
S_0286E8_WAVESIZE(*max_seen_bytes_per_wave >> size_shift);
}
/* Get chip-agnostic memory instruction access flags (as opposed to chip-specific GLC/DLC/SLC)
* from a NIR memory intrinsic.
*/
enum gl_access_qualifier ac_get_mem_access_flags(const nir_intrinsic_instr *instr)
{
enum gl_access_qualifier access =
nir_intrinsic_has_access(instr) ? nir_intrinsic_access(instr) : 0;
/* Determine ACCESS_MAY_STORE_SUBDWORD. (for the GFX6 TC L1 bug workaround) */
if (!nir_intrinsic_infos[instr->intrinsic].has_dest) {
switch (instr->intrinsic) {
case nir_intrinsic_bindless_image_store:
access |= ACCESS_MAY_STORE_SUBDWORD;
break;
case nir_intrinsic_store_ssbo:
case nir_intrinsic_store_buffer_amd:
case nir_intrinsic_store_global:
case nir_intrinsic_store_global_amd:
if (access & ACCESS_USES_FORMAT_AMD ||
(nir_intrinsic_has_align_offset(instr) && nir_intrinsic_align(instr) % 4 != 0) ||
((instr->src[0].ssa->bit_size / 8) * instr->src[0].ssa->num_components) % 4 != 0)
access |= ACCESS_MAY_STORE_SUBDWORD;
break;
default:
unreachable("unexpected store instruction");
}
}
return access;
}
/* Convert chip-agnostic memory access flags into hw-specific cache flags.
*
* "access" must be a result of ac_get_mem_access_flags() with the appropriate ACCESS_TYPE_*
* "access" must be a result of ac_nir_get_mem_access_flags() with the appropriate ACCESS_TYPE_*
* flags set.
*/
union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,

View file

@ -336,8 +336,6 @@ ac_ngg_get_scratch_lds_size(gl_shader_stage stage,
bool can_cull,
bool compact_primitives);
enum gl_access_qualifier ac_get_mem_access_flags(const nir_intrinsic_instr *instr);
union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
enum gl_access_qualifier access);

View file

@ -1572,7 +1572,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in
LLVMValueRef src_data = get_src(ctx, instr->src[0]);
int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
unsigned writemask = nir_intrinsic_write_mask(instr);
enum gl_access_qualifier access = ac_get_mem_access_flags(instr);
enum gl_access_qualifier access = ac_nir_get_mem_access_flags(instr);
struct waterfall_context wctx;
LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]);
@ -1792,7 +1792,7 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, nir_intrinsic_
unsigned cache_flags =
ac_get_hw_cache_flags(ctx->ac.gfx_level,
ac_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
ac_nir_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
params[arg_count++] = data;
params[arg_count++] = descriptor;
@ -1820,7 +1820,7 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, nir_intrinsic_
int elem_size_bytes = instr->def.bit_size / 8;
int num_components = instr->num_components;
enum gl_access_qualifier access = ac_get_mem_access_flags(instr);
enum gl_access_qualifier access = ac_nir_get_mem_access_flags(instr);
LLVMValueRef offset = get_src(ctx, instr->src[1]);
LLVMValueRef rsrc = ctx->abi->load_ssbo ?
@ -2198,7 +2198,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intri
struct ac_image_args args = {0};
args.access = ac_get_mem_access_flags(instr);
args.access = ac_nir_get_mem_access_flags(instr);
args.tfe = instr->intrinsic == nir_intrinsic_bindless_image_sparse_load;
if (dim == GLSL_SAMPLER_DIM_BUF) {
@ -2284,7 +2284,7 @@ static void visit_image_store(struct ac_nir_context *ctx, const nir_intrinsic_in
LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
struct ac_image_args args = {0};
args.access = ac_get_mem_access_flags(instr);
args.access = ac_nir_get_mem_access_flags(instr);
LLVMValueRef src = get_src(ctx, instr->src[3]);
if (instr->src[3].ssa->bit_size == 64) {
@ -2415,7 +2415,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_int
char type[8];
unsigned cache_flags =
ac_get_hw_cache_flags(ctx->ac.gfx_level,
ac_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
ac_nir_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
params[param_count++] = ctx->ac.i32_0; /* soffset */
params[param_count++] = LLVMConstInt(ctx->ac.i32, cache_flags, 0);
@ -2439,7 +2439,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_int
get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
args.dim = ac_get_image_dim(ctx->ac.gfx_level, dim, is_array);
args.a16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.coords[0])) == 16;
args.access = ac_get_mem_access_flags(instr);
args.access = ac_nir_get_mem_access_flags(instr);
result = ac_build_image_opcode(&ctx->ac, &args);
}
@ -3081,7 +3081,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
unsigned num_components = instr->def.num_components;
unsigned const_offset = nir_intrinsic_base(instr);
bool reorder = nir_intrinsic_can_reorder(instr);
enum gl_access_qualifier access = ac_get_mem_access_flags(instr);
enum gl_access_qualifier access = ac_nir_get_mem_access_flags(instr);
bool uses_format = access & ACCESS_USES_FORMAT_AMD;
LLVMValueRef voffset = LLVMBuildAdd(ctx->ac.builder, addr_voffset,