diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c
index c34712c6943..615644c1dd4 100644
--- a/src/amd/common/ac_nir.c
+++ b/src/amd/common/ac_nir.c
@@ -2343,3 +2343,36 @@ ac_nir_lower_bit_size_callback(const nir_instr *instr, void *data)
 
    return 0;
 }
+
+/* Get chip-agnostic memory instruction access flags (as opposed to chip-specific GLC/DLC/SLC)
+ * from a NIR memory intrinsic.
+ */
+enum gl_access_qualifier ac_nir_get_mem_access_flags(const nir_intrinsic_instr *instr)
+{
+   enum gl_access_qualifier access =
+      nir_intrinsic_has_access(instr) ? nir_intrinsic_access(instr) : 0;
+
+   /* Determine ACCESS_MAY_STORE_SUBDWORD. (for the GFX6 TC L1 bug workaround) */
+   if (!nir_intrinsic_infos[instr->intrinsic].has_dest) {
+      switch (instr->intrinsic) {
+      case nir_intrinsic_bindless_image_store:
+         access |= ACCESS_MAY_STORE_SUBDWORD;
+         break;
+
+      case nir_intrinsic_store_ssbo:
+      case nir_intrinsic_store_buffer_amd:
+      case nir_intrinsic_store_global:
+      case nir_intrinsic_store_global_amd:
+         if (access & ACCESS_USES_FORMAT_AMD ||
+             (nir_intrinsic_has_align_offset(instr) && nir_intrinsic_align(instr) % 4 != 0) ||
+             ((instr->src[0].ssa->bit_size / 8) * instr->src[0].ssa->num_components) % 4 != 0)
+            access |= ACCESS_MAY_STORE_SUBDWORD;
+         break;
+
+      default:
+         unreachable("unexpected store instruction");
+      }
+   }
+
+   return access;
+}
\ No newline at end of file
diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h
index 5ba953906a8..3602e5761a8 100644
--- a/src/amd/common/ac_nir.h
+++ b/src/amd/common/ac_nir.h
@@ -372,6 +372,9 @@ ac_nir_optimize_uniform_atomics(nir_shader *nir);
 unsigned
 ac_nir_lower_bit_size_callback(const nir_instr *instr, void *data);
 
+enum gl_access_qualifier
+ac_nir_get_mem_access_flags(const nir_intrinsic_instr *instr);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c
index 8de1c58ffeb..532246d574f 100644
--- a/src/amd/common/ac_shader_util.c
+++ b/src/amd/common/ac_shader_util.c
@@ -1399,42 +1399,9 @@ void ac_get_scratch_tmpring_size(const struct radeon_info *info,
                        S_0286E8_WAVESIZE(*max_seen_bytes_per_wave >> size_shift);
 }
 
-/* Get chip-agnostic memory instruction access flags (as opposed to chip-specific GLC/DLC/SLC)
- * from a NIR memory intrinsic.
- */
-enum gl_access_qualifier ac_get_mem_access_flags(const nir_intrinsic_instr *instr)
-{
-   enum gl_access_qualifier access =
-      nir_intrinsic_has_access(instr) ? nir_intrinsic_access(instr) : 0;
-
-   /* Determine ACCESS_MAY_STORE_SUBDWORD. (for the GFX6 TC L1 bug workaround) */
-   if (!nir_intrinsic_infos[instr->intrinsic].has_dest) {
-      switch (instr->intrinsic) {
-      case nir_intrinsic_bindless_image_store:
-         access |= ACCESS_MAY_STORE_SUBDWORD;
-         break;
-
-      case nir_intrinsic_store_ssbo:
-      case nir_intrinsic_store_buffer_amd:
-      case nir_intrinsic_store_global:
-      case nir_intrinsic_store_global_amd:
-         if (access & ACCESS_USES_FORMAT_AMD ||
-             (nir_intrinsic_has_align_offset(instr) && nir_intrinsic_align(instr) % 4 != 0) ||
-             ((instr->src[0].ssa->bit_size / 8) * instr->src[0].ssa->num_components) % 4 != 0)
-            access |= ACCESS_MAY_STORE_SUBDWORD;
-         break;
-
-      default:
-         unreachable("unexpected store instruction");
-      }
-   }
-
-   return access;
-}
-
 /* Convert chip-agnostic memory access flags into hw-specific cache flags.
  *
- * "access" must be a result of ac_get_mem_access_flags() with the appropriate ACCESS_TYPE_*
+ * "access" must be a result of ac_nir_get_mem_access_flags() with the appropriate ACCESS_TYPE_*
  * flags set.
  */
 union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h
index c60c7f6378d..5008f947a80 100644
--- a/src/amd/common/ac_shader_util.h
+++ b/src/amd/common/ac_shader_util.h
@@ -336,8 +336,6 @@
 ac_ngg_get_scratch_lds_size(gl_shader_stage stage,
                            bool can_cull,
                            bool compact_primitives);
 
-enum gl_access_qualifier ac_get_mem_access_flags(const nir_intrinsic_instr *instr);
-
 union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
                                               enum gl_access_qualifier access);
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 74bd6a30319..e210d3c4dc5 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -1572,7 +1572,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in
    LLVMValueRef src_data = get_src(ctx, instr->src[0]);
    int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
    unsigned writemask = nir_intrinsic_write_mask(instr);
-   enum gl_access_qualifier access = ac_get_mem_access_flags(instr);
+   enum gl_access_qualifier access = ac_nir_get_mem_access_flags(instr);
 
    struct waterfall_context wctx;
    LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]);
@@ -1792,7 +1792,7 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, nir_intrinsic_
 
    unsigned cache_flags =
       ac_get_hw_cache_flags(ctx->ac.gfx_level,
-                            ac_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
+                            ac_nir_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
 
    params[arg_count++] = data;
    params[arg_count++] = descriptor;
@@ -1820,7 +1820,7 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, nir_intrinsic_
 
    int elem_size_bytes = instr->def.bit_size / 8;
    int num_components = instr->num_components;
-   enum gl_access_qualifier access = ac_get_mem_access_flags(instr);
+   enum gl_access_qualifier access = ac_nir_get_mem_access_flags(instr);
 
    LLVMValueRef offset = get_src(ctx, instr->src[1]);
    LLVMValueRef rsrc = ctx->abi->load_ssbo ?
@@ -2198,7 +2198,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intri
 
    struct ac_image_args args = {0};
 
-   args.access = ac_get_mem_access_flags(instr);
+   args.access = ac_nir_get_mem_access_flags(instr);
    args.tfe = instr->intrinsic == nir_intrinsic_bindless_image_sparse_load;
 
    if (dim == GLSL_SAMPLER_DIM_BUF) {
@@ -2284,7 +2284,7 @@ static void visit_image_store(struct ac_nir_context *ctx, const nir_intrinsic_in
    LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
 
    struct ac_image_args args = {0};
-   args.access = ac_get_mem_access_flags(instr);
+   args.access = ac_nir_get_mem_access_flags(instr);
 
    LLVMValueRef src = get_src(ctx, instr->src[3]);
    if (instr->src[3].ssa->bit_size == 64) {
@@ -2415,7 +2415,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_int
    char type[8];
    unsigned cache_flags =
       ac_get_hw_cache_flags(ctx->ac.gfx_level,
-                            ac_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
+                            ac_nir_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
 
    params[param_count++] = ctx->ac.i32_0; /* soffset */
    params[param_count++] = LLVMConstInt(ctx->ac.i32, cache_flags, 0);
@@ -2439,7 +2439,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_int
       get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
       args.dim = ac_get_image_dim(ctx->ac.gfx_level, dim, is_array);
       args.a16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.coords[0])) == 16;
-      args.access = ac_get_mem_access_flags(instr);
+      args.access = ac_nir_get_mem_access_flags(instr);
 
       result = ac_build_image_opcode(&ctx->ac, &args);
    }
@@ -3081,7 +3081,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
       unsigned num_components = instr->def.num_components;
       unsigned const_offset = nir_intrinsic_base(instr);
       bool reorder = nir_intrinsic_can_reorder(instr);
-      enum gl_access_qualifier access = ac_get_mem_access_flags(instr);
+      enum gl_access_qualifier access = ac_nir_get_mem_access_flags(instr);
       bool uses_format = access & ACCESS_USES_FORMAT_AMD;
 
       LLVMValueRef voffset = LLVMBuildAdd(ctx->ac.builder, addr_voffset,