diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c index 38b44421abc..e0880a5c026 100644 --- a/src/amd/common/ac_shader_util.c +++ b/src/amd/common/ac_shader_util.c @@ -1093,8 +1093,8 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level, assert(util_bitcount(access & (ACCESS_TYPE_LOAD | ACCESS_TYPE_STORE | ACCESS_TYPE_ATOMIC)) == 1); - assert(!(access & ACCESS_TYPE_SMEM) || access & ACCESS_TYPE_LOAD); - assert(!(access & ACCESS_IS_SWIZZLED_AMD) || !(access & ACCESS_TYPE_SMEM)); + assert(!(access & ACCESS_SMEM_AMD) || access & ACCESS_TYPE_LOAD); + assert(!(access & ACCESS_IS_SWIZZLED_AMD) || !(access & ACCESS_SMEM_AMD)); assert(!(access & ACCESS_MAY_STORE_SUBDWORD) || access & ACCESS_TYPE_STORE); bool scope_is_device = access & (ACCESS_COHERENT | ACCESS_VOLATILE); @@ -1113,7 +1113,7 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level, if (access & ACCESS_NON_TEMPORAL) { if (access & ACCESS_TYPE_LOAD) { /* Don't use non_temporal for SMEM because it can't set regular_temporal for MALL. */ - if (!(access & ACCESS_TYPE_SMEM)) + if (!(access & ACCESS_SMEM_AMD)) result.gfx12.temporal_hint = gfx12_load_near_non_temporal_far_regular_temporal; } else if (access & ACCESS_TYPE_STORE) { result.gfx12.temporal_hint = gfx12_store_near_non_temporal_far_regular_temporal; @@ -1133,7 +1133,7 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level, if (access & ACCESS_TYPE_LOAD && scope_is_device) result.value |= ac_glc; - if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_TYPE_SMEM)) + if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_SMEM_AMD)) result.value |= ac_slc; } else if (gfx_level >= GFX10) { /* GFX10-10.3: @@ -1165,7 +1165,7 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level, if (scope_is_device && !(access & ACCESS_TYPE_ATOMIC)) result.value |= ac_glc | (access & ACCESS_TYPE_LOAD ? ac_dlc : 0); - if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_TYPE_SMEM)) + if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_SMEM_AMD)) result.value |= ac_slc; } else { /* GFX6-GFX9: @@ -1189,11 +1189,11 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level, */ if (scope_is_device && !(access & ACCESS_TYPE_ATOMIC)) { /* SMEM doesn't support the device scope on GFX6-7. */ - assert(gfx_level >= GFX8 || !(access & ACCESS_TYPE_SMEM)); + assert(gfx_level >= GFX8 || !(access & ACCESS_SMEM_AMD)); result.value |= ac_glc; } - if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_TYPE_SMEM)) + if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_SMEM_AMD)) result.value |= ac_slc; /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All store opcodes not diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h index 8b853196572..67eccfe3876 100644 --- a/src/amd/common/ac_shader_util.h +++ b/src/amd/common/ac_shader_util.h @@ -42,11 +42,6 @@ enum { ACCESS_TYPE_STORE = BITFIELD_BIT(28), ACCESS_TYPE_ATOMIC = BITFIELD_BIT(29), - /* This access is expected to use an SMEM instruction if source operands are non-divergent. - * Only loads can set this. - */ - ACCESS_TYPE_SMEM = BITFIELD_BIT(30), - /* Whether a store offset or size alignment is less than 4. */ ACCESS_MAY_STORE_SUBDWORD = BITFIELD_BIT(31), }; diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp index 0c9fbb054d7..5e690833f56 100644 --- a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp @@ -1710,7 +1710,7 @@ load_buffer(isel_context* ctx, unsigned num_components, unsigned component_size, } LoadEmitInfo info = {Operand(offset), dst, num_components, component_size, rsrc}; - info.cache = get_cache_flags(ctx, access | ACCESS_TYPE_LOAD | (use_smem ? ACCESS_TYPE_SMEM : 0)); + info.cache = get_cache_flags(ctx, access | ACCESS_TYPE_LOAD); info.sync = sync; info.align_mul = align_mul; info.align_offset = align_offset; @@ -2558,7 +2558,7 @@ visit_load_global(isel_context* ctx, nir_intrinsic_instr* instr) if (info.resource.id()) info.resource = bld.as_uniform(info.resource); info.offset = Operand(bld.as_uniform(info.offset)); - info.cache = get_cache_flags(ctx, access | ACCESS_TYPE_SMEM); + info.cache = get_cache_flags(ctx, access); EmitLoadParameters params = smem_load_params; params.max_const_offset = ctx->program->dev.smem_offset_max; emit_load(ctx, bld, info, params); diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index 451f5725812..f07192fd3d5 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -991,7 +991,7 @@ LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc rsrc, offset, LLVMConstInt(ctx->i32, get_cache_flags(ctx, access | ACCESS_TYPE_LOAD | - ACCESS_TYPE_SMEM), 0), + ACCESS_SMEM_AMD), 0), }; result[i] = ac_build_intrinsic(ctx, name, channel_type, args, 3, AC_ATTR_INVARIANT_LOAD); }