diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c
index e447b095e0f..6762f7c6829 100644
--- a/src/amd/common/ac_nir.c
+++ b/src/amd/common/ac_nir.c
@@ -164,8 +164,8 @@ emit_streamout(nir_builder *b, unsigned stream, nir_xfb_info *info,
       nir_ssa_def *data = nir_vec(b, vec, util_last_bit(mask));
       nir_ssa_def *zero = nir_imm_int(b, 0);
       nir_store_buffer_amd(b, data, so_buffers[buffer], so_write_offset[buffer], zero, zero,
-                           .base = output->offset, .slc_amd = true, .write_mask = mask,
-                           .access = ACCESS_COHERENT);
+                           .base = output->offset, .write_mask = mask,
+                           .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY);
    }
 
    nir_pop_if(b, NULL);
@@ -221,7 +221,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
          outputs[location][j] =
            nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
                                .base = offset, .is_swizzled = false,
-                               .slc_amd = true, .access = ACCESS_COHERENT);
+                               .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY);
 
          offset += gs_nir->info.gs.vertices_out * 16 * 4;
       }
@@ -528,8 +528,8 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
       nir_ssa_def *data = nir_u2uN(b, output, 32);
 
       nir_store_buffer_amd(b, data, gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
-                           .is_swizzled = true, .slc_amd = true,
-                           .access = ACCESS_COHERENT,
+                           .is_swizzled = true,
+                           .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY,
                            /* For ACO to not reorder this store around EmitVertex/EndPrimitve */
                            .memory_modes = nir_var_shader_out);
    }
@@ -571,8 +571,8 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
 
       nir_store_buffer_amd(b, nir_pack_32_2x16_split(b, output_lo, output_hi),
                            gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
-                           .is_swizzled = true, .slc_amd = true,
-                           .access = ACCESS_COHERENT,
+                           .is_swizzled = true,
+                           .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY,
                            /* For ACO to not reorder this store around EmitVertex/EndPrimitve */
                            .memory_modes = nir_var_shader_out);
    }
diff --git a/src/amd/common/ac_nir_lower_esgs_io_to_mem.c b/src/amd/common/ac_nir_lower_esgs_io_to_mem.c
index 9a483b3c694..401b5ad359d 100644
--- a/src/amd/common/ac_nir_lower_esgs_io_to_mem.c
+++ b/src/amd/common/ac_nir_lower_esgs_io_to_mem.c
@@ -110,8 +110,9 @@ emit_split_buffer_store(nir_builder *b, nir_ssa_def *d, nir_ssa_def *desc, nir_s
          store_bytes = MIN2(store_bytes, 2);
 
       nir_ssa_def *store_val = nir_extract_bits(b, &d, 1, start_byte * 8u, 1, store_bytes * 8u);
-      nir_store_buffer_amd(b, store_val, desc, v_off, s_off, zero, .is_swizzled = swizzled, .slc_amd = slc,
-                           .base = start_byte, .memory_modes = nir_var_shader_out, .access = ACCESS_COHERENT);
+      nir_store_buffer_amd(b, store_val, desc, v_off, s_off, zero, .is_swizzled = swizzled,
+                           .base = start_byte, .memory_modes = nir_var_shader_out,
+                           .access = ACCESS_COHERENT | (slc ? ACCESS_STREAM_CACHE_POLICY : 0));
 
       start_byte += store_bytes;
       bytes -= store_bytes;
diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c
index 7fbc76329b4..ef908798fd1 100644
--- a/src/amd/common/ac_nir_lower_ngg.c
+++ b/src/amd/common/ac_nir_lower_ngg.c
@@ -1847,7 +1847,7 @@ ngg_build_streamout_vertex(nir_builder *b, nir_xfb_info *info,
                            vtx_buffer_offsets[out->buffer], zero, zero,
                            .base = out->offset,
-                           .slc_amd = true);
+                           .access = ACCESS_STREAM_CACHE_POLICY);
    }
 }
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 8181b215858..a03836f9d9c 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -7271,8 +7271,8 @@ visit_load_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
    Temp idx = idxen ? as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[3].ssa)) : Temp();
 
    bool swizzled = nir_intrinsic_is_swizzled(intrin);
-   bool slc = nir_intrinsic_slc_amd(intrin);
    bool glc = nir_intrinsic_access(intrin) & ACCESS_COHERENT;
+   bool slc = nir_intrinsic_access(intrin) & ACCESS_STREAM_CACHE_POLICY;
 
    unsigned const_offset = nir_intrinsic_base(intrin);
    unsigned elem_size_bytes = intrin->dest.ssa.bit_size / 8u;
@@ -7301,7 +7301,7 @@ visit_store_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
 
    bool swizzled = nir_intrinsic_is_swizzled(intrin);
    bool glc = nir_intrinsic_access(intrin) & ACCESS_COHERENT;
-   bool slc = nir_intrinsic_slc_amd(intrin);
+   bool slc = nir_intrinsic_access(intrin) & ACCESS_STREAM_CACHE_POLICY;
 
    unsigned const_offset = nir_intrinsic_base(intrin);
    unsigned write_mask = nir_intrinsic_write_mask(intrin);
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 89b1af3842c..3d7cf88ec80 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -4183,7 +4183,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
       bool swizzled = nir_intrinsic_is_swizzled(instr);
       bool reorder = nir_intrinsic_can_reorder(instr);
       bool coherent = nir_intrinsic_access(instr) & ACCESS_COHERENT;
-      bool slc = nir_intrinsic_slc_amd(instr);
+      bool slc = nir_intrinsic_access(instr) & ACCESS_STREAM_CACHE_POLICY;
 
       enum ac_image_cache_policy cache_policy = 0;
       if (swizzled)
@@ -4226,7 +4226,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
       unsigned const_offset = nir_intrinsic_base(instr);
       bool swizzled = nir_intrinsic_is_swizzled(instr);
       bool coherent = nir_intrinsic_access(instr) & ACCESS_COHERENT;
-      bool slc = nir_intrinsic_slc_amd(instr);
+      bool slc = nir_intrinsic_access(instr) & ACCESS_STREAM_CACHE_POLICY;
 
       enum ac_image_cache_policy cache_policy = 0;
       if (swizzled)
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 211afb9a8a5..42f9eab673e 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -223,9 +223,6 @@ index("unsigned", "swizzle_mask")
 # Whether the load_buffer_amd/store_buffer_amd is swizzled
 index("bool", "is_swizzled")
 
-# The SLC ("system level coherent") bit of load_buffer_amd/store_buffer_amd
-index("bool", "slc_amd")
-
 # Offsets for load_shared2_amd/store_shared2_amd
 index("uint8_t", "offset0")
 index("uint8_t", "offset1")
@@ -1320,9 +1317,9 @@ intrinsic("optimization_barrier_vgpr_amd", dest_comp=0, src_comp=[0],
 # src[] = { descriptor, vector byte offset, scalar byte offset, index offset }
 # The index offset is multiplied by the stride in the descriptor. The vertex/scalar byte offsets
 # are in bytes.
-intrinsic("load_buffer_amd", src_comp=[4, 1, 1, 1], dest_comp=0, indices=[BASE, IS_SWIZZLED, SLC_AMD, MEMORY_MODES, ACCESS], flags=[CAN_ELIMINATE])
+intrinsic("load_buffer_amd", src_comp=[4, 1, 1, 1], dest_comp=0, indices=[BASE, IS_SWIZZLED, MEMORY_MODES, ACCESS], flags=[CAN_ELIMINATE])
 # src[] = { store value, descriptor, vector byte offset, scalar byte offset, index offset }
-intrinsic("store_buffer_amd", src_comp=[0, 4, 1, 1, 1], indices=[BASE, WRITE_MASK, IS_SWIZZLED, SLC_AMD, MEMORY_MODES, ACCESS])
+intrinsic("store_buffer_amd", src_comp=[0, 4, 1, 1, 1], indices=[BASE, WRITE_MASK, IS_SWIZZLED, MEMORY_MODES, ACCESS])
 # src[] = { address, unsigned 32-bit offset }.
 load("global_amd", [1, 1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
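
For reference, a minimal sketch (not part of the patch) of the convention this change establishes: lowering passes OR ACCESS_STREAM_CACHE_POLICY into the generic ACCESS index instead of setting the removed slc_amd index, and backends recover it with nir_intrinsic_access(). The variables b, data, desc, voffset, soffset, zero, want_slc and intrin below are placeholders for whatever the surrounding pass or backend already has in scope.

/* Producer side (a NIR lowering pass): request the SLC cache policy via ACCESS. */
unsigned access = ACCESS_COHERENT | (want_slc ? ACCESS_STREAM_CACHE_POLICY : 0);
nir_store_buffer_amd(b, data, desc, voffset, soffset, zero,
                     .base = 0, .write_mask = 0x1, .access = access);

/* Consumer side (ACO or the LLVM backend): read both policies from the same index. */
bool glc = nir_intrinsic_access(intrin) & ACCESS_COHERENT;
bool slc = nir_intrinsic_access(intrin) & ACCESS_STREAM_CACHE_POLICY;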