From 6698753cdb6d001669f51e23d42fec65d74e6b58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 15 Apr 2022 02:00:55 -0400 Subject: [PATCH] ac/llvm: don't use tbuffer_store as a fallback for swizzled stores This depends on the offset computation fix from: "ac/llvm: remove inst_offset parameter from ac_build_buffer_store_dword" v2: The instruction type is changed to MUBUF, which requires us to clear DATA_FORMAT with ADD_TID_ENABLE. Reviewed-by: Mihai Preda (v1) Reviewed-by: Pierre-Eric Pelloux-Prayer (v1) Part-of: --- src/amd/llvm/ac_llvm_build.c | 53 +------------------ .../drivers/radeonsi/si_shader_llvm_gs.c | 13 ++++- 2 files changed, 14 insertions(+), 52 deletions(-) diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index 55cefce89e4..306e23da4a0 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -51,11 +51,6 @@ struct ac_llvm_flow { LLVMBasicBlockRef loop_entry_block; }; -static void ac_build_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc, - LLVMValueRef vdata, LLVMValueRef vindex, LLVMValueRef voffset, - LLVMValueRef soffset, unsigned num_channels, unsigned dfmt, - unsigned nfmt, unsigned cache_policy); - /* Initialize module-independent parts of the context. * * The caller is responsible for initializing ctx::module and ctx::builder. @@ -1174,24 +1169,8 @@ void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, return; } - /* SWIZZLE_ENABLE requires that soffset isn't folded into voffset - * (voffset is swizzled, but soffset isn't swizzled). - * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter. - */ - if (!(cache_policy & ac_swizzled)) { - ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata), vindex, voffset, soffset, - cache_policy, false); - return; - } - - static const unsigned dfmts[] = {V_008F0C_BUF_DATA_FORMAT_32, V_008F0C_BUF_DATA_FORMAT_32_32, - V_008F0C_BUF_DATA_FORMAT_32_32_32, - V_008F0C_BUF_DATA_FORMAT_32_32_32_32}; - unsigned dfmt = dfmts[num_channels - 1]; - unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; - - ac_build_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, soffset, num_channels, dfmt, - nfmt, cache_policy); + ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata), vindex, voffset, soffset, + cache_policy, false); } static LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc, @@ -1659,34 +1638,6 @@ LLVMValueRef ac_build_opencoded_load_format(struct ac_llvm_context *ctx, unsigne return ac_build_gather_values(ctx, loads, 4); } -static void ac_build_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc, - LLVMValueRef vdata, LLVMValueRef vindex, LLVMValueRef voffset, - LLVMValueRef soffset, unsigned num_channels, unsigned dfmt, - unsigned nfmt, unsigned cache_policy) -{ - LLVMValueRef args[7]; - int idx = 0; - args[idx++] = vdata; - args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); - if (vindex) - args[idx++] = vindex ? vindex : ctx->i32_0; - args[idx++] = voffset ? voffset : ctx->i32_0; - args[idx++] = soffset ? soffset : ctx->i32_0; - args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0); - args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0); - unsigned func = - !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels; - const char *indexing_kind = vindex ? "struct" : "raw"; - char name[256], type_name[8]; - - LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32; - ac_build_type_name_for_intr(type, type_name, sizeof(type_name)); - - snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s", indexing_kind, type_name); - - ac_build_intrinsic(ctx, name, ctx->voidt, args, idx, AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY); -} - void ac_build_tbuffer_store_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata, LLVMValueRef voffset, LLVMValueRef soffset, unsigned cache_policy) diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index d5759026866..9362499b97e 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -332,6 +332,13 @@ void si_preload_esgs_ring(struct si_shader_context *ctx) S_008F0C_ELEMENT_SIZE(1) | S_008F0C_INDEX_STRIDE(3) | S_008F0C_ADD_TID_ENABLE(1), 0), ""); + + /* If MUBUF && ADD_TID_ENABLE, DATA_FORMAT means STRIDE[14:17] on gfx8-9, so set 0. */ + if (ctx->screen->info.chip_class == GFX8) { + desc3 = LLVMBuildAnd(builder, desc3, + LLVMConstInt(ctx->ac.i32, C_008F0C_DATA_FORMAT, 0), ""); + } + ctx->esgs_ring = LLVMBuildInsertElement(builder, ctx->esgs_ring, desc1, ctx->ac.i32_1, ""); ctx->esgs_ring = LLVMBuildInsertElement(builder, ctx->esgs_ring, desc3, LLVMConstInt(ctx->ac.i32, 3, 0), ""); @@ -408,8 +415,12 @@ void si_preload_gs_rings(struct si_shader_context *ctx) rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); } else { + /* If MUBUF && ADD_TID_ENABLE, DATA_FORMAT means STRIDE[14:17] on gfx8-9, so set 0. */ + unsigned data_format = ctx->ac.chip_class == GFX8 || ctx->ac.chip_class == GFX9 ? + 0 : V_008F0C_BUF_DATA_FORMAT_32; + rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_DATA_FORMAT(data_format) | S_008F0C_ELEMENT_SIZE(1); /* element_size = 4 (bytes) */ }