diff --git a/.pick_status.json b/.pick_status.json index 2072a4b5d9a..ba880dae2be 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -607,7 +607,7 @@ "description": "ac/llvm: add missing optimization barrier for 64-bit readlanes", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "master_sha": null, "because_sha": "0f45d4dc2b15e137346e1e3f064a24302e1c9048" }, diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index a131c2a10cd..a1a9a453e4e 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -3611,11 +3611,15 @@ void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, } static LLVMValueRef -_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane) +_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, + LLVMValueRef lane, bool with_opt_barrier) { LLVMTypeRef type = LLVMTypeOf(src); LLVMValueRef result; + if (with_opt_barrier) /* When requested, pass src through ac_build_optimization_barrier before it is zero-extended to i32. */ + ac_build_optimization_barrier(ctx, &src); + src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); if (lane) lane = LLVMBuildZExt(ctx->builder, lane, ctx->i32, ""); @@ -3630,6 +3634,43 @@ _ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef l return LLVMBuildTrunc(ctx->builder, result, type, ""); } +static LLVMValueRef /* Shared body of ac_build_readlane() and ac_build_readlane_no_opt_barrier(); with_opt_barrier is forwarded to _ac_build_readlane(). */ +ac_build_readlane_common(struct ac_llvm_context *ctx, + LLVMValueRef src, LLVMValueRef lane, + bool with_opt_barrier) +{ + LLVMTypeRef src_type = LLVMTypeOf(src); + src = ac_to_integer(ctx, src); /* src_type above keeps the original type so the result can be cast back at the end. */ + unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src)); + LLVMValueRef ret; + + if (bits > 32) { /* Wider than 32 bits: bitcast to a vector of i32 and read each component separately. */ + assert(bits % 32 == 0); + LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32); + LLVMValueRef src_vector = + LLVMBuildBitCast(ctx->builder, src, vec_type, ""); + ret = LLVMGetUndef(vec_type); + for (unsigned i = 0; i < bits / 32; i++) { + LLVMValueRef ret_comp; + + src = LLVMBuildExtractElement(ctx->builder, src_vector, + LLVMConstInt(ctx->i32, i, 0), ""); + + 
ret_comp = _ac_build_readlane(ctx, src, lane, + with_opt_barrier); /* The flag is passed for every component, so each 32-bit piece gets its own barrier call. */ + + ret = LLVMBuildInsertElement(ctx->builder, ret, ret_comp, + LLVMConstInt(ctx->i32, i, 0), ""); + } + } else { + ret = _ac_build_readlane(ctx, src, lane, with_opt_barrier); + } + + if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind) /* Convert back to the original type: inttoptr for pointers, bitcast otherwise. NOTE(review): when bits > 32, ret is a vector of i32 here; confirm inttoptr is valid for that case. */ + return LLVMBuildIntToPtr(ctx->builder, ret, src_type, ""); + return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); +} + /** * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic. * @@ -3642,44 +3683,16 @@ _ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef l * @return value of the lane */ LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx, - LLVMValueRef src, LLVMValueRef lane) + LLVMValueRef src, LLVMValueRef lane) { - unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src)); - LLVMValueRef ret; - - if (bits > 32) { - assert(bits % 32 == 0); - LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32); - LLVMValueRef src_vector = - LLVMBuildBitCast(ctx->builder, src, vec_type, ""); - ret = LLVMGetUndef(vec_type); - for (unsigned i = 0; i < bits / 32; i++) { - src = LLVMBuildExtractElement(ctx->builder, src_vector, - LLVMConstInt(ctx->i32, i, 0), ""); - LLVMValueRef ret_comp = _ac_build_readlane(ctx, src, lane); - ret = LLVMBuildInsertElement(ctx->builder, ret, ret_comp, - LLVMConstInt(ctx->i32, i, 0), ""); - } - } else { - ret = _ac_build_readlane(ctx, src, lane); - } - - return ret; + return ac_build_readlane_common(ctx, src, lane, false); /* false: build the readlane without an optimization barrier. */ } + LLVMValueRef ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane) { - LLVMTypeRef src_type = LLVMTypeOf(src); - src = ac_to_integer(ctx, src); - LLVMValueRef ret; - - ac_build_optimization_barrier(ctx, &src); - - ret = ac_build_readlane_no_opt_barrier(ctx, src, lane); - if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind) - return 
LLVMBuildBitCast(ctx->builder, ret, src_type, ""); + return ac_build_readlane_common(ctx, src, lane, true); /* true: the barrier is now applied inside _ac_build_readlane, per 32-bit component, instead of once on the whole value here. */ } LLVMValueRef