mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 05:08:08 +02:00
ac/llvm: add better code for fsign
There are 2 improvements:
- better code for 16, 32, and 64 bits
- vector support for 16 and 32 bits

Totals:
SGPRS: 2639738 -> 2625882 (-0.52 %)
VGPRS: 1534120 -> 1533916 (-0.01 %)
Spilled SGPRs: 3541 -> 3557 (0.45 %)
Spilled VGPRs: 33 -> 33 (0.00 %)
Private memory VGPRs: 256 -> 256 (0.00 %)
Scratch size: 292 -> 292 (0.00 %) dwords per thread
Code Size: 55640332 -> 55384892 (-0.46 %) bytes
Max Waves: 964785 -> 964857 (0.01 %)

Totals from affected shaders:
SGPRS: 377352 -> 363496 (-3.67 %)
VGPRS: 209800 -> 209596 (-0.10 %)
Spilled SGPRs: 1979 -> 1995 (0.81 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 256 -> 256 (0.00 %)
Scratch size: 256 -> 256 (0.00 %) dwords per thread
Code Size: 12549300 -> 12293860 (-2.04 %) bytes
Max Waves: 105762 -> 105834 (0.07 %)

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6284>
This commit is contained in:
parent
ca74603b4f
commit
d9a77f9ca3
5 changed files with 73 additions and 26 deletions
|
|
@ -2776,31 +2776,53 @@ LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0)
|
|||
return ac_build_imin(ctx, val, ac_const_uint_vec(ctx, type, 1));
|
||||
}
|
||||
|
||||
LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
|
||||
unsigned bitsize)
|
||||
static LLVMValueRef ac_eliminate_negative_zero(struct ac_llvm_context *ctx, LLVMValueRef val)
|
||||
{
|
||||
LLVMValueRef cmp, val, zero, one;
|
||||
LLVMTypeRef type;
|
||||
ac_enable_signed_zeros(ctx);
|
||||
/* (val + 0) converts negative zero to positive zero. */
|
||||
val = LLVMBuildFAdd(ctx->builder, val, LLVMConstNull(LLVMTypeOf(val)), "");
|
||||
ac_disable_signed_zeros(ctx);
|
||||
return val;
|
||||
}
|
||||
|
||||
if (bitsize == 16) {
|
||||
type = ctx->f16;
|
||||
zero = ctx->f16_0;
|
||||
one = ctx->f16_1;
|
||||
} else if (bitsize == 32) {
|
||||
type = ctx->f32;
|
||||
zero = ctx->f32_0;
|
||||
one = ctx->f32_1;
|
||||
} else {
|
||||
type = ctx->f64;
|
||||
zero = ctx->f64_0;
|
||||
one = ctx->f64_1;
|
||||
LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src)
|
||||
{
|
||||
LLVMTypeRef type = LLVMTypeOf(src);
|
||||
LLVMValueRef pos, neg, dw[2], val;
|
||||
unsigned bitsize = ac_get_elem_bits(ctx, type);
|
||||
|
||||
/* The standard version leads to this:
|
||||
* v_cmp_ngt_f32_e64 s[0:1], s4, 0 ; D40B0000 00010004
|
||||
* v_cndmask_b32_e64 v4, 1.0, s4, s[0:1] ; D5010004 000008F2
|
||||
* v_cmp_le_f32_e32 vcc, 0, v4 ; 7C060880
|
||||
* v_cndmask_b32_e32 v4, -1.0, v4, vcc ; 020808F3
|
||||
*
|
||||
* The isign version:
|
||||
* v_add_f32_e64 v4, s4, 0 ; D5030004 00010004
|
||||
* v_med3_i32 v4, v4, -1, 1 ; D5580004 02058304
|
||||
* v_cvt_f32_i32_e32 v4, v4 ; 7E080B04
|
||||
*
|
||||
* (src + 0) converts negative zero to positive zero.
|
||||
* After that, int(fsign(x)) == isign(floatBitsToInt(x)).
|
||||
*
|
||||
* For FP64, use the standard version, which doesn't suffer from the huge DP rate
|
||||
* reduction. (FP64 comparisons are as fast as int64 comparisons)
|
||||
*/
|
||||
if (bitsize == 16 || bitsize == 32) {
|
||||
val = ac_to_integer(ctx, ac_eliminate_negative_zero(ctx, src));
|
||||
val = ac_build_isign(ctx, val);
|
||||
return LLVMBuildSIToFP(ctx->builder, val, type, "");
|
||||
}
|
||||
|
||||
cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, zero, "");
|
||||
val = LLVMBuildSelect(ctx->builder, cmp, one, src0, "");
|
||||
cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, zero, "");
|
||||
val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(type, -1.0), "");
|
||||
return val;
|
||||
assert(bitsize == 64);
|
||||
pos = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src, ctx->f64_0, "");
|
||||
neg = LLVMBuildFCmp(ctx->builder, LLVMRealOLT, src, ctx->f64_0, "");
|
||||
dw[0] = ctx->i32_0;
|
||||
dw[1] = LLVMBuildSelect(ctx->builder, pos, LLVMConstInt(ctx->i32, 0x3FF00000, 0),
|
||||
LLVMBuildSelect(ctx->builder, neg,
|
||||
LLVMConstInt(ctx->i32, 0xBFF00000, 0),
|
||||
ctx->i32_0, ""), "");
|
||||
return LLVMBuildBitCast(ctx->builder, ac_build_gather_values(ctx, dw, 2), ctx->f64, "");
|
||||
}
|
||||
|
||||
LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0)
|
||||
|
|
|
|||
|
|
@ -598,9 +598,7 @@ void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags);
|
|||
LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
|
||||
unsigned bitsize);
|
||||
LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0);
|
||||
LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
|
||||
unsigned bitsize);
|
||||
|
||||
LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src);
|
||||
LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0);
|
||||
|
||||
LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
|
||||
|
|
|
|||
|
|
@ -121,6 +121,31 @@ LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
|
|||
return builder;
|
||||
}
|
||||
|
||||
void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
|
||||
{
|
||||
if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
|
||||
auto *b = llvm::unwrap(ctx->builder);
|
||||
llvm::FastMathFlags flags = b->getFastMathFlags();
|
||||
|
||||
/* This disables the optimization of (x + 0), which is used
|
||||
* to convert negative zero to positive zero.
|
||||
*/
|
||||
flags.setNoSignedZeros(false);
|
||||
b->setFastMathFlags(flags);
|
||||
}
|
||||
}
|
||||
|
||||
void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
|
||||
{
|
||||
if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
|
||||
auto *b = llvm::unwrap(ctx->builder);
|
||||
llvm::FastMathFlags flags = b->getFastMathFlags();
|
||||
|
||||
flags.setNoSignedZeros();
|
||||
b->setFastMathFlags(flags);
|
||||
}
|
||||
}
|
||||
|
||||
LLVMTargetLibraryInfoRef
|
||||
ac_create_target_library_info(const char *triple)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
struct ac_compiler_passes;
|
||||
struct ac_llvm_context;
|
||||
|
||||
enum ac_func_attr {
|
||||
AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
|
||||
|
|
@ -109,6 +110,8 @@ LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx);
|
|||
|
||||
LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
|
||||
enum ac_float_mode float_mode);
|
||||
void ac_enable_signed_zeros(struct ac_llvm_context *ctx);
|
||||
void ac_disable_signed_zeros(struct ac_llvm_context *ctx);
|
||||
|
||||
void
|
||||
ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
|
||||
|
|
|
|||
|
|
@ -826,8 +826,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
|||
break;
|
||||
case nir_op_fsign:
|
||||
src[0] = ac_to_float(&ctx->ac, src[0]);
|
||||
result = ac_build_fsign(&ctx->ac, src[0],
|
||||
instr->dest.dest.ssa.bit_size);
|
||||
result = ac_build_fsign(&ctx->ac, src[0]);
|
||||
break;
|
||||
case nir_op_ffloor:
|
||||
result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue