mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-01 01:38:06 +02:00
radv: flush f32->f16 conversion denormals to zero. (v2)
SPIR-V defines the f32->f16 operation as flushing denormals to 0, this compares the class using amd class opcode. Thanks to Matt Arsenault for figuring it out. This fix is VI+ only, add a TODO for SI/CIK. This fixes: dEQP-VK.spirv_assembly.instruction.compute.opquantize.flush_to_zero Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
parent
eeff7e1154
commit
83e58b036e
2 changed files with 41 additions and 4 deletions
|
|
@ -1325,6 +1325,33 @@ static LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx,
|
||||||
return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
|
return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Quantize a value through half precision: truncate it to f16, then extend
 * the truncated value back to f32.
 *
 * On VI and newer chips the truncated half is additionally classified with
 * llvm.amdgcn.class.f16; when it is a negative or positive subnormal the
 * returned value is ctx->f32zero instead of the extended half.
 *
 * \param ctx   translation context supplying the LLVM builder, the f16/f32/
 *              i1/i32 types and the target chip class
 * \param src0  value to quantize; it is passed through to_float() first
 * \return the quantized value as an f32
 */
static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,
			       LLVMValueRef src0)
{
	/* TODO SI/CIK options here */
	const bool flush_denorms = ctx->options->chip_class >= VI;
	/* Only set when flush_denorms is true; NULL keeps it well-defined otherwise. */
	LLVMValueRef is_denorm = NULL;
	LLVMValueRef result;

	src0 = to_float(ctx, src0);
	result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");

	if (flush_denorms) {
		/* Check if the result is a denormal - and flush to 0 if so.
		 * The test has to run on the f16 value, before it is widened. */
		LLVMValueRef args[2] = {
			result,
			LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false),
		};

		is_denorm = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f16",
					       ctx->i1, args, 2,
					       AC_FUNC_ATTR_READNONE);
	}

	/* need to convert back up to f32 */
	result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");

	if (flush_denorms)
		result = LLVMBuildSelect(ctx->builder, is_denorm,
					 ctx->f32zero, result, "");

	return result;
}
|
||||||
|
|
||||||
static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx,
|
static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx,
|
||||||
LLVMValueRef src0, LLVMValueRef src1)
|
LLVMValueRef src0, LLVMValueRef src1)
|
||||||
{
|
{
|
||||||
|
|
@ -1812,10 +1839,7 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
|
||||||
result = emit_b2f(ctx, src[0]);
|
result = emit_b2f(ctx, src[0]);
|
||||||
break;
|
break;
|
||||||
case nir_op_fquantize2f16:
|
case nir_op_fquantize2f16:
|
||||||
src[0] = to_float(ctx, src[0]);
|
result = emit_f2f16(ctx, src[0]);
|
||||||
result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, "");
|
|
||||||
/* need to convert back up to f32 */
|
|
||||||
result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
|
|
||||||
break;
|
break;
|
||||||
case nir_op_umul_high:
|
case nir_op_umul_high:
|
||||||
result = emit_umul_high(ctx, src[0], src[1]);
|
result = emit_umul_high(ctx, src[0], src[1]);
|
||||||
|
|
|
||||||
|
|
@ -9095,5 +9095,18 @@
|
||||||
#define CIK_SDMA_PACKET_SRBM_WRITE 0xe
|
#define CIK_SDMA_PACKET_SRBM_WRITE 0xe
|
||||||
#define CIK_SDMA_COPY_MAX_SIZE 0x3fffe0
|
#define CIK_SDMA_COPY_MAX_SIZE 0x3fffe0
|
||||||
|
|
||||||
|
/*
 * Floating-point class selectors, one bit per class. Bits are OR-ed together
 * into a mask that is handed to the AMD class-compare intrinsic
 * (e.g. llvm.amdgcn.class.f16) to test whether a value falls in any of the
 * selected classes.
 */
enum amd_cmp_class_flags {
	S_NAN       = 0x001, /* signaling NaN */
	Q_NAN       = 0x002, /* quiet NaN */
	N_INFINITY  = 0x004, /* negative infinity */
	N_NORMAL    = 0x008, /* negative normal */
	N_SUBNORMAL = 0x010, /* negative subnormal */
	N_ZERO      = 0x020, /* negative zero */
	P_ZERO      = 0x040, /* positive zero */
	P_SUBNORMAL = 0x080, /* positive subnormal */
	P_NORMAL    = 0x100, /* positive normal */
	P_INFINITY  = 0x200  /* positive infinity */
};
|
||||||
|
|
||||||
#endif /* _SID_H */
|
#endif /* _SID_H */
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue