radv: flush f32->f16 conversion denormals to zero. (v2)

SPIR-V defines the f32->f16 quantize operation as flushing denormals to 0. This change checks the class of the converted value using the AMD class opcode and selects zero when the value is a denormal. Thanks to Matt Arsenault for figuring it out. This fix is VI+ only; a TODO is added for SI/CIK. This fixes: dEQP-VK.spirv_assembly.instruction.compute.opquantize.flush_to_zero Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Signed-off-by: Dave Airlie <airlied@redhat.com>
2026-05-01 01:38:06 +02:00 · 2017-03-17 12:11:30 +10:00 · 2017-03-17 12:11:30 +10:00 · 83e58b036e
commit 83e58b036e
parent eeff7e1154
2 changed files with 41 additions and 4 deletions
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@ -1325,6 +1325,33 @@ static LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx,
 	return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
 }
 /**
  * Quantize a value to f16 precision and return it as f32.
  *
  * The source is converted to float, truncated to f16 and extended back to
  * f32.  On VI and newer chips the truncated f16 value is additionally tested
  * with the llvm.amdgcn.class.f16 intrinsic; when it is a (positive or
  * negative) subnormal, ctx->f32zero is returned in its place.
  *
  * \param ctx   translation context supplying the builder, types and options
  * \param src0  value to quantize
  * \return the quantized value as an f32 LLVM value
  */
 static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,
 			       LLVMValueRef src0)
 {
 	LLVMValueRef half;
 	LLVMValueRef is_denorm;
 	LLVMValueRef widened;
 	LLVMValueRef class_args[2];

 	src0 = to_float(ctx, src0);
 	half = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");

 	/* TODO SI/CIK options here */
 	if (ctx->options->chip_class < VI) {
 		/* No denormal flush on these chips yet: just convert back up
 		 * to f32.
 		 */
 		return LLVMBuildFPExt(ctx->builder, half, ctx->f32, "");
 	}

 	/* Check if the f16 result is a denormal - and flush to 0 if so.  The
 	 * class test is emitted before the extension, on the f16 value.
 	 */
 	class_args[0] = half;
 	class_args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
 	is_denorm = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f16", ctx->i1,
 				       class_args, 2, AC_FUNC_ATTR_READNONE);

 	/* need to convert back up to f32 */
 	widened = LLVMBuildFPExt(ctx->builder, half, ctx->f32, "");

 	/* NOTE(review): both negative and positive denormals are replaced by
 	 * ctx->f32zero (presumably +0.0) - confirm whether negative inputs
 	 * are expected to yield -0.0 instead.
 	 */
 	return LLVMBuildSelect(ctx->builder, is_denorm, ctx->f32zero, widened, "");
 }
 static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx,
 				   LLVMValueRef src0, LLVMValueRef src1)
 {
@ -1812,10 +1839,7 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
 		result = emit_b2f(ctx, src[0]);
 		break;
 	case nir_op_fquantize2f16:
-		src[0] = to_float(ctx, src[0]);
+		result = emit_f2f16(ctx, src[0]);
-		result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, "");
-		/* need to convert back up to f32 */
-		result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
 		break;
 	case nir_op_umul_high:
 		result = emit_umul_high(ctx, src[0], src[1]);
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@ -9095,5 +9095,18 @@
 #define    CIK_SDMA_PACKET_SRBM_WRITE              0xe
 #define    CIK_SDMA_COPY_MAX_SIZE                  0x3fffe0
 /*
  * Floating-point class selection bits.  Values may be OR'ed together to form
  * a mask of the classes to test for (e.g. N_SUBNORMAL | P_SUBNORMAL is passed
  * as the mask operand of the llvm.amdgcn.class.f16 intrinsic).
  */
 enum amd_cmp_class_flags {
 	S_NAN       = 1 << 0, /* Signaling NaN */
 	Q_NAN       = 1 << 1, /* Quiet NaN */
 	N_INFINITY  = 1 << 2, /* Negative infinity */
 	N_NORMAL    = 1 << 3, /* Negative normal */
 	N_SUBNORMAL = 1 << 4, /* Negative subnormal */
 	N_ZERO      = 1 << 5, /* Negative zero */
 	P_ZERO      = 1 << 6, /* Positive zero */
 	P_SUBNORMAL = 1 << 7, /* Positive subnormal */
 	P_NORMAL    = 1 << 8, /* Positive normal */
 	P_INFINITY  = 1 << 9  /* Positive infinity */
 };
 #endif /* _SID_H */