From 56a31d5210cfb56f740376674362df05bbc7cad0 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Sat, 13 Feb 2021 13:22:41 -0800 Subject: [PATCH] intel/compiler: Make the CMPN builder work like the CMP builder Since the CMPN builder was never used, there was no reason to make its interface usable. :) Fixes: 2f2c00c7279 ("i965: Lower min/max after optimization on Gen4/5.") Reviewed-by: Jason Ekstrand Part-of: (cherry picked from commit 684ec33c79a068bacff70c3414342ee895501926) --- .pick_status.json | 2 +- src/intel/compiler/brw_fs_builder.h | 26 +++++++++++++++++++++++++- src/intel/compiler/brw_vec4_builder.h | 26 +++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index d73662aa697..b772396f8bc 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -355,7 +355,7 @@ "description": "intel/compiler: Make the CMPN builder work like the CMP builder", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "master_sha": null, "because_sha": "2f2c00c7279e7c43e520e21de1781f8cec263e92" }, diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index 368285a3cbe..2fcbdb73f2a 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -565,7 +565,6 @@ namespace brw { ALU3(BFI2) ALU1(BFREV) ALU1(CBIT) - ALU2(CMPN) ALU1(DIM) ALU2(DP2) ALU2(DP3) @@ -633,6 +632,31 @@ namespace brw { fix_unsigned_negate(src1))); } + /** + * CMPN: Behaves like CMP, but produces true if src1 is NaN. + */ + instruction * + CMPN(const dst_reg &dst, const src_reg &src0, const src_reg &src1, + brw_conditional_mod condition) const + { + /* Take the instruction: + * + * CMPN null src0 src1 + * + * Original gen4 does type conversion to the destination type + * before comparison, producing garbage results for floating + * point comparisons. 
+ * + * The destination type doesn't matter on newer generations, + * so we set the type to match src0 so we can compact the + * instruction. + */ + return set_condmod(condition, + emit(BRW_OPCODE_CMPN, retype(dst, src0.type), + fix_unsigned_negate(src0), + fix_unsigned_negate(src1))); + } + /** * Gen4 predicated IF. */ diff --git a/src/intel/compiler/brw_vec4_builder.h b/src/intel/compiler/brw_vec4_builder.h index f056fe6a331..f821607bcfa 100644 --- a/src/intel/compiler/brw_vec4_builder.h +++ b/src/intel/compiler/brw_vec4_builder.h @@ -403,7 +403,6 @@ namespace brw { ALU3(BFI2) ALU1(BFREV) ALU1(CBIT) - ALU2(CMPN) ALU3(CSEL) ALU1(DIM) ALU2(DP2) @@ -470,6 +469,31 @@ namespace brw { fix_unsigned_negate(src1))); } + /** + * CMPN: Behaves like CMP, but produces true if src1 is NaN. + */ + instruction * + CMPN(const dst_reg &dst, const src_reg &src0, const src_reg &src1, + brw_conditional_mod condition) const + { + /* Take the instruction: + * + * CMPN null src0 src1 + * + * Original gen4 does type conversion to the destination type + * before comparison, producing garbage results for floating + * point comparisons. + * + * The destination type doesn't matter on newer generations, + * so we set the type to match src0 so we can compact the + * instruction. + */ + return set_condmod(condition, + emit(BRW_OPCODE_CMPN, retype(dst, src0.type), + fix_unsigned_negate(src0), + fix_unsigned_negate(src1))); + } + /** * Gen4 predicated IF. */