From c8038643b88818d0da6001afcb6c92e24b058bd6 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 9 Aug 2024 18:25:36 -0700 Subject: [PATCH] intel/brw: Make ifind_msb SSA friendly No shader-db changes on any Intel platform. v2: Use negate(tmp) instead of creating a new temporary. Suggested by Ken. fossil-db: Meteor Lake, DG2, and Skylake had similar results. (Meteor Lake shown) Totals: Instrs: 152535897 -> 152535883 (-0.00%); split: -0.00%, +0.00% Cycle count: 17112329592 -> 17112406110 (+0.00%); split: -0.06%, +0.06% Totals from 40 (0.01% of 633223) affected shaders: Instrs: 458813 -> 458799 (-0.00%); split: -0.01%, +0.00% Cycle count: 4358016282 -> 4358092800 (+0.00%); split: -0.23%, +0.24% Tiger Lake and Ice Lake had similar results. (Tiger Lake shown) Totals: Instrs: 150560511 -> 150560465 (-0.00%); split: -0.00%, +0.00% Cycle count: 15484534441 -> 15482372893 (-0.01%); split: -0.12%, +0.11% Spill count: 59795 -> 59794 (-0.00%) Fill count: 103513 -> 103509 (-0.00%) Totals from 40 (0.01% of 632445) affected shaders: Instrs: 368877 -> 368831 (-0.01%); split: -0.01%, +0.00% Cycle count: 3918398264 -> 3916236716 (-0.06%); split: -0.49%, +0.43% Spill count: 16896 -> 16895 (-0.01%) Fill count: 27819 -> 27815 (-0.01%) Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/brw_fs_nir.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 0b609871d7e..f20912d4dba 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1571,18 +1571,21 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, assert(instr->def.bit_size == 32); assert(nir_src_bit_size(instr->src[0].src) == 32); - bld.FBH(retype(result, BRW_TYPE_UD), op[0]); + brw_reg tmp = bld.FBH(retype(op[0], BRW_TYPE_D)); /* FBH counts from the MSB side, while GLSL's findMSB() wants the count * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then * subtract the result from 31 to convert the MSB count into an LSB * count. */ - bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ); + brw_reg count_from_lsb = bld.ADD(negate(tmp), brw_imm_w(31)); - inst = bld.ADD(result, result, brw_imm_d(31)); - inst->predicate = BRW_PREDICATE_NORMAL; - inst->src[0].negate = true; + /* The high word of the FBH result will be 0xffff or 0x0000. After + * calculating 31 - fbh, we can obtain the correct result for + * ifind_msb(0) by ORing the (sign extended) upper word of the + * intermediate result. + */ + bld.OR(result, count_from_lsb, subscript(tmp, BRW_TYPE_W, 1)); break; }