From b948e6d503db0abe7b08265943ae84618def5fc2 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 10 Jun 2025 18:41:39 -0700 Subject: [PATCH] brw: Use BFN to implement nir_op_bitfield_select shader-db: Lunar Lake, Meteor Lake, and DG2 had similar results. (Lunar Lake shown) total instructions in shared programs: 17181559 -> 17181254 (<.01%) instructions in affected programs: 250921 -> 250616 (-0.12%) helped: 303 / HURT: 0 total cycles in shared programs: 888542568 -> 888543370 (<.01%) cycles in affected programs: 49861772 -> 49862574 (<.01%) helped: 181 / HURT: 110 fossil-db: Lunar Lake, Meteor Lake, and DG2 had similar results. (Lunar Lake shown) Totals: Instrs: 233260591 -> 233260196 (-0.00%); split: -0.00%, +0.00% Cycle count: 32754501248 -> 32754567116 (+0.00%); split: -0.00%, +0.00% Max live registers: 71738476 -> 71738442 (-0.00%) Non SSA regs after NIR: 67837262 -> 67837108 (-0.00%); split: -0.00%, +0.00% Totals from 226 (0.03% of 790721) affected shaders: Instrs: 382227 -> 381832 (-0.10%); split: -0.15%, +0.05% Cycle count: 72863878 -> 72929746 (+0.09%); split: -0.65%, +0.74% Max live registers: 36557 -> 36523 (-0.09%) Non SSA regs after NIR: 60427 -> 60273 (-0.25%); split: -0.26%, +0.00% No shader-db or fossil-db changes on any previous Intel platforms. 
Reviewed-by: Matt Turner Part-of: --- src/intel/compiler/brw/brw_compiler.c | 2 ++ src/intel/compiler/brw/brw_from_nir.cpp | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/src/intel/compiler/brw/brw_compiler.c b/src/intel/compiler/brw/brw_compiler.c index 1b89bcbcbff..bd76081546b 100644 --- a/src/intel/compiler/brw/brw_compiler.c +++ b/src/intel/compiler/brw/brw_compiler.c @@ -199,6 +199,8 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo) nir_options->has_udot_4x8_sat = devinfo->ver >= 12; nir_options->has_sudot_4x8_sat = devinfo->ver >= 12; + nir_options->has_bitfield_select = devinfo->verx10 >= 125; + nir_options->lower_int64_options = int64_options; nir_options->lower_doubles_options = fp64_options; diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp index fd69d1eb0e1..ee543f4c010 100644 --- a/src/intel/compiler/brw/brw_from_nir.cpp +++ b/src/intel/compiler/brw/brw_from_nir.cpp @@ -31,6 +31,7 @@ #include "dev/intel_debug.h" #include "util/u_math.h" #include "util/bitscan.h" +#include "util/lut.h" #include "compiler/glsl_types.h" #include @@ -1684,6 +1685,15 @@ brw_from_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, case nir_op_bitfield_insert: UNREACHABLE("not reached: should have been lowered"); + case nir_op_bitfield_select: { + /* The sources are rearranged because, due to the way opt_algebraic + * generates bitfield_select, op[0] will never be a constant. The only + * source of BFN that can't be immediate is src1. + */ + bld.BFN(result, op[1], op[0], op[2], UTIL_LUT3((b & a) | (~b & c))); + break; + } + /* With regards to implicit masking of the shift counts for 8- and 16-bit * types, the PRMs are **incorrect**. They falsely state that on Gen9+ only * the low bits of src1 matching the size of src0 (e.g., 4-bits for W or UW