From 05481f56a06b93b72cf3a166ec1698925dcb4018 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 20 Oct 2025 16:49:56 -0400 Subject: [PATCH] brw: use the right int8/int16 division lowering Lowering bitsize before lowering idiv is silly, since then it forces us down the software int32 division path instead of the much faster int8/int16 lowered path. Relevant CTS tests: dEQP-VK.spirv_assembly.type.scalar.i16.div_comp, dEQP-VK.spirv_assembly.type.scalar.i8.rem_comp. Go from: SIMD8 shader: 46 instructions. 1 loops. 4716 cycles. 0:0 spills:fills SIMD8 shader: 1008 instructions. 0 loops. 3600 cycles. 0:0 spills:fills, 8 sends to: SIMD8 shader: 17 instructions. 1 loops. 2556 cycles. 0:0 spills:fills SIMD8 shader: 464 instructions. 0 loops. 1394 cycles. 0:0 spills:fills, 8 sends No stats change on fossil-db (which has very little int8/int16 and even less integer division, apparently). Signed-off-by: Alyssa Rosenzweig Reviewed-by: Ian Romanick Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/brw/brw_nir.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c index db16e8b7061..241b0244075 100644 --- a/src/intel/compiler/brw/brw_nir.c +++ b/src/intel/compiler/brw/brw_nir.c @@ -1145,8 +1145,10 @@ brw_nir_optimize(nir_shader *nir, } static unsigned -lower_bit_size_callback(const nir_instr *instr, UNUSED void *data) +lower_bit_size_callback(const nir_instr *instr, void *data) { + const struct brw_compiler *compiler = data; + switch (instr->type) { case nir_instr_type_alu: { nir_alu_instr *alu = nir_instr_as_alu(instr); @@ -1180,6 +1182,12 @@ lower_bit_size_callback(const nir_instr *instr, UNUSED void *data) case nir_op_irem: case nir_op_udiv: case nir_op_umod: + /* Gfx12.5+ lacks integer division instructions. As nir_lower_idiv is + * far more efficient for int8/int16 divisions, we do not lower here. 
+ * + * Older platforms have idiv instructions only for int32, so lower. + */ + return compiler->devinfo->verx10 >= 125 ? 0 : 32; case nir_op_fceil: case nir_op_ffloor: case nir_op_ffract: @@ -2222,7 +2230,12 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler, const nir_lower_idiv_options options = { .allow_fp16 = false }; - OPT(nir_lower_idiv, &options); + + /* Given an 8-bit integer remainder, nir_lower_idiv will produce new + * 8-bit integer math which needs to be lowered. + */ + if (OPT(nir_lower_idiv, &options)) + OPT(nir_lower_bit_size, lower_bit_size_callback, (void *)compiler); } if (devinfo->ver >= 30)