mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 07:20:10 +01:00
brw: use the right int8/int16 division lowering
Lowering bitsize before lowering idiv is silly, since it then forces us down the software int32 division path instead of the much faster int8/int16 lowered path.

Relevant CTS tests:
  dEQP-VK.spirv_assembly.type.scalar.i16.div_comp
  dEQP-VK.spirv_assembly.type.scalar.i8.rem_comp

Go from:
  SIMD8 shader: 46 instructions. 1 loops. 4716 cycles. 0:0 spills:fills
  SIMD8 shader: 1008 instructions. 0 loops. 3600 cycles. 0:0 spills:fills, 8 sends
to:
  SIMD8 shader: 17 instructions. 1 loops. 2556 cycles. 0:0 spills:fills
  SIMD8 shader: 464 instructions. 0 loops. 1394 cycles. 0:0 spills:fills, 8 sends

No stats change on fossil-db (which has very little int8/int16 and even less integer division, apparently).

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37966>
This commit is contained in:
parent
c2a6fb6419
commit
05481f56a0
1 changed files with 15 additions and 2 deletions
|
|
@ -1145,8 +1145,10 @@ brw_nir_optimize(nir_shader *nir,
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned
|
static unsigned
|
||||||
lower_bit_size_callback(const nir_instr *instr, UNUSED void *data)
|
lower_bit_size_callback(const nir_instr *instr, void *data)
|
||||||
{
|
{
|
||||||
|
const struct brw_compiler *compiler = data;
|
||||||
|
|
||||||
switch (instr->type) {
|
switch (instr->type) {
|
||||||
case nir_instr_type_alu: {
|
case nir_instr_type_alu: {
|
||||||
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
||||||
|
|
@ -1180,6 +1182,12 @@ lower_bit_size_callback(const nir_instr *instr, UNUSED void *data)
|
||||||
case nir_op_irem:
|
case nir_op_irem:
|
||||||
case nir_op_udiv:
|
case nir_op_udiv:
|
||||||
case nir_op_umod:
|
case nir_op_umod:
|
||||||
|
/* Gfx12.5+ lacks integer division instructions. As nir_lower_idiv is
|
||||||
|
* far more efficient for int8/int16 divisions, we do not lower here.
|
||||||
|
*
|
||||||
|
* Older platforms have idiv instructions only for int32, so lower.
|
||||||
|
*/
|
||||||
|
return compiler->devinfo->verx10 >= 125 ? 0 : 32;
|
||||||
case nir_op_fceil:
|
case nir_op_fceil:
|
||||||
case nir_op_ffloor:
|
case nir_op_ffloor:
|
||||||
case nir_op_ffract:
|
case nir_op_ffract:
|
||||||
|
|
@ -2222,7 +2230,12 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
|
||||||
const nir_lower_idiv_options options = {
|
const nir_lower_idiv_options options = {
|
||||||
.allow_fp16 = false
|
.allow_fp16 = false
|
||||||
};
|
};
|
||||||
OPT(nir_lower_idiv, &options);
|
|
||||||
|
/* Given an 8-bit integer remainder, nir_lower_idiv will produce new
|
||||||
|
* 8-bit integer math which needs to be lowered.
|
||||||
|
*/
|
||||||
|
if (OPT(nir_lower_idiv, &options))
|
||||||
|
OPT(nir_lower_bit_size, lower_bit_size_callback, (void *)compiler);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (devinfo->ver >= 30)
|
if (devinfo->ver >= 30)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue