brw/algebraic: Constant fold multiplicands of MAD

v2: Move the full constant folding part to
brw_constant_fold_instruction. Suggested by Caio. I did this by
extracting the core part of the folding to a helper function.

v3: Delete stale comment. Noticed by Caio.

shader-db:

All Intel platforms had similar results. (Lunar Lake shown)
total instructions in shared programs: 18090847 -> 18090843 (<.01%)
instructions in affected programs: 150 -> 146 (-2.67%)
helped: 1 / HURT: 0

total cycles in shared programs: 919664648 -> 919663210 (<.01%)
cycles in affected programs: 3426 -> 1988 (-41.97%)
helped: 1 / HURT: 0

LOST:   1
GAINED: 0

fossil-db:

All Intel platforms had similar results. (Lunar Lake shown)
Totals:
Instrs: 220496486 -> 220496403 (-0.00%)
Cycle count: 31610880908 -> 31610879044 (-0.00%); split: -0.00%, +0.00%

Totals from 70 (0.01% of 702439) affected shaders:
Instrs: 47018 -> 46935 (-0.18%)
Cycle count: 6335504 -> 6333640 (-0.03%); split: -0.11%, +0.09%

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32436>
This commit is contained in:
Ian Romanick 2024-11-14 10:04:26 -08:00 committed by Marge Bot
parent 3a16ad71b7
commit b605f76b2a

View file

@ -5,6 +5,7 @@
#include "brw_fs.h" #include "brw_fs.h"
#include "brw_fs_builder.h" #include "brw_fs_builder.h"
#include "util/half_float.h"
using namespace brw; using namespace brw;
@ -37,6 +38,26 @@ src_as_uint(const brw_reg &src)
} }
} }
/**
 * Returns the value held by a floating-point immediate, widened to double.
 *
 * \p src must be an immediate (asserted).  Only the HF, F and DF types are
 * handled; every other type reaches unreachable().
 */
static double
src_as_float(const brw_reg &src)
{
   assert(src.file == IMM);

   if (src.type == BRW_TYPE_HF) {
      /* Only the low 16 bits of the payload are handed to the converter. */
      return _mesa_half_to_float((uint16_t)src.d);
   }

   if (src.type == BRW_TYPE_F)
      return src.f;

   if (src.type == BRW_TYPE_DF)
      return src.df;

   unreachable("Invalid float type.");
}
static brw_reg static brw_reg
brw_imm_for_type(uint64_t value, enum brw_reg_type type) brw_imm_for_type(uint64_t value, enum brw_reg_type type)
{ {
@ -64,6 +85,55 @@ brw_imm_for_type(uint64_t value, enum brw_reg_type type)
} }
} }
/**
 * Converts a MAD to an ADD by folding the multiplicand sources.
 *
 * Both multiplicands (src[1] and src[2]) must be non-vector immediates; this
 * is only asserted here, so callers have to check it first.  Their product is
 * written to src[1] using the larger of the two multiplicand types, the
 * opcode is changed to BRW_OPCODE_ADD and the source list is shrunk to two
 * entries.  src[0] is not touched.
 */
static void
fold_multiplicands_of_MAD(fs_inst *inst)
{
   assert(inst->opcode == BRW_OPCODE_MAD);
   assert (inst->src[1].file == IMM &&
           inst->src[2].file == IMM &&
           !brw_type_is_vector_imm(inst->src[1].type) &&
           !brw_type_is_vector_imm(inst->src[2].type));

   /* Determine the result type up front, before src[1] is overwritten. */
   const enum brw_reg_type product_type =
      brw_type_larger_of(inst->src[1].type, inst->src[2].type);

   if (!brw_type_is_int(inst->src[1].type)) {
      /* Multiply in double precision, then narrow to the result type. */
      const double lhs = src_as_float(inst->src[1]);
      const double rhs = src_as_float(inst->src[2]);
      const double product = lhs * rhs;

      switch (product_type) {
      case BRW_TYPE_F:
         inst->src[1] = brw_imm_f(product);
         break;

      case BRW_TYPE_HF:
         /* The half-float bit pattern is built as a W immediate and then
          * relabelled as HF.
          */
         inst->src[1] = retype(brw_imm_w(_mesa_float_to_half(product)),
                               BRW_TYPE_HF);
         break;

      case BRW_TYPE_DF:
         unreachable("float64 should be impossible.");
         break;

      default:
         unreachable("Invalid float type.");
      }
   } else {
      const uint64_t lhs = src_as_uint(inst->src[1]);
      const uint64_t rhs = src_as_uint(inst->src[2]);

      /* NOTE(review): the 64-bit product goes through brw_imm_ud, which
       * presumably keeps only the low 32 bits -- confirm this is sufficient
       * for every integer type a MAD can carry.
       */
      inst->src[1] = retype(brw_imm_ud(lhs * rhs), product_type);
   }

   /* src[0] + (src[1] * src[2]) is now src[0] + src[1]. */
   inst->opcode = BRW_OPCODE_ADD;
   inst->resize_sources(2);
}
bool bool
brw_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst) brw_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst)
{ {
@ -120,6 +190,25 @@ brw_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst)
break; break;
case BRW_OPCODE_MAD:
if (inst->src[1].file == IMM &&
inst->src[2].file == IMM &&
inst->src[3].file == IMM &&
!brw_type_is_vector_imm(inst->src[1].type) &&
!brw_type_is_vector_imm(inst->src[2].type) &&
!brw_type_is_vector_imm(inst->src[3].type)) {
fold_multiplicands_of_MAD(inst);
assert(inst->opcode == BRW_OPCODE_ADD);
ASSERTED bool folded = brw_constant_fold_instruction(devinfo, inst);
assert(folded);
progress = true;
break;
}
break;
case BRW_OPCODE_MUL: case BRW_OPCODE_MUL:
if (brw_type_is_float(inst->src[1].type)) if (brw_type_is_float(inst->src[1].type))
break; break;
@ -560,6 +649,25 @@ brw_fs_opt_algebraic(fs_visitor &s)
} }
break; break;
case BRW_OPCODE_MAD: case BRW_OPCODE_MAD:
if (brw_constant_fold_instruction(devinfo, inst)) {
progress = true;
break;
}
if (inst->src[1].file == IMM &&
inst->src[2].file == IMM &&
!brw_type_is_vector_imm(inst->src[1].type) &&
!brw_type_is_vector_imm(inst->src[2].type)) {
fold_multiplicands_of_MAD(inst);
/* This could result in (x + 0). For floats, we want to leave this
* as an ADD so that a subnormal x will get flushed to zero.
*/
assert(inst->opcode == BRW_OPCODE_ADD);
progress = true;
break;
}
if (inst->src[0].type != BRW_TYPE_F || if (inst->src[0].type != BRW_TYPE_F ||
inst->src[1].type != BRW_TYPE_F || inst->src[1].type != BRW_TYPE_F ||
inst->src[2].type != BRW_TYPE_F) inst->src[2].type != BRW_TYPE_F)