mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 13:10:10 +01:00
intel/fs: switch from SIMD1 to SIMD8 instructions for surface/sampler rematerialization
SIMD1 instructions are problematic because they are considered partial writes. This increases the liveness of the destination register written by those instructions. To work around this we use UNDEF instructions to bound the liveness of the register. But this is causing other issues, as in this case: undef(1) vgrf2 mov(1) vgrf2, u4.0 add(1) vgrf3, vgrf2.0, 64UD In this case the copy propagation pass is unable to see that vgrf2 in the add() instruction can be replaced with the uniform u4.0. To fix this problem, we switch to NoMask SIMD8 instructions that cover the entire register. We can drop the UNDEF instructions and now copy propagation can do its job. Good results on 2 apps: Cyberpunk 2077 : Totals from 7258 (68.80% of 10549) affected shaders: Instrs: 6332210 -> 6073833 (-4.08%); split: -4.11%, +0.03% Cycles: 130667501 -> 127351268 (-2.54%); split: -3.12%, +0.58% Subgroup size: 90320 -> 90400 (+0.09%) Spill count: 90 -> 68 (-24.44%) Fill count: 82 -> 64 (-21.95%) Scratch Memory Size: 8192 -> 6144 (-25.00%) Max live registers: 385464 -> 375152 (-2.68%) Max dispatch width: 64336 -> 64424 (+0.14%); split: +0.96%, -0.82% Gaining 60 SIMD16/SIMD32 shaders, losing 33 Strange Brigade : Totals from 2137 (53.12% of 4023) affected shaders: Instrs: 1544031 -> 1457544 (-5.60%); split: -5.60%, +0.00% Cycles: 22292564 -> 21868978 (-1.90%); split: -2.43%, +0.53% Subgroup size: 25328 -> 25344 (+0.06%) Max live registers: 113716 -> 111214 (-2.20%) Max dispatch width: 17232 -> 18608 (+7.99%); split: +8.36%, -0.37% Gaining 138 SIMD16/SIMD32 shaders, losing 4 One app slightly negatively affected: Dota2 : Totals from 232 (14.73% of 1575) affected shaders: Instrs: 30029 -> 28194 (-6.11%) Cycles: 385155 -> 371422 (-3.57%); split: -3.59%, +0.02% Max live registers: 6792 -> 6780 (-0.18%) Max dispatch width: 2256 -> 2160 (-4.26%) Losing 6 SIMD32 shaders Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Ian 
Romanick <ian.d.romanick@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24554>
This commit is contained in:
parent
d28f42f85d
commit
a25f96c00c
1 changed files with 13 additions and 20 deletions
|
|
@ -4003,7 +4003,7 @@ fs_reg
|
|||
fs_visitor::try_rebuild_resource(const brw::fs_builder &bld, nir_def *resource_def)
|
||||
{
|
||||
/* Create a build at the location of the resource_intel intrinsic */
|
||||
fs_builder ubld1 = bld.exec_all().group(1, 0);
|
||||
fs_builder ubld8 = bld.exec_all().group(8, 0);
|
||||
|
||||
struct rebuild_resource resources = {};
|
||||
resources.idx = 0;
|
||||
|
|
@ -4041,10 +4041,9 @@ fs_visitor::try_rebuild_resource(const brw::fs_builder &bld, nir_def *resource_d
|
|||
case nir_instr_type_load_const: {
|
||||
nir_load_const_instr *load_const =
|
||||
nir_instr_as_load_const(instr);
|
||||
fs_reg dst = ubld1.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
ubld1.UNDEF(dst);
|
||||
fs_reg dst = ubld8.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
nir_resource_insts[def->index] =
|
||||
ubld1.group(8, 0).MOV(dst, brw_imm_ud(load_const->value[0].i32));
|
||||
ubld8.MOV(dst, brw_imm_ud(load_const->value[0].i32));
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -4067,52 +4066,47 @@ fs_visitor::try_rebuild_resource(const brw::fs_builder &bld, nir_def *resource_d
|
|||
|
||||
switch (alu->op) {
|
||||
case nir_op_iadd: {
|
||||
fs_reg dst = ubld1.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
ubld1.UNDEF(dst);
|
||||
fs_reg dst = ubld8.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
fs_reg src0 = nir_resource_insts[alu->src[0].src.ssa->index]->dst;
|
||||
fs_reg src1 = nir_resource_insts[alu->src[1].src.ssa->index]->dst;
|
||||
assert(src0.file != BAD_FILE && src1.file != BAD_FILE);
|
||||
assert(src0.type == BRW_REGISTER_TYPE_UD);
|
||||
nir_resource_insts[def->index] =
|
||||
ubld1.ADD(dst,
|
||||
ubld8.ADD(dst,
|
||||
src0.file != IMM ? src0 : src1,
|
||||
src0.file != IMM ? src1 : src0);
|
||||
break;
|
||||
}
|
||||
case nir_op_iadd3: {
|
||||
fs_reg dst = ubld1.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
ubld1.UNDEF(dst);
|
||||
fs_reg dst = ubld8.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
fs_reg src0 = nir_resource_insts[alu->src[0].src.ssa->index]->dst;
|
||||
fs_reg src1 = nir_resource_insts[alu->src[1].src.ssa->index]->dst;
|
||||
fs_reg src2 = nir_resource_insts[alu->src[2].src.ssa->index]->dst;
|
||||
assert(src0.file != BAD_FILE && src1.file != BAD_FILE && src2.file != BAD_FILE);
|
||||
assert(src0.type == BRW_REGISTER_TYPE_UD);
|
||||
nir_resource_insts[def->index] =
|
||||
ubld1.ADD3(dst,
|
||||
ubld8.ADD3(dst,
|
||||
src1.file == IMM ? src1 : src0,
|
||||
src1.file == IMM ? src0 : src1,
|
||||
src2);
|
||||
break;
|
||||
}
|
||||
case nir_op_ushr: {
|
||||
assert(ubld1.dispatch_width() == 1);
|
||||
fs_reg dst = ubld1.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
ubld1.UNDEF(dst);
|
||||
fs_reg dst = ubld8.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
fs_reg src0 = nir_resource_insts[alu->src[0].src.ssa->index]->dst;
|
||||
fs_reg src1 = nir_resource_insts[alu->src[1].src.ssa->index]->dst;
|
||||
assert(src0.file != BAD_FILE && src1.file != BAD_FILE);
|
||||
assert(src0.type == BRW_REGISTER_TYPE_UD);
|
||||
nir_resource_insts[def->index] = ubld1.SHR(dst, src0, src1);
|
||||
nir_resource_insts[def->index] = ubld8.SHR(dst, src0, src1);
|
||||
break;
|
||||
}
|
||||
case nir_op_ishl: {
|
||||
fs_reg dst = ubld1.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
ubld1.UNDEF(dst);
|
||||
fs_reg dst = ubld8.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
fs_reg src0 = nir_resource_insts[alu->src[0].src.ssa->index]->dst;
|
||||
fs_reg src1 = nir_resource_insts[alu->src[1].src.ssa->index]->dst;
|
||||
assert(src0.file != BAD_FILE && src1.file != BAD_FILE);
|
||||
assert(src0.type == BRW_REGISTER_TYPE_UD);
|
||||
nir_resource_insts[def->index] = ubld1.SHL(dst, src0, src1);
|
||||
nir_resource_insts[def->index] = ubld8.SHL(dst, src0, src1);
|
||||
break;
|
||||
}
|
||||
case nir_op_mov: {
|
||||
|
|
@ -4138,11 +4132,10 @@ fs_visitor::try_rebuild_resource(const brw::fs_builder &bld, nir_def *resource_d
|
|||
|
||||
unsigned base_offset = nir_intrinsic_base(intrin);
|
||||
unsigned load_offset = nir_src_as_uint(intrin->src[0]);
|
||||
fs_reg dst = ubld1.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
ubld1.UNDEF(dst);
|
||||
fs_reg dst = ubld8.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
fs_reg src(UNIFORM, base_offset / 4, BRW_REGISTER_TYPE_UD);
|
||||
src.offset = load_offset + base_offset % 4;
|
||||
nir_resource_insts[def->index] = ubld1.MOV(dst, src);
|
||||
nir_resource_insts[def->index] = ubld8.MOV(dst, src);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue