mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 21:50:12 +01:00
brw/lower: Lower invalid source conversion to better code
There are two fragment shaders from RDR2 that is hurt for spills and
fills on Lunar Lake.
Totals from 2 (0.00% of 551413) affected shaders:
Spill count: 1252 -> 1317 (+5.19%)
Fill count: 2518 -> 2642 (+4.92%)
Those shaders... have a lot of room for improvement. There are some
patterns in those shaders that we handle very, very poorly. Improving
those patterns would likely improve the spills and fills in these
shaders quite dramatically.
Given how much other platforms are helped, I don't this should block
this commit.
No shader-db or fossil-db changes on any pre-Gfx12.5 Intel platforms.
v2: Add some comments and an additional assertion. Suggested by Ken.
shader-db:
Lunar Lake
total instructions in shared programs: 18094517 -> 18094511 (<.01%)
instructions in affected programs: 809 -> 803 (-0.74%)
helped: 6 / HURT: 0
total cycles in shared programs: 921532158 -> 921532168 (<.01%)
cycles in affected programs: 2266 -> 2276 (0.44%)
helped: 0 / HURT: 3
Meteor Lake and DG2 had similar results. (Meteor Lake shown)
total instructions in shared programs: 19820845 -> 19820839 (<.01%)
instructions in affected programs: 803 -> 797 (-0.75%)
helped: 6 / HURT: 0
total cycles in shared programs: 906372999 -> 906372949 (<.01%)
cycles in affected programs: 3216 -> 3166 (-1.55%)
helped: 6 / HURT: 0
fossil-db:
Lunar Lake
Totals:
Instrs: 141887377 -> 141884465 (-0.00%); split: -0.00%, +0.00%
Cycle count: 21990301498 -> 21990267232 (-0.00%); split: -0.00%, +0.00%
Spill count: 69732 -> 69797 (+0.09%)
Fill count: 128521 -> 128645 (+0.10%)
Totals from 349 (0.06% of 551413) affected shaders:
Instrs: 506117 -> 503205 (-0.58%); split: -0.79%, +0.21%
Cycle count: 32362996 -> 32328730 (-0.11%); split: -0.52%, +0.41%
Spill count: 1951 -> 2016 (+3.33%)
Fill count: 4899 -> 5023 (+2.53%)
Meteor Lake and DG2 had similar results. (Meteor Lake shown)
Totals:
Instrs: 152773732 -> 152761383 (-0.01%); split: -0.01%, +0.00%
Cycle count: 17187529968 -> 17187450663 (-0.00%); split: -0.00%, +0.00%
Spill count: 79279 -> 79003 (-0.35%)
Fill count: 148803 -> 147942 (-0.58%)
Scratch Memory Size: 3949568 -> 3946496 (-0.08%)
Max live registers: 31879325 -> 31879230 (-0.00%)
Totals from 366 (0.06% of 633185) affected shaders:
Instrs: 557377 -> 545028 (-2.22%); split: -2.22%, +0.01%
Cycle count: 26171205 -> 26091900 (-0.30%); split: -0.54%, +0.24%
Spill count: 3238 -> 2962 (-8.52%)
Fill count: 10018 -> 9157 (-8.59%)
Scratch Memory Size: 257024 -> 253952 (-1.20%)
Max live registers: 28187 -> 28092 (-0.34%)
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32041>
This commit is contained in:
parent
2a57568ebd
commit
7aad19ccd2
1 changed files with 37 additions and 0 deletions
|
|
@ -720,6 +720,38 @@ namespace {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fast-path for very specific kinds of invalid regions.
|
||||
*
|
||||
* Gfx12.5+ does not allow moves of B or UB sources to floating-point
|
||||
* destinations. This restriction can be resolved more efficiently than by
|
||||
* the general lowering in lower_src_modifiers or lower_src_region.
|
||||
*/
|
||||
void
|
||||
lower_src_conversion(fs_visitor *v, bblock_t *block, fs_inst *inst)
|
||||
{
|
||||
const intel_device_info *devinfo = v->devinfo;
|
||||
const fs_builder ibld = fs_builder(v, block, inst).scalar_group();
|
||||
|
||||
/* We only handle scalar conversions from small types for now. */
|
||||
assert(is_uniform(inst->src[0]));
|
||||
|
||||
brw_reg tmp = ibld.vgrf(brw_type_with_size(inst->src[0].type, 32));
|
||||
fs_inst *mov = ibld.MOV(tmp, inst->src[0]);
|
||||
|
||||
inst->src[0] = component(tmp, 0);
|
||||
|
||||
/* Assert that neither the added MOV nor the original instruction will need
|
||||
* any additional lowering.
|
||||
*/
|
||||
assert(!has_invalid_src_region(devinfo, mov, 0));
|
||||
assert(!has_invalid_src_modifiers(devinfo, mov, 0));
|
||||
assert(!has_invalid_dst_region(devinfo, mov));
|
||||
|
||||
assert(!has_invalid_src_region(devinfo, inst, 0));
|
||||
assert(!has_invalid_src_modifiers(devinfo, inst, 0));
|
||||
}
|
||||
|
||||
/**
|
||||
* Legalize the source and destination regioning controls of the specified
|
||||
* instruction.
|
||||
|
|
@ -736,6 +768,11 @@ namespace {
|
|||
if (has_invalid_dst_region(devinfo, inst))
|
||||
progress |= lower_dst_region(v, block, inst);
|
||||
|
||||
if (has_invalid_src_conversion(devinfo, inst)) {
|
||||
lower_src_conversion(v, block, inst);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
if (has_invalid_src_modifiers(devinfo, inst, i))
|
||||
progress |= lower_src_modifiers(v, block, inst, i);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue