i965/fs: Recognize and emit ld_lz, sample_lz, sample_c_lz.

Ken suggested instead of a big and complicated optimization pass, to
just recognize the operations here. It's certainly less code and a lot
prettier, but it seems to actually perform worse for currently unknown
reasons.

total instructions in shared programs: 8923452 -> 8904108 (-0.22%)
instructions in affected programs: 814563 -> 795219 (-2.37%)
helped: 3336
HURT: 10

total cycles in shared programs: 66970734 -> 66651476 (-0.48%)
cycles in affected programs: 10582686 -> 10263428 (-3.02%)
helped: 2438
HURT: 691

total spills in shared programs: 1811 -> 1789 (-1.21%)
spills in affected programs: 85 -> 63 (-25.88%)
helped: 4

total fills in shared programs: 3143 -> 3109 (-1.08%)
fills in affected programs: 167 -> 133 (-20.36%)
helped: 4

LOST:   2
GAINED: 36

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Matt Turner 2016-05-04 15:37:02 -07:00
parent 75dccf5ac2
commit 8a65b5135a

View file

@ -4189,6 +4189,10 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
switch (op) {
case FS_OPCODE_TXB:
case SHADER_OPCODE_TXL:
if (devinfo->gen >= 9 && op == SHADER_OPCODE_TXL && lod.is_zero()) {
op = SHADER_OPCODE_TXL_LZ;
break;
}
bld.MOV(sources[length], lod);
length++;
break;
@ -4240,8 +4244,12 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
length++;
}
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod);
length++;
if (devinfo->gen >= 9 && lod.is_zero()) {
op = SHADER_OPCODE_TXF_LZ;
} else {
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod);
length++;
}
for (unsigned i = devinfo->gen >= 9 ? 2 : 1; i < coord_components; i++) {
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);