mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 10:40:11 +01:00
i965/fs: Reduce the response length of sampler messages on Skylake.
Often, we don't need a full 4 channels worth of data from the sampler. For example, depth comparisons and red textures only return one value. To handle this, the sampler message header contains a mask which can be used to disable channels, and reduce the message length (in SIMD16 mode on all hardware, and SIMD8 mode on Broadwell and later). We've never used it before, since it required setting up a message header. This meant trading a smaller response length for a larger message length and additional MOVs to set it up. However, Skylake introduces a terrific new feature: for headerless messages, you can simply reduce the response length, and it makes the implicit header contain an appropriate mask. So to read only RG, you would simply set the message length to 2 or 4 (SIMD8/16). This means we can finally take advantage of this at no cost. total instructions in shared programs: 9091831 -> 9073067 (-0.21%) instructions in affected programs: 191370 -> 172606 (-9.81%) helped: 2609 HURT: 0 total cycles in shared programs: 70868114 -> 68454752 (-3.41%) cycles in affected programs: 35841154 -> 33427792 (-6.73%) helped: 16357 HURT: 8188 total spills in shared programs: 3492 -> 1707 (-51.12%) spills in affected programs: 2749 -> 964 (-64.93%) helped: 74 HURT: 0 total fills in shared programs: 4266 -> 2647 (-37.95%) fills in affected programs: 3029 -> 1410 (-53.45%) helped: 74 HURT: 0 LOST: 1 GAINED: 143 Signed-off-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
This commit is contained in:
parent
d800b7daa5
commit
13195f7ef8
4 changed files with 28 additions and 5 deletions
|
|
@ -4048,6 +4048,16 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
header_size = 1;
|
||||
sources[0] = fs_reg();
|
||||
length++;
|
||||
|
||||
/* If we're requesting fewer than four channels worth of response,
|
||||
* and we have an explicit header, we need to set up the sampler
|
||||
* writemask. It's reversed from normal: 1 means "don't write".
|
||||
*/
|
||||
if (inst->regs_written != 4 * reg_width) {
|
||||
assert((inst->regs_written % reg_width) == 0);
|
||||
unsigned mask = ~((1 << (inst->regs_written / reg_width)) - 1) & 0xf;
|
||||
inst->offset |= mask << 12;
|
||||
}
|
||||
}
|
||||
|
||||
if (shadow_c.file != BAD_FILE) {
|
||||
|
|
|
|||
|
|
@ -213,7 +213,8 @@ public:
|
|||
uint32_t surface,
|
||||
fs_reg surface_reg,
|
||||
uint32_t sampler,
|
||||
fs_reg sampler_reg);
|
||||
fs_reg sampler_reg,
|
||||
unsigned return_channels);
|
||||
fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
|
||||
const fs_reg &sampler);
|
||||
void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
|
||||
|
|
|
|||
|
|
@ -3224,14 +3224,25 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
|
|||
unreachable("unknown texture opcode");
|
||||
}
|
||||
|
||||
unsigned num_components = nir_tex_instr_dest_size(instr);
|
||||
|
||||
if (instr->dest.is_ssa) {
|
||||
uint8_t write_mask = nir_ssa_def_components_read(&instr->dest.ssa);
|
||||
assert(write_mask != 0); /* dead code should have been eliminated */
|
||||
num_components = _mesa_fls(write_mask);
|
||||
}
|
||||
|
||||
const bool can_reduce_return_length = devinfo->gen >= 9 &&
|
||||
instr->op != nir_texop_tg4 && instr->op != nir_texop_query_levels;
|
||||
|
||||
emit_texture(op, dest_type, coordinate, instr->coord_components,
|
||||
shadow_comparitor, lod, lod2, lod_components, sample_index,
|
||||
tex_offset, mcs, gather_component, is_cube_array,
|
||||
texture, texture_reg, sampler, sampler_reg);
|
||||
texture, texture_reg, sampler, sampler_reg,
|
||||
can_reduce_return_length ? num_components : 4);
|
||||
|
||||
fs_reg dest = get_nir_dest(instr->dest);
|
||||
dest.type = this->result.type;
|
||||
unsigned num_components = nir_tex_instr_dest_size(instr);
|
||||
emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
|
||||
dest, this->result),
|
||||
(1 << num_components) - 1);
|
||||
|
|
|
|||
|
|
@ -115,7 +115,8 @@ fs_visitor::emit_texture(ir_texture_opcode op,
|
|||
uint32_t surface,
|
||||
fs_reg surface_reg,
|
||||
uint32_t sampler,
|
||||
fs_reg sampler_reg)
|
||||
fs_reg sampler_reg,
|
||||
unsigned return_channels)
|
||||
{
|
||||
fs_inst *inst = NULL;
|
||||
|
||||
|
|
@ -204,7 +205,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
|
|||
}
|
||||
|
||||
inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
|
||||
inst->regs_written = 4 * dispatch_width / 8;
|
||||
inst->regs_written = return_channels * dispatch_width / 8;
|
||||
|
||||
if (shadow_c.file != BAD_FILE)
|
||||
inst->shadow_compare = true;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue