mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 18:00:13 +01:00
intel/fs: Use split sends for surface writes on gen9+
Surface reads don't need them because they just have the one address payload. With surface writes, on the other hand, we can put the address and the data in different halves of the split send and avoid building the combined payload altogether. The decrease in register pressure and the added freedom in register allocation resulting from this change reduce spilling enough to improve the performance of one customer benchmark by about 2x. Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
This commit is contained in:
parent
014edff0d2
commit
a920979d4f
2 changed files with 47 additions and 18 deletions
|
|
@ -5016,28 +5016,42 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
* Gen11+ the header has been removed so we can only use predication.
|
* Gen11+ the header has been removed so we can only use predication.
|
||||||
*/
|
*/
|
||||||
const unsigned header_sz = devinfo->gen < 9 && is_typed_access ? 1 : 0;
|
const unsigned header_sz = devinfo->gen < 9 && is_typed_access ? 1 : 0;
|
||||||
const unsigned sz = header_sz + addr_sz + src_sz;
|
|
||||||
|
|
||||||
/* Allocate space for the payload. */
|
|
||||||
fs_reg *const components = new fs_reg[sz];
|
|
||||||
const fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
|
|
||||||
unsigned n = 0;
|
|
||||||
|
|
||||||
const bool has_side_effects = inst->has_side_effects();
|
const bool has_side_effects = inst->has_side_effects();
|
||||||
fs_reg sample_mask = has_side_effects ? bld.sample_mask_reg() :
|
fs_reg sample_mask = has_side_effects ? bld.sample_mask_reg() :
|
||||||
fs_reg(brw_imm_d(0xffff));
|
fs_reg(brw_imm_d(0xffff));
|
||||||
|
|
||||||
/* Construct the payload. */
|
fs_reg payload, payload2;
|
||||||
if (header_sz)
|
unsigned mlen, ex_mlen = 0;
|
||||||
components[n++] = emit_surface_header(bld, sample_mask);
|
if (devinfo->gen >= 9) {
|
||||||
|
/* We have split sends on gen9 and above */
|
||||||
|
assert(header_sz == 0);
|
||||||
|
payload = bld.move_to_vgrf(addr, addr_sz);
|
||||||
|
payload2 = bld.move_to_vgrf(src, src_sz);
|
||||||
|
mlen = addr_sz * (inst->exec_size / 8);
|
||||||
|
ex_mlen = src_sz * (inst->exec_size / 8);
|
||||||
|
} else {
|
||||||
|
/* Allocate space for the payload. */
|
||||||
|
const unsigned sz = header_sz + addr_sz + src_sz;
|
||||||
|
payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
|
||||||
|
fs_reg *const components = new fs_reg[sz];
|
||||||
|
unsigned n = 0;
|
||||||
|
|
||||||
for (unsigned i = 0; i < addr_sz; i++)
|
/* Construct the payload. */
|
||||||
components[n++] = offset(addr, bld, i);
|
if (header_sz)
|
||||||
|
components[n++] = emit_surface_header(bld, sample_mask);
|
||||||
|
|
||||||
for (unsigned i = 0; i < src_sz; i++)
|
for (unsigned i = 0; i < addr_sz; i++)
|
||||||
components[n++] = offset(src, bld, i);
|
components[n++] = offset(addr, bld, i);
|
||||||
|
|
||||||
bld.LOAD_PAYLOAD(payload, components, sz, header_sz);
|
for (unsigned i = 0; i < src_sz; i++)
|
||||||
|
components[n++] = offset(src, bld, i);
|
||||||
|
|
||||||
|
bld.LOAD_PAYLOAD(payload, components, sz, header_sz);
|
||||||
|
mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8;
|
||||||
|
|
||||||
|
delete[] components;
|
||||||
|
}
|
||||||
|
|
||||||
/* Predicate the instruction on the sample mask if no header is
|
/* Predicate the instruction on the sample mask if no header is
|
||||||
* provided.
|
* provided.
|
||||||
|
|
@ -5162,7 +5176,8 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
|
|
||||||
/* Update the original instruction. */
|
/* Update the original instruction. */
|
||||||
inst->opcode = SHADER_OPCODE_SEND;
|
inst->opcode = SHADER_OPCODE_SEND;
|
||||||
inst->mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8;
|
inst->mlen = mlen;
|
||||||
|
inst->ex_mlen = ex_mlen;
|
||||||
inst->header_size = header_sz;
|
inst->header_size = header_sz;
|
||||||
inst->send_has_side_effects = has_side_effects;
|
inst->send_has_side_effects = has_side_effects;
|
||||||
inst->send_is_volatile = !has_side_effects;
|
inst->send_is_volatile = !has_side_effects;
|
||||||
|
|
@ -5183,10 +5198,9 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
|
|
||||||
/* Finally, the payload */
|
/* Finally, the payload */
|
||||||
inst->src[2] = payload;
|
inst->src[2] = payload;
|
||||||
|
inst->src[3] = payload2;
|
||||||
|
|
||||||
inst->resize_sources(3);
|
inst->resize_sources(4);
|
||||||
|
|
||||||
delete[] components;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
|
||||||
|
|
@ -426,6 +426,21 @@ namespace brw {
|
||||||
return src_reg(component(dst, 0));
|
return src_reg(component(dst, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Gather \p num_components consecutive components of \p src into a single
 * register obtained from vgrf() and hand that register back as a source.
 *
 * The components are combined with one LOAD_PAYLOAD that has a header
 * size of zero, and the result keeps the type of \p src.
 */
src_reg
move_to_vgrf(const src_reg &src, unsigned num_components) const
{
   /* Scratch list holding one source register per component. */
   src_reg *const parts = new src_reg[num_components];

   unsigned c = 0;
   while (c < num_components) {
      parts[c] = offset(src, dispatch_width(), c);
      ++c;
   }

   /* Destination large enough for every component, typed like the input. */
   const dst_reg packed = vgrf(src.type, num_components);
   LOAD_PAYLOAD(packed, parts, num_components, 0);

   /* The scratch list is released here, right after LOAD_PAYLOAD is emitted. */
   delete[] parts;

   return src_reg(packed);
}
|
||||||
|
|
||||||
void
|
void
|
||||||
emit_scan(enum opcode opcode, const dst_reg &tmp,
|
emit_scan(enum opcode opcode, const dst_reg &tmp,
|
||||||
unsigned cluster_size, brw_conditional_mod mod) const
|
unsigned cluster_size, brw_conditional_mod mod) const
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue