mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 14:20:11 +01:00
intel/brw: Build the scratch header on the fly for pre-LSC systems
Instead of reserving a register to contain the spill header, which gets marked live for the entire program, we can just emit the ALU instructions to build it on the fly. (This is similar to the way we handle scratch on Alchemist with the newer LSC data port.) There are a couple of downsides that make this not obviously a win. First, in order to construct the scratch header on Gfx9-12, we have to use fields from g0, which will have to remain live anywhere that scratch access is required. This could negate the register pressure benefits of creating the header on the fly. However, g0 is oft used in other places anyway, so it may already be there. Another is that it's a non-trivial number of ALU instructions to construct the value. Still, trading lower pressure (so fewer spills, less memory access and stalls) for more cheap ALU seems like it ought to be a win. There is another valuable benefit: by not reserving a register, we eliminate the need to reconstruct the interference graph. (The next patch will actually do so.) shader-db on Icelake shows spills/fills at 54/53 helped, 4/10 hurt, and an 8% increase in ALU on affected shaders. Synmark's OglCSDof (a benchmark that spills) performance remains the same on Alderlake. fossil-db on Icelake shows a 5.6%/5.1% reduction in spills/fills and a 4% reduction in scratch memory size on affected shaders. Instruction counts go up by 11.07%, but cycle estimates only increase by 0.57%. Assassin's Creed Odyssey and Wolfenstein Youngblood both see 20-30% reductions in spills/fills, a significant improvement. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25811>
This commit is contained in:
parent
ce85f3a431
commit
29d6264627
1 changed files with 30 additions and 61 deletions
|
|
@ -266,7 +266,6 @@ public:
|
|||
|
||||
node_count = 0;
|
||||
first_payload_node = 0;
|
||||
scratch_header_node = 0;
|
||||
grf127_send_hack_node = 0;
|
||||
first_vgrf_node = 0;
|
||||
last_vgrf_node = 0;
|
||||
|
|
@ -296,6 +295,8 @@ private:
|
|||
uint32_t spill_offset, int ip);
|
||||
fs_reg build_single_offset(const fs_builder &bld,
|
||||
uint32_t spill_offset, int ip);
|
||||
fs_reg build_legacy_scratch_header(const fs_builder &bld,
|
||||
uint32_t spill_offset, int ip);
|
||||
|
||||
void emit_unspill(const fs_builder &bld, struct shader_stats *stats,
|
||||
fs_reg dst, uint32_t spill_offset, unsigned count, int ip);
|
||||
|
|
@ -304,7 +305,6 @@ private:
|
|||
|
||||
void set_spill_costs();
|
||||
int choose_spill_reg();
|
||||
fs_reg alloc_scratch_header();
|
||||
fs_reg alloc_spill_reg(unsigned size, int ip);
|
||||
void spill_reg(unsigned spill_reg);
|
||||
|
||||
|
|
@ -325,7 +325,6 @@ private:
|
|||
|
||||
int node_count;
|
||||
int first_payload_node;
|
||||
int scratch_header_node;
|
||||
int grf127_send_hack_node;
|
||||
int first_vgrf_node;
|
||||
int last_vgrf_node;
|
||||
|
|
@ -334,8 +333,6 @@ private:
|
|||
int *spill_vgrf_ip;
|
||||
int spill_vgrf_ip_alloc;
|
||||
int spill_node_count;
|
||||
|
||||
fs_reg scratch_header;
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
|
@ -393,10 +390,6 @@ fs_reg_alloc::setup_live_interference(unsigned node,
|
|||
ra_add_node_interference(g, node, first_payload_node + i);
|
||||
}
|
||||
|
||||
/* Everything interferes with the scratch header */
|
||||
if (scratch_header_node >= 0)
|
||||
ra_add_node_interference(g, node, scratch_header_node);
|
||||
|
||||
/* Add interference with every vgrf whose live range intersects this
|
||||
* node's. We only need to look at nodes below this one as the reflexivity
|
||||
* of interference will take care of the rest.
|
||||
|
|
@ -530,11 +523,6 @@ fs_reg_alloc::build_interference_graph(bool allow_spilling)
|
|||
first_vgrf_node = node_count;
|
||||
node_count += fs->alloc.count;
|
||||
last_vgrf_node = node_count - 1;
|
||||
if (devinfo->verx10 < 125 && allow_spilling) {
|
||||
scratch_header_node = node_count++;
|
||||
} else {
|
||||
scratch_header_node = -1;
|
||||
}
|
||||
first_spill_node = node_count;
|
||||
|
||||
fs->calculate_payload_ranges(payload_node_count,
|
||||
|
|
@ -631,6 +619,31 @@ fs_reg_alloc::build_lane_offsets(const fs_builder &bld, uint32_t spill_offset, i
|
|||
return offset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a scratch header for pre-LSC platforms.
|
||||
*/
|
||||
fs_reg
|
||||
fs_reg_alloc::build_legacy_scratch_header(const fs_builder &bld,
|
||||
uint32_t spill_offset, int ip)
|
||||
{
|
||||
const fs_builder ubld8 = bld.exec_all().group(8, 0);
|
||||
const fs_builder ubld1 = bld.exec_all().group(1, 0);
|
||||
|
||||
/* Allocate a spill header and make it interfere with g0 */
|
||||
fs_reg header = retype(alloc_spill_reg(1, ip), BRW_TYPE_UD);
|
||||
ra_add_node_interference(g, first_vgrf_node + header.nr, first_payload_node);
|
||||
|
||||
fs_inst *inst = ubld8.emit(SHADER_OPCODE_SCRATCH_HEADER, header);
|
||||
_mesa_set_add(spill_insts, inst);
|
||||
|
||||
/* Write the scratch offset */
|
||||
assert(spill_offset % 16 == 0);
|
||||
inst = ubld1.MOV(component(header, 2), brw_imm_ud(spill_offset / 16));
|
||||
_mesa_set_add(spill_insts, inst);
|
||||
|
||||
return header;
|
||||
}
|
||||
|
||||
void
|
||||
fs_reg_alloc::emit_unspill(const fs_builder &bld,
|
||||
struct shader_stats *stats,
|
||||
|
|
@ -689,12 +702,7 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld,
|
|||
unspill_inst->send_is_volatile = true;
|
||||
unspill_inst->send_ex_desc_scratch = true;
|
||||
} else {
|
||||
fs_reg header = this->scratch_header;
|
||||
fs_builder ubld = bld.exec_all().group(1, 0);
|
||||
assert(spill_offset % 16 == 0);
|
||||
unspill_inst = ubld.MOV(component(header, 2),
|
||||
brw_imm_ud(spill_offset / 16));
|
||||
_mesa_set_add(spill_insts, unspill_inst);
|
||||
fs_reg header = build_legacy_scratch_header(bld, spill_offset, ip);
|
||||
|
||||
const unsigned bti = GFX8_BTI_STATELESS_NON_COHERENT;
|
||||
const fs_reg ex_desc = brw_imm_ud(0);
|
||||
|
|
@ -767,12 +775,7 @@ fs_reg_alloc::emit_spill(const fs_builder &bld,
|
|||
spill_inst->send_is_volatile = false;
|
||||
spill_inst->send_ex_desc_scratch = true;
|
||||
} else {
|
||||
fs_reg header = this->scratch_header;
|
||||
fs_builder ubld = bld.exec_all().group(1, 0);
|
||||
assert(spill_offset % 16 == 0);
|
||||
spill_inst = ubld.MOV(component(header, 2),
|
||||
brw_imm_ud(spill_offset / 16));
|
||||
_mesa_set_add(spill_insts, spill_inst);
|
||||
fs_reg header = build_legacy_scratch_header(bld, spill_offset, ip);
|
||||
|
||||
const unsigned bti = GFX8_BTI_STATELESS_NON_COHERENT;
|
||||
const fs_reg ex_desc = brw_imm_ud(0);
|
||||
|
|
@ -900,19 +903,6 @@ fs_reg_alloc::choose_spill_reg()
|
|||
return node - first_vgrf_node;
|
||||
}
|
||||
|
||||
fs_reg
|
||||
fs_reg_alloc::alloc_scratch_header()
|
||||
{
|
||||
int vgrf = fs->alloc.allocate(1);
|
||||
assert(first_vgrf_node + vgrf == scratch_header_node);
|
||||
ra_set_node_class(g, scratch_header_node,
|
||||
compiler->fs_reg_set.classes[0]);
|
||||
|
||||
setup_live_interference(scratch_header_node, 0, INT_MAX);
|
||||
|
||||
return fs_reg(VGRF, vgrf, BRW_TYPE_UD);
|
||||
}
|
||||
|
||||
fs_reg
|
||||
fs_reg_alloc::alloc_spill_reg(unsigned size, int ip)
|
||||
{
|
||||
|
|
@ -953,28 +943,7 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
|
|||
unsigned int spill_offset = fs->last_scratch;
|
||||
assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */
|
||||
|
||||
/* Spills may use MRFs 13-15 in the SIMD16 case. Our texturing is done
|
||||
* using up to 11 MRFs starting from either m1 or m2, and fb writes can use
|
||||
* up to m13 (gfx6+ simd16: 2 header + 8 color + 2 src0alpha + 2 omask) or
|
||||
* m15 (gfx4-5 simd16: 2 header + 8 color + 1 aads + 2 src depth + 2 dst
|
||||
* depth), starting from m1. In summary: We may not be able to spill in
|
||||
* SIMD16 mode, because we'd stomp the FB writes.
|
||||
*/
|
||||
if (!fs->spilled_any_registers) {
|
||||
if (devinfo->verx10 >= 125) {
|
||||
/* We will allocate a register on the fly */
|
||||
} else {
|
||||
this->scratch_header = alloc_scratch_header();
|
||||
fs_builder ubld = fs_builder(fs, 8).exec_all().at(
|
||||
fs->cfg->first_block(), fs->cfg->first_block()->start());
|
||||
|
||||
fs_inst *inst = ubld.emit(SHADER_OPCODE_SCRATCH_HEADER,
|
||||
this->scratch_header);
|
||||
_mesa_set_add(spill_insts, inst);
|
||||
}
|
||||
|
||||
fs->spilled_any_registers = true;
|
||||
}
|
||||
fs->spilled_any_registers = true;
|
||||
|
||||
fs->last_scratch += size * REG_SIZE;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue