mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 18:18:06 +02:00
intel/fs: Be more conservative in split_virtual_grfs
Instead of modifying every single instruction, keep track of which VGRFs are actually split in a bit-set, and only modify the instructions that actually touch split regs. This cuts the runtime of the following Vulkan CTS on my SKL box by 45% from 3:21 to 1:51: dEQP-VK.ssbo.phys.layout.random.16bit.scalar.13 Reviewed-by: Emma Anholt <emma@anholt.net> Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13734>
This commit is contained in:
parent
c9c50f89b2
commit
38fa18a7a3
1 changed files with 47 additions and 15 deletions
|
|
@ -2135,10 +2135,15 @@ fs_visitor::split_virtual_grfs()
|
|||
}
|
||||
}
|
||||
|
||||
/* Bitset of which registers have been split */
|
||||
bool *vgrf_has_split = new bool[num_vars];
|
||||
memset(vgrf_has_split, 0, num_vars * sizeof(*vgrf_has_split));
|
||||
|
||||
int *new_virtual_grf = new int[reg_count];
|
||||
int *new_reg_offset = new int[reg_count];
|
||||
|
||||
int reg = 0;
|
||||
bool has_splits = false;
|
||||
for (int i = 0; i < num_vars; i++) {
|
||||
/* The first one should always be 0 as a quick sanity check. */
|
||||
assert(split_points[reg] == false);
|
||||
|
|
@ -2154,6 +2159,8 @@ fs_visitor::split_virtual_grfs()
|
|||
* new virtual GRF for the previous offset many registers
|
||||
*/
|
||||
if (split_points[reg]) {
|
||||
has_splits = true;
|
||||
vgrf_has_split[i] = true;
|
||||
assert(offset <= MAX_VGRF_SIZE);
|
||||
int grf = alloc.allocate(offset);
|
||||
for (int k = reg - offset; k < reg; k++)
|
||||
|
|
@ -2173,40 +2180,65 @@ fs_visitor::split_virtual_grfs()
|
|||
}
|
||||
assert(reg == reg_count);
|
||||
|
||||
if (!has_splits)
|
||||
goto cleanup;
|
||||
|
||||
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
|
||||
if (inst->opcode == SHADER_OPCODE_UNDEF) {
|
||||
const fs_builder ibld(this, block, inst);
|
||||
assert(inst->size_written % REG_SIZE == 0);
|
||||
unsigned reg_offset = 0;
|
||||
while (reg_offset < inst->size_written / REG_SIZE) {
|
||||
reg = vgrf_to_reg[inst->dst.nr] + reg_offset;
|
||||
ibld.UNDEF(fs_reg(VGRF, new_virtual_grf[reg], inst->dst.type));
|
||||
reg_offset += alloc.sizes[new_virtual_grf[reg]];
|
||||
assert(inst->dst.file == VGRF);
|
||||
if (vgrf_has_split[inst->dst.nr]) {
|
||||
const fs_builder ibld(this, block, inst);
|
||||
assert(inst->size_written % REG_SIZE == 0);
|
||||
unsigned reg_offset = 0;
|
||||
while (reg_offset < inst->size_written / REG_SIZE) {
|
||||
reg = vgrf_to_reg[inst->dst.nr] + reg_offset;
|
||||
ibld.UNDEF(fs_reg(VGRF, new_virtual_grf[reg], inst->dst.type));
|
||||
reg_offset += alloc.sizes[new_virtual_grf[reg]];
|
||||
}
|
||||
inst->remove(block);
|
||||
} else {
|
||||
reg = vgrf_to_reg[inst->dst.nr];
|
||||
assert(new_reg_offset[reg] == 0);
|
||||
assert(new_virtual_grf[reg] == (int)inst->dst.nr);
|
||||
}
|
||||
inst->remove(block);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (inst->dst.file == VGRF) {
|
||||
reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE;
|
||||
inst->dst.nr = new_virtual_grf[reg];
|
||||
inst->dst.offset = new_reg_offset[reg] * REG_SIZE +
|
||||
inst->dst.offset % REG_SIZE;
|
||||
assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]);
|
||||
if (vgrf_has_split[inst->dst.nr]) {
|
||||
inst->dst.nr = new_virtual_grf[reg];
|
||||
inst->dst.offset = new_reg_offset[reg] * REG_SIZE +
|
||||
inst->dst.offset % REG_SIZE;
|
||||
assert((unsigned)new_reg_offset[reg] <
|
||||
alloc.sizes[new_virtual_grf[reg]]);
|
||||
} else {
|
||||
assert(new_reg_offset[reg] == inst->dst.offset / REG_SIZE);
|
||||
assert(new_virtual_grf[reg] == (int)inst->dst.nr);
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == VGRF) {
|
||||
reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE;
|
||||
if (inst->src[i].file != VGRF)
|
||||
continue;
|
||||
|
||||
reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE;
|
||||
if (vgrf_has_split[inst->src[i].nr]) {
|
||||
inst->src[i].nr = new_virtual_grf[reg];
|
||||
inst->src[i].offset = new_reg_offset[reg] * REG_SIZE +
|
||||
inst->src[i].offset % REG_SIZE;
|
||||
assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]);
|
||||
assert((unsigned)new_reg_offset[reg] <
|
||||
alloc.sizes[new_virtual_grf[reg]]);
|
||||
} else {
|
||||
assert(new_reg_offset[reg] == inst->src[i].offset / REG_SIZE);
|
||||
assert(new_virtual_grf[reg] == (int)inst->src[i].nr);
|
||||
}
|
||||
}
|
||||
}
|
||||
invalidate_analysis(DEPENDENCY_INSTRUCTION_DETAIL | DEPENDENCY_VARIABLES);
|
||||
|
||||
cleanup:
|
||||
delete[] split_points;
|
||||
delete[] vgrf_has_split;
|
||||
delete[] new_virtual_grf;
|
||||
delete[] new_reg_offset;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue