mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-25 16:58:10 +02:00
brw: Don't directly use regs_read/regs_written/size_read as bound for non-trivial loops
Instead save to a local variable and use that. In various cases the compiler is not able to pull it out of the loop, since there are other not inlined function calls as part of the loop's body, resulting in repeated unnecessary calls to either size_read() or its pieces that get inlined. Below are fossil compilation times in a MTL machine compiling shaders for a BMG GPU: ``` // Differences at 95.0% confidence. // Rise of the Tomb Raider (n=20) -0.017 +/- 0.00724575 -3.45177665% +/- 1.45084% // Alan Wake (n=20) -0.153 +/- 0.00960067 -4.99265786% +/- 0.303695% // Borderlands 3 (n=14) -0.486428571 +/- 0.15354 -3.51248195% +/- 1.0835% // Oblivion Remastered (n=14) -0.143571429 +/- 0.0357991 -3.05749924% +/- 0.747872% // Baldur's Gate 3 (n=14) -1.68928571 +/- 0.151598 -4.12128605% +/- 0.364259% ``` Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41496>
This commit is contained in:
parent
3f71aab327
commit
821a812c7d
4 changed files with 44 additions and 22 deletions
|
|
@ -89,7 +89,8 @@ brw_live_variables::setup_def_use()
|
|||
continue;
|
||||
|
||||
const int var = var_from_reg(inst->src[i]);
|
||||
for (unsigned j = 0; j < regs_read(devinfo, inst, i); j++)
|
||||
const unsigned read = regs_read(devinfo, inst, i);
|
||||
for (unsigned j = 0; j < read; j++)
|
||||
setup_one_read(bd, ip, var + j);
|
||||
}
|
||||
|
||||
|
|
@ -98,7 +99,8 @@ brw_live_variables::setup_def_use()
|
|||
/* Set def[] for this instruction */
|
||||
if (inst->dst.file == VGRF) {
|
||||
const int var = var_from_reg(inst->dst);
|
||||
for (unsigned j = 0; j < regs_written(inst); j++)
|
||||
const unsigned written = regs_written(inst);
|
||||
for (unsigned j = 0; j < written; j++)
|
||||
setup_one_write(bd, inst, ip, var + j);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -905,7 +905,8 @@ namespace {
|
|||
|
||||
/* Stall on any source dependencies. */
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
for (unsigned j = 0; j < regs_read(devinfo, inst, i); j++)
|
||||
const unsigned read = regs_read(devinfo, inst, i);
|
||||
for (unsigned j = 0; j < read; j++)
|
||||
stall_on_dependency(
|
||||
st, reg_dependency_id(devinfo, inst->src[i], j));
|
||||
}
|
||||
|
|
@ -927,7 +928,8 @@ namespace {
|
|||
|
||||
/* Stall on any write dependencies. */
|
||||
if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) {
|
||||
for (unsigned j = 0; j < regs_written(inst); j++)
|
||||
const unsigned written = regs_written(inst);
|
||||
for (unsigned j = 0; j < written; j++)
|
||||
stall_on_dependency(
|
||||
st, reg_dependency_id(devinfo, inst->dst, j));
|
||||
}
|
||||
|
|
@ -960,7 +962,8 @@ namespace {
|
|||
if (inst->is_send()) {
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
if (inst->is_payload(i)) {
|
||||
for (unsigned j = 0; j < regs_read(devinfo, inst, i); j++)
|
||||
const unsigned read = regs_read(devinfo, inst, i);
|
||||
for (unsigned j = 0; j < read; j++)
|
||||
mark_read_dependency(
|
||||
st, perf, reg_dependency_id(devinfo, inst->src[i], j));
|
||||
}
|
||||
|
|
@ -969,7 +972,8 @@ namespace {
|
|||
|
||||
/* Mark any destination dependencies. */
|
||||
if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) {
|
||||
for (unsigned j = 0; j < regs_written(inst); j++) {
|
||||
const unsigned written = regs_written(inst);
|
||||
for (unsigned j = 0; j < written; j++) {
|
||||
mark_write_dependency(st, perf,
|
||||
reg_dependency_id(devinfo, inst->dst, j));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1020,7 +1020,8 @@ namespace {
|
|||
is_ordered ? dependency(TGL_REGDIST_SRC, jp, exec_all) :
|
||||
dependency::done;
|
||||
|
||||
for (unsigned j = 0; j < regs_read(devinfo, inst, i); j++) {
|
||||
const unsigned read = regs_read(devinfo, inst, i);
|
||||
for (unsigned j = 0; j < read; j++) {
|
||||
const brw_reg r = byte_offset(inst->src[i], REG_SIZE * j);
|
||||
sb.set(r, shadow(sb.get(r), rd_dep));
|
||||
}
|
||||
|
|
@ -1072,7 +1073,8 @@ namespace {
|
|||
|
||||
if (is_valid(wr_dep) && inst->dst.file != BAD_FILE &&
|
||||
!inst->dst.is_null()) {
|
||||
for (unsigned j = 0; j < regs_written(inst); j++)
|
||||
const unsigned written = regs_written(inst);
|
||||
for (unsigned j = 0; j < written; j++)
|
||||
sb.set(byte_offset(inst->dst, REG_SIZE * j), wr_dep);
|
||||
}
|
||||
}
|
||||
|
|
@ -1168,7 +1170,8 @@ namespace {
|
|||
std::vector<dependency> inst_deps;
|
||||
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
for (unsigned j = 0; j < regs_read(devinfo, inst, i); j++)
|
||||
const unsigned read = regs_read(devinfo, inst, i);
|
||||
for (unsigned j = 0; j < read; j++)
|
||||
add_dependency(ids, inst_deps, dependency_for_read(
|
||||
sb.get(byte_offset(inst->src[i], REG_SIZE * j))));
|
||||
}
|
||||
|
|
@ -1214,7 +1217,8 @@ namespace {
|
|||
if (inst->dst.file != BAD_FILE && !inst->dst.is_null() &&
|
||||
!inst->dst.is_accumulator() &&
|
||||
inst->opcode != SHADER_OPCODE_UNDEF) {
|
||||
for (unsigned j = 0; j < regs_written(inst); j++) {
|
||||
const unsigned written = regs_written(inst);
|
||||
for (unsigned j = 0; j < written; j++) {
|
||||
add_dependency(ids, inst_deps, dependency_for_write(devinfo, inst,
|
||||
sb.get(byte_offset(inst->dst, REG_SIZE * j))));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1363,7 +1363,8 @@ brw_instruction_scheduler::calculate_deps()
|
|||
if (!inst->src[i].is_address())
|
||||
continue;
|
||||
|
||||
for (unsigned byte = 0; byte < inst->size_read(s->devinfo, i); byte += 2) {
|
||||
const unsigned read = inst->size_read(s->devinfo, i);
|
||||
for (unsigned byte = 0; byte < read; byte += 2) {
|
||||
assert(inst->src[i].address_slot(byte) < ARRAY_SIZE(last_address_write));
|
||||
schedule_node *write_addr_node =
|
||||
last_address_write[inst->src[i].address_slot(byte)];
|
||||
|
|
@ -1396,11 +1397,13 @@ brw_instruction_scheduler::calculate_deps()
|
|||
/* read-after-write deps. */
|
||||
for (int i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == VGRF) {
|
||||
for (unsigned r = 0; r < regs_read(s->devinfo, inst, i); r++)
|
||||
const unsigned read = regs_read(s->devinfo, inst, i);
|
||||
for (unsigned r = 0; r < read; r++)
|
||||
add_dep(last_grf_write[grf_index(inst->src[i]) + r], n);
|
||||
} else if (inst->src[i].file == FIXED_GRF) {
|
||||
if (post_reg_alloc) {
|
||||
for (unsigned r = 0; r < regs_read(s->devinfo, inst, i); r++)
|
||||
const unsigned read = regs_read(s->devinfo, inst, i);
|
||||
for (unsigned r = 0; r < read; r++)
|
||||
add_dep(last_grf_write[inst->src[i].nr + r], n);
|
||||
} else {
|
||||
add_dep(last_fixed_grf_write, n);
|
||||
|
|
@ -1409,7 +1412,8 @@ brw_instruction_scheduler::calculate_deps()
|
|||
add_dep(last_accumulator_write, n);
|
||||
} else if (inst->src[i].is_address()) {
|
||||
if (post_reg_alloc) {
|
||||
for (unsigned byte = 0; byte < inst->size_read(s->devinfo, i); byte += 2)
|
||||
const unsigned read = inst->size_read(s->devinfo, i);
|
||||
for (unsigned byte = 0; byte < read; byte += 2)
|
||||
add_dep(last_address_write[inst->src[i].address_slot(byte)], n);
|
||||
}
|
||||
} else if (register_needs_barrier(inst->src[i])) {
|
||||
|
|
@ -1433,13 +1437,15 @@ brw_instruction_scheduler::calculate_deps()
|
|||
/* write-after-write deps. */
|
||||
if (inst->dst.file == VGRF) {
|
||||
int grf_idx = grf_index(inst->dst);
|
||||
for (unsigned r = 0; r < regs_written(inst); r++) {
|
||||
const unsigned written = regs_written(inst);
|
||||
for (unsigned r = 0; r < written; r++) {
|
||||
add_dep(last_grf_write[grf_idx + r], n);
|
||||
last_grf_write[grf_idx + r] = n;
|
||||
}
|
||||
} else if (inst->dst.file == FIXED_GRF) {
|
||||
if (post_reg_alloc) {
|
||||
for (unsigned r = 0; r < regs_written(inst); r++) {
|
||||
const unsigned written = regs_written(inst);
|
||||
for (unsigned r = 0; r < written; r++) {
|
||||
add_dep(last_grf_write[inst->dst.nr + r], n);
|
||||
last_grf_write[inst->dst.nr + r] = n;
|
||||
}
|
||||
|
|
@ -1500,11 +1506,13 @@ brw_instruction_scheduler::calculate_deps()
|
|||
/* write-after-read deps. */
|
||||
for (int i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == VGRF) {
|
||||
for (unsigned r = 0; r < regs_read(s->devinfo, inst, i); r++)
|
||||
const unsigned read = regs_read(s->devinfo, inst, i);
|
||||
for (unsigned r = 0; r < read; r++)
|
||||
add_dep(n, last_grf_write[grf_index(inst->src[i]) + r], 0);
|
||||
} else if (inst->src[i].file == FIXED_GRF) {
|
||||
if (post_reg_alloc) {
|
||||
for (unsigned r = 0; r < regs_read(s->devinfo, inst, i); r++)
|
||||
const unsigned read = regs_read(s->devinfo, inst, i);
|
||||
for (unsigned r = 0; r < read; r++)
|
||||
add_dep(n, last_grf_write[inst->src[i].nr + r], 0);
|
||||
} else {
|
||||
add_dep(n, last_fixed_grf_write, 0);
|
||||
|
|
@ -1513,7 +1521,8 @@ brw_instruction_scheduler::calculate_deps()
|
|||
add_dep(n, last_accumulator_write, 0);
|
||||
} else if (inst->src[i].is_address()) {
|
||||
if (post_reg_alloc) {
|
||||
for (unsigned byte = 0; byte < inst->size_read(s->devinfo, i); byte += 2) {
|
||||
const unsigned read = inst->size_read(s->devinfo, i);
|
||||
for (unsigned byte = 0; byte < read; byte += 2) {
|
||||
add_dep(n, last_address_write[inst->src[i].address_slot(byte)], 0);
|
||||
}
|
||||
}
|
||||
|
|
@ -1544,7 +1553,8 @@ brw_instruction_scheduler::calculate_deps()
|
|||
* can mark this as WAR dependency.
|
||||
*/
|
||||
if (inst->dst.file == VGRF) {
|
||||
for (unsigned r = 0; r < regs_written(inst); r++)
|
||||
const unsigned written = regs_written(inst);
|
||||
for (unsigned r = 0; r < written; r++)
|
||||
last_grf_write[grf_index(inst->dst) + r] = n;
|
||||
} else if (inst->dst.file == FIXED_GRF) {
|
||||
if (post_reg_alloc) {
|
||||
|
|
@ -1603,7 +1613,8 @@ brw_instruction_scheduler::address_register_interfere(const schedule_node *n)
|
|||
for (unsigned i = 0; i < n->inst->sources; i++) {
|
||||
if (!n->inst->src[i].is_address())
|
||||
continue;
|
||||
for (unsigned byte = 0; byte < n->inst->size_read(s->devinfo, i); byte += 2) {
|
||||
const unsigned read = n->inst->size_read(s->devinfo, i);
|
||||
for (unsigned byte = 0; byte < read; byte += 2) {
|
||||
if (current.address_register[n->inst->src[i].address_slot(byte)] !=
|
||||
n->inst->src[i].nr)
|
||||
return true;
|
||||
|
|
@ -1772,7 +1783,8 @@ brw_instruction_scheduler::update_children(schedule_node *chosen)
|
|||
for (unsigned i = 0; i < chosen->inst->sources; i++) {
|
||||
if (!chosen->inst->src[i].is_address())
|
||||
continue;
|
||||
for (unsigned byte = 0; byte < chosen->inst->size_read(s->devinfo, i); byte += 2) {
|
||||
const unsigned read = chosen->inst->size_read(s->devinfo, i);
|
||||
for (unsigned byte = 0; byte < read; byte += 2) {
|
||||
assert(chosen->inst->src[i].address_slot(byte) <
|
||||
ARRAY_SIZE(current.address_register));
|
||||
current.address_register[chosen->inst->src[i].address_slot(byte)] = 0;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue