mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 19:50:11 +01:00
aco/sched_ilp: base latency and issue cycles on aco_statistics
This matters for trans and scalar fpu instructions.

Foz-DB GFX1150:
Totals from 53894 (67.90% of 79377) affected shaders:
Instrs: 38528421 -> 38481337 (-0.12%); split: -0.16%, +0.04%
CodeSize: 200206016 -> 200023916 (-0.09%); split: -0.12%, +0.03%
Latency: 265011734 -> 264303762 (-0.27%); split: -0.28%, +0.02%
InvThroughput: 53804490 -> 53696097 (-0.20%); split: -0.21%, +0.01%
VClause: 736996 -> 736988 (-0.00%); split: -0.00%, +0.00%
SClause: 1118494 -> 1118474 (-0.00%); split: -0.01%, +0.01%
VALU: 21982349 -> 21982358 (+0.00%); split: -0.00%, +0.00%

Foz-DB Navi31:
Totals from 50791 (63.99% of 79377) affected shaders:
Instrs: 37511862 -> 37495712 (-0.04%); split: -0.11%, +0.07%
CodeSize: 197990892 -> 197925104 (-0.03%); split: -0.09%, +0.06%
Latency: 261929261 -> 261273534 (-0.25%); split: -0.27%, +0.01%
InvThroughput: 43978329 -> 43921618 (-0.13%); split: -0.14%, +0.01%
VClause: 727683 -> 727695 (+0.00%); split: -0.00%, +0.00%
SClause: 1092527 -> 1092544 (+0.00%); split: -0.01%, +0.01%
VALU: 22646553 -> 22646566 (+0.00%)

Foz-DB Navi21:
Totals from 43899 (55.30% of 79377) affected shaders:
Instrs: 35649081 -> 35649110 (+0.00%); split: -0.00%, +0.00%
CodeSize: 192336212 -> 192337276 (+0.00%); split: -0.00%, +0.00%
Latency: 270621538 -> 270221431 (-0.15%); split: -0.16%, +0.02%
InvThroughput: 66757841 -> 66715918 (-0.06%); split: -0.07%, +0.01%
VClause: 734884 -> 734867 (-0.00%); split: -0.01%, +0.01%
SClause: 1072956 -> 1072951 (-0.00%); split: -0.01%, +0.01%

Foz-DB Vega10:
Totals from 52687 (83.60% of 63026) affected shaders:
Instrs: 24595280 -> 24595693 (+0.00%); split: -0.01%, +0.01%
CodeSize: 127199836 -> 127200164 (+0.00%); split: -0.01%, +0.01%
Latency: 252281578 -> 252497934 (+0.09%); split: -0.03%, +0.12%
InvThroughput: 136551527 -> 136577609 (+0.02%); split: -0.01%, +0.03%
VClause: 536798 -> 536718 (-0.01%); split: -0.04%, +0.03%
SClause: 819978 -> 819693 (-0.03%); split: -0.04%, +0.01%

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33222>
This commit is contained in:
parent
068f9b51de
commit
b23ff87db4
2 changed files with 36 additions and 33 deletions
|
|
@ -283,32 +283,32 @@ can_use_vopd(const SchedILPContext& ctx, unsigned idx)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned
|
Instruction_cycle_info
|
||||||
get_latency(const Instruction* const instr)
|
get_cycle_info_with_mem_latency(const SchedILPContext& ctx, const Instruction* const instr)
|
||||||
{
|
{
|
||||||
/* Note, that these are not accurate latency estimations. */
|
Instruction_cycle_info cycle_info = get_cycle_info(*ctx.program, *instr);
|
||||||
if (instr->isVALU() || instr->isVINTRP())
|
|
||||||
return 5;
|
|
||||||
if (instr->isSALU())
|
|
||||||
return 2;
|
|
||||||
/* Based on get_wait_counter_info in aco_statistics.cpp. */
|
|
||||||
if (instr->isVMEM() || instr->isFlatLike())
|
|
||||||
return 320;
|
|
||||||
if (instr->isSMEM()) {
|
|
||||||
if (instr->operands.empty())
|
|
||||||
return 1;
|
|
||||||
if (instr->operands[0].size() == 2 ||
|
|
||||||
(instr->operands[1].isConstant() &&
|
|
||||||
(instr->operands.size() < 3 || instr->operands[2].isConstant())))
|
|
||||||
return 30;
|
|
||||||
return 200;
|
|
||||||
}
|
|
||||||
if (instr->isLDSDIR())
|
|
||||||
return 13;
|
|
||||||
if (instr->isDS())
|
|
||||||
return 20;
|
|
||||||
|
|
||||||
return 0;
|
/* Based on get_wait_counter_info in aco_statistics.cpp. */
|
||||||
|
if (instr->isVMEM() || instr->isFlatLike()) {
|
||||||
|
cycle_info.latency = 320;
|
||||||
|
} else if (instr->isSMEM()) {
|
||||||
|
if (instr->operands.empty()) {
|
||||||
|
cycle_info.latency = 1;
|
||||||
|
} else if (instr->operands[0].size() == 2 ||
|
||||||
|
(instr->operands[1].isConstant() &&
|
||||||
|
(instr->operands.size() < 3 || instr->operands[2].isConstant()))) {
|
||||||
|
/* Likely cached. */
|
||||||
|
cycle_info.latency = 30;
|
||||||
|
} else {
|
||||||
|
cycle_info.latency = 200;
|
||||||
|
}
|
||||||
|
} else if (instr->isLDSDIR()) {
|
||||||
|
cycle_info.latency = 13;
|
||||||
|
} else if (instr->isDS()) {
|
||||||
|
cycle_info.latency = 20;
|
||||||
|
}
|
||||||
|
|
||||||
|
return cycle_info;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
|
@ -452,13 +452,18 @@ remove_entry(SchedILPContext& ctx, const Instruction* const instr, const uint32_
|
||||||
const mask_t mask = ~BITFIELD_BIT(idx);
|
const mask_t mask = ~BITFIELD_BIT(idx);
|
||||||
ctx.active_mask &= mask;
|
ctx.active_mask &= mask;
|
||||||
|
|
||||||
int stall = 1; /* Assume all instructions take one cycle to issue. */
|
int latency = 0;
|
||||||
|
int stall = 1;
|
||||||
|
if (!ctx.is_vopd) {
|
||||||
|
Instruction_cycle_info cycle_info = get_cycle_info_with_mem_latency(ctx, instr);
|
||||||
|
latency = cycle_info.latency;
|
||||||
|
stall = cycle_info.issue_cycles;
|
||||||
|
|
||||||
if (ctx.nodes[idx].wait_cycles > 0) {
|
if (ctx.nodes[idx].wait_cycles > 0) {
|
||||||
/* Add remaining latency stall. */
|
/* Add remaining latency stall. */
|
||||||
stall += ctx.nodes[idx].wait_cycles;
|
stall += ctx.nodes[idx].wait_cycles;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!ctx.is_vopd) {
|
|
||||||
unsigned i;
|
unsigned i;
|
||||||
BITSET_FOREACH_SET (i, ctx.reg_has_latency, 512) {
|
BITSET_FOREACH_SET (i, ctx.reg_has_latency, 512) {
|
||||||
if (ctx.regs[i].latency <= stall) {
|
if (ctx.regs[i].latency <= stall) {
|
||||||
|
|
@ -489,8 +494,6 @@ remove_entry(SchedILPContext& ctx, const Instruction* const instr, const uint32_
|
||||||
ctx.regs[flat_scr_hi].read_mask &= mask;
|
ctx.regs[flat_scr_hi].read_mask &= mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
const int latency = get_latency(instr);
|
|
||||||
|
|
||||||
for (const Definition& def : instr->definitions) {
|
for (const Definition& def : instr->definitions) {
|
||||||
for (unsigned i = 0; i < def.size(); i++) {
|
for (unsigned i = 0; i < def.size(); i++) {
|
||||||
unsigned reg = def.physReg().reg() + i;
|
unsigned reg = def.physReg().reg() + i;
|
||||||
|
|
|
||||||
|
|
@ -473,9 +473,9 @@ BEGIN_TEST(d3d11_derivs.cube_array)
|
||||||
//>> v_cubeid_f32 v#rf, v#_, v#_, v#_ ; $_ $_
|
//>> v_cubeid_f32 v#rf, v#_, v#_, v#_ ; $_ $_
|
||||||
|
|
||||||
//>> v_fmamk_f32 v#rlf_tmp, v#rl, 0x41000000, v#rf ; $_ $_
|
//>> v_fmamk_f32 v#rlf_tmp, v#rl, 0x41000000, v#rf ; $_ $_
|
||||||
|
//>> v_mov_b32_e32 v#rlf, v#rlf_tmp ; $_
|
||||||
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
||||||
//>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
//>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
||||||
//>> v_mov_b32_e32 v#rlf, v#rlf_tmp ; $_
|
|
||||||
//>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_
|
//>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_
|
||||||
|
|
||||||
//>> BB1:
|
//>> BB1:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue