mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 06:48:06 +02:00
aco: forget valu delays after certain s_waitcnt_depctr/LDSDIR
fossil-db (navi31):
Totals from 55242 (69.58% of 79395) affected shaders:
Instrs: 40507666 -> 40138006 (-0.91%); split: -0.91%, +0.00%
CodeSize: 212516104 -> 211025880 (-0.70%); split: -0.70%, +0.00%
Latency: 281643258 -> 281628053 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 46370668 -> 46369637 (-0.00%); split: -0.00%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Acked-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23337>
This commit is contained in:
parent
30396ba604
commit
7b92e11e16
4 changed files with 29 additions and 23 deletions
|
|
@ -1136,19 +1136,6 @@ test_vgpr_bitset(std::bitset<256>& set, Operand op)
|
|||
}
|
||||
|
||||
/* GFX11 */
|
||||
unsigned
|
||||
parse_vdst_wait(aco_ptr<Instruction>& instr)
|
||||
{
|
||||
if (instr->isVMEM() || instr->isFlatLike() || instr->isDS() || instr->isEXP())
|
||||
return 0;
|
||||
else if (instr->isLDSDIR())
|
||||
return instr->ldsdir().wait_vdst;
|
||||
else if (instr->opcode == aco_opcode::s_waitcnt_depctr)
|
||||
return (instr->salu().imm >> 12) & 0xf;
|
||||
else
|
||||
return 15;
|
||||
}
|
||||
|
||||
struct LdsDirectVALUHazardGlobalState {
|
||||
unsigned wait_vdst = 15;
|
||||
PhysReg vgpr;
|
||||
|
|
@ -1188,7 +1175,7 @@ handle_lds_direct_valu_hazard_instr(LdsDirectVALUHazardGlobalState& global_state
|
|||
block_state.num_valu++;
|
||||
}
|
||||
|
||||
if (parse_vdst_wait(instr) == 0)
|
||||
if (parse_vdst_wait(instr.get()) == 0)
|
||||
return true;
|
||||
|
||||
block_state.num_instrs++;
|
||||
|
|
@ -1310,7 +1297,7 @@ handle_valu_partial_forwarding_hazard_instr(VALUPartialForwardingHazardGlobalSta
|
|||
}
|
||||
|
||||
block_state.num_valu_since_read++;
|
||||
} else if (parse_vdst_wait(instr) == 0) {
|
||||
} else if (parse_vdst_wait(instr.get()) == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -1407,7 +1394,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
|
|||
ctx.has_Vcmpx = false;
|
||||
}
|
||||
|
||||
unsigned va_vdst = parse_vdst_wait(instr);
|
||||
unsigned va_vdst = parse_vdst_wait(instr.get());
|
||||
unsigned vm_vsrc = 7;
|
||||
unsigned sa_sdst = 1;
|
||||
|
||||
|
|
@ -1608,7 +1595,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
|
|||
bool
|
||||
has_vdst0_since_valu_instr(bool& global_state, unsigned& block_state, aco_ptr<Instruction>& pred)
|
||||
{
|
||||
if (parse_vdst_wait(pred) == 0)
|
||||
if (parse_vdst_wait(pred.get()) == 0)
|
||||
return true;
|
||||
|
||||
if (--block_state == 0) {
|
||||
|
|
|
|||
|
|
@ -192,6 +192,16 @@ update_alu(delay_ctx& ctx, bool is_valu, bool is_trans, int cycles)
|
|||
void
|
||||
kill_alu(alu_delay_info& delay, Instruction* instr, delay_ctx& ctx)
|
||||
{
|
||||
if (parse_vdst_wait(instr) == 0) {
|
||||
std::map<PhysReg, alu_delay_info>::iterator it = ctx.gpr_map.begin();
|
||||
while (it != ctx.gpr_map.end()) {
|
||||
alu_delay_info& entry = it->second;
|
||||
entry.valu_instrs = alu_delay_info::valu_nop;
|
||||
entry.trans_instrs = alu_delay_info::trans_nop;
|
||||
it = it->second.fixup() ? ctx.gpr_map.erase(it) : std::next(it);
|
||||
}
|
||||
}
|
||||
|
||||
if (instr->isVALU() || instr->isSALU())
|
||||
check_alu(ctx, delay, instr);
|
||||
|
||||
|
|
@ -213,12 +223,6 @@ kill_alu(alu_delay_info& delay, Instruction* instr, delay_ctx& ctx)
|
|||
void
|
||||
gen_alu(Instruction* instr, delay_ctx& ctx)
|
||||
{
|
||||
if (instr->isEXP() || instr->isDS() || instr->isMIMG() || instr->isFlatLike() ||
|
||||
instr->isMUBUF() || instr->isMTBUF()) {
|
||||
ctx.gpr_map.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
Instruction_cycle_info cycle_info = get_cycle_info(*ctx.program, *instr);
|
||||
bool is_valu = instr->isVALU();
|
||||
bool is_trans = instr->isTrans();
|
||||
|
|
|
|||
|
|
@ -1398,6 +1398,19 @@ get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr)
|
|||
return 0;
|
||||
}
|
||||
|
||||
unsigned
|
||||
parse_vdst_wait(Instruction* instr)
|
||||
{
|
||||
if (instr->isVMEM() || instr->isFlatLike() || instr->isDS() || instr->isEXP())
|
||||
return 0;
|
||||
else if (instr->isLDSDIR())
|
||||
return instr->ldsdir().wait_vdst;
|
||||
else if (instr->opcode == aco_opcode::s_waitcnt_depctr)
|
||||
return (instr->salu().imm >> 12) & 0xf;
|
||||
else
|
||||
return 15;
|
||||
}
|
||||
|
||||
bool
|
||||
dealloc_vgprs(Program* program)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1853,6 +1853,8 @@ enum vmem_type : uint8_t {
|
|||
*/
|
||||
uint8_t get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr);
|
||||
|
||||
unsigned parse_vdst_wait(Instruction* instr);
|
||||
|
||||
enum block_kind {
|
||||
/* uniform indicates that leaving this block,
|
||||
* all actives lanes stay active */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue