mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-28 01:40:08 +01:00
aco: make p_wqm a marker instruction without Operands/Definitions
Totals from 28277 (36.93% of 76572) affected shaders: (GFX11)
MaxWaves: 833930 -> 833898 (-0.00%); split: +0.01%, -0.01%
Instrs: 21366950 -> 21353346 (-0.06%); split: -0.11%, +0.05%
CodeSize: 112855368 -> 112610508 (-0.22%); split: -0.24%, +0.03%
VGPRs: 1157748 -> 1158540 (+0.07%); split: -0.10%, +0.17%
SpillSGPRs: 2465 -> 2463 (-0.08%); split: -0.16%, +0.08%
Latency: 168339886 -> 168383646 (+0.03%); split: -0.10%, +0.12%
InvThroughput: 25164895 -> 25158376 (-0.03%); split: -0.08%, +0.06%
VClause: 347660 -> 346256 (-0.40%); split: -0.55%, +0.15%
SClause: 794460 -> 799521 (+0.64%); split: -0.33%, +0.97%
Copies: 1151908 -> 1148370 (-0.31%); split: -0.54%, +0.23%
Branches: 359447 -> 359437 (-0.00%); split: -0.01%, +0.00%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25038>
This commit is contained in:
parent
1275981df8
commit
040142684c
5 changed files with 11 additions and 30 deletions
|
|
@@ -171,18 +171,16 @@ emit_mbcnt(isel_context* ctx, Temp dst, Operand mask = Operand(), Operand base =
|
|||
Temp
|
||||
emit_wqm(Builder& bld, Temp src, Temp dst = Temp(0, s1), bool program_needs_wqm = false)
|
||||
{
|
||||
if (bld.program->stage != fragment_fs) {
|
||||
if (!dst.id())
|
||||
return src;
|
||||
else
|
||||
return bld.copy(Definition(dst), src);
|
||||
} else if (!dst.id()) {
|
||||
dst = bld.tmp(src.regClass());
|
||||
if (dst.id())
|
||||
bld.copy(Definition(dst), src);
|
||||
else
|
||||
dst = src;
|
||||
|
||||
if (bld.program->stage == fragment_fs) {
|
||||
bld.pseudo(aco_opcode::p_wqm);
|
||||
bld.program->needs_wqm |= program_needs_wqm;
|
||||
}
|
||||
|
||||
assert(src.bytes() == dst.bytes());
|
||||
bld.pseudo(aco_opcode::p_wqm, Definition(dst), src);
|
||||
bld.program->needs_wqm |= program_needs_wqm;
|
||||
return dst;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -2395,8 +2395,7 @@ lower_to_hw_instr(Program* program)
|
|||
handle_operands(copy_operations, &ctx, program->gfx_level, pi);
|
||||
break;
|
||||
}
|
||||
case aco_opcode::p_parallelcopy:
|
||||
case aco_opcode::p_wqm: {
|
||||
case aco_opcode::p_parallelcopy: {
|
||||
std::map<PhysReg, copy_operation> copy_operations;
|
||||
for (unsigned j = 0; j < instr->operands.size(); j++) {
|
||||
assert(instr->definitions[j].bytes() == instr->operands[j].bytes());
|
||||
|
|
|
|||
|
|
@@ -662,7 +662,6 @@ alu_can_accept_constant(const aco_ptr<Instruction>& instr, unsigned operand)
|
|||
case aco_opcode::v_cndmask_b32: return operand != 2;
|
||||
case aco_opcode::s_addk_i32:
|
||||
case aco_opcode::s_mulk_i32:
|
||||
case aco_opcode::p_wqm:
|
||||
case aco_opcode::p_extract_vector:
|
||||
case aco_opcode::p_split_vector:
|
||||
case aco_opcode::v_readlane_b32:
|
||||
|
|
@@ -2071,11 +2070,6 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
instr->operands[2].setTemp(ctx.info[instr->operands[2].tempId()].temp);
|
||||
}
|
||||
break;
|
||||
case aco_opcode::p_wqm:
|
||||
if (instr->operands[0].isTemp() && ctx.info[instr->operands[0].tempId()].is_scc_invert()) {
|
||||
ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
|
||||
}
|
||||
break;
|
||||
case aco_opcode::s_mul_i32:
|
||||
/* Testing every uint32_t shows that 0x3f800000*n is never a denormal.
|
||||
* This pattern is created from a uniform nir_op_b2f. */
|
||||
|
|
@@ -4787,13 +4781,6 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
instr->opcode == aco_opcode::s_cselect_b32) &&
|
||||
instr->operands[2].isTemp()) {
|
||||
ctx.info[instr->operands[2].tempId()].set_scc_needed();
|
||||
} else if (instr->opcode == aco_opcode::p_wqm && instr->operands[0].isTemp() &&
|
||||
ctx.info[instr->definitions[0].tempId()].is_scc_needed()) {
|
||||
/* Propagate label so it is correctly detected by the uniform bool transform */
|
||||
ctx.info[instr->operands[0].tempId()].set_scc_needed();
|
||||
|
||||
/* Fix definition to SCC, this will prevent RA from adding superfluous moves */
|
||||
instr->definitions[0].setFixed(scc);
|
||||
}
|
||||
|
||||
/* check for literals */
|
||||
|
|
|
|||
|
|
@@ -1888,7 +1888,6 @@ handle_pseudo(ra_ctx& ctx, const RegisterFile& reg_file, Instruction* instr)
|
|||
case aco_opcode::p_create_vector:
|
||||
case aco_opcode::p_split_vector:
|
||||
case aco_opcode::p_parallelcopy:
|
||||
case aco_opcode::p_wqm:
|
||||
case aco_opcode::p_start_linear_vgpr: break;
|
||||
default: return;
|
||||
}
|
||||
|
|
@@ -2942,8 +2941,7 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
|
|||
if (get_reg_specified(ctx, register_file, rc, instr, reg))
|
||||
definition->setFixed(reg);
|
||||
}
|
||||
} else if (instr->opcode == aco_opcode::p_wqm ||
|
||||
instr->opcode == aco_opcode::p_parallelcopy ||
|
||||
} else if (instr->opcode == aco_opcode::p_parallelcopy ||
|
||||
(instr->opcode == aco_opcode::p_start_linear_vgpr &&
|
||||
!instr->operands.empty())) {
|
||||
PhysReg reg = instr->operands[i].physReg();
|
||||
|
|
|
|||
|
|
@@ -468,8 +468,7 @@ validate_ir(Program* program)
|
|||
check(program->gfx_level >= GFX9 || !def.regClass().is_subdword(),
|
||||
"Cannot split SGPR into subdword VGPRs before GFX9+", instr.get());
|
||||
}
|
||||
} else if (instr->opcode == aco_opcode::p_parallelcopy ||
|
||||
instr->opcode == aco_opcode::p_wqm) {
|
||||
} else if (instr->opcode == aco_opcode::p_parallelcopy) {
|
||||
check(instr->definitions.size() == instr->operands.size(),
|
||||
"Number of Operands does not match number of Definitions", instr.get());
|
||||
for (unsigned i = 0; i < instr->operands.size(); i++) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue