mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 05:18:08 +02:00
aco: fix value numbering of reductions
Non-ssa definitions caused an assertion in value numbering. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6662>
This commit is contained in:
parent
5a8447cbd4
commit
834b449a46
3 changed files with 57 additions and 42 deletions
|
|
@ -7046,6 +7046,54 @@ void emit_uniform_subgroup(isel_context *ctx, nir_intrinsic_instr *instr, Temp s
|
|||
}
|
||||
}
|
||||
|
||||
Pseudo_reduction_instruction *create_reduction_instr(isel_context *ctx, aco_opcode aco_op, ReduceOp op, Definition dst, Temp src)
|
||||
{
|
||||
assert(src.bytes() <= 8);
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
||||
unsigned num_defs = 0;
|
||||
Definition defs[5];
|
||||
defs[num_defs++] = dst;
|
||||
defs[num_defs++] = bld.def(bld.lm); /* used internally to save/restore exec */
|
||||
|
||||
/* scalar identity temporary */
|
||||
bool need_sitmp = (ctx->program->chip_class <= GFX7 || ctx->program->chip_class >= GFX10) && aco_op != aco_opcode::p_reduce;
|
||||
if (aco_op == aco_opcode::p_exclusive_scan) {
|
||||
need_sitmp |=
|
||||
(op == imin8 || op == imin16 || op == imin32 || op == imin64 ||
|
||||
op == imax8 || op == imax16 || op == imax32 || op == imax64 ||
|
||||
op == fmin16 || op == fmin32 || op == fmin64 ||
|
||||
op == fmax16 || op == fmax32 || op == fmax64 ||
|
||||
op == fmul16 || op == fmul64);
|
||||
}
|
||||
if (need_sitmp)
|
||||
defs[num_defs++] = bld.def(RegType::sgpr, dst.size());
|
||||
|
||||
/* scc clobber */
|
||||
defs[num_defs++] = bld.def(s1, scc);
|
||||
|
||||
/* vcc clobber */
|
||||
bool clobber_vcc = false;
|
||||
if ((op == iadd32 || op == imul64) && ctx->program->chip_class < GFX9)
|
||||
clobber_vcc = true;
|
||||
if (op == iadd64 || op == umin64 || op == umax64 || op == imin64 || op == imax64)
|
||||
clobber_vcc = true;
|
||||
|
||||
if (clobber_vcc)
|
||||
defs[num_defs++] = bld.def(bld.lm, vcc);
|
||||
|
||||
Pseudo_reduction_instruction *reduce = create_instruction<Pseudo_reduction_instruction>(aco_op, Format::PSEUDO_REDUCTION, 3, num_defs);
|
||||
reduce->operands[0] = Operand(src);
|
||||
/* setup_reduce_temp will update these undef operands if needed */
|
||||
reduce->operands[1] = Operand(RegClass(RegType::vgpr, dst.size()).as_linear());
|
||||
reduce->operands[2] = Operand(v1.as_linear());
|
||||
std::copy(defs, defs + num_defs, reduce->definitions.begin());
|
||||
|
||||
reduce->reduce_op = op;
|
||||
|
||||
return reduce;
|
||||
}
|
||||
|
||||
void emit_interp_center(isel_context *ctx, Temp dst, Temp pos1, Temp pos2)
|
||||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
|
@ -7661,21 +7709,10 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
unreachable("unknown reduce intrinsic");
|
||||
}
|
||||
|
||||
aco_ptr<Pseudo_reduction_instruction> reduce{create_instruction<Pseudo_reduction_instruction>(aco_op, Format::PSEUDO_REDUCTION, 3, 5)};
|
||||
reduce->operands[0] = Operand(src);
|
||||
// filled in by aco_reduce_assign.cpp, used internally as part of the
|
||||
// reduce sequence
|
||||
assert(dst.size() == 1 || dst.size() == 2);
|
||||
reduce->operands[1] = Operand(RegClass(RegType::vgpr, dst.size()).as_linear());
|
||||
reduce->operands[2] = Operand(v1.as_linear());
|
||||
|
||||
Temp tmp_dst = bld.tmp(dst.regClass());
|
||||
reduce->definitions[0] = Definition(tmp_dst);
|
||||
reduce->definitions[1] = bld.def(ctx->program->lane_mask); // used internally
|
||||
reduce->definitions[2] = Definition();
|
||||
reduce->definitions[3] = Definition(scc, s1);
|
||||
reduce->definitions[4] = Definition();
|
||||
reduce->reduce_op = reduce_op;
|
||||
aco_ptr<Pseudo_reduction_instruction> reduce{
|
||||
create_reduction_instr(ctx, aco_op, reduce_op, Definition(tmp_dst), src)};
|
||||
|
||||
reduce->cluster_size = cluster_size;
|
||||
ctx->block->instructions.emplace_back(std::move(reduce));
|
||||
|
||||
|
|
|
|||
|
|
@ -152,34 +152,6 @@ void setup_reduce_temp(Program* program)
|
|||
instr->operands[1] = Operand(reduceTmp);
|
||||
if (need_vtmp)
|
||||
instr->operands[2] = Operand(vtmp);
|
||||
|
||||
/* scalar temporary */
|
||||
Builder bld(program);
|
||||
instr->definitions[1] = bld.def(s2);
|
||||
|
||||
/* scalar identity temporary */
|
||||
bool need_sitmp = (program->chip_class <= GFX7 || program->chip_class >= GFX10) && instr->opcode != aco_opcode::p_reduce;
|
||||
if (instr->opcode == aco_opcode::p_exclusive_scan) {
|
||||
need_sitmp |=
|
||||
(op == imin8 || op == imin16 || op == imin32 || op == imin64 ||
|
||||
op == imax8 || op == imax16 || op == imax32 || op == imax64 ||
|
||||
op == fmin16 || op == fmin32 || op == fmin64 ||
|
||||
op == fmax16 || op == fmax32 || op == fmax64 ||
|
||||
op == fmul16 || op == fmul64);
|
||||
}
|
||||
if (need_sitmp) {
|
||||
instr->definitions[2] = bld.def(RegClass(RegType::sgpr, instr->operands[0].size()));
|
||||
}
|
||||
|
||||
/* vcc clobber */
|
||||
bool clobber_vcc = false;
|
||||
if ((op == iadd32 || op == imul64) && program->chip_class < GFX9)
|
||||
clobber_vcc = true;
|
||||
if (op == iadd64 || op == umin64 || op == umax64 || op == imin64 || op == imax64)
|
||||
clobber_vcc = true;
|
||||
|
||||
if (clobber_vcc)
|
||||
instr->definitions[4] = Definition(vcc, bld.lm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -216,6 +216,12 @@ public:
|
|||
--length;
|
||||
}
|
||||
|
||||
/*! \brief Adds an element to the end of the span
|
||||
*/
|
||||
constexpr void push_back(const_reference val) noexcept {
|
||||
*std::next(begin(), length++) = val;
|
||||
}
|
||||
|
||||
/*! \brief Clears the span
|
||||
*/
|
||||
constexpr void clear() noexcept {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue