mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-06 04:30:10 +01:00
aco: allow to use the range analysis UB in emit_{sop2,vop2}_instruction()
It will allow to combine v_add+s_lshl or v_add+v_lshlrev to v_mad_u32_u24 on GFX6-8 if operands are known to be 16-bit or 24-bit. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7673>
This commit is contained in:
parent
be600b009a
commit
eaef1f2127
1 changed files with 31 additions and 5 deletions
|
|
@ -718,7 +718,8 @@ Temp convert_pointer_to_64_bit(isel_context *ctx, Temp ptr)
|
|||
ptr, Operand((unsigned)ctx->options->address32_hi));
|
||||
}
|
||||
|
||||
void emit_sop2_instruction(isel_context *ctx, nir_alu_instr *instr, aco_opcode op, Temp dst, bool writes_scc)
|
||||
void emit_sop2_instruction(isel_context *ctx, nir_alu_instr *instr, aco_opcode op,
|
||||
Temp dst, bool writes_scc, uint8_t uses_ub = 0)
|
||||
{
|
||||
aco_ptr<SOP2_instruction> sop2{create_instruction<SOP2_instruction>(op, Format::SOP2, 2, writes_scc ? 2 : 1)};
|
||||
sop2->operands[0] = Operand(get_alu_src(ctx, instr->src[0]));
|
||||
|
|
@ -728,12 +729,24 @@ void emit_sop2_instruction(isel_context *ctx, nir_alu_instr *instr, aco_opcode o
|
|||
sop2->definitions[0].setNUW(true);
|
||||
if (writes_scc)
|
||||
sop2->definitions[1] = Definition(ctx->program->allocateId(s1), scc, s1);
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
if (uses_ub & (1 << i)) {
|
||||
uint32_t src_ub = get_alu_src_ub(ctx, instr, i);
|
||||
if (src_ub <= 0xffff)
|
||||
sop2->operands[i].set16bit(true);
|
||||
else if (src_ub <= 0xffffff)
|
||||
sop2->operands[i].set24bit(true);
|
||||
}
|
||||
}
|
||||
|
||||
ctx->block->instructions.emplace_back(std::move(sop2));
|
||||
}
|
||||
|
||||
void emit_vop2_instruction(isel_context *ctx, nir_alu_instr *instr, aco_opcode op, Temp dst,
|
||||
bool commutative, bool swap_srcs=false,
|
||||
bool flush_denorms = false, bool nuw = false)
|
||||
bool flush_denorms = false, bool nuw = false,
|
||||
uint8_t uses_ub = 0)
|
||||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
bld.is_precise = instr->exact;
|
||||
|
|
@ -750,15 +763,28 @@ void emit_vop2_instruction(isel_context *ctx, nir_alu_instr *instr, aco_opcode o
|
|||
}
|
||||
}
|
||||
|
||||
Operand op0(src0);
|
||||
Operand op1(src1);
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
uint32_t src_ub = get_alu_src_ub(ctx, instr, swap_srcs ? !i : i);
|
||||
if (uses_ub & (1 << i)) {
|
||||
if (src_ub <= 0xffff)
|
||||
bld.set16bit(i ? op1 : op0);
|
||||
else if (src_ub <= 0xffffff)
|
||||
bld.set24bit(i ? op1 : op0);
|
||||
}
|
||||
}
|
||||
|
||||
if (flush_denorms && ctx->program->chip_class < GFX9) {
|
||||
assert(dst.size() == 1);
|
||||
Temp tmp = bld.vop2(op, bld.def(v1), src0, src1);
|
||||
Temp tmp = bld.vop2(op, bld.def(v1), op0, op1);
|
||||
bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand(0x3f800000u), tmp);
|
||||
} else {
|
||||
if (nuw) {
|
||||
bld.nuw().vop2(op, Definition(dst), src0, src1);
|
||||
bld.nuw().vop2(op, Definition(dst), op0, op1);
|
||||
} else {
|
||||
bld.vop2(op, Definition(dst), src0, src1);
|
||||
bld.vop2(op, Definition(dst), op0, op1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue