aco: fix nir_op_f2f16_rtne with non-default rounding modes

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5773>
This commit is contained in:
Rhys Perry 2020-07-02 13:35:41 +01:00 committed by Marge Bot
parent d14f4faa13
commit b36950ad2c
3 changed files with 19 additions and 1 deletions

View file

@ -2298,7 +2298,13 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
Temp src = get_alu_src(ctx, instr->src[0]);
if (instr->src[0].src.ssa->bit_size == 64)
src = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src);
bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
if (instr->op == nir_op_f2f16_rtne && ctx->block->fp_mode.round16_64 != fp_round_ne)
/* We emit s_round_mode/s_setreg_imm32 in lower_to_hw_instr to
* keep value numbering and the scheduler simpler.
*/
bld.vop1(aco_opcode::p_cvt_f16_f32_rtne, Definition(dst), src);
else
bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
break;
}
case nir_op_f2f16_rtz: {

View file

@ -1908,6 +1908,17 @@ void lower_to_hw_instr(Program* program)
reduce->operands[2].physReg(), // vtmp
reduce->definitions[2].physReg(), // sitmp
reduce->operands[0], reduce->definitions[0]);
} else if (instr->opcode == aco_opcode::p_cvt_f16_f32_rtne) {
float_mode new_mode = block->fp_mode;
new_mode.round16_64 = fp_round_ne;
bool set_round = new_mode.round != block->fp_mode.round;
emit_set_mode(bld, new_mode, set_round, false);
instr->opcode = aco_opcode::v_cvt_f16_f32;
ctx.instructions.emplace_back(std::move(instr));
emit_set_mode(bld, block->fp_mode, set_round, false);
} else {
ctx.instructions.emplace_back(std::move(instr));
}

View file

@ -734,6 +734,7 @@ VOP1 = {
(0x08, 0x08, 0x08, 0x08, 0x08, "v_cvt_i32_f32", True, False),
(0x09, 0x09, -1, -1, 0x09, "v_mov_fed_b32", True, False), # LLVM mentions it for GFX8_9
(0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "v_cvt_f16_f32", True, True),
( -1, -1, -1, -1, -1, "p_cvt_f16_f32_rtne", True, True),
(0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "v_cvt_f32_f16", True, True),
(0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "v_cvt_rpi_i32_f32", True, False),
(0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "v_cvt_flr_i32_f32", True, False),