mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
aco: fix nir_op_f2f16_rtne with non-default rounding modes
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5773>
This commit is contained in:
parent
d14f4faa13
commit
b36950ad2c
3 changed files with 19 additions and 1 deletions
|
|
@@ -2298,7 +2298,13 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
|||
Temp src = get_alu_src(ctx, instr->src[0]);
|
||||
if (instr->src[0].src.ssa->bit_size == 64)
|
||||
src = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src);
|
||||
bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
|
||||
if (instr->op == nir_op_f2f16_rtne && ctx->block->fp_mode.round16_64 != fp_round_ne)
|
||||
/* We emit s_round_mode/s_setreg_imm32 in lower_to_hw_instr to
|
||||
* keep value numbering and the scheduler simpler.
|
||||
*/
|
||||
bld.vop1(aco_opcode::p_cvt_f16_f32_rtne, Definition(dst), src);
|
||||
else
|
||||
bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
|
||||
break;
|
||||
}
|
||||
case nir_op_f2f16_rtz: {
|
||||
|
|
|
|||
|
|
@@ -1908,6 +1908,17 @@ void lower_to_hw_instr(Program* program)
|
|||
reduce->operands[2].physReg(), // vtmp
|
||||
reduce->definitions[2].physReg(), // sitmp
|
||||
reduce->operands[0], reduce->definitions[0]);
|
||||
} else if (instr->opcode == aco_opcode::p_cvt_f16_f32_rtne) {
|
||||
float_mode new_mode = block->fp_mode;
|
||||
new_mode.round16_64 = fp_round_ne;
|
||||
bool set_round = new_mode.round != block->fp_mode.round;
|
||||
|
||||
emit_set_mode(bld, new_mode, set_round, false);
|
||||
|
||||
instr->opcode = aco_opcode::v_cvt_f16_f32;
|
||||
ctx.instructions.emplace_back(std::move(instr));
|
||||
|
||||
emit_set_mode(bld, block->fp_mode, set_round, false);
|
||||
} else {
|
||||
ctx.instructions.emplace_back(std::move(instr));
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -734,6 +734,7 @@ VOP1 = {
|
|||
(0x08, 0x08, 0x08, 0x08, 0x08, "v_cvt_i32_f32", True, False),
|
||||
(0x09, 0x09, -1, -1, 0x09, "v_mov_fed_b32", True, False), # LLVM mentions it for GFX8_9
|
||||
(0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "v_cvt_f16_f32", True, True),
|
||||
( -1, -1, -1, -1, -1, "p_cvt_f16_f32_rtne", True, True),
|
||||
(0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "v_cvt_f32_f16", True, True),
|
||||
(0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "v_cvt_rpi_i32_f32", True, False),
|
||||
(0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "v_cvt_flr_i32_f32", True, False),
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue