mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 05:18:08 +02:00
aco: Add support for f2f16 with rtpi/rtni
These rounding modes are needed when computing 16-bit bounding boxes, since a bounding box must not get smaller when it is converted to 16 bits.
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37883>
This commit is contained in:
parent
f156743b0f
commit
39d58a55a7
5 changed files with 21 additions and 1 deletions
|
|
@ -279,6 +279,13 @@ emit_set_mode_block(fp_mode_ctx* ctx, Block* block)
|
|||
instr->opcode = aco_opcode::v_cvt_f16_f32;
|
||||
else
|
||||
instr->opcode = aco_opcode::s_cvt_f16_f32;
|
||||
} else if (instr->opcode == aco_opcode::p_v_cvt_f16_f32_rtpi ||
|
||||
instr->opcode == aco_opcode::p_v_cvt_f16_f32_rtni) {
|
||||
set_mode |= fp_state.require(mode_round16_64, instr->opcode == aco_opcode::p_v_cvt_f16_f32_rtpi ? fp_round_pi : fp_round_ni);
|
||||
set_mode |= fp_state.require(mode_fp16_ovfl, default_state.fields[mode_fp16_ovfl]);
|
||||
set_mode |= fp_state.require(mode_denorm16_64, default_state.fields[mode_denorm16_64]);
|
||||
set_mode |= fp_state.require(mode_denorm32, default_state.fields[mode_denorm32]);
|
||||
instr->opcode = aco_opcode::v_cvt_f16_f32;
|
||||
} else if (instr->opcode == aco_opcode::p_v_cvt_pk_fp8_f32_ovfl) {
|
||||
set_mode |= fp_state.require(mode_fp16_ovfl, 1);
|
||||
instr->opcode = aco_opcode::v_cvt_pk_fp8_f32;
|
||||
|
|
|
|||
|
|
@ -661,6 +661,8 @@ instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op)
|
|||
/* VOP1 */
|
||||
case aco_opcode::v_cvt_f16_f32:
|
||||
case aco_opcode::p_v_cvt_f16_f32_rtne:
|
||||
case aco_opcode::p_v_cvt_f16_f32_rtpi:
|
||||
case aco_opcode::p_v_cvt_f16_f32_rtni:
|
||||
case aco_opcode::v_cvt_f16_u16:
|
||||
case aco_opcode::v_cvt_f16_i16:
|
||||
case aco_opcode::v_rcp_f16:
|
||||
|
|
|
|||
|
|
@ -1029,6 +1029,8 @@ VOP1 = {
|
|||
("v_cvt_i32_f32", dst(U32), src(F32), op(0x08)),
|
||||
("v_cvt_f16_f32", dst(F16), src(F32), op(0x0a)),
|
||||
("p_v_cvt_f16_f32_rtne", dst(F16), src(F32), op(-1)),
|
||||
("p_v_cvt_f16_f32_rtpi", dst(F16), src(F32), op(-1)),
|
||||
("p_v_cvt_f16_f32_rtni", dst(F16), src(F32), op(-1)),
|
||||
("v_cvt_f32_f16", dst(F32), src(F16), op(0x0b)),
|
||||
("v_cvt_rpi_i32_f32", dst(U32), src(F32), op(0x0c)), #v_cvt_nearest_i32_f32 in GFX11
|
||||
("v_cvt_flr_i32_f32", dst(U32), src(F32), op(0x0d)),#v_cvt_floor_i32_f32 in GFX11
|
||||
|
|
|
|||
|
|
@ -453,7 +453,9 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
case nir_op_sdot_2x16_iadd_sat:
|
||||
case nir_op_bfdot2_bfadd:
|
||||
case nir_op_byte_perm_amd:
|
||||
case nir_op_alignbyte_amd: type = RegType::vgpr; break;
|
||||
case nir_op_alignbyte_amd:
|
||||
case nir_op_f2f16_ru:
|
||||
case nir_op_f2f16_rd: type = RegType::vgpr; break;
|
||||
case nir_op_fmul:
|
||||
case nir_op_ffma:
|
||||
case nir_op_fadd:
|
||||
|
|
|
|||
|
|
@ -2615,6 +2615,13 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
}
|
||||
break;
|
||||
}
|
||||
case nir_op_f2f16_ru:
|
||||
case nir_op_f2f16_rd:
|
||||
ctx->program->needs_fp_mode_insertion = true;
|
||||
bld.vop1(instr->op == nir_op_f2f16_ru ? aco_opcode::p_v_cvt_f16_f32_rtpi
|
||||
: aco_opcode::p_v_cvt_f16_f32_rtni,
|
||||
Definition(dst), Operand(get_alu_src(ctx, instr->src[0])));
|
||||
break;
|
||||
case nir_op_f2f32: {
|
||||
if (dst.regClass() == s1) {
|
||||
assert(instr->src[0].src.ssa->bit_size == 16);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue