mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 22:20:14 +01:00
aco: use VOP2 version of v_cvt_pkrtz_f16_f32 on GFX_6_7_10
Totals from 767 (0.56% of 136546) affected shaders (NAVI):
CodeSize: 2862208 -> 2850036 (-0.43%)
Instrs: 561572 -> 561574 (+0.00%)
Cycles: 6455420 -> 6455428 (+0.00%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6777>
This commit is contained in:
parent
2f125908b3
commit
7240edec2a
2 changed files with 11 additions and 4 deletions
|
|
@ -2141,7 +2141,10 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
|||
Temp src = get_alu_src(ctx, instr->src[0]);
|
||||
if (instr->src[0].src.ssa->bit_size == 64)
|
||||
src = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src);
|
||||
bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32, Definition(dst), src, Operand(0u));
|
||||
if (ctx->block->fp_mode.round16_64 == fp_round_tz)
|
||||
bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
|
||||
else
|
||||
bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, Definition(dst), src, Operand(0u));
|
||||
break;
|
||||
}
|
||||
case nir_op_f2f32: {
|
||||
|
|
@ -2615,7 +2618,10 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
|||
/* upper bits zero on GFX6-GFX9 */
|
||||
bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), get_alu_src(ctx, instr->src[0]));
|
||||
} else if (!ctx->block->fp_mode.care_about_round16_64 || ctx->block->fp_mode.round16_64 == fp_round_tz) {
|
||||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32, dst);
|
||||
if (ctx->program->chip_class == GFX8 || ctx->program->chip_class == GFX9)
|
||||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32_e64, dst);
|
||||
else
|
||||
emit_vop2_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32, dst, false);
|
||||
} else {
|
||||
Temp src0 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), get_alu_src(ctx, instr->src[0]));
|
||||
Temp src1 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), get_alu_src(ctx, instr->src[1]));
|
||||
|
|
@ -10343,7 +10349,7 @@ static bool export_fs_mrt_color(isel_context *ctx, int slot)
|
|||
|
||||
case V_028714_SPI_SHADER_FP16_ABGR:
|
||||
enabled_channels = 0x5;
|
||||
compr_op = aco_opcode::v_cvt_pkrtz_f16_f32;
|
||||
compr_op = aco_opcode::v_cvt_pkrtz_f16_f32_e64;
|
||||
if (is_16bit) {
|
||||
if (ctx->options->chip_class >= GFX9) {
|
||||
/* Pack the FP16 values together instead of converting them to
|
||||
|
|
|
|||
|
|
@ -682,6 +682,7 @@ VOP2 = {
|
|||
( -1, -1, -1, -1, 0x2b, "v_fmac_f32", True),
|
||||
( -1, -1, -1, -1, 0x2c, "v_fmamk_f32", True),
|
||||
( -1, -1, -1, -1, 0x2d, "v_fmaak_f32", True),
|
||||
(0x2f, 0x2f, -1, -1, 0x2f, "v_cvt_pkrtz_f16_f32", True),
|
||||
( -1, -1, 0x1f, 0x1f, 0x32, "v_add_f16", True),
|
||||
( -1, -1, 0x20, 0x20, 0x33, "v_sub_f16", True),
|
||||
( -1, -1, 0x21, 0x21, 0x34, "v_subrev_f16", True),
|
||||
|
|
@ -1051,7 +1052,7 @@ VOP3 = {
|
|||
(0x11e, 0x11e, 0x293, 0x293, 0x363, "v_bfm_b32", False, False),
|
||||
(0x12d, 0x12d, 0x294, 0x294, 0x368, "v_cvt_pknorm_i16_f32", True, False),
|
||||
(0x12e, 0x12e, 0x295, 0x295, 0x369, "v_cvt_pknorm_u16_f32", True, False),
|
||||
(0x12f, 0x12f, 0x296, 0x296, 0x12f, "v_cvt_pkrtz_f16_f32", True, False), # GFX6_7_10 is VOP2 with opcode 0x02f
|
||||
(0x12f, 0x12f, 0x296, 0x296, 0x12f, "v_cvt_pkrtz_f16_f32_e64", True, False), # GFX6_7_10 is VOP2 with opcode 0x02f
|
||||
(0x130, 0x130, 0x297, 0x297, 0x36a, "v_cvt_pk_u16_u32", False, False),
|
||||
(0x131, 0x131, 0x298, 0x298, 0x36b, "v_cvt_pk_i16_i32", False, False),
|
||||
( -1, -1, -1, 0x299, 0x312, "v_cvt_pknorm_i16_f16", True, False),
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue