aco: use VOP2 version of v_cvt_pkrtz_f16_f32 on GFX_6_7_10

Totals from 767 (0.56% of 136546) affected shaders (NAVI):
CodeSize: 2862208 -> 2850036 (-0.43%)
Instrs: 561572 -> 561574 (+0.00%)
Cycles: 6455420 -> 6455428 (+0.00%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6777>
This commit is contained in:
Daniel Schürmann 2020-09-18 18:02:08 +01:00 committed by Marge Bot
parent 2f125908b3
commit 7240edec2a
2 changed files with 11 additions and 4 deletions

View file

@@ -2141,7 +2141,10 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
       Temp src = get_alu_src(ctx, instr->src[0]);
       if (instr->src[0].src.ssa->bit_size == 64)
          src = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src);
-      bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32, Definition(dst), src, Operand(0u));
+      if (ctx->block->fp_mode.round16_64 == fp_round_tz)
+         bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
+      else
+         bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, Definition(dst), src, Operand(0u));
       break;
    }
    case nir_op_f2f32: {
@@ -2615,7 +2618,10 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
          /* upper bits zero on GFX6-GFX9 */
          bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), get_alu_src(ctx, instr->src[0]));
       } else if (!ctx->block->fp_mode.care_about_round16_64 || ctx->block->fp_mode.round16_64 == fp_round_tz) {
-         emit_vop3a_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32, dst);
+         if (ctx->program->chip_class == GFX8 || ctx->program->chip_class == GFX9)
+            emit_vop3a_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32_e64, dst);
+         else
+            emit_vop2_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32, dst, false);
       } else {
          Temp src0 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), get_alu_src(ctx, instr->src[0]));
          Temp src1 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), get_alu_src(ctx, instr->src[1]));
@@ -10343,7 +10349,7 @@ static bool export_fs_mrt_color(isel_context *ctx, int slot)

    case V_028714_SPI_SHADER_FP16_ABGR:
       enabled_channels = 0x5;
-      compr_op = aco_opcode::v_cvt_pkrtz_f16_f32;
+      compr_op = aco_opcode::v_cvt_pkrtz_f16_f32_e64;
       if (is_16bit) {
          if (ctx->options->chip_class >= GFX9) {
             /* Pack the FP16 values together instead of converting them to

View file

@@ -682,6 +682,7 @@ VOP2 = {
    ( -1, -1, -1, -1, 0x2b, "v_fmac_f32", True),
    ( -1, -1, -1, -1, 0x2c, "v_fmamk_f32", True),
    ( -1, -1, -1, -1, 0x2d, "v_fmaak_f32", True),
+   (0x2f, 0x2f, -1, -1, 0x2f, "v_cvt_pkrtz_f16_f32", True),
    ( -1, -1, 0x1f, 0x1f, 0x32, "v_add_f16", True),
    ( -1, -1, 0x20, 0x20, 0x33, "v_sub_f16", True),
    ( -1, -1, 0x21, 0x21, 0x34, "v_subrev_f16", True),
@@ -1051,7 +1052,7 @@ VOP3 = {
    (0x11e, 0x11e, 0x293, 0x293, 0x363, "v_bfm_b32", False, False),
    (0x12d, 0x12d, 0x294, 0x294, 0x368, "v_cvt_pknorm_i16_f32", True, False),
    (0x12e, 0x12e, 0x295, 0x295, 0x369, "v_cvt_pknorm_u16_f32", True, False),
-   (0x12f, 0x12f, 0x296, 0x296, 0x12f, "v_cvt_pkrtz_f16_f32", True, False), # GFX6_7_10 is VOP2 with opcode 0x02f
+   (0x12f, 0x12f, 0x296, 0x296, 0x12f, "v_cvt_pkrtz_f16_f32_e64", True, False), # GFX6_7_10 is VOP2 with opcode 0x02f
    (0x130, 0x130, 0x297, 0x297, 0x36a, "v_cvt_pk_u16_u32", False, False),
    (0x131, 0x131, 0x298, 0x298, 0x36b, "v_cvt_pk_i16_i32", False, False),
    ( -1, -1, -1, 0x299, 0x312, "v_cvt_pknorm_i16_f16", True, False),