diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index f26947a6969..fb989eb9cd4 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2608,18 +2608,17 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
       }
       break;
    }
-   case nir_op_pack_half_2x16: {
-      Temp src = get_alu_src(ctx, instr->src[0], 2);
-
+   case nir_op_pack_half_2x16_split: {
       if (dst.regClass() == v1) {
-         Temp src0 = bld.tmp(v1);
-         Temp src1 = bld.tmp(v1);
-         bld.pseudo(aco_opcode::p_split_vector, Definition(src0), Definition(src1), src);
-         if (!ctx->block->fp_mode.care_about_round16_64 || ctx->block->fp_mode.round16_64 == fp_round_tz) {
-            bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32, Definition(dst), src0, src1);
+         nir_const_value* val = nir_src_as_const_value(instr->src[1].src);
+         if (val && val->u32 == 0 && ctx->program->chip_class <= GFX9) {
+            /* upper bits zero on GFX6-GFX9 */
+            bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), get_alu_src(ctx, instr->src[0]));
+         } else if (!ctx->block->fp_mode.care_about_round16_64 || ctx->block->fp_mode.round16_64 == fp_round_tz) {
+            emit_vop3a_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32, dst);
          } else {
-            src0 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), src0);
-            src1 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), src1);
+            Temp src0 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), get_alu_src(ctx, instr->src[0]));
+            Temp src1 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), get_alu_src(ctx, instr->src[1]));
             bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src0, src1);
          }
       } else {
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index e5b70920307..9c2c5673bb5 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -837,7 +837,7 @@ void init_context(isel_context *ctx, nir_shader *shader)
                case nir_op_i2f16:
                case nir_op_i2f32:
                case nir_op_i2f64:
-               case nir_op_pack_half_2x16:
+               case nir_op_pack_half_2x16_split:
                case nir_op_unpack_half_2x16_split_x:
                case nir_op_unpack_half_2x16_split_y:
                case nir_op_fddx:
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 0c328d8a0df..9d25b4f0b89 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -60,6 +60,7 @@ static const struct nir_shader_compiler_options nir_options = {
 	.lower_pack_snorm_4x8 = true,
 	.lower_pack_unorm_2x16 = true,
 	.lower_pack_unorm_4x8 = true,
+	.lower_pack_half_2x16 = true,
 	.lower_unpack_snorm_2x16 = true,
 	.lower_unpack_snorm_4x8 = true,
 	.lower_unpack_unorm_2x16 = true,
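
For context, a minimal C sketch (not part of the patch) of the semantics the backend now has to implement: nir_op_pack_half_2x16_split converts two scalar 32-bit floats to half precision and packs them into one 32-bit value, and setting .lower_pack_half_2x16 = true makes NIR rewrite the vec2 pack_half_2x16 into this split form, so the backend only ever sees scalar sources. float_to_half_rtz below is an assumed stand-in for the per-component conversion done by v_cvt_pkrtz_f16_f32 / v_cvt_f16_f32; it rounds toward zero and flushes half denormals, so it only approximates the hardware behaviour.

#include <stdint.h>
#include <stdio.h>

/* Assumed helper (not from the patch): f32 -> f16 with round-toward-zero,
 * flushing results that would be half denormals to zero. */
static uint16_t float_to_half_rtz(float f)
{
   union { float f; uint32_t u; } in = { f };
   uint32_t sign = (in.u >> 16) & 0x8000;
   uint32_t e8   = (in.u >> 23) & 0xff;
   uint32_t mant = in.u & 0x7fffff;

   if (e8 == 0xff)                     /* Inf or NaN */
      return sign | 0x7c00 | (mant ? 0x200 : 0);
   if (e8 >= 143)                      /* too large: clamp to max finite */
      return sign | 0x7bff;
   if (e8 <= 112)                      /* too small: flush to zero */
      return sign;
   return sign | ((e8 - 112) << 10) | (mant >> 13);   /* truncate = RTZ */
}

/* Reference semantics of nir_op_pack_half_2x16_split: each scalar source is
 * converted to half precision and placed in the low/high 16 bits. With
 * .lower_pack_half_2x16 = true, pack_half_2x16(v) becomes
 * pack_half_2x16_split(v.x, v.y). */
static uint32_t pack_half_2x16_split(float x, float y)
{
   return (uint32_t)float_to_half_rtz(x) | ((uint32_t)float_to_half_rtz(y) << 16);
}

int main(void)
{
   /* When the second source is a constant 0 (the case the GFX6-GFX9 fast
    * path in the patch checks for), the result is just the converted first
    * source, so a single f32->f16 conversion suffices. */
   printf("0x%08x\n", (unsigned) pack_half_2x16_split(1.0f, 0.0f));   /* 0x00003c00 */
   return 0;
}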