diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 6c378111ff4..d8af3d9e76b 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2069,6 +2069,14 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
       }
       break;
    }
+   case nir_op_fmulz: {
+      if (dst.regClass() == v1) {
+         emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_legacy_f32, dst, true);
+      } else {
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
+      }
+      break;
+   }
    case nir_op_fadd: {
       if (dst.regClass() == v2b) {
          emit_vop2_instruction(ctx, instr, aco_opcode::v_add_f16, dst, true);
@@ -2141,6 +2149,15 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
       }
       break;
    }
+   case nir_op_ffmaz: {
+      if (dst.regClass() == v1) {
+         emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_legacy_f32, dst,
+                                ctx->block->fp_mode.must_flush_denorms32, 3);
+      } else {
+         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
+      }
+      break;
+   }
    case nir_op_fmax: {
       if (dst.regClass() == v2b) {
          // TODO: check fp_mode.must_flush_denorms16_64
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index 3f4396c1a99..7cfd616b135 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -480,9 +480,11 @@ init_context(isel_context* ctx, nir_shader* shader)
                   nir_dest_is_divergent(alu_instr->dest.dest) ? RegType::vgpr : RegType::sgpr;
                switch (alu_instr->op) {
                case nir_op_fmul:
+               case nir_op_fmulz:
                case nir_op_fadd:
                case nir_op_fsub:
                case nir_op_ffma:
+               case nir_op_ffmaz:
                case nir_op_fmax:
                case nir_op_fmin:
                case nir_op_fneg:
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index e038e49f094..53a641e848d 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -688,6 +688,13 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
       src[1] = ac_to_float(&ctx->ac, src[1]);
       result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
       break;
+   case nir_op_fmulz:
+      assert(LLVM_VERSION_MAJOR >= 12);
+      src[0] = ac_to_float(&ctx->ac, src[0]);
+      src[1] = ac_to_float(&ctx->ac, src[1]);
+      result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fmul.legacy", ctx->ac.f32,
+                                  src, 2, AC_FUNC_ATTR_READNONE);
+      break;
    case nir_op_frcp:
       /* For doubles, we need precise division to pass GLCTS. */
       if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && ac_get_type_size(def_type) == 8)
@@ -906,6 +913,14 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
       result = emit_intrin_3f_param(&ctx->ac, "llvm.fma", ac_to_float_type(&ctx->ac, def_type),
                                     src[0], src[1], src[2]);
       break;
+   case nir_op_ffmaz:
+      assert(LLVM_VERSION_MAJOR >= 12 && ctx->ac.chip_class >= GFX10_3);
+      src[0] = ac_to_float(&ctx->ac, src[0]);
+      src[1] = ac_to_float(&ctx->ac, src[1]);
+      src[2] = ac_to_float(&ctx->ac, src[2]);
+      result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fma.legacy", ctx->ac.f32,
+                                  src, 3, AC_FUNC_ATTR_READNONE);
+      break;
    case nir_op_ldexp:
       src[0] = ac_to_float(&ctx->ac, src[0]);
       if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index d7c59c77bf1..de9bc7b01d0 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -92,6 +92,11 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s
       .has_udot_4x8 = device->rad_info.has_accelerated_dot_product,
       .has_dot_2x16 = device->rad_info.has_accelerated_dot_product,
       .use_scoped_barrier = true,
+#ifdef LLVM_AVAILABLE
+      .has_fmulz = !device->use_llvm || LLVM_VERSION_MAJOR >= 12,
+#else
+      .has_fmulz = true,
+#endif
       .max_unroll_iterations = 32,
       .max_unroll_iterations_aggressive = 128,
       .use_interpolated_input_intrinsics = true,
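Note (not part of the patch): nir_op_fmulz maps onto v_mul_legacy_f32 /
llvm.amdgcn.fmul.legacy, i.e. the DX9 "legacy" multiply rule where zero
times anything, including Inf and NaN, yields zero rather than NaN. A
minimal C sketch of my reading of that rule, for reference only;
fmulz_ref is a made-up name, and the sign of the zero result (which the
hardware defines) is glossed over here:

   /* Hypothetical reference model of the legacy multiply behind
    * nir_op_fmulz: if either operand compares equal to zero, the result
    * is zero even when the other operand is Inf or NaN, unlike IEEE
    * multiplication where 0.0 * Inf is NaN. */
   static float
   fmulz_ref(float a, float b)
   {
      /* NaN compares unequal to 0.0f, so NaN * nonzero is still NaN. */
      if (a == 0.0f || b == 0.0f)
         return 0.0f;
      return a * b;
   }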
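Likewise for nir_op_ffmaz: a fused multiply-add whose multiply stage
follows the same zero rule, presumably why the LLVM path asserts
chip_class >= GFX10_3, where v_fma_legacy_f32 exists, and why the ACO
path threads fp_mode.must_flush_denorms32 through to
emit_vop3a_instruction. A sketch under the same assumptions as
fmulz_ref() above; fmaz_ref is likewise a made-up name:

   #include <math.h>

   /* Hypothetical reference model of the fused multiply-add behind
    * nir_op_ffmaz: the multiply stage uses the same 0.0 * x = 0.0 rule
    * as fmulz_ref() above; otherwise it behaves like an ordinary fma. */
   static float
   fmaz_ref(float a, float b, float c)
   {
      if (a == 0.0f || b == 0.0f)
         return 0.0f + c; /* the multiply contributes a zero */
      return fmaf(a, b, c);
   }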