mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-28 03:50:13 +01:00
radv,aco,ac/llvm: implement fmulz and ffmaz
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13436>
This commit is contained in:
parent
7f05ea3793
commit
e7f91b194a
4 changed files with 39 additions and 0 deletions
|
|
@ -2069,6 +2069,14 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
}
|
||||
break;
|
||||
}
|
||||
case nir_op_fmulz: {
|
||||
if (dst.regClass() == v1) {
|
||||
emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_legacy_f32, dst, true);
|
||||
} else {
|
||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case nir_op_fadd: {
|
||||
if (dst.regClass() == v2b) {
|
||||
emit_vop2_instruction(ctx, instr, aco_opcode::v_add_f16, dst, true);
|
||||
|
|
@ -2141,6 +2149,15 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
}
|
||||
break;
|
||||
}
|
||||
case nir_op_ffmaz: {
|
||||
if (dst.regClass() == v1) {
|
||||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_legacy_f32, dst,
|
||||
ctx->block->fp_mode.must_flush_denorms32, 3);
|
||||
} else {
|
||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case nir_op_fmax: {
|
||||
if (dst.regClass() == v2b) {
|
||||
// TODO: check fp_mode.must_flush_denorms16_64
|
||||
|
|
|
|||
|
|
@ -480,9 +480,11 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
nir_dest_is_divergent(alu_instr->dest.dest) ? RegType::vgpr : RegType::sgpr;
|
||||
switch (alu_instr->op) {
|
||||
case nir_op_fmul:
|
||||
case nir_op_fmulz:
|
||||
case nir_op_fadd:
|
||||
case nir_op_fsub:
|
||||
case nir_op_ffma:
|
||||
case nir_op_ffmaz:
|
||||
case nir_op_fmax:
|
||||
case nir_op_fmin:
|
||||
case nir_op_fneg:
|
||||
|
|
|
|||
|
|
@ -688,6 +688,13 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
|||
src[1] = ac_to_float(&ctx->ac, src[1]);
|
||||
result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
|
||||
break;
|
||||
case nir_op_fmulz:
|
||||
assert(LLVM_VERSION_MAJOR >= 12);
|
||||
src[0] = ac_to_float(&ctx->ac, src[0]);
|
||||
src[1] = ac_to_float(&ctx->ac, src[1]);
|
||||
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fmul.legacy", ctx->ac.f32,
|
||||
src, 2, AC_FUNC_ATTR_READNONE);
|
||||
break;
|
||||
case nir_op_frcp:
|
||||
/* For doubles, we need precise division to pass GLCTS. */
|
||||
if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && ac_get_type_size(def_type) == 8) {
|
||||
|
|
@ -906,6 +913,14 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
|||
result = emit_intrin_3f_param(&ctx->ac, "llvm.fma", ac_to_float_type(&ctx->ac, def_type),
|
||||
src[0], src[1], src[2]);
|
||||
break;
|
||||
case nir_op_ffmaz:
|
||||
assert(LLVM_VERSION_MAJOR >= 12 && ctx->ac.chip_class >= GFX10_3);
|
||||
src[0] = ac_to_float(&ctx->ac, src[0]);
|
||||
src[1] = ac_to_float(&ctx->ac, src[1]);
|
||||
src[2] = ac_to_float(&ctx->ac, src[2]);
|
||||
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fma.legacy", ctx->ac.f32,
|
||||
src, 3, AC_FUNC_ATTR_READNONE);
|
||||
break;
|
||||
case nir_op_ldexp:
|
||||
src[0] = ac_to_float(&ctx->ac, src[0]);
|
||||
if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
|
||||
|
|
|
|||
|
|
@ -92,6 +92,11 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s
|
|||
.has_udot_4x8 = device->rad_info.has_accelerated_dot_product,
|
||||
.has_dot_2x16 = device->rad_info.has_accelerated_dot_product,
|
||||
.use_scoped_barrier = true,
|
||||
#ifdef LLVM_AVAILABLE
|
||||
.has_fmulz = !device->use_llvm || LLVM_VERSION_MAJOR >= 12,
|
||||
#else
|
||||
.has_fmulz = true,
|
||||
#endif
|
||||
.max_unroll_iterations = 32,
|
||||
.max_unroll_iterations_aggressive = 128,
|
||||
.use_interpolated_input_intrinsics = true,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue