mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 22:10:10 +01:00
aco,ac/llvm,radeonsi: lower f2f16 to f2f16_rtz in nir
No need to handle f2f16 specially for OpenGL, and we can vectorize f2f16 when using ACO.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25990>
This commit is contained in:
parent
7e4aac46ad
commit
dbbf566588
3 changed files with 31 additions and 35 deletions
|
|
@@ -444,7 +444,8 @@ aco_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu)
|
||||||
case nir_op_f2f16: {
|
case nir_op_f2f16: {
|
||||||
nir_shader* shader = nir_cf_node_get_function(&alu->instr.block->cf_node)->function->shader;
|
nir_shader* shader = nir_cf_node_get_function(&alu->instr.block->cf_node)->function->shader;
|
||||||
unsigned execution_mode = shader->info.float_controls_execution_mode;
|
unsigned execution_mode = shader->info.float_controls_execution_mode;
|
||||||
return nir_is_rounding_mode_rtz(execution_mode, 16);
|
return (shader->options->force_f2f16_rtz && !nir_is_rounding_mode_rtne(execution_mode, 16)) ||
|
||||||
|
nir_is_rounding_mode_rtz(execution_mode, 16);
|
||||||
}
|
}
|
||||||
case nir_op_fadd:
|
case nir_op_fadd:
|
||||||
case nir_op_fsub:
|
case nir_op_fsub:
|
||||||
|
|
|
||||||
|
|
@@ -928,15 +928,7 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
||||||
case nir_op_u2f64:
|
case nir_op_u2f64:
|
||||||
result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
|
result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
|
||||||
break;
|
break;
|
||||||
case nir_op_f2f16_rtz:
|
case nir_op_f2f16_rtz: {
|
||||||
case nir_op_f2f16:
|
|
||||||
src[0] = ac_to_float(&ctx->ac, src[0]);
|
|
||||||
|
|
||||||
/* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
|
|
||||||
* all f32->f16 conversions have to round towards zero, because both scalar
|
|
||||||
* and vec2 down-conversions have to round equally.
|
|
||||||
*/
|
|
||||||
if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL || instr->op == nir_op_f2f16_rtz) {
|
|
||||||
src[0] = ac_to_float(&ctx->ac, src[0]);
|
src[0] = ac_to_float(&ctx->ac, src[0]);
|
||||||
|
|
||||||
if (LLVMTypeOf(src[0]) == ctx->ac.f64)
|
if (LLVMTypeOf(src[0]) == ctx->ac.f64)
|
||||||
|
|
@@ -958,15 +950,9 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
||||||
LLVMValueRef param[2] = {src[0], LLVMGetUndef(ctx->ac.f32)};
|
LLVMValueRef param[2] = {src[0], LLVMGetUndef(ctx->ac.f32)};
|
||||||
result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
|
result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
|
||||||
result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
|
result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
|
||||||
} else {
|
|
||||||
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
|
|
||||||
result =
|
|
||||||
LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
|
|
||||||
else
|
|
||||||
result =
|
|
||||||
LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
case nir_op_f2f16:
|
||||||
case nir_op_f2f16_rtne:
|
case nir_op_f2f16_rtne:
|
||||||
case nir_op_f2f32:
|
case nir_op_f2f32:
|
||||||
case nir_op_f2f64:
|
case nir_op_f2f64:
|
||||||
|
|
|
||||||
|
|
@@ -1381,6 +1381,15 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
|
||||||
nir_lower_imul64 | nir_lower_imul_high64 | nir_lower_imul_2x32_64 |
|
nir_lower_imul64 | nir_lower_imul_high64 | nir_lower_imul_2x32_64 |
|
||||||
nir_lower_divmod64 | nir_lower_minmax64 | nir_lower_iabs64 |
|
nir_lower_divmod64 | nir_lower_minmax64 | nir_lower_iabs64 |
|
||||||
nir_lower_iadd_sat64 | nir_lower_conv64,
|
nir_lower_iadd_sat64 | nir_lower_conv64,
|
||||||
|
|
||||||
|
/* For OpenGL, rounding mode is undefined. We want fast packing with v_cvt_pkrtz_f16,
|
||||||
|
* but if we use it, all f32->f16 conversions have to round towards zero,
|
||||||
|
* because both scalar and vec2 down-conversions have to round equally.
|
||||||
|
*
|
||||||
|
* For OpenCL, rounding mode is explicit. This will only lower f2f16 to f2f16_rtz
|
||||||
|
* when execution mode is rtz instead of rtne.
|
||||||
|
*/
|
||||||
|
.force_f2f16_rtz = true,
|
||||||
};
|
};
|
||||||
*sscreen->nir_options = nir_options;
|
*sscreen->nir_options = nir_options;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue