mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-20 09:08:07 +02:00
radeonsi: emit 1/sqrt for RSQ
We don't need the clamped version and we don't have to use any intrinsic.

Stats on Tonga:

15382 shaders in 9128 tests
Totals:
SGPRS: 1230560 -> 1230560 (0.00 %)
VGPRS: 469577 -> 462504 (-1.51 %)
Code Size: 22089908 -> 21730052 (-1.63 %) bytes
LDS: 598 -> 598 (0.00 %) blocks
Scratch: 283648 -> 281600 (-0.72 %) bytes per wave
Max Waves: 125664 -> 126969 (1.04 %)
Wait states: 0 -> 0 (0.00 %)

Totals from affected shaders:
SGPRS: 547280 -> 547280 (0.00 %)
VGPRS: 269132 -> 262059 (-2.63 %)
Code Size: 15709604 -> 15349748 (-2.29 %) bytes
LDS: 198 -> 198 (0.00 %) blocks
Scratch: 74752 -> 72704 (-2.74 %) bytes per wave
Max Waves: 47840 -> 49145 (2.73 %)
Wait states: 0 -> 0 (0.00 %)

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
This commit is contained in:
parent
54c4d525da
commit
0e1fefa722
1 changed files with 16 additions and 2 deletions
|
|
@ -1523,6 +1523,21 @@ static void emit_up2h(const struct lp_build_tgsi_action *action,
|
|||
}
|
||||
}
|
||||
|
||||
/* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
|
||||
* the target machine. f64 needs global unsafe math flags to get rsq. */
|
||||
static void emit_rsq(const struct lp_build_tgsi_action *action,
|
||||
struct lp_build_tgsi_context *bld_base,
|
||||
struct lp_build_emit_data *emit_data)
|
||||
{
|
||||
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
|
||||
LLVMValueRef sqrt =
|
||||
lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
|
||||
emit_data->args[0]);
|
||||
|
||||
emit_data->output[emit_data->chan] =
|
||||
LLVMBuildFDiv(builder, bld_base->base.one, sqrt, "");
|
||||
}
|
||||
|
||||
void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *triple)
|
||||
{
|
||||
struct lp_type type;
|
||||
|
|
@ -1661,8 +1676,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *trip
|
|||
bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
|
||||
bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
|
||||
bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
|
||||
bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name = "llvm.AMDGPU.rsq.clamped.f32";
|
||||
bld_base->op_actions[TGSI_OPCODE_RSQ].emit = build_tgsi_intrinsic_nomem;
|
||||
bld_base->op_actions[TGSI_OPCODE_RSQ].emit = emit_rsq;
|
||||
bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
|
||||
bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
|
||||
bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue