mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-02 12:18:09 +02:00
radv,aco,ac/llvm: use nir_op_f{sin,cos}_amd
This lets NIR optimize the multiplication, particularly sin/cos(a * #b).

fossil-db (Sienna Cichlid):
Totals from 12306 (7.58% of 162293) affected shaders:
MaxWaves: 224814 -> 224834 (+0.01%)
Instrs: 17365273 -> 17338758 (-0.15%); split: -0.16%, +0.00%
CodeSize: 93478488 -> 93354912 (-0.13%); split: -0.14%, +0.01%
VGPRs: 752080 -> 752072 (-0.00%); split: -0.00%, +0.00%
SpillSGPRs: 8440 -> 8410 (-0.36%)
Latency: 200402154 -> 200279405 (-0.06%); split: -0.06%, +0.00%
InvThroughput: 37588077 -> 37545545 (-0.11%); split: -0.11%, +0.00%
VClause: 293863 -> 293874 (+0.00%); split: -0.03%, +0.03%
SClause: 619539 -> 619064 (-0.08%); split: -0.09%, +0.01%
Copies: 1151591 -> 1151641 (+0.00%); split: -0.04%, +0.05%
Branches: 506434 -> 506437 (+0.00%); split: -0.00%, +0.00%
PreSGPRs: 877609 -> 877517 (-0.01%); split: -0.01%, +0.00%
PreVGPRs: 711938 -> 711940 (+0.00%); split: -0.00%, +0.00%

fossil-db (LLVM, Sienna Cichlid):
Totals from 4377 (3.59% of 121873) affected shaders:
SGPRs: 358960 -> 359176 (+0.06%); split: -0.18%, +0.25%
VGPRs: 319832 -> 319720 (-0.04%); split: -0.18%, +0.15%
SpillSGPRs: 46983 -> 47007 (+0.05%); split: -0.99%, +1.04%
CodeSize: 30872812 -> 30764512 (-0.35%); split: -0.39%, +0.04%
MaxWaves: 73814 -> 73904 (+0.12%); split: +0.25%, -0.13%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10587>
This commit is contained in:
parent
bb0415b697
commit
48578713b7
4 changed files with 36 additions and 14 deletions
|
|
@ -2750,27 +2750,22 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
}
|
||||
break;
|
||||
}
|
||||
case nir_op_fsin:
|
||||
case nir_op_fcos: {
|
||||
case nir_op_fsin_amd:
|
||||
case nir_op_fcos_amd: {
|
||||
Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
|
||||
aco_ptr<Instruction> norm;
|
||||
if (dst.regClass() == v2b) {
|
||||
Temp half_pi = bld.copy(bld.def(s1), Operand::c32(0x3118u));
|
||||
Temp tmp = bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), half_pi, src);
|
||||
aco_opcode opcode =
|
||||
instr->op == nir_op_fsin ? aco_opcode::v_sin_f16 : aco_opcode::v_cos_f16;
|
||||
bld.vop1(opcode, Definition(dst), tmp);
|
||||
instr->op == nir_op_fsin_amd ? aco_opcode::v_sin_f16 : aco_opcode::v_cos_f16;
|
||||
bld.vop1(opcode, Definition(dst), src);
|
||||
} else if (dst.regClass() == v1) {
|
||||
Temp half_pi = bld.copy(bld.def(s1), Operand::c32(0x3e22f983u));
|
||||
Temp tmp = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), half_pi, src);
|
||||
|
||||
/* before GFX9, v_sin_f32 and v_cos_f32 had a valid input domain of [-256, +256] */
|
||||
if (ctx->options->gfx_level < GFX9)
|
||||
tmp = bld.vop1(aco_opcode::v_fract_f32, bld.def(v1), tmp);
|
||||
src = bld.vop1(aco_opcode::v_fract_f32, bld.def(v1), src);
|
||||
|
||||
aco_opcode opcode =
|
||||
instr->op == nir_op_fsin ? aco_opcode::v_sin_f32 : aco_opcode::v_cos_f32;
|
||||
bld.vop1(opcode, Definition(dst), tmp);
|
||||
instr->op == nir_op_fsin_amd ? aco_opcode::v_sin_f32 : aco_opcode::v_cos_f32;
|
||||
bld.vop1(opcode, Definition(dst), src);
|
||||
} else {
|
||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -501,8 +501,8 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
case nir_op_fceil:
|
||||
case nir_op_ftrunc:
|
||||
case nir_op_fround_even:
|
||||
case nir_op_fsin:
|
||||
case nir_op_fcos:
|
||||
case nir_op_fsin_amd:
|
||||
case nir_op_fcos_amd:
|
||||
case nir_op_f2f16:
|
||||
case nir_op_f2f16_rtz:
|
||||
case nir_op_f2f16_rtne:
|
||||
|
|
|
|||
|
|
@ -854,6 +854,16 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
|||
result =
|
||||
emit_intrin_1f_param(&ctx->ac, "llvm.cos", ac_to_float_type(&ctx->ac, def_type), src[0]);
|
||||
break;
|
||||
case nir_op_fsin_amd:
|
||||
case nir_op_fcos_amd:
|
||||
/* before GFX9, v_sin_f32 and v_cos_f32 had a valid input domain of [-256, +256] */
|
||||
if (ctx->ac.gfx_level < GFX9)
|
||||
src[0] = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.fract",
|
||||
ac_to_float_type(&ctx->ac, def_type), src[0]);
|
||||
result =
|
||||
emit_intrin_1f_param(&ctx->ac, instr->op == nir_op_fsin_amd ? "llvm.amdgcn.sin" : "llvm.amdgcn.cos",
|
||||
ac_to_float_type(&ctx->ac, def_type), src[0]);
|
||||
break;
|
||||
case nir_op_fsqrt:
|
||||
result =
|
||||
emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", ac_to_float_type(&ctx->ac, def_type), src[0]);
|
||||
|
|
|
|||
|
|
@ -639,6 +639,21 @@ radv_lower_ms_workgroup_id(nir_shader *nir)
|
|||
return progress;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_sincos(const nir_instr *instr, const void *_)
|
||||
{
|
||||
return instr->type == nir_instr_type_alu &&
|
||||
(nir_instr_as_alu(instr)->op == nir_op_fsin || nir_instr_as_alu(instr)->op == nir_op_fcos);
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
lower_sincos(struct nir_builder *b, nir_instr *instr, void *_)
|
||||
{
|
||||
nir_alu_instr *sincos = nir_instr_as_alu(instr);
|
||||
nir_ssa_def *src = nir_fmul_imm(b, nir_ssa_for_alu_src(b, sincos, 0), 0.15915493667125702);
|
||||
return sincos->op == nir_op_fsin ? nir_fsin_amd(b, src) : nir_fcos_amd(b, src);
|
||||
}
|
||||
|
||||
nir_shader *
|
||||
radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_stage *stage,
|
||||
const struct radv_pipeline_key *key)
|
||||
|
|
@ -849,6 +864,8 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_
|
|||
}
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_doubles, NULL, lower_doubles);
|
||||
|
||||
NIR_PASS(_, nir, nir_shader_lower_instructions, &is_sincos, &lower_sincos, NULL);
|
||||
}
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_system_values);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue