mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 19:58:09 +02:00
nir: rename fsin_amd and fcos_amd to a more generic name
Nvidia implements both the same way as AMD does, so it makes sense to allow for code sharing here. Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Reviewed-by: Mel Henning <mhenning@darkrefraction.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40541>
This commit is contained in:
parent
1b6ed1b34e
commit
5bb3c9f69c
11 changed files with 26 additions and 26 deletions
|
|
@ -19,7 +19,7 @@ lower_sin_cos(struct nir_builder *b, nir_alu_instr *sincos, UNUSED void *_)
|
||||||
b->fp_math_ctrl = sincos->fp_math_ctrl;
|
b->fp_math_ctrl = sincos->fp_math_ctrl;
|
||||||
|
|
||||||
nir_def *src = nir_fmul_imm(b, nir_ssa_for_alu_src(b, sincos, 0), 0.15915493667125702);
|
nir_def *src = nir_fmul_imm(b, nir_ssa_for_alu_src(b, sincos, 0), 0.15915493667125702);
|
||||||
nir_def *replace = sincos->op == nir_op_fsin ? nir_fsin_amd(b, src) : nir_fcos_amd(b, src);
|
nir_def *replace = sincos->op == nir_op_fsin ? nir_fsin_normalized_2_pi(b, src) : nir_fcos_normalized_2_pi(b, src);
|
||||||
nir_def_replace(&sincos->def, replace);
|
nir_def_replace(&sincos->def, replace);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
|
||||||
|
|
@ -514,8 +514,8 @@ init_context(isel_context* ctx, nir_shader* shader)
|
||||||
case nir_op_fsqrt:
|
case nir_op_fsqrt:
|
||||||
case nir_op_fexp2:
|
case nir_op_fexp2:
|
||||||
case nir_op_flog2:
|
case nir_op_flog2:
|
||||||
case nir_op_fsin_amd:
|
case nir_op_fsin_normalized_2_pi:
|
||||||
case nir_op_fcos_amd:
|
case nir_op_fcos_normalized_2_pi:
|
||||||
case nir_op_pack_half_2x16_rtz_split:
|
case nir_op_pack_half_2x16_rtz_split:
|
||||||
case nir_op_pack_half_2x16_split: {
|
case nir_op_pack_half_2x16_split: {
|
||||||
if (ctx->program->gfx_level < GFX11_5 ||
|
if (ctx->program->gfx_level < GFX11_5 ||
|
||||||
|
|
|
||||||
|
|
@ -2512,10 +2512,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case nir_op_fsin_amd:
|
case nir_op_fsin_normalized_2_pi:
|
||||||
case nir_op_fcos_amd: {
|
case nir_op_fcos_normalized_2_pi: {
|
||||||
if (instr->def.bit_size == 16 || instr->def.bit_size == 32) {
|
if (instr->def.bit_size == 16 || instr->def.bit_size == 32) {
|
||||||
bool is_sin = instr->op == nir_op_fsin_amd;
|
bool is_sin = instr->op == nir_op_fsin_normalized_2_pi;
|
||||||
aco_opcode opcode, fract;
|
aco_opcode opcode, fract;
|
||||||
RegClass rc;
|
RegClass rc;
|
||||||
if (instr->def.bit_size == 16) {
|
if (instr->def.bit_size == 16) {
|
||||||
|
|
|
||||||
|
|
@ -712,12 +712,12 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
||||||
case nir_op_ffract:
|
case nir_op_ffract:
|
||||||
result = emit_fp_intrinsic(&ctx->ac, "llvm.amdgcn.fract", def_type, src[0], NULL, NULL);
|
result = emit_fp_intrinsic(&ctx->ac, "llvm.amdgcn.fract", def_type, src[0], NULL, NULL);
|
||||||
break;
|
break;
|
||||||
case nir_op_fsin_amd:
|
case nir_op_fsin_normalized_2_pi:
|
||||||
case nir_op_fcos_amd:
|
case nir_op_fcos_normalized_2_pi:
|
||||||
/* before GFX9, v_sin_f32 and v_cos_f32 had a valid input domain of [-256, +256] */
|
/* before GFX9, v_sin_f32 and v_cos_f32 had a valid input domain of [-256, +256] */
|
||||||
if (ctx->ac.gfx_level < GFX9)
|
if (ctx->ac.gfx_level < GFX9)
|
||||||
src[0] = emit_fp_intrinsic(&ctx->ac, "llvm.amdgcn.fract", def_type, src[0], NULL, NULL);
|
src[0] = emit_fp_intrinsic(&ctx->ac, "llvm.amdgcn.fract", def_type, src[0], NULL, NULL);
|
||||||
result = emit_fp_intrinsic(&ctx->ac, instr->op == nir_op_fsin_amd ? "llvm.amdgcn.sin" : "llvm.amdgcn.cos",
|
result = emit_fp_intrinsic(&ctx->ac, instr->op == nir_op_fsin_normalized_2_pi ? "llvm.amdgcn.sin" : "llvm.amdgcn.cos",
|
||||||
def_type, src[0], NULL, NULL);
|
def_type, src[0], NULL, NULL);
|
||||||
break;
|
break;
|
||||||
case nir_op_fsqrt:
|
case nir_op_fsqrt:
|
||||||
|
|
|
||||||
|
|
@ -1441,11 +1441,11 @@ unop_horiz("cube_amd", 4, tfloat32, 3, tfloat32, """
|
||||||
}
|
}
|
||||||
""")
|
""")
|
||||||
|
|
||||||
# r600/gcn specific sin and cos
|
# amd/nv specific sin and cos
|
||||||
# these trigonometric functions need some lowering because the supported
|
# these trigonometric functions need some lowering because the supported
|
||||||
# input values are expected to be normalized by dividing by (2 * pi)
|
# input values are expected to be normalized by dividing by (2 * pi)
|
||||||
unop("fsin_amd", tfloat, "sinf(6.2831853 * src0)")
|
unop("fsin_normalized_2_pi", tfloat, "sinf(6.2831853 * src0)")
|
||||||
unop("fcos_amd", tfloat, "cosf(6.2831853 * src0)")
|
unop("fcos_normalized_2_pi", tfloat, "cosf(6.2831853 * src0)")
|
||||||
|
|
||||||
opcode("alignbyte_amd", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32], False, "", """
|
opcode("alignbyte_amd", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32], False, "", """
|
||||||
uint64_t src = src1 | ((uint64_t)src0 << 32);
|
uint64_t src = src1 | ((uint64_t)src0 << 32);
|
||||||
|
|
|
||||||
|
|
@ -3380,7 +3380,7 @@ for op in ['fpow']:
|
||||||
(('bcsel', a, (op, b, c), (op + '(is_used_once)', d, c)), (op, ('bcsel', a, b, d), c)),
|
(('bcsel', a, (op, b, c), (op + '(is_used_once)', d, c)), (op, ('bcsel', a, b, d), c)),
|
||||||
]
|
]
|
||||||
|
|
||||||
for op in ['frcp', 'frsq', 'fsqrt', 'fexp2', 'flog2', 'fsign', 'fsin', 'fcos', 'fsin_amd', 'fcos_amd', 'fsin_mdg', 'fcos_mdg', 'fsin_agx', 'fneg', 'fabs', 'fsign', 'fcanonicalize']:
|
for op in ['frcp', 'frsq', 'fsqrt', 'fexp2', 'flog2', 'fsign', 'fsin', 'fcos', 'fsin_normalized_2_pi', 'fcos_normalized_2_pi', 'fsin_mdg', 'fcos_mdg', 'fsin_agx', 'fneg', 'fabs', 'fsign', 'fcanonicalize']:
|
||||||
optimizations += [
|
optimizations += [
|
||||||
(('bcsel', c, (op + '(is_used_once)', a), (op + '(is_used_once)', b)), (op, ('bcsel', c, a, b))),
|
(('bcsel', c, (op + '(is_used_once)', a), (op + '(is_used_once)', b)), (op, ('bcsel', c, a, b))),
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -118,7 +118,7 @@ opt_alu_fp_math_ctrl(nir_alu_instr *alu, struct opt_fp_ctrl_state *state)
|
||||||
case nir_op_fexp2:
|
case nir_op_fexp2:
|
||||||
case nir_op_flog2:
|
case nir_op_flog2:
|
||||||
case nir_op_fcos:
|
case nir_op_fcos:
|
||||||
case nir_op_fcos_amd:
|
case nir_op_fcos_normalized_2_pi:
|
||||||
case nir_op_fmulz:
|
case nir_op_fmulz:
|
||||||
case nir_op_ffract:
|
case nir_op_ffract:
|
||||||
break;
|
break;
|
||||||
|
|
|
||||||
|
|
@ -5151,8 +5151,8 @@ default_varying_estimate_instr_cost(nir_instr *instr)
|
||||||
case nir_op_fsqrt:
|
case nir_op_fsqrt:
|
||||||
case nir_op_fsin:
|
case nir_op_fsin:
|
||||||
case nir_op_fcos:
|
case nir_op_fcos:
|
||||||
case nir_op_fsin_amd:
|
case nir_op_fsin_normalized_2_pi:
|
||||||
case nir_op_fcos_amd:
|
case nir_op_fcos_normalized_2_pi:
|
||||||
/* FP64 is usually much slower. */
|
/* FP64 is usually much slower. */
|
||||||
return dst_bit_size == 64 ? 32 : 4;
|
return dst_bit_size == 64 ? 32 : 4;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -805,8 +805,8 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32
|
||||||
case nir_op_ffract:
|
case nir_op_ffract:
|
||||||
case nir_op_fsin:
|
case nir_op_fsin:
|
||||||
case nir_op_fcos:
|
case nir_op_fcos:
|
||||||
case nir_op_fsin_amd:
|
case nir_op_fsin_normalized_2_pi:
|
||||||
case nir_op_fcos_amd:
|
case nir_op_fcos_normalized_2_pi:
|
||||||
case nir_op_f2f16:
|
case nir_op_f2f16:
|
||||||
case nir_op_f2f16_rtz:
|
case nir_op_f2f16_rtz:
|
||||||
case nir_op_f2f16_rtne:
|
case nir_op_f2f16_rtne:
|
||||||
|
|
@ -1203,8 +1203,8 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32
|
||||||
|
|
||||||
case nir_op_fsin:
|
case nir_op_fsin:
|
||||||
case nir_op_fcos:
|
case nir_op_fcos:
|
||||||
case nir_op_fsin_amd:
|
case nir_op_fsin_normalized_2_pi:
|
||||||
case nir_op_fcos_amd: {
|
case nir_op_fcos_normalized_2_pi: {
|
||||||
/* [-1, +1], and sin/cos(Inf) is NaN */
|
/* [-1, +1], and sin/cos(Inf) is NaN */
|
||||||
r = FP_CLASS_NEG_ONE | FP_CLASS_LT_ZERO_GT_NEG_ONE | FP_CLASS_ANY_ZERO |
|
r = FP_CLASS_NEG_ONE | FP_CLASS_LT_ZERO_GT_NEG_ONE | FP_CLASS_ANY_ZERO |
|
||||||
FP_CLASS_GT_ZERO_LT_POS_ONE | FP_CLASS_POS_ONE | FP_CLASS_NON_INTEGRAL;
|
FP_CLASS_GT_ZERO_LT_POS_ONE | FP_CLASS_POS_ONE | FP_CLASS_NON_INTEGRAL;
|
||||||
|
|
|
||||||
|
|
@ -1672,7 +1672,7 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
|
||||||
|
|
||||||
if (shader.chip_class() == ISA_CC_CAYMAN) {
|
if (shader.chip_class() == ISA_CC_CAYMAN) {
|
||||||
switch (alu->op) {
|
switch (alu->op) {
|
||||||
case nir_op_fcos_amd:
|
case nir_op_fcos_normalized_2_pi:
|
||||||
return emit_alu_trans_op1_cayman(*alu, op1_cos, shader);
|
return emit_alu_trans_op1_cayman(*alu, op1_cos, shader);
|
||||||
case nir_op_fexp2:
|
case nir_op_fexp2:
|
||||||
return emit_alu_trans_op1_cayman(*alu, op1_exp_ieee, shader);
|
return emit_alu_trans_op1_cayman(*alu, op1_exp_ieee, shader);
|
||||||
|
|
@ -1684,7 +1684,7 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
|
||||||
return emit_alu_trans_op1_cayman(*alu, op1_recipsqrt_ieee1, shader);
|
return emit_alu_trans_op1_cayman(*alu, op1_recipsqrt_ieee1, shader);
|
||||||
case nir_op_fsqrt:
|
case nir_op_fsqrt:
|
||||||
return emit_alu_trans_op1_cayman(*alu, op1_sqrt_ieee, shader);
|
return emit_alu_trans_op1_cayman(*alu, op1_sqrt_ieee, shader);
|
||||||
case nir_op_fsin_amd:
|
case nir_op_fsin_normalized_2_pi:
|
||||||
return emit_alu_trans_op1_cayman(*alu, op1_sin, shader);
|
return emit_alu_trans_op1_cayman(*alu, op1_sin, shader);
|
||||||
case nir_op_i2f32:
|
case nir_op_i2f32:
|
||||||
return emit_alu_op1(*alu, op1_int_to_flt, shader);
|
return emit_alu_op1(*alu, op1_int_to_flt, shader);
|
||||||
|
|
@ -1746,7 +1746,7 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
|
||||||
return emit_alu_trans_op1_eg(*alu, op1_flt_to_int, shader);
|
return emit_alu_trans_op1_eg(*alu, op1_flt_to_int, shader);
|
||||||
case nir_op_f2u32:
|
case nir_op_f2u32:
|
||||||
return emit_alu_trans_op1_eg(*alu, op1_flt_to_uint, shader);
|
return emit_alu_trans_op1_eg(*alu, op1_flt_to_uint, shader);
|
||||||
case nir_op_fcos_amd:
|
case nir_op_fcos_normalized_2_pi:
|
||||||
return emit_alu_trans_op1_eg(*alu, op1_cos, shader);
|
return emit_alu_trans_op1_eg(*alu, op1_cos, shader);
|
||||||
case nir_op_fexp2:
|
case nir_op_fexp2:
|
||||||
return emit_alu_trans_op1_eg(*alu, op1_exp_ieee, shader);
|
return emit_alu_trans_op1_eg(*alu, op1_exp_ieee, shader);
|
||||||
|
|
@ -1756,7 +1756,7 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
|
||||||
return emit_alu_trans_op1_eg(*alu, op1_recip_ieee, shader);
|
return emit_alu_trans_op1_eg(*alu, op1_recip_ieee, shader);
|
||||||
case nir_op_frsq:
|
case nir_op_frsq:
|
||||||
return emit_alu_trans_op1_eg(*alu, op1_recipsqrt_ieee1, shader);
|
return emit_alu_trans_op1_eg(*alu, op1_recipsqrt_ieee1, shader);
|
||||||
case nir_op_fsin_amd:
|
case nir_op_fsin_normalized_2_pi:
|
||||||
return emit_alu_trans_op1_eg(*alu, op1_sin, shader);
|
return emit_alu_trans_op1_eg(*alu, op1_sin, shader);
|
||||||
case nir_op_fsqrt:
|
case nir_op_fsqrt:
|
||||||
return emit_alu_trans_op1_eg(*alu, op1_sqrt_ieee, shader);
|
return emit_alu_trans_op1_eg(*alu, op1_sqrt_ieee, shader);
|
||||||
|
|
|
||||||
|
|
@ -102,9 +102,9 @@ LowerSinCos::lower(nir_instr *instr)
|
||||||
: nir_ffma_imm12(b, fract, 2.0f * M_PI, -M_PI);
|
: nir_ffma_imm12(b, fract, 2.0f * M_PI, -M_PI);
|
||||||
|
|
||||||
if (alu->op == nir_op_fsin)
|
if (alu->op == nir_op_fsin)
|
||||||
return nir_fsin_amd(b, normalized);
|
return nir_fsin_normalized_2_pi(b, normalized);
|
||||||
else
|
else
|
||||||
return nir_fcos_amd(b, normalized);
|
return nir_fcos_normalized_2_pi(b, normalized);
|
||||||
}
|
}
|
||||||
|
|
||||||
class FixKcacheIndirectRead : public NirLowerInstruction {
|
class FixKcacheIndirectRead : public NirLowerInstruction {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue