diff --git a/src/amd/ci/restricted-traces-amd.yml b/src/amd/ci/restricted-traces-amd.yml index 1f08af9bbbb..3471713be3c 100644 --- a/src/amd/ci/restricted-traces-amd.yml +++ b/src/amd/ci/restricted-traces-amd.yml @@ -13,7 +13,7 @@ traces: checksum: c2264076c2f0ca6b48c3bab152a65a36 angle/libangle_restricted_traces_candy_crush_soda_saga.so: vk-radv-raven: - checksum: 3370be7bbc95e6d2ec5b1b3ed36c56a0 + checksum: a3e8f7c78583fb0e6c5d13f576dad5d2 angle/libangle_restricted_traces_free_fire.so: vk-radv-raven: checksum: 62307dd7a10d656733bca7e0f065ac99 @@ -28,7 +28,7 @@ traces: checksum: 47c004fed88bed8d3d387295399f0810 angle/libangle_restricted_traces_pubg_mobile_battle_royale.so: vk-radv-raven: - checksum: 95092db8df12af91de8de5463e4e4bb6 + checksum: f028e6aeb48690e5a36744be55a56773 angle/libangle_restricted_traces_temple_run_300.so: vk-radv-raven: checksum: 4fa6a73dad7d9dd747ba7cbf82aae42e diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index 13a9481fe93..b3c151b72e1 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -1209,8 +1209,6 @@ private: ir_swizzle *matrix_elt(ir_variable *var, int col, int row); ir_dereference_record *record_ref(ir_variable *var, const char *field); - ir_expression *asin_expr(ir_variable *x, float p0, float p1); - /** * Call function \param f with parameters specified as the linked * list \param params of \c ir_variable objects. \param ret should @@ -6507,6 +6505,8 @@ builtin_builder::_degrees(builtin_available_predicate avail, UNOPA(sin, ir_unop_sin) UNOPA(cos, ir_unop_cos) +UNOPA(asin, ir_unop_asin) +UNOPA(acos, ir_unop_acos) ir_function_signature * builtin_builder::_tan(builtin_available_predicate avail, @@ -6518,20 +6518,6 @@ builtin_builder::_tan(builtin_available_predicate avail, return sig; } -ir_expression * -builtin_builder::asin_expr(ir_variable *x, float p0, float p1) -{ - return mul(sign(x), - sub(IMM_FP(x->type, M_PI_2f), - mul(sqrt(sub(IMM_FP(x->type, 1.0f), abs(x))), - add(IMM_FP(x->type, M_PI_2f), - mul(abs(x), - add(IMM_FP(x->type, (M_PI_4f - 1.0f)), - mul(abs(x), - add(IMM_FP(x->type, p0), - mul(abs(x), IMM_FP(x->type, p1)))))))))); -} - /** * Generate a ir_call to a function with a set of parameters * @@ -6568,30 +6554,6 @@ builtin_builder::call(ir_function *f, ir_variable *ret, ir_exec_list params) return new(linalloc) ir_call(sig, deref, &actual_params); } -ir_function_signature * -builtin_builder::_asin(builtin_available_predicate avail, - const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - MAKE_SIG(type, avail, 1, x); - - body.emit(ret(asin_expr(x, 0.086566724f, -0.03102955f))); - - return sig; -} - -ir_function_signature * -builtin_builder::_acos(builtin_available_predicate avail, - const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - MAKE_SIG(type, avail, 1, x); - - body.emit(ret(sub(IMM_FP(type, M_PI_2f), asin_expr(x, 0.08132463f, -0.02363318f)))); - - return sig; -} - ir_function_signature * builtin_builder::_sinh(builtin_available_predicate avail, const glsl_type *type) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 8daded0fde9..4100602d8c4 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -2358,6 +2358,12 @@ nir_visitor::visit(ir_expression *ir) return; } + case ir_unop_asin: + result = nir_asin(&b, srcs[0]); + break; + case ir_unop_acos: + result = nir_acos(&b, srcs[0]); + break; case ir_unop_atan: result = nir_atan(&b, srcs[0]); break; diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp index 7b18a1b775b..d67b7731484 100644 --- a/src/compiler/glsl/ir.cpp +++ b/src/compiler/glsl/ir.cpp @@ -236,6 +236,8 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) case ir_unop_round_even: case ir_unop_sin: case ir_unop_cos: + case ir_unop_asin: + case ir_unop_acos: case ir_unop_dFdx: case ir_unop_dFdx_coarse: case ir_unop_dFdx_fine: diff --git a/src/compiler/glsl/ir_expression_operation.py b/src/compiler/glsl/ir_expression_operation.py index bb055db21a6..026c112133d 100644 --- a/src/compiler/glsl/ir_expression_operation.py +++ b/src/compiler/glsl/ir_expression_operation.py @@ -568,6 +568,8 @@ ir_expression_operation = [ operation("sin", 1, source_types=(float_type,), c_expression="sinf({src0})"), operation("cos", 1, source_types=(float_type,), c_expression="cosf({src0})"), operation("atan", 1, source_types=(float_type,), c_expression="atan({src0})"), + operation("acos", 1, source_types=(float_type,), c_expression="acosf({src0})"), + operation("asin", 1, source_types=(float_type,), c_expression="asinf({src0})"), # Partial derivatives. operation("dFdx", 1, source_types=(float_type,), c_expression="0.0f"), diff --git a/src/compiler/glsl/ir_validate.cpp b/src/compiler/glsl/ir_validate.cpp index 4a7c0f30fc2..e21f7252d96 100644 --- a/src/compiler/glsl/ir_validate.cpp +++ b/src/compiler/glsl/ir_validate.cpp @@ -515,6 +515,8 @@ ir_validate::visit_leave(ir_expression *ir) break; case ir_unop_sin: case ir_unop_cos: + case ir_unop_asin: + case ir_unop_acos: case ir_unop_dFdx: case ir_unop_dFdx_coarse: case ir_unop_dFdx_fine: diff --git a/src/compiler/nir/nir_builtin_builder.c b/src/compiler/nir/nir_builtin_builder.c index ee902044bf2..f64b3b7e10a 100644 --- a/src/compiler/nir/nir_builtin_builder.c +++ b/src/compiler/nir/nir_builtin_builder.c @@ -28,6 +28,13 @@ #include "nir_builder.h" #include "nir_builtin_builder.h" +#ifndef M_PIf +#define M_PIf ((float) M_PI) +#endif +#ifndef M_PI_2f +#define M_PI_2f ((float) M_PI_2) +#endif + nir_def * nir_cross3(nir_builder *b, nir_def *x, nir_def *y) { @@ -159,6 +166,81 @@ nir_upsample(nir_builder *b, nir_def *hi, nir_def *lo) return nir_vec(b, res, lo->num_components); } +/** + * Approximate asin(x) by formula 4.45 from Abramowitz & Stegun, "Handbook + * of Mathematical Functions": + * + * asin~(x) = (π/2 - sqrt(1 - |x|) * ( a0 + a1 * |x| + a2 * |x|^2 + a3 * |x|^3 ) + * + * where a0 = 1.5707288 a1 = -0.2121144 a2 = 0.0742610 a3 = -0.0187203 + * + * This has a very small absolute error, but the relative error can become + * large when |x| is small. For small |x| the Taylor series makes a good + * approximation, so when the relative error matters (i.e. for asin rather + * than acos) we do a piecewise approximation with the Taylor series for + * |x| < 0.21502245 and formula 4.45 elsewhere. The crossover point is + * the value in [0.1, 0.7071] where the two approximations are equal. + */ +static nir_def * +build_asin(nir_builder *b, nir_def *x, bool piecewise) +{ + if (x->bit_size == 16) { + /* The polynomial approximation may not be precise enough to meet half-float + * precision requirements. Alternatively, we could implement this using + * the formula: + * + * asin(x) = atan2(x, sqrt(1 - x*x)) + * + * But that is very expensive, so instead we just do the polynomial + * approximation in 32-bit math and then we convert the result back to + * 16-bit. + */ + return nir_f2f16(b, build_asin(b, nir_f2f32(b, x), piecewise)); + } + nir_def *abs_x = nir_fabs(b, x); + + nir_def *p0_plus_xp1 = nir_ffma_imm12(b, abs_x, -0.0187293, 0.0742610); + + nir_def *expr_tail = + nir_ffma_imm2(b, abs_x, + nir_ffma_imm2(b, abs_x, p0_plus_xp1, -0.2121144), + 1.5707288); + + nir_def *result0 = nir_fmul(b, nir_fsign(b, x), + nir_a_minus_bc(b, nir_imm_floatN_t(b, M_PI_2f, x->bit_size), + nir_fsqrt(b, + nir_fsub_imm(b, 1.0, abs_x)), + expr_tail)); + if (piecewise) { + /* use taylor approximation for |x| < 0.21502245 */ + + nir_def *x2 = nir_fmul(b, x, x); + nir_def *result1 = nir_fmul(b, + x, + nir_ffma_imm12(b, x2, (1.0/6.0), 1.0)); + return nir_bcsel(b, + nir_flt_imm(b, abs_x, 0.21502245), + result1, + result0); + } else { + return result0; + } +} + +nir_def * +nir_asin(nir_builder *b, nir_def *x) +{ + /* use piecewise approximation to keep low relative error near 0 */ + return build_asin(b, x, true); +} + +nir_def * +nir_acos(nir_builder *b, nir_def *x) +{ + /* piecewise approximation not needed to keep low relative error */ + return nir_fsub_imm(b, M_PI_2f, build_asin(b, x, false)); +} + nir_def * nir_atan(nir_builder *b, nir_def *y_over_x) { diff --git a/src/compiler/nir/nir_builtin_builder.h b/src/compiler/nir/nir_builtin_builder.h index 67accb8a085..115fb08b5bb 100644 --- a/src/compiler/nir/nir_builtin_builder.h +++ b/src/compiler/nir/nir_builtin_builder.h @@ -44,9 +44,12 @@ nir_def *nir_normalize(nir_builder *b, nir_def *vec); nir_def *nir_smoothstep(nir_builder *b, nir_def *edge0, nir_def *edge1, nir_def *x); nir_def *nir_upsample(nir_builder *b, nir_def *hi, nir_def *lo); +nir_def *nir_acos(nir_builder *b, nir_def *x); +nir_def *nir_asin(nir_builder *b, nir_def *x); nir_def *nir_atan(nir_builder *b, nir_def *y_over_x); nir_def *nir_atan2(nir_builder *b, nir_def *y, nir_def *x); + nir_def * nir_build_texture_query(nir_builder *b, nir_tex_instr *tex, nir_texop texop, unsigned components, nir_alu_type dest_type, diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c index 21fc4c83ffe..bfa3c1a174b 100644 --- a/src/compiler/spirv/vtn_glsl450.c +++ b/src/compiler/spirv/vtn_glsl450.c @@ -120,73 +120,6 @@ matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src) return val; } -/** - * Approximate asin(x) by the piecewise formula: - * for |x| < 0.5, asin~(x) = x * (1 + x²(pS0 + x²(pS1 + x²*pS2)) / (1 + x²*qS1)) - * for |x| ≥ 0.5, asin~(x) = sign(x) * (π/2 - sqrt(1 - |x|) * (π/2 + |x|(π/4 - 1 + |x|(p0 + |x|p1)))) - * - * The latter is correct to first order at x=0 and x=±1 regardless of the p - * coefficients but can be made second-order correct at both ends by selecting - * the fit coefficients appropriately. Different p coefficients can be used - * in the asin and acos implementation to minimize some relative error metric - * in each case. - */ -static nir_def * -build_asin(nir_builder *b, nir_def *x, float p0, float p1, bool piecewise) -{ - if (x->bit_size == 16) { - /* The polynomial approximation isn't precise enough to meet half-float - * precision requirements. Alternatively, we could implement this using - * the formula: - * - * asin(x) = atan2(x, sqrt(1 - x*x)) - * - * But that is very expensive, so instead we just do the polynomial - * approximation in 32-bit math and then we convert the result back to - * 16-bit. - */ - nir_def *result = - nir_f2f16(b, build_asin(b, nir_f2f32(b, x), p0, p1, piecewise)); - - return result; - } - nir_def *one = nir_imm_floatN_t(b, 1.0f, x->bit_size); - nir_def *half = nir_imm_floatN_t(b, 0.5f, x->bit_size); - nir_def *abs_x = nir_fabs(b, x); - - nir_def *p0_plus_xp1 = nir_ffma_imm12(b, abs_x, p1, p0); - - nir_def *expr_tail = - nir_ffma_imm2(b, abs_x, - nir_ffma_imm2(b, abs_x, p0_plus_xp1, M_PI_4f - 1.0f), - M_PI_2f); - - nir_def *result0 = nir_fmul(b, nir_fsign(b, x), - nir_a_minus_bc(b, nir_imm_floatN_t(b, M_PI_2f, x->bit_size), - nir_fsqrt(b, nir_fsub(b, one, abs_x)), - expr_tail)); - if (piecewise) { - /* approximation for |x| < 0.5 */ - const float pS0 = 1.6666586697e-01f; - const float pS1 = -4.2743422091e-02f; - const float pS2 = -8.6563630030e-03f; - const float qS1 = -7.0662963390e-01f; - - nir_def *x2 = nir_fmul(b, x, x); - nir_def *p = nir_fmul(b, - x2, - nir_ffma_imm2(b, x2, - nir_ffma_imm12(b, x2, pS2, pS1), - pS0)); - - nir_def *q = nir_ffma_imm1(b, x2, qS1, one); - nir_def *result1 = nir_ffma(b, x, nir_fdiv(b, p, q), x); - return nir_bcsel(b, nir_flt(b, abs_x, half), result1, result0); - } else { - return result0; - } -} - static nir_op vtn_nir_alu_op_for_spirv_glsl_opcode(struct vtn_builder *b, enum GLSLstd450 opcode, @@ -567,13 +500,11 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, } case GLSLstd450Asin: - dest->def = build_asin(nb, src[0], 0.086566724, -0.03102955, true); + dest->def = nir_asin(nb, src[0]); break; case GLSLstd450Acos: - dest->def = - nir_fsub_imm(nb, M_PI_2f, - build_asin(nb, src[0], 0.08132463, -0.02363318, false)); + dest->def = nir_acos(nb, src[0]); break; case GLSLstd450Atan: diff --git a/src/gallium/drivers/virgl/ci/traces-virgl.yml b/src/gallium/drivers/virgl/ci/traces-virgl.yml index 7ce54d5a325..2841e1f1555 100644 --- a/src/gallium/drivers/virgl/ci/traces-virgl.yml +++ b/src/gallium/drivers/virgl/ci/traces-virgl.yml @@ -109,7 +109,7 @@ traces: label: [crash] godot/Material Testers.x86_64_2020.04.08_13.38_frame799.rdc: gl-virgl: - checksum: 7ba4f0c52a719d94b805653d31ce22d7 + checksum: 465410a06d966b377bc8c41e52b21b0f ror/ror-default.trace: gl-virgl: label: [crash] diff --git a/src/gallium/drivers/zink/ci/traces-zink.yml b/src/gallium/drivers/zink/ci/traces-zink.yml index 19fdac82554..20c687075bd 100644 --- a/src/gallium/drivers/zink/ci/traces-zink.yml +++ b/src/gallium/drivers/zink/ci/traces-zink.yml @@ -44,17 +44,17 @@ traces: checksum: 58e2d6ea105ca2735b05ce06709691fb text: Clear color for the background can be black or grey zink-radv-gfx1201: - checksum: c65527b7db55687e3c2741c0286b70e2 + checksum: 9c55251c2979a107ba852719d7e41889 blender/blender-demo-ellie_pose.trace: gl-zink-anv-adl: - checksum: ca8df31308386674816c0e801f220565 + checksum: ddae9bcbff102e46739454eaa09772c0 gl-zink-anv-tgl: - checksum: ca8df31308386674816c0e801f220565 + checksum: ddae9bcbff102e46739454eaa09772c0 zink-radv-vangogh: - checksum: 4670acc17f40e2d20ecf355a64b5612e + checksum: c6a14ab73815748450f4feaf50fa9f2e zink-radv-gfx1201: - checksum: 4670acc17f40e2d20ecf355a64b5612e + checksum: c6a14ab73815748450f4feaf50fa9f2e glxgears/glxgears-2-v2.trace: gl-zink-anv-adl: @@ -97,9 +97,9 @@ traces: checksum: dbe1de4e2e812413f173ea6c423117ff text: "'egl_platform.cpp( 227) - Error - Couldn't find a suitable EGL config' -- revisit when we can turn on X11?" zink-radv-vangogh: - checksum: 0615d3d7598d0088393bfc8fa24f73c8 + checksum: 65e86206d1ba3ed8365f8b95dc9e9e71 zink-radv-gfx1201: - checksum: 07f0e057eeea449544e16c7ff65e965f + checksum: f22ee8433dd7b689c46816d576bf3c12 gputest/pixmark-julia-fp32-v2.trace: gl-zink-anv-adl: diff --git a/src/intel/ci/restricted-traces-intel.yml b/src/intel/ci/restricted-traces-intel.yml index a469e07b291..e20304d6010 100644 --- a/src/intel/ci/restricted-traces-intel.yml +++ b/src/intel/ci/restricted-traces-intel.yml @@ -13,7 +13,7 @@ traces: checksum: e038ef82470fff7b94cf958fa869d130 angle/libangle_restricted_traces_candy_crush_soda_saga.so: vk-anv-adl: - checksum: dcfc2dbc1e383d10b86a9c3a52452aa8 + checksum: dc5d96a70af6165d88f882b96fb73e73 angle/libangle_restricted_traces_free_fire.so: vk-anv-adl: checksum: 1a9f03ed30ee254842afe7ab7792a493 @@ -28,7 +28,7 @@ traces: checksum: ae8371870059d87d475da217a453acfc angle/libangle_restricted_traces_pubg_mobile_battle_royale.so: vk-anv-adl: - checksum: b1970da0e8d29498078e53d2b6bbff71 + checksum: 692350e4315d58f2644f5901b954fa15 angle/libangle_restricted_traces_temple_run_300.so: vk-anv-adl: checksum: 72610fdadbfb7d99aa5de13d78598ff2 diff --git a/src/intel/ci/traces-iris.yml b/src/intel/ci/traces-iris.yml index 028b13e0ffe..c676d191f55 100644 --- a/src/intel/ci/traces-iris.yml +++ b/src/intel/ci/traces-iris.yml @@ -557,7 +557,7 @@ traces: checksum: 85b4f3279fde206dace85ffd727057ea gl-intel-kbl: label: [no-perf] - checksum: efab5903f88f7f34e7c948cfcfb54cd9 + checksum: 46555aa91e3f9b3f12f93a28b4ff069b gl-intel-whl: label: [no-perf] checksum: 6a0e17d98eca33e80810b6158e998aa1 @@ -569,19 +569,19 @@ traces: checksum: 85b4f3279fde206dace85ffd727057ea blender/blender-demo-ellie_pose.trace: gl-intel-apl: - checksum: 7d0da76cdf72ff785a1ecc6d4e404e0c + checksum: 61d144ac9cd8cbba11b19bb1174855bc gl-intel-glx: - checksum: 7d0da76cdf72ff785a1ecc6d4e404e0c + checksum: 61d144ac9cd8cbba11b19bb1174855bc gl-intel-amly: - checksum: 7d0da76cdf72ff785a1ecc6d4e404e0c + checksum: 61d144ac9cd8cbba11b19bb1174855bc gl-intel-kbl: - checksum: 7d0da76cdf72ff785a1ecc6d4e404e0c + checksum: 61d144ac9cd8cbba11b19bb1174855bc gl-intel-whl: - checksum: 7d0da76cdf72ff785a1ecc6d4e404e0c + checksum: 61d144ac9cd8cbba11b19bb1174855bc gl-intel-cml: - checksum: 7d0da76cdf72ff785a1ecc6d4e404e0c + checksum: 61d144ac9cd8cbba11b19bb1174855bc gl-intel-adl: - checksum: 7d0da76cdf72ff785a1ecc6d4e404e0c + checksum: 61d144ac9cd8cbba11b19bb1174855bc unvanquished/unvanquished-lowest.trace: gl-intel-apl: checksum: 884096b6dd40e2f500d0837501287fac