From 1fb4427a7aeeca7ada1ff57faad69a56da1c53cd Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sun, 27 Feb 2022 15:46:17 -0500 Subject: [PATCH] pan/bi: Imply round mode most of the time Much less noisy, and provides a path to further improvements. There is a slight behaviour change: int-to-float conversions now use RTE instead of RTZ. For 32-bit opcodes, this affects conversions of integers with magnitude greater than 2^24 by at most 1 ulp. As this behaviour is unspecified in GLSL, this change is believed to be acceptable. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bi_builder.h.py | 18 ++- src/panfrost/bifrost/bifrost_compile.c | 140 +++++++++--------- src/panfrost/bifrost/bifrost_isa.py | 1 + .../bifrost/test/test-constant-fold.cpp | 4 +- src/panfrost/bifrost/test/test-optimizer.cpp | 134 ++++++++--------- .../test/test-scheduler-predicates.cpp | 8 +- .../bifrost/valhall/test/test-add-imm.cpp | 40 ++--- .../bifrost/valhall/test/test-lower-isel.cpp | 2 +- .../bifrost/valhall/test/test-packing.cpp | 31 ++-- .../valhall/test/test-validate-fau.cpp | 40 +++-- 10 files changed, 209 insertions(+), 209 deletions(-) diff --git a/src/panfrost/bifrost/bi_builder.h.py b/src/panfrost/bifrost/bi_builder.h.py index 81b78b7177e..5ba37818264 100644 --- a/src/panfrost/bifrost/bi_builder.h.py +++ b/src/panfrost/bifrost/bi_builder.h.py @@ -19,7 +19,9 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. 
-SKIP = set(["lane", "lane_dest", "lanes", "lanes", "replicate", "swz", "widen", "swap", "neg", "abs", "not", "sign", "extend", "divzero", "clamp", "sem", "not_result", "skip"]) +SKIP = set(["lane", "lane_dest", "lanes", "lanes", "replicate", "swz", "widen", + "swap", "neg", "abs", "not", "sign", "extend", "divzero", "clamp", "sem", + "not_result", "skip", "round"]) TEMPLATE = """ #ifndef _BI_BUILDER_H_ @@ -99,10 +101,13 @@ bi_instr * bi_${opcode.replace('.', '_').lower()}${to_suffix(ops[opcode])}(${sig I->src[${src}] = src${src}; % endfor % for mod in ops[opcode]["modifiers"]: -% if not should_skip(mod): +% if not should_skip(mod, opcode): I->${mod} = ${mod}; % endif % endfor +% if ops[opcode]["rtz"]: + I->round = BI_ROUND_RTZ; +% endif % for imm in ops[opcode]["immediates"]: I->${imm} = ${imm}; % endfor @@ -170,11 +175,16 @@ modifier_lists = order_modifiers(ir_instructions) # Generate type signature for a builder routine -def should_skip(mod): +def should_skip(mod, op): + # FROUND and HADD only make sense in context of a round mode, so override + # the usual skip + if mod == "round" and ("FROUND" in op or "HADD" in op): + return False + return mod in SKIP or mod[0:-1] in SKIP def modifier_signature(op): - return sorted([m for m in op["modifiers"].keys() if not should_skip(m)]) + return sorted([m for m in op["modifiers"].keys() if not should_skip(m, op["key"])]) def signature(op, modifiers, typeful = False, sized = False, no_dests = False): return ", ".join( diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 15b5807aae5..83507f477a0 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -147,7 +147,7 @@ bi_varying_src0_for_barycentric(bi_builder *b, nir_intrinsic_instr *intr) if (sz == 16) { f16 = bi_fma_v2f16(b, offset, bi_imm_f16(256.0), - bi_imm_f16(128.0), BI_ROUND_NONE); + bi_imm_f16(128.0)); } else { assert(sz == 32); bi_index f[2]; @@ -155,13 +155,13 @@ 
bi_varying_src0_for_barycentric(bi_builder *b, nir_intrinsic_instr *intr) f[i] = bi_fadd_rscale_f32(b, bi_word(offset, i), bi_imm_f32(0.5), bi_imm_u32(8), - BI_ROUND_NONE, BI_SPECIAL_NONE); + BI_SPECIAL_NONE); } - f16 = bi_v2f32_to_v2f16(b, f[0], f[1], BI_ROUND_NONE); + f16 = bi_v2f32_to_v2f16(b, f[0], f[1]); } - return bi_v2f16_to_v2s16(b, f16, BI_ROUND_RTZ); + return bi_v2f16_to_v2s16(b, f16); } case nir_intrinsic_load_barycentric_pixel: @@ -1244,7 +1244,7 @@ bi_emit_load_frag_coord(bi_builder *b, nir_intrinsic_instr *instr) for (unsigned i = 0; i < 2; ++i) { src[i] = bi_fadd_f32(b, bi_u16_to_f32(b, bi_half(bi_register(59), i)), - bi_imm_f32(0.5f), BI_ROUND_NONE); + bi_imm_f32(0.5f)); } for (unsigned i = 0; i < 2; ++i) { @@ -1691,7 +1691,7 @@ bi_nir_round(nir_op op) static bi_index bi_fmul_f32(bi_builder *b, bi_index s0, bi_index s1) { - return bi_fma_f32(b, s0, s1, bi_imm_f32(-0.0f), BI_ROUND_NONE); + return bi_fma_f32(b, s0, s1, bi_imm_f32(-0.0f)); } /* Approximate with FRCP_APPROX.f32 and apply a single iteration of @@ -1704,9 +1704,8 @@ bi_lower_frcp_32(bi_builder *b, bi_index dst, bi_index s0) bi_index m = bi_frexpm_f32(b, s0, false, false); bi_index e = bi_frexpe_f32(b, bi_neg(s0), false, false); bi_index t1 = bi_fma_rscale_f32(b, m, bi_neg(x1), bi_imm_f32(1.0), - bi_zero(), BI_ROUND_NONE, BI_SPECIAL_N); - bi_fma_rscale_f32_to(b, dst, t1, x1, x1, e, - BI_ROUND_NONE, BI_SPECIAL_NONE); + bi_zero(), BI_SPECIAL_N); + bi_fma_rscale_f32_to(b, dst, t1, x1, x1, e, BI_SPECIAL_NONE); } static void @@ -1717,9 +1716,8 @@ bi_lower_frsq_32(bi_builder *b, bi_index dst, bi_index s0) bi_index e = bi_frexpe_f32(b, bi_neg(s0), false, true); bi_index t1 = bi_fmul_f32(b, x1, x1); bi_index t2 = bi_fma_rscale_f32(b, m, bi_neg(t1), bi_imm_f32(1.0), - bi_imm_u32(-1), BI_ROUND_NONE, BI_SPECIAL_N); - bi_fma_rscale_f32_to(b, dst, t2, x1, x1, e, - BI_ROUND_NONE, BI_SPECIAL_N); + bi_imm_u32(-1), BI_SPECIAL_N); + bi_fma_rscale_f32_to(b, dst, t2, x1, x1, e, BI_SPECIAL_N); } /* More 
complex transcendentals, see @@ -1730,26 +1728,23 @@ static void bi_lower_fexp2_32(bi_builder *b, bi_index dst, bi_index s0) { bi_index t1 = bi_temp(b->shader); - bi_instr *t1_instr = bi_fadd_f32_to(b, t1, - s0, bi_imm_u32(0x49400000), BI_ROUND_NONE); + bi_instr *t1_instr = bi_fadd_f32_to(b, t1, s0, bi_imm_u32(0x49400000)); t1_instr->clamp = BI_CLAMP_CLAMP_0_INF; - bi_index t2 = bi_fadd_f32(b, t1, bi_imm_u32(0xc9400000), BI_ROUND_NONE); + bi_index t2 = bi_fadd_f32(b, t1, bi_imm_u32(0xc9400000)); - bi_instr *a2 = bi_fadd_f32_to(b, bi_temp(b->shader), - s0, bi_neg(t2), BI_ROUND_NONE); + bi_instr *a2 = bi_fadd_f32_to(b, bi_temp(b->shader), s0, bi_neg(t2)); a2->clamp = BI_CLAMP_CLAMP_M1_1; bi_index a1t = bi_fexp_table_u4(b, t1, BI_ADJ_NONE); bi_index t3 = bi_isub_u32(b, t1, bi_imm_u32(0x49400000), false); bi_index a1i = bi_arshift_i32(b, t3, bi_null(), bi_imm_u8(4)); bi_index p1 = bi_fma_f32(b, a2->dest[0], bi_imm_u32(0x3d635635), - bi_imm_u32(0x3e75fffa), BI_ROUND_NONE); - bi_index p2 = bi_fma_f32(b, p1, a2->dest[0], - bi_imm_u32(0x3f317218), BI_ROUND_NONE); + bi_imm_u32(0x3e75fffa)); + bi_index p2 = bi_fma_f32(b, p1, a2->dest[0], bi_imm_u32(0x3f317218)); bi_index p3 = bi_fmul_f32(b, a2->dest[0], p2); bi_instr *x = bi_fma_rscale_f32_to(b, bi_temp(b->shader), - p3, a1t, a1t, a1i, BI_ROUND_NONE, BI_SPECIAL_NONE); + p3, a1t, a1t, a1i, BI_SPECIAL_NONE); x->clamp = BI_CLAMP_CLAMP_0_INF; bi_instr *max = bi_fmax_f32_to(b, dst, x->dest[0], s0); @@ -1762,12 +1757,13 @@ bi_fexp_32(bi_builder *b, bi_index dst, bi_index s0, bi_index log2_base) /* Scale by base, Multiply by 2*24 and convert to integer to get a 8:24 * fixed-point input */ bi_index scale = bi_fma_rscale_f32(b, s0, log2_base, bi_negzero(), - bi_imm_u32(24), BI_ROUND_NONE, BI_SPECIAL_NONE); - bi_index fixed_pt = bi_f32_to_s32(b, scale, BI_ROUND_NONE); + bi_imm_u32(24), BI_SPECIAL_NONE); + bi_instr *fixed_pt = bi_f32_to_s32_to(b, bi_temp(b->shader), scale); + fixed_pt->round = BI_ROUND_NONE; // XXX /* Compute the 
result for the fixed-point input, but pass along * the floating-point scale for correct NaN propagation */ - bi_fexp_f32_to(b, dst, fixed_pt, scale); + bi_fexp_f32_to(b, dst, fixed_pt->dest[0], scale); } static void @@ -1776,7 +1772,7 @@ bi_lower_flog2_32(bi_builder *b, bi_index dst, bi_index s0) /* s0 = a1 * 2^e, with a1 in [0.75, 1.5) */ bi_index a1 = bi_frexpm_f32(b, s0, true, false); bi_index ei = bi_frexpe_f32(b, s0, true, false); - bi_index ef = bi_s32_to_f32(b, ei, BI_ROUND_RTZ); + bi_index ef = bi_s32_to_f32(b, ei); /* xt estimates -log(r1), a coarse approximation of log(a1) */ bi_index r1 = bi_flog_table_f32(b, s0, BI_MODE_RED, BI_PRECISION_NONE); @@ -1785,33 +1781,32 @@ bi_lower_flog2_32(bi_builder *b, bi_index dst, bi_index s0) /* log(s0) = log(a1 * 2^e) = e + log(a1) = e + log(a1 * r1) - * log(r1), so let x1 = e - log(r1) ~= e + xt and x2 = log(a1 * r1), * and then log(s0) = x1 + x2 */ - bi_index x1 = bi_fadd_f32(b, ef, xt, BI_ROUND_NONE); + bi_index x1 = bi_fadd_f32(b, ef, xt); /* Since a1 * r1 is close to 1, x2 = log(a1 * r1) may be computed by * polynomial approximation around 1. 
The series is expressed around * 1, so set y = (a1 * r1) - 1.0 */ - bi_index y = bi_fma_f32(b, a1, r1, bi_imm_f32(-1.0), BI_ROUND_NONE); + bi_index y = bi_fma_f32(b, a1, r1, bi_imm_f32(-1.0)); /* x2 = log_2(1 + y) = log_e(1 + y) * (1/log_e(2)), so approximate * log_e(1 + y) by the Taylor series (lower precision than the blob): * y - y^2/2 + O(y^3) = y(1 - y/2) + O(y^3) */ bi_index loge = bi_fmul_f32(b, y, - bi_fma_f32(b, y, bi_imm_f32(-0.5), bi_imm_f32(1.0), BI_ROUND_NONE)); + bi_fma_f32(b, y, bi_imm_f32(-0.5), bi_imm_f32(1.0))); bi_index x2 = bi_fmul_f32(b, loge, bi_imm_f32(1.0 / logf(2.0))); /* log(s0) = x1 + x2 */ - bi_fadd_f32_to(b, dst, x1, x2, BI_ROUND_NONE); + bi_fadd_f32_to(b, dst, x1, x2); } static void bi_flog2_32(bi_builder *b, bi_index dst, bi_index s0) { bi_index frexp = bi_frexpe_f32(b, s0, true, false); - bi_index frexpi = bi_s32_to_f32(b, frexp, BI_ROUND_RTZ); + bi_index frexpi = bi_s32_to_f32(b, frexp); bi_index add = bi_fadd_lscale_f32(b, bi_imm_f32(-1.0f), s0); - bi_fma_f32_to(b, dst, bi_flogd_f32(b, s0), add, frexpi, - BI_ROUND_NONE); + bi_fma_f32_to(b, dst, bi_flogd_f32(b, s0), add, frexpi); } static void @@ -1862,12 +1857,11 @@ static void bi_lower_fsincos_32(bi_builder *b, bi_index dst, bi_index s0, bool cos) { /* bottom 6-bits of result times pi/32 approximately s0 mod 2pi */ - bi_index x_u6 = bi_fma_f32(b, s0, TWO_OVER_PI, SINCOS_BIAS, BI_ROUND_NONE); + bi_index x_u6 = bi_fma_f32(b, s0, TWO_OVER_PI, SINCOS_BIAS); /* Approximate domain error (small) */ - bi_index e = bi_fma_f32(b, bi_fadd_f32(b, x_u6, bi_neg(SINCOS_BIAS), - BI_ROUND_NONE), - MPI_OVER_TWO, s0, BI_ROUND_NONE); + bi_index e = bi_fma_f32(b, bi_fadd_f32(b, x_u6, bi_neg(SINCOS_BIAS)), + MPI_OVER_TWO, s0); /* Lookup sin(x), cos(x) */ bi_index sinx = bi_fsin_table_u6(b, x_u6, false); @@ -1875,21 +1869,21 @@ bi_lower_fsincos_32(bi_builder *b, bi_index dst, bi_index s0, bool cos) /* e^2 / 2 */ bi_index e2_over_2 = bi_fma_rscale_f32(b, e, e, bi_negzero(), - bi_imm_u32(-1), 
BI_ROUND_NONE, BI_SPECIAL_NONE); + bi_imm_u32(-1), BI_SPECIAL_NONE); /* (-e^2)/2 f''(x) */ bi_index quadratic = bi_fma_f32(b, bi_neg(e2_over_2), cos ? cosx : sinx, - bi_negzero(), BI_ROUND_NONE); + bi_negzero()); /* e f'(x) - (e^2/2) f''(x) */ bi_instr *I = bi_fma_f32_to(b, bi_temp(b->shader), e, cos ? bi_neg(sinx) : cosx, - quadratic, BI_ROUND_NONE); + quadratic); I->clamp = BI_CLAMP_CLAMP_M1_1; /* f(x) + e f'(x) - (e^2/2) f''(x) */ - bi_fadd_f32_to(b, dst, I->dest[0], cos ? cosx : sinx, BI_ROUND_NONE); + bi_fadd_f32_to(b, dst, I->dest[0], cos ? cosx : sinx); } /* The XOR lane op is useful for derivative calculation, but was added in v7. @@ -2056,7 +2050,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) bi_index s1 = comps > 1 ? bi_word(idx, instr->src[0].swizzle[1]) : s0; - bi_v2f32_to_v2f16_to(b, dst, s0, s1, BI_ROUND_NONE); + bi_v2f32_to_v2f16_to(b, dst, s0, s1); return; /* Vectorized downcasts */ @@ -2095,9 +2089,9 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) bi_half(s1, false)); if (instr->op == nir_op_u2f16) - bi_v2u16_to_v2f16_to(b, dst, t, BI_ROUND_NONE); + bi_v2u16_to_v2f16_to(b, dst, t); else - bi_v2s16_to_v2f16_to(b, dst, t, BI_ROUND_NONE); + bi_v2s16_to_v2f16_to(b, dst, t); return; } @@ -2158,18 +2152,18 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) switch (instr->op) { case nir_op_ffma: - bi_fma_to(b, sz, dst, s0, s1, s2, BI_ROUND_NONE); + bi_fma_to(b, sz, dst, s0, s1, s2); break; case nir_op_fmul: - bi_fma_to(b, sz, dst, s0, s1, bi_negzero(), BI_ROUND_NONE); + bi_fma_to(b, sz, dst, s0, s1, bi_negzero()); break; case nir_op_fsub: s1 = bi_neg(s1); FALLTHROUGH; case nir_op_fadd: - bi_fadd_to(b, sz, dst, s0, s1, BI_ROUND_NONE); + bi_fadd_to(b, sz, dst, s0, s1); break; case nir_op_fsat: { @@ -2245,7 +2239,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) break; case nir_op_ldexp: - bi_ldexp_to(b, sz, dst, s0, s1, BI_ROUND_NONE); + bi_ldexp_to(b, sz, dst, s0, s1); break; case nir_op_b8csel: @@ -2290,7 +2284,7 @@ bi_emit_alu(bi_builder 
*b, nir_alu_instr *instr) case nir_op_fddy_must_abs_mali: { bi_index bit = bi_imm_u32(instr->op == nir_op_fddx_must_abs_mali ? 1 : 2); bi_index adjacent = bi_clper_xor(b, s0, bit); - bi_fadd_to(b, sz, dst, adjacent, bi_neg(s0), BI_ROUND_NONE); + bi_fadd_to(b, sz, dst, adjacent, bi_neg(s0)); break; } @@ -2355,7 +2349,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) BI_SUBGROUP_SUBGROUP4); } - bi_fadd_to(b, sz, dst, right, bi_neg(left), BI_ROUND_NONE); + bi_fadd_to(b, sz, dst, right, bi_neg(left)); break; } @@ -2365,45 +2359,45 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) case nir_op_f2i32: if (src_sz == 32) - bi_f32_to_s32_to(b, dst, s0, BI_ROUND_RTZ); + bi_f32_to_s32_to(b, dst, s0); else - bi_f16_to_s32_to(b, dst, s0, BI_ROUND_RTZ); + bi_f16_to_s32_to(b, dst, s0); break; /* Note 32-bit sources => no vectorization, so 32-bit works */ case nir_op_f2u16: if (src_sz == 32) - bi_f32_to_u32_to(b, dst, s0, BI_ROUND_RTZ); + bi_f32_to_u32_to(b, dst, s0); else - bi_v2f16_to_v2u16_to(b, dst, s0, BI_ROUND_RTZ); + bi_v2f16_to_v2u16_to(b, dst, s0); break; case nir_op_f2i16: if (src_sz == 32) - bi_f32_to_s32_to(b, dst, s0, BI_ROUND_RTZ); + bi_f32_to_s32_to(b, dst, s0); else - bi_v2f16_to_v2s16_to(b, dst, s0, BI_ROUND_RTZ); + bi_v2f16_to_v2s16_to(b, dst, s0); break; case nir_op_f2u32: if (src_sz == 32) - bi_f32_to_u32_to(b, dst, s0, BI_ROUND_RTZ); + bi_f32_to_u32_to(b, dst, s0); else - bi_f16_to_u32_to(b, dst, s0, BI_ROUND_RTZ); + bi_f16_to_u32_to(b, dst, s0); break; case nir_op_u2f16: if (src_sz == 32) - bi_v2u16_to_v2f16_to(b, dst, bi_half(s0, false), BI_ROUND_RTZ); + bi_v2u16_to_v2f16_to(b, dst, bi_half(s0, false)); else if (src_sz == 16) - bi_v2u16_to_v2f16_to(b, dst, s0, BI_ROUND_RTZ); + bi_v2u16_to_v2f16_to(b, dst, s0); else if (src_sz == 8) bi_v2u8_to_v2f16_to(b, dst, s0); break; case nir_op_u2f32: if (src_sz == 32) - bi_u32_to_f32_to(b, dst, s0, BI_ROUND_RTZ); + bi_u32_to_f32_to(b, dst, s0); else if (src_sz == 16) bi_u16_to_f32_to(b, dst, s0); else @@ -2412,9 
+2406,9 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) case nir_op_i2f16: if (src_sz == 32) - bi_v2s16_to_v2f16_to(b, dst, bi_half(s0, false), BI_ROUND_RTZ); + bi_v2s16_to_v2f16_to(b, dst, bi_half(s0, false)); else if (src_sz == 16) - bi_v2s16_to_v2f16_to(b, dst, s0, BI_ROUND_RTZ); + bi_v2s16_to_v2f16_to(b, dst, s0); else if (src_sz == 8) bi_v2s8_to_v2f16_to(b, dst, s0); break; @@ -2423,7 +2417,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) assert(src_sz == 32 || src_sz == 16 || src_sz == 8); if (src_sz == 32) - bi_s32_to_f32_to(b, dst, s0, BI_ROUND_RTZ); + bi_s32_to_f32_to(b, dst, s0); else if (src_sz == 16) bi_s16_to_f32_to(b, dst, s0); else if (src_sz == 8) @@ -2732,7 +2726,9 @@ bi_emit_texc_array_index(bi_builder *b, bi_index idx, nir_alu_type T) * 0, dt - 1). So we use round RTE, clamping is handled at the data * structure level */ - return bi_f32_to_u32(b, idx, BI_ROUND_NONE); + bi_instr *I = bi_f32_to_u32_to(b, bi_temp(b->shader), idx); + I->round = BI_ROUND_NONE; + return I->dest[0]; } /* TEXC's explicit and bias LOD modes requires the LOD to be transformed to a @@ -2760,16 +2756,15 @@ bi_emit_texc_lod_88(bi_builder *b, bi_index lod, bool fp16) bi_instr *fsat = bi_fma_f32_to(b, bi_temp(b->shader), fp16 ? 
bi_half(lod, false) : lod, - bi_imm_f32(1.0f / max_lod), bi_negzero(), BI_ROUND_NONE); + bi_imm_f32(1.0f / max_lod), bi_negzero()); fsat->clamp = BI_CLAMP_CLAMP_M1_1; bi_index fmul = bi_fma_f32(b, fsat->dest[0], bi_imm_f32(max_lod * 256.0f), - bi_negzero(), BI_ROUND_NONE); + bi_negzero()); return bi_mkvec_v2i16(b, - bi_half(bi_f32_to_s32(b, fmul, BI_ROUND_RTZ), false), - bi_imm_u16(0)); + bi_half(bi_f32_to_s32(b, fmul), false), bi_imm_u16(0)); } /* FETCH takes a 32-bit staging register containing the LOD as an integer in @@ -2911,17 +2906,14 @@ bi_emit_cube_coord(bi_builder *b, bi_index coord, bi_index rcp = bi_frcp_f32(b, maxxyz); /* Calculate 0.5 * (1.0 / max{x, y, z}) */ - bi_index fma1 = bi_fma_f32(b, rcp, bi_imm_f32(0.5f), bi_negzero(), - BI_ROUND_NONE); + bi_index fma1 = bi_fma_f32(b, rcp, bi_imm_f32(0.5f), bi_negzero()); /* Transform the coordinates */ *s = bi_temp(b->shader); *t = bi_temp(b->shader); - bi_instr *S = bi_fma_f32_to(b, *s, fma1, ssel, bi_imm_f32(0.5f), - BI_ROUND_NONE); - bi_instr *T = bi_fma_f32_to(b, *t, fma1, tsel, bi_imm_f32(0.5f), - BI_ROUND_NONE); + bi_instr *S = bi_fma_f32_to(b, *s, fma1, ssel, bi_imm_f32(0.5f)); + bi_instr *T = bi_fma_f32_to(b, *t, fma1, tsel, bi_imm_f32(0.5f)); S->clamp = BI_CLAMP_CLAMP_0_1; T->clamp = BI_CLAMP_CLAMP_0_1; diff --git a/src/panfrost/bifrost/bifrost_isa.py b/src/panfrost/bifrost/bifrost_isa.py index ba5e62aba48..f2626140708 100644 --- a/src/panfrost/bifrost/bifrost_isa.py +++ b/src/panfrost/bifrost/bifrost_isa.py @@ -278,6 +278,7 @@ def combine_ir_variants(instructions, key): # Great, we've checked srcs/immediates are consistent and we've summed over # modifiers return { + 'key': key, 'srcs': variants[0]['srcs'], 'dests': variants[0]['dests'], 'staging': variants[0]['staging'], diff --git a/src/panfrost/bifrost/test/test-constant-fold.cpp b/src/panfrost/bifrost/test/test-constant-fold.cpp index 660e7793d1a..d28fc953ae8 100644 --- a/src/panfrost/bifrost/test/test-constant-fold.cpp +++ 
b/src/panfrost/bifrost/test/test-constant-fold.cpp @@ -186,6 +186,6 @@ TEST_F(ConstantFold, OtherOperationsShouldNotFold) bi_index zero = bi_fau(bir_fau(BIR_FAU_IMMEDIATE | 0), false); bi_index reg = bi_register(0); - EXPECT_NOT_FOLD(bi_fma_f32_to(b, reg, zero, zero, zero, BI_ROUND_NONE)); - EXPECT_NOT_FOLD(bi_fadd_f32_to(b, reg, zero, zero, BI_ROUND_NONE)); + EXPECT_NOT_FOLD(bi_fma_f32_to(b, reg, zero, zero, zero)); + EXPECT_NOT_FOLD(bi_fadd_f32_to(b, reg, zero, zero)); } diff --git a/src/panfrost/bifrost/test/test-optimizer.cpp b/src/panfrost/bifrost/test/test-optimizer.cpp index 80b81597786..50593ea25f0 100644 --- a/src/panfrost/bifrost/test/test-optimizer.cpp +++ b/src/panfrost/bifrost/test/test-optimizer.cpp @@ -63,17 +63,17 @@ protected: TEST_F(Optimizer, FusedFABSNEG) { - CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), y, BI_ROUND_NONE), - bi_fadd_f32_to(b, reg, bi_abs(x), y, BI_ROUND_NONE)); + CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), y), + bi_fadd_f32_to(b, reg, bi_abs(x), y)); - CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_neg(x)), y, BI_ROUND_NONE), - bi_fadd_f32_to(b, reg, bi_neg(x), y, BI_ROUND_NONE)); + CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_neg(x)), y), + bi_fadd_f32_to(b, reg, bi_neg(x), y)); - CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, negabsx), y, BI_ROUND_NONE), - bi_fadd_f32_to(b, reg, negabsx, y, BI_ROUND_NONE)); + CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, negabsx), y), + bi_fadd_f32_to(b, reg, negabsx, y)); - CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, x), y, BI_ROUND_NONE), - bi_fadd_f32_to(b, reg, x, y, BI_ROUND_NONE)); + CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, x), y), + bi_fadd_f32_to(b, reg, x, y)); CASE(bi_fmin_f32_to(b, reg, bi_fabsneg_f32(b, negabsx), bi_neg(y)), bi_fmin_f32_to(b, reg, negabsx, bi_neg(y))); @@ -81,8 +81,8 @@ TEST_F(Optimizer, FusedFABSNEG) TEST_F(Optimizer, FusedFABSNEGForFP16) { - CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, negabsx), y, BI_ROUND_NONE), - 
bi_fadd_v2f16_to(b, reg, negabsx, y, BI_ROUND_NONE)); + CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, negabsx), y), + bi_fadd_v2f16_to(b, reg, negabsx, y)); CASE(bi_fmin_v2f16_to(b, reg, bi_fabsneg_v2f16(b, negabsx), bi_neg(y)), bi_fmin_v2f16_to(b, reg, negabsx, bi_neg(y))); @@ -91,26 +91,26 @@ TEST_F(Optimizer, FusedFABSNEGForFP16) TEST_F(Optimizer, FuseFADD_F32WithEqualSourcesAbsAbsAndClamp) { CASE({ - bi_instr *I = bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), bi_abs(x), BI_ROUND_NONE); + bi_instr *I = bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), bi_abs(x)); I->clamp = BI_CLAMP_CLAMP_0_1; }, { - bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x), BI_ROUND_NONE); + bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x)); I->clamp = BI_CLAMP_CLAMP_0_1; }); CASE({ - bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_fabsneg_f32(b, bi_abs(x)), BI_ROUND_NONE); + bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_fabsneg_f32(b, bi_abs(x))); I->clamp = BI_CLAMP_CLAMP_0_1; }, { - bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x), BI_ROUND_NONE); + bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x)); I->clamp = BI_CLAMP_CLAMP_0_1; }); CASE({ - bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), bi_abs(x), BI_ROUND_NONE)); + bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), bi_abs(x))); I->clamp = BI_CLAMP_CLAMP_0_INF; }, { - bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x), BI_ROUND_NONE); + bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x)); I->clamp = BI_CLAMP_CLAMP_0_INF; }); } @@ -118,26 +118,26 @@ TEST_F(Optimizer, FuseFADD_F32WithEqualSourcesAbsAbsAndClamp) TEST_F(Optimizer, FuseFADD_V2F16WithDifferentSourcesAbsAbsAndClamp) { CASE({ - bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(y), BI_ROUND_NONE); + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(y)); I->clamp = BI_CLAMP_CLAMP_0_1; }, { - bi_instr *I = 
bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y), BI_ROUND_NONE); + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y)); I->clamp = BI_CLAMP_CLAMP_0_1; }); CASE({ - bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(y)), BI_ROUND_NONE); + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(y))); I->clamp = BI_CLAMP_CLAMP_0_1; }, { - bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y), BI_ROUND_NONE); + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y)); I->clamp = BI_CLAMP_CLAMP_0_1; }); CASE({ - bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(y), BI_ROUND_NONE)); + bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(y))); I->clamp = BI_CLAMP_CLAMP_0_INF; }, { - bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y), BI_ROUND_NONE); + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y)); I->clamp = BI_CLAMP_CLAMP_0_INF; }); } @@ -145,57 +145,57 @@ TEST_F(Optimizer, FuseFADD_V2F16WithDifferentSourcesAbsAbsAndClamp) TEST_F(Optimizer, AvoidFADD_V2F16WithEqualSourcesAbsAbsAndClamp) { NEGCASE({ - bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(x), BI_ROUND_NONE); + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(x)); I->clamp = BI_CLAMP_CLAMP_0_1; }); NEGCASE({ - bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(x)), BI_ROUND_NONE); + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(x))); I->clamp = BI_CLAMP_CLAMP_0_1; }); NEGCASE({ - bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(x), BI_ROUND_NONE)); + bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(x))); I->clamp = BI_CLAMP_CLAMP_0_INF; }); } TEST_F(Optimizer, SwizzlesComposedForFP16) { - CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), y, BI_ROUND_NONE), - bi_fadd_v2f16_to(b, 
reg, bi_swz_16(negabsx, true, false), y, BI_ROUND_NONE)); + CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), y), + bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y)); - CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, negabsx), true, false), y, BI_ROUND_NONE), - bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y, BI_ROUND_NONE)); + CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, negabsx), true, false), y), + bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y)); - CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), true, false), y, BI_ROUND_NONE), - bi_fadd_v2f16_to(b, reg, negabsx, y, BI_ROUND_NONE)); + CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), true, false), y), + bi_fadd_v2f16_to(b, reg, negabsx, y)); - CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, false)), true, false), y, BI_ROUND_NONE), - bi_fadd_v2f16_to(b, reg, bi_half(negabsx, false), y, BI_ROUND_NONE)); + CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, false)), true, false), y), + bi_fadd_v2f16_to(b, reg, bi_half(negabsx, false), y)); - CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, true)), true, false), y, BI_ROUND_NONE), - bi_fadd_v2f16_to(b, reg, bi_half(negabsx, true), y, BI_ROUND_NONE)); + CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, true)), true, false), y), + bi_fadd_v2f16_to(b, reg, bi_half(negabsx, true), y)); } TEST_F(Optimizer, PreserveWidens) { /* Check that widens are passed through */ - CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, false)), y, BI_ROUND_NONE), - bi_fadd_f32_to(b, reg, bi_half(negabsx, false), y, BI_ROUND_NONE)); + CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, false)), y), + bi_fadd_f32_to(b, reg, bi_half(negabsx, false), y)); - 
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, true)), y, BI_ROUND_NONE), - bi_fadd_f32_to(b, reg, bi_half(negabsx, true), y, BI_ROUND_NONE)); + CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, true)), y), + bi_fadd_f32_to(b, reg, bi_half(negabsx, true), y)); - CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(x, true)), bi_fabsneg_f32(b, bi_half(x, false)), BI_ROUND_NONE), - bi_fadd_f32_to(b, reg, bi_half(x, true), bi_half(x, false), BI_ROUND_NONE)); + CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(x, true)), bi_fabsneg_f32(b, bi_half(x, false))), + bi_fadd_f32_to(b, reg, bi_half(x, true), bi_half(x, false))); } TEST_F(Optimizer, DoNotMixSizesForFABSNEG) { /* Refuse to mix sizes for fabsneg, that's wrong */ - NEGCASE(bi_fadd_f32_to(b, reg, bi_fabsneg_v2f16(b, negabsx), y, BI_ROUND_NONE)); - NEGCASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_f32(b, negabsx), y, BI_ROUND_NONE)); + NEGCASE(bi_fadd_f32_to(b, reg, bi_fabsneg_v2f16(b, negabsx), y)); + NEGCASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_f32(b, negabsx), y)); } TEST_F(Optimizer, AvoidZeroAndFABSNEGFootguns) @@ -206,27 +206,27 @@ TEST_F(Optimizer, AvoidZeroAndFABSNEGFootguns) bi_index zero = bi_zero(); - NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), zero, BI_ROUND_NONE), y, BI_ROUND_NONE)); - NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_neg(x), zero, BI_ROUND_NONE), y, BI_ROUND_NONE)); - NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_neg(bi_abs(x)), zero, BI_ROUND_NONE), y, BI_ROUND_NONE)); - NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, x, zero, BI_ROUND_NONE), y, BI_ROUND_NONE)); + NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), zero), y)); + NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_neg(x), zero), y)); + NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_neg(bi_abs(x)), zero), y)); + NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, x, zero), y)); } TEST_F(Optimizer, ClampsPropagated) { CASE({ - bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, x, 
y, BI_ROUND_NONE)); + bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, x, y)); I->clamp = BI_CLAMP_CLAMP_0_INF; }, { - bi_instr *I = bi_fadd_f32_to(b, reg, x, y, BI_ROUND_NONE); + bi_instr *I = bi_fadd_f32_to(b, reg, x, y); I->clamp = BI_CLAMP_CLAMP_0_INF; }); CASE({ - bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y, BI_ROUND_NONE)); + bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y)); I->clamp = BI_CLAMP_CLAMP_0_1; }, { - bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y, BI_ROUND_NONE); + bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y); I->clamp = BI_CLAMP_CLAMP_0_1; }); } @@ -235,62 +235,62 @@ TEST_F(Optimizer, ClampsPropagated) TEST_F(Optimizer, ClampsComposed) { CASE({ - bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y, BI_ROUND_NONE); + bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y); bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]); I->clamp = BI_CLAMP_CLAMP_M1_1; J->clamp = BI_CLAMP_CLAMP_0_INF; }, { - bi_instr *I = bi_fadd_f32_to(b, reg, x, y, BI_ROUND_NONE); + bi_instr *I = bi_fadd_f32_to(b, reg, x, y); I->clamp = BI_CLAMP_CLAMP_0_1; }); CASE({ - bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y, BI_ROUND_NONE); + bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y); bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]); I->clamp = BI_CLAMP_CLAMP_0_1; J->clamp = BI_CLAMP_CLAMP_0_INF; }, { - bi_instr *I = bi_fadd_f32_to(b, reg, x, y, BI_ROUND_NONE); + bi_instr *I = bi_fadd_f32_to(b, reg, x, y); I->clamp = BI_CLAMP_CLAMP_0_1; }); CASE({ - bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y, BI_ROUND_NONE); + bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y); bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]); I->clamp = BI_CLAMP_CLAMP_0_INF; J->clamp = BI_CLAMP_CLAMP_0_INF; }, { - bi_instr *I = bi_fadd_f32_to(b, reg, x, y, BI_ROUND_NONE); + bi_instr *I = bi_fadd_f32_to(b, reg, x, y); I->clamp = BI_CLAMP_CLAMP_0_INF; }); CASE({ - bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y, 
BI_ROUND_NONE); + bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y); bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]); I->clamp = BI_CLAMP_CLAMP_M1_1; J->clamp = BI_CLAMP_CLAMP_0_INF; }, { - bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y, BI_ROUND_NONE); + bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y); I->clamp = BI_CLAMP_CLAMP_0_1; }); CASE({ - bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y, BI_ROUND_NONE); + bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y); bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]); I->clamp = BI_CLAMP_CLAMP_0_1; J->clamp = BI_CLAMP_CLAMP_0_INF; }, { - bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y, BI_ROUND_NONE); + bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y); I->clamp = BI_CLAMP_CLAMP_0_1; }); CASE({ - bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y, BI_ROUND_NONE); + bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y); bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]); I->clamp = BI_CLAMP_CLAMP_0_INF; J->clamp = BI_CLAMP_CLAMP_0_INF; }, { - bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y, BI_ROUND_NONE); + bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y); I->clamp = BI_CLAMP_CLAMP_0_INF; }); } @@ -298,12 +298,12 @@ TEST_F(Optimizer, ClampsComposed) TEST_F(Optimizer, DoNotMixSizesWhenClamping) { NEGCASE({ - bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_v2f16(b, x, y, BI_ROUND_NONE)); + bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_v2f16(b, x, y)); I->clamp = BI_CLAMP_CLAMP_0_1; }); NEGCASE({ - bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_f32(b, x, y, BI_ROUND_NONE)); + bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_f32(b, x, y)); I->clamp = BI_CLAMP_CLAMP_0_1; }); } @@ -314,12 +314,12 @@ TEST_F(Optimizer, DoNotUseAdditionByZeroForClamps) /* We can't use addition by 0.0 for clamps due to signed zeros. 
*/ NEGCASE({ - bi_instr *I = bi_fadd_f32_to(b, reg, bi_fadd_f32(b, x, y, BI_ROUND_NONE), zero, BI_ROUND_NONE); + bi_instr *I = bi_fadd_f32_to(b, reg, bi_fadd_f32(b, x, y), zero); I->clamp = BI_CLAMP_CLAMP_M1_1; }); NEGCASE({ - bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y, BI_ROUND_NONE), zero, BI_ROUND_NONE); + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y), zero); I->clamp = BI_CLAMP_CLAMP_0_1; }); } diff --git a/src/panfrost/bifrost/test/test-scheduler-predicates.cpp b/src/panfrost/bifrost/test/test-scheduler-predicates.cpp index c00402ea7e1..65d700fbfa6 100644 --- a/src/panfrost/bifrost/test/test-scheduler-predicates.cpp +++ b/src/panfrost/bifrost/test/test-scheduler-predicates.cpp @@ -56,7 +56,7 @@ TEST_F(SchedulerPredicates, MOV) TEST_F(SchedulerPredicates, FMA) { - bi_instr *fma = bi_fma_f32_to(b, TMP(), TMP(), TMP(), bi_zero(), BI_ROUND_NONE); + bi_instr *fma = bi_fma_f32_to(b, TMP(), TMP(), TMP(), bi_zero()); ASSERT_TRUE(bi_can_fma(fma)); ASSERT_FALSE(bi_can_add(fma)); ASSERT_FALSE(bi_must_message(fma)); @@ -96,12 +96,12 @@ TEST_F(SchedulerPredicates, BLEND) TEST_F(SchedulerPredicates, RestrictionsOnModifiersOfSameCycleTemporaries) { - bi_instr *fadd = bi_fadd_f32_to(b, TMP(), TMP(), TMP(), BI_ROUND_NONE); + bi_instr *fadd = bi_fadd_f32_to(b, TMP(), TMP(), TMP()); ASSERT_TRUE(bi_reads_t(fadd, 0)); for (unsigned i = 0; i < 2; ++i) { for (unsigned j = 0; j < 2; ++j) { - bi_instr *fadd = bi_fadd_f32_to(b, TMP(), TMP(), TMP(), BI_ROUND_NONE); + bi_instr *fadd = bi_fadd_f32_to(b, TMP(), TMP(), TMP()); fadd->src[i] = bi_swz_16(TMP(), j, j); ASSERT_TRUE(bi_reads_t(fadd, 1 - i)); ASSERT_FALSE(bi_reads_t(fadd, i)); @@ -115,7 +115,7 @@ TEST_F(SchedulerPredicates, RestrictionsOnFAddV2F16) bi_index y = bi_register(1); /* Basic */ - bi_instr *fadd = bi_fadd_v2f16_to(b, TMP(), x, x, BI_ROUND_NONE); + bi_instr *fadd = bi_fadd_v2f16_to(b, TMP(), x, x); ASSERT_TRUE(bi_can_fma(fadd)); ASSERT_TRUE(bi_can_add(fadd)); diff --git 
a/src/panfrost/bifrost/valhall/test/test-add-imm.cpp b/src/panfrost/bifrost/valhall/test/test-add-imm.cpp index 7004bbbd564..53b394b8b59 100644 --- a/src/panfrost/bifrost/valhall/test/test-add-imm.cpp +++ b/src/panfrost/bifrost/valhall/test/test-add-imm.cpp @@ -60,45 +60,37 @@ TEST_F(AddImm, Basic) { CASE(bi_mov_i32_to(b, bi_register(63), bi_imm_u32(0xABAD1DEA)), bi_iadd_imm_i32_to(b, bi_register(63), bi_zero(), 0xABAD1DEA)); - CASE(bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0), BI_ROUND_NONE), + CASE(bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0)), bi_fadd_imm_f32_to(b, bi_register(1), bi_register(2), fui(42.0))); - CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_imm_f32(42.0), BI_ROUND_NONE), + CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_imm_f32(42.0)), bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)), fui(42.0))); - CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_neg(bi_imm_f32(42.0)), BI_ROUND_NONE), + CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_neg(bi_imm_f32(42.0))), bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)), fui(-42.0))); } TEST_F(AddImm, Commutativty) { - CASE(bi_fadd_f32_to(b, bi_register(1), bi_imm_f32(42.0), bi_register(2), BI_ROUND_NONE), + CASE(bi_fadd_f32_to(b, bi_register(1), bi_imm_f32(42.0), bi_register(2)), bi_fadd_imm_f32_to(b, bi_register(1), bi_register(2), fui(42.0))); } TEST_F(AddImm, NoModifiers) { - NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0), - BI_ROUND_RTP)); - - NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_abs(bi_register(2)), bi_imm_f32(42.0), - BI_ROUND_NONE)); - - NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_neg(bi_register(2)), bi_imm_f32(42.0), - BI_ROUND_NONE)); - - NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_f32(42.0), - BI_ROUND_NONE)); + NEGCASE(bi_fadd_f32_to(b, bi_register(1), 
bi_abs(bi_register(2)), bi_imm_f32(42.0))); + NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_neg(bi_register(2)), bi_imm_f32(42.0))); + NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_f32(42.0))); } TEST_F(AddImm, NoClamp) { NEGCASE({ bi_instr *I = bi_fadd_f32_to(b, bi_register(1), bi_register(2), - bi_imm_f32(42.0), BI_ROUND_NONE); + bi_imm_f32(42.0)); I->clamp = BI_CLAMP_CLAMP_M1_1; }); } TEST_F(AddImm, OtherTypes) { - CASE(bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0), BI_ROUND_NONE), + CASE(bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0)), bi_fadd_imm_v2f16_to(b, bi_register(1), bi_register(2), 0x51405140)); CASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false), @@ -119,7 +111,6 @@ TEST_F(AddImm, OtherTypes) { CASE(bi_iadd_v4s8_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false), bi_iadd_imm_v4i8_to(b, bi_register(1), bi_register(2), 0xDEADBEEF)); - NEGCASE(bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0), BI_ROUND_RTZ)); NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false)); NEGCASE(bi_iadd_v2u16_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false)); NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), true)); @@ -135,3 +126,16 @@ TEST_F(AddImm, Int8) { NEGCASE(bi_iadd_v4u8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false)); NEGCASE(bi_iadd_v4s8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false)); } + +TEST_F(AddImm, OnlyRTE) { + NEGCASE({ + bi_instr *I = bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0)); + I->round = BI_ROUND_RTP; + }); + + NEGCASE({ + bi_instr *I = bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0)); + I->round = BI_ROUND_RTZ; + }); +} + diff --git a/src/panfrost/bifrost/valhall/test/test-lower-isel.cpp 
b/src/panfrost/bifrost/valhall/test/test-lower-isel.cpp index f142e116d2c..de6994b6c4c 100644 --- a/src/panfrost/bifrost/valhall/test/test-lower-isel.cpp +++ b/src/panfrost/bifrost/valhall/test/test-lower-isel.cpp @@ -102,7 +102,7 @@ TEST_F(LowerIsel, IntegerCSEL) { } TEST_F(LowerIsel, Smoke) { - NEGCASE(bi_fadd_f32_to(b, reg, reg, reg, BI_ROUND_RTP)); + NEGCASE(bi_fadd_f32_to(b, reg, reg, reg)); NEGCASE(bi_csel_s32_to(b, reg, reg, reg, reg, reg, BI_CMPF_LT)); NEGCASE(bi_csel_u32_to(b, reg, reg, reg, reg, reg, BI_CMPF_LT)); } diff --git a/src/panfrost/bifrost/valhall/test/test-packing.cpp b/src/panfrost/bifrost/valhall/test/test-packing.cpp index 34ed4e56924..ce46da36422 100644 --- a/src/panfrost/bifrost/valhall/test/test-packing.cpp +++ b/src/panfrost/bifrost/valhall/test/test-packing.cpp @@ -65,42 +65,42 @@ TEST_F(ValhallPacking, Moves) { } TEST_F(ValhallPacking, Fadd) { - CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_register(2), BI_ROUND_NONE), + CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_register(2)), 0x00a4c00000000201ULL); - CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_abs(bi_register(2)), BI_ROUND_NONE), + CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_abs(bi_register(2))), 0x00a4c02000000201ULL); - CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_register(2)), BI_ROUND_NONE), + CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_register(2))), 0x00a4c01000000201ULL); CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_swz_16(bi_register(1), false, false), - bi_swz_16(bi_register(0), true, true), BI_ROUND_NONE), + bi_swz_16(bi_register(0), true, true)), 0x00a5c0000c000001ULL); - CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(1), bi_register(0), BI_ROUND_NONE), + CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(1), bi_register(0)), 0x00a5c00028000001ULL); CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(1), - bi_swz_16(bi_register(0), true, false), BI_ROUND_NONE), + 
bi_swz_16(bi_register(0), true, false)), 0x00a5c00024000001ULL); CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_discard(bi_abs(bi_register(0))), - bi_neg(zero), BI_ROUND_NONE), + bi_neg(zero)), 0x00a5c0902800c040ULL); CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), - zero, BI_ROUND_NONE), + zero), 0x00a4c0000000c001ULL); CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), - bi_neg(zero), BI_ROUND_NONE), + bi_neg(zero)), 0x00a4c0100000c001ULL); CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), - bi_half(bi_register(0), true), BI_ROUND_NONE), + bi_half(bi_register(0), true)), 0x00a4c00008000001ULL); CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), - bi_half(bi_register(0), false), BI_ROUND_NONE), + bi_half(bi_register(0), false)), 0x00a4c00004000001ULL); } @@ -112,8 +112,7 @@ TEST_F(ValhallPacking, Clper) { TEST_F(ValhallPacking, Clamps) { bi_instr *I = bi_fadd_f32_to(b, bi_register(0), bi_register(1), - bi_neg(bi_abs(bi_register(2))), - BI_ROUND_NONE); + bi_neg(bi_abs(bi_register(2)))); CASE(I, 0x00a4c03000000201ULL); I->clamp = BI_CLAMP_CLAMP_M1_1; @@ -123,7 +122,7 @@ TEST_F(ValhallPacking, Clamps) { TEST_F(ValhallPacking, Misc) { CASE(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)), bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 4), false), - bi_neg(zero), BI_ROUND_NONE), + bi_neg(zero)), 0x00b2c10400c08841ULL); CASE(bi_fround_f32_to(b, bi_register(2), bi_discard(bi_neg(bi_register(2))), @@ -164,7 +163,7 @@ TEST_F(ValhallPacking, Comparions) { } TEST_F(ValhallPacking, Conversions) { - CASE(bi_v2s16_to_v2f16_to(b, bi_register(2), bi_discard(bi_register(2)), BI_ROUND_NONE), + CASE(bi_v2s16_to_v2f16_to(b, bi_register(2), bi_discard(bi_register(2))), 0x0090c22000070042); } @@ -219,7 +218,7 @@ TEST_F(ValhallPacking, Transcendentals) { CASE(bi_frsq_f32_to(b, bi_register(2), bi_register(1)), 0x009cc20000020001); - CASE(bi_fma_rscale_f32_to(b, bi_register(0), bi_discard(bi_register(1)), bi_discard(bi_register(2)), bi_neg(zero), 
bi_discard(bi_register(0)), BI_ROUND_NONE, BI_SPECIAL_LEFT), + CASE(bi_fma_rscale_f32_to(b, bi_register(0), bi_discard(bi_register(1)), bi_discard(bi_register(2)), bi_neg(zero), bi_discard(bi_register(0)), BI_SPECIAL_LEFT), 0x0162c00440c04241); } diff --git a/src/panfrost/bifrost/valhall/test/test-validate-fau.cpp b/src/panfrost/bifrost/valhall/test/test-validate-fau.cpp index 4275d0359fd..4aae6e027b3 100644 --- a/src/panfrost/bifrost/valhall/test/test-validate-fau.cpp +++ b/src/panfrost/bifrost/valhall/test/test-validate-fau.cpp @@ -66,53 +66,47 @@ protected: TEST_F(ValidateFau, One64BitUniformSlot) { VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(3), - unif, BI_ROUND_NONE)); + unif)); VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_word(unif, 1), - unif, BI_ROUND_NONE)); - VALID(bi_fma_f32_to(b, bi_register(1), unif, unif, bi_word(unif, 1), - BI_ROUND_NONE)); - INVALID(bi_fma_f32_to(b, bi_register(1), unif, unif2, bi_register(1), - BI_ROUND_NONE)); - INVALID(bi_fma_f32_to(b, bi_register(1), unif, unif2, bi_word(unif, 1), - BI_ROUND_NONE)); + unif)); + VALID(bi_fma_f32_to(b, bi_register(1), unif, unif, bi_word(unif, 1))); + INVALID(bi_fma_f32_to(b, bi_register(1), unif, unif2, bi_register(1))); + INVALID(bi_fma_f32_to(b, bi_register(1), unif, unif2, bi_word(unif, 1))); /* Crafted case that appears correct at first glance and was erronously * marked as valid in early versions of the validator. 
*/ INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 0), false), - bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 1), true), - BI_ROUND_NONE)); + bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 1), true))); } TEST_F(ValidateFau, Combined64BitUniformsConstants) { VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_word(unif, 1), - unif, BI_ROUND_NONE)); - VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), zero, - unif, BI_ROUND_NONE)); - VALID(bi_fma_f32_to(b, bi_register(1), zero, imm1, imm1, BI_ROUND_NONE)); - INVALID(bi_fma_f32_to(b, bi_register(1), zero, bi_word(unif, 1), - unif, BI_ROUND_NONE)); - INVALID(bi_fma_f32_to(b, bi_register(1), zero, imm1, imm2, BI_ROUND_NONE)); + unif)); + VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), zero, unif)); + VALID(bi_fma_f32_to(b, bi_register(1), zero, imm1, imm1)); + INVALID(bi_fma_f32_to(b, bi_register(1), zero, bi_word(unif, 1), unif)); + INVALID(bi_fma_f32_to(b, bi_register(1), zero, imm1, imm2)); } TEST_F(ValidateFau, UniformsOnlyInDefaultMode) { INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_word(unif, 1), - lane_id, BI_ROUND_NONE)); + lane_id)); INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_word(unif, 1), - core_id, BI_ROUND_NONE)); + core_id)); } TEST_F(ValidateFau, SingleSpecialImmediate) { VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(2), - lane_id, BI_ROUND_NONE)); + lane_id)); VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(2), - core_id, BI_ROUND_NONE)); + core_id)); INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), lane_id, - core_id, BI_ROUND_NONE)); + core_id)); } TEST_F(ValidateFau, SmokeTests) @@ -120,5 +114,5 @@ TEST_F(ValidateFau, SmokeTests) VALID(bi_mov_i32_to(b, bi_register(1), bi_register(2))); VALID(bi_mov_i32_to(b, bi_register(1), unif)); VALID(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)), - unif, bi_neg(zero), BI_ROUND_NONE)); + unif, bi_neg(zero))); }