pan/bi: Imply round mode most of the time

Much less noisy, and provides a path to further improvements. There is a slight
behaviour change: int-to-float conversions now use RTE instead of RTZ. For
32-bit opcodes, this affects conversions of integers with magnitude greater than
2^24 by at most 1 ulp. As this behaviour is unspecified in GLSL, this change is
believed to be acceptable.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15187>
This commit is contained in:
Alyssa Rosenzweig 2022-02-27 15:46:17 -05:00 committed by Marge Bot
parent a747708b9d
commit 1fb4427a7a
10 changed files with 209 additions and 209 deletions

View file

@ -19,7 +19,9 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
SKIP = set(["lane", "lane_dest", "lanes", "lanes", "replicate", "swz", "widen", "swap", "neg", "abs", "not", "sign", "extend", "divzero", "clamp", "sem", "not_result", "skip"])
SKIP = set(["lane", "lane_dest", "lanes", "lanes", "replicate", "swz", "widen",
"swap", "neg", "abs", "not", "sign", "extend", "divzero", "clamp", "sem",
"not_result", "skip", "round"])
TEMPLATE = """
#ifndef _BI_BUILDER_H_
@ -99,10 +101,13 @@ bi_instr * bi_${opcode.replace('.', '_').lower()}${to_suffix(ops[opcode])}(${sig
I->src[${src}] = src${src};
% endfor
% for mod in ops[opcode]["modifiers"]:
% if not should_skip(mod):
% if not should_skip(mod, opcode):
I->${mod} = ${mod};
% endif
% endfor
% if ops[opcode]["rtz"]:
I->round = BI_ROUND_RTZ;
% endif
% for imm in ops[opcode]["immediates"]:
I->${imm} = ${imm};
% endfor
@ -170,11 +175,16 @@ modifier_lists = order_modifiers(ir_instructions)
# Generate type signature for a builder routine
def should_skip(mod):
def should_skip(mod, op):
# FROUND and HADD only make sense in context of a round mode, so override
# the usual skip
if mod == "round" and ("FROUND" in op or "HADD" in op):
return False
return mod in SKIP or mod[0:-1] in SKIP
def modifier_signature(op):
return sorted([m for m in op["modifiers"].keys() if not should_skip(m)])
return sorted([m for m in op["modifiers"].keys() if not should_skip(m, op["key"])])
def signature(op, modifiers, typeful = False, sized = False, no_dests = False):
return ", ".join(

View file

@ -147,7 +147,7 @@ bi_varying_src0_for_barycentric(bi_builder *b, nir_intrinsic_instr *intr)
if (sz == 16) {
f16 = bi_fma_v2f16(b, offset, bi_imm_f16(256.0),
bi_imm_f16(128.0), BI_ROUND_NONE);
bi_imm_f16(128.0));
} else {
assert(sz == 32);
bi_index f[2];
@ -155,13 +155,13 @@ bi_varying_src0_for_barycentric(bi_builder *b, nir_intrinsic_instr *intr)
f[i] = bi_fadd_rscale_f32(b,
bi_word(offset, i),
bi_imm_f32(0.5), bi_imm_u32(8),
BI_ROUND_NONE, BI_SPECIAL_NONE);
BI_SPECIAL_NONE);
}
f16 = bi_v2f32_to_v2f16(b, f[0], f[1], BI_ROUND_NONE);
f16 = bi_v2f32_to_v2f16(b, f[0], f[1]);
}
return bi_v2f16_to_v2s16(b, f16, BI_ROUND_RTZ);
return bi_v2f16_to_v2s16(b, f16);
}
case nir_intrinsic_load_barycentric_pixel:
@ -1244,7 +1244,7 @@ bi_emit_load_frag_coord(bi_builder *b, nir_intrinsic_instr *instr)
for (unsigned i = 0; i < 2; ++i) {
src[i] = bi_fadd_f32(b,
bi_u16_to_f32(b, bi_half(bi_register(59), i)),
bi_imm_f32(0.5f), BI_ROUND_NONE);
bi_imm_f32(0.5f));
}
for (unsigned i = 0; i < 2; ++i) {
@ -1691,7 +1691,7 @@ bi_nir_round(nir_op op)
static bi_index
bi_fmul_f32(bi_builder *b, bi_index s0, bi_index s1)
{
return bi_fma_f32(b, s0, s1, bi_imm_f32(-0.0f), BI_ROUND_NONE);
return bi_fma_f32(b, s0, s1, bi_imm_f32(-0.0f));
}
/* Approximate with FRCP_APPROX.f32 and apply a single iteration of
@ -1704,9 +1704,8 @@ bi_lower_frcp_32(bi_builder *b, bi_index dst, bi_index s0)
bi_index m = bi_frexpm_f32(b, s0, false, false);
bi_index e = bi_frexpe_f32(b, bi_neg(s0), false, false);
bi_index t1 = bi_fma_rscale_f32(b, m, bi_neg(x1), bi_imm_f32(1.0),
bi_zero(), BI_ROUND_NONE, BI_SPECIAL_N);
bi_fma_rscale_f32_to(b, dst, t1, x1, x1, e,
BI_ROUND_NONE, BI_SPECIAL_NONE);
bi_zero(), BI_SPECIAL_N);
bi_fma_rscale_f32_to(b, dst, t1, x1, x1, e, BI_SPECIAL_NONE);
}
static void
@ -1717,9 +1716,8 @@ bi_lower_frsq_32(bi_builder *b, bi_index dst, bi_index s0)
bi_index e = bi_frexpe_f32(b, bi_neg(s0), false, true);
bi_index t1 = bi_fmul_f32(b, x1, x1);
bi_index t2 = bi_fma_rscale_f32(b, m, bi_neg(t1), bi_imm_f32(1.0),
bi_imm_u32(-1), BI_ROUND_NONE, BI_SPECIAL_N);
bi_fma_rscale_f32_to(b, dst, t2, x1, x1, e,
BI_ROUND_NONE, BI_SPECIAL_N);
bi_imm_u32(-1), BI_SPECIAL_N);
bi_fma_rscale_f32_to(b, dst, t2, x1, x1, e, BI_SPECIAL_N);
}
/* More complex transcendentals, see
@ -1730,26 +1728,23 @@ static void
bi_lower_fexp2_32(bi_builder *b, bi_index dst, bi_index s0)
{
bi_index t1 = bi_temp(b->shader);
bi_instr *t1_instr = bi_fadd_f32_to(b, t1,
s0, bi_imm_u32(0x49400000), BI_ROUND_NONE);
bi_instr *t1_instr = bi_fadd_f32_to(b, t1, s0, bi_imm_u32(0x49400000));
t1_instr->clamp = BI_CLAMP_CLAMP_0_INF;
bi_index t2 = bi_fadd_f32(b, t1, bi_imm_u32(0xc9400000), BI_ROUND_NONE);
bi_index t2 = bi_fadd_f32(b, t1, bi_imm_u32(0xc9400000));
bi_instr *a2 = bi_fadd_f32_to(b, bi_temp(b->shader),
s0, bi_neg(t2), BI_ROUND_NONE);
bi_instr *a2 = bi_fadd_f32_to(b, bi_temp(b->shader), s0, bi_neg(t2));
a2->clamp = BI_CLAMP_CLAMP_M1_1;
bi_index a1t = bi_fexp_table_u4(b, t1, BI_ADJ_NONE);
bi_index t3 = bi_isub_u32(b, t1, bi_imm_u32(0x49400000), false);
bi_index a1i = bi_arshift_i32(b, t3, bi_null(), bi_imm_u8(4));
bi_index p1 = bi_fma_f32(b, a2->dest[0], bi_imm_u32(0x3d635635),
bi_imm_u32(0x3e75fffa), BI_ROUND_NONE);
bi_index p2 = bi_fma_f32(b, p1, a2->dest[0],
bi_imm_u32(0x3f317218), BI_ROUND_NONE);
bi_imm_u32(0x3e75fffa));
bi_index p2 = bi_fma_f32(b, p1, a2->dest[0], bi_imm_u32(0x3f317218));
bi_index p3 = bi_fmul_f32(b, a2->dest[0], p2);
bi_instr *x = bi_fma_rscale_f32_to(b, bi_temp(b->shader),
p3, a1t, a1t, a1i, BI_ROUND_NONE, BI_SPECIAL_NONE);
p3, a1t, a1t, a1i, BI_SPECIAL_NONE);
x->clamp = BI_CLAMP_CLAMP_0_INF;
bi_instr *max = bi_fmax_f32_to(b, dst, x->dest[0], s0);
@ -1762,12 +1757,13 @@ bi_fexp_32(bi_builder *b, bi_index dst, bi_index s0, bi_index log2_base)
/* Scale by base, Multiply by 2*24 and convert to integer to get a 8:24
* fixed-point input */
bi_index scale = bi_fma_rscale_f32(b, s0, log2_base, bi_negzero(),
bi_imm_u32(24), BI_ROUND_NONE, BI_SPECIAL_NONE);
bi_index fixed_pt = bi_f32_to_s32(b, scale, BI_ROUND_NONE);
bi_imm_u32(24), BI_SPECIAL_NONE);
bi_instr *fixed_pt = bi_f32_to_s32_to(b, bi_temp(b->shader), scale);
fixed_pt->round = BI_ROUND_NONE; // XXX
/* Compute the result for the fixed-point input, but pass along
* the floating-point scale for correct NaN propagation */
bi_fexp_f32_to(b, dst, fixed_pt, scale);
bi_fexp_f32_to(b, dst, fixed_pt->dest[0], scale);
}
static void
@ -1776,7 +1772,7 @@ bi_lower_flog2_32(bi_builder *b, bi_index dst, bi_index s0)
/* s0 = a1 * 2^e, with a1 in [0.75, 1.5) */
bi_index a1 = bi_frexpm_f32(b, s0, true, false);
bi_index ei = bi_frexpe_f32(b, s0, true, false);
bi_index ef = bi_s32_to_f32(b, ei, BI_ROUND_RTZ);
bi_index ef = bi_s32_to_f32(b, ei);
/* xt estimates -log(r1), a coarse approximation of log(a1) */
bi_index r1 = bi_flog_table_f32(b, s0, BI_MODE_RED, BI_PRECISION_NONE);
@ -1785,33 +1781,32 @@ bi_lower_flog2_32(bi_builder *b, bi_index dst, bi_index s0)
/* log(s0) = log(a1 * 2^e) = e + log(a1) = e + log(a1 * r1) -
* log(r1), so let x1 = e - log(r1) ~= e + xt and x2 = log(a1 * r1),
* and then log(s0) = x1 + x2 */
bi_index x1 = bi_fadd_f32(b, ef, xt, BI_ROUND_NONE);
bi_index x1 = bi_fadd_f32(b, ef, xt);
/* Since a1 * r1 is close to 1, x2 = log(a1 * r1) may be computed by
* polynomial approximation around 1. The series is expressed around
* 1, so set y = (a1 * r1) - 1.0 */
bi_index y = bi_fma_f32(b, a1, r1, bi_imm_f32(-1.0), BI_ROUND_NONE);
bi_index y = bi_fma_f32(b, a1, r1, bi_imm_f32(-1.0));
/* x2 = log_2(1 + y) = log_e(1 + y) * (1/log_e(2)), so approximate
* log_e(1 + y) by the Taylor series (lower precision than the blob):
* y - y^2/2 + O(y^3) = y(1 - y/2) + O(y^3) */
bi_index loge = bi_fmul_f32(b, y,
bi_fma_f32(b, y, bi_imm_f32(-0.5), bi_imm_f32(1.0), BI_ROUND_NONE));
bi_fma_f32(b, y, bi_imm_f32(-0.5), bi_imm_f32(1.0)));
bi_index x2 = bi_fmul_f32(b, loge, bi_imm_f32(1.0 / logf(2.0)));
/* log(s0) = x1 + x2 */
bi_fadd_f32_to(b, dst, x1, x2, BI_ROUND_NONE);
bi_fadd_f32_to(b, dst, x1, x2);
}
static void
bi_flog2_32(bi_builder *b, bi_index dst, bi_index s0)
{
bi_index frexp = bi_frexpe_f32(b, s0, true, false);
bi_index frexpi = bi_s32_to_f32(b, frexp, BI_ROUND_RTZ);
bi_index frexpi = bi_s32_to_f32(b, frexp);
bi_index add = bi_fadd_lscale_f32(b, bi_imm_f32(-1.0f), s0);
bi_fma_f32_to(b, dst, bi_flogd_f32(b, s0), add, frexpi,
BI_ROUND_NONE);
bi_fma_f32_to(b, dst, bi_flogd_f32(b, s0), add, frexpi);
}
static void
@ -1862,12 +1857,11 @@ static void
bi_lower_fsincos_32(bi_builder *b, bi_index dst, bi_index s0, bool cos)
{
/* bottom 6-bits of result times pi/32 approximately s0 mod 2pi */
bi_index x_u6 = bi_fma_f32(b, s0, TWO_OVER_PI, SINCOS_BIAS, BI_ROUND_NONE);
bi_index x_u6 = bi_fma_f32(b, s0, TWO_OVER_PI, SINCOS_BIAS);
/* Approximate domain error (small) */
bi_index e = bi_fma_f32(b, bi_fadd_f32(b, x_u6, bi_neg(SINCOS_BIAS),
BI_ROUND_NONE),
MPI_OVER_TWO, s0, BI_ROUND_NONE);
bi_index e = bi_fma_f32(b, bi_fadd_f32(b, x_u6, bi_neg(SINCOS_BIAS)),
MPI_OVER_TWO, s0);
/* Lookup sin(x), cos(x) */
bi_index sinx = bi_fsin_table_u6(b, x_u6, false);
@ -1875,21 +1869,21 @@ bi_lower_fsincos_32(bi_builder *b, bi_index dst, bi_index s0, bool cos)
/* e^2 / 2 */
bi_index e2_over_2 = bi_fma_rscale_f32(b, e, e, bi_negzero(),
bi_imm_u32(-1), BI_ROUND_NONE, BI_SPECIAL_NONE);
bi_imm_u32(-1), BI_SPECIAL_NONE);
/* (-e^2)/2 f''(x) */
bi_index quadratic = bi_fma_f32(b, bi_neg(e2_over_2),
cos ? cosx : sinx,
bi_negzero(), BI_ROUND_NONE);
bi_negzero());
/* e f'(x) - (e^2/2) f''(x) */
bi_instr *I = bi_fma_f32_to(b, bi_temp(b->shader), e,
cos ? bi_neg(sinx) : cosx,
quadratic, BI_ROUND_NONE);
quadratic);
I->clamp = BI_CLAMP_CLAMP_M1_1;
/* f(x) + e f'(x) - (e^2/2) f''(x) */
bi_fadd_f32_to(b, dst, I->dest[0], cos ? cosx : sinx, BI_ROUND_NONE);
bi_fadd_f32_to(b, dst, I->dest[0], cos ? cosx : sinx);
}
/* The XOR lane op is useful for derivative calculation, but was added in v7.
@ -2056,7 +2050,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
bi_index s1 = comps > 1 ?
bi_word(idx, instr->src[0].swizzle[1]) : s0;
bi_v2f32_to_v2f16_to(b, dst, s0, s1, BI_ROUND_NONE);
bi_v2f32_to_v2f16_to(b, dst, s0, s1);
return;
/* Vectorized downcasts */
@ -2095,9 +2089,9 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
bi_half(s1, false));
if (instr->op == nir_op_u2f16)
bi_v2u16_to_v2f16_to(b, dst, t, BI_ROUND_NONE);
bi_v2u16_to_v2f16_to(b, dst, t);
else
bi_v2s16_to_v2f16_to(b, dst, t, BI_ROUND_NONE);
bi_v2s16_to_v2f16_to(b, dst, t);
return;
}
@ -2158,18 +2152,18 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
switch (instr->op) {
case nir_op_ffma:
bi_fma_to(b, sz, dst, s0, s1, s2, BI_ROUND_NONE);
bi_fma_to(b, sz, dst, s0, s1, s2);
break;
case nir_op_fmul:
bi_fma_to(b, sz, dst, s0, s1, bi_negzero(), BI_ROUND_NONE);
bi_fma_to(b, sz, dst, s0, s1, bi_negzero());
break;
case nir_op_fsub:
s1 = bi_neg(s1);
FALLTHROUGH;
case nir_op_fadd:
bi_fadd_to(b, sz, dst, s0, s1, BI_ROUND_NONE);
bi_fadd_to(b, sz, dst, s0, s1);
break;
case nir_op_fsat: {
@ -2245,7 +2239,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
break;
case nir_op_ldexp:
bi_ldexp_to(b, sz, dst, s0, s1, BI_ROUND_NONE);
bi_ldexp_to(b, sz, dst, s0, s1);
break;
case nir_op_b8csel:
@ -2290,7 +2284,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
case nir_op_fddy_must_abs_mali: {
bi_index bit = bi_imm_u32(instr->op == nir_op_fddx_must_abs_mali ? 1 : 2);
bi_index adjacent = bi_clper_xor(b, s0, bit);
bi_fadd_to(b, sz, dst, adjacent, bi_neg(s0), BI_ROUND_NONE);
bi_fadd_to(b, sz, dst, adjacent, bi_neg(s0));
break;
}
@ -2355,7 +2349,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
BI_SUBGROUP_SUBGROUP4);
}
bi_fadd_to(b, sz, dst, right, bi_neg(left), BI_ROUND_NONE);
bi_fadd_to(b, sz, dst, right, bi_neg(left));
break;
}
@ -2365,45 +2359,45 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
case nir_op_f2i32:
if (src_sz == 32)
bi_f32_to_s32_to(b, dst, s0, BI_ROUND_RTZ);
bi_f32_to_s32_to(b, dst, s0);
else
bi_f16_to_s32_to(b, dst, s0, BI_ROUND_RTZ);
bi_f16_to_s32_to(b, dst, s0);
break;
/* Note 32-bit sources => no vectorization, so 32-bit works */
case nir_op_f2u16:
if (src_sz == 32)
bi_f32_to_u32_to(b, dst, s0, BI_ROUND_RTZ);
bi_f32_to_u32_to(b, dst, s0);
else
bi_v2f16_to_v2u16_to(b, dst, s0, BI_ROUND_RTZ);
bi_v2f16_to_v2u16_to(b, dst, s0);
break;
case nir_op_f2i16:
if (src_sz == 32)
bi_f32_to_s32_to(b, dst, s0, BI_ROUND_RTZ);
bi_f32_to_s32_to(b, dst, s0);
else
bi_v2f16_to_v2s16_to(b, dst, s0, BI_ROUND_RTZ);
bi_v2f16_to_v2s16_to(b, dst, s0);
break;
case nir_op_f2u32:
if (src_sz == 32)
bi_f32_to_u32_to(b, dst, s0, BI_ROUND_RTZ);
bi_f32_to_u32_to(b, dst, s0);
else
bi_f16_to_u32_to(b, dst, s0, BI_ROUND_RTZ);
bi_f16_to_u32_to(b, dst, s0);
break;
case nir_op_u2f16:
if (src_sz == 32)
bi_v2u16_to_v2f16_to(b, dst, bi_half(s0, false), BI_ROUND_RTZ);
bi_v2u16_to_v2f16_to(b, dst, bi_half(s0, false));
else if (src_sz == 16)
bi_v2u16_to_v2f16_to(b, dst, s0, BI_ROUND_RTZ);
bi_v2u16_to_v2f16_to(b, dst, s0);
else if (src_sz == 8)
bi_v2u8_to_v2f16_to(b, dst, s0);
break;
case nir_op_u2f32:
if (src_sz == 32)
bi_u32_to_f32_to(b, dst, s0, BI_ROUND_RTZ);
bi_u32_to_f32_to(b, dst, s0);
else if (src_sz == 16)
bi_u16_to_f32_to(b, dst, s0);
else
@ -2412,9 +2406,9 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
case nir_op_i2f16:
if (src_sz == 32)
bi_v2s16_to_v2f16_to(b, dst, bi_half(s0, false), BI_ROUND_RTZ);
bi_v2s16_to_v2f16_to(b, dst, bi_half(s0, false));
else if (src_sz == 16)
bi_v2s16_to_v2f16_to(b, dst, s0, BI_ROUND_RTZ);
bi_v2s16_to_v2f16_to(b, dst, s0);
else if (src_sz == 8)
bi_v2s8_to_v2f16_to(b, dst, s0);
break;
@ -2423,7 +2417,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
assert(src_sz == 32 || src_sz == 16 || src_sz == 8);
if (src_sz == 32)
bi_s32_to_f32_to(b, dst, s0, BI_ROUND_RTZ);
bi_s32_to_f32_to(b, dst, s0);
else if (src_sz == 16)
bi_s16_to_f32_to(b, dst, s0);
else if (src_sz == 8)
@ -2732,7 +2726,9 @@ bi_emit_texc_array_index(bi_builder *b, bi_index idx, nir_alu_type T)
* 0, dt - 1). So we use round RTE, clamping is handled at the data
* structure level */
return bi_f32_to_u32(b, idx, BI_ROUND_NONE);
bi_instr *I = bi_f32_to_u32_to(b, bi_temp(b->shader), idx);
I->round = BI_ROUND_NONE;
return I->dest[0];
}
/* TEXC's explicit and bias LOD modes requires the LOD to be transformed to a
@ -2760,16 +2756,15 @@ bi_emit_texc_lod_88(bi_builder *b, bi_index lod, bool fp16)
bi_instr *fsat = bi_fma_f32_to(b, bi_temp(b->shader),
fp16 ? bi_half(lod, false) : lod,
bi_imm_f32(1.0f / max_lod), bi_negzero(), BI_ROUND_NONE);
bi_imm_f32(1.0f / max_lod), bi_negzero());
fsat->clamp = BI_CLAMP_CLAMP_M1_1;
bi_index fmul = bi_fma_f32(b, fsat->dest[0], bi_imm_f32(max_lod * 256.0f),
bi_negzero(), BI_ROUND_NONE);
bi_negzero());
return bi_mkvec_v2i16(b,
bi_half(bi_f32_to_s32(b, fmul, BI_ROUND_RTZ), false),
bi_imm_u16(0));
bi_half(bi_f32_to_s32(b, fmul), false), bi_imm_u16(0));
}
/* FETCH takes a 32-bit staging register containing the LOD as an integer in
@ -2911,17 +2906,14 @@ bi_emit_cube_coord(bi_builder *b, bi_index coord,
bi_index rcp = bi_frcp_f32(b, maxxyz);
/* Calculate 0.5 * (1.0 / max{x, y, z}) */
bi_index fma1 = bi_fma_f32(b, rcp, bi_imm_f32(0.5f), bi_negzero(),
BI_ROUND_NONE);
bi_index fma1 = bi_fma_f32(b, rcp, bi_imm_f32(0.5f), bi_negzero());
/* Transform the coordinates */
*s = bi_temp(b->shader);
*t = bi_temp(b->shader);
bi_instr *S = bi_fma_f32_to(b, *s, fma1, ssel, bi_imm_f32(0.5f),
BI_ROUND_NONE);
bi_instr *T = bi_fma_f32_to(b, *t, fma1, tsel, bi_imm_f32(0.5f),
BI_ROUND_NONE);
bi_instr *S = bi_fma_f32_to(b, *s, fma1, ssel, bi_imm_f32(0.5f));
bi_instr *T = bi_fma_f32_to(b, *t, fma1, tsel, bi_imm_f32(0.5f));
S->clamp = BI_CLAMP_CLAMP_0_1;
T->clamp = BI_CLAMP_CLAMP_0_1;

View file

@ -278,6 +278,7 @@ def combine_ir_variants(instructions, key):
# Great, we've checked srcs/immediates are consistent and we've summed over
# modifiers
return {
'key': key,
'srcs': variants[0]['srcs'],
'dests': variants[0]['dests'],
'staging': variants[0]['staging'],

View file

@ -186,6 +186,6 @@ TEST_F(ConstantFold, OtherOperationsShouldNotFold)
bi_index zero = bi_fau(bir_fau(BIR_FAU_IMMEDIATE | 0), false);
bi_index reg = bi_register(0);
EXPECT_NOT_FOLD(bi_fma_f32_to(b, reg, zero, zero, zero, BI_ROUND_NONE));
EXPECT_NOT_FOLD(bi_fadd_f32_to(b, reg, zero, zero, BI_ROUND_NONE));
EXPECT_NOT_FOLD(bi_fma_f32_to(b, reg, zero, zero, zero));
EXPECT_NOT_FOLD(bi_fadd_f32_to(b, reg, zero, zero));
}

View file

@ -63,17 +63,17 @@ protected:
TEST_F(Optimizer, FusedFABSNEG)
{
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), y, BI_ROUND_NONE),
bi_fadd_f32_to(b, reg, bi_abs(x), y, BI_ROUND_NONE));
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), y),
bi_fadd_f32_to(b, reg, bi_abs(x), y));
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_neg(x)), y, BI_ROUND_NONE),
bi_fadd_f32_to(b, reg, bi_neg(x), y, BI_ROUND_NONE));
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_neg(x)), y),
bi_fadd_f32_to(b, reg, bi_neg(x), y));
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, negabsx), y, BI_ROUND_NONE),
bi_fadd_f32_to(b, reg, negabsx, y, BI_ROUND_NONE));
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, negabsx), y),
bi_fadd_f32_to(b, reg, negabsx, y));
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, x), y, BI_ROUND_NONE),
bi_fadd_f32_to(b, reg, x, y, BI_ROUND_NONE));
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, x), y),
bi_fadd_f32_to(b, reg, x, y));
CASE(bi_fmin_f32_to(b, reg, bi_fabsneg_f32(b, negabsx), bi_neg(y)),
bi_fmin_f32_to(b, reg, negabsx, bi_neg(y)));
@ -81,8 +81,8 @@ TEST_F(Optimizer, FusedFABSNEG)
TEST_F(Optimizer, FusedFABSNEGForFP16)
{
CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, negabsx), y, BI_ROUND_NONE),
bi_fadd_v2f16_to(b, reg, negabsx, y, BI_ROUND_NONE));
CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, negabsx), y),
bi_fadd_v2f16_to(b, reg, negabsx, y));
CASE(bi_fmin_v2f16_to(b, reg, bi_fabsneg_v2f16(b, negabsx), bi_neg(y)),
bi_fmin_v2f16_to(b, reg, negabsx, bi_neg(y)));
@ -91,26 +91,26 @@ TEST_F(Optimizer, FusedFABSNEGForFP16)
TEST_F(Optimizer, FuseFADD_F32WithEqualSourcesAbsAbsAndClamp)
{
CASE({
bi_instr *I = bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), bi_abs(x), BI_ROUND_NONE);
bi_instr *I = bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x), BI_ROUND_NONE);
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE({
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_fabsneg_f32(b, bi_abs(x)), BI_ROUND_NONE);
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_fabsneg_f32(b, bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x), BI_ROUND_NONE);
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE({
bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), bi_abs(x), BI_ROUND_NONE));
bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x), BI_ROUND_NONE);
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
}
@ -118,26 +118,26 @@ TEST_F(Optimizer, FuseFADD_F32WithEqualSourcesAbsAbsAndClamp)
TEST_F(Optimizer, FuseFADD_V2F16WithDifferentSourcesAbsAbsAndClamp)
{
CASE({
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(y), BI_ROUND_NONE);
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(y));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y), BI_ROUND_NONE);
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE({
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(y)), BI_ROUND_NONE);
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(y)));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y), BI_ROUND_NONE);
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE({
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(y), BI_ROUND_NONE));
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(y)));
I->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y), BI_ROUND_NONE);
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
}
@ -145,57 +145,57 @@ TEST_F(Optimizer, FuseFADD_V2F16WithDifferentSourcesAbsAbsAndClamp)
TEST_F(Optimizer, AvoidFADD_V2F16WithEqualSourcesAbsAbsAndClamp)
{
NEGCASE({
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(x), BI_ROUND_NONE);
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
NEGCASE({
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(x)), BI_ROUND_NONE);
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
NEGCASE({
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(x), BI_ROUND_NONE));
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
}
TEST_F(Optimizer, SwizzlesComposedForFP16)
{
CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), y, BI_ROUND_NONE),
bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y, BI_ROUND_NONE));
CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), y),
bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y));
CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, negabsx), true, false), y, BI_ROUND_NONE),
bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y, BI_ROUND_NONE));
CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, negabsx), true, false), y),
bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y));
CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), true, false), y, BI_ROUND_NONE),
bi_fadd_v2f16_to(b, reg, negabsx, y, BI_ROUND_NONE));
CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), true, false), y),
bi_fadd_v2f16_to(b, reg, negabsx, y));
CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, false)), true, false), y, BI_ROUND_NONE),
bi_fadd_v2f16_to(b, reg, bi_half(negabsx, false), y, BI_ROUND_NONE));
CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, false)), true, false), y),
bi_fadd_v2f16_to(b, reg, bi_half(negabsx, false), y));
CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, true)), true, false), y, BI_ROUND_NONE),
bi_fadd_v2f16_to(b, reg, bi_half(negabsx, true), y, BI_ROUND_NONE));
CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, true)), true, false), y),
bi_fadd_v2f16_to(b, reg, bi_half(negabsx, true), y));
}
TEST_F(Optimizer, PreserveWidens)
{
/* Check that widens are passed through */
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, false)), y, BI_ROUND_NONE),
bi_fadd_f32_to(b, reg, bi_half(negabsx, false), y, BI_ROUND_NONE));
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, false)), y),
bi_fadd_f32_to(b, reg, bi_half(negabsx, false), y));
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, true)), y, BI_ROUND_NONE),
bi_fadd_f32_to(b, reg, bi_half(negabsx, true), y, BI_ROUND_NONE));
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, true)), y),
bi_fadd_f32_to(b, reg, bi_half(negabsx, true), y));
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(x, true)), bi_fabsneg_f32(b, bi_half(x, false)), BI_ROUND_NONE),
bi_fadd_f32_to(b, reg, bi_half(x, true), bi_half(x, false), BI_ROUND_NONE));
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(x, true)), bi_fabsneg_f32(b, bi_half(x, false))),
bi_fadd_f32_to(b, reg, bi_half(x, true), bi_half(x, false)));
}
TEST_F(Optimizer, DoNotMixSizesForFABSNEG)
{
/* Refuse to mix sizes for fabsneg, that's wrong */
NEGCASE(bi_fadd_f32_to(b, reg, bi_fabsneg_v2f16(b, negabsx), y, BI_ROUND_NONE));
NEGCASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_f32(b, negabsx), y, BI_ROUND_NONE));
NEGCASE(bi_fadd_f32_to(b, reg, bi_fabsneg_v2f16(b, negabsx), y));
NEGCASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_f32(b, negabsx), y));
}
TEST_F(Optimizer, AvoidZeroAndFABSNEGFootguns)
@ -206,27 +206,27 @@ TEST_F(Optimizer, AvoidZeroAndFABSNEGFootguns)
bi_index zero = bi_zero();
NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), zero, BI_ROUND_NONE), y, BI_ROUND_NONE));
NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_neg(x), zero, BI_ROUND_NONE), y, BI_ROUND_NONE));
NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_neg(bi_abs(x)), zero, BI_ROUND_NONE), y, BI_ROUND_NONE));
NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, x, zero, BI_ROUND_NONE), y, BI_ROUND_NONE));
NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), zero), y));
NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_neg(x), zero), y));
NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_neg(bi_abs(x)), zero), y));
NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, x, zero), y));
}
TEST_F(Optimizer, ClampsPropagated)
{
CASE({
bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, x, y, BI_ROUND_NONE));
bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, x, y));
I->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, x, y, BI_ROUND_NONE);
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
CASE({
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y, BI_ROUND_NONE));
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y, BI_ROUND_NONE);
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
}
@ -235,62 +235,62 @@ TEST_F(Optimizer, ClampsPropagated)
TEST_F(Optimizer, ClampsComposed)
{
CASE({
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y, BI_ROUND_NONE);
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_M1_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, x, y, BI_ROUND_NONE);
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE({
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y, BI_ROUND_NONE);
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, x, y, BI_ROUND_NONE);
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE({
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y, BI_ROUND_NONE);
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_INF;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, x, y, BI_ROUND_NONE);
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
CASE({
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y, BI_ROUND_NONE);
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_M1_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y, BI_ROUND_NONE);
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE({
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y, BI_ROUND_NONE);
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y, BI_ROUND_NONE);
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE({
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y, BI_ROUND_NONE);
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_INF;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y, BI_ROUND_NONE);
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
}
@ -298,12 +298,12 @@ TEST_F(Optimizer, ClampsComposed)
TEST_F(Optimizer, DoNotMixSizesWhenClamping)
{
NEGCASE({
bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_v2f16(b, x, y, BI_ROUND_NONE));
bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_v2f16(b, x, y));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
NEGCASE({
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_f32(b, x, y, BI_ROUND_NONE));
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_f32(b, x, y));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
}
@ -314,12 +314,12 @@ TEST_F(Optimizer, DoNotUseAdditionByZeroForClamps)
/* We can't use addition by 0.0 for clamps due to signed zeros. */
NEGCASE({
bi_instr *I = bi_fadd_f32_to(b, reg, bi_fadd_f32(b, x, y, BI_ROUND_NONE), zero, BI_ROUND_NONE);
bi_instr *I = bi_fadd_f32_to(b, reg, bi_fadd_f32(b, x, y), zero);
I->clamp = BI_CLAMP_CLAMP_M1_1;
});
NEGCASE({
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y, BI_ROUND_NONE), zero, BI_ROUND_NONE);
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y), zero);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
}

View file

@ -56,7 +56,7 @@ TEST_F(SchedulerPredicates, MOV)
TEST_F(SchedulerPredicates, FMA)
{
bi_instr *fma = bi_fma_f32_to(b, TMP(), TMP(), TMP(), bi_zero(), BI_ROUND_NONE);
bi_instr *fma = bi_fma_f32_to(b, TMP(), TMP(), TMP(), bi_zero());
ASSERT_TRUE(bi_can_fma(fma));
ASSERT_FALSE(bi_can_add(fma));
ASSERT_FALSE(bi_must_message(fma));
@ -96,12 +96,12 @@ TEST_F(SchedulerPredicates, BLEND)
TEST_F(SchedulerPredicates, RestrictionsOnModifiersOfSameCycleTemporaries)
{
bi_instr *fadd = bi_fadd_f32_to(b, TMP(), TMP(), TMP(), BI_ROUND_NONE);
bi_instr *fadd = bi_fadd_f32_to(b, TMP(), TMP(), TMP());
ASSERT_TRUE(bi_reads_t(fadd, 0));
for (unsigned i = 0; i < 2; ++i) {
for (unsigned j = 0; j < 2; ++j) {
bi_instr *fadd = bi_fadd_f32_to(b, TMP(), TMP(), TMP(), BI_ROUND_NONE);
bi_instr *fadd = bi_fadd_f32_to(b, TMP(), TMP(), TMP());
fadd->src[i] = bi_swz_16(TMP(), j, j);
ASSERT_TRUE(bi_reads_t(fadd, 1 - i));
ASSERT_FALSE(bi_reads_t(fadd, i));
@ -115,7 +115,7 @@ TEST_F(SchedulerPredicates, RestrictionsOnFAddV2F16)
bi_index y = bi_register(1);
/* Basic */
bi_instr *fadd = bi_fadd_v2f16_to(b, TMP(), x, x, BI_ROUND_NONE);
bi_instr *fadd = bi_fadd_v2f16_to(b, TMP(), x, x);
ASSERT_TRUE(bi_can_fma(fadd));
ASSERT_TRUE(bi_can_add(fadd));

View file

@ -60,45 +60,37 @@ TEST_F(AddImm, Basic) {
CASE(bi_mov_i32_to(b, bi_register(63), bi_imm_u32(0xABAD1DEA)),
bi_iadd_imm_i32_to(b, bi_register(63), bi_zero(), 0xABAD1DEA));
CASE(bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0), BI_ROUND_NONE),
CASE(bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0)),
bi_fadd_imm_f32_to(b, bi_register(1), bi_register(2), fui(42.0)));
CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_imm_f32(42.0), BI_ROUND_NONE),
CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_imm_f32(42.0)),
bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)), fui(42.0)));
CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_neg(bi_imm_f32(42.0)), BI_ROUND_NONE),
CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_neg(bi_imm_f32(42.0))),
bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)), fui(-42.0)));
}
TEST_F(AddImm, Commutativty) {
CASE(bi_fadd_f32_to(b, bi_register(1), bi_imm_f32(42.0), bi_register(2), BI_ROUND_NONE),
CASE(bi_fadd_f32_to(b, bi_register(1), bi_imm_f32(42.0), bi_register(2)),
bi_fadd_imm_f32_to(b, bi_register(1), bi_register(2), fui(42.0)));
}
TEST_F(AddImm, NoModifiers) {
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0),
BI_ROUND_RTP));
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_abs(bi_register(2)), bi_imm_f32(42.0),
BI_ROUND_NONE));
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_neg(bi_register(2)), bi_imm_f32(42.0),
BI_ROUND_NONE));
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_f32(42.0),
BI_ROUND_NONE));
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_abs(bi_register(2)), bi_imm_f32(42.0)));
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_neg(bi_register(2)), bi_imm_f32(42.0)));
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_f32(42.0)));
}
TEST_F(AddImm, NoClamp) {
NEGCASE({
bi_instr *I = bi_fadd_f32_to(b, bi_register(1), bi_register(2),
bi_imm_f32(42.0), BI_ROUND_NONE);
bi_imm_f32(42.0));
I->clamp = BI_CLAMP_CLAMP_M1_1;
});
}
TEST_F(AddImm, OtherTypes) {
CASE(bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0), BI_ROUND_NONE),
CASE(bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0)),
bi_fadd_imm_v2f16_to(b, bi_register(1), bi_register(2), 0x51405140));
CASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
@ -119,7 +111,6 @@ TEST_F(AddImm, OtherTypes) {
CASE(bi_iadd_v4s8_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_v4i8_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));
NEGCASE(bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0), BI_ROUND_RTZ));
NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_v2u16_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), true));
@ -135,3 +126,16 @@ TEST_F(AddImm, Int8) {
NEGCASE(bi_iadd_v4u8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_v4s8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false));
}
TEST_F(AddImm, OnlyRTE) {
NEGCASE({
bi_instr *I = bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0));
I->round = BI_ROUND_RTP;
});
NEGCASE({
bi_instr *I = bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0));
I->round = BI_ROUND_RTZ;
});
}

View file

@ -102,7 +102,7 @@ TEST_F(LowerIsel, IntegerCSEL) {
}
TEST_F(LowerIsel, Smoke) {
NEGCASE(bi_fadd_f32_to(b, reg, reg, reg, BI_ROUND_RTP));
NEGCASE(bi_fadd_f32_to(b, reg, reg, reg));
NEGCASE(bi_csel_s32_to(b, reg, reg, reg, reg, reg, BI_CMPF_LT));
NEGCASE(bi_csel_u32_to(b, reg, reg, reg, reg, reg, BI_CMPF_LT));
}

View file

@ -65,42 +65,42 @@ TEST_F(ValhallPacking, Moves) {
}
TEST_F(ValhallPacking, Fadd) {
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_register(2), BI_ROUND_NONE),
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_register(2)),
0x00a4c00000000201ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_abs(bi_register(2)), BI_ROUND_NONE),
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_abs(bi_register(2))),
0x00a4c02000000201ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_register(2)), BI_ROUND_NONE),
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_register(2))),
0x00a4c01000000201ULL);
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_swz_16(bi_register(1), false, false),
bi_swz_16(bi_register(0), true, true), BI_ROUND_NONE),
bi_swz_16(bi_register(0), true, true)),
0x00a5c0000c000001ULL);
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(1), bi_register(0), BI_ROUND_NONE),
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(1), bi_register(0)),
0x00a5c00028000001ULL);
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(1),
bi_swz_16(bi_register(0), true, false), BI_ROUND_NONE),
bi_swz_16(bi_register(0), true, false)),
0x00a5c00024000001ULL);
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_discard(bi_abs(bi_register(0))),
bi_neg(zero), BI_ROUND_NONE),
bi_neg(zero)),
0x00a5c0902800c040ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1),
zero, BI_ROUND_NONE),
zero),
0x00a4c0000000c001ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_neg(zero), BI_ROUND_NONE),
bi_neg(zero)),
0x00a4c0100000c001ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_half(bi_register(0), true), BI_ROUND_NONE),
bi_half(bi_register(0), true)),
0x00a4c00008000001ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_half(bi_register(0), false), BI_ROUND_NONE),
bi_half(bi_register(0), false)),
0x00a4c00004000001ULL);
}
@ -112,8 +112,7 @@ TEST_F(ValhallPacking, Clper) {
TEST_F(ValhallPacking, Clamps) {
bi_instr *I = bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_neg(bi_abs(bi_register(2))),
BI_ROUND_NONE);
bi_neg(bi_abs(bi_register(2))));
CASE(I, 0x00a4c03000000201ULL);
I->clamp = BI_CLAMP_CLAMP_M1_1;
@ -123,7 +122,7 @@ TEST_F(ValhallPacking, Clamps) {
TEST_F(ValhallPacking, Misc) {
CASE(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 4), false),
bi_neg(zero), BI_ROUND_NONE),
bi_neg(zero)),
0x00b2c10400c08841ULL);
CASE(bi_fround_f32_to(b, bi_register(2), bi_discard(bi_neg(bi_register(2))),
@ -164,7 +163,7 @@ TEST_F(ValhallPacking, Comparions) {
}
TEST_F(ValhallPacking, Conversions) {
CASE(bi_v2s16_to_v2f16_to(b, bi_register(2), bi_discard(bi_register(2)), BI_ROUND_NONE),
CASE(bi_v2s16_to_v2f16_to(b, bi_register(2), bi_discard(bi_register(2))),
0x0090c22000070042);
}
@ -219,7 +218,7 @@ TEST_F(ValhallPacking, Transcendentals) {
CASE(bi_frsq_f32_to(b, bi_register(2), bi_register(1)),
0x009cc20000020001);
CASE(bi_fma_rscale_f32_to(b, bi_register(0), bi_discard(bi_register(1)), bi_discard(bi_register(2)), bi_neg(zero), bi_discard(bi_register(0)), BI_ROUND_NONE, BI_SPECIAL_LEFT),
CASE(bi_fma_rscale_f32_to(b, bi_register(0), bi_discard(bi_register(1)), bi_discard(bi_register(2)), bi_neg(zero), bi_discard(bi_register(0)), BI_SPECIAL_LEFT),
0x0162c00440c04241);
}

View file

@ -66,53 +66,47 @@ protected:
TEST_F(ValidateFau, One64BitUniformSlot)
{
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(3),
unif, BI_ROUND_NONE));
unif));
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_word(unif, 1),
unif, BI_ROUND_NONE));
VALID(bi_fma_f32_to(b, bi_register(1), unif, unif, bi_word(unif, 1),
BI_ROUND_NONE));
INVALID(bi_fma_f32_to(b, bi_register(1), unif, unif2, bi_register(1),
BI_ROUND_NONE));
INVALID(bi_fma_f32_to(b, bi_register(1), unif, unif2, bi_word(unif, 1),
BI_ROUND_NONE));
unif));
VALID(bi_fma_f32_to(b, bi_register(1), unif, unif, bi_word(unif, 1)));
INVALID(bi_fma_f32_to(b, bi_register(1), unif, unif2, bi_register(1)));
INVALID(bi_fma_f32_to(b, bi_register(1), unif, unif2, bi_word(unif, 1)));
/* Crafted case that appears correct at first glance and was erronously
* marked as valid in early versions of the validator.
*/
INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 0), false),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 1), true),
BI_ROUND_NONE));
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 1), true)));
}
TEST_F(ValidateFau, Combined64BitUniformsConstants)
{
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_word(unif, 1),
unif, BI_ROUND_NONE));
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), zero,
unif, BI_ROUND_NONE));
VALID(bi_fma_f32_to(b, bi_register(1), zero, imm1, imm1, BI_ROUND_NONE));
INVALID(bi_fma_f32_to(b, bi_register(1), zero, bi_word(unif, 1),
unif, BI_ROUND_NONE));
INVALID(bi_fma_f32_to(b, bi_register(1), zero, imm1, imm2, BI_ROUND_NONE));
unif));
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), zero, unif));
VALID(bi_fma_f32_to(b, bi_register(1), zero, imm1, imm1));
INVALID(bi_fma_f32_to(b, bi_register(1), zero, bi_word(unif, 1), unif));
INVALID(bi_fma_f32_to(b, bi_register(1), zero, imm1, imm2));
}
TEST_F(ValidateFau, UniformsOnlyInDefaultMode)
{
INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_word(unif, 1),
lane_id, BI_ROUND_NONE));
lane_id));
INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_word(unif, 1),
core_id, BI_ROUND_NONE));
core_id));
}
TEST_F(ValidateFau, SingleSpecialImmediate)
{
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(2),
lane_id, BI_ROUND_NONE));
lane_id));
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(2),
core_id, BI_ROUND_NONE));
core_id));
INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), lane_id,
core_id, BI_ROUND_NONE));
core_id));
}
TEST_F(ValidateFau, SmokeTests)
@ -120,5 +114,5 @@ TEST_F(ValidateFau, SmokeTests)
VALID(bi_mov_i32_to(b, bi_register(1), bi_register(2)));
VALID(bi_mov_i32_to(b, bi_register(1), unif));
VALID(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)),
unif, bi_neg(zero), BI_ROUND_NONE));
unif, bi_neg(zero)));
}