mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
nir/lower_idiv: Add 8-bit and 16-bit lowering path
Roundtrip to a larger float and divide there. The extra details for mod/rem are handled directly in integer space to simplify verification of rounding details. The one issue is that the mantissa might be rounded down which will cause issues; adding 1 unconditionally (proposed by Jonathan Marek) fixes this. The lowerings here were tested exhaustively on all pairs of 16-bit integers. v2: Update idiv lowering per Rhys Perry's comment. v3: Rewrite lowerings. v4: Remove useless ftrunc, fix 8-bit issue, simplify code. v5: Remove useless ffloor Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Tested-by: Danylo Piliaiev <dpiliaiev@igalia.com> Tested-by: Jason Ekstrand <jason@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8339>
This commit is contained in:
parent
e91dec1327
commit
1286e73c2c
1 changed files with 43 additions and 2 deletions
|
|
@ -198,6 +198,45 @@ convert_instr_precise(nir_builder *bld, nir_op op,
|
|||
return emit_idiv(bld, numer, denom, op);
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
convert_instr_small(nir_builder *b, nir_op op,
|
||||
nir_ssa_def *numer, nir_ssa_def *denom)
|
||||
{
|
||||
unsigned sz = numer->bit_size;
|
||||
nir_alu_type int_type = nir_op_infos[op].output_type | sz;
|
||||
nir_alu_type float_type = nir_type_float | (sz * 2);
|
||||
|
||||
nir_ssa_def *p = nir_type_convert(b, numer, int_type, float_type);
|
||||
nir_ssa_def *q = nir_type_convert(b, denom, int_type, float_type);
|
||||
|
||||
/* Take 1/q but offset mantissa by 1 to correct for rounding. This is
|
||||
* needed for correct results and has been checked exhaustively for
|
||||
* all pairs of 16-bit integers */
|
||||
nir_ssa_def *rcp = nir_iadd_imm(b, nir_frcp(b, q), 1);
|
||||
|
||||
/* Divide by multiplying by adjusted reciprocal */
|
||||
nir_ssa_def *res = nir_fmul(b, p, rcp);
|
||||
|
||||
/* Convert back to integer space with rounding inferred by type */
|
||||
res = nir_type_convert(b, res, float_type, int_type);
|
||||
|
||||
/* Get remainder given the quotient */
|
||||
if (op == nir_op_umod || op == nir_op_imod || op == nir_op_irem)
|
||||
res = nir_isub(b, numer, nir_imul(b, denom, res));
|
||||
|
||||
/* Adjust for sign, see constant folding definition */
|
||||
if (op == nir_op_imod) {
|
||||
nir_ssa_def *zero = nir_imm_zero(b, 1, sz);
|
||||
nir_ssa_def *diff_sign =
|
||||
nir_ine(b, nir_ige(b, numer, zero), nir_ige(b, denom, zero));
|
||||
|
||||
nir_ssa_def *adjust = nir_iand(b, diff_sign, nir_ine(b, res, zero));
|
||||
res = nir_iadd(b, res, nir_bcsel(b, adjust, denom, zero));
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
lower_idiv(nir_builder *b, nir_instr *instr, void *_data)
|
||||
{
|
||||
|
|
@ -207,7 +246,9 @@ lower_idiv(nir_builder *b, nir_instr *instr, void *_data)
|
|||
nir_ssa_def *numer = nir_ssa_for_alu_src(b, alu, 0);
|
||||
nir_ssa_def *denom = nir_ssa_for_alu_src(b, alu, 1);
|
||||
|
||||
if (*path == nir_lower_idiv_precise)
|
||||
if (numer->bit_size < 32)
|
||||
return convert_instr_small(b, alu->op, numer, denom);
|
||||
else if (*path == nir_lower_idiv_precise)
|
||||
return convert_instr_precise(b, alu->op, numer, denom);
|
||||
else
|
||||
return convert_instr(b, alu->op, numer, denom);
|
||||
|
|
@ -221,7 +262,7 @@ inst_is_idiv(const nir_instr *instr, UNUSED const void *_state)
|
|||
|
||||
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
||||
|
||||
if (alu->dest.dest.ssa.bit_size != 32)
|
||||
if (alu->dest.dest.ssa.bit_size > 32)
|
||||
return false;
|
||||
|
||||
switch (alu->op) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue