pan/bi: Handle fp16/abs scheduling restriction

See previous commit for the packing side. Here we update the scheduler
to accommodate this. Note we don't actually hit this path yet, but it's
good to be proactive.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4382>
This commit is contained in:
Alyssa Rosenzweig 2020-03-30 12:25:20 -04:00 committed by Marge Bot
parent c88f816169
commit bd19e76340
3 changed files with 33 additions and 3 deletions

View file

@ -71,6 +71,24 @@ bi_clause_type_for_ins(bi_instruction *ins)
}
}
/* There is an encoding restriction against FMA fp16 add/min/max
* having both sources with abs(..) with a duplicated source. This is
* due to the packing being order-sensitive, so the ports must end up distinct
* to handle both having abs(..). The swizzle doesn't matter here. Note
* BIR_INDEX_REGISTER generally should not be used pre-schedule (TODO: enforce
* this).
*/
static bool
bi_ambiguous_abs(bi_instruction *ins)
{
        /* Only instruction classes tagged with the quirk flag are affected */
        if (!(bi_class_props[ins->type] & BI_NO_ABS_ABS_FP16_FMA))
                return false;

        /* The restriction is keyed on a 16-bit float destination type */
        if (ins->dest_type != nir_type_float16)
                return false;

        /* Ambiguous when abs is requested on both of the first two sources.
         * The source indices themselves are not compared here, so this
         * reports true whether or not the two sources are actually the same. */
        return ins->src_abs[0] && ins->src_abs[1];
}
/* Eventually, we'll need a proper scheduling, grouping instructions
* into clauses and ordering/assigning grouped instructions to the
* appropriate FMA/ADD slots. Right now we do the dumbest possible
@ -95,7 +113,16 @@ bi_schedule(bi_context *ctx)
bi_clause *u = rzalloc(ctx, bi_clause);
u->bundle_count = 1;
if (props & BI_SCHED_FMA)
/* Check for scheduling restrictions */
bool can_fma = props & BI_SCHED_FMA;
bool can_add = props & BI_SCHED_ADD;
can_fma &= !bi_ambiguous_abs(ins);
assert(can_fma || can_add);
if (can_fma)
u->bundles[0].fma = ins;
else
u->bundles[0].add = ins;

View file

@ -27,7 +27,7 @@
#include "compiler.h"
unsigned bi_class_props[BI_NUM_CLASSES] = {
[BI_ADD] = BI_GENERIC | BI_MODS | BI_SCHED_ALL,
[BI_ADD] = BI_GENERIC | BI_MODS | BI_SCHED_ALL | BI_NO_ABS_ABS_FP16_FMA,
[BI_ATEST] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD,
[BI_BRANCH] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD,
[BI_CMP] = BI_GENERIC | BI_MODS | BI_SCHED_ALL,
@ -45,7 +45,7 @@ unsigned bi_class_props[BI_NUM_CLASSES] = {
[BI_LOAD_ATTR] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
[BI_LOAD_VAR] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
[BI_LOAD_VAR_ADDRESS] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_DATA_REG_DEST,
[BI_MINMAX] = BI_GENERIC | BI_SCHED_ALL,
[BI_MINMAX] = BI_GENERIC | BI_SCHED_ALL | BI_NO_ABS_ABS_FP16_FMA,
[BI_MOV] = BI_SCHED_ALL,
[BI_FMOV] = BI_MODS | BI_SCHED_ALL,
[BI_SHIFT] = BI_SCHED_ALL,

View file

@ -121,6 +121,9 @@ extern unsigned bi_class_props[BI_NUM_CLASSES];
#define BI_DATA_REG_SRC (1 << 9)
#define BI_DATA_REG_DEST (1 << 10)
/* Quirk: cannot encode multiple abs on FMA in fp16 mode */
#define BI_NO_ABS_ABS_FP16_FMA (1 << 11)
/* It can't get any worse than csel4... can it? */
#define BIR_SRC_COUNT 4