pan/bi: Skip over data registers in port assignment

Data registers bypass the usual port assignment mechanism entirely, so add
some class props to describe this and respect them when assigning ports.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4242>
This commit is contained in:
Alyssa Rosenzweig 2020-03-18 12:08:28 -04:00 committed by Marge Bot
parent 32e5a7e6e9
commit d4fbf751cf
3 changed files with 28 additions and 10 deletions

View file

@ -110,15 +110,28 @@ bi_assign_ports(bi_bundle now, bi_bundle prev)
{
struct bi_registers regs = { 0 };
/* We assign ports for the main register mechanism. Special ops
* use the data registers, which have their own mechanism entirely
* and thus get skipped over here. */
unsigned read_dreg = now.add &&
bi_class_props[now.add->type] & BI_DATA_REG_SRC;
unsigned write_dreg = prev.add &&
bi_class_props[prev.add->type] & BI_DATA_REG_DEST;
/* First, assign reads */
if (now.fma)
bi_foreach_src(now.fma, src)
bi_assign_port_read(&regs, now.fma->src[src]);
if (now.add)
bi_foreach_src(now.add, src)
bi_assign_port_read(&regs, now.add->src[src]);
if (now.add) {
bi_foreach_src(now.add, src) {
if (!(src == 0 && read_dreg))
bi_assign_port_read(&regs, now.add->src[src]);
}
}
/* Next, assign writes */
@ -127,7 +140,7 @@ bi_assign_ports(bi_bundle now, bi_bundle prev)
regs.write_fma = true;
}
if (prev.add && prev.add->dest & BIR_INDEX_REGISTER) {
if (prev.add && prev.add->dest & BIR_INDEX_REGISTER && !write_dreg) {
unsigned r = prev.add->dest & ~BIR_INDEX_REGISTER;
if (regs.write_fma) {

View file

@ -39,16 +39,16 @@ unsigned bi_class_props[BI_NUM_CLASSES] = {
[BI_FMA] = BI_ROUNDMODE | BI_SCHED_FMA,
[BI_FREXP] = BI_SCHED_ALL,
[BI_ISUB] = BI_GENERIC | BI_SCHED_ALL,
[BI_LOAD] = BI_SCHED_HI_LATENCY | BI_VECTOR,
[BI_LOAD_UNIFORM] = BI_SCHED_HI_LATENCY | BI_VECTOR,
[BI_LOAD_ATTR] = BI_SCHED_HI_LATENCY | BI_VECTOR,
[BI_LOAD_VAR] = BI_SCHED_HI_LATENCY | BI_VECTOR,
[BI_LOAD] = BI_SCHED_HI_LATENCY | BI_VECTOR | BI_DATA_REG_DEST,
[BI_LOAD_UNIFORM] = BI_SCHED_HI_LATENCY | BI_VECTOR | BI_DATA_REG_DEST,
[BI_LOAD_ATTR] = BI_SCHED_HI_LATENCY | BI_VECTOR | BI_DATA_REG_DEST,
[BI_LOAD_VAR] = BI_SCHED_HI_LATENCY | BI_VECTOR | BI_DATA_REG_DEST,
[BI_LOAD_VAR_ADDRESS] = BI_SCHED_HI_LATENCY,
[BI_MINMAX] = BI_GENERIC | BI_SCHED_ALL,
[BI_MOV] = BI_MODS | BI_SCHED_ALL,
[BI_SHIFT] = BI_SCHED_ALL,
[BI_STORE] = BI_SCHED_HI_LATENCY | BI_VECTOR,
[BI_STORE_VAR] = BI_SCHED_HI_LATENCY | BI_VECTOR,
[BI_STORE] = BI_SCHED_HI_LATENCY | BI_VECTOR | BI_DATA_REG_SRC,
[BI_STORE_VAR] = BI_SCHED_HI_LATENCY | BI_VECTOR | BI_DATA_REG_SRC,
[BI_SPECIAL] = BI_SCHED_ADD | BI_SCHED_SLOW,
[BI_SWIZZLE] = BI_SCHED_ALL | BI_SWIZZLABLE,
[BI_TEX] = BI_SCHED_HI_LATENCY | BI_VECTOR,

View file

@ -113,6 +113,11 @@ extern unsigned bi_class_props[BI_NUM_CLASSES];
/* Intrinsic is vectorized and should read 4 components regardless of writemask */
#define BI_VECTOR (1 << 8)
/* Use a data register for src0/dest respectively, bypassing the usual
* register accessor. Mutually exclusive. */
#define BI_DATA_REG_SRC (1 << 9)
#define BI_DATA_REG_DEST (1 << 10)
/* It can't get any worse than csel4... can it? */
#define BIR_SRC_COUNT 4