freedreno/ir3: HIGH reg workaround for a6xx

It seems like some instructions (noticed this w/ cat3) cannot read HIGH
regs.  cat1 (mov/cov) can, and possibly some/all of cat2.

The blob seems to stick w/ an extra mov into low regs.  So let's do the
same.

This fixes WGID on a6xx, which unsurprisingly is related to a lot of
deqp compute failures.

Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
Rob Clark 2019-02-11 11:39:43 -05:00
parent 947848524d
commit c1a27ba9ba
4 changed files with 26 additions and 3 deletions

View file

@ -1088,7 +1088,7 @@ ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
struct ir3_register *src_reg = __ssa_src(instr, src, IR3_REG_ARRAY);
src_reg->array = src->regs[0]->array;
} else {
__ssa_src(instr, src, 0);
__ssa_src(instr, src, src->regs[0]->flags & IR3_REG_HIGH);
}
debug_assert(!(src->regs[0]->flags & IR3_REG_RELATIV));
instr->cat1.src_type = type;

View file

@ -246,6 +246,16 @@ put_dst(struct ir3_context *ctx, nir_dest *dst)
{
unsigned bit_size = nir_dest_bit_size(*dst);
/* Add an extra mov if the dst value is a HIGH reg.  Not all
* instructions can read from HIGH regs; in the cases where they can,
* ir3_cp will clean up the extra mov.
*/
for (unsigned i = 0; i < ctx->last_dst_n; i++) {
if (ctx->last_dst[i]->regs[0]->flags & IR3_REG_HIGH) {
ctx->last_dst[i] = ir3_MOV(ctx->block, ctx->last_dst[i], TYPE_U32);
}
}
if (bit_size < 32) {
for (unsigned i = 0; i < ctx->last_dst_n; i++) {
struct ir3_instruction *dst = ctx->last_dst[i];
@ -275,6 +285,7 @@ put_dst(struct ir3_context *ctx, nir_dest *dst)
ralloc_free(ctx->last_dst);
}
ctx->last_dst = NULL;
ctx->last_dst_n = 0;
}
@ -346,10 +357,12 @@ ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
return;
}
unsigned flags = src->regs[0]->flags & (IR3_REG_HALF | IR3_REG_HIGH);
for (int i = 0, j = 0; i < n; i++) {
struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO);
ir3_reg_create(split, 0, IR3_REG_SSA);
ir3_reg_create(split, 0, IR3_REG_SSA)->instr = src;
ir3_reg_create(split, 0, IR3_REG_SSA | flags);
ir3_reg_create(split, 0, IR3_REG_SSA | flags)->instr = src;
split->fo.off = i + base;
if (prev) {

View file

@ -27,6 +27,7 @@
#include <math.h>
#include "ir3.h"
#include "ir3_compiler.h"
#include "ir3_shader.h"
/*
@ -88,6 +89,12 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n,
unsigned flags)
{
unsigned valid_flags;
if ((flags & IR3_REG_HIGH) &&
(opc_cat(instr->opc) > 1) &&
(instr->block->shader->compiler->gpu_id >= 600))
return false;
flags = cp_flags(flags);
/* If destination is indirect, then source cannot be.. at least
@ -243,6 +250,7 @@ static void combine_flags(unsigned *dstflags, struct ir3_instruction *src)
*dstflags |= srcflags & IR3_REG_IMMED;
*dstflags |= srcflags & IR3_REG_RELATIV;
*dstflags |= srcflags & IR3_REG_ARRAY;
*dstflags |= srcflags & IR3_REG_HIGH;
/* if src of the src is boolean we can drop the (abs) since we know
* the source value is already a positive integer. This cleans

View file

@ -125,6 +125,8 @@ static void print_reg_name(struct ir3_register *reg)
else
printf("\x1b[0;31mr<a0.x + %d>\x1b[0m (%u)", reg->array.offset, reg->size);
} else {
if (reg->flags & IR3_REG_HIGH)
printf("H");
if (reg->flags & IR3_REG_HALF)
printf("h");
if (reg->flags & IR3_REG_CONST)