mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 09:20:12 +01:00
r600g/sb: implement r600 gpr index workaround. (v3.1)
r600, rv610 and rv630 all have a bug in their GPR indexing and in how the hw inserts accesses to PV. If the base index for the src is the same as the dst gpr in a previous group, the hw will use PV instead of using the indexed gpr correctly. The workaround is to insert a NOP when this is detected.

v2: add the second part of the fix, detecting DST rel writes followed by reads with the same src base index.
v3: forget adding stuff to structs; just iterate over the previous node group again, which makes it more obvious.
v3.1: drop local_nop.

Fixes ~200 piglit regressions on rv635 since SB was introduced.

Reviewed-By: Glenn Kennard <glenn.kennard@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
parent
de0fd375f6
commit
3c8ef3a74b
4 changed files with 57 additions and 9 deletions
|
|
@ -616,6 +616,8 @@ public:
|
||||||
unsigned num_slots;
|
unsigned num_slots;
|
||||||
bool uses_mova_gpr;
|
bool uses_mova_gpr;
|
||||||
|
|
||||||
|
bool r6xx_gpr_index_workaround;
|
||||||
|
|
||||||
bool stack_workaround_8xx;
|
bool stack_workaround_8xx;
|
||||||
bool stack_workaround_9xx;
|
bool stack_workaround_9xx;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -38,6 +38,18 @@
|
||||||
|
|
||||||
namespace r600_sb {
|
namespace r600_sb {
|
||||||
|
|
||||||
|
// Workaround helper for the r6xx GPR-indexing hardware bug.
//
// Builds a fresh ALU group that contains a single NOP instruction
// (op ALU_OP0_NOP, with bc.last set to 1) and inserts that group
// immediately before the group `b4`.
//
// Called from finalize_alu_group() when finalize_alu_src() reports that a
// NOP is required and the context's r6xx_gpr_index_workaround flag is set.
//
// @param b4  the ALU group in front of which the NOP group is inserted.
void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {
|
||||||
|
|
||||||
|
alu_group_node *g = sh.create_alu_group();
|
||||||
|
alu_node *a = sh.create_alu();
|
||||||
|
|
||||||
|
a->bc.set_op(ALU_OP0_NOP);
|
||||||
|
a->bc.last = 1;
|
||||||
|
|
||||||
|
g->push_back(a);
|
||||||
|
b4->insert_before(g);
|
||||||
|
}
|
||||||
|
|
||||||
int bc_finalizer::run() {
|
int bc_finalizer::run() {
|
||||||
|
|
||||||
run_on(sh.root);
|
run_on(sh.root);
|
||||||
|
|
@ -211,12 +223,12 @@ void bc_finalizer::finalize_if(region_node* r) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void bc_finalizer::run_on(container_node* c) {
|
void bc_finalizer::run_on(container_node* c) {
|
||||||
|
node *prev_node = NULL;
|
||||||
for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
|
for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
|
||||||
node *n = *I;
|
node *n = *I;
|
||||||
|
|
||||||
if (n->is_alu_group()) {
|
if (n->is_alu_group()) {
|
||||||
finalize_alu_group(static_cast<alu_group_node*>(n));
|
finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
|
||||||
} else {
|
} else {
|
||||||
if (n->is_alu_clause()) {
|
if (n->is_alu_clause()) {
|
||||||
cf_node *c = static_cast<cf_node*>(n);
|
cf_node *c = static_cast<cf_node*>(n);
|
||||||
|
|
@ -251,17 +263,22 @@ void bc_finalizer::run_on(container_node* c) {
|
||||||
if (n->is_container())
|
if (n->is_container())
|
||||||
run_on(static_cast<container_node*>(n));
|
run_on(static_cast<container_node*>(n));
|
||||||
}
|
}
|
||||||
|
prev_node = n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void bc_finalizer::finalize_alu_group(alu_group_node* g) {
|
void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {
|
||||||
|
|
||||||
alu_node *last = NULL;
|
alu_node *last = NULL;
|
||||||
|
alu_group_node *prev_g = NULL;
|
||||||
|
bool add_nop = false;
|
||||||
|
if (prev_node && prev_node->is_alu_group()) {
|
||||||
|
prev_g = static_cast<alu_group_node*>(prev_node);
|
||||||
|
}
|
||||||
|
|
||||||
for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
|
for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
|
||||||
alu_node *n = static_cast<alu_node*>(*I);
|
alu_node *n = static_cast<alu_node*>(*I);
|
||||||
unsigned slot = n->bc.slot;
|
unsigned slot = n->bc.slot;
|
||||||
|
|
||||||
value *d = n->dst.empty() ? NULL : n->dst[0];
|
value *d = n->dst.empty() ? NULL : n->dst[0];
|
||||||
|
|
||||||
if (d && d->is_special_reg()) {
|
if (d && d->is_special_reg()) {
|
||||||
|
|
@ -299,17 +316,22 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g) {
|
||||||
|
|
||||||
update_ngpr(n->bc.dst_gpr);
|
update_ngpr(n->bc.dst_gpr);
|
||||||
|
|
||||||
finalize_alu_src(g, n);
|
add_nop |= finalize_alu_src(g, n, prev_g);
|
||||||
|
|
||||||
last = n;
|
last = n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (add_nop) {
|
||||||
|
if (sh.get_ctx().r6xx_gpr_index_workaround) {
|
||||||
|
insert_rv6xx_load_ar_workaround(g);
|
||||||
|
}
|
||||||
|
}
|
||||||
last->bc.last = 1;
|
last->bc.last = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
|
bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
|
||||||
vvec &sv = a->src;
|
vvec &sv = a->src;
|
||||||
|
bool add_nop = false;
|
||||||
FBC_DUMP(
|
FBC_DUMP(
|
||||||
sblog << "finalize_alu_src: ";
|
sblog << "finalize_alu_src: ";
|
||||||
dump::dump_op(a);
|
dump::dump_op(a);
|
||||||
|
|
@ -336,6 +358,15 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
|
||||||
if (!v->rel->is_const()) {
|
if (!v->rel->is_const()) {
|
||||||
src.rel = 1;
|
src.rel = 1;
|
||||||
update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
|
update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
|
||||||
|
if (prev && !add_nop) {
|
||||||
|
for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
|
||||||
|
alu_node *pn = static_cast<alu_node*>(*pI);
|
||||||
|
if (pn->bc.dst_gpr == src.sel) {
|
||||||
|
add_nop = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
} else
|
} else
|
||||||
src.rel = 0;
|
src.rel = 0;
|
||||||
|
|
||||||
|
|
@ -393,11 +424,23 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
|
||||||
assert(!"unknown value kind");
|
assert(!"unknown value kind");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if (prev && !add_nop) {
|
||||||
|
for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
|
||||||
|
alu_node *pn = static_cast<alu_node*>(*pI);
|
||||||
|
if (pn->bc.dst_rel) {
|
||||||
|
if (pn->bc.dst_gpr == src.sel) {
|
||||||
|
add_nop = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
while (si < 3) {
|
while (si < 3) {
|
||||||
a->bc.src[si++].sel = 0;
|
a->bc.src[si++].sel = 0;
|
||||||
}
|
}
|
||||||
|
return add_nop;
|
||||||
}
|
}
|
||||||
|
|
||||||
void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
|
void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
|
||||||
|
|
|
||||||
|
|
@ -61,6 +61,8 @@ int sb_context::init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass) {
|
||||||
|
|
||||||
uses_mova_gpr = is_r600() && chip != HW_CHIP_RV670;
|
uses_mova_gpr = is_r600() && chip != HW_CHIP_RV670;
|
||||||
|
|
||||||
|
r6xx_gpr_index_workaround = is_r600() && chip != HW_CHIP_RV670 && chip != HW_CHIP_RS780 && chip != HW_CHIP_RS880;
|
||||||
|
|
||||||
switch (chip) {
|
switch (chip) {
|
||||||
case HW_CHIP_RV610:
|
case HW_CHIP_RV610:
|
||||||
case HW_CHIP_RS780:
|
case HW_CHIP_RS780:
|
||||||
|
|
|
||||||
|
|
@ -695,8 +695,9 @@ public:
|
||||||
|
|
||||||
void run_on(container_node *c);
|
void run_on(container_node *c);
|
||||||
|
|
||||||
void finalize_alu_group(alu_group_node *g);
|
void insert_rv6xx_load_ar_workaround(alu_group_node *b4);
|
||||||
void finalize_alu_src(alu_group_node *g, alu_node *a);
|
void finalize_alu_group(alu_group_node *g, node *prev_node);
|
||||||
|
bool finalize_alu_src(alu_group_node *g, alu_node *a, alu_group_node *prev_node);
|
||||||
|
|
||||||
void emit_set_grad(fetch_node* f);
|
void emit_set_grad(fetch_node* f);
|
||||||
void finalize_fetch(fetch_node *f);
|
void finalize_fetch(fetch_node *f);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue