mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 11:28:05 +02:00
nv50: modified FP attribute loading
VP outputs that should be loadable in the FP are mapped to interpolant indices by HPOS, COL0, etc. Since HPOS is always written, the highest byte of method 0x1988 contains a bitmask that selects which components of HPOS are used for interpolants, i.e. the FP inputs in COL0 start at index POPCNT(bits 24-27 of 0x1988).
This commit is contained in:
parent
e88ec312df
commit
dd9ded42b9
2 changed files with 147 additions and 45 deletions
|
|
@ -86,6 +86,7 @@ struct nv50_reg {
|
||||||
int hw;
|
int hw;
|
||||||
int neg;
|
int neg;
|
||||||
|
|
||||||
|
int rhw; /* result hw for FP outputs, or interpolant index */
|
||||||
int acc; /* instruction where this reg is last read (first insn == 1) */
|
int acc; /* instruction where this reg is last read (first insn == 1) */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -112,6 +113,9 @@ struct nv50_pc {
|
||||||
unsigned temp_temp_nr;
|
unsigned temp_temp_nr;
|
||||||
|
|
||||||
unsigned interp_mode[32];
|
unsigned interp_mode[32];
|
||||||
|
/* perspective interpolation registers */
|
||||||
|
struct nv50_reg *iv_p;
|
||||||
|
struct nv50_reg *iv_c;
|
||||||
|
|
||||||
/* current instruction and total number of insns */
|
/* current instruction and total number of insns */
|
||||||
unsigned insn_cur;
|
unsigned insn_cur;
|
||||||
|
|
@ -374,20 +378,29 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
|
||||||
#define INTERP_PERSPECTIVE 2
|
#define INTERP_PERSPECTIVE 2
|
||||||
#define INTERP_CENTROID 4
|
#define INTERP_CENTROID 4
|
||||||
|
|
||||||
|
/* interpolant index has been stored in dst->rhw */
|
||||||
static void
|
static void
|
||||||
emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
|
emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *iv,
|
||||||
struct nv50_reg *src, struct nv50_reg *iv)
|
unsigned mode)
|
||||||
{
|
{
|
||||||
|
assert(dst->rhw != -1);
|
||||||
struct nv50_program_exec *e = exec(pc);
|
struct nv50_program_exec *e = exec(pc);
|
||||||
|
|
||||||
e->inst[0] |= 0x80000000;
|
e->inst[0] |= 0x80000000;
|
||||||
set_dst(pc, dst, e);
|
set_dst(pc, dst, e);
|
||||||
alloc_reg(pc, src);
|
e->inst[0] |= (dst->rhw << 16);
|
||||||
e->inst[0] |= (src->hw << 16);
|
|
||||||
if (iv) {
|
if (mode & INTERP_FLAT) {
|
||||||
e->inst[0] |= (1 << 25);
|
e->inst[0] |= (1 << 8);
|
||||||
alloc_reg(pc, iv);
|
} else {
|
||||||
e->inst[0] |= (iv->hw << 9);
|
if (mode & INTERP_PERSPECTIVE) {
|
||||||
|
e->inst[0] |= (1 << 25);
|
||||||
|
alloc_reg(pc, iv);
|
||||||
|
e->inst[0] |= (iv->hw << 9);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mode & INTERP_CENTROID)
|
||||||
|
e->inst[0] |= (1 << 24);
|
||||||
}
|
}
|
||||||
|
|
||||||
emit(pc, e);
|
emit(pc, e);
|
||||||
|
|
@ -1443,6 +1456,40 @@ prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned
|
||||||
|
load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid,
|
||||||
|
int *aid, int *p_oid)
|
||||||
|
{
|
||||||
|
struct nv50_reg *iv;
|
||||||
|
int oid, c, n;
|
||||||
|
unsigned mask = 0;
|
||||||
|
|
||||||
|
iv = (pc->interp_mode[i] & INTERP_CENTROID) ? pc->iv_c : pc->iv_p;
|
||||||
|
|
||||||
|
for (c = 0, n = i * 4; c < 4; c++, n++) {
|
||||||
|
oid = (*p_oid)++;
|
||||||
|
pc->attr[n].type = P_TEMP;
|
||||||
|
pc->attr[n].index = i;
|
||||||
|
|
||||||
|
if (pc->attr[n].acc == acc[n])
|
||||||
|
continue;
|
||||||
|
mask |= (1 << c);
|
||||||
|
|
||||||
|
pc->attr[n].acc = acc[n];
|
||||||
|
pc->attr[n].rhw = pc->attr[n].hw = -1;
|
||||||
|
alloc_reg(pc, &pc->attr[n]);
|
||||||
|
|
||||||
|
pc->attr[n].rhw = (*aid)++;
|
||||||
|
emit_interp(pc, &pc->attr[n], iv, pc->interp_mode[i]);
|
||||||
|
|
||||||
|
pc->p->cfg.fp.map[(*mid) / 4] |= oid << (8 * ((*mid) % 4));
|
||||||
|
(*mid)++;
|
||||||
|
pc->p->cfg.fp.regs[1] += 0x00010001;
|
||||||
|
}
|
||||||
|
|
||||||
|
return mask;
|
||||||
|
}
|
||||||
|
|
||||||
static boolean
|
static boolean
|
||||||
nv50_program_tx_prep(struct nv50_pc *pc)
|
nv50_program_tx_prep(struct nv50_pc *pc)
|
||||||
{
|
{
|
||||||
|
|
@ -1462,6 +1509,11 @@ nv50_program_tx_prep(struct nv50_pc *pc)
|
||||||
|
|
||||||
depr = fcol = bcol = fcrd = 0xffff;
|
depr = fcol = bcol = fcrd = 0xffff;
|
||||||
|
|
||||||
|
if (pc->p->type == PIPE_SHADER_FRAGMENT) {
|
||||||
|
pc->p->cfg.fp.regs[0] = 0x01000404;
|
||||||
|
pc->p->cfg.fp.regs[1] = 0x00000400;
|
||||||
|
}
|
||||||
|
|
||||||
tgsi_parse_init(&p, pc->p->pipe.tokens);
|
tgsi_parse_init(&p, pc->p->pipe.tokens);
|
||||||
while (!tgsi_parse_end_of_tokens(&p)) {
|
while (!tgsi_parse_end_of_tokens(&p)) {
|
||||||
const union tgsi_full_token *tok = &p.FullToken;
|
const union tgsi_full_token *tok = &p.FullToken;
|
||||||
|
|
@ -1503,6 +1555,8 @@ nv50_program_tx_prep(struct nv50_pc *pc)
|
||||||
switch (d->Semantic.SemanticName) {
|
switch (d->Semantic.SemanticName) {
|
||||||
case TGSI_SEMANTIC_POSITION:
|
case TGSI_SEMANTIC_POSITION:
|
||||||
depr = first;
|
depr = first;
|
||||||
|
pc->p->cfg.fp.regs[2] |= 0x00000100;
|
||||||
|
pc->p->cfg.fp.regs[3] |= 0x00000011;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
|
@ -1589,6 +1643,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
|
||||||
for (c = 0; c < 4; c++) {
|
for (c = 0; c < 4; c++) {
|
||||||
pc->temp[i*4+c].type = P_TEMP;
|
pc->temp[i*4+c].type = P_TEMP;
|
||||||
pc->temp[i*4+c].hw = -1;
|
pc->temp[i*4+c].hw = -1;
|
||||||
|
pc->temp[i*4+c].rhw = -1;
|
||||||
pc->temp[i*4+c].index = i;
|
pc->temp[i*4+c].index = i;
|
||||||
pc->temp[i*4+c].acc = r_usage[0][i*4+c];
|
pc->temp[i*4+c].acc = r_usage[0][i*4+c];
|
||||||
}
|
}
|
||||||
|
|
@ -1596,51 +1651,87 @@ nv50_program_tx_prep(struct nv50_pc *pc)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pc->attr_nr) {
|
if (pc->attr_nr) {
|
||||||
struct nv50_reg *iv = NULL;
|
int oid = 4, mid = 4, aid = 0;
|
||||||
int aid = 0;
|
/* oid = VP output id
|
||||||
|
* aid = FP attribute/interpolant id
|
||||||
|
* mid = VP output mapping field ID
|
||||||
|
*/
|
||||||
|
|
||||||
pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg));
|
pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg));
|
||||||
if (!pc->attr)
|
if (!pc->attr)
|
||||||
goto out_err;
|
goto out_err;
|
||||||
|
|
||||||
if (pc->p->type == PIPE_SHADER_FRAGMENT) {
|
if (pc->p->type == PIPE_SHADER_FRAGMENT) {
|
||||||
iv = alloc_temp(pc, NULL);
|
/* position should be loaded first */
|
||||||
emit_interp(pc, iv, iv, NULL);
|
if (fcrd != 0xffff) {
|
||||||
emit_flop(pc, 0, iv, iv);
|
unsigned mask;
|
||||||
aid++;
|
mid = 0;
|
||||||
}
|
mask = load_fp_attrib(pc, fcrd, r_usage[1],
|
||||||
|
&mid, &aid, &oid);
|
||||||
|
oid = 0;
|
||||||
|
pc->p->cfg.fp.regs[1] |= (mask << 24);
|
||||||
|
pc->p->cfg.fp.map[0] = 0x04040404 * fcrd;
|
||||||
|
}
|
||||||
|
pc->p->cfg.fp.map[0] += 0x03020100;
|
||||||
|
|
||||||
for (i = 0; i < pc->attr_nr; i++) {
|
/* should do MAD fcrd.xy, fcrd, SOME_CONST, fcrd */
|
||||||
struct nv50_reg *a = &pc->attr[i*4];
|
|
||||||
|
|
||||||
for (c = 0; c < 4; c++) {
|
if (perspect_loads) {
|
||||||
if (pc->p->type == PIPE_SHADER_FRAGMENT) {
|
pc->iv_p = alloc_temp(pc, NULL);
|
||||||
struct nv50_reg *at =
|
|
||||||
alloc_temp(pc, NULL);
|
if (!(pc->p->cfg.fp.regs[1] & 0x08000000)) {
|
||||||
pc->attr[i*4+c].type = at->type;
|
pc->p->cfg.fp.regs[1] |= 0x08000000;
|
||||||
pc->attr[i*4+c].hw = at->hw;
|
pc->iv_p->rhw = aid++;
|
||||||
pc->attr[i*4+c].index = at->index;
|
emit_interp(pc, pc->iv_p, NULL,
|
||||||
pc->attr[i*4+c].acc = r_usage[1][i*4+c];
|
INTERP_LINEAR);
|
||||||
|
emit_flop(pc, 0, pc->iv_p, pc->iv_p);
|
||||||
} else {
|
} else {
|
||||||
pc->p->cfg.vp.attr[aid/32] |=
|
pc->iv_p->rhw = aid - 1;
|
||||||
(1 << (aid % 32));
|
emit_flop(pc, 0, pc->iv_p,
|
||||||
pc->attr[i*4+c].type = P_ATTR;
|
&pc->attr[fcrd * 4 + 3]);
|
||||||
pc->attr[i*4+c].hw = aid++;
|
|
||||||
pc->attr[i*4+c].index = i;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pc->p->type != PIPE_SHADER_FRAGMENT)
|
if (centroid_loads) {
|
||||||
continue;
|
pc->iv_c = alloc_temp(pc, NULL);
|
||||||
|
pc->iv_c->rhw = pc->iv_p ? aid - 1 : aid++;
|
||||||
|
emit_interp(pc, pc->iv_c, NULL,
|
||||||
|
INTERP_CENTROID);
|
||||||
|
emit_flop(pc, 0, pc->iv_c, pc->iv_c);
|
||||||
|
pc->p->cfg.fp.regs[1] |= 0x08000000;
|
||||||
|
}
|
||||||
|
|
||||||
emit_interp(pc, &a[0], &a[0], iv);
|
for (c = 0; c < 4; c++) {
|
||||||
emit_interp(pc, &a[1], &a[1], iv);
|
/* I don't know what these values do, but
|
||||||
emit_interp(pc, &a[2], &a[2], iv);
|
* let's set them like the blob does:
|
||||||
emit_interp(pc, &a[3], &a[3], iv);
|
*/
|
||||||
|
if (fcol != 0xffff && r_usage[1][fcol * 4 + c])
|
||||||
|
pc->p->cfg.fp.regs[0] += 0x00010000;
|
||||||
|
if (bcol != 0xffff && r_usage[1][bcol * 4 + c])
|
||||||
|
pc->p->cfg.fp.regs[0] += 0x00010000;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < pc->attr_nr; i++)
|
||||||
|
load_fp_attrib(pc, i, r_usage[1],
|
||||||
|
&mid, &aid, &oid);
|
||||||
|
|
||||||
|
if (pc->iv_p)
|
||||||
|
free_temp(pc, pc->iv_p);
|
||||||
|
if (pc->iv_c)
|
||||||
|
free_temp(pc, pc->iv_c);
|
||||||
|
|
||||||
|
pc->p->cfg.fp.high_map = (mid / 4);
|
||||||
|
pc->p->cfg.fp.high_map += ((mid % 4) ? 1 : 0);
|
||||||
|
} else {
|
||||||
|
/* vertex program */
|
||||||
|
for (i = 0; i < pc->attr_nr * 4; i++) {
|
||||||
|
pc->p->cfg.vp.attr[aid / 32] |=
|
||||||
|
(1 << (aid % 32));
|
||||||
|
pc->attr[i].type = P_ATTR;
|
||||||
|
pc->attr[i].hw = aid++;
|
||||||
|
pc->attr[i].index = i / 4;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (iv)
|
|
||||||
free_temp(pc, iv);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pc->result_nr) {
|
if (pc->result_nr) {
|
||||||
|
|
@ -1983,6 +2074,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
|
||||||
struct nouveau_grobj *tesla = nv50->screen->tesla;
|
struct nouveau_grobj *tesla = nv50->screen->tesla;
|
||||||
struct nv50_program *p = nv50->fragprog;
|
struct nv50_program *p = nv50->fragprog;
|
||||||
struct nouveau_stateobj *so;
|
struct nouveau_stateobj *so;
|
||||||
|
unsigned i;
|
||||||
|
|
||||||
if (!p->translated) {
|
if (!p->translated) {
|
||||||
nv50_program_validate(nv50, p);
|
nv50_program_validate(nv50, p);
|
||||||
|
|
@ -2000,17 +2092,22 @@ nv50_fragprog_validate(struct nv50_context *nv50)
|
||||||
so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
|
so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
|
||||||
NOUVEAU_BO_LOW, 0, 0);
|
NOUVEAU_BO_LOW, 0, 0);
|
||||||
so_method(so, tesla, 0x1904, 4);
|
so_method(so, tesla, 0x1904, 4);
|
||||||
so_data (so, 0x00040404); /* p: 0x01000404 */
|
so_data (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */
|
||||||
so_data (so, 0x00000004);
|
so_data (so, 0x00000004);
|
||||||
so_data (so, 0x00000000);
|
so_data (so, 0x00000000);
|
||||||
so_data (so, 0x00000000);
|
so_data (so, 0x00000000);
|
||||||
so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */
|
so_method(so, tesla, 0x16bc, p->cfg.fp.high_map);
|
||||||
so_data (so, 0x03020100);
|
for (i = 0; i < p->cfg.fp.high_map; i++)
|
||||||
so_data (so, 0x07060504);
|
so_data(so, p->cfg.fp.map[i]);
|
||||||
so_data (so, 0x0b0a0908);
|
|
||||||
so_method(so, tesla, 0x1988, 2);
|
so_method(so, tesla, 0x1988, 2);
|
||||||
so_data (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */
|
so_data (so, p->cfg.fp.regs[1]); /* 0x08040404 / 0x0f000401 */
|
||||||
so_data (so, p->cfg.high_temp);
|
so_data (so, p->cfg.high_temp);
|
||||||
|
so_method(so, tesla, 0x1298, 1);
|
||||||
|
so_data (so, p->cfg.high_result);
|
||||||
|
so_method(so, tesla, 0x19a8, 1);
|
||||||
|
so_data (so, p->cfg.fp.regs[2]);
|
||||||
|
so_method(so, tesla, 0x196c, 1);
|
||||||
|
so_data (so, p->cfg.fp.regs[3]);
|
||||||
so_method(so, tesla, 0x1414, 1);
|
so_method(so, tesla, 0x1414, 1);
|
||||||
so_data (so, 0); /* program start offset */
|
so_data (so, 0); /* program start offset */
|
||||||
so_ref(so, &nv50->state.fragprog);
|
so_ref(so, &nv50->state.fragprog);
|
||||||
|
|
|
||||||
|
|
@ -39,6 +39,11 @@ struct nv50_program {
|
||||||
struct {
|
struct {
|
||||||
unsigned attr[2];
|
unsigned attr[2];
|
||||||
} vp;
|
} vp;
|
||||||
|
struct {
|
||||||
|
unsigned regs[4];
|
||||||
|
unsigned map[5];
|
||||||
|
unsigned high_map;
|
||||||
|
} fp;
|
||||||
} cfg;
|
} cfg;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue