nv40: ensure all required buffers are accounted for during state validation

This commit is contained in:
Ben Skeggs 2007-12-24 18:53:41 +11:00
parent 5fcffcd312
commit f9cfc32376
6 changed files with 121 additions and 41 deletions

View file

@ -44,6 +44,21 @@ struct nv40_context {
struct pipe_texture *tex_miptree[PIPE_MAX_SAMPLERS];
uint32_t tex_dirty;
uint32_t rt_enable;
struct pipe_buffer_handle *rt[4];
struct pipe_buffer_handle *zeta;
struct {
struct pipe_buffer_handle *buffer;
uint32_t format;
} tex[16];
unsigned vb_enable;
struct {
struct pipe_buffer_handle *buffer;
unsigned delta;
} vb[16];
struct {
struct nouveau_resource *exec_heap;
struct nouveau_resource *data_heap;

View file

@ -796,10 +796,6 @@ nv40_fragprog_bind(struct nv40_context *nv40, struct nv40_fragment_program *fp)
fp->on_hw = TRUE;
}
BEGIN_RING(curie, NV40TCL_FP_ADDRESS, 1);
OUT_RELOC (fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1);
BEGIN_RING(curie, NV40TCL_FP_CONTROL, 1);
OUT_RING (fp->fp_control);

View file

@ -630,50 +630,36 @@ nv40_set_framebuffer_state(struct pipe_context *pipe,
}
if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) {
BEGIN_RING(curie, NV40TCL_DMA_COLOR0, 1);
OUT_RELOCo(rt[0]->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR0_PITCH, 2);
BEGIN_RING(curie, NV40TCL_COLOR0_PITCH, 1);
OUT_RING (rt[0]->pitch * rt[0]->cpp);
OUT_RELOCl(rt[0]->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
nv40->rt[0] = rt[0]->buffer;
}
if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) {
BEGIN_RING(curie, NV40TCL_DMA_COLOR1, 1);
OUT_RELOCo(rt[1]->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR1_OFFSET, 2);
OUT_RELOCl(rt[1]->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR1_PITCH, 2);
OUT_RING (rt[1]->pitch * rt[1]->cpp);
nv40->rt[1] = rt[1]->buffer;
}
if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
BEGIN_RING(curie, NV40TCL_DMA_COLOR2, 1);
OUT_RELOCo(rt[2]->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR2_OFFSET, 1);
OUT_RELOCl(rt[2]->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR2_PITCH, 1);
OUT_RING (rt[2]->pitch * rt[2]->cpp);
nv40->rt[2] = rt[2]->buffer;
}
if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
BEGIN_RING(curie, NV40TCL_DMA_COLOR3, 1);
OUT_RELOCo(rt[3]->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR3_OFFSET, 1);
OUT_RELOCl(rt[3]->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR3_PITCH, 1);
OUT_RING (rt[3]->pitch * rt[3]->cpp);
nv40->rt[3] = rt[3]->buffer;
}
if (zeta_format) {
BEGIN_RING(curie, NV40TCL_DMA_ZETA, 1);
OUT_RELOCo(zeta->buffer,
NOUVEAU_BO_VRAM | NOUVEAU_BO_WR | NOUVEAU_BO_RD);
BEGIN_RING(curie, NV40TCL_ZETA_OFFSET, 1);
OUT_RELOCl(zeta->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR |
NOUVEAU_BO_RD);
BEGIN_RING(curie, NV40TCL_ZETA_PITCH, 1);
OUT_RING (zeta->pitch * zeta->cpp);
nv40->zeta = zeta->buffer;
}
nv40->rt_enable = rt_enable;
BEGIN_RING(curie, NV40TCL_RT_ENABLE, 1);
OUT_RING (rt_enable);
BEGIN_RING(curie, NV40TCL_RT_HORIZ, 3);

View file

@ -5,6 +5,8 @@
void
nv40_emit_hw_state(struct nv40_context *nv40)
{
int i;
if (nv40->dirty & NV40_NEW_FRAGPROG) {
nv40_fragprog_bind(nv40, nv40->fragprog.current);
/*XXX: clear NV40_NEW_FRAGPROG if no new program uploaded */
@ -25,5 +27,68 @@ nv40_emit_hw_state(struct nv40_context *nv40)
nv40_vertprog_bind(nv40, nv40->vertprog.current);
nv40->dirty &= ~NV40_NEW_VERTPROG;
}
/* Emit relocs for every referenced buffer.
* This is to ensure the bufmgr has an accurate idea of how
* the buffer is used. This isn't very efficient, but we don't
* seem to take a significant performance hit. Will be improved
* at some point. Vertex arrays are emitted by nv40_vbo.c
*/
/* Render targets */
if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR0) {
BEGIN_RING(curie, NV40TCL_DMA_COLOR0, 1);
OUT_RELOCo(nv40->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR0_OFFSET, 1);
OUT_RELOCl(nv40->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
}
if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR1) {
BEGIN_RING(curie, NV40TCL_DMA_COLOR1, 1);
OUT_RELOCo(nv40->rt[1], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR1_OFFSET, 1);
OUT_RELOCl(nv40->rt[1], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
}
if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
BEGIN_RING(curie, NV40TCL_DMA_COLOR2, 1);
OUT_RELOCo(nv40->rt[2], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR2_OFFSET, 1);
OUT_RELOCl(nv40->rt[2], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
}
if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
BEGIN_RING(curie, NV40TCL_DMA_COLOR3, 1);
OUT_RELOCo(nv40->rt[3], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR3_OFFSET, 1);
OUT_RELOCl(nv40->rt[3], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
}
if (nv40->zeta) {
BEGIN_RING(curie, NV40TCL_DMA_ZETA, 1);
OUT_RELOCo(nv40->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_ZETA_OFFSET, 1);
OUT_RELOCl(nv40->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
}
/* Texture images */
for (i = 0; i < 16; i++) {
if (!nv40->tex[i].buffer)
continue;
BEGIN_RING(curie, NV40TCL_TEX_OFFSET(i), 2);
OUT_RELOCl(nv40->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD);
OUT_RELOCd(nv40->tex[i].buffer, nv40->tex[i].format,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
NOUVEAU_BO_OR, NV40TCL_TEX_FORMAT_DMA0,
NV40TCL_TEX_FORMAT_DMA1);
}
/* Fragment program */
BEGIN_RING(curie, NV40TCL_FP_ADDRESS, 1);
OUT_RELOC (nv40->fragprog.active->buffer, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
NOUVEAU_BO_OR, NV40TCL_FP_ADDRESS_DMA0,
NV40TCL_FP_ADDRESS_DMA1);
}

View file

@ -109,12 +109,10 @@ nv40_tex_unit_enable(struct nv40_context *nv40, int unit)
if (pt->format == PIPE_FORMAT_U_A8_L8)
txs |= (1<<16); /*nfi*/
BEGIN_RING(curie, NV40TCL_TEX_OFFSET(unit), 8);
OUT_RELOCl(nv40mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
NOUVEAU_BO_RD);
OUT_RELOCd(nv40mt->buffer, txf, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
NOUVEAU_BO_OR | NOUVEAU_BO_RD, NV40TCL_TEX_FORMAT_DMA0,
NV40TCL_TEX_FORMAT_DMA1);
nv40->tex[unit].buffer = nv40mt->buffer;
nv40->tex[unit].format = txf;
BEGIN_RING(curie, NV40TCL_TEX_WRAP(unit), 6);
OUT_RING (ps->wrap);
OUT_RING (NV40TCL_TEX_ENABLE_ENABLE | ps->en |
(0x00078000) /* mipmap related? */);
@ -135,6 +133,7 @@ nv40_state_tex_update(struct nv40_context *nv40)
if (nv40->tex_miptree[unit]) {
nv40_tex_unit_enable(nv40, unit);
} else {
nv40->tex[unit].buffer = NULL;
BEGIN_RING(curie, NV40TCL_TEX_ENABLE(unit), 1);
OUT_RING (0);
}

View file

@ -6,6 +6,9 @@
#include "nv40_dma.h"
#include "nv40_state.h"
#include "pipe/nouveau/nouveau_channel.h"
#include "pipe/nouveau/nouveau_pushbuf.h"
static INLINE int
nv40_vbo_ncomp(uint format)
{
@ -101,6 +104,8 @@ nv40_vbo_arrays_update(struct nv40_context *nv40)
uint32_t inputs, vtxfmt[16];
int hw, num_hw;
nv40->vb_enable = 0;
inputs = vp->ir;
for (hw = 0; hw < 16 && inputs; hw++) {
if (inputs & (1 << hw)) {
@ -129,19 +134,16 @@ nv40_vbo_arrays_update(struct nv40_context *nv40)
continue;
}
BEGIN_RING(curie, NV40TCL_VTXBUF_ADDRESS(hw), 1);
OUT_RELOC(vb->buffer, vb->buffer_offset + ve->src_offset,
NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
NOUVEAU_BO_OR | NOUVEAU_BO_RD, 0,
NV40TCL_VTXBUF_ADDRESS_DMA1);
nv40->vb_enable |= (1 << hw);
nv40->vb[hw].delta = vb->buffer_offset + ve->src_offset;
nv40->vb[hw].buffer = vb->buffer;
vtxfmt[hw] = ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) |
(nv40_vbo_ncomp(ve->src_format) <<
NV40TCL_VTXFMT_SIZE_SHIFT) |
nv40_vbo_type(ve->src_format));
}
BEGIN_RING(curie, 0x1710, 1);
OUT_RING (0); /* vtx cache flush */
BEGIN_RING(curie, NV40TCL_VTXFMT(0), num_hw);
OUT_RINGp (vtxfmt, num_hw);
}
@ -149,14 +151,31 @@ nv40_vbo_arrays_update(struct nv40_context *nv40)
static boolean
nv40_vbo_validate_state(struct nv40_context *nv40)
{
if (nv40->dirty & ~NV40_NEW_ARRAYS)
nv40_emit_hw_state(nv40);
unsigned inputs;
nv40_emit_hw_state(nv40);
if (nv40->dirty & NV40_NEW_ARRAYS) {
nv40_vbo_arrays_update(nv40);
nv40->dirty &= ~NV40_NEW_ARRAYS;
}
inputs = nv40->vb_enable;
while (inputs) {
unsigned a = ffs(inputs) - 1;
inputs &= ~(1 << a);
BEGIN_RING(curie, NV40TCL_VTXBUF_ADDRESS(a), 1);
OUT_RELOC (nv40->vb[a].buffer, nv40->vb[a].delta,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_LOW |
NOUVEAU_BO_OR | NOUVEAU_BO_RD, 0,
NV40TCL_VTXBUF_ADDRESS_DMA1);
}
BEGIN_RING(curie, 0x1710, 1);
OUT_RING (0); /* vtx cache flush */
return TRUE;
}