nv50: use "real" constbufs for shaders + tcb uploads

This commit is contained in:
Ben Skeggs 2008-06-01 23:10:31 +10:00
parent f722fd937d
commit 716c1cd2ec
7 changed files with 105 additions and 60 deletions

View file

@ -23,6 +23,14 @@
#define NOUVEAU_MSG(fmt, args...) \
fprintf(stderr, "nouveau: "fmt, ##args);
/* Constant buffer assignment
 *
 * Each value is a constant buffer slot index. The driver places it in
 * bits 16 and up of the data word that follows the buffer address when
 * binding a buffer with method 0x1280, and in bits 22 and up of the second
 * instruction word when a shader source operand is a constant or immediate.
 */
#define NV50_CB_PMISC 0 /* shared screen constbuf; shader immediates are uploaded here */
#define NV50_CB_PVP 1 /* vertex program constants (PIPE_SHADER_VERTEX constbuf) */
#define NV50_CB_PFP 2 /* fragment program constants (PIPE_SHADER_FRAGMENT constbuf) */
#define NV50_CB_PGP 3 /* NOTE(review): unused in the code shown; presumably geometry program - confirm */
#define NV50_CB_TIC 4 /* bound to the screen's tic buffer */
#define NV50_CB_TSC 5 /* bound to the screen's tsc buffer */
#define NV50_NEW_BLEND (1 << 0)
#define NV50_NEW_ZSA (1 << 1)
#define NV50_NEW_BLEND_COLOUR (1 << 2)
@ -32,8 +40,10 @@
#define NV50_NEW_RASTERIZER (1 << 6)
#define NV50_NEW_FRAMEBUFFER (1 << 7)
#define NV50_NEW_VERTPROG (1 << 8)
#define NV50_NEW_FRAGPROG (1 << 9)
#define NV50_NEW_ARRAYS (1 << 10)
#define NV50_NEW_VERTPROG_CB (1 << 9)
#define NV50_NEW_FRAGPROG (1 << 10)
#define NV50_NEW_FRAGPROG_CB (1 << 11)
#define NV50_NEW_ARRAYS (1 << 12)
struct nv50_blend_stateobj {
struct pipe_blend_state pipe;

View file

@ -179,6 +179,10 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst,
if (src1) {
if (src1->type == P_CONST || src1->type == P_IMMD) {
if (src1->type == P_IMMD)
inst[1] |= (NV50_CB_PMISC << 22);
else
inst[1] |= (NV50_CB_PVP << 22);
inst[0] |= 0x00800000; /* src1 is const */
/*XXX: does src1 come from "src2" now? */
alloc_reg(pc, src1);
@ -196,6 +200,10 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst,
if (src2) {
if (src2->type == P_CONST || src2->type == P_IMMD) {
if (src2->type == P_IMMD)
inst[1] |= (NV50_CB_PMISC << 22);
else
inst[1] |= (NV50_CB_PVP << 22);
inst[0] |= 0x01000000; /* src2 is const */
inst[1] |= (src2->hw << 14);
} else {
@ -526,7 +534,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
}
if (pc->immd_nr) {
int rid = pc->param_nr * 4;
int rid = 0;
pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg));
if (!pc->immd)
@ -581,7 +589,6 @@ nv50_program_tx(struct nv50_program *p)
}
}
p->param_nr = pc->param_nr * 4;
p->immd_nr = pc->immd_nr * 4;
p->immd = pc->immd_buf;
@ -654,23 +661,9 @@ nv50_vertprog_validate(struct nv50_context *nv50)
memcpy(map, p->insns, p->insns_nr * 4);
ws->buffer_unmap(ws, p->buffer);
if (p->param_nr) {
float *cb;
cb = ws->buffer_map(ws, nv50->constbuf[PIPE_SHADER_VERTEX],
PIPE_BUFFER_USAGE_CPU_READ);
for (i = 0; i < p->param_nr; i++) {
BEGIN_RING(tesla, 0x0f00, 2);
OUT_RING (i << 8);
OUT_RING (fui(cb[i]));
}
ws->buffer_unmap(ws, nv50->constbuf[PIPE_SHADER_VERTEX]);
}
for (i = 0; i < p->immd_nr; i++) {
BEGIN_RING(tesla, 0x0f00, 2);
OUT_RING ((p->param_nr + i) << 8);
OUT_RING ((NV50_CB_PMISC << 16) | (i << 8));
OUT_RING (fui(p->immd[i]));
}

View file

@ -203,14 +203,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
return NULL;
}
/* Static constant buffer */
screen->constbuf = ws->buffer_create(ws, 0, 0, 256 * 4 * 4);
if (nvws->res_init(&screen->vp_data_heap, 0, 256)) {
NOUVEAU_ERR("Error initialising constant buffer\n");
nv50_screen_destroy(&screen->pipe);
return NULL;
}
/* Static tesla init */
so = so_new(256, 20);
@ -245,37 +237,56 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
so_method(so, screen->tesla, 0x16b8, 1);
so_data (so, 8);
so_method(so, screen->tesla, 0x1280, 3);
so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, 0x00001000);
so_method(so, screen->tesla, 0x1280, 3);
so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, 0x00014000);
so_method(so, screen->tesla, 0x1280, 3);
so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, 0x00024000);
so_method(so, screen->tesla, 0x1280, 3);
so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, 0x00034000);
so_method(so, screen->tesla, 0x1280, 3);
so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, 0x00040100);
/* Shared constant buffer */
screen->constbuf = ws->buffer_create(ws, 0, 0, 256 * 4 * 4);
if (nvws->res_init(&screen->vp_data_heap, 0, 256)) {
NOUVEAU_ERR("Error initialising constant buffer\n");
nv50_screen_destroy(&screen->pipe);
return NULL;
}
so_method(so, screen->tesla, 0x1280, 3);
so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, (NV50_CB_PMISC << 16) | 0x00001000);
/* Texture sampler/image unit setup - we abuse the constant buffer
* upload mechanism for the moment to upload data to the tex config
* blocks. At some point we *may* want to go the NVIDIA way of doing
* things?
*/
screen->tic = ws->buffer_create(ws, 0, 0, 32 * 8 * 4);
so_method(so, screen->tesla, 0x1280, 3);
so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, (NV50_CB_TIC << 16) | 0x0800);
so_method(so, screen->tesla, 0x1574, 3);
so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, 0x00000800);
screen->tsc = ws->buffer_create(ws, 0, 0, 32 * 8 * 4);
so_method(so, screen->tesla, 0x1280, 3);
so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, (NV50_CB_TSC << 16) | 0x0800);
so_method(so, screen->tesla, 0x155c, 3);
so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, 0x00000800);
/* Vertex array limits - max them out */
for (i = 0; i < 16; i++) {
so_method(so, screen->tesla, 0x1080 + (i * 8), 2);
so_data (so, 0x000000ff);

View file

@ -15,6 +15,9 @@ struct nv50_screen {
struct pipe_buffer *constbuf;
struct nouveau_resource *vp_data_heap;
struct pipe_buffer *tic;
struct pipe_buffer *tsc;
};
static INLINE struct nv50_screen *

View file

@ -406,11 +406,11 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
if (shader == PIPE_SHADER_VERTEX) {
nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer;
nv50->dirty |= NV50_NEW_VERTPROG;
nv50->dirty |= NV50_NEW_VERTPROG_CB;
} else
if (shader == PIPE_SHADER_FRAGMENT) {
nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer;
nv50->dirty |= NV50_NEW_FRAGPROG;
nv50->dirty |= NV50_NEW_FRAGPROG_CB;
}
}

View file

@ -20,7 +20,6 @@ struct nv50_program {
struct pipe_buffer *buffer;
unsigned param_nr;
float *immd;
unsigned immd_nr;

View file

@ -168,6 +168,35 @@ nv50_state_validate(struct nv50_context *nv50)
so_ref(NULL, &so);
}
if (nv50->dirty & NV50_NEW_VERTPROG_CB) {
so = so_new(4, 2);
so_method(so, tesla, 0x1280, 3);
so_reloc (so, nv50->constbuf[PIPE_SHADER_VERTEX], 0,
NOUVEAU_BO_HIGH | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM,
0, 0);
so_reloc (so, nv50->constbuf[PIPE_SHADER_VERTEX], 0,
NOUVEAU_BO_LOW | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM,
0, 0);
so_data (so, (NV50_CB_PVP << 16) | 0x1000);
so_emit(nvws, so);
so_ref(NULL, &so);
}
if (nv50->dirty & NV50_NEW_FRAGPROG_CB) {
so = so_new(4, 2);
so_method(so, tesla, 0x1280, 3);
so_reloc (so, nv50->constbuf[PIPE_SHADER_FRAGMENT], 0,
NOUVEAU_BO_HIGH | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM,
0, 0);
so_reloc (so, nv50->constbuf[PIPE_SHADER_FRAGMENT], 0,
NOUVEAU_BO_LOW | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM,
0, 0);
so_data (so, (NV50_CB_PFP << 16) | 0x1000);
so_emit(nvws, so);
so_ref(NULL, &so);
}
if (nv50->dirty & NV50_NEW_ARRAYS)
nv50_vbo_validate(nv50);