i965: checkpoint commit: VS constant buffers

Hook up a constant buffer, binding table, etc. for the VS unit.
This will allow using large constant buffers with vertex shaders.
The new code is disabled at this time (use_const_buffer=FALSE).
This commit is contained in:
Brian Paul 2009-04-14 11:08:42 -06:00
parent 43c7ffaea6
commit cafea75280
11 changed files with 477 additions and 91 deletions

View file

@ -141,7 +141,8 @@ struct brw_context;
#define BRW_NEW_BATCH 0x10000
/** brw->depth_region updated */
#define BRW_NEW_DEPTH_BUFFER 0x20000
#define BRW_NEW_NR_SURFACES 0x40000
#define BRW_NEW_NR_WM_SURFACES 0x40000
#define BRW_NEW_NR_VS_SURFACES 0x80000
struct brw_state_flags {
/** State update flags signalled by mesa internals */
@ -245,20 +246,30 @@ struct brw_vs_ouput_sizes {
#define BRW_MAX_TEX_UNIT 16
/**
* Size of our surface binding table.
* Size of our surface binding table for the WM.
* This contains pointers to the drawing surfaces and current texture
* objects and shader constant buffers (+2).
*/
#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 2)
#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
/**
* Helpers to convert drawing buffers, textures and constant buffers
* to surface binding table indexes.
* to surface binding table indexes, for WM.
*/
#define SURF_INDEX_DRAW(d) (d)
#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS + 0)
#define SURF_INDEX_VERT_CONST_BUFFER (MAX_DRAW_BUFFERS + 1)
#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 2 + t)
#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS)
#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t))
/**
* Size of surface binding table for the VS.
* Only one constant buffer for now.
*/
#define BRW_VS_MAX_SURF 1
/**
* Only a VS constant buffer
*/
#define SURF_INDEX_VERT_CONST_BUFFER 0
enum brw_cache_id {
@ -566,6 +577,11 @@ struct brw_context
dri_bo *prog_bo;
dri_bo *state_bo;
/** Binding table of pointers to surf_bo entries */
dri_bo *bind_bo;
dri_bo *surf_bo[BRW_VS_MAX_SURF];
GLuint nr_surfaces;
} vs;
struct {

View file

@ -357,6 +357,7 @@ update_constant_buffer(struct brw_context *brw,
}
/** Copy current vertex program's parameters into the constant buffer */
static void
update_vertex_constant_buffer(struct brw_context *brw)
{
@ -366,6 +367,7 @@ update_vertex_constant_buffer(struct brw_context *brw)
}
/** Copy current fragment program's parameters into the constant buffer */
static void
update_fragment_constant_buffer(struct brw_context *brw)
{

View file

@ -862,9 +862,18 @@ void brw_dp_READ_4( struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
GLboolean relAddr,
GLuint scratch_offset,
GLuint location,
GLuint bind_table_index );
/**
 * Vertex-shader variant of brw_dp_READ_4(): emits a data port read whose
 * response is written to 'dest'.
 * XXX this function is temporary - merge with brw_dp_READ_4() above.
 *
 * \param p                 compile state the instructions are emitted into
 * \param dest              destination register of the read
 * \param src               source register argument
 * \param msg_reg_nr        message register number used by the SEND
 * \param relAddr           relative (indirect) addressing requested?
 * \param location          location of the constant in the buffer
 * \param bind_table_index  binding table entry to read through
 */
void brw_dp_READ_4_vs( struct brw_compile *p,
struct brw_reg dest,
struct brw_reg src,
GLuint msg_reg_nr,
GLboolean relAddr,
GLuint location,
GLuint bind_table_index );
void brw_dp_WRITE_16( struct brw_compile *p,
struct brw_reg src,
GLuint msg_reg_nr,

View file

@ -952,7 +952,7 @@ void brw_dp_READ_16( struct brw_compile *p,
/**
* Read a float[4] vector from the data port Data Cache (const buffer).
* Scratch offset should be a multiple of 16.
* Location (in buffer) should be a multiple of 16.
* Used for fetching shader constants.
* If relAddr is true, we'll do an indirect fetch using the address register.
*/
@ -960,7 +960,7 @@ void brw_dp_READ_4( struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
GLboolean relAddr,
GLuint scratch_offset,
GLuint location,
GLuint bind_table_index )
{
{
@ -971,7 +971,7 @@ void brw_dp_READ_4( struct brw_compile *p,
/* set message header global offset field (reg 0, element 2) */
brw_MOV(p,
retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
brw_imm_d(scratch_offset));
brw_imm_d(location));
brw_pop_insn_state(p);
}
@ -1001,6 +1001,66 @@ void brw_dp_READ_4( struct brw_compile *p,
}
/**
 * Emit code to read one OWord from a constant buffer through the data
 * port, for use by the vertex shader.
 * XXX this function is temporary - merge with brw_dp_READ_4() above.
 *
 * Two instructions are emitted: a MOV that stores 'location' in the
 * message header's global offset field (reg 0, element 2), followed by a
 * SEND carrying an OWord block read message.
 *
 * \param p                 compile state the instructions are emitted into
 * \param dest              destination register of the SEND (read response)
 * \param src               not referenced by the code currently emitted;
 *                          the message header is always taken from GRF 0
 * \param msg_reg_nr        stored in the SEND header's
 *                          destreg__conditonalmod field
 * \param relAddr           not referenced by the code currently emitted
 * \param location          value written to the header's global offset field
 * \param bind_table_index  binding table index passed to the read message
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
                      struct brw_reg dest,
                      struct brw_reg src,
                      GLuint msg_reg_nr,
                      GLboolean relAddr,
                      GLuint location,
                      GLuint bind_table_index)
{
   /* Accepted for interface compatibility but not used yet. */
   (void) src;
   (void) relAddr;

   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
              brw_imm_d(location));
      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      insn->header.predicate_control = BRW_PREDICATE_NONE;
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditonalmod = msg_reg_nr;
      insn->header.mask_control = BRW_MASK_DISABLE;

      brw_set_dest(insn, dest);

      /* the message header lives in GRF 0 */
      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));

      brw_set_dp_read_message(insn,
                              bind_table_index,
                              0,  /* msg_control (0 means 1 Oword) */
                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                              0,  /* source cache = data cache */
                              1,  /* msg_length */
                              1,  /* response_length (1 Oword) */
                              0); /* eot */
   }
}
void brw_fb_WRITE(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,

View file

@ -101,6 +101,7 @@ const struct brw_tracked_state brw_drawing_rect = {
/**
 * Prepare step for the binding table pointers: hands both the VS and the
 * WM binding table buffers to brw_add_validated_bo().
 */
static void prepare_binding_table_pointers(struct brw_context *brw)
{
brw_add_validated_bo(brw, brw->vs.bind_bo);
brw_add_validated_bo(brw, brw->wm.bind_bo);
}
@ -117,13 +118,11 @@ static void upload_binding_table_pointers(struct brw_context *brw)
BEGIN_BATCH(6, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
OUT_BATCH(0); /* vs */
OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
OUT_RELOC(brw->wm.bind_bo,
I915_GEM_DOMAIN_SAMPLER, 0,
0);
OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */
ADVANCE_BATCH();
}

View file

@ -75,6 +75,13 @@ struct brw_vs_compile {
struct brw_reg userplane[6];
/** using a real constant buffer? */
GLboolean use_const_buffer;
/** we may need up to 3 constants per instruction (if use_const_buffer) */
struct {
GLint index;
struct brw_reg reg;
} current_const[3];
};
void brw_vs_emit( struct brw_vs_compile *c );

View file

@ -38,8 +38,31 @@
#include "brw_vs.h"
static struct brw_reg get_tmp( struct brw_vs_compile *c )
{
struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
/* Do things as simply as possible. Allocate and populate all regs
if (++c->last_tmp > c->prog_data.total_grf)
c->prog_data.total_grf = c->last_tmp;
return tmp;
}
/**
 * Give back a temporary obtained from get_tmp().
 * Only the most recently allocated temporary (register number equal to
 * last_tmp - 1) is actually released; for any other register this is a
 * no-op, so temporaries should be released in reverse allocation order.
 */
static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
{
if (tmp.nr == c->last_tmp-1)
c->last_tmp--;
}
/**
 * Release all temporaries at once by resetting the allocation cursor
 * (last_tmp) back to first_tmp.
 */
static void release_tmps( struct brw_vs_compile *c )
{
c->last_tmp = c->first_tmp;
}
/**
* Preallocate GRF register before code emit.
* Do things as simply as possible. Allocate and populate all regs
* ahead of time.
*/
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
@ -47,6 +70,14 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
GLuint i, reg = 0, mrf;
GLuint nr_params;
#if 0
if (c->vp->program.Base.Parameters->NumParameters >= 6)
c->use_const_buffer = 1;
else
#endif
c->use_const_buffer = GL_FALSE;
/*printf("use_const_buffer = %d\n", c->use_const_buffer);*/
/* r0 -- reserved as usual
*/
c->r0 = brw_vec8_grf(reg, 0);
@ -66,13 +97,19 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
/* Vertex program parameters from curbe:
*/
nr_params = c->vp->program.Base.Parameters->NumParameters;
for (i = 0; i < nr_params; i++) {
c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
}
reg += (nr_params + 1) / 2;
c->prog_data.curb_read_length = reg - 1;
if (c->use_const_buffer) {
/* get constants from a real constant buffer */
c->prog_data.curb_read_length = 0;
}
else {
/* use a section of the GRF for constants */
nr_params = c->vp->program.Base.Parameters->NumParameters;
for (i = 0; i < nr_params; i++) {
c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
}
reg += (nr_params + 1) / 2;
c->prog_data.curb_read_length = reg - 1;
}
/* Allocate input regs:
*/
@ -157,6 +194,13 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4;
c->prog_data.total_grf = reg;
if (c->use_const_buffer) {
for (i = 0; i < 3; i++) {
c->current_const[i].index = -1;
c->current_const[i].reg = get_tmp(c);
}
}
if (INTEL_DEBUG & DEBUG_VS) {
_mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
_mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
@ -165,28 +209,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
}
static struct brw_reg get_tmp( struct brw_vs_compile *c )
{
struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
if (++c->last_tmp > c->prog_data.total_grf)
c->prog_data.total_grf = c->last_tmp;
return tmp;
}
static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
{
if (tmp.nr == c->last_tmp-1)
c->last_tmp--;
}
static void release_tmps( struct brw_vs_compile *c )
{
c->last_tmp = c->first_tmp;
}
/**
* If an instruction uses a temp reg both as a src and the dest, we
* sometimes need to allocate an intermediate temporary.
@ -673,13 +695,59 @@ static void emit_nrm( struct brw_vs_compile *c,
}
/**
 * Return the register holding the constant referenced by source operand
 * 'argIndex' of 'inst', fetching it from the VS constant buffer first if
 * the per-operand slot does not already hold that constant.
 *
 * Each operand position has its own entry in c->current_const[]; the
 * fetch is skipped when the entry's recorded index equals the operand's
 * index.
 *
 * \param c         VS compile state
 * \param inst      instruction whose source operand is being resolved
 * \param argIndex  which source operand of 'inst' to resolve
 * \return the slot's register with a <0,4,0> region and subnr 0
 */
static struct brw_reg
get_constant(struct brw_vs_compile *c,
             const struct prog_instruction *inst,
             GLuint argIndex)
{
   const struct prog_src_register *src = &inst->SrcReg[argIndex];
   struct brw_compile *p = &c->func;
   struct brw_reg const_reg;

   if (c->current_const[argIndex].index != src->Index) {
      struct brw_reg src_reg = get_tmp(c);
      struct brw_reg saved_r0 = get_tmp(c);

      c->current_const[argIndex].index = src->Index;

      /* brw_dp_READ_4_vs() writes the message header offset into r0,
       * so save r0 here and restore it after the read.
       */
      brw_MOV(p, saved_r0, brw_vec8_grf(0, 0));

      /* need to fetch the constant now */
      brw_dp_READ_4_vs(p,
                       c->current_const[argIndex].reg, /* writeback dest */
                       src_reg,                        /* src reg */
                       1,                              /* msg_reg */
                       src->RelAddr,                   /* relative indexing? */
                       16 * src->Index,                /* byte offset */
                       SURF_INDEX_VERT_CONST_BUFFER    /* binding table index */
                       );

      brw_MOV(p, brw_vec8_grf(0, 0), saved_r0);

      /* release_tmp() only frees the most recently allocated temporary,
       * so release in reverse order of allocation; otherwise src_reg
       * would stay allocated.
       */
      release_tmp(c, saved_r0);
      release_tmp(c, src_reg);
   }

   /* replicate lower four floats into upper four floats (to get XYZWXYZW) */
   const_reg = c->current_const[argIndex].reg;
   const_reg = stride(const_reg, 0, 4, 0);
   const_reg.subnr = 0;

   return const_reg;
}
/* TODO: relative addressing!
*/
static struct brw_reg get_reg( struct brw_vs_compile *c,
gl_register_file file,
GLuint index )
{
switch (file) {
case PROGRAM_TEMPORARY:
case PROGRAM_INPUT:
@ -708,13 +776,63 @@ static struct brw_reg get_reg( struct brw_vs_compile *c,
}
/**
 * Look up the brw register for source operand number 'argIndex' of the
 * given instruction.
 *
 * Temporaries, inputs and outputs come straight from c->regs[].  State
 * vars, constants and uniforms come either from the constant buffer path
 * (get_constant()) or from the PROGRAM_STATE_VAR registers, depending on
 * c->use_const_buffer.  Undefined operands yield the null register.
 *
 * TODO: relative addressing!
 */
static struct brw_reg
get_src_reg( struct brw_vs_compile *c,
             const struct prog_instruction *inst,
             GLuint argIndex )
{
   const struct prog_src_register *operand = &inst->SrcReg[argIndex];
   const GLuint file = operand->File;
   const GLint index = operand->Index;

   switch (file) {
   case PROGRAM_TEMPORARY:
   case PROGRAM_INPUT:
   case PROGRAM_OUTPUT:
      assert(c->regs[file][index].nr != 0);
      return c->regs[file][index];

   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT:
   case PROGRAM_UNIFORM:
      if (!c->use_const_buffer) {
         /* constants live in a section of the GRF */
         assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
         return c->regs[PROGRAM_STATE_VAR][index];
      }
      /* constants are fetched from a real constant buffer */
      return get_constant(c, inst, argIndex);

   case PROGRAM_ADDRESS:
      assert(index == 0);
      return c->regs[file][index];

   case PROGRAM_UNDEFINED:
      /* expected: callers loop over all three src args */
      return brw_null_reg();

   case PROGRAM_LOCAL_PARAM:
   case PROGRAM_ENV_PARAM:
   case PROGRAM_WRITE_ONLY:
   default:
      /* register files that are not handled here */
      assert(0);
      return brw_null_reg();
   }
}
/**
* Indirect addressing: get reg[[arg] + offset].
*/
static struct brw_reg deref( struct brw_vs_compile *c,
struct brw_reg arg,
GLint offset)
{
struct brw_compile *p = &c->func;
struct brw_reg tmp = vec4(get_tmp(c));
struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW);
struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
struct brw_reg indirect = brw_vec4_indirect(0,0);
@ -758,22 +876,29 @@ static void emit_arl( struct brw_vs_compile *c,
}
/* Will return mangled results for SWZ op. The emit_swz() function
/**
* Return the brw reg for the given instruction's src argument.
* Will return mangled results for SWZ op. The emit_swz() function
* ignores this result and recalculates taking extended swizzles into
* account.
*/
static struct brw_reg get_arg( struct brw_vs_compile *c,
struct prog_src_register *src )
const struct prog_instruction *inst,
GLuint argIndex )
{
const struct prog_src_register *src = &inst->SrcReg[argIndex];
struct brw_reg reg;
if (src->File == PROGRAM_UNDEFINED)
return brw_null_reg();
if (src->RelAddr)
if (src->RelAddr) {
/* XXX fix */
reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
else
reg = get_reg(c, src->File, src->Index);
}
else {
reg = get_src_reg(c, inst, argIndex);
}
/* Convert 3-bit swizzle to 2-bit.
*/
@ -790,10 +915,28 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
}
/**
* Get brw register for the given program dest register.
*/
static struct brw_reg get_dst( struct brw_vs_compile *c,
struct prog_dst_register dst )
{
struct brw_reg reg = get_reg(c, dst.File, dst.Index);
struct brw_reg reg;
switch (dst.File) {
case PROGRAM_TEMPORARY:
case PROGRAM_OUTPUT:
assert(c->regs[dst.File][dst.Index].nr != 0);
reg = c->regs[dst.File][dst.Index];
break;
case PROGRAM_UNDEFINED:
/* we may hit this for OPCODE_END, OPCODE_KIL, etc */
reg = brw_null_reg();
break;
default:
assert(0);
reg = brw_null_reg();
}
reg.dw1.bits.writemask = dst.WriteMask;
@ -803,8 +946,10 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
static void emit_swz( struct brw_vs_compile *c,
struct brw_reg dst,
struct prog_src_register src )
const struct prog_instruction *inst)
{
const GLuint argIndex = 0;
const struct prog_src_register src = inst->SrcReg[argIndex];
struct brw_compile *p = &c->func;
GLuint zeros_mask = 0;
GLuint ones_mask = 0;
@ -847,7 +992,7 @@ static void emit_swz( struct brw_vs_compile *c,
if (src.RelAddr)
arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
else
arg0 = get_reg(c, src.File, src.Index);
arg0 = get_src_reg(c, inst, argIndex);
arg0 = brw_swizzle(arg0,
src_swz[0], src_swz[1],
@ -1053,7 +1198,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
args[i] = c->output_regs[index].reg;
else
args[i] = get_arg(c, src);
args[i] = get_arg(c, inst, i);
}
/* Get dest regs. Note that it is possible for a reg to be both
@ -1181,7 +1326,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
/* The args[0] value can't be used here as it won't have
* correctly encoded the full swizzle:
*/
emit_swz(c, dst, inst->SrcReg[0] );
emit_swz(c, dst, inst);
break;
case OPCODE_TRUNC:
/* round toward zero */

View file

@ -44,6 +44,8 @@ struct brw_vs_unit_key {
unsigned int curbe_offset;
unsigned int nr_urb_entries, urb_size;
unsigned int nr_surfaces;
};
static void
@ -62,6 +64,9 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
key->nr_urb_entries = brw->urb.nr_vs_entries;
key->urb_size = brw->urb.vsize;
/* BRW_NEW_NR_VS_SURFACES */
key->nr_surfaces = brw->vs.nr_surfaces;
/* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
if (ctx->Transform.ClipPlanesEnabled) {
/* Note that we read in the userclip planes as well, hence
@ -92,6 +97,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
* brw_urb_WRITE() results.
*/
vs.thread1.single_program_flow = 0;
vs.thread1.binding_table_entry_count = key->nr_surfaces;
vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
vs.thread3.dispatch_grf_start_reg = 1;
@ -158,6 +165,7 @@ const struct brw_tracked_state brw_vs_unit = {
.dirty = {
.mesa = _NEW_TRANSFORM,
.brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_NR_VS_SURFACES |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_VS_PROG
},

View file

@ -79,6 +79,7 @@ static void brw_destroy_context( struct intel_context *intel )
dri_bo_release(&brw->curbe.curbe_bo);
dri_bo_release(&brw->vs.prog_bo);
dri_bo_release(&brw->vs.state_bo);
dri_bo_release(&brw->vs.bind_bo);
dri_bo_release(&brw->gs.prog_bo);
dri_bo_release(&brw->gs.state_bo);
dri_bo_release(&brw->clip.prog_bo);

View file

@ -290,7 +290,7 @@ const struct brw_tracked_state brw_wm_unit = {
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_CURBE_OFFSETS |
BRW_NEW_NR_SURFACES),
BRW_NEW_NR_WM_SURFACES),
.cache = (CACHE_NEW_WM_PROG |
CACHE_NEW_SAMPLER)

View file

@ -176,7 +176,11 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
}
}
struct brw_wm_surface_key {
/**
* Use same key for WM and VS surfaces.
*/
struct brw_surface_key {
GLenum target, depthmode;
dri_bo *bo;
GLint format, internal_format;
@ -187,6 +191,7 @@ struct brw_wm_surface_key {
GLuint offset;
};
static void
brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
{
@ -208,7 +213,7 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
static dri_bo *
brw_create_texture_surface( struct brw_context *brw,
struct brw_wm_surface_key *key )
struct brw_surface_key *key )
{
struct brw_surface_state surf;
dri_bo *bo;
@ -287,7 +292,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
struct intel_texture_object *intelObj = intel_texture_object(tObj);
struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
struct brw_wm_surface_key key;
struct brw_surface_key key;
const GLuint surf = SURF_INDEX_TEXTURE(unit);
memset(&key, 0, sizeof(key));
@ -328,12 +333,12 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
/**
* Create the constant buffer surface. Fragment shader constanst will be
* Create the constant buffer surface. Vertex/fragment shader constants will be
* read from this buffer with Data Port Read instructions/messages.
*/
static dri_bo *
brw_create_constant_surface( struct brw_context *brw,
struct brw_wm_surface_key *key )
struct brw_surface_key *key )
{
const GLint w = key->width - 1;
struct brw_surface_state surf;
@ -345,8 +350,6 @@ brw_create_constant_surface( struct brw_context *brw,
surf.ss0.surface_type = BRW_SURFACE_BUFFER;
surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
/* This is ok for all textures with channel width 8bit or less:
*/
assert(key->bo);
if (key->bo)
surf.ss1.base_addr = key->bo->offset; /* reloc */
@ -356,8 +359,8 @@ brw_create_constant_surface( struct brw_context *brw,
surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */
surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */
surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */
surf.ss3.pitch = (key->pitch * key->cpp) - 1;
brw_set_surface_tiling(&surf, key->tiling);
surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
key, sizeof(*key),
@ -379,17 +382,17 @@ brw_create_constant_surface( struct brw_context *brw,
/**
* Update the surface state for a constant buffer.
* Update the surface state for a WM constant buffer.
* The constant buffer will be (re)allocated here if needed.
*/
static dri_bo *
brw_update_constant_surface( GLcontext *ctx,
GLuint surf,
dri_bo *const_buffer,
const struct gl_program_parameter_list *params)
brw_update_wm_constant_surface( GLcontext *ctx,
GLuint surf,
dri_bo *const_buffer,
const struct gl_program_parameter_list *params)
{
struct brw_context *brw = brw_context(ctx);
struct brw_wm_surface_key key;
struct brw_surface_key key;
struct intel_context *intel = &brw->intel;
const int size = params->NumParameters * 4 * sizeof(GLfloat);
@ -402,7 +405,7 @@ brw_update_constant_surface( GLcontext *ctx,
/* alloc new buffer if needed */
if (!const_buffer) {
const_buffer =
drm_intel_bo_alloc(intel->bufmgr, "vp/fp_const_buffer", size, 64);
drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", size, 64);
}
memset(&key, 0, sizeof(key));
@ -436,6 +439,66 @@ brw_update_constant_surface( GLcontext *ctx,
}
/**
 * Refresh the surface state describing the vertex shader's constant
 * buffer.  The buffer itself is dropped and reallocated when it is
 * missing or smaller than the parameter list requires.
 *
 * \param ctx           GL context
 * \param surf          VS binding table slot (must be 0)
 * \param const_buffer  current constant buffer, or NULL
 * \param params        parameter list that determines the buffer size
 * \return the (possibly newly allocated) constant buffer
 */
static dri_bo *
brw_update_vs_constant_surface( GLcontext *ctx,
                                GLuint surf,
                                dri_bo *const_buffer,
                                const struct gl_program_parameter_list *params)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_context *intel = &brw->intel;
   struct brw_surface_key skey;
   /* one float[4] vector per parameter */
   const int size = params->NumParameters * 4 * sizeof(GLfloat);

   assert(surf == 0);

   /* an undersized buffer is discarded... */
   if (const_buffer && const_buffer->size < size) {
      dri_bo_unreference(const_buffer);
      const_buffer = NULL;
   }

   /* ...and replaced by a fresh one when none is left */
   if (!const_buffer) {
      const_buffer =
         drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64);
   }

   /* The whole key is used for the cache lookup, so clear every byte
    * before filling in the fields.
    */
   memset(&skey, 0, sizeof(skey));
   skey.bo = const_buffer;
   skey.format = MESA_FORMAT_RGBA_FLOAT32;
   skey.internal_format = GL_RGBA;
   skey.depthmode = GL_NONE;
   skey.width = params->NumParameters;
   skey.height = 1;
   skey.depth = 1;
   skey.pitch = params->NumParameters;
   skey.cpp = 16;

   /* swap the old surface state for a cached or newly created one */
   dri_bo_unreference(brw->vs.surf_bo[surf]);
   brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
                                            &skey, sizeof(skey),
                                            &skey.bo, skey.bo ? 1 : 0,
                                            NULL);
   if (brw->vs.surf_bo[surf] == NULL) {
      brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &skey);
   }

   return const_buffer;
}
/**
* Sets up a surface state structure to point at the given region.
* While it is only used for the front/back buffer currently, it should be
@ -515,7 +578,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
/* Key size will never match key size for textures, so we're safe. */
brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
&key, sizeof(key),
&key, sizeof(key),
&region_bo, 1,
&surf, sizeof(surf),
NULL, NULL);
@ -544,6 +607,8 @@ brw_wm_get_binding_table(struct brw_context *brw)
{
dri_bo *bind_bo;
assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->wm.surf_bo, brw->wm.nr_surfaces,
@ -603,25 +668,13 @@ static void prepare_wm_surfaces(struct brw_context *brw )
old_nr_surfaces = brw->wm.nr_surfaces;
brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
/* Update surface / buffer for vertex shader constant buffer */
{
const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER;
struct brw_vertex_program *vp =
(struct brw_vertex_program *) brw->vertex_program;
vp->const_buffer =
brw_update_constant_surface(ctx, surf, vp->const_buffer,
vp->program.Base.Parameters);
brw->wm.nr_surfaces = surf + 1;
}
/* Update surface / buffer for fragment shader constant buffer */
{
const GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
struct brw_fragment_program *fp =
(struct brw_fragment_program *) brw->fragment_program;
fp->const_buffer =
brw_update_constant_surface(ctx, surf, fp->const_buffer,
brw_update_wm_constant_surface(ctx, surf, fp->const_buffer,
fp->program.Base.Parameters);
brw->wm.nr_surfaces = surf + 1;
@ -655,17 +708,103 @@ static void prepare_wm_surfaces(struct brw_context *brw )
brw->wm.bind_bo = brw_wm_get_binding_table(brw);
if (brw->wm.nr_surfaces != old_nr_surfaces)
brw->state.dirty.brw |= BRW_NEW_NR_SURFACES;
brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
}
/**
* Constructs the binding table for the VS surface state.
*/
static dri_bo *
brw_vs_get_binding_table(struct brw_context *brw)
{
dri_bo *bind_bo;
assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF);
bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->vs.surf_bo, brw->vs.nr_surfaces,
NULL);
if (bind_bo == NULL) {
GLuint data_size = brw->vs.nr_surfaces * sizeof(GLuint);
uint32_t *data = malloc(data_size);
int i;
for (i = 0; i < brw->vs.nr_surfaces; i++)
if (brw->vs.surf_bo[i])
data[i] = brw->vs.surf_bo[i]->offset;
else
data[i] = 0;
bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->vs.surf_bo, brw->vs.nr_surfaces,
data, data_size,
NULL, NULL);
/* Emit binding table relocations to surface state */
for (i = 0; i < BRW_VS_MAX_SURF; i++) {
if (brw->vs.surf_bo[i] != NULL) {
dri_bo_emit_reloc(bind_bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
0,
i * sizeof(GLuint),
brw->vs.surf_bo[i]);
}
}
free(data);
}
return bind_bo;
}
/**
 * Vertex shader surfaces.  Just constant buffer for now.  Could add vertex
 * shader textures in the future.
 *
 * Updates the VS constant buffer surface, rebuilds the VS binding table
 * and flags BRW_NEW_NR_VS_SURFACES when the number of VS surfaces has
 * changed (the same scheme prepare_wm_surfaces() uses for the WM).
 */
static void prepare_vs_surfaces(struct brw_context *brw )
{
   GLcontext *ctx = &brw->intel.ctx;
   const GLuint old_nr_surfaces = brw->vs.nr_surfaces;

   /* Update surface / buffer for vertex shader constant buffer */
   {
      const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER;
      struct brw_vertex_program *vp =
         (struct brw_vertex_program *) brw->vertex_program;

      vp->const_buffer =
         brw_update_vs_constant_surface(ctx, surf, vp->const_buffer,
                                        vp->program.Base.Parameters);

      brw->vs.nr_surfaces = 1;
   }

   /* replace the previous binding table with the current one */
   dri_bo_unreference(brw->vs.bind_bo);
   brw->vs.bind_bo = brw_vs_get_binding_table(brw);

   /* only signal a change when the surface count actually differs */
   if (brw->vs.nr_surfaces != old_nr_surfaces)
      brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
}
/**
 * Prepare hook covering both shader stages: runs the WM surface setup
 * first, then the VS surface setup.
 */
static void
prepare_surfaces(struct brw_context *brw)
{
prepare_wm_surfaces(brw);
prepare_vs_surfaces(brw);
}
const struct brw_tracked_state brw_wm_surfaces = {
.dirty = {
.mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS,
.mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
.prepare = prepare_wm_surfaces,
.prepare = prepare_surfaces,
};