mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-01 22:40:09 +01:00
i965: checkpoint commit: VS constant buffers
Hook up a constant buffer, binding table, etc. for the VS unit. This will allow using large constant buffers with vertex shaders. The new code is disabled at this time (use_const_buffer=FALSE).
This commit is contained in:
parent
43c7ffaea6
commit
cafea75280
11 changed files with 477 additions and 91 deletions
|
|
@ -141,7 +141,8 @@ struct brw_context;
|
|||
#define BRW_NEW_BATCH 0x10000
|
||||
/** brw->depth_region updated */
|
||||
#define BRW_NEW_DEPTH_BUFFER 0x20000
|
||||
#define BRW_NEW_NR_SURFACES 0x40000
|
||||
#define BRW_NEW_NR_WM_SURFACES 0x40000
|
||||
#define BRW_NEW_NR_VS_SURFACES 0x80000
|
||||
|
||||
struct brw_state_flags {
|
||||
/** State update flags signalled by mesa internals */
|
||||
|
|
@ -245,20 +246,30 @@ struct brw_vs_ouput_sizes {
|
|||
#define BRW_MAX_TEX_UNIT 16
|
||||
|
||||
/**
|
||||
* Size of our surface binding table.
|
||||
* Size of our surface binding table for the WM.
|
||||
* This contains pointers to the drawing surfaces and current texture
|
||||
* objects and shader constant buffers (+2).
|
||||
*/
|
||||
#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 2)
|
||||
#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
|
||||
|
||||
/**
|
||||
* Helpers to convert drawing buffers, textures and constant buffers
|
||||
* to surface binding table indexes.
|
||||
* to surface binding table indexes, for WM.
|
||||
*/
|
||||
#define SURF_INDEX_DRAW(d) (d)
|
||||
#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS + 0)
|
||||
#define SURF_INDEX_VERT_CONST_BUFFER (MAX_DRAW_BUFFERS + 1)
|
||||
#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 2 + t)
|
||||
#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS)
|
||||
#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t))
|
||||
|
||||
/**
|
||||
* Size of surface binding table for the VS.
|
||||
* Only one constant buffer for now.
|
||||
*/
|
||||
#define BRW_VS_MAX_SURF 1
|
||||
|
||||
/**
|
||||
* Only a VS constant buffer
|
||||
*/
|
||||
#define SURF_INDEX_VERT_CONST_BUFFER 0
|
||||
|
||||
|
||||
enum brw_cache_id {
|
||||
|
|
@ -566,6 +577,11 @@ struct brw_context
|
|||
|
||||
dri_bo *prog_bo;
|
||||
dri_bo *state_bo;
|
||||
|
||||
/** Binding table of pointers to surf_bo entries */
|
||||
dri_bo *bind_bo;
|
||||
dri_bo *surf_bo[BRW_VS_MAX_SURF];
|
||||
GLuint nr_surfaces;
|
||||
} vs;
|
||||
|
||||
struct {
|
||||
|
|
|
|||
|
|
@ -357,6 +357,7 @@ update_constant_buffer(struct brw_context *brw,
|
|||
}
|
||||
|
||||
|
||||
/** Copy current vertex program's parameters into the constant buffer */
|
||||
static void
|
||||
update_vertex_constant_buffer(struct brw_context *brw)
|
||||
{
|
||||
|
|
@ -366,6 +367,7 @@ update_vertex_constant_buffer(struct brw_context *brw)
|
|||
}
|
||||
|
||||
|
||||
/** Copy current fragment program's parameters into the constant buffer */
|
||||
static void
|
||||
update_fragment_constant_buffer(struct brw_context *brw)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -862,9 +862,18 @@ void brw_dp_READ_4( struct brw_compile *p,
|
|||
struct brw_reg dest,
|
||||
GLuint msg_reg_nr,
|
||||
GLboolean relAddr,
|
||||
GLuint scratch_offset,
|
||||
GLuint location,
|
||||
GLuint bind_table_index );
|
||||
|
||||
/* XXX this function is temporary - merge with brw_dp_READ_4() above. */
|
||||
void brw_dp_READ_4_vs( struct brw_compile *p,
|
||||
struct brw_reg dest,
|
||||
struct brw_reg src,
|
||||
GLuint msg_reg_nr,
|
||||
GLboolean relAddr,
|
||||
GLuint location,
|
||||
GLuint bind_table_index );
|
||||
|
||||
void brw_dp_WRITE_16( struct brw_compile *p,
|
||||
struct brw_reg src,
|
||||
GLuint msg_reg_nr,
|
||||
|
|
|
|||
|
|
@ -952,7 +952,7 @@ void brw_dp_READ_16( struct brw_compile *p,
|
|||
|
||||
/**
|
||||
* Read a float[4] vector from the data port Data Cache (const buffer).
|
||||
* Scratch offset should be a multiple of 16.
|
||||
* Location (in buffer) should be a multiple of 16.
|
||||
* Used for fetching shader constants.
|
||||
* If relAddr is true, we'll do an indirect fetch using the address register.
|
||||
*/
|
||||
|
|
@ -960,7 +960,7 @@ void brw_dp_READ_4( struct brw_compile *p,
|
|||
struct brw_reg dest,
|
||||
GLuint msg_reg_nr,
|
||||
GLboolean relAddr,
|
||||
GLuint scratch_offset,
|
||||
GLuint location,
|
||||
GLuint bind_table_index )
|
||||
{
|
||||
{
|
||||
|
|
@ -971,7 +971,7 @@ void brw_dp_READ_4( struct brw_compile *p,
|
|||
/* set message header global offset field (reg 0, element 2) */
|
||||
brw_MOV(p,
|
||||
retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
|
||||
brw_imm_d(scratch_offset));
|
||||
brw_imm_d(location));
|
||||
brw_pop_insn_state(p);
|
||||
}
|
||||
|
||||
|
|
@ -1001,6 +1001,66 @@ void brw_dp_READ_4( struct brw_compile *p,
|
|||
}
|
||||
|
||||
|
||||
/* XXX this function is temporary - merge with brw_dp_READ_4() above. */
|
||||
void brw_dp_READ_4_vs(struct brw_compile *p,
|
||||
struct brw_reg dest,
|
||||
struct brw_reg src,
|
||||
GLuint msg_reg_nr,
|
||||
GLboolean relAddr,
|
||||
GLuint location,
|
||||
GLuint bind_table_index)
|
||||
{
|
||||
{
|
||||
brw_push_insn_state(p);
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
||||
brw_set_mask_control(p, BRW_MASK_DISABLE);
|
||||
|
||||
/*src.nr = 0;*/
|
||||
|
||||
/* set message header global offset field (reg 0, element 2) */
|
||||
brw_MOV(p,
|
||||
#if 1
|
||||
retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
|
||||
#elif 0
|
||||
retype(brw_vec1_grf(src.nr, 2), BRW_REGISTER_TYPE_UD),
|
||||
#endif
|
||||
brw_imm_d(location));
|
||||
|
||||
brw_pop_insn_state(p);
|
||||
}
|
||||
|
||||
{
|
||||
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
|
||||
|
||||
insn->header.predicate_control = BRW_PREDICATE_NONE;
|
||||
insn->header.compression_control = BRW_COMPRESSION_NONE;
|
||||
insn->header.destreg__conditonalmod = msg_reg_nr;
|
||||
insn->header.mask_control = BRW_MASK_DISABLE;
|
||||
|
||||
/* cast dest to a uword[8] vector */
|
||||
// dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
|
||||
|
||||
brw_set_dest(insn, dest);
|
||||
#if 1
|
||||
brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
|
||||
#elif 0
|
||||
brw_set_src0(insn, retype(brw_vec8_grf(src.nr, 0), BRW_REGISTER_TYPE_UW));
|
||||
#endif
|
||||
|
||||
printf("vs const read msg, location %u, msg_reg_nr %d\n", location, msg_reg_nr);
|
||||
brw_set_dp_read_message(insn,
|
||||
bind_table_index,
|
||||
0, /* msg_control (0 means 1 Oword) */
|
||||
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
|
||||
0, /* source cache = data cache */
|
||||
1, /* msg_length */
|
||||
1, /* response_length (1 Oword) */
|
||||
0); /* eot */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void brw_fb_WRITE(struct brw_compile *p,
|
||||
struct brw_reg dest,
|
||||
GLuint msg_reg_nr,
|
||||
|
|
|
|||
|
|
@ -101,6 +101,7 @@ const struct brw_tracked_state brw_drawing_rect = {
|
|||
|
||||
static void prepare_binding_table_pointers(struct brw_context *brw)
|
||||
{
|
||||
brw_add_validated_bo(brw, brw->vs.bind_bo);
|
||||
brw_add_validated_bo(brw, brw->wm.bind_bo);
|
||||
}
|
||||
|
||||
|
|
@ -117,13 +118,11 @@ static void upload_binding_table_pointers(struct brw_context *brw)
|
|||
|
||||
BEGIN_BATCH(6, IGNORE_CLIPRECTS);
|
||||
OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
|
||||
OUT_BATCH(0); /* vs */
|
||||
OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
|
||||
OUT_BATCH(0); /* gs */
|
||||
OUT_BATCH(0); /* clip */
|
||||
OUT_BATCH(0); /* sf */
|
||||
OUT_RELOC(brw->wm.bind_bo,
|
||||
I915_GEM_DOMAIN_SAMPLER, 0,
|
||||
0);
|
||||
OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -75,6 +75,13 @@ struct brw_vs_compile {
|
|||
|
||||
struct brw_reg userplane[6];
|
||||
|
||||
/** using a real constant buffer? */
|
||||
GLboolean use_const_buffer;
|
||||
/** we may need up to 3 constants per instruction (if use_const_buffer) */
|
||||
struct {
|
||||
GLint index;
|
||||
struct brw_reg reg;
|
||||
} current_const[3];
|
||||
};
|
||||
|
||||
void brw_vs_emit( struct brw_vs_compile *c );
|
||||
|
|
|
|||
|
|
@ -38,8 +38,31 @@
|
|||
#include "brw_vs.h"
|
||||
|
||||
|
||||
static struct brw_reg get_tmp( struct brw_vs_compile *c )
|
||||
{
|
||||
struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
|
||||
|
||||
/* Do things as simply as possible. Allocate and populate all regs
|
||||
if (++c->last_tmp > c->prog_data.total_grf)
|
||||
c->prog_data.total_grf = c->last_tmp;
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
|
||||
{
|
||||
if (tmp.nr == c->last_tmp-1)
|
||||
c->last_tmp--;
|
||||
}
|
||||
|
||||
static void release_tmps( struct brw_vs_compile *c )
|
||||
{
|
||||
c->last_tmp = c->first_tmp;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Preallocate GRF register before code emit.
|
||||
* Do things as simply as possible. Allocate and populate all regs
|
||||
* ahead of time.
|
||||
*/
|
||||
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
|
||||
|
|
@ -47,6 +70,14 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
|
|||
GLuint i, reg = 0, mrf;
|
||||
GLuint nr_params;
|
||||
|
||||
#if 0
|
||||
if (c->vp->program.Base.Parameters->NumParameters >= 6)
|
||||
c->use_const_buffer = 1;
|
||||
else
|
||||
#endif
|
||||
c->use_const_buffer = GL_FALSE;
|
||||
/*printf("use_const_buffer = %d\n", c->use_const_buffer);*/
|
||||
|
||||
/* r0 -- reserved as usual
|
||||
*/
|
||||
c->r0 = brw_vec8_grf(reg, 0);
|
||||
|
|
@ -66,13 +97,19 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
|
|||
|
||||
/* Vertex program parameters from curbe:
|
||||
*/
|
||||
nr_params = c->vp->program.Base.Parameters->NumParameters;
|
||||
for (i = 0; i < nr_params; i++) {
|
||||
c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
|
||||
}
|
||||
reg += (nr_params + 1) / 2;
|
||||
|
||||
c->prog_data.curb_read_length = reg - 1;
|
||||
if (c->use_const_buffer) {
|
||||
/* get constants from a real constant buffer */
|
||||
c->prog_data.curb_read_length = 0;
|
||||
}
|
||||
else {
|
||||
/* use a section of the GRF for constants */
|
||||
nr_params = c->vp->program.Base.Parameters->NumParameters;
|
||||
for (i = 0; i < nr_params; i++) {
|
||||
c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
|
||||
}
|
||||
reg += (nr_params + 1) / 2;
|
||||
c->prog_data.curb_read_length = reg - 1;
|
||||
}
|
||||
|
||||
/* Allocate input regs:
|
||||
*/
|
||||
|
|
@ -157,6 +194,13 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
|
|||
c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4;
|
||||
c->prog_data.total_grf = reg;
|
||||
|
||||
if (c->use_const_buffer) {
|
||||
for (i = 0; i < 3; i++) {
|
||||
c->current_const[i].index = -1;
|
||||
c->current_const[i].reg = get_tmp(c);
|
||||
}
|
||||
}
|
||||
|
||||
if (INTEL_DEBUG & DEBUG_VS) {
|
||||
_mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
|
||||
_mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
|
||||
|
|
@ -165,28 +209,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
|
|||
}
|
||||
|
||||
|
||||
static struct brw_reg get_tmp( struct brw_vs_compile *c )
|
||||
{
|
||||
struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
|
||||
|
||||
if (++c->last_tmp > c->prog_data.total_grf)
|
||||
c->prog_data.total_grf = c->last_tmp;
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
|
||||
{
|
||||
if (tmp.nr == c->last_tmp-1)
|
||||
c->last_tmp--;
|
||||
}
|
||||
|
||||
static void release_tmps( struct brw_vs_compile *c )
|
||||
{
|
||||
c->last_tmp = c->first_tmp;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* If an instruction uses a temp reg both as a src and the dest, we
|
||||
* sometimes need to allocate an intermediate temporary.
|
||||
|
|
@ -673,13 +695,59 @@ static void emit_nrm( struct brw_vs_compile *c,
|
|||
}
|
||||
|
||||
|
||||
static struct brw_reg
|
||||
get_constant(struct brw_vs_compile *c,
|
||||
const struct prog_instruction *inst,
|
||||
GLuint argIndex)
|
||||
{
|
||||
const struct prog_src_register *src = &inst->SrcReg[argIndex];
|
||||
struct brw_compile *p = &c->func;
|
||||
struct brw_reg const_reg;
|
||||
|
||||
if (c->current_const[argIndex].index != src->Index) {
|
||||
struct brw_reg src_reg = get_tmp(c);
|
||||
struct brw_reg t = get_tmp(c);
|
||||
|
||||
c->current_const[argIndex].index = src->Index;
|
||||
|
||||
brw_MOV(p, t, brw_vec8_grf(0, 0));/*SAVE*/
|
||||
|
||||
#if 0
|
||||
printf(" fetch const[%d] for arg %d into reg %d\n",
|
||||
src->Index, argIndex, c->current_const[argIndex].reg.nr);
|
||||
#endif
|
||||
|
||||
/* need to fetch the constant now */
|
||||
brw_dp_READ_4_vs(p,
|
||||
c->current_const[argIndex].reg, /* writeback dest */
|
||||
src_reg, /* src reg */
|
||||
1, /* msg_reg */
|
||||
src->RelAddr, /* relative indexing? */
|
||||
16 * src->Index, /* byte offset */
|
||||
SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
|
||||
);
|
||||
|
||||
brw_MOV(p, brw_vec8_grf(0, 0), t);/*RESTORE*/
|
||||
release_tmp(c, src_reg);
|
||||
release_tmp(c, t);
|
||||
}
|
||||
|
||||
/* replicate lower four floats into upper four floats (to get XYZWXYZW) */
|
||||
const_reg = c->current_const[argIndex].reg;
|
||||
const_reg = stride(const_reg, 0, 4, 0);
|
||||
const_reg.subnr = 0;
|
||||
|
||||
return const_reg;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* TODO: relative addressing!
|
||||
*/
|
||||
static struct brw_reg get_reg( struct brw_vs_compile *c,
|
||||
gl_register_file file,
|
||||
GLuint index )
|
||||
{
|
||||
|
||||
switch (file) {
|
||||
case PROGRAM_TEMPORARY:
|
||||
case PROGRAM_INPUT:
|
||||
|
|
@ -708,13 +776,63 @@ static struct brw_reg get_reg( struct brw_vs_compile *c,
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get brw reg corresponding to the instruction's [argIndex] src reg.
|
||||
* TODO: relative addressing!
|
||||
*/
|
||||
static struct brw_reg
|
||||
get_src_reg( struct brw_vs_compile *c,
|
||||
const struct prog_instruction *inst,
|
||||
GLuint argIndex )
|
||||
{
|
||||
const GLuint file = inst->SrcReg[argIndex].File;
|
||||
const GLint index = inst->SrcReg[argIndex].Index;
|
||||
|
||||
switch (file) {
|
||||
case PROGRAM_TEMPORARY:
|
||||
case PROGRAM_INPUT:
|
||||
case PROGRAM_OUTPUT:
|
||||
assert(c->regs[file][index].nr != 0);
|
||||
return c->regs[file][index];
|
||||
case PROGRAM_STATE_VAR:
|
||||
case PROGRAM_CONSTANT:
|
||||
case PROGRAM_UNIFORM:
|
||||
if (c->use_const_buffer) {
|
||||
return get_constant(c, inst, argIndex);
|
||||
}
|
||||
else {
|
||||
assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
|
||||
return c->regs[PROGRAM_STATE_VAR][index];
|
||||
}
|
||||
case PROGRAM_ADDRESS:
|
||||
assert(index == 0);
|
||||
return c->regs[file][index];
|
||||
|
||||
case PROGRAM_UNDEFINED:
|
||||
/* this is a normal case since we loop over all three src args */
|
||||
return brw_null_reg();
|
||||
|
||||
case PROGRAM_LOCAL_PARAM:
|
||||
case PROGRAM_ENV_PARAM:
|
||||
case PROGRAM_WRITE_ONLY:
|
||||
default:
|
||||
assert(0);
|
||||
return brw_null_reg();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Indirect addressing: get reg[[arg] + offset].
|
||||
*/
|
||||
static struct brw_reg deref( struct brw_vs_compile *c,
|
||||
struct brw_reg arg,
|
||||
GLint offset)
|
||||
{
|
||||
struct brw_compile *p = &c->func;
|
||||
struct brw_reg tmp = vec4(get_tmp(c));
|
||||
struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW);
|
||||
struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
|
||||
struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
|
||||
GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
|
||||
struct brw_reg indirect = brw_vec4_indirect(0,0);
|
||||
|
||||
|
|
@ -758,22 +876,29 @@ static void emit_arl( struct brw_vs_compile *c,
|
|||
}
|
||||
|
||||
|
||||
/* Will return mangled results for SWZ op. The emit_swz() function
|
||||
/**
|
||||
* Return the brw reg for the given instruction's src argument.
|
||||
* Will return mangled results for SWZ op. The emit_swz() function
|
||||
* ignores this result and recalculates taking extended swizzles into
|
||||
* account.
|
||||
*/
|
||||
static struct brw_reg get_arg( struct brw_vs_compile *c,
|
||||
struct prog_src_register *src )
|
||||
const struct prog_instruction *inst,
|
||||
GLuint argIndex )
|
||||
{
|
||||
const struct prog_src_register *src = &inst->SrcReg[argIndex];
|
||||
struct brw_reg reg;
|
||||
|
||||
if (src->File == PROGRAM_UNDEFINED)
|
||||
return brw_null_reg();
|
||||
|
||||
if (src->RelAddr)
|
||||
if (src->RelAddr) {
|
||||
/* XXX fix */
|
||||
reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
|
||||
else
|
||||
reg = get_reg(c, src->File, src->Index);
|
||||
}
|
||||
else {
|
||||
reg = get_src_reg(c, inst, argIndex);
|
||||
}
|
||||
|
||||
/* Convert 3-bit swizzle to 2-bit.
|
||||
*/
|
||||
|
|
@ -790,10 +915,28 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get brw register for the given program dest register.
|
||||
*/
|
||||
static struct brw_reg get_dst( struct brw_vs_compile *c,
|
||||
struct prog_dst_register dst )
|
||||
{
|
||||
struct brw_reg reg = get_reg(c, dst.File, dst.Index);
|
||||
struct brw_reg reg;
|
||||
|
||||
switch (dst.File) {
|
||||
case PROGRAM_TEMPORARY:
|
||||
case PROGRAM_OUTPUT:
|
||||
assert(c->regs[dst.File][dst.Index].nr != 0);
|
||||
reg = c->regs[dst.File][dst.Index];
|
||||
break;
|
||||
case PROGRAM_UNDEFINED:
|
||||
/* we may hit this for OPCODE_END, OPCODE_KIL, etc */
|
||||
reg = brw_null_reg();
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
reg = brw_null_reg();
|
||||
}
|
||||
|
||||
reg.dw1.bits.writemask = dst.WriteMask;
|
||||
|
||||
|
|
@ -803,8 +946,10 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
|
|||
|
||||
static void emit_swz( struct brw_vs_compile *c,
|
||||
struct brw_reg dst,
|
||||
struct prog_src_register src )
|
||||
const struct prog_instruction *inst)
|
||||
{
|
||||
const GLuint argIndex = 0;
|
||||
const struct prog_src_register src = inst->SrcReg[argIndex];
|
||||
struct brw_compile *p = &c->func;
|
||||
GLuint zeros_mask = 0;
|
||||
GLuint ones_mask = 0;
|
||||
|
|
@ -847,7 +992,7 @@ static void emit_swz( struct brw_vs_compile *c,
|
|||
if (src.RelAddr)
|
||||
arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
|
||||
else
|
||||
arg0 = get_reg(c, src.File, src.Index);
|
||||
arg0 = get_src_reg(c, inst, argIndex);
|
||||
|
||||
arg0 = brw_swizzle(arg0,
|
||||
src_swz[0], src_swz[1],
|
||||
|
|
@ -1053,7 +1198,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
|
|||
if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
|
||||
args[i] = c->output_regs[index].reg;
|
||||
else
|
||||
args[i] = get_arg(c, src);
|
||||
args[i] = get_arg(c, inst, i);
|
||||
}
|
||||
|
||||
/* Get dest regs. Note that it is possible for a reg to be both
|
||||
|
|
@ -1181,7 +1326,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
|
|||
/* The args[0] value can't be used here as it won't have
|
||||
* correctly encoded the full swizzle:
|
||||
*/
|
||||
emit_swz(c, dst, inst->SrcReg[0] );
|
||||
emit_swz(c, dst, inst);
|
||||
break;
|
||||
case OPCODE_TRUNC:
|
||||
/* round toward zero */
|
||||
|
|
|
|||
|
|
@ -44,6 +44,8 @@ struct brw_vs_unit_key {
|
|||
unsigned int curbe_offset;
|
||||
|
||||
unsigned int nr_urb_entries, urb_size;
|
||||
|
||||
unsigned int nr_surfaces;
|
||||
};
|
||||
|
||||
static void
|
||||
|
|
@ -62,6 +64,9 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
|
|||
key->nr_urb_entries = brw->urb.nr_vs_entries;
|
||||
key->urb_size = brw->urb.vsize;
|
||||
|
||||
/* BRW_NEW_NR_VS_SURFACES */
|
||||
key->nr_surfaces = brw->vs.nr_surfaces;
|
||||
|
||||
/* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
|
||||
if (ctx->Transform.ClipPlanesEnabled) {
|
||||
/* Note that we read in the userclip planes as well, hence
|
||||
|
|
@ -92,6 +97,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
|
|||
* brw_urb_WRITE() results.
|
||||
*/
|
||||
vs.thread1.single_program_flow = 0;
|
||||
vs.thread1.binding_table_entry_count = key->nr_surfaces;
|
||||
|
||||
vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
|
||||
vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
|
||||
vs.thread3.dispatch_grf_start_reg = 1;
|
||||
|
|
@ -158,6 +165,7 @@ const struct brw_tracked_state brw_vs_unit = {
|
|||
.dirty = {
|
||||
.mesa = _NEW_TRANSFORM,
|
||||
.brw = (BRW_NEW_CURBE_OFFSETS |
|
||||
BRW_NEW_NR_VS_SURFACES |
|
||||
BRW_NEW_URB_FENCE),
|
||||
.cache = CACHE_NEW_VS_PROG
|
||||
},
|
||||
|
|
|
|||
|
|
@ -79,6 +79,7 @@ static void brw_destroy_context( struct intel_context *intel )
|
|||
dri_bo_release(&brw->curbe.curbe_bo);
|
||||
dri_bo_release(&brw->vs.prog_bo);
|
||||
dri_bo_release(&brw->vs.state_bo);
|
||||
dri_bo_release(&brw->vs.bind_bo);
|
||||
dri_bo_release(&brw->gs.prog_bo);
|
||||
dri_bo_release(&brw->gs.state_bo);
|
||||
dri_bo_release(&brw->clip.prog_bo);
|
||||
|
|
|
|||
|
|
@ -290,7 +290,7 @@ const struct brw_tracked_state brw_wm_unit = {
|
|||
|
||||
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
|
||||
BRW_NEW_CURBE_OFFSETS |
|
||||
BRW_NEW_NR_SURFACES),
|
||||
BRW_NEW_NR_WM_SURFACES),
|
||||
|
||||
.cache = (CACHE_NEW_WM_PROG |
|
||||
CACHE_NEW_SAMPLER)
|
||||
|
|
|
|||
|
|
@ -176,7 +176,11 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
|
|||
}
|
||||
}
|
||||
|
||||
struct brw_wm_surface_key {
|
||||
|
||||
/**
|
||||
* Use same key for WM and VS surfaces.
|
||||
*/
|
||||
struct brw_surface_key {
|
||||
GLenum target, depthmode;
|
||||
dri_bo *bo;
|
||||
GLint format, internal_format;
|
||||
|
|
@ -187,6 +191,7 @@ struct brw_wm_surface_key {
|
|||
GLuint offset;
|
||||
};
|
||||
|
||||
|
||||
static void
|
||||
brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
|
||||
{
|
||||
|
|
@ -208,7 +213,7 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
|
|||
|
||||
static dri_bo *
|
||||
brw_create_texture_surface( struct brw_context *brw,
|
||||
struct brw_wm_surface_key *key )
|
||||
struct brw_surface_key *key )
|
||||
{
|
||||
struct brw_surface_state surf;
|
||||
dri_bo *bo;
|
||||
|
|
@ -287,7 +292,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
|
|||
struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
|
||||
struct intel_texture_object *intelObj = intel_texture_object(tObj);
|
||||
struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
|
||||
struct brw_wm_surface_key key;
|
||||
struct brw_surface_key key;
|
||||
const GLuint surf = SURF_INDEX_TEXTURE(unit);
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
|
|
@ -328,12 +333,12 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
|
|||
|
||||
|
||||
/**
|
||||
* Create the constant buffer surface. Fragment shader constanst will be
|
||||
* Create the constant buffer surface. Vertex/fragment shader constants will be
|
||||
* read from this buffer with Data Port Read instructions/messages.
|
||||
*/
|
||||
static dri_bo *
|
||||
brw_create_constant_surface( struct brw_context *brw,
|
||||
struct brw_wm_surface_key *key )
|
||||
struct brw_surface_key *key )
|
||||
{
|
||||
const GLint w = key->width - 1;
|
||||
struct brw_surface_state surf;
|
||||
|
|
@ -345,8 +350,6 @@ brw_create_constant_surface( struct brw_context *brw,
|
|||
surf.ss0.surface_type = BRW_SURFACE_BUFFER;
|
||||
surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
|
||||
|
||||
/* This is ok for all textures with channel width 8bit or less:
|
||||
*/
|
||||
assert(key->bo);
|
||||
if (key->bo)
|
||||
surf.ss1.base_addr = key->bo->offset; /* reloc */
|
||||
|
|
@ -356,8 +359,8 @@ brw_create_constant_surface( struct brw_context *brw,
|
|||
surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */
|
||||
surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */
|
||||
surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */
|
||||
surf.ss3.pitch = (key->pitch * key->cpp) - 1;
|
||||
brw_set_surface_tiling(&surf, key->tiling);
|
||||
surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
|
||||
brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
|
||||
|
||||
bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
|
||||
key, sizeof(*key),
|
||||
|
|
@ -379,17 +382,17 @@ brw_create_constant_surface( struct brw_context *brw,
|
|||
|
||||
|
||||
/**
|
||||
* Update the surface state for a constant buffer.
|
||||
* Update the surface state for a WM constant buffer.
|
||||
* The constant buffer will be (re)allocated here if needed.
|
||||
*/
|
||||
static dri_bo *
|
||||
brw_update_constant_surface( GLcontext *ctx,
|
||||
GLuint surf,
|
||||
dri_bo *const_buffer,
|
||||
const struct gl_program_parameter_list *params)
|
||||
brw_update_wm_constant_surface( GLcontext *ctx,
|
||||
GLuint surf,
|
||||
dri_bo *const_buffer,
|
||||
const struct gl_program_parameter_list *params)
|
||||
{
|
||||
struct brw_context *brw = brw_context(ctx);
|
||||
struct brw_wm_surface_key key;
|
||||
struct brw_surface_key key;
|
||||
struct intel_context *intel = &brw->intel;
|
||||
const int size = params->NumParameters * 4 * sizeof(GLfloat);
|
||||
|
||||
|
|
@ -402,7 +405,7 @@ brw_update_constant_surface( GLcontext *ctx,
|
|||
/* alloc new buffer if needed */
|
||||
if (!const_buffer) {
|
||||
const_buffer =
|
||||
drm_intel_bo_alloc(intel->bufmgr, "vp/fp_const_buffer", size, 64);
|
||||
drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", size, 64);
|
||||
}
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
|
|
@ -436,6 +439,66 @@ brw_update_constant_surface( GLcontext *ctx,
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Update the surface state for a VS constant buffer.
|
||||
* The constant buffer will be (re)allocated here if needed.
|
||||
*/
|
||||
static dri_bo *
|
||||
brw_update_vs_constant_surface( GLcontext *ctx,
|
||||
GLuint surf,
|
||||
dri_bo *const_buffer,
|
||||
const struct gl_program_parameter_list *params)
|
||||
{
|
||||
struct brw_context *brw = brw_context(ctx);
|
||||
struct brw_surface_key key;
|
||||
struct intel_context *intel = &brw->intel;
|
||||
const int size = params->NumParameters * 4 * sizeof(GLfloat);
|
||||
|
||||
assert(surf == 0);
|
||||
|
||||
/* free old const buffer if too small */
|
||||
if (const_buffer && const_buffer->size < size) {
|
||||
dri_bo_unreference(const_buffer);
|
||||
const_buffer = NULL;
|
||||
}
|
||||
|
||||
/* alloc new buffer if needed */
|
||||
if (!const_buffer) {
|
||||
const_buffer =
|
||||
drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64);
|
||||
}
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
|
||||
key.format = MESA_FORMAT_RGBA_FLOAT32;
|
||||
key.internal_format = GL_RGBA;
|
||||
key.bo = const_buffer;
|
||||
key.depthmode = GL_NONE;
|
||||
key.pitch = params->NumParameters;
|
||||
key.width = params->NumParameters;
|
||||
key.height = 1;
|
||||
key.depth = 1;
|
||||
key.cpp = 16;
|
||||
|
||||
/*
|
||||
printf("%s:\n", __FUNCTION__);
|
||||
printf(" width %d height %d depth %d cpp %d pitch %d\n",
|
||||
key.width, key.height, key.depth, key.cpp, key.pitch);
|
||||
*/
|
||||
|
||||
dri_bo_unreference(brw->vs.surf_bo[surf]);
|
||||
brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
|
||||
&key, sizeof(key),
|
||||
&key.bo, key.bo ? 1 : 0,
|
||||
NULL);
|
||||
if (brw->vs.surf_bo[surf] == NULL) {
|
||||
brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
|
||||
}
|
||||
|
||||
return const_buffer;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets up a surface state structure to point at the given region.
|
||||
* While it is only used for the front/back buffer currently, it should be
|
||||
|
|
@ -515,7 +578,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
|
|||
|
||||
/* Key size will never match key size for textures, so we're safe. */
|
||||
brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
|
||||
&key, sizeof(key),
|
||||
&key, sizeof(key),
|
||||
&region_bo, 1,
|
||||
&surf, sizeof(surf),
|
||||
NULL, NULL);
|
||||
|
|
@ -544,6 +607,8 @@ brw_wm_get_binding_table(struct brw_context *brw)
|
|||
{
|
||||
dri_bo *bind_bo;
|
||||
|
||||
assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
|
||||
|
||||
bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
|
||||
NULL, 0,
|
||||
brw->wm.surf_bo, brw->wm.nr_surfaces,
|
||||
|
|
@ -603,25 +668,13 @@ static void prepare_wm_surfaces(struct brw_context *brw )
|
|||
old_nr_surfaces = brw->wm.nr_surfaces;
|
||||
brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
|
||||
|
||||
/* Update surface / buffer for vertex shader constant buffer */
|
||||
{
|
||||
const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER;
|
||||
struct brw_vertex_program *vp =
|
||||
(struct brw_vertex_program *) brw->vertex_program;
|
||||
vp->const_buffer =
|
||||
brw_update_constant_surface(ctx, surf, vp->const_buffer,
|
||||
vp->program.Base.Parameters);
|
||||
|
||||
brw->wm.nr_surfaces = surf + 1;
|
||||
}
|
||||
|
||||
/* Update surface / buffer for fragment shader constant buffer */
|
||||
{
|
||||
const GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
|
||||
struct brw_fragment_program *fp =
|
||||
(struct brw_fragment_program *) brw->fragment_program;
|
||||
fp->const_buffer =
|
||||
brw_update_constant_surface(ctx, surf, fp->const_buffer,
|
||||
brw_update_wm_constant_surface(ctx, surf, fp->const_buffer,
|
||||
fp->program.Base.Parameters);
|
||||
|
||||
brw->wm.nr_surfaces = surf + 1;
|
||||
|
|
@ -655,17 +708,103 @@ static void prepare_wm_surfaces(struct brw_context *brw )
|
|||
brw->wm.bind_bo = brw_wm_get_binding_table(brw);
|
||||
|
||||
if (brw->wm.nr_surfaces != old_nr_surfaces)
|
||||
brw->state.dirty.brw |= BRW_NEW_NR_SURFACES;
|
||||
brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Constructs the binding table for the VS surface state.
|
||||
*/
|
||||
static dri_bo *
|
||||
brw_vs_get_binding_table(struct brw_context *brw)
|
||||
{
|
||||
dri_bo *bind_bo;
|
||||
|
||||
assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF);
|
||||
|
||||
bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
|
||||
NULL, 0,
|
||||
brw->vs.surf_bo, brw->vs.nr_surfaces,
|
||||
NULL);
|
||||
|
||||
if (bind_bo == NULL) {
|
||||
GLuint data_size = brw->vs.nr_surfaces * sizeof(GLuint);
|
||||
uint32_t *data = malloc(data_size);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < brw->vs.nr_surfaces; i++)
|
||||
if (brw->vs.surf_bo[i])
|
||||
data[i] = brw->vs.surf_bo[i]->offset;
|
||||
else
|
||||
data[i] = 0;
|
||||
|
||||
bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
|
||||
NULL, 0,
|
||||
brw->vs.surf_bo, brw->vs.nr_surfaces,
|
||||
data, data_size,
|
||||
NULL, NULL);
|
||||
|
||||
/* Emit binding table relocations to surface state */
|
||||
for (i = 0; i < BRW_VS_MAX_SURF; i++) {
|
||||
if (brw->vs.surf_bo[i] != NULL) {
|
||||
dri_bo_emit_reloc(bind_bo,
|
||||
I915_GEM_DOMAIN_INSTRUCTION, 0,
|
||||
0,
|
||||
i * sizeof(GLuint),
|
||||
brw->vs.surf_bo[i]);
|
||||
}
|
||||
}
|
||||
|
||||
free(data);
|
||||
}
|
||||
|
||||
return bind_bo;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Vertex shader surfaces. Just constant buffer for now. Could add vertex
|
||||
* shader textures in the future.
|
||||
*/
|
||||
static void prepare_vs_surfaces(struct brw_context *brw )
|
||||
{
|
||||
GLcontext *ctx = &brw->intel.ctx;
|
||||
|
||||
/* Update surface / buffer for vertex shader constant buffer */
|
||||
{
|
||||
const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER;
|
||||
struct brw_vertex_program *vp =
|
||||
(struct brw_vertex_program *) brw->vertex_program;
|
||||
vp->const_buffer =
|
||||
brw_update_vs_constant_surface(ctx, surf, vp->const_buffer,
|
||||
vp->program.Base.Parameters);
|
||||
|
||||
brw->vs.nr_surfaces = 1;
|
||||
}
|
||||
|
||||
dri_bo_unreference(brw->vs.bind_bo);
|
||||
brw->vs.bind_bo = brw_vs_get_binding_table(brw);
|
||||
|
||||
if (1)
|
||||
brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Prepare hook for the surface state atom: set up the WM surfaces first,
 * then the VS surfaces.
 */
static void prepare_surfaces(struct brw_context *brw)
{
   /* fragment shader (WM) surfaces */
   prepare_wm_surfaces(brw);

   /* vertex shader surfaces */
   prepare_vs_surfaces(brw);
}
|
||||
|
||||
|
||||
const struct brw_tracked_state brw_wm_surfaces = {
|
||||
.dirty = {
|
||||
.mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS,
|
||||
.mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM,
|
||||
.brw = BRW_NEW_CONTEXT,
|
||||
.cache = 0
|
||||
},
|
||||
.prepare = prepare_wm_surfaces,
|
||||
.prepare = prepare_surfaces,
|
||||
};
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue