i965/fs: Add gen6 register spilling support.

Most of this is code movement to get the scratch space allocated in a
shared location.  Other than that, the only real changes are that the
old oword block messages now operate on oword-aligned areas (with new
messages for unaligned access, which we don't do), and that the
caching control is in the SFID part of the descriptor instead of
message control.

Fixes glsl-fs-convolution-1.
This commit is contained in:
Eric Anholt 2011-04-14 19:36:28 -07:00
parent 14eedf3028
commit 59c6b775a6
5 changed files with 58 additions and 31 deletions

View file

@ -496,6 +496,8 @@ static void brw_set_dp_write_message( struct brw_context *brw,
insn->bits3.dp_render_cache.response_length = response_length;
insn->bits3.dp_render_cache.msg_length = msg_length;
insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
/* We always use the render cache for write messages */
insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
/* XXX really need below? */
insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
@ -539,6 +541,13 @@ brw_set_dp_read_message(struct brw_context *brw,
brw_set_src1(insn, brw_imm_d(0));
if (intel->gen >= 6) {
uint32_t target_function;
if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
target_function = BRW_MESSAGE_TARGET_DATAPORT_READ; /* data cache */
else
target_function = BRW_MESSAGE_TARGET_DATAPORT_WRITE; /* render cache */
insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
insn->bits3.dp_render_cache.msg_control = msg_control;
insn->bits3.dp_render_cache.pixel_scoreboard_clear = 0;
@ -548,9 +557,9 @@ brw_set_dp_read_message(struct brw_context *brw,
insn->bits3.dp_render_cache.response_length = response_length;
insn->bits3.dp_render_cache.msg_length = msg_length;
insn->bits3.dp_render_cache.end_of_thread = 0;
insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_READ;
insn->header.destreg__conditionalmod = target_function;
/* XXX really need below? */
insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
insn->bits2.send_gen5.sfid = target_function;
insn->bits2.send_gen5.end_of_thread = 0;
} else if (intel->gen == 5) {
insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
@ -1486,9 +1495,12 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
GLuint offset)
{
struct intel_context *intel = &p->brw->intel;
uint32_t msg_control;
uint32_t msg_control, msg_type;
int mlen;
if (intel->gen >= 6)
offset /= 16;
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
if (num_regs == 1) {
@ -1554,13 +1566,22 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
}
brw_set_dest(p, insn, dest);
brw_set_src0(insn, brw_null_reg());
if (intel->gen >= 6) {
brw_set_src0(insn, mrf);
} else {
brw_set_src0(insn, brw_null_reg());
}
if (intel->gen >= 6)
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
else
msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
brw_set_dp_write_message(p->brw,
insn,
255, /* binding table index (255=stateless) */
msg_control,
BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
msg_type,
mlen,
GL_TRUE, /* header_present */
0, /* pixel scoreboard */
@ -1585,9 +1606,13 @@ brw_oword_block_read_scratch(struct brw_compile *p,
int num_regs,
GLuint offset)
{
struct intel_context *intel = &p->brw->intel;
uint32_t msg_control;
int rlen;
if (intel->gen >= 6)
offset /= 16;
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
dest = retype(dest, BRW_REGISTER_TYPE_UW);
@ -1624,14 +1649,18 @@ brw_oword_block_read_scratch(struct brw_compile *p,
insn->header.destreg__conditionalmod = mrf.nr;
brw_set_dest(p, insn, dest); /* UW? */
brw_set_src0(insn, brw_null_reg());
if (intel->gen >= 6) {
brw_set_src0(insn, mrf);
} else {
brw_set_src0(insn, brw_null_reg());
}
brw_set_dp_read_message(p->brw,
insn,
255, /* binding table index (255=stateless) */
msg_control,
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1, /* target cache (render/scratch) */
BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
1, /* msg_length */
rlen);
}
@ -1839,7 +1868,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
bind_table_index,
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
msg_type,
0, /* source cache = data cache */
BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2, /* msg_length */
1); /* response_length */
}

View file

@ -228,8 +228,6 @@ fs_visitor::assign_regs()
if (reg == -1) {
fail("no register to spill\n");
} else if (intel->gen >= 6) {
fail("no spilling support on gen6 yet\n");
} else {
spill_reg(reg);
}

View file

@ -185,6 +185,7 @@ static void do_wm_prog( struct brw_context *brw,
struct brw_fragment_program *fp,
struct brw_wm_prog_key *key)
{
struct intel_context *intel = &brw->intel;
struct brw_wm_compile *c;
const GLuint *program;
GLuint program_size;
@ -238,12 +239,26 @@ static void do_wm_prog( struct brw_context *brw,
/* Scratch space is used for register spilling */
if (c->last_scratch) {
uint32_t total_scratch;
/* Per-thread scratch space is power-of-two sized. */
for (c->prog_data.total_scratch = 1024;
c->prog_data.total_scratch <= c->last_scratch;
c->prog_data.total_scratch *= 2) {
/* empty */
}
total_scratch = c->prog_data.total_scratch * brw->wm_max_threads;
if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) {
drm_intel_bo_unreference(brw->wm.scratch_bo);
brw->wm.scratch_bo = NULL;
}
if (brw->wm.scratch_bo == NULL) {
brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
"wm scratch",
total_scratch,
4096);
}
}
else {
c->prog_data.total_scratch = 0;

View file

@ -278,30 +278,10 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
static void upload_wm_unit( struct brw_context *brw )
{
struct intel_context *intel = &brw->intel;
struct brw_wm_unit_key key;
drm_intel_bo *reloc_bufs[3];
wm_unit_populate_key(brw, &key);
/* Allocate the necessary scratch space if we haven't already. Don't
* bother reducing the allocation later, since we use scratch so
* rarely.
*/
if (key.total_scratch) {
GLuint total = key.total_scratch * brw->wm_max_threads;
if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
drm_intel_bo_unreference(brw->wm.scratch_bo);
brw->wm.scratch_bo = NULL;
}
if (brw->wm.scratch_bo == NULL) {
brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
"wm scratch",
total,
4096);
}
}
reloc_bufs[0] = brw->wm.prog_bo;
reloc_bufs[1] = brw->wm.scratch_bo;
reloc_bufs[2] = brw->wm.sampler_bo;

View file

@ -184,7 +184,12 @@ upload_wm_state(struct brw_context *brw)
OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
OUT_BATCH(dw2);
OUT_BATCH(0); /* scratch space base offset */
if (brw->wm.prog_data->total_scratch) {
OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
ffs(brw->wm.prog_data->total_scratch) - 11);
} else {
OUT_BATCH(0);
}
OUT_BATCH(dw4);
OUT_BATCH(dw5);
OUT_BATCH(dw6);