diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h index 12889a62e6b..93448a77f08 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -60,6 +60,7 @@ struct brw_gs_compile { struct { struct brw_reg R0; struct brw_reg vertex[MAX_GS_VERTS]; + struct brw_reg header; struct brw_reg temp; } reg; diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c index e9875cdbf11..6d39df195f6 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c @@ -58,35 +58,66 @@ static void brw_gs_alloc_regs( struct brw_gs_compile *c, i += c->nr_regs; } - c->reg.temp = brw_vec8_grf(i, 0); + c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); + c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); c->prog_data.urb_read_length = c->nr_regs; c->prog_data.total_grf = i; } -static void brw_gs_emit_vue(struct brw_gs_compile *c, - struct brw_reg vert, - bool last, - GLuint header) +/** + * Set up the initial value of c->reg.header register based on c->reg.R0. + * + * The following information is passed to the GS thread in R0, and needs to be + * included in the first URB_WRITE or FF_SYNC message sent by the GS: + * + * - DWORD 0 [31:0] handle info (Gen4 only) + * - DWORD 5 [7:0] FFTID + * - DWORD 6 [31:0] Debug info + * - DWORD 7 [31:0] Debug info + * + * This function sets up the above data by copying by copying the contents of + * R0 to the header register. + */ +static void brw_gs_initialize_header(struct brw_gs_compile *c) +{ + struct brw_compile *p = &c->func; + brw_MOV(p, c->reg.header, c->reg.R0); +} + +/** + * Overwrite DWORD 2 of c->reg.header with the given immediate unsigned value. + * + * In URB_WRITE messages, DWORD 2 contains the fields PrimType, PrimStart, + * PrimEnd, Increment CL_INVOCATIONS, and SONumPrimsWritten, many of which we + * need to be able to update on a per-vertex basis. + */ +static void brw_gs_overwrite_header_dw2(struct brw_gs_compile *c, + unsigned dw2) +{ + struct brw_compile *p = &c->func; + brw_MOV(p, get_element_ud(c->reg.header, 2), brw_imm_ud(dw2)); +} + +/** + * Emit a vertex using the URB_WRITE message. Use the contents of + * c->reg.header for the message header, and the registers starting at \c vert + * for the vertex data. + * + * If \c last is true, then this is the last vertex, so no further URB space + * should be allocated, and this message should end the thread. + * + * If \c last is false, then a new URB entry will be allocated, and its handle + * will be stored in DWORD 0 of c->reg.header for use in the next URB_WRITE + * message. + */ +static void brw_gs_emit_vue(struct brw_gs_compile *c, + struct brw_reg vert, + bool last) { struct brw_compile *p = &c->func; - struct intel_context *intel = &c->func.brw->intel; bool allocate = !last; - struct brw_reg temp; - - if (intel->gen < 6) - temp = c->reg.R0; - else { - temp = c->reg.temp; - brw_MOV(p, retype(temp, BRW_REGISTER_TYPE_UD), - retype(c->reg.R0, BRW_REGISTER_TYPE_UD)); - } - - /* Overwrite PrimType and PrimStart in the message header, for - * each vertex in turn: - */ - brw_MOV(p, get_element_ud(temp, 2), brw_imm_ud(header)); /* Copy the vertex from vertn into m1..mN+1: */ @@ -99,9 +130,10 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c, * allocated each time. */ brw_urb_WRITE(p, - allocate ? temp : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + allocate ? c->reg.temp + : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), 0, - temp, + c->reg.header, allocate, 1, /* used */ c->nr_regs + 1, /* msg length */ @@ -111,38 +143,37 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c, 0, /* urb offset */ BRW_URB_SWIZZLE_NONE); - if (intel->gen >= 6 && allocate) - brw_MOV(p, get_element_ud(c->reg.R0, 0), get_element_ud(temp, 0)); + if (allocate) { + brw_MOV(p, get_element_ud(c->reg.header, 0), + get_element_ud(c->reg.temp, 0)); + } } +/** + * Send an FF_SYNC message to ensure that all previously spawned GS threads + * have finished sending primitives down the pipeline, and to allocate a URB + * entry for the first output vertex. Only needed when intel->needs_ff_sync + * is true. + * + * This function modifies c->reg.header: in DWORD 1, it stores num_prim (which + * is needed by the FF_SYNC message), and in DWORD 0, it stores the handle to + * the allocated URB entry (which will be needed by the URB_WRITE meesage that + * follows). + */ static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) { struct brw_compile *p = &c->func; - struct intel_context *intel = &c->func.brw->intel; - if (intel->gen < 6) { - brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim)); - brw_ff_sync(p, - c->reg.R0, - 0, - c->reg.R0, - 1, /* allocate */ - 1, /* response length */ - 0 /* eot */); - } else { - brw_MOV(p, retype(c->reg.temp, BRW_REGISTER_TYPE_UD), - retype(c->reg.R0, BRW_REGISTER_TYPE_UD)); - brw_MOV(p, get_element_ud(c->reg.temp, 1), brw_imm_ud(num_prim)); - brw_ff_sync(p, - c->reg.temp, - 0, - c->reg.temp, - 1, /* allocate */ - 1, /* response length */ - 0 /* eot */); - brw_MOV(p, get_element_ud(c->reg.R0, 0), - get_element_ud(c->reg.temp, 0)); - } + brw_MOV(p, get_element_ud(c->reg.header, 1), brw_imm_ud(num_prim)); + brw_ff_sync(p, + c->reg.temp, + 0, + c->reg.header, + 1, /* allocate */ + 1, /* response length */ + 0 /* eot */); + brw_MOV(p, get_element_ud(c->reg.header, 0), + get_element_ud(c->reg.temp, 0)); } @@ -151,23 +182,28 @@ void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) struct intel_context *intel = &c->func.brw->intel; brw_gs_alloc_regs(c, 4); - + brw_gs_initialize_header(c); /* Use polygons for correct edgeflag behaviour. Note that vertex 3 * is the PV for quads, but vertex 0 for polygons: */ if (intel->needs_ff_sync) brw_gs_ff_sync(c, 1); + brw_gs_overwrite_header_dw2(c, (_3DPRIM_POLYGON << 2) | R02_PRIM_START); if (key->pv_first) { - brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[2], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[3], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0); + brw_gs_overwrite_header_dw2(c, _3DPRIM_POLYGON << 2); + brw_gs_emit_vue(c, c->reg.vertex[1], 0); + brw_gs_emit_vue(c, c->reg.vertex[2], 0); + brw_gs_overwrite_header_dw2(c, (_3DPRIM_POLYGON << 2) | R02_PRIM_END); + brw_gs_emit_vue(c, c->reg.vertex[3], 1); } else { - brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); + brw_gs_emit_vue(c, c->reg.vertex[3], 0); + brw_gs_overwrite_header_dw2(c, _3DPRIM_POLYGON << 2); + brw_gs_emit_vue(c, c->reg.vertex[0], 0); + brw_gs_emit_vue(c, c->reg.vertex[1], 0); + brw_gs_overwrite_header_dw2(c, (_3DPRIM_POLYGON << 2) | R02_PRIM_END); + brw_gs_emit_vue(c, c->reg.vertex[2], 1); } } @@ -176,20 +212,26 @@ void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) struct intel_context *intel = &c->func.brw->intel; brw_gs_alloc_regs(c, 4); + brw_gs_initialize_header(c); if (intel->needs_ff_sync) brw_gs_ff_sync(c, 1); + brw_gs_overwrite_header_dw2(c, (_3DPRIM_POLYGON << 2) | R02_PRIM_START); if (key->pv_first) { - brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[2], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[3], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0); + brw_gs_overwrite_header_dw2(c, _3DPRIM_POLYGON << 2); + brw_gs_emit_vue(c, c->reg.vertex[1], 0); + brw_gs_emit_vue(c, c->reg.vertex[2], 0); + brw_gs_overwrite_header_dw2(c, (_3DPRIM_POLYGON << 2) | R02_PRIM_END); + brw_gs_emit_vue(c, c->reg.vertex[3], 1); } else { - brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); + brw_gs_emit_vue(c, c->reg.vertex[2], 0); + brw_gs_overwrite_header_dw2(c, _3DPRIM_POLYGON << 2); + brw_gs_emit_vue(c, c->reg.vertex[3], 0); + brw_gs_emit_vue(c, c->reg.vertex[0], 0); + brw_gs_overwrite_header_dw2(c, (_3DPRIM_POLYGON << 2) | R02_PRIM_END); + brw_gs_emit_vue(c, c->reg.vertex[1], 1); } } @@ -198,9 +240,12 @@ void brw_gs_lines( struct brw_gs_compile *c ) struct intel_context *intel = &c->func.brw->intel; brw_gs_alloc_regs(c, 2); + brw_gs_initialize_header(c); if (intel->needs_ff_sync) brw_gs_ff_sync(c, 1); - brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END)); + brw_gs_overwrite_header_dw2(c, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); + brw_gs_emit_vue(c, c->reg.vertex[0], 0); + brw_gs_overwrite_header_dw2(c, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); + brw_gs_emit_vue(c, c->reg.vertex[1], 1); }