i965/gen6/gs: Compute URB entry size for user-provided geometry shaders.

Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
This commit is contained in:
Iago Toral Quiroga 2014-07-01 13:08:25 +02:00
parent 621685ad4c
commit c09ddf82ff
2 changed files with 61 additions and 32 deletions

View file

@ -1595,10 +1595,14 @@ enum brw_message_target {
# define GEN7_URB_ENTRY_SIZE_SHIFT 16
# define GEN7_URB_STARTING_ADDRESS_SHIFT 25
/* "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
/* Gen7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
* is 2^9, or 512. It's counted in multiples of 64 bytes.
*/
#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES (512*64)
#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES (512*64)
/* Gen6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit
* (128 bytes) URB rows and the maximum allowed value is 5 rows.
*/
#define GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES (5*128)
#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */
#define _3DSTATE_PUSH_CONSTANT_ALLOC_GS 0x7915 /* GEN7+ */

View file

@ -75,31 +75,36 @@ do_gs_prog(struct brw_context *brw,
*/
c.prog_data.base.base.nr_params = ALIGN(param_count, 4) / 4 + gs->num_samplers;
if (gp->program.OutputType == GL_POINTS) {
/* When the output type is points, the geometry shader may output data
* to multiple streams, and EndPrimitive() has no effect. So we
* configure the hardware to interpret the control data as stream ID.
*/
c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
if (brw->gen >= 7) {
if (gp->program.OutputType == GL_POINTS) {
/* When the output type is points, the geometry shader may output data
* to multiple streams, and EndPrimitive() has no effect. So we
* configure the hardware to interpret the control data as stream ID.
*/
c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
/* We only have to emit control bits if we are using streams */
if (prog->Geom.UsesStreams)
c.control_data_bits_per_vertex = 2;
else
c.control_data_bits_per_vertex = 0;
/* We only have to emit control bits if we are using streams */
if (prog->Geom.UsesStreams)
c.control_data_bits_per_vertex = 2;
else
c.control_data_bits_per_vertex = 0;
} else {
/* When the output type is triangle_strip or line_strip, EndPrimitive()
* may be used to terminate the current strip and start a new one
* (similar to primitive restart), and outputting data to multiple
* streams is not supported. So we configure the hardware to interpret
* the control data as EndPrimitive information (a.k.a. "cut bits").
*/
c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
/* We only need to output control data if the shader actually calls
* EndPrimitive().
*/
c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0;
}
} else {
/* When the output type is triangle_strip or line_strip, EndPrimitive()
* may be used to terminate the current strip and start a new one
* (similar to primitive restart), and outputting data to multiple
* streams is not supported. So we configure the hardware to interpret
* the control data as EndPrimitive information (a.k.a. "cut bits").
*/
c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
/* We only need to output control data if the shader actually calls
* EndPrimitive().
*/
c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0;
/* There are no control data bits in gen6. */
c.control_data_bits_per_vertex = 0;
}
c.control_data_header_size_bits =
gp->program.VerticesOut * c.control_data_bits_per_vertex;
@ -170,7 +175,8 @@ do_gs_prog(struct brw_context *brw,
*
*/
unsigned output_vertex_size_bytes = c.prog_data.base.vue_map.num_slots * 16;
assert(output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
assert(brw->gen == 6 ||
output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
c.prog_data.output_vertex_size_hwords =
ALIGN(output_vertex_size_bytes, 32) / 32;
@ -200,10 +206,20 @@ do_gs_prog(struct brw_context *brw,
* the above figures are all worst-case, and most of them scale with the
* number of output vertices. So we'll just calculate the amount of space
* we need, and if it's too large, fail to compile.
*
* The above is for gen7+ where we have a single URB entry that will hold
* all the output. In gen6, we will have to allocate URB entries for every
* vertex we emit, so our URB entries only need to be large enough to hold
* a single vertex. Also, gen6 does not have a control data header.
*/
unsigned output_size_bytes =
c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
unsigned output_size_bytes;
if (brw->gen >= 7) {
output_size_bytes =
c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
} else {
output_size_bytes = c.prog_data.output_vertex_size_hwords * 32;
}
/* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output,
* which comes before the control header.
@ -212,11 +228,20 @@ do_gs_prog(struct brw_context *brw,
output_size_bytes += 32;
assert(output_size_bytes >= 1);
if (output_size_bytes > GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES)
int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
if (brw->gen == 6)
max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
if (output_size_bytes > max_output_size_bytes)
return false;
/* URB entry sizes are stored as a multiple of 64 bytes. */
c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
/* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
* a multiple of 128 bytes in gen6.
*/
if (brw->gen >= 7)
c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
else
c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
c.prog_data.output_topology =
get_hw_prim_for_gl_prim(gp->program.OutputType);