mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 02:48:06 +02:00
i965/gen6/gs: Compute URB entry size for user-provided geometry shaders.
Acked-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
This commit is contained in:
parent
621685ad4c
commit
c09ddf82ff
2 changed files with 61 additions and 32 deletions
|
|
@ -1595,10 +1595,14 @@ enum brw_message_target {
|
|||
# define GEN7_URB_ENTRY_SIZE_SHIFT 16
|
||||
# define GEN7_URB_STARTING_ADDRESS_SHIFT 25
|
||||
|
||||
/* "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
|
||||
/* Gen7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
|
||||
* is 2^9, or 512. It's counted in multiples of 64 bytes.
|
||||
*/
|
||||
#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES (512*64)
|
||||
#define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES (512*64)
|
||||
/* Gen6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit
|
||||
* (128 bytes) URB rows and the maximum allowed value is 5 rows.
|
||||
*/
|
||||
#define GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES (5*128)
|
||||
|
||||
#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */
|
||||
#define _3DSTATE_PUSH_CONSTANT_ALLOC_GS 0x7915 /* GEN7+ */
|
||||
|
|
|
|||
|
|
@ -75,31 +75,36 @@ do_gs_prog(struct brw_context *brw,
|
|||
*/
|
||||
c.prog_data.base.base.nr_params = ALIGN(param_count, 4) / 4 + gs->num_samplers;
|
||||
|
||||
if (gp->program.OutputType == GL_POINTS) {
|
||||
/* When the output type is points, the geometry shader may output data
|
||||
* to multiple streams, and EndPrimitive() has no effect. So we
|
||||
* configure the hardware to interpret the control data as stream ID.
|
||||
*/
|
||||
c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
|
||||
if (brw->gen >= 7) {
|
||||
if (gp->program.OutputType == GL_POINTS) {
|
||||
/* When the output type is points, the geometry shader may output data
|
||||
* to multiple streams, and EndPrimitive() has no effect. So we
|
||||
* configure the hardware to interpret the control data as stream ID.
|
||||
*/
|
||||
c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
|
||||
|
||||
/* We only have to emit control bits if we are using streams */
|
||||
if (prog->Geom.UsesStreams)
|
||||
c.control_data_bits_per_vertex = 2;
|
||||
else
|
||||
c.control_data_bits_per_vertex = 0;
|
||||
/* We only have to emit control bits if we are using streams */
|
||||
if (prog->Geom.UsesStreams)
|
||||
c.control_data_bits_per_vertex = 2;
|
||||
else
|
||||
c.control_data_bits_per_vertex = 0;
|
||||
} else {
|
||||
/* When the output type is triangle_strip or line_strip, EndPrimitive()
|
||||
* may be used to terminate the current strip and start a new one
|
||||
* (similar to primitive restart), and outputting data to multiple
|
||||
* streams is not supported. So we configure the hardware to interpret
|
||||
* the control data as EndPrimitive information (a.k.a. "cut bits").
|
||||
*/
|
||||
c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
|
||||
|
||||
/* We only need to output control data if the shader actually calls
|
||||
* EndPrimitive().
|
||||
*/
|
||||
c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0;
|
||||
}
|
||||
} else {
|
||||
/* When the output type is triangle_strip or line_strip, EndPrimitive()
|
||||
* may be used to terminate the current strip and start a new one
|
||||
* (similar to primitive restart), and outputting data to multiple
|
||||
* streams is not supported. So we configure the hardware to interpret
|
||||
* the control data as EndPrimitive information (a.k.a. "cut bits").
|
||||
*/
|
||||
c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
|
||||
|
||||
/* We only need to output control data if the shader actually calls
|
||||
* EndPrimitive().
|
||||
*/
|
||||
c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0;
|
||||
/* There are no control data bits in gen6. */
|
||||
c.control_data_bits_per_vertex = 0;
|
||||
}
|
||||
c.control_data_header_size_bits =
|
||||
gp->program.VerticesOut * c.control_data_bits_per_vertex;
|
||||
|
|
@ -170,7 +175,8 @@ do_gs_prog(struct brw_context *brw,
|
|||
*
|
||||
*/
|
||||
unsigned output_vertex_size_bytes = c.prog_data.base.vue_map.num_slots * 16;
|
||||
assert(output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
|
||||
assert(brw->gen == 6 ||
|
||||
output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
|
||||
c.prog_data.output_vertex_size_hwords =
|
||||
ALIGN(output_vertex_size_bytes, 32) / 32;
|
||||
|
||||
|
|
@ -200,10 +206,20 @@ do_gs_prog(struct brw_context *brw,
|
|||
* the above figures are all worst-case, and most of them scale with the
|
||||
* number of output vertices. So we'll just calculate the amount of space
|
||||
* we need, and if it's too large, fail to compile.
|
||||
*
|
||||
* The above is for gen7+ where we have a single URB entry that will hold
|
||||
* all the output. In gen6, we will have to allocate URB entries for every
|
||||
* vertex we emit, so our URB entries only need to be large enough to hold
|
||||
* a single vertex. Also, gen6 does not have a control data header.
|
||||
*/
|
||||
unsigned output_size_bytes =
|
||||
c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
|
||||
output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
|
||||
unsigned output_size_bytes;
|
||||
if (brw->gen >= 7) {
|
||||
output_size_bytes =
|
||||
c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
|
||||
output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
|
||||
} else {
|
||||
output_size_bytes = c.prog_data.output_vertex_size_hwords * 32;
|
||||
}
|
||||
|
||||
/* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output,
|
||||
* which comes before the control header.
|
||||
|
|
@ -212,11 +228,20 @@ do_gs_prog(struct brw_context *brw,
|
|||
output_size_bytes += 32;
|
||||
|
||||
assert(output_size_bytes >= 1);
|
||||
if (output_size_bytes > GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES)
|
||||
int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
|
||||
if (brw->gen == 6)
|
||||
max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
|
||||
if (output_size_bytes > max_output_size_bytes)
|
||||
return false;
|
||||
|
||||
/* URB entry sizes are stored as a multiple of 64 bytes. */
|
||||
c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
|
||||
|
||||
/* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
|
||||
* a multiple of 128 bytes in gen6.
|
||||
*/
|
||||
if (brw->gen >= 7)
|
||||
c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
|
||||
else
|
||||
c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
|
||||
|
||||
c.prog_data.output_topology =
|
||||
get_hw_prim_for_gl_prim(gp->program.OutputType);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue