mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 04:10:09 +01:00
i965: Implement "Static Vertex Count" geometry shader optimization.
Broadwell's 3DSTATE_GS contains new "Static Output" and "Static Vertex Count" fields, which control a new optimization. Normally, geometry shaders can output arbitrary numbers of vertices, which means that resource allocation has to be done on the fly. However, if the number of vertices is statically known, the hardware can pre-allocate resources up front, which is more efficient.

Thanks to the new NIR GS intrinsics, this is easy. We just call the function introduced in the previous commit to get the vertex count. If it obtains a count, we stop emitting the extra 32-bit "Vertex Count" field in the VUE, and instead fill out the 3DSTATE_GS fields.

Improves performance of Gl32GSCloth by 5.16347% +/- 0.12611% (n=91) on my Lenovo X250 laptop (Broadwell GT2) at 1024x768.

shader-db statistics for geometry shaders only:

total instructions in shared programs: 3227 -> 3207 (-0.62%)
instructions in affected programs: 242 -> 222 (-8.26%)
helped: 10

v2: Don't break non-NIR paths (just skip this optimization).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
This commit is contained in:
parent
bcef2abad7
commit
f0a618ee7c
5 changed files with 28 additions and 4 deletions
|
|
@ -792,6 +792,11 @@ struct brw_gs_prog_data
|
|||
|
||||
bool include_primitive_id;
|
||||
|
||||
/**
|
||||
* The number of vertices emitted, if constant - otherwise -1.
|
||||
*/
|
||||
int static_vertex_count;
|
||||
|
||||
int invocations;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1964,6 +1964,11 @@ enum brw_message_target {
|
|||
# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16)
|
||||
# define GEN6_GS_ENABLE (1 << 15)
|
||||
|
||||
/* Gen8+ DW8 */
|
||||
# define GEN8_GS_STATIC_OUTPUT (1 << 30)
|
||||
# define GEN8_GS_STATIC_VERTEX_COUNT_SHIFT 16
|
||||
# define GEN8_GS_STATIC_VERTEX_COUNT_MASK INTEL_MASK(26, 16)
|
||||
|
||||
/* Gen8+ DW9 */
|
||||
# define GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21
|
||||
# define GEN8_GS_URB_OUTPUT_LENGTH_SHIFT 16
|
||||
|
|
|
|||
|
|
@ -73,6 +73,11 @@ brw_codegen_gs_prog(struct brw_context *brw,
|
|||
c.prog_data.base.base.nr_params = param_count;
|
||||
c.prog_data.base.base.nr_image_params = gs->NumImages;
|
||||
|
||||
if (brw->gen >= 8) {
|
||||
c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 :
|
||||
nir_gs_count_vertices(gp->program.Base.nir);
|
||||
}
|
||||
|
||||
if (brw->gen >= 7) {
|
||||
if (gp->program.OutputType == GL_POINTS) {
|
||||
/* When the output type is points, the geometry shader may output data
|
||||
|
|
|
|||
|
|
@ -234,17 +234,20 @@ vec4_gs_visitor::emit_thread_end()
|
|||
*/
|
||||
int base_mrf = 1;
|
||||
|
||||
bool static_vertex_count = c->prog_data.static_vertex_count != -1;
|
||||
|
||||
current_annotation = "thread end";
|
||||
dst_reg mrf_reg(MRF, base_mrf);
|
||||
src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
|
||||
vec4_instruction *inst = emit(MOV(mrf_reg, r0));
|
||||
inst->force_writemask_all = true;
|
||||
emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
|
||||
if (devinfo->gen < 8 || !static_vertex_count)
|
||||
emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
|
||||
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
|
||||
emit_shader_time_end();
|
||||
inst = emit(GS_OPCODE_THREAD_END);
|
||||
inst->base_mrf = base_mrf;
|
||||
inst->mlen = devinfo->gen >= 8 ? 2 : 1;
|
||||
inst->mlen = devinfo->gen >= 8 && !static_vertex_count ? 2 : 1;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -284,7 +287,7 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
|
|||
/* We need to increment Global Offset by 1 to make room for Broadwell's
|
||||
* extra "Vertex Count" payload at the beginning of the URB entry.
|
||||
*/
|
||||
if (devinfo->gen >= 8)
|
||||
if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
|
||||
inst->offset++;
|
||||
|
||||
inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
|
||||
|
|
@ -421,7 +424,7 @@ vec4_gs_visitor::emit_control_data_bits()
|
|||
* URB entry. Since this is an OWord message, Global Offset is counted
|
||||
* in 128-bit units, so we must set it to 2.
|
||||
*/
|
||||
if (devinfo->gen >= 8)
|
||||
if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
|
||||
inst->offset = 2;
|
||||
inst->base_mrf = base_mrf;
|
||||
inst->mlen = 2;
|
||||
|
|
|
|||
|
|
@ -90,6 +90,12 @@ gen8_upload_gs_state(struct brw_context *brw)
|
|||
uint32_t dw8 = brw->gs.prog_data->control_data_format <<
|
||||
HSW_GS_CONTROL_DATA_FORMAT_SHIFT;
|
||||
|
||||
if (brw->gs.prog_data->static_vertex_count != -1) {
|
||||
dw8 |= GEN8_GS_STATIC_OUTPUT |
|
||||
SET_FIELD(brw->gs.prog_data->static_vertex_count,
|
||||
GEN8_GS_STATIC_VERTEX_COUNT);
|
||||
}
|
||||
|
||||
if (brw->gen < 9)
|
||||
dw7 |= (brw->max_gs_threads / 2 - 1) << HSW_GS_MAX_THREADS_SHIFT;
|
||||
else
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue