radeonsi/gfx10: emit VGT_GS_OUT_PRIM_TYPE from draw and add it to VS_STATE

With NGG, the VGT_GS_OUT_PRIM_TYPE can change without a shader change.

The VS_STATE is required for both streamout and culling from a vertex
shader without pre-compiling outprim-specific variants.

We could consider compiling specialized variants in the future. We
could also consider compiling the NGG logic as an epilog.

Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
Nicolai Hähnle 2019-05-08 01:40:29 +02:00 committed by Marek Olšák
parent 4ecc39e1aa
commit 77e715541c
5 changed files with 52 additions and 48 deletions

View file

@ -463,7 +463,6 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_1] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_2] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_3] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_OUT_PRIM_TYPE] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_ITEMSIZE] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MAX_VERT_OUT] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE] = 0x00000000;

View file

@ -246,6 +246,8 @@ enum {
#define C_VS_STATE_CLAMP_VERTEX_COLOR 0xFFFFFFFE
#define S_VS_STATE_INDEXED(x) (((unsigned)(x) & 0x1) << 1)
#define C_VS_STATE_INDEXED 0xFFFFFFFD
#define S_VS_STATE_OUTPRIM(x) (((unsigned)(x) & 0x3) << 2)
#define C_VS_STATE_OUTPRIM 0xFFFFFFF3
#define S_VS_STATE_LS_OUT_PATCH_SIZE(x) (((unsigned)(x) & 0x1FFF) << 8)
#define C_VS_STATE_LS_OUT_PATCH_SIZE 0xFFE000FF
#define S_VS_STATE_LS_OUT_VERTEX_SIZE(x) (((unsigned)(x) & 0xFF) << 24)
@ -666,7 +668,6 @@ struct si_shader {
unsigned vgt_gsvs_ring_offset_1;
unsigned vgt_gsvs_ring_offset_2;
unsigned vgt_gsvs_ring_offset_3;
unsigned vgt_gs_out_prim_type;
unsigned vgt_gsvs_ring_itemsize;
unsigned vgt_gs_max_vert_out;
unsigned vgt_gs_vert_itemsize;

View file

@ -298,10 +298,9 @@ enum si_tracked_reg {
SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
SI_TRACKED_VGT_GSVS_RING_OFFSET_1, /* 4 consecutive registers */
SI_TRACKED_VGT_GSVS_RING_OFFSET_1, /* 3 consecutive registers */
SI_TRACKED_VGT_GSVS_RING_OFFSET_2,
SI_TRACKED_VGT_GSVS_RING_OFFSET_3,
SI_TRACKED_VGT_GS_OUT_PRIM_TYPE,
SI_TRACKED_VGT_GSVS_RING_ITEMSIZE,
SI_TRACKED_VGT_GS_MAX_VERT_OUT,

View file

@ -549,6 +549,30 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
return ia_multi_vgt_param;
}
static unsigned si_conv_prim_to_gs_out(unsigned mode)
{
static const int prim_conv[] = {
[PIPE_PRIM_POINTS] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
[PIPE_PRIM_LINES] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
[PIPE_PRIM_LINE_LOOP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
[PIPE_PRIM_LINE_STRIP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
[PIPE_PRIM_TRIANGLES] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_TRIANGLE_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_TRIANGLE_FAN] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_QUADS] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_QUAD_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_POLYGON] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_LINES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
[PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
[PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
};
assert(mode < ARRAY_SIZE(prim_conv));
return prim_conv[mode];
}
/* rast_prim is the primitive type after GS. */
static void si_emit_rasterizer_prim_state(struct si_context *sctx)
{
@ -556,24 +580,34 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
enum pipe_prim_type rast_prim = sctx->current_rast_prim;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
/* Skip this if not rendering lines. */
if (!util_prim_is_lines(rast_prim))
if (likely(rast_prim == sctx->last_rast_prim &&
rs->pa_sc_line_stipple == sctx->last_sc_line_stipple))
return;
if (rast_prim == sctx->last_rast_prim &&
rs->pa_sc_line_stipple == sctx->last_sc_line_stipple)
return;
if (util_prim_is_lines(rast_prim)) {
/* For lines, reset the stipple pattern at each primitive. Otherwise,
* reset the stipple pattern at each packet (line strips, line loops).
*/
radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
rs->pa_sc_line_stipple |
S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : 2));
sctx->context_roll = true;
}
/* For lines, reset the stipple pattern at each primitive. Otherwise,
* reset the stipple pattern at each packet (line strips, line loops).
*/
radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
rs->pa_sc_line_stipple |
S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : 2));
if (rast_prim != sctx->last_rast_prim &&
(sctx->ngg || sctx->gs_shader.cso)) {
unsigned gs_out = si_conv_prim_to_gs_out(sctx->current_rast_prim);
radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out);
sctx->context_roll = true;
if (sctx->chip_class >= GFX10) {
sctx->current_vs_state &= C_VS_STATE_OUTPRIM;
sctx->current_vs_state |= S_VS_STATE_OUTPRIM(gs_out);
}
}
sctx->last_rast_prim = rast_prim;
sctx->last_sc_line_stipple = rs->pa_sc_line_stipple;
sctx->context_roll = true;
}
static void si_emit_vs_state(struct si_context *sctx,

View file

@ -622,30 +622,6 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader, pm4);
}
static unsigned si_conv_prim_to_gs_out(unsigned mode)
{
static const int prim_conv[] = {
[PIPE_PRIM_POINTS] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
[PIPE_PRIM_LINES] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
[PIPE_PRIM_LINE_LOOP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
[PIPE_PRIM_LINE_STRIP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
[PIPE_PRIM_TRIANGLES] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_TRIANGLE_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_TRIANGLE_FAN] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_QUADS] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_QUAD_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_POLYGON] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_LINES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
[PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
[PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
};
assert(mode < ARRAY_SIZE(prim_conv));
return prim_conv[mode];
}
void gfx9_get_gs_info(struct si_shader_selector *es,
struct si_shader_selector *gs,
struct gfx9_gs_info *out)
@ -753,14 +729,12 @@ static void si_emit_shader_gs(struct si_context *sctx)
return;
/* R_028A60_VGT_GSVS_RING_OFFSET_1, R_028A64_VGT_GSVS_RING_OFFSET_2
* R_028A68_VGT_GSVS_RING_OFFSET_3, R_028A6C_VGT_GS_OUT_PRIM_TYPE */
radeon_opt_set_context_reg4(sctx, R_028A60_VGT_GSVS_RING_OFFSET_1,
* R_028A68_VGT_GSVS_RING_OFFSET_3 */
radeon_opt_set_context_reg3(sctx, R_028A60_VGT_GSVS_RING_OFFSET_1,
SI_TRACKED_VGT_GSVS_RING_OFFSET_1,
shader->ctx_reg.gs.vgt_gsvs_ring_offset_1,
shader->ctx_reg.gs.vgt_gsvs_ring_offset_2,
shader->ctx_reg.gs.vgt_gsvs_ring_offset_3,
shader->ctx_reg.gs.vgt_gs_out_prim_type);
shader->ctx_reg.gs.vgt_gsvs_ring_offset_3);
/* R_028AB0_VGT_GSVS_RING_ITEMSIZE */
radeon_opt_set_context_reg(sctx, R_028AB0_VGT_GSVS_RING_ITEMSIZE,
@ -841,9 +815,6 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
offset += num_components[2] * sel->gs_max_out_vertices;
shader->ctx_reg.gs.vgt_gsvs_ring_offset_3 = offset;
shader->ctx_reg.gs.vgt_gs_out_prim_type =
si_conv_prim_to_gs_out(sel->gs_output_prim);
if (max_stream >= 3)
offset += num_components[3] * sel->gs_max_out_vertices;
shader->ctx_reg.gs.vgt_gsvs_ring_itemsize = offset;