radeonsi/gfx10: export correct PrimitiveID from NGG vertex shaders

Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
Marek Olšák 2019-06-05 20:20:47 -04:00
parent 3203a74dcb
commit b680f723f8
8 changed files with 71 additions and 11 deletions

View file

@ -616,6 +616,30 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
emitted_prims = nggso.emit[0];
}
/* Copy Primitive IDs from GS threads to the LDS address corresponding
* to the ES thread of the provoking vertex.
*/
if (ctx->type == PIPE_SHADER_VERTEX &&
ctx->shader->key.mono.u.vs_export_prim_id) {
/* Streamout uses LDS. We need to wait for it before we can reuse it. */
if (sel->so.num_outputs)
ac_build_s_barrier(&ctx->ac);
ac_build_ifcc(&ctx->ac, is_gs_thread, 5400);
/* Extract the PROVOKING_VTX_INDEX field. */
LLVMValueRef provoking_vtx_in_prim =
si_unpack_param(ctx, ctx->param_vs_state_bits, 4, 2);
/* provoking_vtx_index = vtxindex[provoking_vtx_in_prim]; */
LLVMValueRef indices = ac_build_gather_values(&ctx->ac, vtxindex, 3);
LLVMValueRef provoking_vtx_index =
LLVMBuildExtractElement(builder, indices, provoking_vtx_in_prim, "");
LLVMBuildStore(builder, ctx->abi.gs_prim_id,
ac_build_gep0(&ctx->ac, ctx->esgs_ring, provoking_vtx_index));
ac_build_endif(&ctx->ac, 5400);
}
/* TODO: primitive culling */
build_sendmsg_gs_alloc_req(ctx, ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx));
@ -700,12 +724,23 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
}
}
/* TODO: Vertex shaders have to get PrimitiveID from GS VGPRs. */
if (ctx->type == PIPE_SHADER_TESS_EVAL &&
ctx->shader->key.mono.u.vs_export_prim_id) {
if (ctx->shader->key.mono.u.vs_export_prim_id) {
outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID;
outputs[i].semantic_index = 0;
outputs[i].values[0] = ac_to_float(&ctx->ac, si_get_primitive_id(ctx, 0));
if (ctx->type == PIPE_SHADER_VERTEX) {
/* Wait for GS stores to finish. */
ac_build_s_barrier(&ctx->ac);
tmp = ac_build_gep0(&ctx->ac, ctx->esgs_ring,
get_thread_id_in_tg(ctx));
outputs[i].values[0] = LLVMBuildLoad(builder, tmp, "");
} else {
assert(ctx->type == PIPE_SHADER_TESS_EVAL);
outputs[i].values[0] = si_get_primitive_id(ctx, 0);
}
outputs[i].values[0] = ac_to_float(&ctx->ac, outputs[i].values[0]);
for (unsigned j = 1; j < 4; j++)
outputs[i].values[j] = LLVMGetUndef(ctx->f32);

View file

@ -441,6 +441,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
ctx->last_prim = -1;
ctx->last_multi_vgt_param = -1;
ctx->last_rast_prim = -1;
ctx->last_flatshade_first = -1;
ctx->last_sc_line_stipple = ~0;
ctx->last_vs_state = ~0;
ctx->last_ls = NULL;

View file

@ -1059,6 +1059,7 @@ struct si_context {
int last_prim;
int last_multi_vgt_param;
int last_rast_prim;
int last_flatshade_first;
unsigned last_sc_line_stipple;
unsigned current_vs_state;
unsigned last_vs_state;

View file

@ -5228,10 +5228,20 @@ static bool si_shader_binary_open(struct si_screen *screen,
esgs_ring_size = shader->gs_info.esgs_ring_size;;
}
if (sel && shader->key.as_ngg && sel->so.num_outputs) {
unsigned esgs_vertex_bytes = 4 * (4 * sel->info.num_outputs + 1);
esgs_ring_size = MAX2(esgs_ring_size,
shader->ngg.max_out_verts * esgs_vertex_bytes);
if (sel && shader->key.as_ngg) {
if (sel->so.num_outputs) {
unsigned esgs_vertex_bytes = 4 * (4 * sel->info.num_outputs + 1);
esgs_ring_size = MAX2(esgs_ring_size,
shader->ngg.max_out_verts * esgs_vertex_bytes);
}
/* GS stores Primitive IDs into LDS at the address corresponding
* to the provoking vertex. All vertex threads load and export
* PrimitiveID for their thread.
*/
if (sel->type == PIPE_SHADER_VERTEX &&
shader->key.mono.u.vs_export_prim_id)
esgs_ring_size = MAX2(esgs_ring_size, shader->ngg.max_out_verts * 4);
}
if (esgs_ring_size) {

View file

@ -241,13 +241,14 @@ enum {
};
/* Fields of driver-defined VS state SGPR. */
/*
 * Naming convention used below:
 *   S_<FIELD>(x) masks x to the field width and shifts it into position.
 *   C_<FIELD>    is the 32-bit mask with the field's bits cleared; AND the
 *                current value with it before OR-ing in a new S_<FIELD>(x).
 */
/* Clamp vertex color output (only used in VS as VS). */
/* Bit 0. */
#define S_VS_STATE_CLAMP_VERTEX_COLOR(x) (((unsigned)(x) & 0x1) << 0)
#define C_VS_STATE_CLAMP_VERTEX_COLOR 0xFFFFFFFE
/* Bit 1. */
#define S_VS_STATE_INDEXED(x) (((unsigned)(x) & 0x1) << 1)
#define C_VS_STATE_INDEXED 0xFFFFFFFD
/* Bits 2-3. */
#define S_VS_STATE_OUTPRIM(x) (((unsigned)(x) & 0x3) << 2)
#define C_VS_STATE_OUTPRIM 0xFFFFFFF3
/* Bits 4-5: a 2-bit provoking-vertex index. Shader code that extracts this
 * field must use the same offset (4) and width (2). */
#define S_VS_STATE_PROVOKING_VTX_INDEX(x) (((unsigned)(x) & 0x3) << 4)
#define C_VS_STATE_PROVOKING_VTX_INDEX 0xFFFFFFCF
/* Bits 8-20 (13 bits). */
#define S_VS_STATE_LS_OUT_PATCH_SIZE(x) (((unsigned)(x) & 0x1FFF) << 8)
#define C_VS_STATE_LS_OUT_PATCH_SIZE 0xFFE000FF
/* Bits 24-31 (8 bits). */
#define S_VS_STATE_LS_OUT_VERTEX_SIZE(x) (((unsigned)(x) & 0xFF) << 24)

View file

@ -892,6 +892,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
rs->clamp_fragment_color = state->clamp_fragment_color;
rs->clamp_vertex_color = state->clamp_vertex_color;
rs->flatshade = state->flatshade;
rs->flatshade_first = state->flatshade_first;
rs->sprite_coord_enable = state->sprite_coord_enable;
rs->rasterizer_discard = state->rasterizer_discard;
rs->pa_sc_line_stipple = state->line_stipple_enable ?

View file

@ -74,6 +74,7 @@ struct si_state_rasterizer {
unsigned clip_plane_enable:8;
unsigned half_pixel_center:1;
unsigned flatshade:1;
unsigned flatshade_first:1;
unsigned two_side:1;
unsigned multisample_enable:1;
unsigned force_persample_interp:1;

View file

@ -586,7 +586,9 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
if (likely(rast_prim == sctx->last_rast_prim &&
rs->pa_sc_line_stipple == sctx->last_sc_line_stipple))
rs->pa_sc_line_stipple == sctx->last_sc_line_stipple &&
(sctx->chip_class <= GFX9 ||
rs->flatshade_first == sctx->last_flatshade_first)))
return;
if (util_prim_is_lines(rast_prim)) {
@ -599,9 +601,10 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
sctx->context_roll = true;
}
unsigned gs_out = si_conv_prim_to_gs_out(sctx->current_rast_prim);
if (rast_prim != sctx->last_rast_prim &&
(sctx->ngg || sctx->gs_shader.cso)) {
unsigned gs_out = si_conv_prim_to_gs_out(sctx->current_rast_prim);
radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out);
sctx->context_roll = true;
@ -611,8 +614,15 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
}
}
if (sctx->chip_class >= GFX10) {
unsigned vtx_index = rs->flatshade_first ? 0 : gs_out;
sctx->current_vs_state &= C_VS_STATE_PROVOKING_VTX_INDEX;
sctx->current_vs_state |= S_VS_STATE_PROVOKING_VTX_INDEX(vtx_index);
}
sctx->last_rast_prim = rast_prim;
sctx->last_sc_line_stipple = rs->pa_sc_line_stipple;
sctx->last_flatshade_first = rs->flatshade_first;
}
static void si_emit_vs_state(struct si_context *sctx,