mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 05:08:08 +02:00
radeonsi/gfx10: export correct PrimitiveID from NGG vertex shaders
Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
parent
3203a74dcb
commit
b680f723f8
8 changed files with 71 additions and 11 deletions
|
|
@ -616,6 +616,30 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
|
|||
emitted_prims = nggso.emit[0];
|
||||
}
|
||||
|
||||
/* Copy Primitive IDs from GS threads to the LDS address corresponding
|
||||
* to the ES thread of the provoking vertex.
|
||||
*/
|
||||
if (ctx->type == PIPE_SHADER_VERTEX &&
|
||||
ctx->shader->key.mono.u.vs_export_prim_id) {
|
||||
/* Streamout uses LDS. We need to wait for it before we can reuse it. */
|
||||
if (sel->so.num_outputs)
|
||||
ac_build_s_barrier(&ctx->ac);
|
||||
|
||||
ac_build_ifcc(&ctx->ac, is_gs_thread, 5400);
|
||||
/* Extract the PROVOKING_VTX_INDEX field. */
|
||||
LLVMValueRef provoking_vtx_in_prim =
|
||||
si_unpack_param(ctx, ctx->param_vs_state_bits, 4, 2);
|
||||
|
||||
/* provoking_vtx_index = vtxindex[provoking_vtx_in_prim]; */
|
||||
LLVMValueRef indices = ac_build_gather_values(&ctx->ac, vtxindex, 3);
|
||||
LLVMValueRef provoking_vtx_index =
|
||||
LLVMBuildExtractElement(builder, indices, provoking_vtx_in_prim, "");
|
||||
|
||||
LLVMBuildStore(builder, ctx->abi.gs_prim_id,
|
||||
ac_build_gep0(&ctx->ac, ctx->esgs_ring, provoking_vtx_index));
|
||||
ac_build_endif(&ctx->ac, 5400);
|
||||
}
|
||||
|
||||
/* TODO: primitive culling */
|
||||
|
||||
build_sendmsg_gs_alloc_req(ctx, ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx));
|
||||
|
|
@ -700,12 +724,23 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
|
|||
}
|
||||
}
|
||||
|
||||
/* TODO: Vertex shaders have to get PrimitiveID from GS VGPRs. */
|
||||
if (ctx->type == PIPE_SHADER_TESS_EVAL &&
|
||||
ctx->shader->key.mono.u.vs_export_prim_id) {
|
||||
if (ctx->shader->key.mono.u.vs_export_prim_id) {
|
||||
outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID;
|
||||
outputs[i].semantic_index = 0;
|
||||
outputs[i].values[0] = ac_to_float(&ctx->ac, si_get_primitive_id(ctx, 0));
|
||||
|
||||
if (ctx->type == PIPE_SHADER_VERTEX) {
|
||||
/* Wait for GS stores to finish. */
|
||||
ac_build_s_barrier(&ctx->ac);
|
||||
|
||||
tmp = ac_build_gep0(&ctx->ac, ctx->esgs_ring,
|
||||
get_thread_id_in_tg(ctx));
|
||||
outputs[i].values[0] = LLVMBuildLoad(builder, tmp, "");
|
||||
} else {
|
||||
assert(ctx->type == PIPE_SHADER_TESS_EVAL);
|
||||
outputs[i].values[0] = si_get_primitive_id(ctx, 0);
|
||||
}
|
||||
|
||||
outputs[i].values[0] = ac_to_float(&ctx->ac, outputs[i].values[0]);
|
||||
for (unsigned j = 1; j < 4; j++)
|
||||
outputs[i].values[j] = LLVMGetUndef(ctx->f32);
|
||||
|
||||
|
|
|
|||
|
|
@ -441,6 +441,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
|
|||
ctx->last_prim = -1;
|
||||
ctx->last_multi_vgt_param = -1;
|
||||
ctx->last_rast_prim = -1;
|
||||
ctx->last_flatshade_first = -1;
|
||||
ctx->last_sc_line_stipple = ~0;
|
||||
ctx->last_vs_state = ~0;
|
||||
ctx->last_ls = NULL;
|
||||
|
|
|
|||
|
|
@ -1059,6 +1059,7 @@ struct si_context {
|
|||
int last_prim;
|
||||
int last_multi_vgt_param;
|
||||
int last_rast_prim;
|
||||
int last_flatshade_first;
|
||||
unsigned last_sc_line_stipple;
|
||||
unsigned current_vs_state;
|
||||
unsigned last_vs_state;
|
||||
|
|
|
|||
|
|
@ -5228,10 +5228,20 @@ static bool si_shader_binary_open(struct si_screen *screen,
|
|||
esgs_ring_size = shader->gs_info.esgs_ring_size;;
|
||||
}
|
||||
|
||||
if (sel && shader->key.as_ngg && sel->so.num_outputs) {
|
||||
unsigned esgs_vertex_bytes = 4 * (4 * sel->info.num_outputs + 1);
|
||||
esgs_ring_size = MAX2(esgs_ring_size,
|
||||
shader->ngg.max_out_verts * esgs_vertex_bytes);
|
||||
if (sel && shader->key.as_ngg) {
|
||||
if (sel->so.num_outputs) {
|
||||
unsigned esgs_vertex_bytes = 4 * (4 * sel->info.num_outputs + 1);
|
||||
esgs_ring_size = MAX2(esgs_ring_size,
|
||||
shader->ngg.max_out_verts * esgs_vertex_bytes);
|
||||
}
|
||||
|
||||
/* GS stores Primitive IDs into LDS at the address corresponding
|
||||
* to the provoking vertex. All vertex threads load and export
|
||||
* PrimitiveID for their thread.
|
||||
*/
|
||||
if (sel->type == PIPE_SHADER_VERTEX &&
|
||||
shader->key.mono.u.vs_export_prim_id)
|
||||
esgs_ring_size = MAX2(esgs_ring_size, shader->ngg.max_out_verts * 4);
|
||||
}
|
||||
|
||||
if (esgs_ring_size) {
|
||||
|
|
|
|||
|
|
@ -241,13 +241,14 @@ enum {
|
|||
};
|
||||
|
||||
/* Fields of driver-defined VS state SGPR.
 *
 * Each field has an S_* macro that masks a value and shifts it into the
 * field's bit position, and (except the last one visible here) a C_* constant
 * that is the AND-mask with that field's bits cleared.
 */
|
||||
/* Clamp vertex color output (only used in VS as VS). */
|
||||
/* Bit 0. */
#define S_VS_STATE_CLAMP_VERTEX_COLOR(x) (((unsigned)(x) & 0x1) << 0)
|
||||
#define C_VS_STATE_CLAMP_VERTEX_COLOR 0xFFFFFFFE
|
||||
/* Bit 1. */
#define S_VS_STATE_INDEXED(x) (((unsigned)(x) & 0x1) << 1)
|
||||
#define C_VS_STATE_INDEXED 0xFFFFFFFD
|
||||
/* Bits 3:2. */
#define S_VS_STATE_OUTPRIM(x) (((unsigned)(x) & 0x3) << 2)
|
||||
#define C_VS_STATE_OUTPRIM 0xFFFFFFF3
|
||||
/* Bits 5:4. NOTE(review): presumably the index of the provoking vertex
 * within the primitive, as the name suggests - confirm against the code
 * that writes and unpacks this field.
 */
#define S_VS_STATE_PROVOKING_VTX_INDEX(x) (((unsigned)(x) & 0x3) << 4)
|
||||
#define C_VS_STATE_PROVOKING_VTX_INDEX 0xFFFFFFCF
|
||||
/* Bits 20:8 (13 bits). */
#define S_VS_STATE_LS_OUT_PATCH_SIZE(x) (((unsigned)(x) & 0x1FFF) << 8)
|
||||
#define C_VS_STATE_LS_OUT_PATCH_SIZE 0xFFE000FF
|
||||
/* Bits 31:24 (8 bits). */
#define S_VS_STATE_LS_OUT_VERTEX_SIZE(x) (((unsigned)(x) & 0xFF) << 24)
|
||||
|
|
|
|||
|
|
@ -892,6 +892,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
|
|||
rs->clamp_fragment_color = state->clamp_fragment_color;
|
||||
rs->clamp_vertex_color = state->clamp_vertex_color;
|
||||
rs->flatshade = state->flatshade;
|
||||
rs->flatshade_first = state->flatshade_first;
|
||||
rs->sprite_coord_enable = state->sprite_coord_enable;
|
||||
rs->rasterizer_discard = state->rasterizer_discard;
|
||||
rs->pa_sc_line_stipple = state->line_stipple_enable ?
|
||||
|
|
|
|||
|
|
@ -74,6 +74,7 @@ struct si_state_rasterizer {
|
|||
unsigned clip_plane_enable:8;
|
||||
unsigned half_pixel_center:1;
|
||||
unsigned flatshade:1;
|
||||
unsigned flatshade_first:1;
|
||||
unsigned two_side:1;
|
||||
unsigned multisample_enable:1;
|
||||
unsigned force_persample_interp:1;
|
||||
|
|
|
|||
|
|
@ -586,7 +586,9 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
|
|||
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
|
||||
|
||||
if (likely(rast_prim == sctx->last_rast_prim &&
|
||||
rs->pa_sc_line_stipple == sctx->last_sc_line_stipple))
|
||||
rs->pa_sc_line_stipple == sctx->last_sc_line_stipple &&
|
||||
(sctx->chip_class <= GFX9 ||
|
||||
rs->flatshade_first == sctx->last_flatshade_first)))
|
||||
return;
|
||||
|
||||
if (util_prim_is_lines(rast_prim)) {
|
||||
|
|
@ -599,9 +601,10 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
|
|||
sctx->context_roll = true;
|
||||
}
|
||||
|
||||
unsigned gs_out = si_conv_prim_to_gs_out(sctx->current_rast_prim);
|
||||
|
||||
if (rast_prim != sctx->last_rast_prim &&
|
||||
(sctx->ngg || sctx->gs_shader.cso)) {
|
||||
unsigned gs_out = si_conv_prim_to_gs_out(sctx->current_rast_prim);
|
||||
radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out);
|
||||
sctx->context_roll = true;
|
||||
|
||||
|
|
@ -611,8 +614,15 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
|
|||
}
|
||||
}
|
||||
|
||||
if (sctx->chip_class >= GFX10) {
|
||||
unsigned vtx_index = rs->flatshade_first ? 0 : gs_out;
|
||||
sctx->current_vs_state &= C_VS_STATE_PROVOKING_VTX_INDEX;
|
||||
sctx->current_vs_state |= S_VS_STATE_PROVOKING_VTX_INDEX(vtx_index);
|
||||
}
|
||||
|
||||
sctx->last_rast_prim = rast_prim;
|
||||
sctx->last_sc_line_stipple = rs->pa_sc_line_stipple;
|
||||
sctx->last_flatshade_first = rs->flatshade_first;
|
||||
}
|
||||
|
||||
static void si_emit_vs_state(struct si_context *sctx,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue