mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 22:49:13 +02:00
radeonsi: mostly fix NGG streamout overflow queries when XFB is disabled
When XFB was disabled, we were incrementing primitives_generated but not primitives_emitted, which caused the overflow query to return true, but it should have returned false because XFB was disabled.

This disables counting primitives_generated when there is no primitives_generated query.

When both primitives_generated and the overflow query are enabled simultaneously and XFB is disabled, it will be incorrect again, but that had been equally incorrect with the non-NGG codepath too, just not discovered because of the lack of tests. This commit just changes NGG streamout queries to behave the same as legacy.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37849>
This commit is contained in:
parent
02db1fbe82
commit
dd4df28ef2
6 changed files with 66 additions and 37 deletions
|
|
@ -423,9 +423,6 @@ KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupColor_texture_1d_rgb
|
|||
KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupColor_texture_1d_rgba8_snorm,Fail
|
||||
KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupColor_texture_1d_rgba8i,Fail
|
||||
KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupColor_texture_1d_rgba8ui,Fail
|
||||
KHR-GL46.transform_feedback_overflow_query_ARB.advanced-single-stream-interleaved-attribs,Fail
|
||||
KHR-GL46.transform_feedback_overflow_query_ARB.advanced-single-stream-separate-attribs,Fail
|
||||
KHR-GL46.transform_feedback_overflow_query_ARB.multiple-streams-multiple-buffers-per-stream,Fail
|
||||
KHR-GL46.transform_feedback_overflow_query_ARB.multiple-streams-one-buffer-per-stream,Fail
|
||||
KHR-GL46.texture_query_lod.sampler1D_test,Fail
|
||||
KHR-GL46.texture_query_lod.sampler2D_test,Fail
|
||||
|
|
|
|||
|
|
|
@ -30,10 +30,6 @@ spec@glsl-es-1.00@linker@glsl-mismatched-uniform-precision-unused,Fail
|
|||
## Fail because GFX10+ removed MS texture support (see si_get_sparse_texture_virtual_page_size)
|
||||
KHR-GL46.sparse_texture2_tests.SparseTexture2Allocation,Fail
|
||||
KHR-GL46.sparse_texture2_tests.SparseTexture2Commitment,Fail
|
||||
## https://gitlab.freedesktop.org/mesa/mesa/-/issues/636
|
||||
KHR-GL46.transform_feedback_overflow_query_ARB.advanced-single-stream-interleaved-attribs,Fail
|
||||
KHR-GL46.transform_feedback_overflow_query_ARB.advanced-single-stream-separate-attribs,Fail
|
||||
KHR-GL46.transform_feedback_overflow_query_ARB.multiple-streams-multiple-buffers-per-stream,Fail
|
||||
KHR-GL46.transform_feedback_overflow_query_ARB.multiple-streams-one-buffer-per-stream,Fail
|
||||
|
||||
# See Khronos issue 5587: the test expects one-dimensional (array) texture to work while
|
||||
|
|
|
|||
|
|
|
@ -109,7 +109,6 @@ success:
|
|||
sbuf.buffer_offset = qbuf->head;
|
||||
sbuf.buffer_size = sizeof(struct gfx11_sh_query_buffer_mem);
|
||||
si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, &sbuf);
|
||||
SET_FIELD(sctx->current_gs_state, GS_STATE_STREAMOUT_QUERY_ENABLED, 1);
|
||||
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_query);
|
||||
return true;
|
||||
|
|
@ -135,8 +134,14 @@ static bool gfx11_sh_query_begin(struct si_context *sctx, struct si_query *rquer
|
|||
query->first = list_last_entry(&sctx->shader_query_buffers, struct gfx11_sh_query_buffer, list);
|
||||
query->first_begin = query->first->head;
|
||||
|
||||
sctx->streamout.num_ngg_queries++;
|
||||
query->first->refcount++;
|
||||
si_update_prims_generated_query_state(sctx, query->b.type, 1);
|
||||
|
||||
/* Update num_ngg_queries and mark streamout_enable dirty if the enable state changed. */
|
||||
bool old_streamout_query_enable_state = si_get_streamout_enable_state(sctx);
|
||||
sctx->streamout.num_ngg_queries++;
|
||||
if (old_streamout_query_enable_state != si_get_streamout_enable_state(sctx))
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.streamout_enable);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -161,11 +166,16 @@ static bool gfx11_sh_query_end(struct si_context *sctx, struct si_query *rquery)
|
|||
0xffffffff, PIPE_QUERY_GPU_FINISHED);
|
||||
}
|
||||
|
||||
si_update_prims_generated_query_state(sctx, query->b.type, -1);
|
||||
|
||||
/* Update num_ngg_queries and mark streamout_enable dirty if the enable state changed. */
|
||||
bool old_streamout_query_enable_state = si_get_streamout_enable_state(sctx);
|
||||
sctx->streamout.num_ngg_queries--;
|
||||
if (old_streamout_query_enable_state != si_get_streamout_enable_state(sctx))
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.streamout_enable);
|
||||
|
||||
if (sctx->streamout.num_ngg_queries <= 0 || !si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query)) {
|
||||
si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, NULL);
|
||||
SET_FIELD(sctx->current_gs_state, GS_STATE_STREAMOUT_QUERY_ENABLED, 0);
|
||||
|
||||
/* If a query_begin is followed by a query_end without a draw
|
||||
* in-between, we need to clear the atom to ensure that the
|
||||
|
|
|
|||
|
|
@ -616,8 +616,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
|
|||
si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
|
||||
si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
|
||||
si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
|
||||
if (ctx->gfx_level < GFX11)
|
||||
si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
|
||||
si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
|
||||
/* CLEAR_STATE disables all window rectangles. */
|
||||
if (!has_clear_state || ctx->num_window_rectangles > 0)
|
||||
si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles);
|
||||
|
|
|
|||
|
|
@ -1896,7 +1896,32 @@ static inline struct si_shader_ctx_state *si_get_vs(struct si_context *sctx)
|
|||
|
||||
static inline bool si_get_streamout_enable_state(struct si_context *sctx)
|
||||
{
|
||||
return sctx->streamout.streamout_enabled || sctx->streamout.prims_gen_query_enabled;
|
||||
/* For GFX11, return whether NGG streamout queries are enabled. For older gens, return whether
|
||||
* streamout hw is enabled.
|
||||
*
|
||||
* Note that when both PRIMITIVES_GENERATED and SO_OVERFLOW queries are enabled and XFB is
|
||||
* disabled, SO_OVERFLOW queries will incorrectly return true because PRIMITIVES_GENERATED
|
||||
* is incremented and PRIMITIVES_EMITTED is not. The problem is that SO_OVERFLOW queries
|
||||
* are implemented by comparing PRIMITIVES_GENERATED and PRIMITIVES_EMITTED, however, when
|
||||
* XFB is disabled, SO_OVERFLOW queries should increment neither PRIMITIVES_GENERATED nor
|
||||
* PRIMITIVES_EMITTED, but when a separate PRIMITIVES_GENERATED is active, we should increment
|
||||
* it. So the 2 queries are in conflict when XFB is disabled.
|
||||
*
|
||||
* Possible solutions:
|
||||
* - For NGG: Emulate SO_OVERFLOW queries using memory stores separately from PRIMITIVES_GENERATED.
|
||||
* - For legacy: Emulate SO_OVERFLOW queries using memory stores, same as NGG.
|
||||
*/
|
||||
if (sctx->gfx_level >= GFX11) {
|
||||
/* Enable NGG streamout queries when PRIMITIVES_GENERATED queries are active or when
|
||||
* streamout is enabled and any streamout queries except PRIMITIVES_GENERATED are active.
|
||||
*/
|
||||
return sctx->streamout.prims_gen_query_enabled ||
|
||||
(sctx->streamout.streamout_enabled &&
|
||||
(sctx->streamout.num_ngg_queries -
|
||||
sctx->streamout.prims_gen_query_enabled > 0));
|
||||
} else {
|
||||
return sctx->streamout.streamout_enabled || sctx->streamout.prims_gen_query_enabled;
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned si_optimal_tcc_alignment(struct si_context *sctx, unsigned upload_size)
|
||||
|
|
|
|||
|
|
@ -419,41 +419,45 @@ void si_emit_streamout_end(struct si_context *sctx)
|
|||
|
||||
static void si_emit_streamout_enable(struct si_context *sctx, unsigned index)
|
||||
{
|
||||
assert(sctx->gfx_level < GFX11);
|
||||
|
||||
radeon_begin(&sctx->gfx_cs);
|
||||
radeon_set_context_reg_seq(R_028B94_VGT_STRMOUT_CONFIG, 2);
|
||||
radeon_emit(S_028B94_STREAMOUT_0_EN(si_get_streamout_enable_state(sctx)) |
|
||||
S_028B94_RAST_STREAM(0) |
|
||||
S_028B94_STREAMOUT_1_EN(si_get_streamout_enable_state(sctx)) |
|
||||
S_028B94_STREAMOUT_2_EN(si_get_streamout_enable_state(sctx)) |
|
||||
S_028B94_STREAMOUT_3_EN(si_get_streamout_enable_state(sctx)));
|
||||
radeon_emit(sctx->streamout.hw_enabled_mask & sctx->streamout.enabled_stream_buffers_mask);
|
||||
radeon_end();
|
||||
if (sctx->gfx_level >= GFX11) {
|
||||
SET_FIELD(sctx->current_gs_state, GS_STATE_STREAMOUT_QUERY_ENABLED,
|
||||
si_get_streamout_enable_state(sctx));
|
||||
} else {
|
||||
radeon_begin(&sctx->gfx_cs);
|
||||
radeon_set_context_reg_seq(R_028B94_VGT_STRMOUT_CONFIG, 2);
|
||||
radeon_emit(S_028B94_STREAMOUT_0_EN(si_get_streamout_enable_state(sctx)) |
|
||||
S_028B94_RAST_STREAM(0) |
|
||||
S_028B94_STREAMOUT_1_EN(si_get_streamout_enable_state(sctx)) |
|
||||
S_028B94_STREAMOUT_2_EN(si_get_streamout_enable_state(sctx)) |
|
||||
S_028B94_STREAMOUT_3_EN(si_get_streamout_enable_state(sctx)));
|
||||
radeon_emit(sctx->streamout.hw_enabled_mask & sctx->streamout.enabled_stream_buffers_mask);
|
||||
radeon_end();
|
||||
}
|
||||
}
|
||||
|
||||
static void si_set_streamout_enable(struct si_context *sctx, bool enable)
|
||||
{
|
||||
if (sctx->gfx_level >= GFX11)
|
||||
return;
|
||||
|
||||
bool old_strmout_en = si_get_streamout_enable_state(sctx);
|
||||
unsigned old_hw_enabled_mask = sctx->streamout.hw_enabled_mask;
|
||||
|
||||
sctx->streamout.streamout_enabled = enable;
|
||||
|
||||
sctx->streamout.hw_enabled_mask =
|
||||
sctx->streamout.enabled_mask | (sctx->streamout.enabled_mask << 4) |
|
||||
(sctx->streamout.enabled_mask << 8) | (sctx->streamout.enabled_mask << 12);
|
||||
|
||||
if ((old_strmout_en != si_get_streamout_enable_state(sctx)) ||
|
||||
(old_hw_enabled_mask != sctx->streamout.hw_enabled_mask))
|
||||
if (old_strmout_en != si_get_streamout_enable_state(sctx))
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.streamout_enable);
|
||||
|
||||
if (sctx->gfx_level < GFX11) {
|
||||
sctx->streamout.hw_enabled_mask =
|
||||
sctx->streamout.enabled_mask | (sctx->streamout.enabled_mask << 4) |
|
||||
(sctx->streamout.enabled_mask << 8) | (sctx->streamout.enabled_mask << 12);
|
||||
|
||||
if (old_hw_enabled_mask != sctx->streamout.hw_enabled_mask)
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.streamout_enable);
|
||||
}
|
||||
}
|
||||
|
||||
void si_update_prims_generated_query_state(struct si_context *sctx, unsigned type, int diff)
|
||||
{
|
||||
if (sctx->gfx_level < GFX11 && type == PIPE_QUERY_PRIMITIVES_GENERATED) {
|
||||
if (type == PIPE_QUERY_PRIMITIVES_GENERATED) {
|
||||
bool old_strmout_en = si_get_streamout_enable_state(sctx);
|
||||
|
||||
sctx->streamout.num_prims_gen_queries += diff;
|
||||
|
|
@ -479,7 +483,5 @@ void si_init_streamout_functions(struct si_context *sctx)
|
|||
sctx->b.stream_output_target_destroy = si_so_target_destroy;
|
||||
sctx->b.set_stream_output_targets = si_set_streamout_targets;
|
||||
sctx->atoms.s.streamout_begin.emit = si_emit_streamout_begin;
|
||||
|
||||
if (sctx->gfx_level < GFX11)
|
||||
sctx->atoms.s.streamout_enable.emit = si_emit_streamout_enable;
|
||||
sctx->atoms.s.streamout_enable.emit = si_emit_streamout_enable;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue