radv: replace use_ngg_streamout by gfx_level checks

There is no way to enable or disable it via debug options or similar;
it's only used on GFX11+.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41204>
This commit is contained in:
Samuel Pitoiset 2026-04-27 14:38:25 +02:00 committed by Marge Bot
parent 4b66258717
commit 94ae99f16f
7 changed files with 17 additions and 25 deletions

View file

@ -7006,7 +7006,7 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)
*/
size = 0xffffffff;
if (pdev->use_ngg_streamout) {
if (pdev->info.gfx_level >= GFX11) {
/* With NGG streamout, the buffer size is used to determine the max emit per buffer
* and also acts as a disable bit when it's 0.
*/
@ -8714,8 +8714,8 @@ radv_bind_pre_rast_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_
*/
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
if (pdev->use_ngg_streamout && pdev->info.gfx_level < GFX12) {
/* GFX11 needs GDS OA for streamout. */
if (pdev->info.gfx_level >= GFX11 && pdev->info.gfx_level < GFX12) {
/* GFX11-11.5 need GDS OA for streamout. */
cmd_buffer->queue_state.gds_oa_needed = true;
}
}
@ -12137,7 +12137,7 @@ radv_emit_streamout_enable_state(struct radv_cmd_buffer *cmd_buffer)
const bool streamout_enabled = radv_is_streamout_enabled(cmd_buffer);
uint32_t enabled_stream_buffers_mask = 0;
assert(!pdev->use_ngg_streamout);
assert(pdev->info.gfx_level < GFX11);
radeon_begin(cmd_buffer->cs);
@ -15683,15 +15683,14 @@ radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable)
so->hw_enabled_mask =
so->enabled_mask | (so->enabled_mask << 4) | (so->enabled_mask << 8) | (so->enabled_mask << 12);
if (!pdev->use_ngg_streamout && ((old_streamout_enabled != radv_is_streamout_enabled(cmd_buffer)) ||
(old_hw_enabled_mask != so->hw_enabled_mask)))
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_ENABLE;
if (pdev->use_ngg_streamout) {
if (pdev->info.gfx_level >= GFX11) {
/* Re-emit streamout descriptors because with NGG streamout, a buffer size of 0 acts like a
* disable bit and this is needed when streamout needs to be ignored in shaders.
*/
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY | RADV_CMD_DIRTY_STREAMOUT_BUFFER;
} else {
if (old_streamout_enabled != radv_is_streamout_enabled(cmd_buffer) || old_hw_enabled_mask != so->hw_enabled_mask)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_ENABLE;
}
}
@ -15811,7 +15810,7 @@ radv_CmdBeginTransformFeedback2EXT(VkCommandBuffer commandBuffer, uint32_t first
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2;
radv_emit_cache_flush(cmd_buffer);
}
} else if (!pdev->use_ngg_streamout) {
} else if (pdev->info.gfx_level < GFX11) {
radv_flush_vgt_streamout(cmd_buffer);
}
@ -15831,7 +15830,7 @@ radv_CmdBeginTransformFeedback2EXT(VkCommandBuffer commandBuffer, uint32_t first
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, va, so->state_va + i * 8 + 4,
AC_CP_COPY_DATA_WR_CONFIRM, false);
}
} else if (pdev->use_ngg_streamout) {
} else if (pdev->info.gfx_level >= GFX11) {
if (append) {
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_REG, va,
(R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i, AC_CP_COPY_DATA_WR_CONFIRM, false);
@ -15880,7 +15879,7 @@ radv_CmdBeginTransformFeedback2EXT(VkCommandBuffer commandBuffer, uint32_t first
radv_set_streamout_enable(cmd_buffer, true);
if (!pdev->use_ngg_streamout)
if (pdev->info.gfx_level < GFX11)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_ENABLE;
}
@ -15931,7 +15930,7 @@ radv_CmdEndTransformFeedback2EXT(VkCommandBuffer commandBuffer, uint32_t firstCo
assert(firstCounterRange + counterRangeCount <= MAX_SO_BUFFERS);
if (pdev->use_ngg_streamout) {
if (pdev->info.gfx_level >= GFX11) {
/* Wait for streamout to finish before copying back the number of bytes
* written.
*/
@ -15962,7 +15961,7 @@ radv_CmdEndTransformFeedback2EXT(VkCommandBuffer commandBuffer, uint32_t firstCo
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, so->state_va + i * 8 + 4, va,
AC_CP_COPY_DATA_WR_CONFIRM, false);
}
} else if (pdev->use_ngg_streamout) {
} else if (pdev->info.gfx_level >= GFX11) {
if (append) {
ac_emit_cp_copy_data(cs->b, COPY_DATA_REG, COPY_DATA_DST_MEM,
(R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i, va, AC_CP_COPY_DATA_WR_CONFIRM,

View file

@ -1198,7 +1198,6 @@ radv_device_init_compiler_info(struct radv_device *device)
/* Shader features */
.device_robustness_state = &device->vk.robustness_state,
.use_ngg = pdev->use_ngg,
.use_ngg_streamout = pdev->use_ngg_streamout,
.load_grid_size_from_user_sgpr = device->load_grid_size_from_user_sgpr,
.emulate_ngg_gs_query_pipeline_stat = pdev->emulate_ngg_gs_query_pipeline_stat,
.primitives_generated_query = device->cache_key.primitives_generated_query,

View file

@ -2622,8 +2622,6 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
(instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
!(instance->debug_flags & RADV_DEBUG_NO_NGGC);
pdev->use_ngg_streamout = pdev->info.gfx_level >= GFX11;
pdev->emulate_ngg_gs_query_pipeline_stat = pdev->use_ngg && pdev->info.gfx_level < GFX11;
pdev->emulate_mesh_shader_queries = pdev->info.gfx_level == GFX10_3;

View file

@ -135,9 +135,6 @@ struct radv_physical_device {
/* Whether to enable NGG culling. */
bool use_ngg_culling;
/* Whether to enable NGG streamout. */
bool use_ngg_streamout;
/* Whether to emulate the number of primitives generated by GS. */
bool emulate_ngg_gs_query_pipeline_stat;

View file

@ -967,7 +967,7 @@ radv_begin_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t i
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (pdev->use_ngg_streamout) {
if (pdev->info.gfx_level >= GFX11) {
/* generated prim counter */
gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 4, 0x80000000);
@ -996,7 +996,7 @@ radv_end_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t ind
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_cmd_stream *cs = cmd_buffer->cs;
if (pdev->use_ngg_streamout) {
if (pdev->info.gfx_level >= GFX11) {
/* generated prim counter */
gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 20, 0x80000000);

View file

@ -979,7 +979,7 @@ radv_lower_ngg(const struct radv_compiler_info *compiler_info, struct radv_shade
options.vs_output_param_offset = info->outinfo.vs_output_param_offset;
options.has_param_exports = info->outinfo.param_exports || info->outinfo.prim_param_exports;
options.can_cull = info->has_ngg_culling;
options.disable_streamout = !compiler_info->use_ngg_streamout;
options.disable_streamout = compiler_info->ac->gfx_level < GFX11;
options.has_xfb_prim_query = info->has_xfb_query;
options.has_gs_primitives_query = compiler_info->ac->gfx_level < GFX11;
options.force_vrs = info->force_vrs_per_vertex;
@ -2211,7 +2211,7 @@ radv_postprocess_binary_config(const struct radv_compiler_info *compiler_info, s
}
}
if (gfx_level <= GFX10_3 && !compiler_info->use_ngg_streamout) {
if (gfx_level < GFX11) {
config->rsrc2 |= S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) | S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) | S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
S_00B12C_SO_EN(!!info->so.enabled_stream_buffers_mask);

View file

@ -570,7 +570,6 @@ struct radv_compiler_info {
uint32_t buffer_descriptor_size;
uint32_t buffer_descriptor_alignment;
bool use_ngg;
bool use_ngg_streamout;
bool load_grid_size_from_user_sgpr;
bool emulate_ngg_gs_query_pipeline_stat;
bool primitives_generated_query;