radv: disable instance packing to fix pipeline query stats

RDNA2 is affected by a hardware bug: when instance packing is enabled
for adjacency primitive topologies and instance_count > 1, the pipeline
stats generated by GE are incorrect. The workaround (disabling instance
packing) needs to be applied for both indexed and non-indexed draws.

This is based on PAL waDisableInstancePacking.

This fixes KHR-GL46.pipeline_statistics_query_tests_ARB.* with Zink.

Gitlab: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6257
Cc: mesa-stable
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15877>
(cherry picked from commit 9348620946)
This commit is contained in:
Samuel Pitoiset 2022-04-19 13:46:12 +02:00 committed by Dylan Baker
parent 718a8223ff
commit 45cbab0442
2 changed files with 36 additions and 15 deletions

View file

@ -94,7 +94,7 @@
"description": "radv: disable instance packing to fix pipeline query stats",
"nominated": true,
"nomination_type": 0,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null
},

View file

@ -2626,18 +2626,6 @@ radv_emit_index_buffer(struct radv_cmd_buffer *cmd_buffer, bool indirect)
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_cmd_state *state = &cmd_buffer->state;
if (state->index_type != state->last_index_type) {
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs,
R_03090C_VGT_INDEX_TYPE, 2, state->index_type);
} else {
radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
radeon_emit(cs, state->index_type);
}
state->last_index_type = state->index_type;
}
/* For the direct indexed draws we use DRAW_INDEX_2, which includes
* the index_va and max_index_count already. */
if (!indirect)
@ -3763,7 +3751,8 @@ struct radv_draw_info {
static uint32_t
radv_get_primitive_reset_index(struct radv_cmd_buffer *cmd_buffer)
{
switch (cmd_buffer->state.index_type) {
uint32_t index_type = G_028A7C_INDEX_TYPE(cmd_buffer->state.index_type);
switch (index_type) {
case V_028A7C_VGT_INDEX_8:
return 0xffu;
case V_028A7C_VGT_INDEX_16:
@ -3810,6 +3799,8 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_d
struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
struct radv_cmd_state *state = &cmd_buffer->state;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint32_t topology = state->dynamic.primitive_topology;
bool disable_instance_packing = false;
/* Draw state. */
if (info->chip_class < GFX10) {
@ -3844,6 +3835,35 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_d
radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo);
}
/* RDNA2 is affected by a hardware bug when instance packing is enabled for adjacent primitive
* topologies and instance_count > 1, pipeline stats generated by GE are incorrect. It needs to
* be applied for indexed and non-indexed draws.
*/
if (info->chip_class == GFX10_3 && state->active_pipeline_queries > 0 &&
(draw_info->instance_count > 1 || draw_info->indirect) &&
(topology == V_008958_DI_PT_LINELIST_ADJ ||
topology == V_008958_DI_PT_LINESTRIP_ADJ ||
topology == V_008958_DI_PT_TRILIST_ADJ ||
topology == V_008958_DI_PT_TRISTRIP_ADJ)) {
disable_instance_packing = true;
}
if ((draw_info->indexed && state->index_type != state->last_index_type) ||
(info->chip_class == GFX10_3 && (state->last_index_type == -1 ||
disable_instance_packing != G_028A7C_DISABLE_INSTANCE_PACKING(state->last_index_type)))) {
uint32_t index_type = state->index_type | S_028A7C_DISABLE_INSTANCE_PACKING(disable_instance_packing);
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs,
R_03090C_VGT_INDEX_TYPE, 2, index_type);
} else {
radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
radeon_emit(cs, index_type);
}
state->last_index_type = index_type;
}
}
static void
@ -4755,7 +4775,8 @@ vk_to_index_type(VkIndexType type)
static uint32_t
radv_get_vgt_index_size(uint32_t type)
{
switch (type) {
uint32_t index_type = G_028A7C_INDEX_TYPE(type);
switch (index_type) {
case V_028A7C_VGT_INDEX_8:
return 1;
case V_028A7C_VGT_INDEX_16: