turnip: Add debug option to find usage of stale reg values

MESA_VK_ABORT_ON_DEVICE_LOSS=1 \
TU_DEBUG_STALE_REGS_RANGE=0x00000c00,0x0000be01 \
TU_DEBUG_STALE_REGS_FLAGS=cmdbuf,renderpass \
./app

To pinpoint the reg causing a failure, the reg range can be narrowed
down by bisection. Some failures may be caused by a multi-reg combination;
in such a case set the 'inverse' flag, which changes the meaning of the reg
range to "do not stomp these regs".

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21226>
This commit is contained in:
Danylo Piliaiev 2023-02-06 20:50:41 +01:00 committed by Marge Bot
parent 084d10a702
commit a66d9c815d
5 changed files with 324 additions and 0 deletions

View file

@ -0,0 +1,212 @@
/*
* Copyright © 2023 Igalia S.L.
* SPDX-License-Identifier: MIT
*/
#ifndef __FREEDRENO_STOMPABLE_REGS_H__
#define __FREEDRENO_STOMPABLE_REGS_H__
#include <stdint.h>
#include "a6xx.xml.h"
#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
/* In order to debug issues with usage of stale reg data we need to have
* a list of regs which we are allowed to stomp.
* The regs we are NOT allowed to stomp are:
* - Write protected;
* - Written by kernel but are not write protected;
* - Some regs that are not written by anyone but do affect the result.
*
* In addition, some regs are only emitted during cmdbuf setup
* so we have to have an additional filter to get a reduced list of regs
* stompable before each renderpass/blit.
*/
/* A contiguous span of register offsets that may be stomped.
 * A single register is described by start_reg == end_reg.
 */
struct fd_stompable_reg_range {
   /* First register offset of the span. */
   uint16_t start_reg;
   /* Last register offset of the span. */
   uint16_t end_reg;
};
/* Register ranges that are candidates for stomping at the start of a
 * command buffer. Each entry is a {start_reg, end_reg} pair; an entry
 * naming the same register twice covers just that one register.
 * Entries are grouped below by register name prefix.
 */
static const struct fd_stompable_reg_range
a6xx_fd_cmdbuf_stompable_reg_ranges[] = {
/* VSC_* */
{REG_A6XX_VSC_BIN_SIZE, REG_A6XX_VSC_DRAW_STRM_SIZE(31)},
/* UCHE_* */
{REG_A6XX_UCHE_UNKNOWN_0E12, REG_A6XX_UCHE_UNKNOWN_0E12},
/* GRAS_* */
{REG_A6XX_GRAS_CL_CNTL, REG_A6XX_GRAS_LRZ_DEPTH_VIEW},
{REG_A6XX_GRAS_2D_BLIT_CNTL, REG_A6XX_GRAS_2D_RESOLVE_CNTL_2},
/* RB_* */
{REG_A6XX_RB_BIN_CONTROL, REG_A6XX_RB_SAMPLE_LOCATION_1},
{REG_A6XX_RB_RENDER_CONTROL0, REG_A6XX_RB_UNKNOWN_8811},
{REG_A6XX_RB_UNKNOWN_8818, REG_A6XX_RB_UNKNOWN_881E},
{REG_A6XX_RB_MRT(0), REG_A6XX_RB_BLEND_CNTL},
{REG_A6XX_RB_DEPTH_PLANE_CNTL, REG_A6XX_RB_Z_BOUNDS_MAX},
{REG_A6XX_RB_STENCIL_CONTROL, REG_A6XX_RB_STENCILWRMASK},
{REG_A6XX_RB_WINDOW_OFFSET, REG_A6XX_RB_SAMPLE_COUNT_CONTROL},
{REG_A6XX_RB_LRZ_CNTL, REG_A6XX_RB_LRZ_CNTL},
{REG_A6XX_RB_Z_CLAMP_MIN, REG_A6XX_RB_Z_CLAMP_MAX},
{REG_A6XX_RB_UNKNOWN_88D0, REG_A6XX_RB_BLIT_SCISSOR_BR},
{REG_A6XX_RB_BIN_CONTROL2, REG_A6XX_RB_BLIT_INFO},
{REG_A6XX_RB_UNKNOWN_88F0, REG_A6XX_RB_UNKNOWN_88F4},
{REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE, REG_A6XX_RB_MRT_FLAG_BUFFER(7)},
{REG_A6XX_RB_SAMPLE_COUNT_ADDR, REG_A6XX_RB_SAMPLE_COUNT_ADDR},
{REG_A6XX_RB_2D_DST_INFO, REG_A6XX_RB_2D_SRC_SOLID_C3},
{REG_A6XX_RB_DBG_ECO_CNTL, REG_A6XX_RB_ADDR_MODE_CNTL},
{REG_A6XX_RB_CCU_CNTL, REG_A6XX_RB_CCU_CNTL},
/* VPC_* */
{REG_A6XX_VPC_GS_PARAM, REG_A6XX_VPC_POLYGON_MODE},
{REG_A6XX_VPC_VARYING_INTERP(0), REG_A6XX_VPC_POINT_COORD_INVERT},
{REG_A6XX_VPC_UNKNOWN_9300, REG_A6XX_VPC_SO_DISABLE},
{REG_A6XX_VPC_DBG_ECO_CNTL, REG_A6XX_VPC_PERFCTR_VPC_SEL(5)},
/* PC_* */
{REG_A6XX_PC_TESS_NUM_VERTEX, REG_A6XX_PC_DGEN_SU_CONSERVATIVE_RAS_CNTL},
{REG_A6XX_PC_POLYGON_MODE, REG_A6XX_PC_RASTER_CNTL},
{REG_A6XX_PC_PRIMITIVE_CNTL_0, REG_A6XX_PC_MULTIVIEW_MASK},
{REG_A6XX_PC_DRAW_INDX_BASE, REG_A6XX_PC_TESSFACTOR_ADDR},
{REG_A6XX_PC_VSTREAM_CONTROL, REG_A6XX_PC_BIN_DRAW_STRM},
{REG_A6XX_PC_VISIBILITY_OVERRIDE, REG_A6XX_PC_VISIBILITY_OVERRIDE},
/* VFD_* */
{REG_A6XX_VFD_CONTROL_0, REG_A6XX_VFD_DEST_CNTL(31)},
{REG_A6XX_VFD_POWER_CNTL, REG_A6XX_VFD_POWER_CNTL},
/* SP_* */
{REG_A6XX_SP_VS_CTRL_REG0, REG_A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET},
{REG_A6XX_SP_HS_CTRL_REG0, REG_A6XX_SP_HS_PVT_MEM_HW_STACK_OFFSET},
{REG_A6XX_SP_DS_CTRL_REG0, REG_A6XX_SP_DS_PVT_MEM_HW_STACK_OFFSET},
{REG_A6XX_SP_GS_CTRL_REG0, REG_A6XX_SP_GS_PVT_MEM_HW_STACK_OFFSET},
{REG_A6XX_SP_VS_TEX_SAMP, REG_A6XX_SP_GS_TEX_CONST},
{REG_A6XX_SP_FS_CTRL_REG0, REG_A6XX_SP_FS_PVT_MEM_HW_STACK_OFFSET},
{REG_A6XX_SP_CS_CTRL_REG0, REG_A6XX_SP_CS_PVT_MEM_HW_STACK_OFFSET},
{REG_A6XX_SP_CS_CNTL_0, REG_A6XX_SP_CS_CNTL_1},
{REG_A6XX_SP_FS_TEX_SAMP, REG_A6XX_SP_CS_TEX_CONST},
{REG_A6XX_SP_CS_IBO, REG_A6XX_SP_CS_IBO},
{REG_A6XX_SP_CS_IBO_COUNT, REG_A6XX_SP_CS_IBO_COUNT},
{REG_A6XX_SP_MODE_CONTROL, REG_A6XX_SP_BINDLESS_BASE(0)},
{REG_A6XX_SP_IBO, REG_A6XX_SP_IBO_COUNT},
{REG_A6XX_SP_CHICKEN_BITS, REG_A6XX_SP_FLOAT_CNTL},
{REG_A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR, REG_A6XX_SP_UNKNOWN_B183},
{REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR, REG_A6XX_SP_TP_MODE_CNTL},
{REG_A6XX_SP_PS_2D_SRC_INFO, REG_A6XX_SP_WINDOW_OFFSET},
/* TPL1_* */
{REG_A6XX_TPL1_DBG_ECO_CNTL, REG_A6XX_TPL1_DBG_ECO_CNTL},
/* HLSQ_* */
{REG_A6XX_HLSQ_VS_CNTL, REG_A6XX_HLSQ_GS_CNTL},
{REG_A6XX_HLSQ_FS_CNTL_0, REG_A6XX_HLSQ_CS_CNTL},
{REG_A6XX_HLSQ_CS_NDRANGE_0, REG_A6XX_HLSQ_CS_KERNEL_GROUP_Z},
{REG_A6XX_HLSQ_CS_BINDLESS_BASE(0), REG_A6XX_HLSQ_CS_BINDLESS_BASE(0)},
{REG_A6XX_HLSQ_CS_UNKNOWN_B9D0, REG_A6XX_HLSQ_CS_UNKNOWN_B9D0},
{REG_A6XX_HLSQ_FS_CNTL, REG_A6XX_HLSQ_SHARED_CONSTS},
{REG_A6XX_HLSQ_BINDLESS_BASE(0), REG_A6XX_HLSQ_BINDLESS_BASE(0)},
{REG_A6XX_HLSQ_UNKNOWN_BE00, REG_A6XX_HLSQ_UNKNOWN_BE01},
};
/* Tell whether `reg` may be stomped right before a renderpass.
 *
 * The answer is true when the reg is expected to be overwritten by the
 * renderpass, or is not used by anything inside it, so clobbering it
 * beforehand is harmless. Registers not listed below are stompable.
 *
 * For a handful of registers the verdict depends on `turnip`: some are
 * stompable only when it is set, others only when it is clear.
 */
static bool
a6xx_fd_reg_rp_stompable(bool turnip, uint16_t reg)
{
   bool stompable = true;

   switch (reg) {
   case REG_A6XX_VSC_DRAW_STRM_SIZE_ADDRESS ... REG_A6XX_VSC_DRAW_STRM_SIZE_ADDRESS + 1:
      stompable = !turnip;
      break;

   case REG_A6XX_VSC_PRIM_STRM_ADDRESS ... REG_A6XX_VSC_DRAW_STRM_LIMIT:
   case REG_A6XX_GRAS_SU_CONSERVATIVE_RAS_CNTL:
      stompable = false;
      break;

   case REG_A6XX_GRAS_SAMPLE_CONFIG ... REG_A6XX_GRAS_SAMPLE_LOCATION_1:
      stompable = turnip;
      break;

   case REG_A6XX_GRAS_UNKNOWN_80AF:
      stompable = false;
      break;

   case REG_A6XX_GRAS_LRZ_DEPTH_VIEW:
      stompable = turnip;
      break;

   case REG_A6XX_GRAS_UNKNOWN_8110:
   case REG_A6XX_GRAS_DBG_ECO_CNTL ... REG_A6XX_GRAS_PERFCTR_LRZ_SEL(3):
      stompable = false;
      break;

   case REG_A6XX_RB_SAMPLE_CONFIG ... REG_A6XX_RB_SAMPLE_LOCATION_1:
      stompable = turnip;
      break;

   case REG_A6XX_RB_DITHER_CNTL:
      stompable = !turnip;
      break;

   case REG_A6XX_RB_UNKNOWN_8811 ... REG_A6XX_RB_UNKNOWN_881E:
      stompable = false;
      break;

   case REG_A6XX_RB_ALPHA_CONTROL:
      stompable = !turnip;
      break;

   case REG_A6XX_RB_UNKNOWN_88F0:
   case REG_A6XX_RB_SAMPLE_COUNT_ADDR ... REG_A6XX_RB_SAMPLE_COUNT_ADDR + 1:
   case REG_A6XX_RB_UNKNOWN_8E01:
   case REG_A6XX_RB_DBG_ECO_CNTL ... REG_A6XX_RB_CCU_CNTL:
   case REG_A6XX_RB_PERFCTR_RB_SEL(0) ... REG_A6XX_RB_UNKNOWN_8E51:
   case REG_A6XX_VPC_UNKNOWN_9210 ... REG_A6XX_VPC_UNKNOWN_9211:
   case REG_A6XX_VPC_SO(0) ... REG_A6XX_VPC_POINT_COORD_INVERT:
   case REG_A6XX_VPC_UNKNOWN_9300:
   case REG_A6XX_VPC_DBG_ECO_CNTL ... REG_A6XX_VPC_PERFCTR_VPC_SEL(5):
   case REG_A6XX_PC_DRAW_CMD ... REG_A6XX_PC_MARKER:
   case REG_A6XX_PC_DBG_ECO_CNTL ... REG_A6XX_PC_ADDR_MODE_CNTL:
   case REG_A6XX_PC_TESSFACTOR_ADDR:
   case REG_A6XX_VFD_MODE_CNTL:
   case REG_A6XX_VFD_ADD_OFFSET:
   case REG_A6XX_SP_UNKNOWN_A9A8:
   case REG_A6XX_SP_DBG_ECO_CNTL ... REG_A6XX_SP_PERFCTR_SP_SEL(23):
   case REG_A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR ... REG_A6XX_SP_UNKNOWN_B183:
   case REG_A6XX_SP_UNKNOWN_B190 ... REG_A6XX_SP_UNKNOWN_B191:
   case REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR ... REG_A6XX_SP_TP_SAMPLE_LOCATION_1:
   case REG_A6XX_SP_TP_MODE_CNTL:
   case REG_A6XX_TPL1_DBG_ECO_CNTL ... REG_A6XX_TPL1_PERFCTR_TP_SEL(11):
   case REG_A6XX_HLSQ_UNKNOWN_BE00 ... REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL(5):
      stompable = false;
      break;

   /* Compute pipeline state is written directly into the command stream
    * and has to survive a renderpass, so it must not be stomped.
    */
   case REG_A6XX_SP_CS_CTRL_REG0 ... REG_A6XX_SP_CS_CNTL_1:
   case REG_A6XX_HLSQ_CS_CNTL ... REG_A6XX_HLSQ_CS_CNTL_1:
      stompable = false;
      break;

   default:
      break;
   }

   return stompable;
}
/* Tell whether `reg` must be left alone because stomping it is known to
 * cause issues. Returns true for such registers and false for all others.
 * `turnip` is accepted but not consulted by the current list.
 */
static bool
a6xx_fd_reg_do_not_stomp(bool turnip, uint16_t reg)
{
   bool forbidden = false;

   switch (reg) {
   /* Faults in
    * dEQP-VK.renderpass.suballocation.formats.r5g6b5_unorm_pack16.clear.clear
    * It seems that PC_CCU_FLUSH_COLOR_TS reads REG_A6XX_RB_DEPTH_PLANE_CNTL.
    */
   case REG_A6XX_RB_DEPTH_PLANE_CNTL:
   /* Faults in
    * dEQP-VK.conditional_rendering.draw.condition_host_memory_expect_noop.draw
    */
   case REG_A6XX_HLSQ_VS_CNTL ... REG_A6XX_HLSQ_GS_CNTL:
   case REG_A6XX_HLSQ_FS_CNTL:
   /* Faults in
    * dEQP-VK.memory_model.message_passing.ext.u32.coherent.atomic_atomic.atomicrmw.device.payload_local.image.guard_local.image.comp
    * while there is even no fragment shaders.
    */
   case REG_A6XX_SP_FS_OBJ_START ... REG_A6XX_SP_FS_OBJ_START + 1:
      forbidden = true;
      break;

   default:
      break;
   }

   return forbidden;
}
#endif /* __FREEDRENO_STOMPABLE_REGS_H__ */

View file

@ -390,6 +390,10 @@ r2d_setup_common(struct tu_cmd_buffer *cmd,
bool ubwc,
bool scissor)
{
if (!cmd->state.pass && cmd->device->dbg_renderpass_stomp_cs) {
tu_cs_emit_call(cs, cmd->device->dbg_renderpass_stomp_cs);
}
enum a6xx_format fmt = blit_base_format(dst_format, ubwc);
fixup_dst_format(src_format, &dst_format, &fmt);
enum a6xx_2d_ifmt ifmt = format_to_ifmt(dst_format);
@ -1224,6 +1228,10 @@ r3d_setup(struct tu_cmd_buffer *cmd,
bool ubwc,
VkSampleCountFlagBits samples)
{
if (!cmd->state.pass && cmd->device->dbg_renderpass_stomp_cs) {
tu_cs_emit_call(cs, cmd->device->dbg_renderpass_stomp_cs);
}
enum a6xx_format fmt = blit_base_format(dst_format, ubwc);
fixup_dst_format(src_format, &dst_format, &fmt);

View file

@ -939,6 +939,10 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_cs_emit_wfi(cs);
if (dev->dbg_cmdbuf_stomp_cs) {
tu_cs_emit_call(cs, dev->dbg_cmdbuf_stomp_cs);
}
cmd->state.cache.pending_flush_bits &=
~(TU_CMD_FLAG_WAIT_FOR_IDLE | TU_CMD_FLAG_CACHE_INVALIDATE);
@ -4321,6 +4325,10 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
return;
}
if (cmd->device->dbg_renderpass_stomp_cs) {
tu_cs_emit_call(&cmd->cs, cmd->device->dbg_renderpass_stomp_cs);
}
for (unsigned i = 0; i < pass->attachment_count; i++) {
cmd->state.attachments[i] = pAttachmentInfo ?
tu_image_view_from_handle(pAttachmentInfo->pAttachments[i]) :

View file

@ -24,6 +24,7 @@
/* for fd_get_driver/device_uuid() */
#include "freedreno/common/freedreno_uuid.h"
#include "freedreno/common/freedreno_stompable_regs.h"
#include "tu_clear_blit.h"
#include "tu_cmd_buffer.h"
@ -1969,6 +1970,86 @@ tu_u_trace_submission_data_finish(
vk_free(&device->vk.alloc, submission_data);
}
/* Bit flags controlling the stale-register stomper. */
enum tu_reg_stomper_flags {
   /* Treat the configured reg range as "do NOT stomp" instead of "stomp". */
   TU_DEBUG_REG_STOMP_INVERSE = 1 << 0,
   /* Stomp regs at the start of a cmdbuf. */
   TU_DEBUG_REG_STOMP_CMDBUF = 1 << 1,
   /* Stomp regs before a renderpass. */
   TU_DEBUG_REG_STOMP_RENDERPASS = 1 << 2,
};
/* Name -> flag table for the stomper flags option: each entry pairs a
 * user-visible flag name with its tu_reg_stomper_flags bit and a help text.
 */
static const struct debug_named_value tu_reg_stomper_options[] = {
   {
      "inverse",
      TU_DEBUG_REG_STOMP_INVERSE,
      "By default the range specifies the regs to stomp, "
      "with 'inverse' it specifies the regs NOT to stomp"
   },
   {
      "cmdbuf",
      TU_DEBUG_REG_STOMP_CMDBUF,
      "Stomp regs at the start of a cmdbuf"
   },
   {
      "renderpass",
      TU_DEBUG_REG_STOMP_RENDERPASS,
      "Stomp regs before a renderpass"
   },
   /* NULL name: presumably the end-of-list marker expected by the option
    * parser -- confirm against debug_get_flags_option().
    */
   { NULL, 0 }
};
/* Set up the debug "register stomper" command streams for `device`.
 *
 * Does nothing unless TU_DEBUG_STALE_REGS_RANGE is set; the value must be
 * two comma-separated hex numbers "first,last" (e.g. 0x00000c00,0x0000be01).
 * TU_DEBUG_STALE_REGS_FLAGS selects the behaviour (see
 * tu_reg_stomper_options) and defaults to "cmdbuf".
 *
 * Two command streams are built, each writing 0xffffffff to the selected
 * registers:
 *  - device->dbg_cmdbuf_stomp_cs: every selected stompable reg, filled only
 *    when the "cmdbuf" flag is set;
 *  - device->dbg_renderpass_stomp_cs: the subset accepted by
 *    a6xx_fd_reg_rp_stompable(), filled only when the "renderpass" flag
 *    is set.
 *
 * On a malformed range or an allocation failure an error is logged and the
 * device fields are left untouched.
 */
static void
tu_init_dbg_reg_stomper(struct tu_device *device)
{
   const char *stale_reg_range_str =
      os_get_option("TU_DEBUG_STALE_REGS_RANGE");
   if (!stale_reg_range_str)
      return;

   uint32_t first_reg, last_reg;
   if (sscanf(stale_reg_range_str, "%x,%x", &first_reg, &last_reg) != 2) {
      mesa_loge("Incorrect TU_DEBUG_STALE_REGS_RANGE");
      return;
   }

   uint64_t debug_flags = debug_get_flags_option("TU_DEBUG_STALE_REGS_FLAGS",
                                                 tu_reg_stomper_options,
                                                 TU_DEBUG_REG_STOMP_CMDBUF);
   const bool inverse = (debug_flags & TU_DEBUG_REG_STOMP_INVERSE) != 0;
   const bool stomp_cmdbuf = (debug_flags & TU_DEBUG_REG_STOMP_CMDBUF) != 0;
   const bool stomp_rp = (debug_flags & TU_DEBUG_REG_STOMP_RENDERPASS) != 0;

   /* Allocate both streams up front so that a failure can be handled
    * before any of them is initialized; tu_cs_init() must never see NULL.
    */
   struct tu_cs *cmdbuf_cs = calloc(1, sizeof(*cmdbuf_cs));
   struct tu_cs *rp_cs = calloc(1, sizeof(*rp_cs));
   if (!cmdbuf_cs || !rp_cs) {
      mesa_loge("Failed to allocate reg stomper command streams");
      free(cmdbuf_cs);
      free(rp_cs);
      return;
   }

   tu_cs_init(cmdbuf_cs, device, TU_CS_MODE_GROW, 4096,
              "cmdbuf reg stomp cs");
   tu_cs_begin(cmdbuf_cs);

   tu_cs_init(rp_cs, device, TU_CS_MODE_GROW, 4096, "rp reg stomp cs");
   tu_cs_begin(rp_cs);

   size_t reg_ranges_count = ARRAY_SIZE(a6xx_fd_cmdbuf_stompable_reg_ranges);
   for (size_t i = 0; i < reg_ranges_count; i++) {
      struct fd_stompable_reg_range reg_range =
         a6xx_fd_cmdbuf_stompable_reg_ranges[i];

      for (uint16_t reg = reg_range.start_reg; reg <= reg_range.end_reg;
           reg++) {
         /* Normal mode stomps only regs inside [first_reg, last_reg];
          * inverse mode stomps only regs outside of it.
          */
         const bool in_range = reg >= first_reg && reg <= last_reg;
         if (in_range == inverse)
            continue;

         if (a6xx_fd_reg_do_not_stomp(true, reg))
            continue;

         if (stomp_cmdbuf)
            tu_cs_emit_write_reg(cmdbuf_cs, reg, 0xffffffff);

         if (stomp_rp && a6xx_fd_reg_rp_stompable(true, reg))
            tu_cs_emit_write_reg(rp_cs, reg, 0xffffffff);
      }
   }

   tu_cs_end(cmdbuf_cs);
   tu_cs_end(rp_cs);

   device->dbg_cmdbuf_stomp_cs = cmdbuf_cs;
   device->dbg_renderpass_stomp_cs = rp_cs;
}
VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateDevice(VkPhysicalDevice physicalDevice,
const VkDeviceCreateInfo *pCreateInfo,
@ -2201,6 +2282,8 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
}
}
tu_init_dbg_reg_stomper(device);
/* Initialize a condition variable for timeline semaphore */
pthread_condattr_t condattr;
if (pthread_condattr_init(&condattr) != 0) {
@ -2334,6 +2417,16 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
free(device->perfcntrs_pass_cs);
}
if (device->dbg_cmdbuf_stomp_cs) {
tu_cs_finish(device->dbg_cmdbuf_stomp_cs);
free(device->dbg_cmdbuf_stomp_cs);
}
if (device->dbg_renderpass_stomp_cs) {
tu_cs_finish(device->dbg_renderpass_stomp_cs);
free(device->dbg_renderpass_stomp_cs);
}
tu_autotune_fini(&device->autotune, device);
tu_bo_suballocator_finish(&device->pipeline_suballoc);

View file

@ -307,6 +307,9 @@ struct tu_device
struct breadcrumbs_context *breadcrumbs_ctx;
struct tu_cs *dbg_cmdbuf_stomp_cs;
struct tu_cs *dbg_renderpass_stomp_cs;
#ifdef ANDROID
const void *gralloc;
enum {