diff --git a/src/freedreno/common/freedreno_stompable_regs.h b/src/freedreno/common/freedreno_stompable_regs.h new file mode 100644 index 00000000000..f0ddd03b025 --- /dev/null +++ b/src/freedreno/common/freedreno_stompable_regs.h @@ -0,0 +1,215 @@ +/* + * Copyright © 2023 Igalia S.L. + * SPDX-License-Identifier: MIT + */ + +#ifndef __FREEDRENO_STOMPABLE_REGS_H__ +#define __FREEDRENO_STOMPABLE_REGS_H__ + +#include <stdbool.h> +#include <stdint.h> + +#include "a6xx.xml.h" +#include "adreno_common.xml.h" +#include "adreno_pm4.xml.h" + +/* In order to debug issues with usage of stale reg data we need to have + * a list of regs which we are allowed to stomp. + * The regs we are NOT allowed to stomp are: + * - Write protected; + * - Written by kernel but are not write protected; + * - Some regs that are not written by anyone but do affect the result. + * + * In addition, some regs are only emitted during cmdbuf setup + * so we have to have an additional filter to get a reduced list of regs + * stompable before each renderpass/blit. 
+ */ + +/* Inclusive range of regs: start_reg and end_reg both belong to it. */ +struct fd_stompable_reg_range { + uint16_t start_reg; + uint16_t end_reg; +}; + +/* Ranges of a6xx regs that are candidates for stomping at cmdbuf start. */ +static const struct fd_stompable_reg_range + a6xx_fd_cmdbuf_stompable_reg_ranges[] = { + {REG_A6XX_VSC_BIN_SIZE, REG_A6XX_VSC_DRAW_STRM_SIZE(31)}, + {REG_A6XX_UCHE_UNKNOWN_0E12, REG_A6XX_UCHE_UNKNOWN_0E12}, + {REG_A6XX_GRAS_CL_CNTL, REG_A6XX_GRAS_LRZ_DEPTH_VIEW}, + {REG_A6XX_GRAS_2D_BLIT_CNTL, REG_A6XX_GRAS_2D_RESOLVE_CNTL_2}, + {REG_A6XX_RB_BIN_CONTROL, REG_A6XX_RB_SAMPLE_LOCATION_1}, + {REG_A6XX_RB_RENDER_CONTROL0, REG_A6XX_RB_UNKNOWN_8811}, + {REG_A6XX_RB_UNKNOWN_8818, REG_A6XX_RB_UNKNOWN_881E}, + {REG_A6XX_RB_MRT(0), REG_A6XX_RB_BLEND_CNTL}, + {REG_A6XX_RB_DEPTH_PLANE_CNTL, REG_A6XX_RB_Z_BOUNDS_MAX}, + {REG_A6XX_RB_STENCIL_CONTROL, REG_A6XX_RB_STENCILWRMASK}, + {REG_A6XX_RB_WINDOW_OFFSET, REG_A6XX_RB_SAMPLE_COUNT_CONTROL}, + {REG_A6XX_RB_LRZ_CNTL, REG_A6XX_RB_LRZ_CNTL}, + {REG_A6XX_RB_Z_CLAMP_MIN, REG_A6XX_RB_Z_CLAMP_MAX}, + {REG_A6XX_RB_UNKNOWN_88D0, REG_A6XX_RB_BLIT_SCISSOR_BR}, + {REG_A6XX_RB_BIN_CONTROL2, REG_A6XX_RB_BLIT_INFO}, + {REG_A6XX_RB_UNKNOWN_88F0, REG_A6XX_RB_UNKNOWN_88F4}, + {REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE, REG_A6XX_RB_MRT_FLAG_BUFFER(7)}, + {REG_A6XX_RB_SAMPLE_COUNT_ADDR, REG_A6XX_RB_SAMPLE_COUNT_ADDR}, + {REG_A6XX_RB_2D_DST_INFO, REG_A6XX_RB_2D_SRC_SOLID_C3}, + {REG_A6XX_RB_DBG_ECO_CNTL, REG_A6XX_RB_ADDR_MODE_CNTL}, + {REG_A6XX_RB_CCU_CNTL, REG_A6XX_RB_CCU_CNTL}, + {REG_A6XX_VPC_GS_PARAM, REG_A6XX_VPC_POLYGON_MODE}, + {REG_A6XX_VPC_VARYING_INTERP(0), REG_A6XX_VPC_POINT_COORD_INVERT}, + {REG_A6XX_VPC_UNKNOWN_9300, REG_A6XX_VPC_SO_DISABLE}, + {REG_A6XX_VPC_DBG_ECO_CNTL, REG_A6XX_VPC_PERFCTR_VPC_SEL(5)}, + {REG_A6XX_PC_TESS_NUM_VERTEX, REG_A6XX_PC_DGEN_SU_CONSERVATIVE_RAS_CNTL}, + {REG_A6XX_PC_POLYGON_MODE, REG_A6XX_PC_RASTER_CNTL}, + {REG_A6XX_PC_PRIMITIVE_CNTL_0, REG_A6XX_PC_MULTIVIEW_MASK}, + {REG_A6XX_PC_DRAW_INDX_BASE, REG_A6XX_PC_TESSFACTOR_ADDR}, + {REG_A6XX_PC_VSTREAM_CONTROL, REG_A6XX_PC_BIN_DRAW_STRM}, + {REG_A6XX_PC_VISIBILITY_OVERRIDE, 
REG_A6XX_PC_VISIBILITY_OVERRIDE}, + {REG_A6XX_VFD_CONTROL_0, REG_A6XX_VFD_DEST_CNTL(31)}, + {REG_A6XX_VFD_POWER_CNTL, REG_A6XX_VFD_POWER_CNTL}, + {REG_A6XX_SP_VS_CTRL_REG0, REG_A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET}, + {REG_A6XX_SP_HS_CTRL_REG0, REG_A6XX_SP_HS_PVT_MEM_HW_STACK_OFFSET}, + {REG_A6XX_SP_DS_CTRL_REG0, REG_A6XX_SP_DS_PVT_MEM_HW_STACK_OFFSET}, + {REG_A6XX_SP_GS_CTRL_REG0, REG_A6XX_SP_GS_PVT_MEM_HW_STACK_OFFSET}, + {REG_A6XX_SP_VS_TEX_SAMP, REG_A6XX_SP_GS_TEX_CONST}, + {REG_A6XX_SP_FS_CTRL_REG0, REG_A6XX_SP_FS_PVT_MEM_HW_STACK_OFFSET}, + {REG_A6XX_SP_CS_CTRL_REG0, REG_A6XX_SP_CS_PVT_MEM_HW_STACK_OFFSET}, + {REG_A6XX_SP_CS_CNTL_0, REG_A6XX_SP_CS_CNTL_1}, + {REG_A6XX_SP_FS_TEX_SAMP, REG_A6XX_SP_CS_TEX_CONST}, + {REG_A6XX_SP_CS_IBO, REG_A6XX_SP_CS_IBO}, + {REG_A6XX_SP_CS_IBO_COUNT, REG_A6XX_SP_CS_IBO_COUNT}, + {REG_A6XX_SP_MODE_CONTROL, REG_A6XX_SP_BINDLESS_BASE(0)}, + {REG_A6XX_SP_IBO, REG_A6XX_SP_IBO_COUNT}, + {REG_A6XX_SP_CHICKEN_BITS, REG_A6XX_SP_FLOAT_CNTL}, + {REG_A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR, REG_A6XX_SP_UNKNOWN_B183}, + {REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR, REG_A6XX_SP_TP_MODE_CNTL}, + {REG_A6XX_SP_PS_2D_SRC_INFO, REG_A6XX_SP_WINDOW_OFFSET}, + {REG_A6XX_TPL1_DBG_ECO_CNTL, REG_A6XX_TPL1_DBG_ECO_CNTL}, + {REG_A6XX_HLSQ_VS_CNTL, REG_A6XX_HLSQ_GS_CNTL}, + {REG_A6XX_HLSQ_FS_CNTL_0, REG_A6XX_HLSQ_CS_CNTL}, + {REG_A6XX_HLSQ_CS_NDRANGE_0, REG_A6XX_HLSQ_CS_KERNEL_GROUP_Z}, + {REG_A6XX_HLSQ_CS_BINDLESS_BASE(0), REG_A6XX_HLSQ_CS_BINDLESS_BASE(0)}, + {REG_A6XX_HLSQ_CS_UNKNOWN_B9D0, REG_A6XX_HLSQ_CS_UNKNOWN_B9D0}, + {REG_A6XX_HLSQ_FS_CNTL, REG_A6XX_HLSQ_SHARED_CONSTS}, + {REG_A6XX_HLSQ_BINDLESS_BASE(0), REG_A6XX_HLSQ_BINDLESS_BASE(0)}, + {REG_A6XX_HLSQ_UNKNOWN_BE00, REG_A6XX_HLSQ_UNKNOWN_BE01}, +}; + +/* Return true if it is expected that reg is overwritten by a renderpass or + * not used by anything in a renderpass. So it's safe to stomp the reg + * beforehand. 
+ */ +static bool +a6xx_fd_reg_rp_stompable(bool turnip, uint16_t reg) +{ + switch (reg) { /* listed regs are not unconditionally stompable */ + case REG_A6XX_VSC_DRAW_STRM_SIZE_ADDRESS ... REG_A6XX_VSC_DRAW_STRM_SIZE_ADDRESS + 1: + return !turnip; + case REG_A6XX_VSC_PRIM_STRM_ADDRESS ... REG_A6XX_VSC_DRAW_STRM_LIMIT: + return false; + case REG_A6XX_GRAS_SU_CONSERVATIVE_RAS_CNTL: + return false; + case REG_A6XX_GRAS_SAMPLE_CONFIG ... REG_A6XX_GRAS_SAMPLE_LOCATION_1: + return turnip; + case REG_A6XX_GRAS_UNKNOWN_80AF: + return false; + case REG_A6XX_GRAS_LRZ_DEPTH_VIEW: + return turnip; + case REG_A6XX_GRAS_UNKNOWN_8110: + return false; + case REG_A6XX_GRAS_DBG_ECO_CNTL ... REG_A6XX_GRAS_PERFCTR_LRZ_SEL(3): + return false; + case REG_A6XX_RB_SAMPLE_CONFIG ... REG_A6XX_RB_SAMPLE_LOCATION_1: + return turnip; + case REG_A6XX_RB_DITHER_CNTL: + return !turnip; + case REG_A6XX_RB_UNKNOWN_8811 ... REG_A6XX_RB_UNKNOWN_881E: + return false; + case REG_A6XX_RB_ALPHA_CONTROL: + return !turnip; + case REG_A6XX_RB_UNKNOWN_88F0: + return false; + case REG_A6XX_RB_SAMPLE_COUNT_ADDR ... REG_A6XX_RB_SAMPLE_COUNT_ADDR + 1: + return false; + case REG_A6XX_RB_UNKNOWN_8E01: + return false; + case REG_A6XX_RB_DBG_ECO_CNTL ... REG_A6XX_RB_CCU_CNTL: + return false; + case REG_A6XX_RB_PERFCTR_RB_SEL(0)... REG_A6XX_RB_UNKNOWN_8E51: + return false; + case REG_A6XX_VPC_UNKNOWN_9210 ... REG_A6XX_VPC_UNKNOWN_9211: + return false; + case REG_A6XX_VPC_SO(0) ... REG_A6XX_VPC_POINT_COORD_INVERT: + return false; + case REG_A6XX_VPC_UNKNOWN_9300: + return false; + case REG_A6XX_VPC_DBG_ECO_CNTL ... REG_A6XX_VPC_PERFCTR_VPC_SEL(5): + return false; + case REG_A6XX_PC_DRAW_CMD ... REG_A6XX_PC_MARKER: + return false; + case REG_A6XX_PC_DBG_ECO_CNTL ... REG_A6XX_PC_ADDR_MODE_CNTL: + return false; + case REG_A6XX_PC_TESSFACTOR_ADDR: + return false; + case REG_A6XX_VFD_MODE_CNTL: + return false; + case REG_A6XX_VFD_ADD_OFFSET: + return false; + case REG_A6XX_SP_UNKNOWN_A9A8: + return false; + case REG_A6XX_SP_DBG_ECO_CNTL ... 
REG_A6XX_SP_PERFCTR_SP_SEL(23): + return false; + case REG_A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR ... REG_A6XX_SP_UNKNOWN_B183: + return false; + case REG_A6XX_SP_UNKNOWN_B190 ... REG_A6XX_SP_UNKNOWN_B191: + return false; + case REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR ... REG_A6XX_SP_TP_SAMPLE_LOCATION_1: + return false; + case REG_A6XX_SP_TP_MODE_CNTL: + return false; + case REG_A6XX_TPL1_DBG_ECO_CNTL ... REG_A6XX_TPL1_PERFCTR_TP_SEL(11): + return false; + case REG_A6XX_HLSQ_UNKNOWN_BE00 ... REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL(5): + return false; + + /* We should not stomp compute pipeline regs: compute pipelines write them + * directly into the command stream and they should survive a renderpass. + */ + case REG_A6XX_SP_CS_CTRL_REG0 ... REG_A6XX_SP_CS_CNTL_1: + return false; + case REG_A6XX_HLSQ_CS_CNTL ... REG_A6XX_HLSQ_CS_CNTL_1: + return false; + } + + return true; /* any reg not listed above is stompable */ +} + +/* Returns true for regs whose stomping is known to cause issues. */ +static bool +a6xx_fd_reg_do_not_stomp(bool turnip, uint16_t reg) +{ + switch (reg) { + /* Faults in + * dEQP-VK.renderpass.suballocation.formats.r5g6b5_unorm_pack16.clear.clear + * It seems that PC_CCU_FLUSH_COLOR_TS reads REG_A6XX_RB_DEPTH_PLANE_CNTL. + */ + case REG_A6XX_RB_DEPTH_PLANE_CNTL: + return true; + /* Faults in + * dEQP-VK.conditional_rendering.draw.condition_host_memory_expect_noop.draw */ + case REG_A6XX_HLSQ_VS_CNTL ... REG_A6XX_HLSQ_GS_CNTL: + return true; + case REG_A6XX_HLSQ_FS_CNTL: + return true; + /* Faults in + * dEQP-VK.memory_model.message_passing.ext.u32.coherent.atomic_atomic.atomicrmw.device.payload_local.image.guard_local.image.comp + * even though there are no fragment shaders. + */ + case REG_A6XX_SP_FS_OBJ_START ... 
REG_A6XX_SP_FS_OBJ_START + 1: + return true; + } + + return false; +} + +#endif /* __FREEDRENO_STOMPABLE_REGS_H__ */ \ No newline at end of file diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c index 73474722c5b..88d03244005 100644 --- a/src/freedreno/vulkan/tu_clear_blit.c +++ b/src/freedreno/vulkan/tu_clear_blit.c @@ -390,6 +390,10 @@ r2d_setup_common(struct tu_cmd_buffer *cmd, bool ubwc, bool scissor) { + if (!cmd->state.pass && cmd->device->dbg_renderpass_stomp_cs) { + tu_cs_emit_call(cs, cmd->device->dbg_renderpass_stomp_cs); + } + enum a6xx_format fmt = blit_base_format(dst_format, ubwc); fixup_dst_format(src_format, &dst_format, &fmt); enum a6xx_2d_ifmt ifmt = format_to_ifmt(dst_format); @@ -1224,6 +1228,10 @@ r3d_setup(struct tu_cmd_buffer *cmd, bool ubwc, VkSampleCountFlagBits samples) { + if (!cmd->state.pass && cmd->device->dbg_renderpass_stomp_cs) { + tu_cs_emit_call(cs, cmd->device->dbg_renderpass_stomp_cs); + } + enum a6xx_format fmt = blit_base_format(dst_format, ubwc); fixup_dst_format(src_format, &dst_format, &fmt); diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index d6a04246a07..a659fafafe7 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -939,6 +939,10 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit_wfi(cs); + if (dev->dbg_cmdbuf_stomp_cs) { + tu_cs_emit_call(cs, dev->dbg_cmdbuf_stomp_cs); + } + cmd->state.cache.pending_flush_bits &= ~(TU_CMD_FLAG_WAIT_FOR_IDLE | TU_CMD_FLAG_CACHE_INVALIDATE); @@ -4321,6 +4325,10 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer, return; } + if (cmd->device->dbg_renderpass_stomp_cs) { + tu_cs_emit_call(&cmd->cs, cmd->device->dbg_renderpass_stomp_cs); + } + for (unsigned i = 0; i < pass->attachment_count; i++) { cmd->state.attachments[i] = pAttachmentInfo ? 
tu_image_view_from_handle(pAttachmentInfo->pAttachments[i]) : diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index 7c2f5e3530b..8c7cced79ea 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -24,6 +24,7 @@ /* for fd_get_driver/device_uuid() */ #include "freedreno/common/freedreno_uuid.h" +#include "freedreno/common/freedreno_stompable_regs.h" #include "tu_clear_blit.h" #include "tu_cmd_buffer.h" @@ -1969,6 +1970,100 @@ tu_u_trace_submission_data_finish( vk_free(&device->vk.alloc, submission_data); } +enum tu_reg_stomper_flags +{ + TU_DEBUG_REG_STOMP_INVERSE = 1 << 0, + TU_DEBUG_REG_STOMP_CMDBUF = 1 << 1, + TU_DEBUG_REG_STOMP_RENDERPASS = 1 << 2, +}; + +static const struct debug_named_value tu_reg_stomper_options[] = { + { "inverse", TU_DEBUG_REG_STOMP_INVERSE, + "By default the range specifies the regs to stomp, with 'inverse' it " + "specifies the regs NOT to stomp" }, + { "cmdbuf", TU_DEBUG_REG_STOMP_CMDBUF, + "Stomp regs at the start of a cmdbuf" }, + { "renderpass", TU_DEBUG_REG_STOMP_RENDERPASS, + "Stomp regs before a renderpass" }, + { NULL, 0 } +}; + +/* Build the debug command streams that stomp regs by writing 0xffffffff to + * them. Does nothing unless TU_DEBUG_STALE_REGS_RANGE holds a "first,last" + * pair of hex reg offsets. TU_DEBUG_STALE_REGS_FLAGS selects which streams + * get filled, with "cmdbuf" passed as the default. On failure the stomp + * stream pointers of the device are left untouched. + */ +static void +tu_init_dbg_reg_stomper(struct tu_device *device) +{ + const char *stale_reg_range_str = + os_get_option("TU_DEBUG_STALE_REGS_RANGE"); + if (!stale_reg_range_str) + return; + + uint32_t first_reg, last_reg; + + if (sscanf(stale_reg_range_str, "%x,%x", &first_reg, &last_reg) != 2) { + mesa_loge("Incorrect TU_DEBUG_STALE_REGS_RANGE"); + return; + } + + uint64_t debug_flags = debug_get_flags_option("TU_DEBUG_STALE_REGS_FLAGS", + tu_reg_stomper_options, + TU_DEBUG_REG_STOMP_CMDBUF); + + /* Allocate both streams up front so a failure needs no tu_cs teardown. */ + struct tu_cs *cmdbuf_cs = calloc(1, sizeof(struct tu_cs)); + struct tu_cs *rp_cs = calloc(1, sizeof(struct tu_cs)); + if (!cmdbuf_cs || !rp_cs) { + mesa_loge("Failed to allocate reg stomper command streams"); + free(cmdbuf_cs); + free(rp_cs); + return; + } + + tu_cs_init(cmdbuf_cs, device, TU_CS_MODE_GROW, 4096, + "cmdbuf reg stomp cs"); + tu_cs_begin(cmdbuf_cs); + + tu_cs_init(rp_cs, device, TU_CS_MODE_GROW, 4096, "rp reg stomp cs"); + tu_cs_begin(rp_cs); + + size_t reg_ranges_count = 
ARRAY_SIZE(a6xx_fd_cmdbuf_stompable_reg_ranges); + for (size_t i = 0; i < reg_ranges_count; i++) { + struct fd_stompable_reg_range reg_range = + a6xx_fd_cmdbuf_stompable_reg_ranges[i]; + for (uint16_t reg = reg_range.start_reg; reg <= reg_range.end_reg; + reg++) { + if (debug_flags & TU_DEBUG_REG_STOMP_INVERSE) { + if (reg >= first_reg && reg <= last_reg) + continue; + } else { + if (reg < first_reg || reg > last_reg) + continue; + } + + if (a6xx_fd_reg_do_not_stomp(true, reg)) + continue; + + if (debug_flags & TU_DEBUG_REG_STOMP_CMDBUF) + tu_cs_emit_write_reg(cmdbuf_cs, reg, 0xffffffff); + + if ((debug_flags & TU_DEBUG_REG_STOMP_RENDERPASS) && + a6xx_fd_reg_rp_stompable(true, reg)) { + tu_cs_emit_write_reg(rp_cs, reg, 0xffffffff); + } + } + } + + tu_cs_end(cmdbuf_cs); + tu_cs_end(rp_cs); + + device->dbg_cmdbuf_stomp_cs = cmdbuf_cs; + device->dbg_renderpass_stomp_cs = rp_cs; +} + VKAPI_ATTR VkResult VKAPI_CALL tu_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo, @@ -2201,6 +2296,8 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, } } + tu_init_dbg_reg_stomper(device); + /* Initialize a condition variable for timeline semaphore */ pthread_condattr_t condattr; if (pthread_condattr_init(&condattr) != 0) { @@ -2334,6 +2431,16 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) free(device->perfcntrs_pass_cs); } + if (device->dbg_cmdbuf_stomp_cs) { + tu_cs_finish(device->dbg_cmdbuf_stomp_cs); + free(device->dbg_cmdbuf_stomp_cs); + } + + if (device->dbg_renderpass_stomp_cs) { + tu_cs_finish(device->dbg_renderpass_stomp_cs); + free(device->dbg_renderpass_stomp_cs); + } + tu_autotune_fini(&device->autotune, device); tu_bo_suballocator_finish(&device->pipeline_suballoc); diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h index 809b81d176d..4de58d66b05 100644 --- a/src/freedreno/vulkan/tu_device.h +++ b/src/freedreno/vulkan/tu_device.h @@ -307,6 +307,9 @@ struct tu_device struct 
breadcrumbs_context *breadcrumbs_ctx; + + struct tu_cs *dbg_cmdbuf_stomp_cs; + struct tu_cs *dbg_renderpass_stomp_cs; + #ifdef ANDROID const void *gralloc; enum {