anv: add shader-hash debug option
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Emits a dummy MI_STORE_DATA_IMM with the shader hash in front of:
   - 3DSTATE_VS
   - 3DSTATE_HS
   - 3DSTATE_DS
   - 3DSTATE_GS
   - 3DSTATE_PS
   - COMPUTE_WALKER / GPGPU_WALKER

Example :

0x00000000:  0x10000002:  MI_STORE_DATA_IMM
0x00000000:  0x10000002 : Dword 0
    DWord Length: 2
    Force Write Completion Check : false
    Store Qword: 0
    Use Global GTT: false
0x00000004:  0xffffe0c0 : Dword 1
    Core Mode Enable: 0
0x00000008:  0x0000effe : Dword 2
    Address: 0xeffeffffe0c0
0x0000000c:  0x126e815a : Dword 3  <------------ shader hash
0x00000010:  0x78100007 : Dword 4
    Immediate Data: 309231962
0x00000000:  0x78100007:  3DSTATE_VS
0x00000000:  0x78100007 : Dword 0
    DWord Length: 7
0x00000004:  0x00000000 : Dword 1
0x00000008:  0x00000000 : Dword 2
    Kernel Start Pointer: 0x00000000
0x0000000c:  0x00040000 : Dword 3
    Software Exception Enable: false
    Accesses UAV: false

This hash can be correlated with the value emitted in the pipeline stats from a fossilize replay:

  $ grep -i 126e815a /tmp/stats.csv
  fossilize.aab93c5c3f965151.1.foz,GRAPHICS,de1b925dec8a8083,507378,498283,303434,vertex,8,50,4,0,1826,0,0,0,8,17,0,0x00000000126e815a,15

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34332>
This commit is contained in:
Lionel Landwerlin 2025-04-02 00:02:10 +03:00 committed by Marge Bot
parent 789f13359a
commit 72bc74f0be
6 changed files with 65 additions and 0 deletions

View file

@ -878,6 +878,10 @@ Anvil(ANV) driver environment variables
Enables video decoding support
``video-encode``
Enables video encoding support
``shader-hash``
Emits a dummy MI_STORE_DATA_IMM instruction containing the shader
source hash in front of each shader programming instruction (internal
shaders and ray-tracing shaders are omitted)
If defined to ``1`` or ``true``, this will prevent usage of self
modifying command buffers to implement ``vkCmdExecuteCommands``. As

View file

@ -78,6 +78,7 @@ static const struct debug_control debug_control[] = {
{ "sparse-trtt", ANV_DEBUG_SPARSE_TRTT},
{ "video-decode", ANV_DEBUG_VIDEO_DECODE},
{ "video-encode", ANV_DEBUG_VIDEO_ENCODE},
{ "shader-hash", ANV_DEBUG_SHADER_HASH},
{ NULL, 0 }
};

View file

@ -1298,6 +1298,7 @@ enum anv_debug {
ANV_DEBUG_SPARSE_TRTT = BITFIELD_BIT(4),
ANV_DEBUG_VIDEO_DECODE = BITFIELD_BIT(5),
ANV_DEBUG_VIDEO_ENCODE = BITFIELD_BIT(6),
ANV_DEBUG_SHADER_HASH = BITFIELD_BIT(7),
};
struct anv_instance {

View file

@ -568,8 +568,18 @@ emit_cs_walker(struct anv_cmd_buffer *cmd_buffer,
uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ,
bool is_unaligned_size_x)
{
struct anv_device *device = cmd_buffer->device;
struct anv_instance *instance = device->physical->instance;
bool is_indirect = !anv_address_is_null(indirect_addr);
struct mi_builder b;
if (unlikely(instance->debug & ANV_DEBUG_SHADER_HASH)) {
mi_builder_init(&b, device->info, &cmd_buffer->batch);
mi_builder_set_mocs(&b, isl_mocs(&device->isl_dev, 0, false));
mi_store(&b, mi_mem32(device->workaround_address),
mi_imm(prog_data->base.source_hash));
}
#if GFX_VERx10 >= 125
/* For unaligned dispatch, we need to tweak the dispatch value with
* MI_MATH, so we can't use indirect HW instructions.

View file

@ -963,10 +963,30 @@ cmd_buffer_pre_draw_wa(struct anv_cmd_buffer *cmd_buffer)
VK_COMMAND_POOL_CREATE_PROTECTED_BIT;
UNUSED struct anv_graphics_pipeline *pipeline =
anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
UNUSED struct anv_device *device = cmd_buffer->device;
UNUSED struct anv_instance *instance = device->physical->instance;
/* Debug helper: when ANV_DEBUG_SHADER_HASH is set in instance->debug and the
 * pipeline has the given stage, store that stage's prog_data->source_hash to
 * device->workaround_address through the mi_builder `b`.
 *
 * Relies on `instance`, `device`, `pipeline` and `b` being in scope at the
 * point of use; `b` is only initialized when the same debug flag is set.
 */
#define DEBUG_SHADER_HASH(stage) do { \
if (unlikely( \
(instance->debug & ANV_DEBUG_SHADER_HASH) && \
anv_pipeline_has_stage(pipeline, stage))) { \
mi_store(&b, \
mi_mem32(device->workaround_address), \
mi_imm(pipeline->base.shaders[stage]-> \
prog_data->source_hash)); \
} \
} while (0)
struct mi_builder b;
if (unlikely(instance->debug & ANV_DEBUG_SHADER_HASH)) {
mi_builder_init(&b, device->info, &cmd_buffer->batch);
mi_builder_set_mocs(&b, isl_mocs(&device->isl_dev, 0, false));
}
#if INTEL_WA_16011107343_GFX_VER
if (intel_needs_workaround(cmd_buffer->device->info, 16011107343) &&
anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) {
DEBUG_SHADER_HASH(MESA_SHADER_TESS_CTRL);
anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
final.hs, protected);
}
@ -975,6 +995,7 @@ cmd_buffer_pre_draw_wa(struct anv_cmd_buffer *cmd_buffer)
#if INTEL_WA_22018402687_GFX_VER
if (intel_needs_workaround(cmd_buffer->device->info, 22018402687) &&
anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
DEBUG_SHADER_HASH(MESA_SHADER_TESS_EVAL);
/* Wa_22018402687:
* In any 3D enabled context, just before any Tessellation enabled
* draw call (3D Primitive), re-send the last programmed 3DSTATE_DS
@ -993,6 +1014,8 @@ cmd_buffer_pre_draw_wa(struct anv_cmd_buffer *cmd_buffer)
#endif
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, true);
#undef DEBUG_SHADER_HASH
}
ALWAYS_INLINE static void

View file

@ -36,6 +36,8 @@
#include "common/intel_tiled_render.h"
#include "compiler/brw_prim.h"
#include "genX_mi_builder.h"
static const uint32_t vk_to_intel_blend[] = {
[VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO,
[VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE,
@ -2146,6 +2148,7 @@ static void
cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_device *device = cmd_buffer->device;
struct anv_instance *instance = device->physical->instance;
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
struct anv_graphics_pipeline *pipeline =
anv_pipeline_to_graphics(gfx->base.pipeline);
@ -2157,6 +2160,23 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
const bool protected = cmd_buffer->vk.pool->flags &
VK_COMMAND_POOL_CREATE_PROTECTED_BIT;
/* Debug helper used in front of the per-stage shader state emission: when
 * ANV_DEBUG_SHADER_HASH is set in instance->debug and the pipeline has the
 * given stage, store that stage's prog_data->source_hash to
 * device->workaround_address through the mi_builder `b`.
 *
 * Relies on `instance`, `device`, `pipeline` and `b` being in scope at the
 * point of use; `b` is only initialized when the same debug flag is set.
 */
#define DEBUG_SHADER_HASH(stage) do { \
if (unlikely( \
(instance->debug & ANV_DEBUG_SHADER_HASH) && \
anv_pipeline_has_stage(pipeline, stage))) { \
mi_store(&b, \
mi_mem32(device->workaround_address), \
mi_imm(pipeline->base.shaders[stage]-> \
prog_data->source_hash)); \
} \
} while (0)
struct mi_builder b;
if (unlikely(instance->debug & ANV_DEBUG_SHADER_HASH)) {
mi_builder_init(&b, device->info, &cmd_buffer->batch);
mi_builder_set_mocs(&b, isl_mocs(&device->isl_dev, 0, false));
}
#if INTEL_WA_16011107343_GFX_VER
/* Will be emitted in front of every draw instead */
if (intel_needs_workaround(device->info, 16011107343) &&
@ -2219,16 +2239,19 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VS)) {
DEBUG_SHADER_HASH(MESA_SHADER_VERTEX);
anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
final.vs, protected);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_HS)) {
DEBUG_SHADER_HASH(MESA_SHADER_TESS_CTRL);
anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
final.hs, protected);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_DS)) {
DEBUG_SHADER_HASH(MESA_SHADER_TESS_EVAL);
anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
final.ds, protected);
}
@ -2321,6 +2344,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
/* Now the potentially dynamic instructions */
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS)) {
DEBUG_SHADER_HASH(MESA_SHADER_FRAGMENT);
anv_batch_emit_merge_protected(&cmd_buffer->batch, GENX(3DSTATE_PS),
pipeline, partial.ps, ps, protected) {
SET(ps, ps, KernelStartPointer0);
@ -2506,6 +2530,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_GS)) {
DEBUG_SHADER_HASH(MESA_SHADER_GEOMETRY);
anv_batch_emit_merge_protected(&cmd_buffer->batch, GENX(3DSTATE_GS),
pipeline, partial.gs, gs, protected) {
SET(gs, gs, ReorderMode);
@ -2856,6 +2881,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
#undef INIT
#undef SET
#undef DEBUG_SHADER_HASH
BITSET_ZERO(hw_state->dirty);
}