anv: Write IR header using shader instead of CS

On integrated platforms, we have an issue where the L3 cache is not
coherent with CS, which forces us to push data out of L3.

To avoid the data cache flush, let's write the IR header with a BLORP
shader. There is a small shader launch latency, but in the end that
should not matter because writing data with CS (MI_STORE) commands is
slower than shader execution when a large number of BVH trees are being
built.

Signed-off-by: Sagar Ghuge <sagar.ghuge@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39971>
This commit is contained in:
Sagar Ghuge 2026-03-12 12:51:03 -07:00 committed by Marge Bot
parent 5bfaf7536a
commit 37f26e346a
2 changed files with 11 additions and 22 deletions

View file

@ -1449,15 +1449,17 @@ anv_cmd_write_buffer_cp(VkCommandBuffer commandBuffer,
void
anv_cmd_flush_buffer_write_cp(VkCommandBuffer commandBuffer)
{
/* TODO: cmd_write_buffer_cp is implemented with MI store +
* ForceWriteCompletionCheck so that should make the content globally
* observable.
*
* If we encounter any functional or perf bottleneck issues, let's revisit
* this helper and add ANV_PIPE_HDC_PIPELINE_FLUSH_BIT +
* ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT +
* ANV_PIPE_DATA_CACHE_FLUSH_BIT.
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
/* IR header would get written by compute shader using BLORP code path, so
* we need to flush HDC and untyped dataport cache.
*/
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT,
"Flush buffer write cp");
}
void

View file

@ -7536,18 +7536,5 @@ genX(cmd_write_buffer_cp)(struct anv_cmd_buffer *cmd_buffer,
{
assert(size % 4 == 0);
struct anv_address addr = anv_address_from_u64(dstAddr);
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
for (uint32_t i = 0; i < size; i += 8) {
mi_builder_set_write_check(&b, i >= size - 8);
if (size - i < 8) {
mi_store(&b, mi_mem32(anv_address_add(addr, i)),
mi_imm(*((uint32_t *)((char*)data + i))));
} else {
mi_store(&b, mi_mem64(anv_address_add(addr, i)),
mi_imm(*((uint64_t *)((char*)data + i))));
}
}
anv_cmd_buffer_update_addr(cmd_buffer, addr, size, data);
}