anv: predicate emission of STATE_BASE_ADDRESS

Completely skip the stall & programming if the bindless address has
not changed. This is only done on Gfx12.5+, since previous generations
also program the binding table pool base address through
STATE_BASE_ADDRESS.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29595>
This commit is contained in:
Lionel Landwerlin 2024-06-06 17:32:49 +03:00 committed by Marge Bot
parent 9a3e8508a7
commit 0147908a89
4 changed files with 46 additions and 10 deletions

View file

@ -223,6 +223,12 @@ struct intel_perf_query_result;
*/
#define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */
/* We reserve this MI ALU register to hold the last programmed bindless
* surface state base address so that we can predicate STATE_BASE_ADDRESS
* emissions if the address doesn't change.
*/
#define ANV_BINDLESS_SURFACE_BASE_ADDR_REG 0x2668 /* MI_ALU_REG13 */
#define ANV_GRAPHICS_SHADER_STAGE_COUNT (MESA_SHADER_MESH + 1)
/* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64

View file

@ -239,13 +239,21 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
anv_cmd_buffer_is_video_queue(cmd_buffer))
return;
struct anv_device *device = cmd_buffer->device;
struct GENX(STATE_BASE_ADDRESS) sba = {};
fill_state_base_addr(cmd_buffer, &sba);
-/* If we are emitting a new state base address we probably need to re-emit
- * binding tables.
- */
-cmd_buffer->state.descriptors_dirty |= ~0;
+#if GFX_VERx10 >= 125
+struct mi_builder b;
+mi_builder_init(&b, device->info, &cmd_buffer->batch);
+mi_builder_set_mocs(&b, isl_mocs(&device->isl_dev, 0, false));
struct mi_goto_target t = MI_GOTO_TARGET_INIT;
mi_goto_if(&b,
mi_ieq(&b, mi_reg64(ANV_BINDLESS_SURFACE_BASE_ADDR_REG),
mi_imm(sba.BindlessSurfaceStateBaseAddress.offset)),
&t);
#endif
/* Emit a render target cache flush.
*
@ -294,10 +302,6 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
genX(flush_pipeline_select)(cmd_buffer, gfx12_wa_pipeline);
#endif
#if GFX_VERx10 >= 125
genX(cmd_buffer_emit_bt_pool_base_address)(cmd_buffer);
#endif
/* After re-setting the surface state base address, we have to do some
* cache flushing so that the sampler engine will pick up the new
* SURFACE_STATE objects and binding tables. From the Broadwell PRM,
@ -400,6 +404,23 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer->state.compute.base.push_constants_data_dirty = true;
#endif
}
#if GFX_VERx10 >= 125
assert(sba.BindlessSurfaceStateBaseAddress.offset != 0);
mi_store(&b, mi_reg64(ANV_BINDLESS_SURFACE_BASE_ADDR_REG),
mi_imm(sba.BindlessSurfaceStateBaseAddress.offset));
mi_goto_target(&b, &t);
#endif
#if GFX_VERx10 >= 125
genX(cmd_buffer_emit_bt_pool_base_address)(cmd_buffer);
#endif
/* If we have emitted a new state base address we probably need to re-emit
* binding tables.
*/
cmd_buffer->state.descriptors_dirty |= ~0;
}
void

View file

@ -35,6 +35,8 @@
#include "grl/genX_grl.h"
#endif
#include "genX_mi_builder.h"
#include "vk_util.h"
#include "vk_format.h"
@ -309,7 +311,13 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch)
sba.L1CacheControl = L1CC_WB;
#endif
}
#endif
struct mi_builder b;
mi_builder_init(&b, device->info, batch);
mi_store(&b, mi_reg64(ANV_BINDLESS_SURFACE_BASE_ADDR_REG),
mi_imm(device->physical->va.internal_surface_state_pool.addr));
#endif /* GFX_VER >= 12 */
#if GFX_VERx10 >= 125
if (ANV_SUPPORT_RT && device->info->has_ray_tracing) {

View file

@ -8,10 +8,11 @@
#include "genxml/genX_pack.h"
/* We reserve :
* - GPR 13 for STATE_BASE_ADDRESS bindless surface base address
* - GPR 14 for perf queries
* - GPR 15 for conditional rendering
*/
-#define MI_BUILDER_NUM_ALLOC_GPRS 14
+#define MI_BUILDER_NUM_ALLOC_GPRS 13
#define MI_BUILDER_CAN_WRITE_BATCH true
/* Don't do any write check by default, we manually set it where it matters.
*/