From 8831cb38aa9ea3385addaa21dad9207adaf097b9 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 25 Mar 2022 11:53:56 -0700 Subject: [PATCH] anv: Stop updating STATE_BASE_ADDRESS on XeHP Now that we're using 3DSTATE_BINDING_TABLE_POOL_ALLOC to set the base address for the binding table pool separately from surface states, we don't actually need to update surface state base address anymore. Instead, we can just set STATE_BASE_ADDRESS once at context creation, and never bother updating it again, saving some heavyweight flushes and freeing us from the need for address offsetting trickery. This patch was originally written by Jason Ekstrand, with fixes from Lionel Landwerlin, but was targeting Icelake. Doing it there requires additional changes (15:5 -> 18:8 binding table pointer formats) which also involve some trade-offs, whereas the XeHP change is purely a win, so we'll do it here first. Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/vulkan/anv_batch_chain.c | 12 +++++-- src/intel/vulkan/anv_device.c | 14 ++++++-- src/intel/vulkan/genX_cmd_buffer.c | 26 +++++++------- src/intel/vulkan/genX_state.c | 56 +++++++++++++++++++++++++++++- 4 files changed, 90 insertions(+), 18 deletions(-) diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index 3b1bdfcbef0..4043c8417de 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -792,8 +792,16 @@ anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->bt_next.map += bt_size; cmd_buffer->bt_next.alloc_size -= bt_size; - assert(bt_block->offset < 0); - *state_offset = -bt_block->offset; + if (cmd_buffer->device->info.verx10 >= 125) { + /* We're using 3DSTATE_BINDING_TABLE_POOL_ALLOC to change the binding + * table address independently from surface state base address. We no + * longer need any sort of offsetting. + */ + *state_offset = 0; + } else { + assert(bt_block->offset < 0); + *state_offset = -bt_block->offset; + } return state; } diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index c870ac8dfa1..6ff64130d94 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -3306,7 +3306,15 @@ VkResult anv_CreateDevice( if (result != VK_SUCCESS) goto fail_instruction_state_pool; - if (!anv_use_relocations(physical_device)) { + if (device->info.verx10 >= 125) { + /* We're using 3DSTATE_BINDING_TABLE_POOL_ALLOC to give the binding + * table its own base address separately from surface state base. + */ + result = anv_state_pool_init(&device->binding_table_pool, device, + "binding table pool", + BINDING_TABLE_POOL_MIN_ADDRESS, 0, + BINDING_TABLE_POOL_BLOCK_SIZE); + } else if (!anv_use_relocations(physical_device)) { int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS - (int64_t)SURFACE_STATE_POOL_MIN_ADDRESS; assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0); @@ -3315,9 +3323,9 @@ VkResult anv_CreateDevice( SURFACE_STATE_POOL_MIN_ADDRESS, bt_pool_offset, BINDING_TABLE_POOL_BLOCK_SIZE); - if (result != VK_SUCCESS) - goto fail_surface_state_pool; } + if (result != VK_SUCCESS) + goto fail_surface_state_pool; if (device->info.has_aux_map) { device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator, diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 5e1ccafeb1a..4679bca8645 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -104,6 +104,19 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) */ cmd_buffer->state.descriptors_dirty |= ~0; +#if GFX_VERx10 >= 125 + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + pc.CommandStreamerStallEnable = true; + anv_debug_dump_pc(pc); + } + anv_batch_emit( + &cmd_buffer->batch, GENX(3DSTATE_BINDING_TABLE_POOL_ALLOC), btpa) { + btpa.BindingTablePoolBaseAddress = + anv_cmd_buffer_surface_base_address(cmd_buffer); + btpa.BindingTablePoolBufferSize = BINDING_TABLE_POOL_BLOCK_SIZE / 4096; + btpa.MOCS = mocs; + } +#else /* GFX_VERx10 < 125 */ /* Emit a render target cache flush. * * This isn't documented anywhere in the PRM. However, it seems to be @@ -221,18 +234,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) genX(flush_pipeline_select)(cmd_buffer, gfx12_wa_pipeline); #endif -#if GFX_VERx10 >= 125 - anv_batch_emit( - &cmd_buffer->batch, GENX(3DSTATE_BINDING_TABLE_POOL_ALLOC), btpa) { - btpa.BindingTablePoolBaseAddress = - anv_cmd_buffer_surface_base_address(cmd_buffer); - btpa.BindingTablePoolBufferSize = BINDING_TABLE_POOL_BLOCK_SIZE / 4096; -#if GFX_VERx10 < 125 - btpa.BindingTablePoolEnable = true; -#endif - btpa.MOCS = mocs; - } -#endif +#endif /* GFX_VERx10 < 125 */ /* After re-setting the surface state base address, we have to do some * cache flusing so that the sampler engine will pick up the new diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index a91c07418f6..a551206b0e2 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -162,7 +162,7 @@ static VkResult init_render_queue_state(struct anv_queue *queue) { struct anv_device *device = queue->device; - uint32_t cmds[64]; + uint32_t cmds[128]; struct anv_batch batch = { .start = cmds, .next = cmds, @@ -188,6 +188,60 @@ init_render_queue_state(struct anv_queue *queue) } #endif +#if GFX_VERx10 >= 125 + /* GEN:BUG:1607854226: + * + * Non-pipelined state has issues with not applying in MEDIA/GPGPU mode. + * Fortunately, we always start the context off in 3D mode. + */ + uint32_t mocs = device->isl_dev.mocs.internal; + anv_batch_emit(&batch, GENX(STATE_BASE_ADDRESS), sba) { + sba.GeneralStateBaseAddress = (struct anv_address) { NULL, 0 }; + sba.GeneralStateBufferSize = 0xfffff; + sba.GeneralStateMOCS = mocs; + sba.GeneralStateBaseAddressModifyEnable = true; + sba.GeneralStateBufferSizeModifyEnable = true; + + sba.StatelessDataPortAccessMOCS = mocs; + + sba.SurfaceStateBaseAddress = + (struct anv_address) { .offset = SURFACE_STATE_POOL_MIN_ADDRESS }; + sba.SurfaceStateMOCS = mocs; + sba.SurfaceStateBaseAddressModifyEnable = true; + + sba.DynamicStateBaseAddress = + (struct anv_address) { .offset = DYNAMIC_STATE_POOL_MIN_ADDRESS }; + sba.DynamicStateBufferSize = DYNAMIC_STATE_POOL_SIZE / 4096; + sba.DynamicStateMOCS = mocs; + sba.DynamicStateBaseAddressModifyEnable = true; + sba.DynamicStateBufferSizeModifyEnable = true; + + sba.IndirectObjectBaseAddress = (struct anv_address) { NULL, 0 }; + sba.IndirectObjectBufferSize = 0xfffff; + sba.IndirectObjectMOCS = mocs; + sba.IndirectObjectBaseAddressModifyEnable = true; + sba.IndirectObjectBufferSizeModifyEnable = true; + + sba.InstructionBaseAddress = + (struct anv_address) { .offset = INSTRUCTION_STATE_POOL_MIN_ADDRESS }; + sba.InstructionBufferSize = INSTRUCTION_STATE_POOL_SIZE / 4096; + sba.InstructionMOCS = mocs; + sba.InstructionBaseAddressModifyEnable = true; + sba.InstructionBuffersizeModifyEnable = true; + + sba.BindlessSurfaceStateBaseAddress = + (struct anv_address) { .offset = SURFACE_STATE_POOL_MIN_ADDRESS }; + sba.BindlessSurfaceStateSize = (1 << 20) - 1; + sba.BindlessSurfaceStateMOCS = mocs; + sba.BindlessSurfaceStateBaseAddressModifyEnable = true; + + sba.BindlessSamplerStateBaseAddress = (struct anv_address) { NULL, 0 }; + sba.BindlessSamplerStateMOCS = mocs; + sba.BindlessSamplerStateBaseAddressModifyEnable = true; + sba.BindlessSamplerStateBufferSize = 0; + } +#endif + anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS), aa); anv_batch_emit(&batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {