From 63fa6d9f49cb882a50f2333b4a3752c3ba0cccad Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 2 Jan 2023 18:20:48 +0200 Subject: [PATCH] anv: fix generated forward jump with more than 67M draws The issue here is that for draw indirect count variants, we want to jump after the last generated draw call to the next location where commands are. But if we have more than 67M draws (8k * 8k chunks), we only know the location once we've generated each of the 8k * 8k chunks. This change adds a CPU side pointer in the push constant struct so that we can create a single linked list of chunks to edit and go through to write the correct jump address after all the generated space has been allocated. Signed-off-by: Lionel Landwerlin Fixes: c950fe97a0 ("anv: implement generated (indexed) indirect draws") Reviewed-by: Ivan Briano Part-of: --- .../vulkan/anv_generated_indirect_draws.h | 6 ++ .../vulkan/genX_cmd_draw_generated_indirect.h | 81 ++++++++++++------- 2 files changed, 58 insertions(+), 29 deletions(-) diff --git a/src/intel/vulkan/anv_generated_indirect_draws.h b/src/intel/vulkan/anv_generated_indirect_draws.h index 7cc97ccbe8b..f79c87c4f54 100644 --- a/src/intel/vulkan/anv_generated_indirect_draws.h +++ b/src/intel/vulkan/anv_generated_indirect_draws.h @@ -66,6 +66,12 @@ struct anv_generate_indirect_params { /* Global address of binding 1 */ uint64_t generated_cmds_addr; + + /* CPU side pointer to the previous item when number of draws has to be + * split into smaller chunks, see while loop in + * genX(cmd_buffer_emit_indirect_generated_draws) + */ + struct anv_generate_indirect_params *prev; }; #endif /* ANV_GENERATED_INDIRECT_DRAWS_H */ diff --git a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h index 8600c9a9350..6d732243c0c 100644 --- a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h +++ b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h @@ -321,10 +321,9 @@ genX(cmd_buffer_emit_generated_push_data)(struct anv_cmd_buffer *cmd_buffer, return push_data_state; } -static void +static struct anv_generate_indirect_params * genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer, struct anv_address generated_cmds_addr, - uint32_t generated_cmds_size, struct anv_address indirect_data_addr, uint32_t indirect_data_stride, uint32_t item_base, @@ -392,6 +391,8 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer, prim.VertexCountPerInstance = 3; prim.InstanceCount = 1; } + + return push_data; } static void @@ -421,6 +422,22 @@ genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_b genX(cmd_buffer_emit_generate_draws_pipeline)(cmd_buffer); } +static void +genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer, + struct anv_generate_indirect_params *params) +{ + /* We don't know the end_addr until we have emitted all the generation + * draws. Go and edit the address of all the push parameters. + */ + uint64_t end_addr = + anv_address_physical(anv_batch_current_address(&cmd_buffer->batch)); + while (params != NULL) { + params->draw_count.end_addr_ldw = end_addr & 0xffffffff; + params->draw_count.end_addr_udw = end_addr >> 32; + params = params->prev; + } +} + static void genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer, struct anv_address indirect_data_addr, @@ -458,6 +475,7 @@ genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer const uint32_t draw_cmd_stride = 4 * GENX(3DPRIMITIVE_EXTENDED_length); + struct anv_generate_indirect_params *last_params = NULL; uint32_t item_base = 0; while (item_base < draw_count) { const uint32_t item_count = MIN2(draw_count - item_base, @@ -476,26 +494,28 @@ genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer if (result != VK_SUCCESS) return; - genX(cmd_buffer_emit_generate_draws)( - cmd_buffer, - anv_batch_current_address(&cmd_buffer->batch), - draw_cmd_size, - indirect_data_addr, - indirect_data_stride, - item_base, - item_count, - indexed); + struct anv_generate_indirect_params *params = + genX(cmd_buffer_emit_generate_draws)( + cmd_buffer, + anv_batch_current_address(&cmd_buffer->batch), + indirect_data_addr, + indirect_data_stride, + item_base, + item_count, + indexed); anv_batch_advance(&cmd_buffer->batch, draw_cmd_size); item_base += item_count; + + params->prev = last_params; + last_params = params; } } -static void +static struct anv_generate_indirect_params * genX(cmd_buffer_emit_generate_draws_count)(struct anv_cmd_buffer *cmd_buffer, struct anv_address generated_cmds_addr, - uint32_t generated_cmds_size, struct anv_address indirect_data_addr, uint32_t indirect_data_stride, uint32_t item_base, @@ -542,9 +562,6 @@ genX(cmd_buffer_emit_generate_draws_count)(struct anv_cmd_buffer *cmd_buffer, genX(cmd_buffer_alloc_generated_push_data)(cmd_buffer); struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - uint64_t end_cmd_addr = - anv_address_physical( - anv_address_add(generated_cmds_addr, generated_cmds_size)); struct anv_generate_indirect_params *push_data = push_data_state.map; *push_data = (struct anv_generate_indirect_params) { @@ -556,8 +573,6 @@ genX(cmd_buffer_emit_generate_draws_count)(struct anv_cmd_buffer *cmd_buffer, .draw_count = 0, // Edit this through a the command streamer .instance_multiplier = pipeline->instance_multiplier, .indirect_data_stride = indirect_data_stride, - .end_addr_ldw = end_cmd_addr & 0xffffffff, - .end_addr_udw = end_cmd_addr >> 32, }, .indirect_data_addr = anv_address_physical(indirect_data_addr), .generated_cmds_addr = anv_address_physical(generated_cmds_addr), @@ -585,6 +600,8 @@ genX(cmd_buffer_emit_generate_draws_count)(struct anv_cmd_buffer *cmd_buffer, prim.VertexCountPerInstance = 3; prim.InstanceCount = 1; } + + return push_data; } static void @@ -625,6 +642,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_count)(struct anv_cmd_buffer *cmd_ const uint32_t draw_cmd_stride = 4 * GENX(3DPRIMITIVE_EXTENDED_length); + struct anv_generate_indirect_params *last_params = NULL; uint32_t item_base = 0; while (item_base < max_draw_count) { const uint32_t item_count = MIN2(max_draw_count - item_base, @@ -643,22 +661,27 @@ genX(cmd_buffer_emit_indirect_generated_draws_count)(struct anv_cmd_buffer *cmd_ if (result != VK_SUCCESS) return; - genX(cmd_buffer_emit_generate_draws_count)( - cmd_buffer, - anv_batch_current_address(&cmd_buffer->batch), - draw_cmd_size, - anv_address_add(indirect_data_addr, - item_base * indirect_data_stride), - indirect_data_stride, - item_base, - item_count, - count_addr, - indexed); + struct anv_generate_indirect_params *params = + genX(cmd_buffer_emit_generate_draws_count)( + cmd_buffer, + anv_batch_current_address(&cmd_buffer->batch), + anv_address_add(indirect_data_addr, + item_base * indirect_data_stride), + indirect_data_stride, + item_base, + item_count, + count_addr, + indexed); anv_batch_advance(&cmd_buffer->batch, draw_cmd_size); item_base += item_count; + + params->prev = last_params; + last_params = params; } + + genX(cmd_buffer_rewrite_forward_end_addr)(cmd_buffer, last_params); } static void