anv: fix generated forward jump with more than 67M draws

The issue is that for the draw indirect count variants, we want to
jump, after the last generated draw call, to the next location where
commands are. But if we have more than 67M draws (draws are generated
in chunks of at most 8k * 8k), we only know that location once every
one of those chunks has been generated.

This change adds a CPU-side pointer to the push constant struct so
that the chunks form a singly linked list we can walk to write the
correct jump address once all the generated space has been allocated
(a simplified sketch of the pattern follows below).

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: c950fe97a0 ("anv: implement generated (indexed) indirect draws")
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20497>
Lionel Landwerlin, 2023-01-02 18:20:48 +02:00 (committed by Marge Bot)
commit 63fa6d9f49, parent c1c680c08b
2 changed files with 58 additions and 29 deletions
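
Below is a minimal, standalone sketch of the fix-up pattern described in the
commit message, using hypothetical names (chunk_params, link_chunk,
rewrite_end_addr) rather than the actual driver code: each chunk's CPU-visible
push data carries a prev pointer, and once the final jump target is known the
list is walked to patch every chunk.

   #include <stdint.h>
   #include <stddef.h>

   struct chunk_params {
      uint32_t end_addr_ldw;      /* low 32 bits of the forward jump target */
      uint32_t end_addr_udw;      /* high 32 bits of the forward jump target */
      struct chunk_params *prev;  /* CPU-side link to the previously emitted chunk */
   };

   /* While emitting chunks: link the freshly written push data into the list. */
   static struct chunk_params *
   link_chunk(struct chunk_params *params, struct chunk_params *last)
   {
      params->prev = last;
      return params; /* the new list head */
   }

   /* Once the last chunk has been emitted and the end address is finally known,
    * walk the list backwards and patch every chunk.
    */
   static void
   rewrite_end_addr(struct chunk_params *last, uint64_t end_addr)
   {
      for (struct chunk_params *p = last; p != NULL; p = p->prev) {
         p->end_addr_ldw = end_addr & 0xffffffff;
         p->end_addr_udw = end_addr >> 32;
      }
   }

In the actual change below, these roles are played by the new
anv_generate_indirect_params::prev field and
genX(cmd_buffer_rewrite_forward_end_addr).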

@@ -66,6 +66,12 @@ struct anv_generate_indirect_params {
 
    /* Global address of binding 1 */
    uint64_t generated_cmds_addr;
+
+   /* CPU side pointer to the previous item when number of draws has to be
+    * split into smaller chunks, see while loop in
+    * genX(cmd_buffer_emit_indirect_generated_draws)
+    */
+   struct anv_generate_indirect_params *prev;
 };
 
 #endif /* ANV_GENERATED_INDIRECT_DRAWS_H */

@@ -321,10 +321,9 @@ genX(cmd_buffer_emit_generated_push_data)(struct anv_cmd_buffer *cmd_buffer,
    return push_data_state;
 }
 
-static void
+static struct anv_generate_indirect_params *
 genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
                                      struct anv_address generated_cmds_addr,
-                                     uint32_t generated_cmds_size,
                                      struct anv_address indirect_data_addr,
                                      uint32_t indirect_data_stride,
                                      uint32_t item_base,
@@ -392,6 +391,8 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
       prim.VertexCountPerInstance = 3;
       prim.InstanceCount = 1;
    }
+
+   return push_data;
 }
 
 static void
@@ -421,6 +422,22 @@ genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_b
    genX(cmd_buffer_emit_generate_draws_pipeline)(cmd_buffer);
 }
 
+static void
+genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer,
+                                          struct anv_generate_indirect_params *params)
+{
+   /* We don't know the end_addr until we have emitted all the generation
+    * draws. Go and edit the address of all the push parameters.
+    */
+   uint64_t end_addr =
+      anv_address_physical(anv_batch_current_address(&cmd_buffer->batch));
+   while (params != NULL) {
+      params->draw_count.end_addr_ldw = end_addr & 0xffffffff;
+      params->draw_count.end_addr_udw = end_addr >> 32;
+      params = params->prev;
+   }
+}
+
 static void
 genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer,
                                                struct anv_address indirect_data_addr,
@@ -458,6 +475,7 @@ genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer
 
    const uint32_t draw_cmd_stride = 4 * GENX(3DPRIMITIVE_EXTENDED_length);
 
+   struct anv_generate_indirect_params *last_params = NULL;
    uint32_t item_base = 0;
    while (item_base < draw_count) {
       const uint32_t item_count = MIN2(draw_count - item_base,
@@ -476,26 +494,28 @@ genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer
       if (result != VK_SUCCESS)
          return;
 
-      genX(cmd_buffer_emit_generate_draws)(
-         cmd_buffer,
-         anv_batch_current_address(&cmd_buffer->batch),
-         draw_cmd_size,
-         indirect_data_addr,
-         indirect_data_stride,
-         item_base,
-         item_count,
-         indexed);
+      struct anv_generate_indirect_params *params =
+         genX(cmd_buffer_emit_generate_draws)(
+            cmd_buffer,
+            anv_batch_current_address(&cmd_buffer->batch),
+            indirect_data_addr,
+            indirect_data_stride,
+            item_base,
+            item_count,
+            indexed);
 
       anv_batch_advance(&cmd_buffer->batch, draw_cmd_size);
 
       item_base += item_count;
+
+      params->prev = last_params;
+      last_params = params;
    }
 }
 
-static void
+static struct anv_generate_indirect_params *
 genX(cmd_buffer_emit_generate_draws_count)(struct anv_cmd_buffer *cmd_buffer,
                                            struct anv_address generated_cmds_addr,
-                                           uint32_t generated_cmds_size,
                                            struct anv_address indirect_data_addr,
                                            uint32_t indirect_data_stride,
                                            uint32_t item_base,
@@ -542,9 +562,6 @@ genX(cmd_buffer_emit_generate_draws_count)(struct anv_cmd_buffer *cmd_buffer,
       genX(cmd_buffer_alloc_generated_push_data)(cmd_buffer);
 
    struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
-   uint64_t end_cmd_addr =
-      anv_address_physical(
-         anv_address_add(generated_cmds_addr, generated_cmds_size));
 
    struct anv_generate_indirect_params *push_data = push_data_state.map;
    *push_data = (struct anv_generate_indirect_params) {
@@ -556,8 +573,6 @@ genX(cmd_buffer_emit_generate_draws_count)(struct anv_cmd_buffer *cmd_buffer,
          .draw_count = 0, // Edit this through the command streamer
          .instance_multiplier = pipeline->instance_multiplier,
          .indirect_data_stride = indirect_data_stride,
-         .end_addr_ldw = end_cmd_addr & 0xffffffff,
-         .end_addr_udw = end_cmd_addr >> 32,
       },
       .indirect_data_addr = anv_address_physical(indirect_data_addr),
       .generated_cmds_addr = anv_address_physical(generated_cmds_addr),
@@ -585,6 +600,8 @@ genX(cmd_buffer_emit_generate_draws_count)(struct anv_cmd_buffer *cmd_buffer,
       prim.VertexCountPerInstance = 3;
       prim.InstanceCount = 1;
    }
+
+   return push_data;
 }
 
 static void
@@ -625,6 +642,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_count)(struct anv_cmd_buffer *cmd_
 
    const uint32_t draw_cmd_stride = 4 * GENX(3DPRIMITIVE_EXTENDED_length);
 
+   struct anv_generate_indirect_params *last_params = NULL;
    uint32_t item_base = 0;
    while (item_base < max_draw_count) {
       const uint32_t item_count = MIN2(max_draw_count - item_base,
@@ -643,22 +661,27 @@ genX(cmd_buffer_emit_indirect_generated_draws_count)(struct anv_cmd_buffer *cmd_
       if (result != VK_SUCCESS)
         return;
 
-      genX(cmd_buffer_emit_generate_draws_count)(
-         cmd_buffer,
-         anv_batch_current_address(&cmd_buffer->batch),
-         draw_cmd_size,
-         anv_address_add(indirect_data_addr,
-                         item_base * indirect_data_stride),
-         indirect_data_stride,
-         item_base,
-         item_count,
-         count_addr,
-         indexed);
+      struct anv_generate_indirect_params *params =
+         genX(cmd_buffer_emit_generate_draws_count)(
+            cmd_buffer,
+            anv_batch_current_address(&cmd_buffer->batch),
+            anv_address_add(indirect_data_addr,
+                            item_base * indirect_data_stride),
+            indirect_data_stride,
+            item_base,
+            item_count,
+            count_addr,
+            indexed);
 
       anv_batch_advance(&cmd_buffer->batch, draw_cmd_size);
 
       item_base += item_count;
+
+      params->prev = last_params;
+      last_params = params;
    }
+
+   genX(cmd_buffer_rewrite_forward_end_addr)(cmd_buffer, last_params);
 }
 
 static void