mesa/src/intel/vulkan/genX_cmd_draw_generated_indirect.h
Tapani Pälli 8d2dcd55d7 anv: refactor to fix pipe control debugging
While earlier changes to pipe control emission allowed debug dump of
each pipe control, they also changed debug output to almost always print
same reason/function for each pc. These changes fix the output so that
we print the original function name where pc is emitted.

As an example:

pc: emit PC=( +depth_flush +rt_flush +pb_stall +depth_stall ) reason: gfx11_batch_emit_pipe_control_write
pc: emit PC=( ) reason: gfx11_batch_emit_pipe_control_write

changes back to:

pc: emit PC=( +depth_flush +rt_flush +pb_stall +depth_stall ) reason: gfx11_emit_apply_pipe_flushes
pc: emit PC=( ) reason: cmd_buffer_emit_depth_stencil

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25282>
2023-09-20 06:04:37 +00:00

394 lines
15 KiB
C

/*
* Copyright © 2022 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef GENX_CMD_GENERATED_INDIRECT_DRAW_H
#define GENX_CMD_GENERATED_INDIRECT_DRAW_H
#include <assert.h>
#include <stdbool.h>
#include "util/macros.h"
#include "common/intel_genX_state.h"
#include "anv_private.h"
#include "anv_internal_kernels.h"
#include "genX_simple_shader.h"
/* This is the maximum number of items a single fragment shader dispatch can
 * generate, a limit that comes from the viewport size.
 *
 * genX(cmd_buffer_emit_indirect_generated_draws) uses it to cap the number of
 * draws handled per generation dispatch; larger draw counts are split into
 * several dispatches.
 */
#define MAX_GENERATED_DRAW_COUNT (8192 * 8192)
/* Emit one generation dispatch into the command buffer's generation batch.
 *
 * Push data for the generation shader is allocated and filled from the
 * arguments, then the simple shader is dispatched for item_count items.
 *
 * generated_cmds_addr:  where the generated draw commands are written
 * generated_cmd_stride: size of one generated draw command; stored divided
 *                       by 4 in the upper half of the flags word
 * indirect_data_addr / indirect_data_stride: the application's indirect data
 * draw_id_addr:         buffer receiving draw ids (may be a null address)
 * item_base:            index of the first draw handled by this dispatch
 * item_count:           number of items dispatched
 * count_addr:           optional address of the draw count; when non-null the
 *                       count is copied into the push data by the command
 *                       streamer
 * max_count:            upper bound on the draw count
 * indexed:              whether indexed draws are generated
 *
 * Returns the CPU mapping of the push data so the caller can patch it later.
 */
static struct anv_generated_indirect_params *
genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
                                     struct anv_address generated_cmds_addr,
                                     uint32_t generated_cmd_stride,
                                     struct anv_address indirect_data_addr,
                                     uint32_t indirect_data_stride,
                                     struct anv_address draw_id_addr,
                                     uint32_t item_base,
                                     uint32_t item_count,
                                     struct anv_address count_addr,
                                     uint32_t max_count,
                                     bool indexed)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_batch *batch = &cmd_buffer->generation_batch;

   struct anv_state push_data_state =
      genX(simple_shader_alloc_push)(&cmd_buffer->generation_shader_state,
                                     sizeof(struct anv_generated_indirect_params));

   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   const bool has_count_buffer = !anv_address_is_null(count_addr);

   /* Assemble the flags word: feature bits in the low bits, the vertex buffer
    * MOCS shifted to bit 8 and generated_cmd_stride / 4 shifted to bit 16.
    */
   uint32_t draw_flags = 0;
   if (indexed)
      draw_flags |= ANV_GENERATED_FLAG_INDEXED;
   if (cmd_buffer->state.conditional_render_enabled)
      draw_flags |= ANV_GENERATED_FLAG_PREDICATED;
   if (vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance)
      draw_flags |= ANV_GENERATED_FLAG_BASE;
   if (vs_prog_data->uses_drawid)
      draw_flags |= ANV_GENERATED_FLAG_DRAWID;
   draw_flags |= anv_mocs(device, indirect_data_addr.bo,
                          ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8;
   draw_flags |= (generated_cmd_stride / 4) << 16;

   struct anv_generated_indirect_params *push_data = push_data_state.map;
   *push_data = (struct anv_generated_indirect_params) {
      .draw                = {
         .draw_id_addr         = anv_address_physical(draw_id_addr),
         .indirect_data_addr   = anv_address_physical(indirect_data_addr),
         .indirect_data_stride = indirect_data_stride,
         .flags                = draw_flags,
         .draw_base            = item_base,
         /* If count_addr is not NULL, the value is written below through the
          * command streamer, so start from 0.
          */
         .draw_count           = has_count_buffer ? 0 : max_count,
         .max_draw_count       = max_count,
         .instance_multiplier  = pipeline->instance_multiplier,
      },
      .indirect_data_addr  = anv_address_physical(indirect_data_addr),
      .generated_cmds_addr = anv_address_physical(generated_cmds_addr),
      .draw_ids_addr       = anv_address_physical(draw_id_addr),
   };

   if (has_count_buffer) {
      /* Copy the draw count into the push constants so that the generation
       * shader gets the value straight away without another memory access.
       */
      struct anv_address draw_count_dst =
         anv_address_add(
            genX(simple_shader_push_state_address)(
               &cmd_buffer->generation_shader_state,
               push_data_state),
            offsetof(struct anv_generated_indirect_params, draw.draw_count));

      struct mi_builder b;
      mi_builder_init(&b, device->info, batch);
      mi_memcpy(&b, draw_count_dst, count_addr, 4);

      /* Make sure the memcpy has landed before the generating draw call
       * reads the value.
       */
      genx_batch_emit_pipe_control(batch, cmd_buffer->device->info,
                                   ANV_PIPE_CS_STALL_BIT);
   }

   genX(emit_simple_shader_dispatch)(&cmd_buffer->generation_shader_state,
                                     item_count, push_data_state);

   return push_data;
}
static void
genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_buffer)
{
#if GFX_VER >= 12
anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
arb.PreParserDisableMask = true;
arb.PreParserDisable = true;
}
#endif
anv_batch_emit_ensure_space(&cmd_buffer->generation_batch, 4);
trace_intel_begin_generate_draws(&cmd_buffer->trace);
anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
bbs.AddressSpaceIndicator = ASI_PPGTT;
bbs.BatchBufferStartAddress =
anv_batch_current_address(&cmd_buffer->generation_batch);
}
cmd_buffer->generation_return_addr = anv_batch_current_address(&cmd_buffer->batch);
trace_intel_end_generate_draws(&cmd_buffer->trace);
struct anv_device *device = cmd_buffer->device;
struct anv_simple_shader *state = &cmd_buffer->generation_shader_state;
*state = (struct anv_simple_shader) {
.cmd_buffer = cmd_buffer,
.batch = &cmd_buffer->generation_batch,
.kernel = device->internal_kernels[ANV_INTERNAL_KERNEL_GENERATED_DRAWS],
.l3_config = device->internal_kernels_l3_config,
};
genX(emit_simple_shader_init)(state);
}
static struct anv_address
genX(cmd_buffer_get_draw_id_addr)(struct anv_cmd_buffer *cmd_buffer,
uint32_t draw_id_count)
{
#if GFX_VER >= 11
return ANV_NULL_ADDRESS;
#else
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
if (!vs_prog_data->uses_drawid)
return ANV_NULL_ADDRESS;
struct anv_state draw_id_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 4 * draw_id_count, 4);
return anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool,
draw_id_state);
#endif
}
/* Return the size of the commands generated for one draw.
 *
 * With the extended parameters in 3DPRIMITIVE on Gfx11+ a single packet
 * carries everything. Before that, a 3DSTATE_VERTEX_BUFFERS header plus one
 * VERTEX_BUFFER_STATE per needed system value has to precede the
 * 3DPRIMITIVE.
 */
static uint32_t
genX(cmd_buffer_get_generated_draw_stride)(struct anv_cmd_buffer *cmd_buffer)
{
#if GFX_VER >= 11
   return 4 * GENX(3DPRIMITIVE_EXTENDED_length);
#else
   const struct brw_vs_prog_data *vs_prog_data =
      get_vs_prog_data(cmd_buffer->state.gfx.pipeline);

   const bool needs_base_vb =
      vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance;
   const bool needs_drawid_vb = vs_prog_data->uses_drawid;

   /* The 3DPRIMITIVE itself is always emitted. */
   uint32_t stride = 4 * GENX(3DPRIMITIVE_length);

   if (needs_base_vb || needs_drawid_vb) {
      stride += 4; /* 3DSTATE_VERTEX_BUFFERS */

      if (needs_base_vb)
         stride += 4 * GENX(VERTEX_BUFFER_STATE_length);

      if (needs_drawid_vb)
         stride += 4 * GENX(VERTEX_BUFFER_STATE_length);
   }

   return stride;
#endif
}
static void
genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer,
struct anv_generated_indirect_params *params)
{
/* We don't know the end_addr until we have emitted all the generation
* draws. Go and edit the address of all the push parameters.
*/
uint64_t end_addr =
anv_address_physical(anv_batch_current_address(&cmd_buffer->batch));
while (params != NULL) {
params->draw.end_addr = end_addr;
params = params->prev;
}
}
static void
genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address indirect_data_addr,
uint32_t indirect_data_stride,
struct anv_address count_addr,
uint32_t max_draw_count,
bool indexed)
{
const bool start_generation_batch =
anv_address_is_null(cmd_buffer->generation_return_addr);
genX(flush_pipeline_select_3d)(cmd_buffer);
struct anv_address draw_id_addr =
genX(cmd_buffer_get_draw_id_addr)(cmd_buffer, max_draw_count);
#if GFX_VER == 9
/* Mark the VB-0 as using the entire dynamic state pool area, but only for
* the draw call starting the generation batch. All the following ones will
* use the same area.
*/
if (start_generation_batch) {
struct anv_device *device = cmd_buffer->device;
genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(
cmd_buffer, 0,
(struct anv_address) {
.offset = device->physical->va.dynamic_state_pool.addr,
},
device->physical->va.dynamic_state_pool.size);
}
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
if (vs_prog_data->uses_baseinstance ||
vs_prog_data->uses_firstvertex) {
/* We're using the indirect buffer directly to source base instance &
* first vertex values. Mark the entire area as used.
*/
genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX,
indirect_data_addr,
indirect_data_stride * max_draw_count);
}
if (vs_prog_data->uses_drawid) {
/* Mark the whole draw id buffer as used. */
genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX,
draw_id_addr,
sizeof(uint32_t) * max_draw_count);
}
#endif
/* Apply the pipeline flush here so the indirect data is available for the
* generation shader.
*/
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
if (start_generation_batch)
genX(cmd_buffer_emit_indirect_generated_draws_init)(cmd_buffer);
/* In order to have the vertex fetch gather the data we need to have a non
* 0 stride. It's possible to have a 0 stride given by the application when
* draw_count is 1, but we need a correct value for the
* VERTEX_BUFFER_STATE::BufferPitch, so ensure the caller set this
* correctly :
*
* Vulkan spec, vkCmdDrawIndirect:
*
* "If drawCount is less than or equal to one, stride is ignored."
*/
assert(indirect_data_stride > 0);
if (cmd_buffer->state.conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
/* Emit the 3D state in the main batch. */
genX(cmd_buffer_flush_gfx_state)(cmd_buffer);
const uint32_t draw_cmd_stride =
genX(cmd_buffer_get_generated_draw_stride)(cmd_buffer);
struct anv_generated_indirect_params *last_params = NULL;
uint32_t item_base = 0;
while (item_base < max_draw_count) {
const uint32_t item_count = MIN2(max_draw_count - item_base,
MAX_GENERATED_DRAW_COUNT);
const uint32_t draw_cmd_size = item_count * draw_cmd_stride;
/* Ensure we have enough contiguous space for all the draws so that the
* compute shader can edit all the 3DPRIMITIVEs from a single base
* address.
*
* TODO: we might have to split that if the amount of space is to large (at
* 1Mb?).
*/
VkResult result = anv_batch_emit_ensure_space(&cmd_buffer->batch,
draw_cmd_size);
if (result != VK_SUCCESS)
return;
struct anv_generated_indirect_params *params =
genX(cmd_buffer_emit_generate_draws)(
cmd_buffer,
anv_batch_current_address(&cmd_buffer->batch),
draw_cmd_stride,
anv_address_add(indirect_data_addr,
item_base * indirect_data_stride),
indirect_data_stride,
anv_address_add(draw_id_addr, 4 * item_base),
item_base,
item_count,
count_addr,
max_draw_count,
indexed);
anv_batch_advance(&cmd_buffer->batch, draw_cmd_size);
item_base += item_count;
params->prev = last_params;
last_params = params;
}
genX(cmd_buffer_rewrite_forward_end_addr)(cmd_buffer, last_params);
#if GFX_VER == 9
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, indexed ? RANDOM : SEQUENTIAL);
#endif
}
static void
genX(cmd_buffer_flush_generated_draws)(struct anv_cmd_buffer *cmd_buffer)
{
/* No return address setup means we don't have to do anything */
if (anv_address_is_null(cmd_buffer->generation_return_addr))
return;
struct anv_batch *batch = &cmd_buffer->generation_batch;
/* Wait for all the generation vertex shader to generate the commands. */
genX(emit_apply_pipe_flushes)(batch,
cmd_buffer->device,
_3D,
#if GFX_VER == 9
ANV_PIPE_VF_CACHE_INVALIDATE_BIT |
#endif
ANV_PIPE_DATA_CACHE_FLUSH_BIT |
ANV_PIPE_CS_STALL_BIT,
NULL /* emitted_bits */);
#if GFX_VER >= 12
anv_batch_emit(batch, GENX(MI_ARB_CHECK), arb) {
arb.PreParserDisableMask = true;
arb.PreParserDisable = false;
}
#else
/* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't matter
* as the prefetch shouldn't follow the MI_BATCH_BUFFER_START.
*/
#endif
/* Return to the main batch. */
anv_batch_emit(batch, GENX(MI_BATCH_BUFFER_START), bbs) {
bbs.AddressSpaceIndicator = ASI_PPGTT;
bbs.BatchBufferStartAddress = cmd_buffer->generation_return_addr;
}
cmd_buffer->generation_return_addr = ANV_NULL_ADDRESS;
}
#endif /* GENX_CMD_GENERATED_INDIRECT_DRAW_H */