mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 02:48:06 +02:00
anv: add Gfx9 support VK_EXT_device_generated_commands
This platform just needs a bit more care around vertex buffer state emission. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31384>
This commit is contained in:
parent
afabf6e350
commit
fee5106b53
10 changed files with 168 additions and 19 deletions
|
|
@ -230,6 +230,7 @@ genX_cl_included_symbols = [
|
|||
'MI_ARB_CHECK',
|
||||
'MI_BATCH_BUFFER_START',
|
||||
'MI_STORE_DATA_IMM',
|
||||
'PIPE_CONTROL',
|
||||
# structures
|
||||
'3DSTATE_CONSTANT_ALL_DATA',
|
||||
'3DSTATE_CONSTANT_BODY',
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
(((descriptor)->active_stages & \
|
||||
BITFIELD_BIT(ANV_DGC_STAGE_##stage)) != 0)
|
||||
|
||||
#if GFX_VER >= 11
|
||||
#if GFX_VER >= 9
|
||||
|
||||
static void
|
||||
merge_dwords(global void *dst, global void *src1, global void *src2, uint32_t n_dwords)
|
||||
|
|
@ -434,19 +434,35 @@ genX(libanv_preprocess_gfx_generate)(global void *cmd_base,
|
|||
|
||||
/* 3DSTATE_VERTEX_BUFFERS */
|
||||
uint32_t n_vertex_buffers = state->layout.vertex_buffers.n_buffers;
|
||||
if (n_vertex_buffers) {
|
||||
uint32_t n_draw_param_buffers = GFX_VER == 9 ? util_bitcount(state->descriptor.draw_params) : 0;
|
||||
if (n_vertex_buffers > 0 || n_draw_param_buffers > 0) {
|
||||
global void *cmd_vb = cmd_ptr + state->layout.vertex_buffers.cmd_offset;
|
||||
|
||||
genX(write_3DSTATE_VERTEX_BUFFERS)(cmd_vb, n_vertex_buffers);
|
||||
genX(write_3DSTATE_VERTEX_BUFFERS)(cmd_vb, n_vertex_buffers + n_draw_param_buffers);
|
||||
cmd_vb += 4;
|
||||
|
||||
#if GFX_VER == 9
|
||||
global void *prev_seq_ptr = seq_base + (seq_idx == 0 ? 0 : (seq_idx - 1)) * seq_stride;
|
||||
bool needs_vf_inval = false;
|
||||
#endif
|
||||
|
||||
uint16_t mocs = state->layout.vertex_buffers.mocs;
|
||||
for (uint32_t i = 0; i < n_vertex_buffers; i++) {
|
||||
struct anv_dgc_vertex_buffer vb = state->layout.vertex_buffers.buffers[i];
|
||||
|
||||
VkBindVertexBufferIndirectCommandEXT vtx_data =
|
||||
*(global VkBindVertexBufferIndirectCommandEXT *)(
|
||||
seq_ptr + vb.seq_offset);
|
||||
#if GFX_VER == 9
|
||||
VkBindVertexBufferIndirectCommandEXT prev_vtx_data =
|
||||
*(global VkBindVertexBufferIndirectCommandEXT *)(
|
||||
prev_seq_ptr + vb.seq_offset);
|
||||
if ((vtx_data.bufferAddress >> 32) != (prev_vtx_data.bufferAddress >> 32)) {
|
||||
uint32_t offset = vtx_data.bufferAddress & 0xffffffff;
|
||||
uint32_t prev_offset = prev_vtx_data.bufferAddress & 0xffffffff;
|
||||
if (offset >= prev_offset && offset < (prev_offset + prev_vtx_data.size))
|
||||
needs_vf_inval = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
genX(write_VERTEX_BUFFER_STATE)(cmd_vb, mocs, vb.binding,
|
||||
vtx_data.bufferAddress,
|
||||
|
|
@ -454,6 +470,53 @@ genX(libanv_preprocess_gfx_generate)(global void *cmd_base,
|
|||
vtx_data.stride);
|
||||
cmd_vb += GENX(VERTEX_BUFFER_STATE_length) * 4;
|
||||
}
|
||||
|
||||
#if GFX_VER == 9
|
||||
global uint32_t *draw_param_ptr =
|
||||
get_ptr(data_base, data_stride, data_prolog_size, seq_idx) +
|
||||
state->layout.push_constants.data_offset +
|
||||
MAX_PUSH_CONSTANTS_SIZE +
|
||||
ANV_DRIVER_PUSH_CONSTANTS_SIZE;
|
||||
|
||||
if (state->descriptor.draw_params & ANV_DGC_DRAW_PARAM_BASE_INSTANCE_VERTEX) {
|
||||
genX(write_VERTEX_BUFFER_STATE)(cmd_vb, mocs, ANV_SVGS_VB_INDEX,
|
||||
(uint64_t)draw_param_ptr, 8, 0);
|
||||
cmd_vb += GENX(VERTEX_BUFFER_STATE_length) * 4;
|
||||
if (state->layout.draw.draw_type == ANV_DGC_DRAW_TYPE_SEQUENTIAL) {
|
||||
VkDrawIndirectCommand data =
|
||||
*((global VkDrawIndirectCommand *)(seq_ptr + state->layout.draw.seq_offset));
|
||||
draw_param_ptr[0] = data.firstVertex;
|
||||
draw_param_ptr[1] = data.firstInstance;
|
||||
} else {
|
||||
VkDrawIndexedIndirectCommand data =
|
||||
*((global VkDrawIndexedIndirectCommand *)(seq_ptr + state->layout.draw.seq_offset));
|
||||
draw_param_ptr[0] = data.vertexOffset;
|
||||
draw_param_ptr[1] = data.firstInstance;
|
||||
}
|
||||
draw_param_ptr += 2;
|
||||
}
|
||||
if (state->descriptor.draw_params & ANV_DGC_DRAW_PARAM_DRAW_ID) {
|
||||
genX(write_VERTEX_BUFFER_STATE)(cmd_vb, mocs, ANV_DRAWID_VB_INDEX,
|
||||
(uint64_t)draw_param_ptr, 4, 0);
|
||||
cmd_vb += GENX(VERTEX_BUFFER_STATE_length) * 4;
|
||||
/* gl_DrawID is always 0 since we don't support
|
||||
* VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_COUNT_EXT
|
||||
*/
|
||||
draw_param_ptr[0] = 0;
|
||||
draw_param_ptr += 1;
|
||||
}
|
||||
|
||||
if (needs_vf_inval) {
|
||||
struct GENX(PIPE_CONTROL) pc = {
|
||||
.CommandStreamerStallEnable = true,
|
||||
.VFCacheInvalidationEnable = true,
|
||||
};
|
||||
GENX(PIPE_CONTROL_pack)(cmd_vb, &pc);
|
||||
} else {
|
||||
genX(set_data)(cmd_vb, GENX(PIPE_CONTROL_length) * 4, 0);
|
||||
}
|
||||
cmd_vb += GENX(PIPE_CONTROL_length) * 4;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if INTEL_WA_16011107343_GFX_VER || INTEL_WA_22018402687_GFX_VER
|
||||
|
|
@ -497,8 +560,8 @@ genX(libanv_preprocess_gfx_generate)(global void *cmd_base,
|
|||
false /* indexed */,
|
||||
is_predicated,
|
||||
tbimr_enabled,
|
||||
true /* uses_base, unused for Gfx11+ */,
|
||||
true /* uses_draw_id, unused for Gfx11+ */,
|
||||
false /* uses_base, unused for Gfx11+ */,
|
||||
false /* uses_draw_id, unused for Gfx11+ */,
|
||||
0 /* mocs, unused for Gfx11+ */);
|
||||
break;
|
||||
|
||||
|
|
@ -511,8 +574,8 @@ genX(libanv_preprocess_gfx_generate)(global void *cmd_base,
|
|||
true /* indexed */,
|
||||
is_predicated,
|
||||
tbimr_enabled,
|
||||
true /* uses_base, unused for Gfx11+ */,
|
||||
true /* uses_draw_id, unused for Gfx11+ */,
|
||||
false /* uses_base, unused for Gfx11+ */,
|
||||
false /* uses_draw_id, unused for Gfx11+ */,
|
||||
0 /* mocs, unused for Gfx11+ */);
|
||||
break;
|
||||
|
||||
|
|
@ -944,4 +1007,4 @@ genX(libanv_preprocess_rt_generate)(global void *cmd_base,
|
|||
}
|
||||
#endif /* GFX_VERx10 >= 125 */
|
||||
|
||||
#endif /* GFX_VER >= 11 */
|
||||
#endif /* GFX_VER >= 9 */
|
||||
|
|
|
|||
|
|
@ -43,3 +43,23 @@ void genX(copy_data)(global void *dst_ptr,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Fill size bytes at dst_ptr with the 32-bit value data, using a single lane.
|
||||
* size is expected to be a multiple of 4; a remainder of fewer than 4 bytes
* at the end is left unwritten.
|
||||
*/
|
||||
void genX(set_data)(global void *dst_ptr,
|
||||
uint32_t size,
|
||||
uint32_t data)
|
||||
{
|
||||
/* Step 16 bytes at a time; each step stores through the widest of uint4/uint3/uint2/uint that fits in what remains of size. */
for (uint32_t offset = 0; offset < size; offset += 16) {
|
||||
if (offset + 16 <= size) {
|
||||
*(global uint4 *)(dst_ptr + offset) = (uint4)(data);
|
||||
} else if (offset + 12 <= size) {
|
||||
*(global uint3 *)(dst_ptr + offset) = (uint3)(data);
|
||||
} else if (offset + 8 <= size) {
|
||||
*(global uint2 *)(dst_ptr + offset) = (uint2)(data);
|
||||
} else if (offset + 4 <= size) {
|
||||
*(global uint *)(dst_ptr + offset) = data;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -203,9 +203,26 @@ VkResult anv_CreateIndirectCommandsLayoutEXT(
|
|||
}
|
||||
|
||||
/* 3DSTATE_VERTEX_BUFFERS */
|
||||
if (vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_VB)) {
|
||||
if (devinfo->ver == 9) {
|
||||
const uint32_t n_vb_entries =
|
||||
2 + util_bitcount(vk_layout->vertex_bindings);
|
||||
layout_add_command(layout_obj,
|
||||
(1 /* TODO: _3DSTATE_VERTEX_BUFFERS_length(devinfo) */ +
|
||||
/* Number of vertex buffers + draw params (Gfx9 only) */
|
||||
n_vb_entries *
|
||||
VERTEX_BUFFER_STATE_length(devinfo)) * 4,
|
||||
"vertex");
|
||||
if (vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_VB)) {
|
||||
layout_add_command(layout_obj,
|
||||
PIPE_CONTROL_length(devinfo) * 4,
|
||||
"vertex cache inval");
|
||||
}
|
||||
/* Draw params data, gl_BaseInstance, gl_BaseVertex, gl_DrawID */
|
||||
layout_add_data(layout_obj, 4 * 3, 4, NULL);
|
||||
} else if (vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_VB)) {
|
||||
layout_add_command(layout_obj,
|
||||
(1 /* TODO: _3DSTATE_VERTEX_BUFFERS_length(devinfo) */ +
|
||||
/* Number of vertex buffers */
|
||||
util_bitcount(vk_layout->vertex_bindings) *
|
||||
VERTEX_BUFFER_STATE_length(devinfo)) * 4,
|
||||
"vertex");
|
||||
|
|
@ -499,9 +516,26 @@ anv_dgc_fill_gfx_layout(struct anv_dgc_gfx_layout *layout,
|
|||
layout->vertex_buffers.buffers[i].binding =
|
||||
vk_layout->vb_layouts[i].binding;
|
||||
}
|
||||
|
||||
cmd_offset += layout->vertex_buffers.cmd_size;
|
||||
}
|
||||
if (devinfo->ver == 9) {
|
||||
const struct brw_vs_prog_data *vs_prog_data =
|
||||
get_shader_vs_prog_data(shaders[MESA_SHADER_VERTEX]);
|
||||
if (vs_prog_data->uses_firstvertex ||
|
||||
vs_prog_data->uses_baseinstance ||
|
||||
vs_prog_data->uses_drawid) {
|
||||
layout->vertex_buffers.cmd_size = MAX2(
|
||||
layout->vertex_buffers.cmd_size,
|
||||
4 /* TODO: _3DSTATE_VERTEX_BUFFERS_length(devinfo) */);
|
||||
if (vs_prog_data->uses_firstvertex ||
|
||||
vs_prog_data->uses_baseinstance)
|
||||
layout->vertex_buffers.cmd_size += VERTEX_BUFFER_STATE_length(devinfo) * 4;
|
||||
if (vs_prog_data->uses_drawid)
|
||||
layout->vertex_buffers.cmd_size += VERTEX_BUFFER_STATE_length(devinfo) * 4;
|
||||
}
|
||||
if (vk_layout->dgc_info & BITFIELD_BIT(MESA_VK_DGC_VB))
|
||||
layout->vertex_buffers.cmd_size += PIPE_CONTROL_length(devinfo) * 4;
|
||||
}
|
||||
cmd_offset += layout->vertex_buffers.cmd_size;
|
||||
|
||||
layout->indirect_set.final_cmds_offset = cmd_offset;
|
||||
if (intel_needs_workaround(devinfo, 16011107343) &&
|
||||
|
|
|
|||
|
|
@ -68,6 +68,15 @@ anv_write_gfx_indirect_descriptor(struct anv_device *device,
|
|||
}
|
||||
assert(descriptor->final_commands_size <= sizeof(descriptor->final_commands));
|
||||
|
||||
if (device->info->ver == 9) {
|
||||
const struct brw_vs_prog_data *vs_prog_data = get_gfx_vs_prog_data(gfx);
|
||||
|
||||
descriptor->draw_params =
|
||||
((vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) ?
|
||||
ANV_DGC_DRAW_PARAM_BASE_INSTANCE_VERTEX : 0) |
|
||||
(vs_prog_data->uses_drawid ? ANV_DGC_DRAW_PARAM_DRAW_ID : 0);
|
||||
}
|
||||
|
||||
anv_foreach_vk_stage(vk_stage, ANV_GRAPHICS_STAGE_BITS) {
|
||||
enum anv_dgc_stage gen_stage = anv_vk_stage_to_dgc_stage(vk_stage);
|
||||
enum mesa_shader_stage stage = vk_to_mesa_shader_stage(vk_stage);
|
||||
|
|
|
|||
|
|
@ -304,8 +304,7 @@ get_device_extensions(const struct anv_physical_device *device,
|
|||
* buffer approach, at the expense of late preprocessing. But this is
|
||||
* for later.
|
||||
*/
|
||||
.EXT_device_generated_commands = device->info.verx10 >= 125 ||
|
||||
(device->info.ver >= 11 || ANV_DEBUG(EXPERIMENTAL)),
|
||||
.EXT_device_generated_commands = device->info.verx10 >= 125 || ANV_DEBUG(EXPERIMENTAL),
|
||||
.EXT_device_memory_report = true,
|
||||
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
|
||||
.EXT_display_control = true,
|
||||
|
|
|
|||
|
|
@ -163,8 +163,6 @@ struct intel_perf_query_result;
|
|||
#define BINDING_TABLE_VIEW_SIZE (1u << 20)
|
||||
#define BINDING_TABLE_POOL_DEFAULT_BLOCK_SIZE (4096)
|
||||
|
||||
#define HW_MAX_VBS 33
|
||||
|
||||
/* 3DSTATE_VERTEX_BUFFER supports 33 VBs, but before Gen11 we used 2
|
||||
* for base & drawid SGVs */
|
||||
static inline int
|
||||
|
|
|
|||
|
|
@ -75,6 +75,8 @@
|
|||
*/
|
||||
#define MAX_BINDING_TABLE_SIZE 240
|
||||
|
||||
#define HW_MAX_VBS 33
|
||||
|
||||
/* 3DSTATE_VERTEX_BUFFER supports 33 VBs, but these limits are applied on Gen9
|
||||
* graphics, where 2 VBs are reserved for base & drawid SGVs.
|
||||
*/
|
||||
|
|
@ -350,6 +352,11 @@ enum anv_dgc_push_slot_type {
|
|||
ANV_DGC_PUSH_SLOT_TYPE_OTHER,
|
||||
};
|
||||
|
||||
/* Bitmask of the draw parameters the vertex shader reads. On Gfx9 the DGC
 * generation shader emits one extra VERTEX_BUFFER_STATE per bit set here.
 */
enum anv_dgc_draw_params {
|
||||
/* Set when the vertex shader uses firstvertex or baseinstance. */
ANV_DGC_DRAW_PARAM_BASE_INSTANCE_VERTEX = BITFIELD_BIT(0),
|
||||
/* Set when the vertex shader uses drawid. */
ANV_DGC_DRAW_PARAM_DRAW_ID = BITFIELD_BIT(1),
|
||||
};
|
||||
|
||||
/**
|
||||
* This structure holds prepacked HW instructions for a set of graphics
|
||||
* shaders forming a pipeline. It is part of the command buffer temporary
|
||||
|
|
@ -362,7 +369,12 @@ struct anv_dgc_gfx_descriptor {
|
|||
uint32_t final_commands[20];
|
||||
uint32_t final_commands_size;
|
||||
|
||||
uint32_t wa_18019110168_remapping_table_offset;
|
||||
union {
|
||||
/* Gfx12.5 only */
|
||||
uint32_t wa_18019110168_remapping_table_offset;
|
||||
/* Gfx9 only */
|
||||
enum anv_dgc_draw_params draw_params;
|
||||
};
|
||||
|
||||
struct {
|
||||
struct anv_dgc_push_stage_state {
|
||||
|
|
|
|||
|
|
@ -786,6 +786,21 @@ void genX(CmdExecuteGeneratedCommandsEXT)(
|
|||
if (cmd_buffer->state.conditional_render_enabled)
|
||||
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
|
||||
|
||||
#if GFX_VER == 9
|
||||
/* Gfx9 has a VF cache issue (it only considers the bottom 32 bits of the VF
|
||||
* buffer address), since we're likely to emit those in the DGC buffer,
|
||||
* invalidate the cache here, further invalidation is emitted in the
|
||||
* generated commands if needed.
|
||||
*/
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR |
|
||||
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
|
||||
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
|
||||
ANV_PIPE_VF_CACHE_INVALIDATE_BIT,
|
||||
"Gfx9 VF cache inval pre dgc exec");
|
||||
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||
#endif
|
||||
|
||||
/* If a shader runs, flush the data to make it visible to CS. */
|
||||
if (params) {
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
|
|
|
|||
|
|
@ -110,7 +110,6 @@ genX(call_internal_shader)(nir_builder *b, enum anv_internal_kernel_name shader_
|
|||
nir_imul_imm(b, load_compute_index(b), 4));
|
||||
return sizeof(struct anv_memcpy_params);
|
||||
|
||||
#if GFX_VER >= 11
|
||||
case ANV_INTERNAL_KERNEL_DGC_GFX_COMPUTE:
|
||||
case ANV_INTERNAL_KERNEL_DGC_GFX_FRAGMENT:
|
||||
genX(libanv_preprocess_gfx_generate)(
|
||||
|
|
@ -186,7 +185,6 @@ genX(call_internal_shader)(nir_builder *b, enum anv_internal_kernel_name shader_
|
|||
load_param(b, 32, struct anv_dgc_dump_params, n_dwords),
|
||||
load_param(b, 64, struct anv_dgc_dump_params, call_addr));
|
||||
return sizeof(struct anv_dgc_dump_params);
|
||||
#endif /* GFX_VER >= 11 */
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
case ANV_INTERNAL_KERNEL_DGC_RT_COMPUTE:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue