mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 18:18:06 +02:00
anv: add gfx9 generated draw support
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Ivan Briano <ivan.briano@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20497>
This commit is contained in:
parent
3ac7d5f258
commit
1d9cf8f381
10 changed files with 432 additions and 64 deletions
|
|
@ -134,6 +134,7 @@ anv_create_cmd_buffer(struct vk_command_pool *pool,
|
||||||
|
|
||||||
cmd_buffer->generation_jump_addr = ANV_NULL_ADDRESS;
|
cmd_buffer->generation_jump_addr = ANV_NULL_ADDRESS;
|
||||||
cmd_buffer->generation_return_addr = ANV_NULL_ADDRESS;
|
cmd_buffer->generation_return_addr = ANV_NULL_ADDRESS;
|
||||||
|
cmd_buffer->generation_bt_state = ANV_STATE_NULL;
|
||||||
|
|
||||||
anv_cmd_state_init(cmd_buffer);
|
anv_cmd_state_init(cmd_buffer);
|
||||||
|
|
||||||
|
|
@ -201,6 +202,7 @@ anv_cmd_buffer_reset(struct vk_command_buffer *vk_cmd_buffer,
|
||||||
|
|
||||||
cmd_buffer->generation_jump_addr = ANV_NULL_ADDRESS;
|
cmd_buffer->generation_jump_addr = ANV_NULL_ADDRESS;
|
||||||
cmd_buffer->generation_return_addr = ANV_NULL_ADDRESS;
|
cmd_buffer->generation_return_addr = ANV_NULL_ADDRESS;
|
||||||
|
cmd_buffer->generation_bt_state = ANV_STATE_NULL;
|
||||||
|
|
||||||
anv_state_stream_finish(&cmd_buffer->surface_state_stream);
|
anv_state_stream_finish(&cmd_buffer->surface_state_stream);
|
||||||
anv_state_stream_init(&cmd_buffer->surface_state_stream,
|
anv_state_stream_init(&cmd_buffer->surface_state_stream,
|
||||||
|
|
|
||||||
|
|
@ -897,7 +897,6 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
|
||||||
|
|
||||||
|
|
||||||
device->generated_indirect_draws =
|
device->generated_indirect_draws =
|
||||||
device->info.ver >= 11 &&
|
|
||||||
debug_get_bool_option("ANV_ENABLE_GENERATED_INDIRECT_DRAWS",
|
debug_get_bool_option("ANV_ENABLE_GENERATED_INDIRECT_DRAWS",
|
||||||
true);
|
true);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@
|
||||||
|
|
||||||
#include "anv_generated_indirect_draws.h"
|
#include "anv_generated_indirect_draws.h"
|
||||||
|
|
||||||
|
#include "shaders/gfx9_generated_draws_spv.h"
|
||||||
#include "shaders/gfx11_generated_draws_spv.h"
|
#include "shaders/gfx11_generated_draws_spv.h"
|
||||||
|
|
||||||
/* This pass takes vulkan descriptor bindings 0 & 1 and turns them into global
|
/* This pass takes vulkan descriptor bindings 0 & 1 and turns them into global
|
||||||
|
|
@ -101,7 +102,27 @@ lower_vulkan_descriptors_instr(nir_builder *b, nir_instr *instr, void *cb_data)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case 2:
|
case 2: {
|
||||||
|
desc_value =
|
||||||
|
nir_load_ubo(b, 1, 64,
|
||||||
|
nir_imm_int(b, 2),
|
||||||
|
nir_imm_int(b,
|
||||||
|
offsetof(struct anv_generated_indirect_params,
|
||||||
|
draw_ids_addr)),
|
||||||
|
.align_mul = 8,
|
||||||
|
.align_offset = 0,
|
||||||
|
.range_base = 0,
|
||||||
|
.range = ~0);
|
||||||
|
desc_value =
|
||||||
|
nir_vec4(b,
|
||||||
|
nir_unpack_64_2x32_split_x(b, desc_value),
|
||||||
|
nir_unpack_64_2x32_split_y(b, desc_value),
|
||||||
|
nir_imm_int(b, 0),
|
||||||
|
nir_imm_int(b, 0));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case 3:
|
||||||
desc_value =
|
desc_value =
|
||||||
nir_vec2(b,
|
nir_vec2(b,
|
||||||
nir_imm_int(b, 2),
|
nir_imm_int(b, 2),
|
||||||
|
|
@ -276,9 +297,6 @@ compile_upload_spirv(struct anv_device *device,
|
||||||
VkResult
|
VkResult
|
||||||
anv_device_init_generated_indirect_draws(struct anv_device *device)
|
anv_device_init_generated_indirect_draws(struct anv_device *device)
|
||||||
{
|
{
|
||||||
if (device->info->ver < 11)
|
|
||||||
return VK_SUCCESS;
|
|
||||||
|
|
||||||
const struct intel_l3_weights w =
|
const struct intel_l3_weights w =
|
||||||
intel_get_default_l3_weights(device->info,
|
intel_get_default_l3_weights(device->info,
|
||||||
true /* wants_dc_cache */,
|
true /* wants_dc_cache */,
|
||||||
|
|
@ -298,16 +316,24 @@ anv_device_init_generated_indirect_draws(struct anv_device *device)
|
||||||
sizeof(indirect_draws_key),
|
sizeof(indirect_draws_key),
|
||||||
NULL);
|
NULL);
|
||||||
if (device->generated_draw_kernel == NULL) {
|
if (device->generated_draw_kernel == NULL) {
|
||||||
|
const uint32_t *spirv_source =
|
||||||
|
device->info->ver >= 11 ?
|
||||||
|
gfx11_generated_draws_spv_source :
|
||||||
|
gfx9_generated_draws_spv_source;
|
||||||
|
const uint32_t spirv_source_size =
|
||||||
|
device->info->ver >= 11 ?
|
||||||
|
ARRAY_SIZE(gfx11_generated_draws_spv_source) :
|
||||||
|
ARRAY_SIZE(gfx9_generated_draws_spv_source);
|
||||||
|
const uint32_t send_count =
|
||||||
|
device->info->ver >= 11 ?
|
||||||
|
11 /* 2 * (2 loads + 3 stores) + 1 store */ :
|
||||||
|
17 /* 2 * (2 loads + 6 stores) + 1 store */;
|
||||||
|
|
||||||
device->generated_draw_kernel =
|
device->generated_draw_kernel =
|
||||||
compile_upload_spirv(device,
|
compile_upload_spirv(device,
|
||||||
&indirect_draws_key,
|
&indirect_draws_key,
|
||||||
sizeof(indirect_draws_key),
|
sizeof(indirect_draws_key),
|
||||||
gfx11_generated_draws_spv_source,
|
spirv_source, spirv_source_size, send_count);
|
||||||
ARRAY_SIZE(gfx11_generated_draws_spv_source),
|
|
||||||
11 /*
|
|
||||||
* 2 * (2 indirect data loads + 3 3DPRIMITVE stores) +
|
|
||||||
* 1 store (MI_BATCH_BUFFER_START)
|
|
||||||
*/);
|
|
||||||
}
|
}
|
||||||
if (device->generated_draw_kernel == NULL)
|
if (device->generated_draw_kernel == NULL)
|
||||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||||
|
|
|
||||||
|
|
@ -28,19 +28,40 @@
|
||||||
|
|
||||||
#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0)
|
#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0)
|
||||||
#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1)
|
#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1)
|
||||||
|
#define ANV_GENERATED_FLAG_DRAWID BITFIELD_BIT(2)
|
||||||
|
#define ANV_GENERATED_FLAG_BASE BITFIELD_BIT(3)
|
||||||
|
|
||||||
/* This needs to match common_generated_draws.glsl :
|
/* This needs to match common_generated_draws.glsl :
|
||||||
*
|
*
|
||||||
* layout(set = 0, binding = 2) uniform block
|
* layout(set = 0, binding = 2) uniform block
|
||||||
*/
|
*/
|
||||||
struct anv_generated_indirect_draw_params {
|
struct anv_generated_indirect_draw_params {
|
||||||
|
/* Draw ID buffer address (only used on Gfx9) */
|
||||||
|
uint64_t draw_id_addr;
|
||||||
|
/* Indirect data buffer address (only used on Gfx9) */
|
||||||
uint64_t indirect_data_addr;
|
uint64_t indirect_data_addr;
|
||||||
|
/* Stride between elements of the indirect data buffer */
|
||||||
uint32_t indirect_data_stride;
|
uint32_t indirect_data_stride;
|
||||||
uint32_t flags; /* 0-7: bits, 8-15: mocs, 16-23: cmd_dws */
|
uint32_t flags; /* 0-7: bits, 8-15: mocs, 16-23: cmd_dws */
|
||||||
|
/* Base number of the draw ID, it is added to the index computed from the
|
||||||
|
* gl_FragCoord
|
||||||
|
*/
|
||||||
uint32_t draw_base;
|
uint32_t draw_base;
|
||||||
|
|
||||||
|
/* Number of draws to generate */
|
||||||
uint32_t draw_count;
|
uint32_t draw_count;
|
||||||
|
|
||||||
|
/* Maximum number of draws (equal to draw_count for indirect draws without
|
||||||
|
* an indirect count)
|
||||||
|
*/
|
||||||
uint32_t max_draw_count;
|
uint32_t max_draw_count;
|
||||||
|
|
||||||
|
/* Instance multiplier for multi view */
|
||||||
uint32_t instance_multiplier;
|
uint32_t instance_multiplier;
|
||||||
|
|
||||||
|
/* Address to jump to after the generated draw (only used with
|
||||||
|
* indirect draw count variants)
|
||||||
|
*/
|
||||||
uint64_t end_addr;
|
uint64_t end_addr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -53,6 +74,9 @@ struct anv_generated_indirect_params {
|
||||||
/* Global address of binding 1 */
|
/* Global address of binding 1 */
|
||||||
uint64_t generated_cmds_addr;
|
uint64_t generated_cmds_addr;
|
||||||
|
|
||||||
|
/* Global address of binding 2 */
|
||||||
|
uint64_t draw_ids_addr;
|
||||||
|
|
||||||
/* CPU side pointer to the previous item when number of draws has to be
|
/* CPU side pointer to the previous item when number of draws has to be
|
||||||
* split into smaller chunks, see while loop in
|
* split into smaller chunks, see while loop in
|
||||||
* genX(cmd_buffer_emit_indirect_generated_draws)
|
* genX(cmd_buffer_emit_indirect_generated_draws)
|
||||||
|
|
|
||||||
|
|
@ -2777,6 +2777,11 @@ struct anv_cmd_buffer {
|
||||||
*/
|
*/
|
||||||
struct anv_address generation_return_addr;
|
struct anv_address generation_return_addr;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Binding table allocation for generation shaders (only used on Gfx9).
|
||||||
|
*/
|
||||||
|
struct anv_state generation_bt_state;
|
||||||
|
|
||||||
/** List of anv_batch_bo used for generation
|
/** List of anv_batch_bo used for generation
|
||||||
*
|
*
|
||||||
* We have to keep this separated of the anv_cmd_buffer::batch_bos that is
|
* We have to keep this separated of the anv_cmd_buffer::batch_bos that is
|
||||||
|
|
|
||||||
|
|
@ -3550,12 +3550,8 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||||
genX(cmd_buffer_flush_dynamic_state)(cmd_buffer);
|
genX(cmd_buffer_flush_dynamic_state)(cmd_buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define GFX_HAS_GENERATED_CMDS GFX_VER >= 11
|
|
||||||
#if GFX_HAS_GENERATED_CMDS
|
|
||||||
#include "genX_cmd_draw_generated_indirect.h"
|
#include "genX_cmd_draw_generated_indirect.h"
|
||||||
#endif
|
|
||||||
|
|
||||||
#if GFX_HAS_GENERATED_CMDS
|
|
||||||
ALWAYS_INLINE static bool
|
ALWAYS_INLINE static bool
|
||||||
anv_use_generated_draws(const struct anv_cmd_buffer *cmd_buffer, uint32_t count)
|
anv_use_generated_draws(const struct anv_cmd_buffer *cmd_buffer, uint32_t count)
|
||||||
{
|
{
|
||||||
|
|
@ -3574,7 +3570,6 @@ anv_use_generated_draws(const struct anv_cmd_buffer *cmd_buffer, uint32_t count)
|
||||||
return device->physical->generated_indirect_draws &&
|
return device->physical->generated_indirect_draws &&
|
||||||
count >= device->physical->instance->generated_indirect_threshold;
|
count >= device->physical->instance->generated_indirect_threshold;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
VkResult
|
VkResult
|
||||||
genX(BeginCommandBuffer)(
|
genX(BeginCommandBuffer)(
|
||||||
|
|
@ -3791,9 +3786,7 @@ genX(EndCommandBuffer)(
|
||||||
|
|
||||||
anv_measure_endcommandbuffer(cmd_buffer);
|
anv_measure_endcommandbuffer(cmd_buffer);
|
||||||
|
|
||||||
#if GFX_HAS_GENERATED_CMDS
|
|
||||||
genX(cmd_buffer_flush_generated_draws)(cmd_buffer);
|
genX(cmd_buffer_flush_generated_draws)(cmd_buffer);
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Turn on object level preemption if it is disabled to have it in known
|
/* Turn on object level preemption if it is disabled to have it in known
|
||||||
* state at the beginning of new command buffer.
|
* state at the beginning of new command buffer.
|
||||||
|
|
@ -3873,9 +3866,7 @@ genX(CmdExecuteCommands)(
|
||||||
*/
|
*/
|
||||||
genX(cmd_buffer_apply_pipe_flushes)(primary);
|
genX(cmd_buffer_apply_pipe_flushes)(primary);
|
||||||
|
|
||||||
#if GFX_HAS_GENERATED_CMDS
|
|
||||||
genX(cmd_buffer_flush_generated_draws)(primary);
|
genX(cmd_buffer_flush_generated_draws)(primary);
|
||||||
#endif
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < commandBufferCount; i++) {
|
for (uint32_t i = 0; i < commandBufferCount; i++) {
|
||||||
ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
|
ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
|
||||||
|
|
@ -4066,10 +4057,8 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
|
||||||
anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
|
anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
|
||||||
anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags);
|
anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags);
|
||||||
|
|
||||||
#if GFX_HAS_GENERATED_CMDS
|
|
||||||
if (dst_flags & VK_ACCESS_INDIRECT_COMMAND_READ_BIT)
|
if (dst_flags & VK_ACCESS_INDIRECT_COMMAND_READ_BIT)
|
||||||
genX(cmd_buffer_flush_generated_draws)(cmd_buffer);
|
genX(cmd_buffer_flush_generated_draws)(cmd_buffer);
|
||||||
#endif
|
|
||||||
|
|
||||||
anv_add_pending_pipe_bits(cmd_buffer, bits, reason);
|
anv_add_pending_pipe_bits(cmd_buffer, bits, reason);
|
||||||
}
|
}
|
||||||
|
|
@ -4721,7 +4710,6 @@ void genX(CmdDrawIndirect)(
|
||||||
drawCount);
|
drawCount);
|
||||||
trace_intel_begin_draw_indirect(&cmd_buffer->trace);
|
trace_intel_begin_draw_indirect(&cmd_buffer->trace);
|
||||||
|
|
||||||
#if GFX_HAS_GENERATED_CMDS
|
|
||||||
if (anv_use_generated_draws(cmd_buffer, drawCount)) {
|
if (anv_use_generated_draws(cmd_buffer, drawCount)) {
|
||||||
genX(cmd_buffer_emit_indirect_generated_draws)(
|
genX(cmd_buffer_emit_indirect_generated_draws)(
|
||||||
cmd_buffer,
|
cmd_buffer,
|
||||||
|
|
@ -4735,11 +4723,6 @@ void genX(CmdDrawIndirect)(
|
||||||
anv_address_add(buffer->address, offset),
|
anv_address_add(buffer->address, offset),
|
||||||
stride, drawCount, false /* indexed */);
|
stride, drawCount, false /* indexed */);
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
emit_indirect_draws(cmd_buffer,
|
|
||||||
anv_address_add(buffer->address, offset),
|
|
||||||
stride, drawCount, false /* indexed */);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
trace_intel_end_draw_indirect(&cmd_buffer->trace, drawCount);
|
trace_intel_end_draw_indirect(&cmd_buffer->trace, drawCount);
|
||||||
}
|
}
|
||||||
|
|
@ -4763,7 +4746,6 @@ void genX(CmdDrawIndexedIndirect)(
|
||||||
drawCount);
|
drawCount);
|
||||||
trace_intel_begin_draw_indexed_indirect(&cmd_buffer->trace);
|
trace_intel_begin_draw_indexed_indirect(&cmd_buffer->trace);
|
||||||
|
|
||||||
#if GFX_HAS_GENERATED_CMDS
|
|
||||||
if (anv_use_generated_draws(cmd_buffer, drawCount)) {
|
if (anv_use_generated_draws(cmd_buffer, drawCount)) {
|
||||||
genX(cmd_buffer_emit_indirect_generated_draws)(
|
genX(cmd_buffer_emit_indirect_generated_draws)(
|
||||||
cmd_buffer,
|
cmd_buffer,
|
||||||
|
|
@ -4777,11 +4759,6 @@ void genX(CmdDrawIndexedIndirect)(
|
||||||
anv_address_add(buffer->address, offset),
|
anv_address_add(buffer->address, offset),
|
||||||
stride, drawCount, true /* indexed */);
|
stride, drawCount, true /* indexed */);
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
emit_indirect_draws(cmd_buffer,
|
|
||||||
anv_address_add(buffer->address, offset),
|
|
||||||
stride, drawCount, true /* indexed */);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
trace_intel_end_draw_indexed_indirect(&cmd_buffer->trace, drawCount);
|
trace_intel_end_draw_indexed_indirect(&cmd_buffer->trace, drawCount);
|
||||||
}
|
}
|
||||||
|
|
@ -4966,7 +4943,6 @@ void genX(CmdDrawIndirectCount)(
|
||||||
anv_address_add(count_buffer->address, countBufferOffset);
|
anv_address_add(count_buffer->address, countBufferOffset);
|
||||||
stride = MAX2(stride, sizeof(VkDrawIndirectCommand));
|
stride = MAX2(stride, sizeof(VkDrawIndirectCommand));
|
||||||
|
|
||||||
#if GFX_HAS_GENERATED_CMDS
|
|
||||||
if (anv_use_generated_draws(cmd_buffer, maxDrawCount)) {
|
if (anv_use_generated_draws(cmd_buffer, maxDrawCount)) {
|
||||||
genX(cmd_buffer_emit_indirect_generated_draws)(
|
genX(cmd_buffer_emit_indirect_generated_draws)(
|
||||||
cmd_buffer,
|
cmd_buffer,
|
||||||
|
|
@ -4983,14 +4959,6 @@ void genX(CmdDrawIndirectCount)(
|
||||||
maxDrawCount,
|
maxDrawCount,
|
||||||
false /* indexed */);
|
false /* indexed */);
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
emit_indirect_count_draws(cmd_buffer,
|
|
||||||
indirect_data_address,
|
|
||||||
stride,
|
|
||||||
count_address,
|
|
||||||
maxDrawCount,
|
|
||||||
false /* indexed */);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
trace_intel_end_draw_indirect_count(&cmd_buffer->trace, maxDrawCount);
|
trace_intel_end_draw_indirect_count(&cmd_buffer->trace, maxDrawCount);
|
||||||
}
|
}
|
||||||
|
|
@ -5023,7 +4991,6 @@ void genX(CmdDrawIndexedIndirectCount)(
|
||||||
anv_address_add(count_buffer->address, countBufferOffset);
|
anv_address_add(count_buffer->address, countBufferOffset);
|
||||||
stride = MAX2(stride, sizeof(VkDrawIndexedIndirectCommand));
|
stride = MAX2(stride, sizeof(VkDrawIndexedIndirectCommand));
|
||||||
|
|
||||||
#if GFX_HAS_GENERATED_CMDS
|
|
||||||
if (anv_use_generated_draws(cmd_buffer, maxDrawCount)) {
|
if (anv_use_generated_draws(cmd_buffer, maxDrawCount)) {
|
||||||
genX(cmd_buffer_emit_indirect_generated_draws)(
|
genX(cmd_buffer_emit_indirect_generated_draws)(
|
||||||
cmd_buffer,
|
cmd_buffer,
|
||||||
|
|
@ -5040,14 +5007,6 @@ void genX(CmdDrawIndexedIndirectCount)(
|
||||||
maxDrawCount,
|
maxDrawCount,
|
||||||
true /* indexed */);
|
true /* indexed */);
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
emit_indirect_count_draws(cmd_buffer,
|
|
||||||
indirect_data_address,
|
|
||||||
stride,
|
|
||||||
count_address,
|
|
||||||
maxDrawCount,
|
|
||||||
true /* indexed */);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace, maxDrawCount);
|
trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace, maxDrawCount);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -34,10 +34,6 @@
|
||||||
#include "anv_private.h"
|
#include "anv_private.h"
|
||||||
#include "anv_generated_indirect_draws.h"
|
#include "anv_generated_indirect_draws.h"
|
||||||
|
|
||||||
#if GFX_VER < 11
|
|
||||||
#error "Generated draws optimization not supported prior to Gfx11"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* This is a maximum number of items a fragment shader can generate due to the
|
/* This is a maximum number of items a fragment shader can generate due to the
|
||||||
* viewport size.
|
* viewport size.
|
||||||
*/
|
*/
|
||||||
|
|
@ -92,7 +88,9 @@ genX(cmd_buffer_emit_generate_draws_pipeline)(struct anv_cmd_buffer *cmd_buffer)
|
||||||
sgvs.InstanceIDComponentNumber = COMP_1;
|
sgvs.InstanceIDComponentNumber = COMP_1;
|
||||||
sgvs.InstanceIDElementOffset = 0;
|
sgvs.InstanceIDElementOffset = 0;
|
||||||
}
|
}
|
||||||
|
#if GFX_VER >= 11
|
||||||
anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);
|
anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);
|
||||||
|
#endif
|
||||||
anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
|
anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
|
||||||
vfi.InstancingEnable = false;
|
vfi.InstancingEnable = false;
|
||||||
vfi.VertexElementIndex = 0;
|
vfi.VertexElementIndex = 0;
|
||||||
|
|
@ -183,7 +181,7 @@ genX(cmd_buffer_emit_generate_draws_pipeline)(struct anv_cmd_buffer *cmd_buffer)
|
||||||
|
|
||||||
ps.VectorMaskEnable = prog_data->uses_vmask;
|
ps.VectorMaskEnable = prog_data->uses_vmask;
|
||||||
|
|
||||||
ps.BindingTableEntryCount = 0;
|
ps.BindingTableEntryCount = GFX_VER == 9 ? 1 : 0;
|
||||||
ps.PushConstantEnable = prog_data->base.nr_params > 0 ||
|
ps.PushConstantEnable = prog_data->base.nr_params > 0 ||
|
||||||
prog_data->base.ubo_ranges[0].length;
|
prog_data->base.ubo_ranges[0].length;
|
||||||
|
|
||||||
|
|
@ -254,7 +252,44 @@ genX(cmd_buffer_emit_generate_draws_pipeline)(struct anv_cmd_buffer *cmd_buffer)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
cmd_buffer->state.gfx.vb_dirty = BITFIELD_BIT(0) | BITFIELD_BIT(1);
|
#if GFX_VER == 9
|
||||||
|
/* Allocate a binding table for Gfx9 for 2 reasons:
|
||||||
|
*
|
||||||
|
* 1. we need to emit a 3DSTATE_BINDING_TABLE_POINTERS_PS to make the
|
||||||
|
* HW apply the preceding 3DSTATE_CONSTANT_PS
|
||||||
|
*
|
||||||
|
* 2. Emitting an empty 3DSTATE_BINDING_TABLE_POINTERS_PS would cause RT
|
||||||
|
* writes (even though they're empty) to disturb later writes
|
||||||
|
* (probably due to RT cache)
|
||||||
|
*
|
||||||
|
* Our binding table only has one entry to the null surface.
|
||||||
|
*/
|
||||||
|
uint32_t bt_offset;
|
||||||
|
cmd_buffer->generation_bt_state =
|
||||||
|
anv_cmd_buffer_alloc_binding_table(cmd_buffer, 1, &bt_offset);
|
||||||
|
if (cmd_buffer->generation_bt_state.map == NULL) {
|
||||||
|
VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Re-emit state base addresses so we get the new surface state base
|
||||||
|
* address before we start emitting binding tables etc.
|
||||||
|
*/
|
||||||
|
genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
|
||||||
|
|
||||||
|
cmd_buffer->generation_bt_state =
|
||||||
|
anv_cmd_buffer_alloc_binding_table(cmd_buffer, 1, &bt_offset);
|
||||||
|
assert(cmd_buffer->generation_bt_state.map != NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t *bt_map = cmd_buffer->generation_bt_state.map;
|
||||||
|
bt_map[0] = anv_bindless_state_for_binding_table(
|
||||||
|
cmd_buffer->device->null_surface_state).offset + bt_offset;
|
||||||
|
|
||||||
|
cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
cmd_buffer->state.gfx.vb_dirty = BITFIELD_BIT(0);
|
||||||
cmd_buffer->state.gfx.dirty |= ~(ANV_CMD_DIRTY_INDEX_BUFFER |
|
cmd_buffer->state.gfx.dirty |= ~(ANV_CMD_DIRTY_INDEX_BUFFER |
|
||||||
ANV_CMD_DIRTY_XFB_ENABLE);
|
ANV_CMD_DIRTY_XFB_ENABLE);
|
||||||
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
|
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||||
|
|
@ -326,10 +361,20 @@ genX(cmd_buffer_emit_generated_push_data)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
.ConstantBufferReadLength = DIV_ROUND_UP(push_data_state.alloc_size, 32),
|
.ConstantBufferReadLength = DIV_ROUND_UP(push_data_state.alloc_size, 32),
|
||||||
});
|
});
|
||||||
#else
|
#else
|
||||||
|
/* The Skylake PRM contains the following restriction:
|
||||||
|
*
|
||||||
|
* "The driver must ensure The following case does not occur
|
||||||
|
* without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
|
||||||
|
* buffer 3 read length equal to zero committed followed by a
|
||||||
|
* 3DSTATE_CONSTANT_* with buffer 0 read length not equal to
|
||||||
|
* zero committed."
|
||||||
|
*
|
||||||
|
* To avoid this, we program the highest slot.
|
||||||
|
*/
|
||||||
anv_batch_emit(batch, GENX(3DSTATE_CONSTANT_PS), c) {
|
anv_batch_emit(batch, GENX(3DSTATE_CONSTANT_PS), c) {
|
||||||
c.MOCS = anv_mocs(cmd_buffer->device, NULL, 0);
|
c.MOCS = anv_mocs(cmd_buffer->device, NULL, 0);
|
||||||
c.ConstantBody.ReadLength[0] = DIV_ROUND_UP(push_data_state.alloc_size, 32);
|
c.ConstantBody.ReadLength[3] = DIV_ROUND_UP(push_data_state.alloc_size, 32);
|
||||||
c.ConstantBody.Buffer[0] = push_data_addr;
|
c.ConstantBody.Buffer[3] = push_data_addr;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
@ -340,6 +385,7 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
uint32_t generated_cmd_stride,
|
uint32_t generated_cmd_stride,
|
||||||
struct anv_address indirect_data_addr,
|
struct anv_address indirect_data_addr,
|
||||||
uint32_t indirect_data_stride,
|
uint32_t indirect_data_stride,
|
||||||
|
struct anv_address draw_id_addr,
|
||||||
uint32_t item_base,
|
uint32_t item_base,
|
||||||
uint32_t item_count,
|
uint32_t item_count,
|
||||||
struct anv_address count_addr,
|
struct anv_address count_addr,
|
||||||
|
|
@ -356,15 +402,21 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
ANV_UBO_ALIGNMENT);
|
ANV_UBO_ALIGNMENT);
|
||||||
|
|
||||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||||
|
const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
|
||||||
|
|
||||||
struct anv_generated_indirect_params *push_data = push_data_state.map;
|
struct anv_generated_indirect_params *push_data = push_data_state.map;
|
||||||
*push_data = (struct anv_generated_indirect_params) {
|
*push_data = (struct anv_generated_indirect_params) {
|
||||||
.draw = {
|
.draw = {
|
||||||
|
.draw_id_addr = anv_address_physical(draw_id_addr),
|
||||||
.indirect_data_addr = anv_address_physical(indirect_data_addr),
|
.indirect_data_addr = anv_address_physical(indirect_data_addr),
|
||||||
.indirect_data_stride = indirect_data_stride,
|
.indirect_data_stride = indirect_data_stride,
|
||||||
.flags = (indexed ? ANV_GENERATED_FLAG_INDEXED : 0) |
|
.flags = (indexed ? ANV_GENERATED_FLAG_INDEXED : 0) |
|
||||||
(cmd_buffer->state.conditional_render_enabled ?
|
(cmd_buffer->state.conditional_render_enabled ?
|
||||||
ANV_GENERATED_FLAG_PREDICATED : 0) |
|
ANV_GENERATED_FLAG_PREDICATED : 0) |
|
||||||
|
((vs_prog_data->uses_firstvertex ||
|
||||||
|
vs_prog_data->uses_baseinstance) ?
|
||||||
|
ANV_GENERATED_FLAG_BASE : 0) |
|
||||||
|
(vs_prog_data->uses_drawid ? ANV_GENERATED_FLAG_DRAWID : 0) |
|
||||||
(anv_mocs(cmd_buffer->device, indirect_data_addr.bo,
|
(anv_mocs(cmd_buffer->device, indirect_data_addr.bo,
|
||||||
ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
|
ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
|
||||||
((generated_cmd_stride / 4) << 16),
|
((generated_cmd_stride / 4) << 16),
|
||||||
|
|
@ -378,6 +430,7 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
},
|
},
|
||||||
.indirect_data_addr = anv_address_physical(indirect_data_addr),
|
.indirect_data_addr = anv_address_physical(indirect_data_addr),
|
||||||
.generated_cmds_addr = anv_address_physical(generated_cmds_addr),
|
.generated_cmds_addr = anv_address_physical(generated_cmds_addr),
|
||||||
|
.draw_ids_addr = anv_address_physical(draw_id_addr),
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!anv_address_is_null(count_addr)) {
|
if (!anv_address_is_null(count_addr)) {
|
||||||
|
|
@ -405,6 +458,15 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
/* Only emit the data after the memcpy above. */
|
/* Only emit the data after the memcpy above. */
|
||||||
genX(cmd_buffer_emit_generated_push_data)(cmd_buffer, push_data_state);
|
genX(cmd_buffer_emit_generated_push_data)(cmd_buffer, push_data_state);
|
||||||
|
|
||||||
|
#if GFX_VER == 9
|
||||||
|
/* Why are the push constants not flushed without a binding table
|
||||||
|
* update??
|
||||||
|
*/
|
||||||
|
anv_batch_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), btp) {
|
||||||
|
btp.PointertoPSBindingTable = cmd_buffer->generation_bt_state.offset;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) {
|
anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) {
|
||||||
prim.VertexAccessType = SEQUENTIAL;
|
prim.VertexAccessType = SEQUENTIAL;
|
||||||
prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
|
prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
|
||||||
|
|
@ -440,6 +502,58 @@ genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_b
|
||||||
trace_intel_end_generate_draws(&cmd_buffer->trace);
|
trace_intel_end_generate_draws(&cmd_buffer->trace);
|
||||||
|
|
||||||
genX(cmd_buffer_emit_generate_draws_pipeline)(cmd_buffer);
|
genX(cmd_buffer_emit_generate_draws_pipeline)(cmd_buffer);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct anv_address
|
||||||
|
genX(cmd_buffer_get_draw_id_addr)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
|
uint32_t draw_id_count)
|
||||||
|
{
|
||||||
|
#if GFX_VER >= 11
|
||||||
|
return ANV_NULL_ADDRESS;
|
||||||
|
#else
|
||||||
|
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||||
|
const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
|
||||||
|
if (!vs_prog_data->uses_drawid)
|
||||||
|
return ANV_NULL_ADDRESS;
|
||||||
|
|
||||||
|
struct anv_state draw_id_state =
|
||||||
|
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 4 * draw_id_count, 4);
|
||||||
|
return anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool,
|
||||||
|
draw_id_state);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
genX(cmd_buffer_get_generated_draw_stride)(struct anv_cmd_buffer *cmd_buffer)
|
||||||
|
{
|
||||||
|
/* With the extended parameters in 3DPRIMITIVE on Gfx11+ we can emit
|
||||||
|
* everything. Prior to this, we need to emit a couple of
|
||||||
|
* VERTEX_BUFFER_STATE.
|
||||||
|
*/
|
||||||
|
#if GFX_VER >= 11
|
||||||
|
return 4 * GENX(3DPRIMITIVE_EXTENDED_length);
|
||||||
|
#else
|
||||||
|
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||||
|
const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
|
||||||
|
|
||||||
|
uint32_t len = 0;
|
||||||
|
|
||||||
|
if (vs_prog_data->uses_firstvertex ||
|
||||||
|
vs_prog_data->uses_baseinstance ||
|
||||||
|
vs_prog_data->uses_drawid) {
|
||||||
|
len += 4; /* 3DSTATE_VERTEX_BUFFERS */
|
||||||
|
|
||||||
|
if (vs_prog_data->uses_firstvertex ||
|
||||||
|
vs_prog_data->uses_baseinstance)
|
||||||
|
len += 4 * GENX(VERTEX_BUFFER_STATE_length);
|
||||||
|
|
||||||
|
if (vs_prog_data->uses_drawid)
|
||||||
|
len += 4 * GENX(VERTEX_BUFFER_STATE_length);
|
||||||
|
}
|
||||||
|
|
||||||
|
return len + 4 * GENX(3DPRIMITIVE_length);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@ -465,14 +579,54 @@ genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer
|
||||||
uint32_t max_draw_count,
|
uint32_t max_draw_count,
|
||||||
bool indexed)
|
bool indexed)
|
||||||
{
|
{
|
||||||
|
const bool start_generation_batch =
|
||||||
|
anv_address_is_null(cmd_buffer->generation_return_addr);
|
||||||
|
|
||||||
genX(flush_pipeline_select_3d)(cmd_buffer);
|
genX(flush_pipeline_select_3d)(cmd_buffer);
|
||||||
|
|
||||||
|
struct anv_address draw_id_addr =
|
||||||
|
genX(cmd_buffer_get_draw_id_addr)(cmd_buffer, max_draw_count);
|
||||||
|
|
||||||
|
#if GFX_VER == 9
|
||||||
|
/* Mark the VB-0 as using the entire dynamic state pool area, but only for
|
||||||
|
* the draw call starting the generation batch. All the following ones will
|
||||||
|
* use the same area.
|
||||||
|
*/
|
||||||
|
if (start_generation_batch) {
|
||||||
|
genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, 0,
|
||||||
|
(struct anv_address) {
|
||||||
|
.offset = DYNAMIC_STATE_POOL_MIN_ADDRESS,
|
||||||
|
},
|
||||||
|
DYNAMIC_STATE_POOL_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||||
|
const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
|
||||||
|
|
||||||
|
if (vs_prog_data->uses_baseinstance ||
|
||||||
|
vs_prog_data->uses_firstvertex) {
|
||||||
|
/* We're using the indirect buffer directly to source base instance &
|
||||||
|
* first vertex values. Mark the entire area as used.
|
||||||
|
*/
|
||||||
|
genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX,
|
||||||
|
indirect_data_addr,
|
||||||
|
indirect_data_stride * max_draw_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (vs_prog_data->uses_drawid) {
|
||||||
|
/* Mark the whole draw id buffer as used. */
|
||||||
|
genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX,
|
||||||
|
draw_id_addr,
|
||||||
|
sizeof(uint32_t) * max_draw_count);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Apply the pipeline flush here so the indirect data is available for the
|
/* Apply the pipeline flush here so the indirect data is available for the
|
||||||
* generation shader.
|
* generation shader.
|
||||||
*/
|
*/
|
||||||
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||||
|
|
||||||
if (anv_address_is_null(cmd_buffer->generation_return_addr))
|
if (start_generation_batch)
|
||||||
genX(cmd_buffer_emit_indirect_generated_draws_init)(cmd_buffer);
|
genX(cmd_buffer_emit_indirect_generated_draws_init)(cmd_buffer);
|
||||||
|
|
||||||
/* In order to have the vertex fetch gather the data we need to have a non
|
/* In order to have the vertex fetch gather the data we need to have a non
|
||||||
|
|
@ -493,7 +647,8 @@ genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer
|
||||||
/* Emit the 3D state in the main batch. */
|
/* Emit the 3D state in the main batch. */
|
||||||
genX(cmd_buffer_flush_gfx_state)(cmd_buffer);
|
genX(cmd_buffer_flush_gfx_state)(cmd_buffer);
|
||||||
|
|
||||||
const uint32_t draw_cmd_stride = 4 * GENX(3DPRIMITIVE_EXTENDED_length);
|
const uint32_t draw_cmd_stride =
|
||||||
|
genX(cmd_buffer_get_generated_draw_stride)(cmd_buffer);
|
||||||
|
|
||||||
struct anv_generated_indirect_params *last_params = NULL;
|
struct anv_generated_indirect_params *last_params = NULL;
|
||||||
uint32_t item_base = 0;
|
uint32_t item_base = 0;
|
||||||
|
|
@ -522,6 +677,7 @@ genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer
|
||||||
anv_address_add(indirect_data_addr,
|
anv_address_add(indirect_data_addr,
|
||||||
item_base * indirect_data_stride),
|
item_base * indirect_data_stride),
|
||||||
indirect_data_stride,
|
indirect_data_stride,
|
||||||
|
anv_address_add(draw_id_addr, 4 * item_base),
|
||||||
item_base,
|
item_base,
|
||||||
item_count,
|
item_count,
|
||||||
count_addr,
|
count_addr,
|
||||||
|
|
@ -537,6 +693,10 @@ genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer
|
||||||
}
|
}
|
||||||
|
|
||||||
genX(cmd_buffer_rewrite_forward_end_addr)(cmd_buffer, last_params);
|
genX(cmd_buffer_rewrite_forward_end_addr)(cmd_buffer, last_params);
|
||||||
|
|
||||||
|
#if GFX_VER == 9
|
||||||
|
update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, indexed ? RANDOM : SEQUENTIAL);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@ -552,6 +712,9 @@ genX(cmd_buffer_flush_generated_draws)(struct anv_cmd_buffer *cmd_buffer)
|
||||||
genX(emit_apply_pipe_flushes)(batch,
|
genX(emit_apply_pipe_flushes)(batch,
|
||||||
cmd_buffer->device,
|
cmd_buffer->device,
|
||||||
_3D,
|
_3D,
|
||||||
|
#if GFX_VER == 9
|
||||||
|
ANV_PIPE_VF_CACHE_INVALIDATE_BIT |
|
||||||
|
#endif
|
||||||
ANV_PIPE_DATA_CACHE_FLUSH_BIT |
|
ANV_PIPE_DATA_CACHE_FLUSH_BIT |
|
||||||
ANV_PIPE_CS_STALL_BIT);
|
ANV_PIPE_CS_STALL_BIT);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,8 +25,10 @@
|
||||||
|
|
||||||
#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0)
|
#define ANV_GENERATED_FLAG_INDEXED BITFIELD_BIT(0)
|
||||||
#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1)
|
#define ANV_GENERATED_FLAG_PREDICATED BITFIELD_BIT(1)
|
||||||
|
#define ANV_GENERATED_FLAG_DRAWID BITFIELD_BIT(2)
|
||||||
|
#define ANV_GENERATED_FLAG_BASE BITFIELD_BIT(3)
|
||||||
|
|
||||||
/* These 2 bindings will be accessed through A64 messages */
|
/* These 3 bindings will be accessed through A64 messages */
|
||||||
layout(set = 0, binding = 0, std430) buffer Storage0 {
|
layout(set = 0, binding = 0, std430) buffer Storage0 {
|
||||||
uint indirect_data[];
|
uint indirect_data[];
|
||||||
};
|
};
|
||||||
|
|
@ -35,8 +37,13 @@ layout(set = 0, binding = 1, std430) buffer Storage1 {
|
||||||
uint commands[];
|
uint commands[];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
layout(set = 0, binding = 2, std430) buffer Storage2 {
|
||||||
|
uint draw_ids[];
|
||||||
|
};
|
||||||
|
|
||||||
/* This data will be provided through push constants. */
|
/* This data will be provided through push constants. */
|
||||||
layout(set = 0, binding = 2) uniform block {
|
layout(set = 0, binding = 3) uniform block {
|
||||||
|
uint64_t draw_id_addr;
|
||||||
uint64_t indirect_data_addr;
|
uint64_t indirect_data_addr;
|
||||||
uint indirect_data_stride;
|
uint indirect_data_stride;
|
||||||
uint flags;
|
uint flags;
|
||||||
|
|
@ -47,6 +54,44 @@ layout(set = 0, binding = 2) uniform block {
|
||||||
uint64_t end_addr;
|
uint64_t end_addr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Writes one 4-dword VERTEX_BUFFER_STATE entry into commands[].
 *
 *   write_offset  dword index in commands[] of the first dword to write
 *   mocs          value shifted into dword 0 at bit 16 (MOCS field)
 *   buffer_idx    value shifted into dword 0 at bit 26 (Vertex Buffer Index)
 *   address       64-bit buffer address, low half in dword 1, high half in
 *                 dword 2
 *   size          value stored as-is in dword 3
 *
 * Buffer Pitch and Null Vertex Buffer are always left at 0 and Address
 * Modify Enable is always set.
 */
void write_VERTEX_BUFFER_STATE(uint write_offset,
                               uint mocs,
                               uint buffer_idx,
                               uint64_t address,
                               uint size)
{
   /* Dword 0: control bits. */
   uint dw0 = 1u << 14;        /* Address Modify Enable */
   dw0 |= mocs << 16;          /* MOCS */
   dw0 |= buffer_idx << 26;    /* Vertex Buffer Index */

   /* Dwords 1-2: the address split into two 32-bit halves. */
   uint address_lo = uint(address & 0xffffffff);
   uint address_hi = uint(address >> 32);

   commands[write_offset + 0] = dw0;
   commands[write_offset + 1] = address_lo;
   commands[write_offset + 2] = address_hi;
   commands[write_offset + 3] = size;
}
|
||||||
|
|
||||||
|
/* Writes a 7-dword 3DPRIMITIVE command into commands[] starting at dword
 * index write_offset.
 *
 *   is_predicated  sets bit 8 of the header dword
 *   is_indexed     sets bit 8 of dword 1
 *
 * The remaining arguments are copied verbatim into dwords 2 through 6 in
 * the order: vertex_count_per_instance, start_vertex_location,
 * instance_count, start_instance_location, base_vertex_location.
 */
void write_3DPRIMITIVE(uint write_offset,
                       bool is_predicated,
                       bool is_indexed,
                       uint vertex_count_per_instance,
                       uint start_vertex_location,
                       uint instance_count,
                       uint start_instance_location,
                       uint base_vertex_location)
{
   /* Header dword. */
   uint header = 3u << 29;                 /* Command Type */
   header |= 3u << 27;                     /* Command SubType */
   header |= 3u << 24;                     /* 3D Command Opcode */
   header |= uint(is_predicated) << 8;
   header |= 5u;                           /* DWord Length */

   uint dw = write_offset;
   commands[dw + 0] = header;
   commands[dw + 1] = uint(is_indexed) << 8;
   commands[dw + 2] = vertex_count_per_instance;
   commands[dw + 3] = start_vertex_location;
   commands[dw + 4] = instance_count;
   commands[dw + 5] = start_instance_location;
   commands[dw + 6] = base_vertex_location;
}
|
||||||
|
|
||||||
void write_3DPRIMITIVE_EXTENDED(uint write_offset,
|
void write_3DPRIMITIVE_EXTENDED(uint write_offset,
|
||||||
bool is_predicated,
|
bool is_predicated,
|
||||||
bool is_indexed,
|
bool is_indexed,
|
||||||
|
|
|
||||||
144
src/intel/vulkan/shaders/gfx9_generated_draws.glsl
Normal file
144
src/intel/vulkan/shaders/gfx9_generated_draws.glsl
Normal file
|
|
@ -0,0 +1,144 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2022 Intel Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
#extension GL_ARB_gpu_shader_int64 : enable
|
||||||
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#include "common_generated_draws.glsl"
|
||||||
|
|
||||||
|
/* Emits the optional 3DSTATE_VERTEX_BUFFERS packet that precedes a generated
 * draw when the vertex shader needs base vertex/instance and/or the draw id.
 *
 *   cmd_idx           dword index in commands[] where the packet starts
 *   item_idx          index of this draw within the current generation chunk
 *   draw_id           absolute draw index (draw_base + item_idx)
 *   mocs              MOCS value for the VERTEX_BUFFER_STATE entries
 *   uses_base         emit a vertex buffer sourcing 8 bytes straight from the
 *                     indirect draw structure
 *   uses_drawid       emit a vertex buffer sourcing the 4-byte draw id
 *   base_data_offset  byte offset, inside one indirect draw structure, of the
 *                     8 bytes sourced for uses_base
 *
 * Returns the dword index following what was written; cmd_idx is returned
 * unchanged when neither buffer is needed.
 *
 * Vertex buffer indices 31 and 32 are presumably the driver's
 * ANV_SVGS_VB_INDEX / ANV_DRAWID_VB_INDEX slots -- TODO confirm against the
 * host-side definitions.
 */
uint write_gfx9_draw_vertex_buffers(uint cmd_idx,
                                    uint item_idx,
                                    uint draw_id,
                                    uint mocs,
                                    bool uses_base,
                                    bool uses_drawid,
                                    uint base_data_offset)
{
   if (!uses_base && !uses_drawid)
      return cmd_idx;

   /* One header dword plus four dwords per VERTEX_BUFFER_STATE. */
   uint state_vertex_len =
      1 + (uses_base ? 4 : 0) + (uses_drawid ? 4 : 0);
   commands[cmd_idx] =
      (3 << 29 |                    /* Command Type */
       3 << 27 |                    /* Command SubType */
       0 << 24 |                    /* 3D Command Opcode */
       8 << 16 |                    /* 3D Command Sub Opcode */
       (state_vertex_len - 2) << 0); /* DWord Length */
   cmd_idx += 1;

   if (uses_base) {
      uint64_t indirect_draw_data_addr =
         indirect_data_addr + item_idx * indirect_data_stride +
         base_data_offset;
      write_VERTEX_BUFFER_STATE(cmd_idx,
                                mocs,
                                31,
                                indirect_draw_data_addr,
                                8);
      cmd_idx += 4;
   }

   if (uses_drawid) {
      uint64_t draw_idx_addr = draw_id_addr + 4 * item_idx;
      /* The vertex buffer above fetches from slot item_idx of the draw id
       * area, so the value has to be stored in that same slot. Indexing by
       * draw_id (draw_base + item_idx) only matches when draw_base is 0 and
       * breaks gl_DrawID for every later chunk.
       * NOTE(review): assumes draw_ids[] is bound at draw_id_addr -- confirm
       * against the host-side binding setup.
       */
      draw_ids[item_idx] = draw_id;
      write_VERTEX_BUFFER_STATE(cmd_idx,
                                mocs,
                                32,
                                draw_idx_addr,
                                4);
      cmd_idx += 4;
   }

   return cmd_idx;
}

/* Gfx9 draw generation shader: each fragment turns one indirect draw
 * structure into hardware commands written to commands[].
 *
 * flags layout as decoded below: bit flags ANV_GENERATED_FLAG_*, bits 8-15
 * the MOCS value, bits 16-23 the number of dwords reserved per generated
 * draw. draw_base, draw_count, max_draw_count, instance_multiplier and
 * end_addr are not declared in this file; they come from the included
 * common_generated_draws.glsl.
 */
void main()
{
   bool is_indexed = (flags & ANV_GENERATED_FLAG_INDEXED) != 0;
   bool is_predicated = (flags & ANV_GENERATED_FLAG_PREDICATED) != 0;
   bool uses_base = (flags & ANV_GENERATED_FLAG_BASE) != 0;
   bool uses_drawid = (flags & ANV_GENERATED_FLAG_DRAWID) != 0;
   uint mocs = (flags >> 8) & 0xff;
   uint _3dprim_dw_size = (flags >> 16) & 0xff;

   /* One item per fragment, laid out in rows of 8192 fragments. */
   uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
   uint indirect_data_offset = item_idx * indirect_data_stride / 4;
   uint cmd_idx = item_idx * _3dprim_dw_size;
   uint draw_id = draw_base + item_idx;

   if (draw_id < draw_count) {
      if (is_indexed) {
         /* Loading a VkDrawIndexedIndirectCommand */
         uint index_count    = indirect_data[indirect_data_offset + 0];
         uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier;
         uint first_index    = indirect_data[indirect_data_offset + 2];
         uint vertex_offset  = indirect_data[indirect_data_offset + 3];
         uint first_instance = indirect_data[indirect_data_offset + 4];

         /* vertexOffset/firstInstance sit 12 bytes into the structure. */
         cmd_idx = write_gfx9_draw_vertex_buffers(cmd_idx, item_idx, draw_id,
                                                  mocs, uses_base,
                                                  uses_drawid, 12);
         write_3DPRIMITIVE(cmd_idx,
                           is_predicated,
                           is_indexed,
                           index_count,
                           first_index,
                           instance_count,
                           first_instance,
                           vertex_offset);
      } else {
         /* Loading a VkDrawIndirectCommand structure */
         uint vertex_count   = indirect_data[indirect_data_offset + 0];
         uint instance_count = indirect_data[indirect_data_offset + 1] * instance_multiplier;
         uint first_vertex   = indirect_data[indirect_data_offset + 2];
         uint first_instance = indirect_data[indirect_data_offset + 3];

         /* firstVertex/firstInstance sit 8 bytes into the structure. */
         cmd_idx = write_gfx9_draw_vertex_buffers(cmd_idx, item_idx, draw_id,
                                                  mocs, uses_base,
                                                  uses_drawid, 8);
         write_3DPRIMITIVE(cmd_idx,
                           is_predicated,
                           is_indexed,
                           vertex_count,
                           first_vertex,
                           instance_count,
                           first_instance,
                           0 /* base_vertex_location */);
      }
   } else if (draw_id == draw_count && draw_id < max_draw_count) {
      /* Only write a jump forward in the batch if we have fewer elements than
       * the max draw count.
       */
      write_MI_BATCH_BUFFER_START(cmd_idx, end_addr);
   }
}
|
||||||
|
|
@ -33,6 +33,7 @@ float64_spv_h = custom_target(
|
||||||
)
|
)
|
||||||
|
|
||||||
generated_draws_shaders = [
|
generated_draws_shaders = [
|
||||||
|
'gfx9_generated_draws.glsl',
|
||||||
'gfx11_generated_draws.glsl',
|
'gfx11_generated_draws.glsl',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue