mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 11:28:05 +02:00
anv/intel: add device generated commands shaders
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31384>
This commit is contained in:
parent
c85647b968
commit
1281e2b9a0
8 changed files with 1352 additions and 5 deletions
|
|
@ -11,6 +11,7 @@
|
|||
#endif
|
||||
|
||||
#include "compiler/shader_enums.h"
|
||||
#include "util/bitscan.h"
|
||||
#include "util/enum_operators.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
|
|
@ -14,9 +14,11 @@
|
|||
#elif (GFX_VERx10 == 125)
|
||||
# include "genxml/gen125_rt_cl_pack.h"
|
||||
#elif (GFX_VERx10 == 200)
|
||||
# include "genxml/gen200_rt_cl_pack.h"
|
||||
# include "genxml/xe2_rt_cl_pack.h"
|
||||
#elif (GFX_VERx10 == 300)
|
||||
# include "genxml/gen300_rt_cl_pack.h"
|
||||
# include "genxml/xe3_rt_cl_pack.h"
|
||||
#elif (GFX_VERx10 == 350)
|
||||
# include "genxml/xe3p_rt_cl_pack.h"
|
||||
#else
|
||||
# error "Need to add a pack header include for this gen"
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -128,15 +128,50 @@ endforeach
|
|||
|
||||
genX_cl_included_symbols = [
|
||||
# instructions
|
||||
'3DMESH_3D',
|
||||
'3DSTATE_CLIP',
|
||||
'3DSTATE_CONSTANT_ALL',
|
||||
'3DSTATE_CONSTANT_VS',
|
||||
'3DSTATE_DS',
|
||||
'3DSTATE_GS',
|
||||
'3DSTATE_HS',
|
||||
'3DSTATE_INDEX_BUFFER',
|
||||
'3DSTATE_MESH_CONTROL',
|
||||
'3DSTATE_MESH_SHADER_DATA',
|
||||
'3DSTATE_PS',
|
||||
'3DSTATE_PS_EXTRA',
|
||||
'3DSTATE_PS_BLEND',
|
||||
'3DSTATE_RASTER',
|
||||
'3DSTATE_SF',
|
||||
'3DSTATE_STREAMOUT',
|
||||
'3DSTATE_TASK_CONTROL',
|
||||
'3DSTATE_TASK_SHADER_DATA',
|
||||
'3DSTATE_TE',
|
||||
'3DSTATE_VERTEX_BUFFERS',
|
||||
'3DSTATE_VF_TOPOLOGY',
|
||||
'3DSTATE_VFG',
|
||||
'3DSTATE_VS',
|
||||
'3DSTATE_WM',
|
||||
'3DPRIMITIVE',
|
||||
'3DPRIMITIVE_EXTENDED',
|
||||
'COMPUTE_WALKER',
|
||||
'GPGPU_WALKER',
|
||||
'MEDIA_CURBE_LOAD',
|
||||
'MEDIA_INTERFACE_DESCRIPTOR_LOAD',
|
||||
'MEDIA_STATE_FLUSH',
|
||||
'MI_ARB_CHECK',
|
||||
'MI_BATCH_BUFFER_START',
|
||||
'MI_STORE_DATA_IMM',
|
||||
# structures
|
||||
'3DSTATE_CONSTANT_ALL_DATA',
|
||||
'3DSTATE_CONSTANT_BODY',
|
||||
'BINDLESS_SHADER_RECORD',
|
||||
'CALL_STACK_HANDLER',
|
||||
'COMPUTE_WALKER_BODY',
|
||||
'INTERFACE_DESCRIPTOR_DATA',
|
||||
'POSTSYNC_DATA',
|
||||
'RT_DISPATCH_GLOBALS',
|
||||
'RT_SHADER_TABLE',
|
||||
'VERTEX_BUFFER_STATE',
|
||||
]
|
||||
|
||||
|
|
|
|||
947
src/intel/shaders/dgc.cl
Normal file
947
src/intel/shaders/dgc.cl
Normal file
|
|
@ -0,0 +1,947 @@
|
|||
/*
|
||||
* Copyright 2024 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "libintel_shaders.h"
|
||||
#include "dev/intel_wa.h"
|
||||
#include "vulkan/anv_types.h"
|
||||
|
||||
/* Evaluates to non-zero when the ANV_DGC_STAGE_<stage> bit is set in
 * (descriptor)->active_stages.
 */
#define HAS_STAGE(descriptor, stage)                                    \
   (((descriptor)->active_stages & BITFIELD_BIT(ANV_DGC_STAGE_##stage)) != 0)
|
||||
|
||||
#if GFX_VER >= 11
|
||||
|
||||
static void
|
||||
merge_dwords(global void *dst, global void *src1, global void *src2, uint32_t n_dwords)
|
||||
{
|
||||
for (uint32_t i = 0; i < n_dwords; i += 4) {
|
||||
if (n_dwords - i >= 4) {
|
||||
*(global uint4 *)(dst + i * 4) = *(global uint4 *)(src1 + i * 4) |
|
||||
*(global uint4 *)(src2 + i * 4) ;
|
||||
} else if (n_dwords - i >= 3) {
|
||||
*(global uint3 *)(dst + i * 4) = *(global uint3 *)(src1 + i * 4) |
|
||||
*(global uint3 *)(src2 + i * 4) ;
|
||||
} else if (n_dwords - i >= 2) {
|
||||
*(global uint2 *)(dst + i * 4) = *(global uint2 *)(src1 + i * 4) |
|
||||
*(global uint2 *)(src2 + i * 4) ;
|
||||
} else {
|
||||
*(global uint *)(dst + i * 4) = *(global uint *)(src1 + i * 4) |
|
||||
*(global uint *)(src2 + i * 4) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if GFX_VER >= 12
|
||||
static uint32_t
|
||||
write_3DSTATE_CONSTANT_ALL(global void *dst_ptr,
|
||||
global void *push_data_addr,
|
||||
global struct anv_dgc_push_stage_state *stage_state,
|
||||
global struct anv_dgc_gfx_state *state,
|
||||
uint32_t stage_enabled)
|
||||
{
|
||||
uint32_t n_slots = stage_state->legacy.n_slots;
|
||||
struct GENX(3DSTATE_CONSTANT_ALL) v = {
|
||||
GENX(3DSTATE_CONSTANT_ALL_header),
|
||||
.DWordLength = GENX(3DSTATE_CONSTANT_ALL_length) -
|
||||
GENX(3DSTATE_CONSTANT_ALL_length_bias) +
|
||||
n_slots * GENX(3DSTATE_CONSTANT_ALL_DATA_length),
|
||||
.ShaderUpdateEnable = stage_enabled,
|
||||
.MOCS = state->layout.push_constants.mocs,
|
||||
.PointerBufferMask = (1u << n_slots) - 1,
|
||||
};
|
||||
GENX(3DSTATE_CONSTANT_ALL_pack)(dst_ptr, &v);
|
||||
|
||||
dst_ptr += GENX(3DSTATE_CONSTANT_ALL_length) * 4;
|
||||
|
||||
for (uint32_t i = 0; i < n_slots; i++) {
|
||||
struct anv_dgc_push_stage_slot slot = stage_state->legacy.slots[i];
|
||||
|
||||
if (slot.type == ANV_DGC_PUSH_SLOT_TYPE_PUSH_CONSTANTS) {
|
||||
struct GENX(3DSTATE_CONSTANT_ALL_DATA) vd = {
|
||||
.ConstantBufferReadLength = slot.push_data_size / 32,
|
||||
.PointerToConstantBuffer = (uint64_t) push_data_addr + slot.push_data_offset,
|
||||
};
|
||||
GENX(3DSTATE_CONSTANT_ALL_DATA_pack)(dst_ptr, &vd);
|
||||
} else {
|
||||
struct GENX(3DSTATE_CONSTANT_ALL_DATA) vd = {
|
||||
.ConstantBufferReadLength = slot.push_data_size / 32,
|
||||
.PointerToConstantBuffer = state->push_constants.addresses[i],
|
||||
};
|
||||
GENX(3DSTATE_CONSTANT_ALL_DATA_pack)(dst_ptr, &vd);
|
||||
}
|
||||
|
||||
dst_ptr += GENX(3DSTATE_CONSTANT_ALL_DATA_length) * 4;
|
||||
}
|
||||
|
||||
return 4 * (GENX(3DSTATE_CONSTANT_ALL_length) +
|
||||
n_slots * GENX(3DSTATE_CONSTANT_ALL_DATA_length));
|
||||
}
|
||||
#else
|
||||
static uint64_t
|
||||
pc_slot_address(global struct anv_dgc_push_stage_slot *slot,
|
||||
global uint64_t *slot_address,
|
||||
global void *push_data_addr)
|
||||
{
|
||||
if (slot->type == ANV_DGC_PUSH_SLOT_TYPE_PUSH_CONSTANTS) {
|
||||
return (uint64_t) push_data_addr + slot->push_data_offset;
|
||||
} else {
|
||||
return *slot_address;
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
write_3DSTATE_CONSTANT_XS(global void *dst_ptr,
|
||||
global void *push_data_addr,
|
||||
global struct anv_dgc_push_stage_state *stage_state,
|
||||
global struct anv_dgc_gfx_state *state,
|
||||
uint32_t stage_enabled)
|
||||
{
|
||||
uint32_t opcode;
|
||||
if (stage_enabled & BITFIELD_BIT(ANV_DGC_STAGE_VERTEX))
|
||||
opcode = 21;
|
||||
else if (stage_enabled & BITFIELD_BIT(ANV_DGC_STAGE_TESS_CTRL))
|
||||
opcode = 25;
|
||||
else if (stage_enabled & BITFIELD_BIT(ANV_DGC_STAGE_TESS_EVAL))
|
||||
opcode = 26;
|
||||
else if (stage_enabled & BITFIELD_BIT(ANV_DGC_STAGE_GEOMETRY))
|
||||
opcode = 22;
|
||||
else
|
||||
opcode = 23;
|
||||
|
||||
struct GENX(3DSTATE_CONSTANT_VS) v = {
|
||||
GENX(3DSTATE_CONSTANT_VS_header),
|
||||
._3DCommandSubOpcode = opcode,
|
||||
.ConstantBody = {
|
||||
.Buffer = {
|
||||
pc_slot_address(&stage_state->legacy.slots[0],
|
||||
&state->push_constants.addresses[0],
|
||||
push_data_addr),
|
||||
pc_slot_address(&stage_state->legacy.slots[1],
|
||||
&state->push_constants.addresses[1],
|
||||
push_data_addr),
|
||||
pc_slot_address(&stage_state->legacy.slots[2],
|
||||
&state->push_constants.addresses[2],
|
||||
push_data_addr),
|
||||
pc_slot_address(&stage_state->legacy.slots[3],
|
||||
&state->push_constants.addresses[3],
|
||||
push_data_addr),
|
||||
},
|
||||
.ReadLength = {
|
||||
stage_state->legacy.slots[0].push_data_size / 32,
|
||||
stage_state->legacy.slots[1].push_data_size / 32,
|
||||
stage_state->legacy.slots[2].push_data_size / 32,
|
||||
stage_state->legacy.slots[3].push_data_size / 32,
|
||||
},
|
||||
},
|
||||
};
|
||||
GENX(3DSTATE_CONSTANT_VS_pack)(dst_ptr, &v);
|
||||
|
||||
return 4 * GENX(3DSTATE_CONSTANT_VS_length);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
write_app_push_constant_data(global void *push_data_ptr,
|
||||
global struct anv_dgc_push_layout *pc_layout,
|
||||
global void *seq_ptr,
|
||||
global void *template_ptr,
|
||||
uint32_t template_size,
|
||||
uint32_t seq_idx)
|
||||
{
|
||||
uint32_t num_entries = pc_layout->num_entries;
|
||||
|
||||
/* Copy the push constant data prepared on the CPU into the preprocess
|
||||
* buffer. Try to minimize the amount if the first entry partially or
|
||||
* entirely overlaps.
|
||||
*/
|
||||
if (template_size > 0) {
|
||||
if (num_entries > 0) {
|
||||
struct anv_dgc_push_entry first_entry = pc_layout->entries[0];
|
||||
uint32_t entry_end = first_entry.push_offset + first_entry.size;
|
||||
if (first_entry.push_offset > 0) {
|
||||
genX(copy_data)(push_data_ptr, template_ptr,
|
||||
first_entry.push_offset);
|
||||
}
|
||||
if (entry_end < template_size) {
|
||||
genX(copy_data)(push_data_ptr + entry_end,
|
||||
template_ptr + entry_end,
|
||||
template_size - entry_end);
|
||||
}
|
||||
} else {
|
||||
genX(copy_data)(push_data_ptr, template_ptr, template_size);
|
||||
}
|
||||
}
|
||||
|
||||
/* Update push constant data using the indirect stream */
|
||||
for (uint32_t i = 0; i < num_entries; i++) {
|
||||
struct anv_dgc_push_entry entry = pc_layout->entries[i];
|
||||
global void *pc_ptr = seq_ptr + entry.seq_offset;
|
||||
genX(copy_data)(push_data_ptr + entry.push_offset,
|
||||
pc_ptr, entry.size);
|
||||
}
|
||||
|
||||
if (pc_layout->seq_id_active)
|
||||
*(uint32_t *)(push_data_ptr + pc_layout->seq_id_offset) = seq_idx;
|
||||
}
|
||||
|
||||
static void
|
||||
write_cs_drv_push_constant_data(global struct anv_push_constants *push_data_ptr,
|
||||
global void *driver_template_ptr,
|
||||
uint32_t offset, uint32_t size,
|
||||
global VkDispatchIndirectCommand *info)
|
||||
{
|
||||
genX(copy_data)(&push_data_ptr->client_data[offset],
|
||||
driver_template_ptr, size);
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
/* On Gfx12.5+ we always have the entire push constant space, so it's fine to copy */
|
||||
push_data_ptr->cs.num_workgroups[0] = info->x;
|
||||
push_data_ptr->cs.num_workgroups[1] = info->y;
|
||||
push_data_ptr->cs.num_workgroups[2] = info->z;
|
||||
#else
|
||||
/* Prior to Gfx12.5, the push constant data has to be aligned to 64B and
|
||||
* the beginning is based off the first location the shader needs. So if
|
||||
* the read location is does not include the workgroup, don't write it, we
|
||||
* would be overwriting some other data in the generated commands/data.
|
||||
*/
|
||||
if (offset <= offsetof(struct anv_push_constants, cs.num_workgroups[0])) {
|
||||
push_data_ptr->cs.num_workgroups[0] = info->x;
|
||||
push_data_ptr->cs.num_workgroups[1] = info->y;
|
||||
push_data_ptr->cs.num_workgroups[2] = info->z;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
write_rt_drv_push_constant_data(global void *driver_data_ptr,
|
||||
global void *driver_template_ptr,
|
||||
uint32_t size)
|
||||
{
|
||||
genX(copy_data)(driver_data_ptr, driver_template_ptr, size);
|
||||
}
|
||||
|
||||
static void
|
||||
write_gfx_drv_push_constant_data(global void *driver_data_ptr,
|
||||
global void *driver_template_ptr,
|
||||
uint32_t size)
|
||||
{
|
||||
genX(copy_data)(driver_data_ptr, driver_template_ptr, size);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
write_gfx_push_constant_commands(global void *push_cmd_ptr,
|
||||
global void *push_data_ptr,
|
||||
global struct anv_dgc_gfx_state *state)
|
||||
{
|
||||
uint32_t cmd_offset = 0;
|
||||
uint32_t push_stages = state->descriptor.push_constants.active_stages;
|
||||
for (uint32_t s = ANV_DGC_STAGE_VERTEX;
|
||||
s <= ANV_DGC_STAGE_FRAGMENT && push_stages != 0; s++) {
|
||||
if ((BITFIELD_BIT(s) & push_stages) == 0)
|
||||
continue;
|
||||
|
||||
global struct anv_dgc_push_stage_state *stage_state =
|
||||
&state->descriptor.push_constants.stages[s];
|
||||
|
||||
#if GFX_VER >= 12
|
||||
cmd_offset += write_3DSTATE_CONSTANT_ALL(push_cmd_ptr + cmd_offset,
|
||||
push_data_ptr,
|
||||
stage_state,
|
||||
state,
|
||||
BITFIELD_BIT(s));
|
||||
#else
|
||||
cmd_offset += write_3DSTATE_CONSTANT_XS(push_cmd_ptr + cmd_offset,
|
||||
push_data_ptr,
|
||||
stage_state,
|
||||
state,
|
||||
BITFIELD_BIT(s));
|
||||
#endif
|
||||
|
||||
push_stages &= ~BITFIELD_BIT(s);
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
/* Mesh & Task use a single combined push constants + driver constants
|
||||
* pointer
|
||||
*/
|
||||
if (push_stages & BITFIELD_BIT(ANV_DGC_STAGE_TASK)) {
|
||||
struct anv_dgc_push_bindless_stage pc =
|
||||
state->descriptor.push_constants.stages[ANV_DGC_STAGE_TASK].bindless;
|
||||
uint64_t pc_addr = (uint64_t) push_data_ptr + pc.push_data_offset;
|
||||
struct GENX(3DSTATE_TASK_SHADER_DATA) data = {
|
||||
GENX(3DSTATE_TASK_SHADER_DATA_header),
|
||||
.InlineData = {
|
||||
pc.inline_dwords[0] == ANV_INLINE_DWORD_PUSH_ADDRESS_LDW ?
|
||||
pc_addr & 0xffffffff : ((global uint32_t *)push_data_ptr)[pc.inline_dwords[0]],
|
||||
pc.inline_dwords[0] == ANV_INLINE_DWORD_PUSH_ADDRESS_LDW ?
|
||||
pc_addr >> 32 : ((global uint32_t *)push_data_ptr)[pc.inline_dwords[1]],
|
||||
((global uint32_t *)push_data_ptr)[pc.inline_dwords[2]],
|
||||
((global uint32_t *)push_data_ptr)[pc.inline_dwords[3]],
|
||||
((global uint32_t *)push_data_ptr)[pc.inline_dwords[4]],
|
||||
((global uint32_t *)push_data_ptr)[pc.inline_dwords[5]],
|
||||
((global uint32_t *)push_data_ptr)[pc.inline_dwords[6]],
|
||||
((global uint32_t *)push_data_ptr)[pc.inline_dwords[7]],
|
||||
},
|
||||
};
|
||||
GENX(3DSTATE_TASK_SHADER_DATA_pack)(push_cmd_ptr + cmd_offset, &data);
|
||||
cmd_offset += GENX(3DSTATE_TASK_SHADER_DATA_length) * 4;
|
||||
}
|
||||
|
||||
|
||||
if (push_stages & BITFIELD_BIT(ANV_DGC_STAGE_MESH)) {
|
||||
struct anv_dgc_push_bindless_stage pc =
|
||||
state->descriptor.push_constants.stages[ANV_DGC_STAGE_MESH].bindless;
|
||||
uint64_t pc_addr = (uint64_t) push_data_ptr + pc.push_data_offset;
|
||||
struct GENX(3DSTATE_MESH_SHADER_DATA) data = {
|
||||
GENX(3DSTATE_MESH_SHADER_DATA_header),
|
||||
.InlineData = {
|
||||
pc.inline_dwords[0] == ANV_INLINE_DWORD_PUSH_ADDRESS_LDW ? pc_addr & 0xffffffff :
|
||||
((global uint32_t *)push_data_ptr)[pc.inline_dwords[0]],
|
||||
pc.inline_dwords[1] == ANV_INLINE_DWORD_PUSH_ADDRESS_UDW ? pc_addr >> 32 :
|
||||
((global uint32_t *)push_data_ptr)[pc.inline_dwords[1]],
|
||||
((global uint32_t *)push_data_ptr)[pc.inline_dwords[2]],
|
||||
((global uint32_t *)push_data_ptr)[pc.inline_dwords[3]],
|
||||
((global uint32_t *)push_data_ptr)[pc.inline_dwords[4]],
|
||||
((global uint32_t *)push_data_ptr)[pc.inline_dwords[5]],
|
||||
((global uint32_t *)push_data_ptr)[pc.inline_dwords[6]],
|
||||
((global uint32_t *)push_data_ptr)[pc.inline_dwords[7]],
|
||||
},
|
||||
};
|
||||
GENX(3DSTATE_MESH_SHADER_DATA_pack)(push_cmd_ptr + cmd_offset, &data);
|
||||
cmd_offset += GENX(3DSTATE_MESH_SHADER_DATA_length) * 4;
|
||||
#undef PVDW_OR
|
||||
}
|
||||
#endif
|
||||
|
||||
return cmd_offset;
|
||||
}
|
||||
|
||||
static global void *
|
||||
get_ptr(global void *base, uint32_t stride,
|
||||
uint32_t prolog_size, uint32_t seq_idx)
|
||||
{
|
||||
return base + prolog_size + seq_idx * stride;
|
||||
}
|
||||
|
||||
static void
|
||||
write_prolog_epilog(global void *cmd_base, uint32_t cmd_stride,
|
||||
uint32_t max_count, uint32_t cmd_prolog_size,
|
||||
uint32_t seq_idx, uint64_t return_addr)
|
||||
{
|
||||
/* A write to the location of the MI_BATCH_BUFFER_START below. */
|
||||
genX(write_address)(cmd_base,
|
||||
get_ptr(cmd_base, cmd_stride,
|
||||
cmd_prolog_size, max_count) + 4,
|
||||
return_addr);
|
||||
|
||||
global void *next_addr = cmd_base + (GENX(MI_STORE_DATA_IMM_length) + 1 +
|
||||
GENX(MI_BATCH_BUFFER_START_length)) * 4;
|
||||
|
||||
genX(write_MI_BATCH_BUFFER_START)(
|
||||
cmd_base + (GENX(MI_STORE_DATA_IMM_length) + 1) * 4,
|
||||
(uint64_t)next_addr);
|
||||
|
||||
/* Reenable the prefetcher. */
|
||||
#if GFX_VER >= 12
|
||||
struct GENX(MI_ARB_CHECK) v = {
|
||||
GENX(MI_ARB_CHECK_header),
|
||||
/* This is a trick to get the CLC->SPIRV not to use a constant variable
|
||||
* for this. Otherwise we run into issues trying to store that variable
|
||||
* in constant memory which is inefficient for a single dword and also
|
||||
* not handled in our backend.
|
||||
*/
|
||||
.PreParserDisableMask = seq_idx == 0,
|
||||
.PreParserDisable = false,
|
||||
};
|
||||
GENX(MI_ARB_CHECK_pack)(next_addr, &v);
|
||||
#endif
|
||||
|
||||
/* This is the epilog, returning to the main batch. */
|
||||
genX(write_MI_BATCH_BUFFER_START)(
|
||||
get_ptr(cmd_base, cmd_stride, cmd_prolog_size, max_count),
|
||||
return_addr);
|
||||
}
|
||||
|
||||
static void
|
||||
write_return_addr(global void *cmd_base, uint32_t cmd_stride,
|
||||
uint32_t max_count, uint32_t cmd_prolog_size,
|
||||
uint64_t return_addr)
|
||||
{
|
||||
/* A write to the location of the MI_BATCH_BUFFER_START below. */
|
||||
genX(write_address)(cmd_base,
|
||||
get_ptr(cmd_base, cmd_stride,
|
||||
cmd_prolog_size, max_count) + 4,
|
||||
return_addr);
|
||||
}
|
||||
|
||||
void
|
||||
genX(libanv_preprocess_gfx_generate)(global void *cmd_base,
|
||||
uint32_t cmd_stride,
|
||||
global void *data_base,
|
||||
uint32_t data_stride,
|
||||
global void *seq_base,
|
||||
uint32_t seq_stride,
|
||||
global uint32_t *seq_count,
|
||||
uint32_t max_seq_count,
|
||||
uint32_t cmd_prolog_size,
|
||||
uint32_t data_prolog_size,
|
||||
global struct anv_dgc_gfx_state *state,
|
||||
global void *const_ptr,
|
||||
uint32_t const_size,
|
||||
global void *driver_const_ptr,
|
||||
uint64_t return_addr,
|
||||
uint32_t flags,
|
||||
uint32_t seq_idx)
|
||||
{
|
||||
uint32_t max_count = seq_count != 0 ? min(*seq_count, max_seq_count) : max_seq_count;
|
||||
|
||||
if (seq_idx == 0) {
|
||||
write_prolog_epilog(cmd_base, cmd_stride, max_count,
|
||||
cmd_prolog_size, seq_idx, return_addr);
|
||||
}
|
||||
|
||||
if (seq_idx >= max_count)
|
||||
return;
|
||||
|
||||
/* Pointer to the stream data, layed out as described in stream_layout. */
|
||||
global void *seq_ptr = seq_base + seq_idx * seq_stride;
|
||||
|
||||
/* Where to write the commands */
|
||||
global void *cmd_ptr =
|
||||
get_ptr(cmd_base, cmd_stride, cmd_prolog_size, seq_idx);
|
||||
|
||||
/* 3DSTATE_INDEX_BUFFER */
|
||||
struct anv_dgc_index_buffer index_buffer = state->layout.index_buffer;
|
||||
if (index_buffer.cmd_size != 0) {
|
||||
VkBindIndexBufferIndirectCommandEXT idx_data =
|
||||
*(global VkBindIndexBufferIndirectCommandEXT *)(
|
||||
seq_ptr + index_buffer.seq_offset);
|
||||
|
||||
uint32_t index_format =
|
||||
index_buffer.u32_value == idx_data.indexType ? INDEX_DWORD :
|
||||
index_buffer.u16_value == idx_data.indexType ? INDEX_WORD :
|
||||
index_buffer.u8_value == idx_data.indexType ? INDEX_BYTE :
|
||||
INDEX_BYTE;
|
||||
|
||||
genX(write_3DSTATE_INDEX_BUFFER)(cmd_ptr + index_buffer.cmd_offset,
|
||||
idx_data.bufferAddress,
|
||||
idx_data.size,
|
||||
index_format,
|
||||
index_buffer.mocs);
|
||||
}
|
||||
|
||||
/* 3DSTATE_VERTEX_BUFFERS */
|
||||
uint32_t n_vertex_buffers = state->layout.vertex_buffers.n_buffers;
|
||||
if (n_vertex_buffers) {
|
||||
global void *cmd_vb = cmd_ptr + state->layout.vertex_buffers.cmd_offset;
|
||||
|
||||
genX(write_3DSTATE_VERTEX_BUFFERS)(cmd_vb, n_vertex_buffers);
|
||||
cmd_vb += 4;
|
||||
|
||||
uint16_t mocs = state->layout.vertex_buffers.mocs;
|
||||
for (uint32_t i = 0; i < n_vertex_buffers; i++) {
|
||||
struct anv_dgc_vertex_buffer vb = state->layout.vertex_buffers.buffers[i];
|
||||
|
||||
VkBindVertexBufferIndirectCommandEXT vtx_data =
|
||||
*(global VkBindVertexBufferIndirectCommandEXT *)(
|
||||
seq_ptr + vb.seq_offset);
|
||||
|
||||
genX(write_VERTEX_BUFFER_STATE)(cmd_vb, mocs, vb.binding,
|
||||
vtx_data.bufferAddress,
|
||||
vtx_data.size,
|
||||
vtx_data.stride);
|
||||
cmd_vb += GENX(VERTEX_BUFFER_STATE_length) * 4;
|
||||
}
|
||||
}
|
||||
|
||||
#if INTEL_WA_16011107343_GFX_VER || INTEL_WA_22018402687_GFX_VER
|
||||
genX(copy_data)(cmd_ptr + state->layout.indirect_set.final_cmds_offset,
|
||||
state->descriptor.final_commands,
|
||||
state->layout.indirect_set.final_cmds_size);
|
||||
#endif
|
||||
|
||||
/* Push constants */
|
||||
enum anv_dgc_push_constant_flags pc_flags =
|
||||
state->layout.push_constants.flags;
|
||||
if (pc_flags & ANV_DGC_PUSH_CONSTANTS_CMD_ACTIVE) {
|
||||
global void *push_data_ptr =
|
||||
get_ptr(data_base, data_stride, data_prolog_size, seq_idx) +
|
||||
state->layout.push_constants.data_offset;
|
||||
|
||||
write_app_push_constant_data(push_data_ptr,
|
||||
&state->layout.push_constants,
|
||||
seq_ptr, const_ptr,
|
||||
const_size, seq_idx);
|
||||
write_gfx_drv_push_constant_data(
|
||||
push_data_ptr + MAX_PUSH_CONSTANTS_SIZE,
|
||||
driver_const_ptr, ANV_DRIVER_PUSH_CONSTANTS_SIZE);
|
||||
|
||||
write_gfx_push_constant_commands(cmd_ptr +
|
||||
state->layout.push_constants.cmd_offset,
|
||||
push_data_ptr,
|
||||
state);
|
||||
}
|
||||
|
||||
/* 3DPRIMITIVE / 3DMESH_3D */
|
||||
bool is_predicated = (flags & ANV_GENERATED_FLAG_PREDICATED) != 0;
|
||||
bool tbimr_enabled = (flags & ANV_GENERATED_FLAG_TBIMR) != 0;
|
||||
switch (state->layout.draw.draw_type) {
|
||||
case ANV_DGC_DRAW_TYPE_SEQUENTIAL:
|
||||
genX(write_draw)(cmd_ptr + state->layout.draw.cmd_offset,
|
||||
seq_ptr + state->layout.draw.seq_offset,
|
||||
0 /* draw_id_ptr */,
|
||||
0 /* draw_id, always 0 per spec */,
|
||||
state->draw.instance_multiplier,
|
||||
false /* indexed */,
|
||||
is_predicated,
|
||||
tbimr_enabled,
|
||||
true /* uses_base, unused for Gfx11+ */,
|
||||
true /* uses_draw_id, unused for Gfx11+ */,
|
||||
0 /* mocs, unused for Gfx11+ */);
|
||||
break;
|
||||
|
||||
case ANV_DGC_DRAW_TYPE_INDEXED:
|
||||
genX(write_draw)(cmd_ptr + state->layout.draw.cmd_offset,
|
||||
seq_ptr + state->layout.draw.seq_offset,
|
||||
0 /* draw_id_ptr */,
|
||||
0 /* draw_id, always 0 per spec */,
|
||||
state->draw.instance_multiplier,
|
||||
true /* indexed */,
|
||||
is_predicated,
|
||||
tbimr_enabled,
|
||||
true /* uses_base, unused for Gfx11+ */,
|
||||
true /* uses_draw_id, unused for Gfx11+ */,
|
||||
0 /* mocs, unused for Gfx11+ */);
|
||||
break;
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
case ANV_DGC_DRAW_TYPE_MESH:
|
||||
genX(write_3DMESH_3D)(cmd_ptr + state->layout.draw.cmd_offset,
|
||||
seq_ptr + state->layout.draw.seq_offset,
|
||||
is_predicated,
|
||||
tbimr_enabled);
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
static void
|
||||
emit_dispatch_commands(global void *cmd_base,
|
||||
uint32_t cmd_stride,
|
||||
uint32_t seq_idx,
|
||||
uint32_t prolog_size,
|
||||
global void *push_data_ptr,
|
||||
global struct anv_dgc_cs_layout *layout,
|
||||
global struct anv_dgc_cs_descriptor *descriptor,
|
||||
global void *interface_descriptor_data_ptr,
|
||||
uint32_t flags,
|
||||
global VkDispatchIndirectCommand *info)
|
||||
{
|
||||
global void *cmd_ptr = get_ptr(cmd_base, cmd_stride, prolog_size, seq_idx);
|
||||
|
||||
uint64_t pc_addr = (uint64_t)push_data_ptr + descriptor->push_data_offset;
|
||||
|
||||
struct GENX(COMPUTE_WALKER) v = {
|
||||
.PredicateEnable = (flags & ANV_GENERATED_FLAG_PREDICATED) != 0,
|
||||
.body = {
|
||||
.ThreadGroupIDXDimension = info->x,
|
||||
.ThreadGroupIDYDimension = info->y,
|
||||
.ThreadGroupIDZDimension = info->z,
|
||||
.ExecutionMask = descriptor->right_mask,
|
||||
.InlineData = {
|
||||
descriptor->gfx125.inline_dwords[0] == ANV_INLINE_DWORD_PUSH_ADDRESS_LDW ?
|
||||
pc_addr & 0xffffffff : ((global uint32_t *)push_data_ptr)[descriptor->gfx125.inline_dwords[0]],
|
||||
descriptor->gfx125.inline_dwords[0] == ANV_INLINE_DWORD_PUSH_ADDRESS_LDW ?
|
||||
pc_addr >> 32 : ((global uint32_t *)push_data_ptr)[descriptor->gfx125.inline_dwords[1]],
|
||||
((global uint32_t *)push_data_ptr)[descriptor->gfx125.inline_dwords[2]],
|
||||
((global uint32_t *)push_data_ptr)[descriptor->gfx125.inline_dwords[3]],
|
||||
((global uint32_t *)push_data_ptr)[descriptor->gfx125.inline_dwords[4]],
|
||||
((global uint32_t *)push_data_ptr)[descriptor->gfx125.inline_dwords[5]],
|
||||
((global uint32_t *)push_data_ptr)[descriptor->gfx125.inline_dwords[6]],
|
||||
((global uint32_t *)push_data_ptr)[descriptor->gfx125.inline_dwords[7]],
|
||||
},
|
||||
},
|
||||
};
|
||||
GENX(COMPUTE_WALKER_repack)(cmd_ptr, descriptor->gfx125.compute_walker, &v);
|
||||
}
|
||||
#else
|
||||
static void
|
||||
emit_dispatch_commands(global void *cmd_base,
|
||||
uint32_t cmd_stride,
|
||||
uint32_t seq_idx,
|
||||
uint32_t cmd_prolog_size,
|
||||
global void *data_ptr,
|
||||
global struct anv_dgc_cs_layout *layout,
|
||||
global struct anv_dgc_cs_descriptor *descriptor,
|
||||
global void *interface_descriptor_data_ptr,
|
||||
uint32_t flags,
|
||||
global VkDispatchIndirectCommand *info)
|
||||
{
|
||||
global void *cmd_ptr = get_ptr(cmd_base, cmd_stride, cmd_prolog_size, seq_idx);
|
||||
|
||||
if (layout->indirect_set.active != 0) {
|
||||
/* Emit MEDIA_VFE_STATE either for each sequence */
|
||||
genX(copy_data)(cmd_ptr, descriptor->gfx9.media_vfe_state,
|
||||
sizeof(descriptor->gfx9.media_vfe_state));
|
||||
cmd_ptr += sizeof(descriptor->gfx9.media_vfe_state);
|
||||
|
||||
/* Load the shader descriptor */
|
||||
global void *idd_ptr = data_ptr + layout->indirect_set.data_offset;
|
||||
merge_dwords(idd_ptr,
|
||||
interface_descriptor_data_ptr,
|
||||
descriptor->gfx9.interface_descriptor_data,
|
||||
GENX(INTERFACE_DESCRIPTOR_DATA_length));
|
||||
|
||||
uint32_t idd_offset =
|
||||
ANV_DYNAMIC_VISIBLE_HEAP_OFFSET + ((uint64_t)idd_ptr) & 0xffffffff;
|
||||
|
||||
struct GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD) mdd = {
|
||||
GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD_header),
|
||||
.InterfaceDescriptorTotalLength = GENX(INTERFACE_DESCRIPTOR_DATA_length) * 4,
|
||||
.InterfaceDescriptorDataStartAddress = idd_offset,
|
||||
};
|
||||
GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack)(cmd_ptr, &mdd);
|
||||
cmd_ptr += GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD_length) * 4;
|
||||
}
|
||||
|
||||
/* Push constant offset relative to the dynamic state heap */
|
||||
uint32_t dyn_push_data_offset =
|
||||
ANV_DYNAMIC_VISIBLE_HEAP_OFFSET + (((uint64_t)data_ptr) & 0xffffffff);
|
||||
|
||||
struct GENX(MEDIA_CURBE_LOAD) mdl = {
|
||||
GENX(MEDIA_CURBE_LOAD_header),
|
||||
.CURBETotalDataLength = descriptor->gfx9.cross_thread_push_size +
|
||||
descriptor->gfx9.n_threads *
|
||||
descriptor->gfx9.per_thread_push_size,
|
||||
.CURBEDataStartAddress = dyn_push_data_offset,
|
||||
};
|
||||
GENX(MEDIA_CURBE_LOAD_pack)(cmd_ptr, &mdl);
|
||||
cmd_ptr += GENX(MEDIA_CURBE_LOAD_length) * 4;
|
||||
|
||||
/* Emit the walker */
|
||||
struct GENX(GPGPU_WALKER) walker = {
|
||||
.PredicateEnable = (flags & ANV_GENERATED_FLAG_PREDICATED) != 0,
|
||||
.SIMDSize = descriptor->simd_size / 16,
|
||||
.ThreadWidthCounterMaximum = descriptor->threads - 1,
|
||||
.RightExecutionMask = descriptor->right_mask,
|
||||
.BottomExecutionMask = 0xffffffff,
|
||||
.ThreadGroupIDXDimension = info->x,
|
||||
.ThreadGroupIDYDimension = info->y,
|
||||
.ThreadGroupIDZDimension = info->z,
|
||||
};
|
||||
GENX(GPGPU_WALKER_repack)(cmd_ptr, descriptor->gfx9.gpgpu_walker, &walker);
|
||||
global uint32_t *walker_ptr = cmd_ptr;
|
||||
cmd_ptr += GENX(GPGPU_WALKER_length) * 4;
|
||||
|
||||
uint32_t per_thread_push_size = descriptor->gfx9.per_thread_push_size;
|
||||
if (per_thread_push_size > 0) {
|
||||
uint32_t cross_thread_push_size = descriptor->gfx9.cross_thread_push_size;
|
||||
global void *per_thread_ptr0 = data_ptr + cross_thread_push_size;
|
||||
global void *per_thread_ptr = per_thread_ptr0;
|
||||
for (uint32_t t = 0; t < descriptor->gfx9.n_threads; t++) {
|
||||
if (t > 0) {
|
||||
genX(copy_data)(per_thread_ptr, per_thread_ptr0,
|
||||
per_thread_push_size);
|
||||
}
|
||||
*(uint32_t*)(per_thread_ptr + descriptor->gfx9.subgroup_id_offset) = t;
|
||||
per_thread_ptr += per_thread_push_size;
|
||||
}
|
||||
}
|
||||
|
||||
struct GENX(MEDIA_STATE_FLUSH) flush = {
|
||||
GENX(MEDIA_STATE_FLUSH_header),
|
||||
};
|
||||
GENX(MEDIA_STATE_FLUSH_pack)(cmd_ptr, &flush);
|
||||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
genX(libanv_preprocess_cs_generate)(global void *cmd_base,
|
||||
uint32_t cmd_stride,
|
||||
global void *data_base,
|
||||
uint32_t data_stride,
|
||||
global void *seq_base,
|
||||
uint32_t seq_stride,
|
||||
global uint32_t *seq_count,
|
||||
uint32_t max_seq_count,
|
||||
uint32_t cmd_prolog_size,
|
||||
uint32_t data_prolog_size,
|
||||
global struct anv_dgc_cs_layout *layout,
|
||||
global struct anv_dgc_cs_descriptor *indirect_set,
|
||||
global void *interface_descriptor_data_ptr,
|
||||
global void *const_ptr,
|
||||
uint32_t const_size,
|
||||
global void *driver_const_ptr,
|
||||
uint64_t return_addr,
|
||||
uint32_t flags,
|
||||
uint32_t seq_idx)
|
||||
{
|
||||
uint32_t max_count = seq_count != 0 ? min(*seq_count, max_seq_count) : max_seq_count;
|
||||
|
||||
if (seq_idx == 0) {
|
||||
write_prolog_epilog(cmd_base, cmd_stride, max_count,
|
||||
cmd_prolog_size, seq_idx, return_addr);
|
||||
}
|
||||
|
||||
if (seq_idx >= max_count)
|
||||
return;
|
||||
|
||||
/* Pointer to the application generated data, layed out as described in
|
||||
* stream_layout.
|
||||
*/
|
||||
global void *seq_ptr = seq_base + seq_idx * seq_stride;
|
||||
|
||||
/* Get the shader descriptor. */
|
||||
global struct anv_dgc_cs_descriptor *descriptor;
|
||||
if (layout->indirect_set.active != 0) {
|
||||
uint32_t set_idx = *(global uint32_t *)(seq_ptr + layout->indirect_set.seq_offset);
|
||||
descriptor = &indirect_set[set_idx];
|
||||
} else {
|
||||
descriptor = indirect_set;
|
||||
}
|
||||
|
||||
/* Prepare the push constant data. */
|
||||
uint32_t push_data_offset = descriptor->push_data_offset;
|
||||
|
||||
/* */
|
||||
global void *push_data_ptr =
|
||||
get_ptr(data_base, data_stride, data_prolog_size, seq_idx) +
|
||||
layout->push_constants.data_offset;
|
||||
#if GFX_VERx10 >= 125
|
||||
write_app_push_constant_data(
|
||||
push_data_ptr, &layout->push_constants,
|
||||
seq_ptr, const_ptr, const_size, seq_idx);
|
||||
write_cs_drv_push_constant_data(
|
||||
push_data_ptr, driver_const_ptr,
|
||||
MAX_PUSH_CONSTANTS_SIZE,
|
||||
ANV_DRIVER_PUSH_CONSTANTS_SIZE,
|
||||
seq_ptr + layout->dispatch.seq_offset);
|
||||
#else
|
||||
write_app_push_constant_data(
|
||||
push_data_ptr, &layout->push_constants,
|
||||
seq_ptr, const_ptr, const_size, seq_idx);
|
||||
write_cs_drv_push_constant_data(
|
||||
push_data_ptr - descriptor->push_data_offset, driver_const_ptr,
|
||||
MAX2(descriptor->push_data_offset, MAX_PUSH_CONSTANTS_SIZE),
|
||||
MIN2(ANV_DRIVER_PUSH_CONSTANTS_SIZE,
|
||||
(MAX_PUSH_CONSTANTS_SIZE + ANV_DRIVER_PUSH_CONSTANTS_SIZE) -
|
||||
descriptor->push_data_offset),
|
||||
seq_ptr + layout->dispatch.seq_offset);
|
||||
#endif
|
||||
|
||||
/* Finally write the commands */
|
||||
emit_dispatch_commands(cmd_base, cmd_stride, seq_idx, cmd_prolog_size,
|
||||
push_data_ptr, layout, descriptor,
|
||||
interface_descriptor_data_ptr, flags,
|
||||
seq_ptr + layout->dispatch.seq_offset);
|
||||
}
|
||||
|
||||
void
|
||||
genX(libanv_postprocess_cs_generate)(global void *cmd_base,
|
||||
uint32_t cmd_stride,
|
||||
global void *data_base,
|
||||
uint32_t data_stride,
|
||||
global uint32_t *seq_count,
|
||||
uint32_t max_seq_count,
|
||||
uint32_t cmd_prolog_size,
|
||||
uint32_t data_prolog_size,
|
||||
uint32_t data_idd_offset,
|
||||
global struct anv_dgc_cs_descriptor *descriptor,
|
||||
uint64_t return_addr,
|
||||
uint32_t seq_idx)
|
||||
{
|
||||
uint32_t max_count = seq_count != 0 ? min(*seq_count, max_seq_count) : max_seq_count;
|
||||
|
||||
if (seq_idx == 0) {
|
||||
write_prolog_epilog(cmd_base, cmd_stride, max_count,
|
||||
cmd_prolog_size, seq_idx, return_addr);
|
||||
}
|
||||
|
||||
if (seq_idx >= max_count)
|
||||
return;
|
||||
|
||||
/* Where to write the commands */
|
||||
global void *cmd_ptr =
|
||||
get_ptr(cmd_base, cmd_stride, cmd_prolog_size, seq_idx);
|
||||
|
||||
/* OR the driver INTERFACE_DESCRIPTOR_DATA dwords with the device generated
|
||||
* ones.
|
||||
*/
|
||||
uint32_t n_dwords = 2; /* dwords covered from
|
||||
* INTERFACE_DESCRIPTOR_DATA::SamplerCount to
|
||||
* INTERFACE_DESCRIPTOR_DATA::BindingTablePointer
|
||||
*/
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
uint32_t idd_offset_B = 12 /* offset in INTERFACE_DESCRIPTOR_DATA */;
|
||||
uint32_t csw_body_offset_B = (GFX_VERx10 >= 200 ? 72 : 68) /* offset in COMPUTE_WALKER_BODY */;
|
||||
uint32_t csw_offset_B = 4 /* offset in COMPUTE_WALKER */;
|
||||
uint32_t inst_offset_B = csw_offset_B + csw_body_offset_B + idd_offset_B;
|
||||
merge_dwords(cmd_ptr + inst_offset_B,
|
||||
cmd_ptr + inst_offset_B,
|
||||
&descriptor->gfx125.compute_walker[inst_offset_B / 4],
|
||||
n_dwords);
|
||||
#else
|
||||
global void *idd_ptr =
|
||||
get_ptr(data_base, data_stride, data_prolog_size, seq_idx) +
|
||||
data_idd_offset;
|
||||
uint32_t inst_offset_B = 12 /* offset in INTERFACE_DESCRIPTOR_DATA */;
|
||||
merge_dwords(idd_ptr + inst_offset_B,
|
||||
idd_ptr + inst_offset_B,
|
||||
&descriptor->gfx9.interface_descriptor_data[inst_offset_B / 4],
|
||||
n_dwords);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
static uint3
|
||||
calc_local_trace_size(uint3 global_size)
|
||||
{
|
||||
unsigned total_shift = 0;
|
||||
uint3 local_shift = (uint3)(0, 0, 0);
|
||||
|
||||
bool progress;
|
||||
do {
|
||||
progress = false;
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
if ((1 << local_shift[i]) < global_size[i]) {
|
||||
progress = true;
|
||||
local_shift[i]++;
|
||||
total_shift++;
|
||||
}
|
||||
|
||||
if (total_shift == 3)
|
||||
return local_shift;
|
||||
}
|
||||
} while (progress);
|
||||
|
||||
/* Assign whatever's left to x */
|
||||
local_shift[0] += 3 - total_shift;
|
||||
|
||||
return local_shift;
|
||||
}
|
||||
|
||||
void
|
||||
genX(libanv_preprocess_rt_generate)(global void *cmd_base,
|
||||
uint32_t cmd_stride,
|
||||
global void *data_base,
|
||||
uint32_t data_stride,
|
||||
global void *seq_base,
|
||||
uint32_t seq_stride,
|
||||
global uint32_t *seq_count,
|
||||
uint32_t max_seq_count,
|
||||
uint32_t cmd_prolog_size,
|
||||
uint32_t data_prolog_size,
|
||||
global struct anv_dgc_cs_layout *layout,
|
||||
global void *compute_walker_template,
|
||||
global void *rtdg_global_template,
|
||||
global void *const_ptr,
|
||||
uint32_t const_size,
|
||||
global void *driver_const_ptr,
|
||||
uint64_t return_addr,
|
||||
uint32_t flags,
|
||||
uint32_t seq_idx)
|
||||
{
|
||||
uint32_t max_count = seq_count != 0 ? min(*seq_count, max_seq_count) : max_seq_count;
|
||||
|
||||
if (seq_idx == 0) {
|
||||
write_prolog_epilog(cmd_base, cmd_stride, max_count,
|
||||
cmd_prolog_size, seq_idx, return_addr);
|
||||
}
|
||||
|
||||
if (seq_idx >= max_count)
|
||||
return;
|
||||
|
||||
/* Where to write the commands */
|
||||
global void *cmd_ptr =
|
||||
get_ptr(cmd_base, cmd_stride, cmd_prolog_size, seq_idx);
|
||||
|
||||
/* Pointer to the application generated data, layed out as described in
|
||||
* stream_layout.
|
||||
*/
|
||||
global void *seq_ptr = seq_base + seq_idx * seq_stride;
|
||||
|
||||
VkTraceRaysIndirectCommand2KHR *info =
|
||||
((global VkTraceRaysIndirectCommand2KHR *)(seq_ptr + layout->dispatch.seq_offset));
|
||||
uint3 launch_size = (uint3)(info->width, info->height, info->depth);
|
||||
|
||||
/* RTDG + push constants */
|
||||
global void *push_data_ptr =
|
||||
get_ptr(data_base, data_stride, data_prolog_size, seq_idx) +
|
||||
layout->push_constants.data_offset;
|
||||
global void *rtdg_ptr = push_data_ptr;
|
||||
struct GENX(RT_DISPATCH_GLOBALS) rtdg = {
|
||||
.LaunchWidth = launch_size.x,
|
||||
.LaunchHeight = launch_size.y,
|
||||
.LaunchDepth = launch_size.z,
|
||||
#if GFX_VER >= 30
|
||||
.HitGroupStride = info->hitShaderBindingTableStride,
|
||||
.HitGroupTable = info->hitShaderBindingTableAddress,
|
||||
.MissGroupTable = info->missShaderBindingTableAddress,
|
||||
.MissGroupStride = info->missShaderBindingTableStride,
|
||||
.CallableGroupTable = info->callableShaderBindingTableAddress,
|
||||
.CallableGroupStride = info->callableShaderBindingTableStride,
|
||||
#else
|
||||
.HitGroupTable = (struct GENX(RT_SHADER_TABLE)) {
|
||||
.BaseAddress = info->hitShaderBindingTableAddress,
|
||||
.Stride = info->hitShaderBindingTableStride,
|
||||
},
|
||||
.MissGroupTable = (struct GENX(RT_SHADER_TABLE)) {
|
||||
.BaseAddress = info->missShaderBindingTableAddress,
|
||||
.Stride = info->missShaderBindingTableStride,
|
||||
},
|
||||
.CallableGroupTable = (struct GENX(RT_SHADER_TABLE)) {
|
||||
.BaseAddress = info->callableShaderBindingTableAddress,
|
||||
.Stride = info->callableShaderBindingTableStride,
|
||||
},
|
||||
#endif
|
||||
};
|
||||
GENX(RT_DISPATCH_GLOBALS_repack)(rtdg_ptr, rtdg_global_template, &rtdg);
|
||||
|
||||
write_app_push_constant_data(
|
||||
push_data_ptr + ANV_DGC_RT_GLOBAL_DISPATCH_SIZE,
|
||||
&layout->push_constants,
|
||||
seq_ptr, const_ptr, const_size, seq_idx);
|
||||
write_rt_drv_push_constant_data(
|
||||
push_data_ptr +
|
||||
ANV_DGC_RT_GLOBAL_DISPATCH_SIZE +
|
||||
MAX_PUSH_CONSTANTS_SIZE,
|
||||
driver_const_ptr,
|
||||
ANV_DRIVER_PUSH_CONSTANTS_SIZE);
|
||||
|
||||
uint3 local_size_log2 = calc_local_trace_size(launch_size);
|
||||
uint3 one = 1;
|
||||
uint3 local_size = one << local_size_log2;
|
||||
uint3 global_size = DIV_ROUND_UP(launch_size, local_size);
|
||||
|
||||
/* Finally write the commands */
|
||||
global uint64_t *sbt = (global uint64_t *)info->raygenShaderRecordAddress;
|
||||
struct GENX(COMPUTE_WALKER) v = {
|
||||
.PredicateEnable = (flags & ANV_GENERATED_FLAG_PREDICATED) != 0,
|
||||
.body = {
|
||||
.LocalXMaximum = (1u << local_size_log2.x) - 1,
|
||||
.LocalYMaximum = (1u << local_size_log2.y) - 1,
|
||||
.LocalZMaximum = (1u << local_size_log2.z) - 1,
|
||||
.ThreadGroupIDXDimension = global_size.x,
|
||||
.ThreadGroupIDYDimension = global_size.y,
|
||||
.ThreadGroupIDZDimension = global_size.z,
|
||||
/* See struct brw_rt_raygen_trampoline_params */
|
||||
.InlineData = {
|
||||
((uint64_t) rtdg_ptr) & 0xffffffff,
|
||||
((uint64_t) rtdg_ptr) >> 32,
|
||||
info->raygenShaderRecordAddress & 0xffffffff,
|
||||
info->raygenShaderRecordAddress >> 32,
|
||||
local_size_log2.x << 8 |
|
||||
local_size_log2.y << 16 |
|
||||
local_size_log2.z << 24,
|
||||
},
|
||||
},
|
||||
};
|
||||
GENX(COMPUTE_WALKER_repack)(cmd_ptr, compute_walker_template, &v);
|
||||
}
|
||||
#endif /* GFX_VERx10 >= 125 */
|
||||
|
||||
#endif /* GFX_VER >= 11 */
|
||||
|
|
@ -4,6 +4,31 @@
|
|||
|
||||
#include "libintel_shaders.h"
|
||||
|
||||
/**
 * Pack at dst_ptr an MI_STORE_DATA_IMM instruction that stores value at
 * address.
 */
void genX(write_address)(global void *dst_ptr, global void *address, uint64_t value)
{
   /* One dword more than the default length of the instruction (presumably
    * for the upper half of the 64bit immediate).
    */
   const uint32_t dword_length =
      GENX(MI_STORE_DATA_IMM_length) -
      GENX(MI_STORE_DATA_IMM_length_bias) + 1;

   struct GENX(MI_STORE_DATA_IMM) sdi = {
      GENX(MI_STORE_DATA_IMM_header),
      .DWordLength               = dword_length,
#if GFX_VER >= 12
      .ForceWriteCompletionCheck = true,
#endif
      .Address                   = (uint64_t)address,
      .ImmediateData             = value,
   };

   GENX(MI_STORE_DATA_IMM_pack)(dst_ptr, &sdi);
}
|
||||
|
||||
void genX(write_3DSTATE_VF_TOPOLOGY)(global void *dst_ptr,
|
||||
uint32_t topology)
|
||||
{
|
||||
struct GENX(3DSTATE_VF_TOPOLOGY) v = {
|
||||
GENX(3DSTATE_VF_TOPOLOGY_header),
|
||||
.PrimitiveTopologyType = topology,
|
||||
};
|
||||
GENX(3DSTATE_VF_TOPOLOGY_pack)(dst_ptr, &v);
|
||||
}
|
||||
|
||||
void genX(write_3DSTATE_VERTEX_BUFFERS)(global void *dst_ptr,
|
||||
uint32_t buffer_count)
|
||||
{
|
||||
|
|
@ -38,6 +63,25 @@ void genX(write_VERTEX_BUFFER_STATE)(global void *dst_ptr,
|
|||
GENX(VERTEX_BUFFER_STATE_pack)(dst_ptr, &v);
|
||||
}
|
||||
|
||||
void genX(write_3DSTATE_INDEX_BUFFER)(global void *dst_ptr,
|
||||
uint64_t buffer_addr,
|
||||
uint32_t buffer_size,
|
||||
uint32_t index_format,
|
||||
uint32_t mocs)
|
||||
{
|
||||
struct GENX(3DSTATE_INDEX_BUFFER) v = {
|
||||
GENX(3DSTATE_INDEX_BUFFER_header),
|
||||
.MOCS = mocs,
|
||||
.IndexFormat = index_format,
|
||||
#if GFX_VER >= 12
|
||||
.L3BypassDisable = true,
|
||||
#endif
|
||||
.BufferStartingAddress = buffer_addr,
|
||||
.BufferSize = buffer_size,
|
||||
};
|
||||
GENX(3DSTATE_INDEX_BUFFER_pack)(dst_ptr, &v);
|
||||
}
|
||||
|
||||
void genX(write_3DPRIMITIVE)(global void *dst_ptr,
|
||||
bool is_predicated,
|
||||
bool is_indexed,
|
||||
|
|
@ -202,3 +246,25 @@ void genX(write_draw)(global uint32_t *dst_ptr,
|
|||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 125
/**
 * Pack at dst_ptr a 3DMESH_3D instruction using the thread group counts
 * read from the VkDrawMeshTasksIndirectCommandEXT located at indirect_ptr.
 */
void genX(write_3DMESH_3D)(global uint32_t *dst_ptr,
                           global void *indirect_ptr,
                           bool is_predicated,
                           bool uses_tbimr)
{
   /* Copy the indirect arguments out of memory once */
   global VkDrawMeshTasksIndirectCommandEXT *args_ptr =
      (global VkDrawMeshTasksIndirectCommandEXT *)indirect_ptr;
   VkDrawMeshTasksIndirectCommandEXT args = *args_ptr;

   struct GENX(3DMESH_3D) mesh = {
      GENX(3DMESH_3D_header),
      .PredicateEnable   = is_predicated,
      .TBIMREnabled      = uses_tbimr,
      .ThreadGroupCountX = args.groupCountX,
      .ThreadGroupCountY = args.groupCountY,
      .ThreadGroupCountZ = args.groupCountZ,
   };

   GENX(3DMESH_3D_pack)(dst_ptr, &mesh);
}
#endif
|
||||
|
|
|
|||
|
|
@ -13,17 +13,22 @@
|
|||
|
||||
#include "util/macros.h"
|
||||
|
||||
#else
|
||||
#include "compiler/intel_shader_enums.h"
|
||||
|
||||
#define _MESA_LIBCL_ASSERT_IGNORE 1
|
||||
#else
|
||||
#include "libcl_vk.h"
|
||||
|
||||
#include "genxml/gen_macros.h"
|
||||
#include "genxml/genX_cl_pack.h"
|
||||
#include "genxml/genX_rt_cl_pack.h"
|
||||
|
||||
#define PRAGMA_POISON(param)
|
||||
#include "compiler/intel_shader_enums.h"
|
||||
|
||||
#define _3DPRIM_PATCHLIST(n) (0x20 + (n - 1))
|
||||
#endif
|
||||
|
||||
#define ANV_GENERATED_MAX_VES (29)
|
||||
|
||||
/**
|
||||
* Flags for generated_draws.cl
|
||||
*/
|
||||
|
|
@ -46,6 +51,10 @@ enum anv_generated_draw_flags {
|
|||
ANV_GENERATED_FLAG_WA_16011107343 = BITFIELD_BIT(7),
|
||||
/* Wa_22018402687 */
|
||||
ANV_GENERATED_FLAG_WA_22018402687 = BITFIELD_BIT(8),
|
||||
/* Wa_16014912113 */
|
||||
ANV_GENERATED_FLAG_WA_16014912113 = BITFIELD_BIT(9),
|
||||
/* Wa_18022330953 / Wa_22011440098 */
|
||||
ANV_GENERATED_FLAG_WA_18022330953 = BITFIELD_BIT(10)
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -58,6 +67,9 @@ enum anv_generated_draw_flags {
|
|||
|
||||
#ifdef __OPENCL_VERSION__
|
||||
|
||||
void genX(write_address)(global void *dst_ptr,
|
||||
global void *address, uint64_t value);
|
||||
|
||||
void genX(write_3DSTATE_VERTEX_BUFFERS)(global void *dst_ptr,
|
||||
uint32_t buffer_count);
|
||||
|
||||
|
|
@ -68,6 +80,15 @@ void genX(write_VERTEX_BUFFER_STATE)(global void *dst_ptr,
|
|||
uint32_t size,
|
||||
uint32_t stride);
|
||||
|
||||
void genX(write_3DSTATE_INDEX_BUFFER)(global void *dst_ptr,
|
||||
uint64_t buffer_addr,
|
||||
uint32_t buffer_size,
|
||||
uint32_t index_format,
|
||||
uint32_t mocs);
|
||||
|
||||
void genX(write_3DSTATE_VF_TOPOLOGY)(global void *dst_ptr,
|
||||
uint32_t topology);
|
||||
|
||||
void genX(write_3DPRIMITIVE)(global void *dst_ptr,
|
||||
bool is_predicated,
|
||||
bool is_indexed,
|
||||
|
|
@ -93,6 +114,13 @@ void genX(write_3DPRIMITIVE_EXTENDED)(global void *dst_ptr,
|
|||
uint32_t param_draw_id);
|
||||
#endif
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
void genX(write_3DMESH_3D)(global uint32_t *dst_ptr,
|
||||
global void *indirect_ptr,
|
||||
bool is_predicated,
|
||||
bool uses_tbimr);
|
||||
#endif
|
||||
|
||||
void genX(write_MI_BATCH_BUFFER_START)(global void *dst_ptr, uint64_t addr);
|
||||
|
||||
void genX(write_draw)(global uint32_t *dst_ptr,
|
||||
|
|
@ -112,6 +140,10 @@ void genX(copy_data)(global void *dst_ptr,
|
|||
global void *src_ptr,
|
||||
uint32_t size);
|
||||
|
||||
void genX(set_data)(global void *dst_ptr,
|
||||
uint32_t data,
|
||||
uint32_t size);
|
||||
|
||||
#endif /* __OPENCL_VERSION__ */
|
||||
|
||||
#endif /* _LIBANV_SHADERS_H_ */
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ endif
|
|||
|
||||
intel_shader_files = files(
|
||||
'libintel_shaders.h',
|
||||
'dgc.cl',
|
||||
'generate.cl',
|
||||
'generate_draws.cl',
|
||||
'generate_draws_iris.cl',
|
||||
|
|
@ -45,6 +46,7 @@ foreach gen : intel_shaders_gens
|
|||
command : [
|
||||
prog_mesa_clc,
|
||||
intel_shader_files, '-o', '@OUTPUT@', '--depfile', '@DEPFILE@', '--',
|
||||
'-Wno-initializer-overrides',
|
||||
'-DGFX_VERx10=@0@'.format(gen[0]),
|
||||
'-I' + join_paths(meson.current_source_dir(), '.'),
|
||||
'-I' + join_paths(dir_source_root, 'src/compiler/libcl'),
|
||||
|
|
|
|||
|
|
@ -194,5 +194,267 @@ struct anv_push_constants {
|
|||
};
|
||||
};
|
||||
|
||||
/* Size of struct anv_push_constants past the first MAX_PUSH_CONSTANTS_SIZE
 * bytes.
 */
#define ANV_DRIVER_PUSH_CONSTANTS_SIZE (sizeof(struct anv_push_constants) - MAX_PUSH_CONSTANTS_SIZE)

/* Special inline dword values (lower/upper dword of the push address,
 * going by the LDW/UDW suffixes).
 */
#define ANV_INLINE_DWORD_PUSH_ADDRESS_LDW (UINT8_MAX - 0)
#define ANV_INLINE_DWORD_PUSH_ADDRESS_UDW (UINT8_MAX - 1)

/* Location of the user visible part of the dynamic state heap (1GiB) */
#define ANV_DYNAMIC_VISIBLE_HEAP_OFFSET (1 << 30)
|
||||
|
||||
/**
 * Shader stages for generated commands. The graphics stages come first,
 * followed by compute and ray tracing; ANV_DGC_STAGES is the number of
 * stages.
 */
enum anv_dgc_stage {
   ANV_DGC_STAGE_VERTEX    = 0,
   ANV_DGC_STAGE_TESS_CTRL = 1,
   ANV_DGC_STAGE_TESS_EVAL = 2,
   ANV_DGC_STAGE_GEOMETRY  = 3,
   ANV_DGC_STAGE_FRAGMENT  = 4,
   ANV_DGC_STAGE_TASK      = 5,
   ANV_DGC_STAGE_MESH      = 6,

   ANV_DGC_STAGE_COMPUTE   = 7,
   ANV_DGC_STAGE_RT        = 8,

   ANV_DGC_STAGES          = 9,
};

/* Number of graphics stages (vertex up to and including mesh) */
#define ANV_DGC_N_GFX_STAGES (ANV_DGC_STAGE_MESH + 1)
|
||||
|
||||
/* Kind of draw emitted by generated graphics commands */
enum anv_dgc_draw_type {
   ANV_DGC_DRAW_TYPE_SEQUENTIAL = 0,
   ANV_DGC_DRAW_TYPE_INDEXED    = 1,
   ANV_DGC_DRAW_TYPE_MESH       = 2,
};
|
||||
|
||||
#define ANV_DGC_RT_GLOBAL_DISPATCH_SIZE (128)
|
||||
|
||||
enum anv_dgc_push_constant_flags {
|
||||
ANV_DGC_PUSH_CONSTANTS_CMD_ACTIVE = BITFIELD_BIT(0),
|
||||
};
|
||||
|
||||
/**
 * Layout of the push constant data found in the indirect data
 * (VkGeneratedCommandsInfoEXT::indirectAddress).
 */
struct anv_dgc_push_layout {
   /* One entry per range of data to copy from the indirect buffer */
   struct anv_dgc_push_entry {
      /* Where to read the data in the indirect buffer */
      uint32_t seq_offset;

      /* Where to write the data in anv_push_constants::client_data[] */
      uint16_t push_offset;

      /* Number of bytes to copy */
      uint16_t size;
   } entries[32];

   /* enum anv_dgc_push_constant_flags */
   uint8_t flags;

   /* Number of elements used in entries[] */
   uint8_t num_entries;
   uint8_t mocs;

   /* Whether the sequence ID is active and, if so, the offset at which it
    * should be written in the push constant data
    */
   uint16_t seq_id_active;
   uint16_t seq_id_offset;

   /* Offset and size of the push constant commands in the preprocessed
    * buffer
    */
   uint16_t cmd_offset;
   uint16_t cmd_size;

   /* Offset of the data in the indirect buffer, relative to
    * VkGeneratedCommandsInfoEXT::indirectAddress
    */
   uint16_t data_offset;
};
|
||||
|
||||
/**
|
||||
* This structure represents both the data layout (in
|
||||
* VkGeneratedCommandsInfoEXT::indirectAddress) and the command layout in the
|
||||
* preprocess buffer (in VkGeneratedCommandsInfoEXT::preprocessAddress) for
|
||||
* graphics commands
|
||||
*/
|
||||
struct anv_dgc_gfx_layout {
|
||||
struct anv_dgc_index_buffer {
|
||||
uint16_t cmd_offset; /* Offset of 3DSTATE_INDEX_BUFFER */
|
||||
uint16_t cmd_size;
|
||||
uint16_t seq_offset; /* Offset of VkBindIndexBufferIndirectCommandEXT */
|
||||
uint16_t mocs;
|
||||
uint32_t u32_value;
|
||||
uint32_t u16_value;
|
||||
uint32_t u8_value;
|
||||
} index_buffer;
|
||||
|
||||
struct {
|
||||
struct anv_dgc_vertex_buffer {
|
||||
uint16_t seq_offset; /* Offset of VkBindVertexBufferIndirectCommandEXT */
|
||||
uint16_t binding;
|
||||
} buffers[31];
|
||||
uint16_t n_buffers;
|
||||
uint16_t mocs;
|
||||
uint16_t cmd_offset; /* Offset of 3DSTATE_VERTEX_BUFFERS */
|
||||
uint16_t cmd_size;
|
||||
} vertex_buffers;
|
||||
|
||||
struct anv_dgc_push_layout push_constants;
|
||||
|
||||
struct {
|
||||
uint16_t final_cmds_offset;
|
||||
uint16_t final_cmds_size;
|
||||
uint32_t active;
|
||||
} indirect_set;
|
||||
|
||||
struct {
|
||||
uint16_t cmd_offset; /* Offset of 3DPRIMITIVE/3DMESH_3D */
|
||||
uint16_t cmd_size;
|
||||
uint16_t draw_type; /* anv_dgc_gfx_draw_type */
|
||||
uint16_t seq_offset; /* Offset of :
|
||||
* - VkDrawIndirectCommand
|
||||
* - VkDrawIndexedIndirectCommand
|
||||
* - VkDrawMeshTasksIndirectCommandEXT
|
||||
*/
|
||||
} draw;
|
||||
};
|
||||
|
||||
/**
|
||||
* This structure represents both the data layout (in
|
||||
* VkGeneratedCommandsInfoEXT::indirectAddress) and the command layout in the
|
||||
* preprocess buffer (in VkGeneratedCommandsInfoEXT::preprocessAddress) for
|
||||
* compute commands
|
||||
*/
|
||||
struct anv_dgc_cs_layout {
|
||||
struct anv_dgc_push_layout push_constants;
|
||||
|
||||
/* Location of the indirect execution set index */
|
||||
struct {
|
||||
uint32_t seq_offset;
|
||||
uint16_t data_offset;
|
||||
uint16_t active;
|
||||
} indirect_set;
|
||||
|
||||
/* Offset of VkDispatchIndirectCommand */
|
||||
struct {
|
||||
uint32_t seq_offset;
|
||||
uint16_t cmd_offset;
|
||||
uint16_t pad;
|
||||
} dispatch;
|
||||
};
|
||||
|
||||
/* Type of data bound to a push slot (see anv_dgc_push_stage_slot::type) */
enum anv_dgc_push_slot_type {
   ANV_DGC_PUSH_SLOT_TYPE_PUSH_CONSTANTS = 0,
   ANV_DGC_PUSH_SLOT_TYPE_OTHER          = 1,
};
|
||||
|
||||
/**
|
||||
* This structure holds prepacked HW instructions for a set of graphics
|
||||
* shaders forming a pipeline . It is part of the command buffer temporary
|
||||
* memory.
|
||||
*/
|
||||
struct anv_dgc_gfx_descriptor {
|
||||
/* Fully packed instructions ready to be copied directly into the
|
||||
* preprocess buffer (for workarounds)
|
||||
*/
|
||||
uint32_t final_commands[20];
|
||||
uint32_t final_commands_size;
|
||||
|
||||
uint32_t wa_18019110168_remapping_table_offset;
|
||||
|
||||
struct {
|
||||
struct anv_dgc_push_stage_state {
|
||||
union {
|
||||
struct {
|
||||
struct anv_dgc_push_stage_slot {
|
||||
uint16_t push_data_offset;
|
||||
uint16_t push_data_size;
|
||||
uint32_t type; /* enum anv_dgc_push_slot_type */
|
||||
} slots[4];
|
||||
uint32_t n_slots;
|
||||
} legacy;
|
||||
struct anv_dgc_push_bindless_stage {
|
||||
uint16_t push_data_offset;
|
||||
uint16_t inline_dwords_count;
|
||||
uint8_t inline_dwords[8];
|
||||
} bindless;
|
||||
};
|
||||
} stages[ANV_DGC_N_GFX_STAGES];
|
||||
uint32_t active_stages; /* Bitfield of anv_dgc_command_stage */
|
||||
} push_constants;
|
||||
};
|
||||
|
||||
/**
|
||||
* This structure holds information about the graphics state for generation.
|
||||
*/
|
||||
struct anv_dgc_gfx_state {
|
||||
struct anv_dgc_gfx_layout layout;
|
||||
|
||||
struct anv_dgc_gfx_descriptor descriptor;
|
||||
|
||||
struct {
|
||||
uint64_t addresses[4];
|
||||
} push_constants;
|
||||
|
||||
struct {
|
||||
uint16_t instance_multiplier;
|
||||
uint32_t flags; /* ANV_GENERATED_FLAG_* */
|
||||
} draw;
|
||||
};
|
||||
|
||||
/**
 * Prepacked HW instructions for a compute shader. The structure lives either
 * in the memory associated with VkIndirectExecutionSetEXT or, when no
 * indirect execution set is used, in the command buffer temporary memory.
 */
struct anv_dgc_cs_descriptor {
   /* Generation specific part, only one of the two members is used */
   union {
      struct {
         uint32_t compute_walker[40];
         uint32_t inline_dwords_count;
         uint8_t  inline_dwords[8];
      } gfx125;

      struct {
         /* Must stay the first field:
          * MEDIA_INTERFACE_DESCRIPTOR_LOAD::InterfaceDescriptorDataStartAddress
          * needs 64B alignment.
          */
         uint32_t interface_descriptor_data[8];
         uint32_t gpgpu_walker[15];
         uint32_t media_vfe_state[9];

         uint32_t n_threads;
         uint16_t cross_thread_push_size;
         uint8_t  per_thread_push_size;
         uint8_t  subgroup_id_offset;
      } gfx9;
   };

   uint32_t right_mask;
   uint32_t threads;
   uint32_t simd_size;

   uint32_t push_data_offset;

   /* Padding bringing the size of the struct to a multiple of 64B */
   uint32_t pad[1];
};
|
||||
|
||||
/**
 * Information about a ray tracing pipeline: stack sizes, the call handler
 * and the shader binding tables.
 */
struct anv_dgc_rt_indirect_descriptor {
   /* Stack configuration */
   uint32_t ray_stack_stride;
   uint32_t stack_ids_per_dss;
   uint32_t sw_stack_size;

   uint64_t call_handler;

   /* Shader binding tables */
   uint64_t hit_sbt;
   uint64_t miss_sbt;
   uint64_t callable_sbt;
};
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue