anv,brw: Allow multiple ray queries without spilling to a shadow stack
Allows a shader to have multiple ray queries without spilling them to a shadow stack. Instead, the driver provides the shader with an array of RTDispatchGlobals structs so that each query gets its own dedicated stack.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38778>
parent 0291aa3e71
commit 1f1de7ebd6
10 changed files with 182 additions and 281 deletions
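The addressing scheme this commit relies on is spelled out in the anv_push_constants comment further down in the diff. The following is a minimal illustrative C sketch, not part of the change: `globals_align` stands in for BRW_RT_DISPATCH_GLOBALS_ALIGN and `stacks_stride` for brw_rt_ray_queries_stacks_stride(devinfo); only the two formulas themselves come from the diff.

   #include <stdint.h>

   /* Address of the pair of RT_DISPATCH_GLOBALS for ray query 'rq':
    * pairs are stored counting up from the push-constant base address. */
   static uint64_t
   rq_globals_addr(uint64_t ray_query_globals, uint32_t rq, uint64_t globals_align)
   {
      return ray_query_globals + (uint64_t)rq * globals_align;
   }

   /* Base address of the scratch stack area for ray query 'rq':
    * stacks are stored counting down from the same base address. */
   static uint64_t
   rq_stacks_addr(uint64_t ray_query_globals, uint32_t rq, uint64_t stacks_stride)
   {
      return ray_query_globals - (uint64_t)rq * stacks_stride;
   }

Each query thus sees its own RTDispatchGlobals and stack through a single push-constant address, which is what removes the need to spill/fill a shadow stack between queries.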
@@ -38,8 +38,10 @@ struct lowering_state {
    struct hash_table *queries;
    uint32_t n_queries;
 
-   struct brw_nir_rt_globals_defs globals;
    nir_def *rq_globals;
+
+   uint32_t num_dss_rt_stacks;
+   uint32_t sync_stacks_stride;
 };
 
 struct brw_ray_query {
@@ -50,12 +52,6 @@ struct brw_ray_query {
 
 #define SIZEOF_QUERY_STATE (sizeof(uint32_t))
 
-static bool
-need_spill_fill(struct lowering_state *state)
-{
-   return state->n_queries > 1;
-}
-
 /**
  * This pass converts opaque RayQuery structures from SPIRV into a vec3 where
  * the first 2 elements store a global address for the query and the third
@@ -98,10 +94,8 @@ create_internal_var(struct brw_ray_query *rq, struct lowering_state *state)
                               NULL);
 }
 
-
-
 static nir_def *
-get_ray_query_shadow_addr(nir_builder *b,
+get_ray_query_stack_index(nir_builder *b,
                           nir_deref_instr *deref,
                           struct lowering_state *state,
                           nir_deref_instr **out_state_deref)
@@ -116,35 +110,17 @@ get_ray_query_shadow_addr(nir_builder *b,
 
    struct brw_ray_query *rq = entry->data;
 
-   /* Base address in the shadow memory of the variable associated with this
-    * ray query variable.
-    */
-   nir_def *base_addr =
-      nir_iadd_imm(b, state->globals.resume_sbt_addr,
-                   brw_rt_ray_queries_shadow_stack_size(state->devinfo) * rq->id);
-
-   bool spill_fill = need_spill_fill(state);
+   nir_def *query_idx = nir_imm_int(b, rq->id);
    *out_state_deref = nir_build_deref_var(b, rq->internal_var);
 
-   if (!spill_fill)
-      return NULL;
-
    /* Just emit code and let constant-folding go to town */
    nir_deref_instr **p = &path.path[1];
    for (; *p; p++) {
       if ((*p)->deref_type == nir_deref_type_array) {
          nir_def *index = (*p)->arr.index.ssa;
-
-         /**/
          *out_state_deref = nir_build_deref_array(b, *out_state_deref, index);
-
-         /**/
-         uint64_t size = MAX2(1, glsl_get_aoa_size((*p)->type)) *
-            brw_rt_ray_queries_shadow_stack_size(state->devinfo);
-
-         nir_def *mul = nir_amul_imm(b, nir_i2i64(b, index), size);
-
-         base_addr = nir_iadd(b, base_addr, mul);
+         index = nir_amul_imm(b, index, MAX2(1, glsl_get_aoa_size((*p)->type)));
+         query_idx = nir_iadd(b, query_idx, index);
       } else {
          UNREACHABLE("Unsupported deref type");
       }
@@ -152,28 +128,7 @@ get_ray_query_shadow_addr(nir_builder *b,
 
    nir_deref_path_finish(&path);
 
-   /* Add the lane offset to the shadow memory address */
-   nir_def *lane_offset =
-      nir_imul_imm(
-         b,
-         nir_iadd(
-            b,
-            nir_imul(
-               b,
-               brw_load_btd_dss_id(b),
-               state->globals.num_dss_rt_stacks),
-            brw_nir_rt_sync_stack_id(b)),
-         BRW_RT_SIZEOF_SHADOW_RAY_QUERY);
-
-   /* Top/bottom 16 lanes each get their own stack area */
-   lane_offset = nir_bcsel(
-      b,
-      nir_ilt_imm(b, nir_load_subgroup_invocation(b), 16),
-      lane_offset,
-      nir_iadd_imm(b, lane_offset,
-                   brw_rt_ray_queries_shadow_stack_size(state->devinfo) / 2));
-
-   return nir_iadd(b, base_addr, nir_i2i64(b, lane_offset));
+   return query_idx;
 }
 
 static void
@@ -209,26 +164,6 @@ update_trace_ctrl_level(nir_builder *b,
    }
 }
 
-static void
-fill_query(nir_builder *b,
-           nir_def *hw_stack_addr,
-           nir_def *shadow_stack_addr,
-           nir_def *ctrl)
-{
-   brw_nir_memcpy_global(b, hw_stack_addr, 64, shadow_stack_addr, 64,
-                         BRW_RT_SIZEOF_RAY_QUERY);
-}
-
-static void
-spill_query(nir_builder *b,
-            nir_def *hw_stack_addr,
-            nir_def *shadow_stack_addr)
-{
-   brw_nir_memcpy_global(b, shadow_stack_addr, 64, hw_stack_addr, 64,
-                         BRW_RT_SIZEOF_RAY_QUERY);
-}
-
-
 static void
 lower_ray_query_intrinsic(nir_builder *b,
                           nir_intrinsic_instr *intrin,
@@ -239,12 +174,20 @@ lower_ray_query_intrinsic(nir_builder *b,
    b->cursor = nir_instr_remove(&intrin->instr);
 
    nir_deref_instr *ctrl_level_deref;
-   nir_def *shadow_stack_addr =
-      get_ray_query_shadow_addr(b, deref, state, &ctrl_level_deref);
-   nir_def *hw_stack_addr =
-      brw_nir_rt_sync_stack_addr(b, state->globals.base_mem_addr,
-                                 state->globals.num_dss_rt_stacks);
-   nir_def *stack_addr = shadow_stack_addr ? shadow_stack_addr : hw_stack_addr;
+   nir_def *stack_index =
+      get_ray_query_stack_index(b, deref, state, &ctrl_level_deref);
+   nir_def *rq_globals_addr =
+      nir_iadd(b, state->rq_globals,
+               nir_i2i64(b, nir_amul_imm(b, stack_index,
+                                         BRW_RT_DISPATCH_GLOBALS_ALIGN)));
+   nir_def *stack_base_addr =
+      nir_isub(b, state->rq_globals,
+               nir_i2i64(b, nir_amul_imm(b, stack_index,
+                                         state->sync_stacks_stride)));
+   nir_def *stack_addr =
+      brw_nir_rt_sync_stack_addr(b, stack_base_addr,
+                                 state->num_dss_rt_stacks,
+                                 state->devinfo);
    mesa_shader_stage stage = b->shader->info.stage;
 
    switch (intrin->intrinsic) {
@@ -313,22 +256,12 @@ lower_ray_query_intrinsic(nir_builder *b,
        */
       brw_nir_rt_query_mark_done(b, stack_addr);
 
-      if (shadow_stack_addr)
-         fill_query(b, hw_stack_addr, shadow_stack_addr, ctrl);
-
-      /* Do not use state->rq_globals, we want a uniform value for the
-       * tracing call.
-       */
-      nir_trace_ray_intel(b, nir_load_ray_query_global_intel(b),
-                          level, ctrl, .synchronous = true);
+      nir_trace_ray_intel(b, rq_globals_addr, level, ctrl, .synchronous = true);
 
       struct brw_nir_rt_mem_hit_defs hit_in = {};
-      brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, hw_stack_addr, false,
+      brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, stack_addr, false,
                                         state->devinfo);
 
-      if (shadow_stack_addr)
-         spill_query(b, hw_stack_addr, shadow_stack_addr);
-
       update_trace_ctrl_level(b, ctrl_level_deref,
                               NULL, NULL,
                               nir_imm_int(b, GEN_RT_TRACE_RAY_CONTINUE),
@@ -547,21 +480,17 @@ lower_ray_query_impl(nir_function_impl *impl, struct lowering_state *state)
    nir_builder _b, *b = &_b;
    _b = nir_builder_at(nir_before_impl(impl));
 
-   nir_def *rq_globals_base = nir_load_ray_query_global_intel(b);
+   state->rq_globals = nir_load_ray_query_global_intel(b);
 
-   /* Use a different global for each 16lanes groups (only in SIMD32). */
-   state->rq_globals = nir_bcsel(
-      b,
-      nir_iand(b,
-               nir_ige_imm(b, nir_load_subgroup_invocation(b), 16),
-               nir_ieq_imm(b, nir_load_subgroup_size(b), 32)),
-      nir_iadd_imm(
-         b, rq_globals_base,
-         align(4 * RT_DISPATCH_GLOBALS_length(state->devinfo), 64)),
-      rq_globals_base);
+   /* ATSM PRMs Vol 9, "State Model for Ray Tracing - RTDispatchGlobals"
+    *
+    * "For Sync Ray tracing (i.e. using RayQueries), SW must allocate
+    *  space assuming 2K StackIDs"
+    */
+   state->num_dss_rt_stacks = 2048; /* TODO */
 
-   brw_nir_rt_load_globals_addr(b, &state->globals, state->rq_globals,
-                                state->devinfo);
+   state->sync_stacks_stride =
+      brw_rt_ray_queries_stacks_stride(state->devinfo);
 
    nir_foreach_block_safe(block, impl) {
       nir_foreach_instr_safe(instr, block) {
@@ -178,7 +178,8 @@ brw_nir_rt_sw_hotzone_addr(nir_builder *b,
 static inline nir_def *
 brw_nir_rt_sync_stack_addr(nir_builder *b,
                            nir_def *base_mem_addr,
-                           nir_def *num_dss_rt_stacks)
+                           uint32_t num_dss_rt_stacks,
+                           const struct intel_device_info *devinfo)
 {
    /* Bspec 47547 (Xe) and 56936 (Xe2+) say:
     * For Ray queries (Synchronous Ray Tracing), the formula is similar but
@@ -195,12 +196,29 @@ brw_nir_rt_sync_stack_addr(nir_builder *b,
     * NUM_SYNC_STACKID_PER_DSS instead.
     */
    nir_def *offset32 =
-      nir_imul(b,
+      nir_imul_imm(b,
               nir_iadd(b,
-                        nir_imul(b, brw_load_btd_dss_id(b),
+                        nir_imul_imm(b, brw_load_btd_dss_id(b),
                                  num_dss_rt_stacks),
                         nir_iadd_imm(b, brw_nir_rt_sync_stack_id(b), 1)),
-               nir_imm_int(b, BRW_RT_SIZEOF_RAY_QUERY));
+               BRW_RT_SIZEOF_RAY_QUERY);
+
+   /* StackID offset for the bottom 16 lanes in SIMD32, this must match the
+    * offset of the second base address provided by the driver through the
+    * pair of ray query RTDispatchGlobals
+    */
+   uint32_t simd32_stack_offset =
+      num_dss_rt_stacks * BRW_RT_SIZEOF_RAY_QUERY *
+      intel_device_info_dual_subslice_id_bound(devinfo);
+
+   offset32 =
+      nir_bcsel(b,
+                nir_iand(b,
+                         nir_ige_imm(b, nir_load_subgroup_invocation(b), 16),
+                         nir_ieq_imm(b, nir_load_subgroup_size(b), 32)),
+                nir_iadd_imm(b, offset32, simd32_stack_offset),
+                offset32);
 
    return nir_isub(b, base_mem_addr, nir_u2u64(b, offset32));
 }
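For reference, here is a CPU-side sketch of the address computation in this hunk. The helper is only illustrative: BRW_RT_SIZEOF_RAY_QUERY, the DSS id, the sync stack id, the dual-subslice bound and the subgroup layout are passed in as plain values, whereas in the shader they come from system values and device info; the formula itself mirrors the NIR code above.

   #include <stdbool.h>
   #include <stdint.h>

   static uint64_t
   sync_stack_addr(uint64_t base_mem_addr,
                   uint32_t dss_id,
                   uint32_t sync_stack_id,
                   uint32_t num_dss_rt_stacks,
                   uint32_t sizeof_ray_query,
                   uint32_t dual_subslice_id_bound,
                   bool second_half_of_simd32)
   {
      /* Mirror of the expression above:
       * (dss_id * num_dss_rt_stacks + (stack_id + 1)) * BRW_RT_SIZEOF_RAY_QUERY */
      uint64_t offset = (uint64_t)(dss_id * num_dss_rt_stacks + (sync_stack_id + 1)) *
                        sizeof_ray_query;

      /* Lanes 16..31 of a SIMD32 subgroup use a second stack area; its offset
       * matches the second RTDispatchGlobals base address set up by the driver. */
      if (second_half_of_simd32)
         offset += (uint64_t)num_dss_rt_stacks * sizeof_ray_query *
                   dual_subslice_id_bound;

      /* The final address is computed downward from the base (nir_isub above). */
      return base_mem_addr - offset;
   }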
@@ -300,7 +318,6 @@ struct brw_nir_rt_globals_defs {
    nir_def *launch_size;
    nir_def *call_sbt_addr;
    nir_def *call_sbt_stride;
-   nir_def *resume_sbt_addr;
 };
 
 static inline void
@@ -368,8 +385,6 @@ brw_nir_rt_load_globals_addr(nir_builder *b,
       defs->call_sbt_stride =
         nir_iand_imm(b, nir_unpack_32_2x16_split_x(b, nir_channel(b, data, 2)),
                      0x1fff);
-      defs->resume_sbt_addr =
-         nir_pack_64_2x32(b, nir_channels(b, data, 0x3 << 3));
    } else {
       defs->call_sbt_addr =
          nir_pack_64_2x32_split(b, nir_channel(b, data, 0),
@@ -377,9 +392,6 @@ brw_nir_rt_load_globals_addr(nir_builder *b,
                                 nir_imm_int(b, 0)));
       defs->call_sbt_stride =
          nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 1));
-
-      defs->resume_sbt_addr =
-         nir_pack_64_2x32(b, nir_channels(b, data, 0x3 << 2));
    }
 }
 
@@ -36,7 +36,7 @@ extern "C" {
 #define BRW_RT_SBT_HANDLE_SIZE 32
 
 /** RT_DISPATCH_GLOBALS size (see gen_rt.xml) */
-#define BRW_RT_DISPATCH_GLOBALS_SIZE 80
+#define BRW_RT_DISPATCH_GLOBALS_SIZE 72
 
 /** RT_DISPATCH_GLOBALS alignment
  *
@@ -191,10 +191,6 @@ struct brw_rt_raygen_trampoline_params {
    (BRW_RT_SIZEOF_RAY + BRW_RT_SIZEOF_TRAV_STACK) * BRW_RT_MAX_BVH_LEVELS + \
    (BRW_RT_MAX_BVH_LEVELS % 2 ? 32 : 0))
 
-#define BRW_RT_SIZEOF_SHADOW_RAY_QUERY \
-   (BRW_RT_SIZEOF_HIT_INFO * 2 + \
-    (BRW_RT_SIZEOF_RAY + BRW_RT_SIZEOF_TRAV_STACK) * BRW_RT_MAX_BVH_LEVELS)
-
 #define BRW_RT_SIZEOF_HW_STACK \
    (BRW_RT_SIZEOF_HIT_INFO * 2 + \
     BRW_RT_SIZEOF_RAY * BRW_RT_MAX_BVH_LEVELS + \
@@ -270,25 +266,15 @@ brw_rt_ray_queries_hw_stacks_size(const struct intel_device_info *devinfo)
 }
 
 static inline uint32_t
-brw_rt_ray_queries_shadow_stack_size(const struct intel_device_info *devinfo)
+brw_rt_ray_queries_stacks_offset(uint32_t num_queries)
 {
-   /* Maximum slice/subslice/EU ID can be computed from the max_scratch_ids
-    * which includes all the threads.
-    */
-   uint32_t max_eu_id = devinfo->max_scratch_ids[MESA_SHADER_COMPUTE];
-   uint32_t max_simd_size = 32;
-   return max_eu_id * max_simd_size * BRW_RT_SIZEOF_SHADOW_RAY_QUERY;
+   return BRW_RT_DISPATCH_GLOBALS_ALIGN << util_logbase2_ceil(num_queries);
 }
 
 static inline uint32_t
-brw_rt_ray_queries_shadow_stacks_size(const struct intel_device_info *devinfo,
-                                      uint32_t ray_queries)
+brw_rt_ray_queries_stacks_stride(const struct intel_device_info *devinfo)
 {
-   /* Don't bother a shadow stack if we only have a single query. We can
-    * directly write in the HW buffer.
-    */
-   return (ray_queries > 1 ? ray_queries : 0) * brw_rt_ray_queries_shadow_stack_size(devinfo) +
-          ray_queries * 4; /* Ctrl + Level data */
+   return align(brw_rt_ray_queries_hw_stacks_size(devinfo), 4096);
 }
 
 #ifdef __cplusplus
@@ -28,7 +28,6 @@
     <field name="Launch Height" dword="14" bits="31:0" type="uint" />
     <field name="Launch Depth" dword="15" bits="31:0" type="uint" />
     <field name="Callable Group Table" dword="16" bits="63:0" type="RT_SHADER_TABLE" />
-    <field name="Resume Shader Table" dword="18" bits="63:0" type="address" />
   </struct>
   <struct name="RT_GENERAL_SBT_HANDLE" length="8">
     <field name="General" dword="0" bits="63:0" type="BINDLESS_SHADER_RECORD" />
@@ -36,6 +36,5 @@
     <field name="Launch Depth" dword="15" bits="31:0" type="uint" />
     <field name="Callable Group Table" dword="16" bits="63:0" type="address" />
     <field name="Callable Group Stride" dword="18" bits="12:0" type="uint" />
-    <field name="Resume Shader Table" dword="19" bits="63:0" type="address" />
   </struct>
 </genxml>
@@ -442,58 +442,84 @@ anv_cmd_buffer_set_rt_query_buffer(struct anv_cmd_buffer *cmd_buffer,
                                    uint32_t ray_queries,
                                    VkShaderStageFlags stages)
 {
-   struct anv_device *device = cmd_buffer->device;
-   uint8_t idx = anv_get_ray_query_bo_index(cmd_buffer);
+   if (ray_queries > cmd_buffer->state.num_ray_query_globals) {
+      struct anv_device *device = cmd_buffer->device;
+      uint8_t wa_idx = anv_get_ray_query_bo_index(cmd_buffer);
 
-   uint64_t ray_shadow_size =
-      align64(brw_rt_ray_queries_shadow_stacks_size(device->info, ray_queries),
-              4096);
-   if (ray_shadow_size > 0 &&
-       (!cmd_buffer->state.ray_query_shadow_bo ||
-        cmd_buffer->state.ray_query_shadow_bo->size < ray_shadow_size)) {
-      unsigned shadow_size_log2 = MAX2(util_logbase2_ceil(ray_shadow_size), 16);
-      unsigned bucket = shadow_size_log2 - 16;
-      assert(bucket < ARRAY_SIZE(device->ray_query_shadow_bos[0]));
+      unsigned bucket = util_logbase2_ceil(ray_queries);
+      assert(bucket < ARRAY_SIZE(device->ray_query_bos[0]));
 
-      struct anv_bo *bo = p_atomic_read(&device->ray_query_shadow_bos[idx][bucket]);
+      uint64_t offset = brw_rt_ray_queries_stacks_offset(1 << bucket);
+      uint64_t stride = brw_rt_ray_queries_stacks_stride(device->info);
+
+      struct anv_bo *bo = p_atomic_read(&device->ray_query_bos[wa_idx][bucket]);
       if (bo == NULL) {
          struct anv_bo *new_bo;
-         VkResult result = anv_device_alloc_bo(device, "RT queries shadow",
-                                               1 << shadow_size_log2,
-                                               ANV_BO_ALLOC_INTERNAL, /* alloc_flags */
-                                               0, /* explicit_address */
-                                               &new_bo);
+         VkResult result =
+            anv_device_alloc_bo(device, "RT queries scratch",
+                                offset + (stride << bucket), /* size */
+                                ANV_BO_ALLOC_INTERNAL |
+                                ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE, /* alloc_flags */
+                                0, /* explicit_address */
+                                &new_bo);
 
         ANV_DMR_BO_ALLOC(&cmd_buffer->vk.base, new_bo, result);
         if (result != VK_SUCCESS) {
            anv_batch_set_error(&cmd_buffer->batch, result);
            return;
        }
 
-         bo = p_atomic_cmpxchg(&device->ray_query_shadow_bos[idx][bucket], NULL, new_bo);
+         /* Map extra space we added at end of the buffer, we will write the
+          * array of RT_DISPATCH_GLOBALS into it so we can use only a single
+          * memory address in our shaders for all stacks and globals
+          */
+         void *map;
+         result = anv_device_map_bo(device, new_bo, stride << bucket,
+                                    offset, NULL, &map);
+         if (result != VK_SUCCESS) {
+            ANV_DMR_BO_FREE(&cmd_buffer->vk.base, new_bo);
+            anv_device_release_bo(device, new_bo);
+            anv_batch_set_error(&cmd_buffer->batch, result);
+            return;
+         }
+
+         anv_genX(device->info, setup_ray_query_globals)(device,
+                                                         new_bo,
+                                                         stride << bucket,
+                                                         map,
+                                                         1 << bucket);
+
+#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
+         if (device->physical->memory.need_flush)
+            util_flush_inval_range(map, offset);
+#endif
+
+         anv_device_unmap_bo(device, new_bo, map, offset, false);
+
+         bo = p_atomic_cmpxchg(&device->ray_query_bos[wa_idx][bucket], NULL, new_bo);
         if (bo != NULL) {
-            ANV_DMR_BO_FREE(&device->vk.base, new_bo);
+            ANV_DMR_BO_FREE(&cmd_buffer->vk.base, new_bo);
            anv_device_release_bo(device, new_bo);
         } else {
            bo = new_bo;
        }
      }
-      cmd_buffer->state.ray_query_shadow_bo = bo;
 
-      /* Add the ray query buffers to the batch list. */
-      anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
-                            cmd_buffer->state.ray_query_shadow_bo);
+      /* Add the HW buffer to the list of BO used. */
+      anv_reloc_list_add_bo(cmd_buffer->batch.relocs, bo);
+
+      cmd_buffer->state.ray_query_globals = (struct anv_address) {
+         .bo = bo,
+         .offset = (int64_t) (stride << bucket),
+      };
+
+      cmd_buffer->state.num_ray_query_globals = 1 << bucket;
   }
 
-   /* Add the HW buffer to the list of BO used. */
-   assert(device->ray_query_bo[idx]);
-   anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
-                         device->ray_query_bo[idx]);
-
-   /* Fill the push constants & mark them dirty. */
-   struct anv_address ray_query_globals_addr =
-      anv_genX(device->info, cmd_buffer_ray_query_globals)(cmd_buffer);
+   /* Update the push constants & mark them dirty. */
   pipeline_state->push_constants.ray_query_globals =
-      anv_address_physical(ray_query_globals_addr);
+      anv_address_physical(cmd_buffer->state.ray_query_globals);
   cmd_buffer->state.push_constants_dirty |= stages;
   pipeline_state->push_constants_data_dirty = true;
 }
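A rough sketch of the sizing math used in the allocation above. The helper is hypothetical: `hw_stacks_size` stands for brw_rt_ray_queries_hw_stacks_size(devinfo), `globals_align` for BRW_RT_DISPATCH_GLOBALS_ALIGN, and `bucket_for()` for util_logbase2_ceil(); the bucket, offset, stride and total-size expressions mirror the diff.

   #include <stdint.h>

   /* Smallest power-of-two bucket that fits 'v' queries. */
   static unsigned
   bucket_for(uint32_t v)
   {
      unsigned b = 0;
      while ((1u << b) < v)
         b++;
      return b;
   }

   /* Total BO size for a bucket of 2^bucket ray queries: the per-query scratch
    * stacks come first (stride << bucket bytes), the array of RT_DISPATCH_GLOBALS
    * pairs (offset bytes) is placed after them. */
   static uint64_t
   ray_query_bo_size(uint32_t ray_queries, uint64_t hw_stacks_size,
                     uint64_t globals_align)
   {
      unsigned bucket = bucket_for(ray_queries);
      uint64_t offset = globals_align << bucket;                   /* globals array */
      uint64_t stride = (hw_stacks_size + 4095) & ~(uint64_t)4095; /* per-query stacks */
      return offset + (stride << bucket);
   }

The globals array is written at byte offset stride << bucket into the BO, so the single ray_query_globals push-constant address can reach both the globals (counting up) and every query's stack (counting down).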
@@ -341,22 +341,16 @@ VkResult anv_CreateDevice(
    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
    VkResult result;
    struct anv_device *device;
-   bool device_has_compute_queue = false;
 
    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
 
    /* Check requested queues and fail if we are requested to create any
     * queues with flags we don't support.
     */
-   for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
+   for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
       if (pCreateInfo->pQueueCreateInfos[i].flags & ~VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT)
          return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
-
-      const struct anv_queue_family *family =
-         &physical_device->queue.families[pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex];
-      device_has_compute_queue |= family->engine_class == INTEL_ENGINE_CLASS_COMPUTE;
-   }
 
    device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
                        sizeof(*device), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
@@ -786,36 +780,9 @@ VkResult anv_CreateDevice(
                           device->workaround_bo->size,
                           INTEL_DEBUG_BLOCK_TYPE_FRAME);
 
-   if (device->vk.enabled_extensions.KHR_ray_query) {
-      uint32_t ray_queries_size =
-         align(brw_rt_ray_queries_hw_stacks_size(device->info), 4096);
-
-      result = anv_device_alloc_bo(device, "ray queries",
-                                   ray_queries_size,
-                                   ANV_BO_ALLOC_INTERNAL,
-                                   0 /* explicit_address */,
-                                   &device->ray_query_bo[0]);
-      ANV_DMR_BO_ALLOC(&device->vk.base, device->ray_query_bo[0], result);
-      if (result != VK_SUCCESS)
-         goto fail_alloc_device_bo;
-
-      /* We need a separate ray query bo for CCS engine with Wa_14022863161. */
-      if (intel_needs_workaround(device->isl_dev.info, 14022863161) &&
-          device_has_compute_queue) {
-         result = anv_device_alloc_bo(device, "ray queries",
-                                      ray_queries_size,
-                                      ANV_BO_ALLOC_INTERNAL,
-                                      0 /* explicit_address */,
-                                      &device->ray_query_bo[1]);
-         ANV_DMR_BO_ALLOC(&device->vk.base, device->ray_query_bo[1], result);
-         if (result != VK_SUCCESS)
-            goto fail_ray_query_bo;
-      }
-   }
-
    result = anv_device_init_trivial_batch(device);
    if (result != VK_SUCCESS)
-      goto fail_ray_query_bo;
+      goto fail_alloc_device_bo;
 
    /* Emit the CPS states before running the initialization batch as those
     * structures are referenced.
@@ -1073,13 +1040,6 @@ VkResult anv_CreateDevice(
  fail_trivial_batch:
    ANV_DMR_BO_FREE(&device->vk.base, device->trivial_batch_bo);
    anv_device_release_bo(device, device->trivial_batch_bo);
- fail_ray_query_bo:
-   for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_bo); i++) {
-      if (device->ray_query_bo[i]) {
-         ANV_DMR_BO_FREE(&device->vk.base, device->ray_query_bo[i]);
-         anv_device_release_bo(device, device->ray_query_bo[i]);
-      }
-   }
  fail_alloc_device_bo:
    if (device->mem_fence_bo) {
       ANV_DMR_BO_FREE(&device->vk.base, device->mem_fence_bo);
@@ -1231,17 +1191,13 @@ void anv_DestroyDevice(
    anv_scratch_pool_finish(device, &device->protected_scratch_pool);
 
    if (device->vk.enabled_extensions.KHR_ray_query) {
-      for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_bo); i++) {
-         for (unsigned j = 0; j < ARRAY_SIZE(device->ray_query_shadow_bos[0]); j++) {
-            if (device->ray_query_shadow_bos[i][j] != NULL) {
-               ANV_DMR_BO_FREE(&device->vk.base, device->ray_query_shadow_bos[i][j]);
-               anv_device_release_bo(device, device->ray_query_shadow_bos[i][j]);
+      for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_bos); i++) {
+         for (unsigned j = 0; j < ARRAY_SIZE(device->ray_query_bos[0]); j++) {
+            if (device->ray_query_bos[i][j] != NULL) {
+               ANV_DMR_BO_FREE(&device->vk.base, device->ray_query_bos[i][j]);
+               anv_device_release_bo(device, device->ray_query_bos[i][j]);
            }
         }
-         if (device->ray_query_bo[i]) {
-            ANV_DMR_BO_FREE(&device->vk.base, device->ray_query_bo[i]);
-            anv_device_release_bo(device, device->ray_query_bo[i]);
-         }
      }
    }
    ANV_DMR_BO_FREE(&device->vk.base, device->workaround_bo);
@@ -226,7 +226,11 @@ void genX(cmd_buffer_mark_image_written)(struct anv_cmd_buffer *cmd_buffer,
 
 void genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer);
 
-struct anv_address genX(cmd_buffer_ray_query_globals)(struct anv_cmd_buffer *cmd_buffer);
+void genX(setup_ray_query_globals)(struct anv_device *device,
+                                   struct anv_bo* bo,
+                                   uint64_t offset,
+                                   void* map,
+                                   uint32_t num_queries);
 
 void genX(cmd_buffer_ensure_cfe_state)(struct anv_cmd_buffer *cmd_buffer,
                                        uint32_t total_scratch);
@@ -2625,22 +2625,11 @@ struct anv_device {
 
    uint32_t protected_session_id;
 
-   /** Shadow ray query BO
-    *
-    * The ray_query_bo only holds the current ray being traced. When using
-    * more than 1 ray query per thread, we cannot fit all the queries in
-    * there, so we need a another buffer to hold query data that is not
-    * currently being used by the HW for tracing, similar to a scratch space.
-    *
-    * The size of the shadow buffer depends on the number of queries per
-    * shader.
+   /** Pool of ray query buffers used to communicated with HW unit.
     *
     * We might need a buffer per queue family due to Wa_14022863161.
     */
-   struct anv_bo *ray_query_shadow_bos[2][16];
-   /** Ray query buffer used to communicated with HW unit.
-    */
-   struct anv_bo *ray_query_bo[2];
+   struct anv_bo *ray_query_bos[2][16];
 
    struct anv_shader_internal *rt_trampoline;
    struct anv_shader_internal *rt_trivial_return;
@@ -4247,10 +4236,19 @@ struct anv_push_constants {
     */
    uint32_t surfaces_base_offset;
 
-   /** Ray query globals
+   /**
+    * Pointer to ray query stacks and their associated pairs of
+    * RT_DISPATCH_GLOBALS structures (see genX(setup_ray_query_globals))
     *
-    * Pointer to a couple of RT_DISPATCH_GLOBALS structures (see
-    * genX(cmd_buffer_ray_query_globals))
+    * The pair of globals for each query object are stored counting up from
+    * this address in units of BRW_RT_DISPATCH_GLOBALS_ALIGN:
+    *
+    * rq_globals = ray_query_globals + (rq * BRW_RT_DISPATCH_GLOBALS_ALIGN)
+    *
+    * The raytracing scratch area for each ray query is stored counting down
+    * from this address in units of brw_rt_ray_queries_stacks_stride(devinfo):
+    *
+    * rq_stacks_addr = ray_query_globals - (rq * ray_queries_stacks_stride)
     */
    uint64_t ray_query_globals;
 
@@ -4753,9 +4751,14 @@ struct anv_cmd_state {
    unsigned current_hash_scale;
 
    /**
-    * A buffer used for spill/fill of ray queries.
+    * Number of ray query buffers allocated.
     */
-   struct anv_bo * ray_query_shadow_bo;
+   uint32_t num_ray_query_globals;
+
+   /**
+    * Current array of RT_DISPATCH_GLOBALS for ray queries.
+    */
+   struct anv_address ray_query_globals;
 
    /** Pointer to the last emitted COMPUTE_WALKER.
     *
@@ -37,6 +37,7 @@
 #include "ds/intel_tracepoints.h"
 
 #include "genX_mi_builder.h"
+#include "nir_builder.h"
 
 void
 genX(cmd_buffer_ensure_cfe_state)(struct anv_cmd_buffer *cmd_buffer,
@@ -811,49 +812,35 @@ void genX(CmdDispatchIndirect)(
    genX(cmd_buffer_dispatch_indirect)(cmd_buffer, addr, false);
 }
 
-struct anv_address
-genX(cmd_buffer_ray_query_globals)(struct anv_cmd_buffer *cmd_buffer)
+void
+genX(setup_ray_query_globals)(struct anv_device *device,
+                              struct anv_bo* bo,
+                              uint64_t offset,
+                              void* map,
+                              uint32_t num_queries)
 {
 #if GFX_VERx10 >= 125
-   struct anv_device *device = cmd_buffer->device;
-
-   struct anv_state state =
-      anv_cmd_buffer_alloc_temporary_state(cmd_buffer,
-                                           2 * align(BRW_RT_DISPATCH_GLOBALS_SIZE, 64),
-                                           BRW_RT_DISPATCH_GLOBALS_ALIGN);
-   struct brw_rt_scratch_layout layout;
-   uint32_t stack_ids_per_dss = 2048; /* TODO: can we use a lower value in
-                                       * some cases?
-                                       */
-   brw_rt_compute_scratch_layout(&layout, device->info,
-                                 stack_ids_per_dss, 1 << 10);
-
-   uint8_t idx = anv_get_ray_query_bo_index(cmd_buffer);
-
-   for (uint32_t i = 0; i < 2; i++) {
-      const struct GENX(RT_DISPATCH_GLOBALS) rtdg = {
-         .MemBaseAddress = (struct anv_address) {
-            /* The ray query HW computes offsets from the top of the buffer, so
-             * let the address at the end of the buffer.
-             */
-            .bo = device->ray_query_bo[idx],
-            .offset = (i + 1) * (device->ray_query_bo[idx]->size / 2),
-         },
-         .AsyncRTStackSize = layout.ray_stack_stride / 64,
-         .NumDSSRTStacks = layout.stack_ids_per_dss,
-         .MaxBVHLevels = BRW_RT_MAX_BVH_LEVELS,
-         .Flags = RT_DEPTH_TEST_LESS_EQUAL,
-         .ResumeShaderTable = (struct anv_address) {
-            .bo = cmd_buffer->state.ray_query_shadow_bo,
-         },
-      };
-      GENX(RT_DISPATCH_GLOBALS_pack)(
-         NULL,
-         state.map + i * align(4 * GENX(RT_DISPATCH_GLOBALS_length), 64),
-         &rtdg);
-   }
-
-   return anv_cmd_buffer_temporary_state_address(cmd_buffer, state);
+   assert(num_queries > 0);
+   uint64_t stack_stride = brw_rt_ray_queries_stacks_stride(device->info);
+
+   for (uint32_t i = 0; i < num_queries; ++i)
+      for (uint32_t j = 0; j < 2; j++)
+         GENX(RT_DISPATCH_GLOBALS_pack)(NULL,
+            (char*) map +
+            i * BRW_RT_DISPATCH_GLOBALS_ALIGN +
+            j * align(4 * GENX(RT_DISPATCH_GLOBALS_length), 64),
+            &(struct GENX(RT_DISPATCH_GLOBALS)) {
+               .MemBaseAddress = (struct anv_address) {
+                  /* The ray query HW computes offsets from the top of the
+                   * buffer, so set the address at the end of the buffer.
+                   */
+                  .bo = bo,
+                  .offset = offset - i * stack_stride - j * stack_stride / 2,
+               },
+               .AsyncRTStackSize = BRW_RT_SIZEOF_RAY_QUERY / 64,
+               .NumDSSRTStacks = 2048, /* TODO */
+               .MaxBVHLevels = BRW_RT_MAX_BVH_LEVELS,
+               .Flags = RT_DEPTH_TEST_LESS_EQUAL,
+            });
 #else
    UNREACHABLE("Not supported");
 #endif