mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 17:58:26 +02:00
radv/rt: Only use ds_bvh_stack_rtn if the stack base is possible to encode
The hardware only provides 13 bits for encoding the stack base (in
dwords). That translates to the stack base being required to be below
8192 dwords, or 32kB. It's possible to exceed this - LDS is 64kB after
all. Add an explicit check to make sure we don't end up with offsets
that overflow the hw's address fields. This fixes Metro Exodus Enhanced
Edition, which was using ray queries in a 1024-thread sized workgroup,
resulting in exactly 64kB of LDS being required for the stack.
This check isn't required for RT pipelines as we always use 32 or 64
wide workgroups with no other LDS used, so it's impossible to reach this
stack base limit.
Cc: mesa-stable
(cherry picked from commit 59a397793e)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40092>
This commit is contained in:
parent
47caf527e3
commit
c4bb652871
2 changed files with 12 additions and 3 deletions
|
|
@ -2854,7 +2854,7 @@
|
|||
"description": "radv/rt: Only use ds_bvh_stack_rtn if the stack base is possible to encode",
|
||||
"nominated": true,
|
||||
"nomination_type": 1,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@ -144,6 +144,7 @@ radv_get_ray_query_type()
|
|||
struct ray_query_vars {
|
||||
nir_variable *var;
|
||||
|
||||
bool use_bvh_stack_rtn;
|
||||
bool shared_stack;
|
||||
uint32_t shared_base;
|
||||
uint32_t stack_entries;
|
||||
|
|
@ -165,10 +166,18 @@ init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray
|
|||
workgroup_size = MAX2(workgroup_size, 32);
|
||||
uint32_t shared_stack_size = workgroup_size * shared_stack_entries * 4;
|
||||
uint32_t shared_offset = align(shader->info.shared_size, 4);
|
||||
|
||||
if (shader->info.stage != MESA_SHADER_COMPUTE || glsl_type_is_array(opaque_type) ||
|
||||
shared_offset + shared_stack_size > pdev->max_shared_size) {
|
||||
dst->stack_entries = MAX_SCRATCH_STACK_ENTRY_COUNT;
|
||||
} else {
|
||||
if (radv_use_bvh_stack_rtn(pdev)) {
|
||||
/* The hardware ds_bvh_stack_rtn address can only encode a stack base up to 8191 dwords. */
|
||||
uint32_t num_wave32_groups = DIV_ROUND_UP(workgroup_size, 32);
|
||||
uint32_t max_group_stack_base = (num_wave32_groups - 1) * 32 * shared_stack_entries;
|
||||
uint32_t max_stack_base = (shared_offset / 4) + max_group_stack_base;
|
||||
dst->use_bvh_stack_rtn = max_stack_base < 8192;
|
||||
}
|
||||
dst->shared_stack = true;
|
||||
dst->shared_base = shared_offset;
|
||||
dst->stack_entries = shared_stack_entries;
|
||||
|
|
@ -303,7 +312,7 @@ lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query
|
|||
|
||||
if (vars->shared_stack) {
|
||||
nir_def *stack_idx = nir_load_local_invocation_index(b);
|
||||
if (radv_use_bvh_stack_rtn(pdev)) {
|
||||
if (vars->use_bvh_stack_rtn) {
|
||||
uint32_t workgroup_size =
|
||||
b->shader->info.workgroup_size[0] * b->shader->info.workgroup_size[1] * b->shader->info.workgroup_size[2];
|
||||
nir_def *addr =
|
||||
|
|
@ -563,7 +572,7 @@ lower_rq_proceed(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_va
|
|||
};
|
||||
|
||||
if (vars->shared_stack) {
|
||||
args.use_bvh_stack_rtn = radv_use_bvh_stack_rtn(pdev);
|
||||
args.use_bvh_stack_rtn = vars->use_bvh_stack_rtn;
|
||||
if (args.use_bvh_stack_rtn) {
|
||||
args.stack_stride = 1;
|
||||
args.stack_base = 0;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue