mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-13 17:30:32 +01:00
radv/rt: Only use ds_bvh_stack_rtn if the stack base is possible to encode
The hardware only provides 13 bits for encoding the stack base (in dwords). That translates to the stack base being required to be below 8192 dwords, or 32kB. It's possible to exceed this - LDS is 64kB after all. Add an explicit check to make sure we don't end up with offsets that overflow the hw's address fields. This fixes Metro Exodus Enhanced Edition, which was using ray queries in a 1024-thread sized workgroup, resulting in exactly 64kB of LDS being required for the stack. This check isn't required for RT pipelines as we always use 32 or 64 wide workgroups with no other LDS used, so it's impossible to reach this stack base limit. Cc: mesa-stable Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39691>
This commit is contained in:
parent
b0cc03dfbd
commit
59a397793e
1 changed files with 11 additions and 2 deletions
|
|
@ -144,6 +144,7 @@ radv_get_ray_query_type()
|
|||
struct ray_query_vars {
|
||||
nir_variable *var;
|
||||
|
||||
bool use_bvh_stack_rtn;
|
||||
bool shared_stack;
|
||||
uint32_t shared_base;
|
||||
uint32_t stack_entries;
|
||||
|
|
@ -165,10 +166,18 @@ init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray
|
|||
workgroup_size = MAX2(workgroup_size, 32);
|
||||
uint32_t shared_stack_size = workgroup_size * shared_stack_entries * 4;
|
||||
uint32_t shared_offset = align(shader->info.shared_size, 4);
|
||||
|
||||
if (shader->info.stage != MESA_SHADER_COMPUTE || glsl_type_is_array(opaque_type) ||
|
||||
shared_offset + shared_stack_size > pdev->max_shared_size) {
|
||||
dst->stack_entries = MAX_SCRATCH_STACK_ENTRY_COUNT;
|
||||
} else {
|
||||
if (radv_use_bvh_stack_rtn(pdev)) {
|
||||
/* The hardware ds_bvh_stack_rtn address can only encode a stack base up to 8191 dwords. */
|
||||
uint32_t num_wave32_groups = DIV_ROUND_UP(workgroup_size, 32);
|
||||
uint32_t max_group_stack_base = (num_wave32_groups - 1) * 32 * shared_stack_entries;
|
||||
uint32_t max_stack_base = (shared_offset / 4) + max_group_stack_base;
|
||||
dst->use_bvh_stack_rtn = max_stack_base < 8192;
|
||||
}
|
||||
dst->shared_stack = true;
|
||||
dst->shared_base = shared_offset;
|
||||
dst->stack_entries = shared_stack_entries;
|
||||
|
|
@ -303,7 +312,7 @@ lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query
|
|||
|
||||
if (vars->shared_stack) {
|
||||
nir_def *stack_idx = nir_load_local_invocation_index(b);
|
||||
if (radv_use_bvh_stack_rtn(pdev)) {
|
||||
if (vars->use_bvh_stack_rtn) {
|
||||
uint32_t workgroup_size =
|
||||
b->shader->info.workgroup_size[0] * b->shader->info.workgroup_size[1] * b->shader->info.workgroup_size[2];
|
||||
nir_def *addr =
|
||||
|
|
@ -563,7 +572,7 @@ lower_rq_proceed(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_va
|
|||
};
|
||||
|
||||
if (vars->shared_stack) {
|
||||
args.use_bvh_stack_rtn = radv_use_bvh_stack_rtn(pdev);
|
||||
args.use_bvh_stack_rtn = vars->use_bvh_stack_rtn;
|
||||
if (args.use_bvh_stack_rtn) {
|
||||
args.stack_stride = 1;
|
||||
args.stack_base = 0;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue