brw: enable ray query spilling in SIMD32

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: mesa-stable
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36181>
This commit is contained in:
Lionel Landwerlin 2025-07-15 22:26:57 +03:00 committed by Marge Bot
parent 6d19b898e7
commit 2fa09500a2
3 changed files with 30 additions and 5 deletions

View file

@ -24,6 +24,8 @@
#include "brw_nir_rt.h"
#include "brw_nir_rt_builder.h"
#include "genxml/genX_bits.h"
#include "nir_deref.h"
#include "util/macros.h"
@ -163,6 +165,14 @@ get_ray_query_shadow_addr(nir_builder *b,
brw_nir_rt_sync_stack_id(b)),
BRW_RT_SIZEOF_SHADOW_RAY_QUERY);
/* Top/bottom 16 lanes each get their own stack area */
lane_offset = nir_bcsel(
b,
nir_ilt_imm(b, nir_load_subgroup_invocation(b), 16),
lane_offset,
nir_iadd_imm(b, lane_offset,
brw_rt_ray_queries_shadow_stack_size(state->devinfo) / 2));
return nir_iadd(b, base_addr, nir_i2i64(b, lane_offset));
}
@ -306,7 +316,11 @@ lower_ray_query_intrinsic(nir_builder *b,
if (shadow_stack_addr)
fill_query(b, hw_stack_addr, shadow_stack_addr, ctrl);
nir_trace_ray_intel(b, state->rq_globals, level, ctrl, .synchronous = true);
/* Do not use state->rq_globals here: in SIMD32 it differs between the
* two 16-lane groups, and we want a uniform value for the tracing call.
*/
nir_trace_ray_intel(b, nir_load_ray_query_global_intel(b),
level, ctrl, .synchronous = true);
struct brw_nir_rt_mem_hit_defs hit_in = {};
brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, hw_stack_addr, false,
@ -533,7 +547,18 @@ lower_ray_query_impl(nir_function_impl *impl, struct lowering_state *state)
nir_builder _b, *b = &_b;
_b = nir_builder_at(nir_before_impl(impl));
state->rq_globals = nir_load_ray_query_global_intel(b);
nir_def *rq_globals_base = nir_load_ray_query_global_intel(b);
/* Use a different global for each group of 16 lanes (only in SIMD32). */
state->rq_globals = nir_bcsel(
b,
nir_iand(b,
nir_ige_imm(b, nir_load_subgroup_invocation(b), 16),
nir_ieq_imm(b, nir_load_subgroup_size(b), 32)),
nir_iadd_imm(
b, rq_globals_base,
align(4 * RT_DISPATCH_GLOBALS_length(state->devinfo), 64)),
rq_globals_base);
brw_nir_rt_load_globals_addr(b, &state->globals, state->rq_globals,
state->devinfo);

View file

@ -265,7 +265,7 @@ brw_rt_ray_queries_hw_stacks_size(const struct intel_device_info *devinfo)
* which includes all the threads.
*/
uint32_t max_eu_id = devinfo->max_scratch_ids[MESA_SHADER_COMPUTE];
uint32_t max_simd_size = 16; /* Cannot run in SIMD32 with ray queries */
uint32_t max_simd_size = 32;
return max_eu_id * max_simd_size * BRW_RT_SIZEOF_RAY_QUERY;
}
@ -276,7 +276,7 @@ brw_rt_ray_queries_shadow_stack_size(const struct intel_device_info *devinfo)
* which includes all the threads.
*/
uint32_t max_eu_id = devinfo->max_scratch_ids[MESA_SHADER_COMPUTE];
uint32_t max_simd_size = 16; /* Cannot run in SIMD32 with ray queries */
uint32_t max_simd_size = 32;
return max_eu_id * max_simd_size * BRW_RT_SIZEOF_SHADOW_RAY_QUERY;
}

View file

@ -140,7 +140,7 @@ libintel_compiler_brw = static_library(
cpp_args : ['-Werror=vla'],
gnu_symbol_visibility : 'hidden',
link_with : libintel_compiler_nir,
dependencies : [idep_nir_headers, idep_mesautil, idep_intel_dev, idep_vtn, idep_mda],
dependencies : [idep_nir_headers, idep_mesautil, idep_intel_dev, idep_genxml, idep_vtn, idep_mda],
build_by_default : false,
)