brw: enable ex_bso for LSC_SS

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35160>
This commit is contained in:
Lionel Landwerlin 2025-05-23 21:18:41 +03:00 committed by Marge Bot
parent 9bb152c9a9
commit 2ef29502ed
6 changed files with 15 additions and 22 deletions

View file

@ -6100,7 +6100,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
const brw_builder ubld = bld.exec_all().group(8 * reg_unit(devinfo), 0);
brw_reg bind = ubld.AND(retype(brw_vec1_grf(0, 5), BRW_TYPE_UD),
brw_imm_ud(INTEL_MASK(31, 10)));
if (devinfo->ver >= 20)
if (devinfo->ver >= 20 || bld.shader->compiler->extended_bindless_surface_offset)
bind = ubld.SHR(bind, brw_imm_ud(4));
/* load_scratch / store_scratch cannot be is_scalar yet. */

View file

@ -23,7 +23,7 @@ build_ex_desc(const brw_builder &bld, unsigned reg_size, bool unspill)
brw_imm_ud(INTEL_MASK(31, 10)));
const intel_device_info *devinfo = bld.shader->devinfo;
if (devinfo->verx10 >= 200) {
if (devinfo->verx10 >= 200 || bld.shader->compiler->extended_bindless_surface_offset) {
ubld.SHR(ex_desc, ex_desc, brw_imm_ud(4));
} else {
if (unspill) {
@ -84,6 +84,7 @@ brw_lower_lsc_fill(const intel_device_info *devinfo, brw_shader &s,
lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, bld.dispatch_width()) * REG_SIZE;
unspill_inst->has_side_effects = false;
unspill_inst->is_volatile = true;
unspill_inst->bindless_surface = true;
unspill_inst->src[0] = brw_imm_ud(
desc |
@ -136,6 +137,7 @@ brw_lower_lsc_spill(const intel_device_info *devinfo, brw_inst *inst)
spill_inst->size_written = 0;
spill_inst->has_side_effects = true;
spill_inst->is_volatile = false;
spill_inst->bindless_surface = true;
spill_inst->src[0] = brw_imm_ud(
desc |

View file

@ -1110,15 +1110,16 @@ setup_lsc_surface_descriptors(const brw_builder &bld, brw_send_inst *send,
/* On Gfx20+ UGM always uses ExBSO which implies bindless. */
send->bindless_surface =
surf_type == LSC_ADDR_SURFTYPE_BSS ||
(devinfo->ver >= 20 && surf_type == LSC_ADDR_SURFTYPE_SS);
(surf_type == LSC_ADDR_SURFTYPE_BSS ||
surf_type == LSC_ADDR_SURFTYPE_SS);
switch (surf_type) {
case LSC_ADDR_SURFTYPE_BSS:
case LSC_ADDR_SURFTYPE_SS:
assert(surface.file != BAD_FILE);
/* We assume that the driver provided the handle in the top 20 bits so
* we can use the surface handle directly as the extended descriptor.
/* We assume that the driver provided the handle in the top 20 bits or
* top 26 bits (depending on extended_bindless_surface_offset) so we can
* use the surface handle directly as the extended descriptor.
*/
send->src[SEND_SRC_EX_DESC] = retype(surface, BRW_TYPE_UD);
@ -1697,9 +1698,6 @@ lower_lsc_varying_pull_constant_logical_send(const brw_builder &bld,
inst = NULL;
send->sfid = BRW_SFID_UGM;
send->bindless_surface =
surf_type == LSC_ADDR_SURFTYPE_BSS ||
(devinfo->ver >= 20 && surf_type == LSC_ADDR_SURFTYPE_SS);
assert(!compiler->indirect_ubos_use_sampler);

View file

@ -1012,17 +1012,10 @@ uint32_t anv_scratch_pool_get_surf(struct anv_device *device,
struct anv_scratch_pool *pool,
unsigned per_thread_scratch);
/* Note that on Gfx12HP we pass a scratch space surface state offset
* shifted by 2 relative to the value specified on the BSpec, since
* that allows the compiler to save a shift instruction while
* constructing the extended descriptor for SS addressing. That
* worked because we limit the scratch surface state pool to 8 MB and
* because we relied on the legacy (ExBSO=0) encoding of the extended
* descriptor in order to save the shift, which is no longer supported
* for the UGM shared function on Xe2 platforms, so we no longer
* attempt to do that trick.
/* Format expected for payload delivery, see 3DSTATE_(VS|HS|DS|GS|PS),
* 3DSTATE_BTD & CFE_STATE instruction definitions.
*/
#define ANV_SCRATCH_SPACE_SHIFT(ver) ((ver) >= 20 ? 6 : 4)
#define ANV_SCRATCH_SPACE_SHIFT (6)
/** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
struct anv_bo_cache {

View file

@ -66,7 +66,7 @@ genX(cmd_buffer_ensure_cfe_state)(struct anv_cmd_buffer *cmd_buffer,
anv_reloc_list_add_bo(cmd_buffer->batch.relocs, scratch_bo);
scratch_surf = anv_scratch_pool_get_surf(cmd_buffer->device, scratch_pool,
total_scratch);
cfe.ScratchSpaceBuffer = scratch_surf >> ANV_SCRATCH_SPACE_SHIFT(GFX_VER);
cfe.ScratchSpaceBuffer = scratch_surf >> ANV_SCRATCH_SPACE_SHIFT;
#if GFX_VER >= 20
switch (cmd_buffer->device->physical->instance->stack_ids) {
case 256: cfe.StackIDControl = StackIDs256; break;
@ -1246,7 +1246,7 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
anv_scratch_pool_get_surf(cmd_buffer->device,
&device->scratch_pool,
rt->scratch_size);
btd.ScratchSpaceBuffer = scratch_surf >> ANV_SCRATCH_SPACE_SHIFT(GFX_VER);
btd.ScratchSpaceBuffer = scratch_surf >> ANV_SCRATCH_SPACE_SHIFT;
}
#if INTEL_NEEDS_WA_14017794102 || INTEL_NEEDS_WA_14023061436
btd.BTDMidthreadpreemption = false;

View file

@ -93,7 +93,7 @@ get_scratch_surf(struct anv_batch *batch,
anv_reloc_list_add_bo(batch->relocs, bo);
return anv_scratch_pool_get_surf(
device, pool, shader->prog_data->total_scratch) >>
ANV_SCRATCH_SPACE_SHIFT(GFX_VER);
ANV_SCRATCH_SPACE_SHIFT;
}
/* Streamout (can be used by several shaders) */