From 2ef29502edb01e3b99fdf2d5e0d06fa81dfa70a7 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 23 May 2025 21:18:41 +0300 Subject: [PATCH] brw: enable ex_bso for LSC_SS Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/brw/brw_from_nir.cpp | 2 +- src/intel/compiler/brw/brw_lower_fill_spill.cpp | 4 +++- src/intel/compiler/brw/brw_lower_logical_sends.cpp | 12 +++++------- src/intel/vulkan/anv_private.h | 13 +++---------- src/intel/vulkan/genX_cmd_compute.c | 4 ++-- src/intel/vulkan/genX_shader.c | 2 +- 6 files changed, 15 insertions(+), 22 deletions(-) diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp index 0433befd758..8f49f26be27 100644 --- a/src/intel/compiler/brw/brw_from_nir.cpp +++ b/src/intel/compiler/brw/brw_from_nir.cpp @@ -6100,7 +6100,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, const brw_builder ubld = bld.exec_all().group(8 * reg_unit(devinfo), 0); brw_reg bind = ubld.AND(retype(brw_vec1_grf(0, 5), BRW_TYPE_UD), brw_imm_ud(INTEL_MASK(31, 10))); - if (devinfo->ver >= 20) + if (devinfo->ver >= 20 || bld.shader->compiler->extended_bindless_surface_offset) bind = ubld.SHR(bind, brw_imm_ud(4)); /* load_scratch / store_scratch cannot be is_scalar yet. */ diff --git a/src/intel/compiler/brw/brw_lower_fill_spill.cpp b/src/intel/compiler/brw/brw_lower_fill_spill.cpp index 994a5199ebd..42080308c8c 100644 --- a/src/intel/compiler/brw/brw_lower_fill_spill.cpp +++ b/src/intel/compiler/brw/brw_lower_fill_spill.cpp @@ -23,7 +23,7 @@ build_ex_desc(const brw_builder &bld, unsigned reg_size, bool unspill) brw_imm_ud(INTEL_MASK(31, 10))); const intel_device_info *devinfo = bld.shader->devinfo; - if (devinfo->verx10 >= 200) { + if (devinfo->verx10 >= 200 || bld.shader->compiler->extended_bindless_surface_offset) { ubld.SHR(ex_desc, ex_desc, brw_imm_ud(4)); } else { if (unspill) { @@ -84,6 +84,7 @@ brw_lower_lsc_fill(const intel_device_info *devinfo, brw_shader &s, lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, bld.dispatch_width()) * REG_SIZE; unspill_inst->has_side_effects = false; unspill_inst->is_volatile = true; + unspill_inst->bindless_surface = true; unspill_inst->src[0] = brw_imm_ud( desc | @@ -136,6 +137,7 @@ brw_lower_lsc_spill(const intel_device_info *devinfo, brw_inst *inst) spill_inst->size_written = 0; spill_inst->has_side_effects = true; spill_inst->is_volatile = false; + spill_inst->bindless_surface = true; spill_inst->src[0] = brw_imm_ud( desc | diff --git a/src/intel/compiler/brw/brw_lower_logical_sends.cpp b/src/intel/compiler/brw/brw_lower_logical_sends.cpp index a3060da7182..c9b2e0e2a07 100644 --- a/src/intel/compiler/brw/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw/brw_lower_logical_sends.cpp @@ -1110,15 +1110,16 @@ setup_lsc_surface_descriptors(const brw_builder &bld, brw_send_inst *send, /* On Gfx20+ UGM always uses ExBSO which implies bindless. */ send->bindless_surface = - surf_type == LSC_ADDR_SURFTYPE_BSS || - (devinfo->ver >= 20 && surf_type == LSC_ADDR_SURFTYPE_SS); + (surf_type == LSC_ADDR_SURFTYPE_BSS || + surf_type == LSC_ADDR_SURFTYPE_SS); switch (surf_type) { case LSC_ADDR_SURFTYPE_BSS: case LSC_ADDR_SURFTYPE_SS: assert(surface.file != BAD_FILE); - /* We assume that the driver provided the handle in the top 20 bits so - * we can use the surface handle directly as the extended descriptor. + /* We assume that the driver provided the handle in the top 20 bits or + * top 26 bits (depending on extended_bindless_surface_offset) so we can + * use the surface handle directly as the extended descriptor. */ send->src[SEND_SRC_EX_DESC] = retype(surface, BRW_TYPE_UD); @@ -1697,9 +1698,6 @@ lower_lsc_varying_pull_constant_logical_send(const brw_builder &bld, inst = NULL; send->sfid = BRW_SFID_UGM; - send->bindless_surface = - surf_type == LSC_ADDR_SURFTYPE_BSS || - (devinfo->ver >= 20 && surf_type == LSC_ADDR_SURFTYPE_SS); assert(!compiler->indirect_ubos_use_sampler); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index e9cafb365b0..4ea0c3a790d 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1012,17 +1012,10 @@ uint32_t anv_scratch_pool_get_surf(struct anv_device *device, struct anv_scratch_pool *pool, unsigned per_thread_scratch); -/* Note that on Gfx12HP we pass a scratch space surface state offset - * shifted by 2 relative to the value specified on the BSpec, since - * that allows the compiler to save a shift instruction while - * constructing the extended descriptor for SS addressing. That - * worked because we limit the scratch surface state pool to 8 MB and - * because we relied on the legacy (ExBSO=0) encoding of the extended - * descriptor in order to save the shift, which is no longer supported - * for the UGM shared function on Xe2 platforms, so we no longer - * attempt to do that trick. +/* Format expected for payload delivery, see 3DSTATE_(VS|HS|DS|GS|PS), + * 3DSTATE_BTD & CFE_STATE instruction definitions. */ -#define ANV_SCRATCH_SPACE_SHIFT(ver) ((ver) >= 20 ? 6 : 4) +#define ANV_SCRATCH_SPACE_SHIFT (6) /** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */ struct anv_bo_cache { diff --git a/src/intel/vulkan/genX_cmd_compute.c b/src/intel/vulkan/genX_cmd_compute.c index bfae57c4657..a41f6ed7465 100644 --- a/src/intel/vulkan/genX_cmd_compute.c +++ b/src/intel/vulkan/genX_cmd_compute.c @@ -66,7 +66,7 @@ genX(cmd_buffer_ensure_cfe_state)(struct anv_cmd_buffer *cmd_buffer, anv_reloc_list_add_bo(cmd_buffer->batch.relocs, scratch_bo); scratch_surf = anv_scratch_pool_get_surf(cmd_buffer->device, scratch_pool, total_scratch); - cfe.ScratchSpaceBuffer = scratch_surf >> ANV_SCRATCH_SPACE_SHIFT(GFX_VER); + cfe.ScratchSpaceBuffer = scratch_surf >> ANV_SCRATCH_SPACE_SHIFT; #if GFX_VER >= 20 switch (cmd_buffer->device->physical->instance->stack_ids) { case 256: cfe.StackIDControl = StackIDs256; break; @@ -1246,7 +1246,7 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer, anv_scratch_pool_get_surf(cmd_buffer->device, &device->scratch_pool, rt->scratch_size); - btd.ScratchSpaceBuffer = scratch_surf >> ANV_SCRATCH_SPACE_SHIFT(GFX_VER); + btd.ScratchSpaceBuffer = scratch_surf >> ANV_SCRATCH_SPACE_SHIFT; } #if INTEL_NEEDS_WA_14017794102 || INTEL_NEEDS_WA_14023061436 btd.BTDMidthreadpreemption = false; diff --git a/src/intel/vulkan/genX_shader.c b/src/intel/vulkan/genX_shader.c index 9ccda103eee..b4191813384 100644 --- a/src/intel/vulkan/genX_shader.c +++ b/src/intel/vulkan/genX_shader.c @@ -93,7 +93,7 @@ get_scratch_surf(struct anv_batch *batch, anv_reloc_list_add_bo(batch->relocs, bo); return anv_scratch_pool_get_surf( device, pool, shader->prog_data->total_scratch) >> - ANV_SCRATCH_SPACE_SHIFT(GFX_VER); + ANV_SCRATCH_SPACE_SHIFT; } /* Streamout (can be used by several shaders) */