diff --git a/src/intel/compiler/brw/brw_compiler.c b/src/intel/compiler/brw/brw_compiler.c index 202c85702aa..9513b706121 100644 --- a/src/intel/compiler/brw/brw_compiler.c +++ b/src/intel/compiler/brw/brw_compiler.c @@ -91,8 +91,6 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo) compiler->precise_trig = debug_get_bool_option("INTEL_PRECISE_TRIG", false); - compiler->extended_bindless_surface_offset = devinfo->verx10 >= 125; - compiler->lower_dpas = !devinfo->has_systolic || debug_get_bool_option("INTEL_LOWER_DPAS", false); diff --git a/src/intel/compiler/brw/brw_compiler.h b/src/intel/compiler/brw/brw_compiler.h index 178f7ec8a54..b35cff7102b 100644 --- a/src/intel/compiler/brw/brw_compiler.h +++ b/src/intel/compiler/brw/brw_compiler.h @@ -73,13 +73,6 @@ struct brw_compiler { */ bool precise_trig; - /** - * Gfx12.5+ has a bit in the SEND instruction extending the bindless - * surface offset range from 20 to 26 bits, effectively giving us 4Gb of - * bindless surface descriptors instead of 64Mb previously. - */ - bool extended_bindless_surface_offset; - /** * Gfx11+ has a bit in the dword 3 of the sampler message header that * indicates whether the sampler handle is relative to the dynamic state diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp index c776aa76061..b5a2b164c9c 100644 --- a/src/intel/compiler/brw/brw_from_nir.cpp +++ b/src/intel/compiler/brw/brw_from_nir.cpp @@ -6108,7 +6108,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, const brw_builder ubld = bld.exec_all().group(8 * reg_unit(devinfo), 0); brw_reg bind = ubld.AND(retype(brw_vec1_grf(0, 5), BRW_TYPE_UD), brw_imm_ud(INTEL_MASK(31, 10))); - if (devinfo->ver >= 20 || bld.shader->compiler->extended_bindless_surface_offset) + if (devinfo->ver >= 20 || intel_has_extended_bindless(devinfo)) bind = ubld.SHR(bind, brw_imm_ud(4)); /* load_scratch / store_scratch cannot be is_scalar yet. */ diff --git a/src/intel/compiler/brw/brw_generator.cpp b/src/intel/compiler/brw/brw_generator.cpp index 555a367390e..fffafd6d7f4 100644 --- a/src/intel/compiler/brw/brw_generator.cpp +++ b/src/intel/compiler/brw/brw_generator.cpp @@ -1009,7 +1009,7 @@ brw_generator::generate_code(const brw_shader &s, generate_send(inst->as_send(), dst, src[SEND_SRC_DESC], src[SEND_SRC_EX_DESC], src[SEND_SRC_PAYLOAD1], src[SEND_SRC_PAYLOAD2], inst->as_send()->bindless_surface && - compiler->extended_bindless_surface_offset); + intel_has_extended_bindless(devinfo)); send_count++; break; @@ -1018,7 +1018,7 @@ brw_generator::generate_code(const brw_shader &s, src[SEND_GATHER_SRC_DESC], src[SEND_GATHER_SRC_EX_DESC], src[SEND_GATHER_SRC_SCALAR], brw_null_reg(), inst->as_send()->bindless_surface && - compiler->extended_bindless_surface_offset); + intel_has_extended_bindless(devinfo)); send_count++; break; diff --git a/src/intel/compiler/brw/brw_lower_fill_spill.cpp b/src/intel/compiler/brw/brw_lower_fill_spill.cpp index 42080308c8c..ec24338add4 100644 --- a/src/intel/compiler/brw/brw_lower_fill_spill.cpp +++ b/src/intel/compiler/brw/brw_lower_fill_spill.cpp @@ -23,7 +23,7 @@ build_ex_desc(const brw_builder &bld, unsigned reg_size, bool unspill) brw_imm_ud(INTEL_MASK(31, 10))); const intel_device_info *devinfo = bld.shader->devinfo; - if (devinfo->verx10 >= 200 || bld.shader->compiler->extended_bindless_surface_offset) { + if (devinfo->ver >= 20 || intel_has_extended_bindless(devinfo)) { ubld.SHR(ex_desc, ex_desc, brw_imm_ud(4)); } else { if (unspill) { diff --git a/src/intel/compiler/brw/brw_lower_logical_sends.cpp b/src/intel/compiler/brw/brw_lower_logical_sends.cpp index 429fc676e17..0e8e462a6f6 100644 --- a/src/intel/compiler/brw/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw/brw_lower_logical_sends.cpp @@ -2529,8 +2529,7 @@ brw_lower_send_descriptors(brw_shader &s) (ex_desc_imm & INTEL_MASK(15, 12)) != 0) needs_addr_reg = true; - if (send->bindless_surface && - s.compiler->extended_bindless_surface_offset) { + if (send->bindless_surface && intel_has_extended_bindless(devinfo)) { needs_addr_reg = true; /* When using the extended bindless offset, the whole extended * descriptor is the surface handle. diff --git a/src/intel/dev/intel_device_info.h b/src/intel/dev/intel_device_info.h index 7fe2552e995..486dfaa9fe4 100644 --- a/src/intel/dev/intel_device_info.h +++ b/src/intel/dev/intel_device_info.h @@ -214,6 +214,19 @@ enum intel_wa_steppings intel_device_info_wa_stepping(struct intel_device_info * uint32_t intel_device_info_get_max_slm_size(const struct intel_device_info *devinfo); uint32_t intel_device_info_get_max_preferred_slm_size(const struct intel_device_info *devinfo); +/** + * True if this device supports the Extended Bindless Surface Offset mode, + * which offers 26-bit surface handles, instead of 20-bit. This effectively + * gives us 4GB of bindless surface descriptors instead of only 64MB. + * + * On Gfx12.5 this is enabled via an "ExBSO" bit in the SEND instruction. + */ +static inline bool +intel_has_extended_bindless(const struct intel_device_info *devinfo) +{ + return devinfo->verx10 >= 125; +} + /** * Whether indirect UBO loads should use the sampler or go through the * data/constant cache. For the sampler, UBO surface states have to be set diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index 9fe966c758b..42c4158dce8 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -198,7 +198,7 @@ anv_direct_descriptor_data_for_type(const struct anv_physical_device *device, * table. On previous generations, they are only reachable through * the binding table. */ - if (device->uses_ex_bso) { + if (intel_has_extended_bindless(&device->info)) { data &= ~(ANV_DESCRIPTOR_BTI_SURFACE_STATE | ANV_DESCRIPTOR_BTI_SAMPLER_STATE); } @@ -414,7 +414,7 @@ anv_descriptor_data_supports_bindless(const struct anv_physical_device *pdevice, * bindless offset, all push descriptors have to go through the binding * tables. */ - if (!pdevice->uses_ex_bso && + if (!intel_has_extended_bindless(&pdevice->info) && (set_flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)) { return data & (ANV_DESCRIPTOR_INDIRECT_ADDRESS_RANGE | ANV_DESCRIPTOR_INDIRECT_SAMPLED_IMAGE | @@ -1852,10 +1852,10 @@ anv_push_descriptor_set_init(struct anv_cmd_buffer *cmd_buffer, uint64_t push_base_address; if (layout->vk.flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) { - push_stream = pdevice->uses_ex_bso ? + push_stream = intel_has_extended_bindless(&pdevice->info) ? &cmd_buffer->push_descriptor_buffer_stream : &cmd_buffer->surface_state_stream; - push_base_address = pdevice->uses_ex_bso ? + push_base_address = intel_has_extended_bindless(&pdevice->info) ? pdevice->va.push_descriptor_buffer_pool.addr : pdevice->va.internal_surface_state_pool.addr; } else { diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 2015258a623..3aab06fb6a0 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -601,7 +601,7 @@ build_desc_address64(nir_builder *b, nir_def *set_idx, unsigned set_idx_imm, anv_load_driver_uniform(b, 1, desc_surface_offsets[set_idx_imm]); desc_offset = nir_iand_imm(b, desc_offset, ANV_DESCRIPTOR_SET_OFFSET_MASK); if (state->bind_map->layout_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER && - !state->pdevice->uses_ex_bso) { + !intel_has_extended_bindless(&state->pdevice->info)) { nir_def *bindless_base_offset = anv_load_driver_uniform(b, 1, surfaces_base_offset); desc_offset = nir_iadd(b, bindless_base_offset, desc_offset); diff --git a/src/intel/vulkan/anv_nir_lower_resource_intel.c b/src/intel/vulkan/anv_nir_lower_resource_intel.c index d5b0dcf381e..b90ecc4bffc 100644 --- a/src/intel/vulkan/anv_nir_lower_resource_intel.c +++ b/src/intel/vulkan/anv_nir_lower_resource_intel.c @@ -122,7 +122,7 @@ lower_resource_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data) */ if (state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT || state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER) { - if (!state->device->uses_ex_bso) { + if (!intel_has_extended_bindless(&state->device->info)) { /* We're trying to reduce the number of instructions in the shaders * to compute surface handles. The assumption is that we're using * more surface handles than sampler handles (UBO, SSBO, images, diff --git a/src/intel/vulkan/anv_physical_device.c b/src/intel/vulkan/anv_physical_device.c index df8b5ee4d87..92a13852dd8 100644 --- a/src/intel/vulkan/anv_physical_device.c +++ b/src/intel/vulkan/anv_physical_device.c @@ -1675,7 +1675,7 @@ get_properties(const struct anv_physical_device *pdevice, props->allowSamplerImageViewPostSubmitCreation = true; props->descriptorBufferOffsetAlignment = ANV_SURFACE_STATE_SIZE; - if (pdevice->uses_ex_bso) { + if (intel_has_extended_bindless(devinfo)) { props->maxDescriptorBufferBindings = MAX_SETS; props->maxResourceDescriptorBufferBindings = MAX_SETS; props->maxSamplerDescriptorBufferBindings = MAX_SETS; @@ -2780,10 +2780,8 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, device->vk.pipeline_cache_import_ops = anv_cache_import_ops; - device->uses_ex_bso = device->info.verx10 >= 125; - device->indirect_descriptors = - !device->uses_ex_bso || + !intel_has_extended_bindless(&devinfo) || driQueryOptionb(&instance->dri_options, "force_indirect_descriptors"); device->alloc_aux_tt_mem = @@ -2823,7 +2821,6 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, } device->compiler->shader_debug_log = compiler_debug_log; device->compiler->shader_perf_log = compiler_perf_log; - device->compiler->extended_bindless_surface_offset = device->uses_ex_bso; device->compiler->use_bindless_sampler_offset = false; device->compiler->spilling_rate = driQueryOptioni(&instance->dri_options, "shader_spilling_rate"); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 40bae5459f0..4d9d4497e40 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1523,7 +1523,6 @@ struct anv_physical_device { /* true if FCV optimization should be disabled. */ bool disable_fcv; /**/ - bool uses_ex_bso; bool always_flush_cache; @@ -1735,7 +1734,7 @@ anv_physical_device_bindless_heap_size(const struct anv_physical_device *device, * but we have some workarounds that require 2 heaps to overlap, so the * size is dictated by our VA allocation. */ - return device->uses_ex_bso ? + return intel_has_extended_bindless(&device->info) ? (descriptor_buffer ? device->va.dynamic_visible_pool.size : device->va.bindless_surface_state_pool.size) : @@ -2841,7 +2840,7 @@ anv_surface_state_to_handle(struct anv_physical_device *device, */ assert(state.offset >= 0); uint32_t offset = state.offset; - if (device->uses_ex_bso) { + if (intel_has_extended_bindless(&device->info)) { assert(util_is_aligned(offset, 64)); return offset; } else { diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 7b666f27585..593b8574f49 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3284,7 +3284,7 @@ compute_descriptor_set_surface_offset(const struct anv_cmd_buffer *cmd_buffer, { const struct anv_physical_device *device = cmd_buffer->device->physical; - if (device->uses_ex_bso) { + if (intel_has_extended_bindless(&device->info)) { int32_t buffer_index = pipe_state->descriptor_buffers[set_idx].buffer_index; uint64_t buffer_address =