anv, brw: Consolidate ex_bso bits to a static devinfo inline

If we have extended bindless surface offset (ExBSO) support, we want to
use it.  Consolidate the anv_physical_device and brw_compiler bits into
a single static inline that takes devinfo.

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39839>
This commit is contained in:
Kenneth Graunke 2026-02-10 15:05:11 -08:00 committed by Marge Bot
parent 14c64a88c7
commit 4bdef9824a
13 changed files with 29 additions and 30 deletions

View file

@ -91,8 +91,6 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
compiler->precise_trig = debug_get_bool_option("INTEL_PRECISE_TRIG", false);
compiler->extended_bindless_surface_offset = devinfo->verx10 >= 125;
compiler->lower_dpas = !devinfo->has_systolic ||
debug_get_bool_option("INTEL_LOWER_DPAS", false);

View file

@ -73,13 +73,6 @@ struct brw_compiler {
*/
bool precise_trig;
/**
* Gfx12.5+ has a bit in the SEND instruction extending the bindless
* surface offset range from 20 to 26 bits, effectively giving us 4Gb of
* bindless surface descriptors instead of 64Mb previously.
*/
bool extended_bindless_surface_offset;
/**
* Gfx11+ has a bit in the dword 3 of the sampler message header that
* indicates whether the sampler handle is relative to the dynamic state

View file

@ -6108,7 +6108,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
const brw_builder ubld = bld.exec_all().group(8 * reg_unit(devinfo), 0);
brw_reg bind = ubld.AND(retype(brw_vec1_grf(0, 5), BRW_TYPE_UD),
brw_imm_ud(INTEL_MASK(31, 10)));
if (devinfo->ver >= 20 || bld.shader->compiler->extended_bindless_surface_offset)
if (devinfo->ver >= 20 || intel_has_extended_bindless(devinfo))
bind = ubld.SHR(bind, brw_imm_ud(4));
/* load_scratch / store_scratch cannot be is_scalar yet. */

View file

@ -1009,7 +1009,7 @@ brw_generator::generate_code(const brw_shader &s,
generate_send(inst->as_send(), dst, src[SEND_SRC_DESC], src[SEND_SRC_EX_DESC],
src[SEND_SRC_PAYLOAD1], src[SEND_SRC_PAYLOAD2],
inst->as_send()->bindless_surface &&
compiler->extended_bindless_surface_offset);
intel_has_extended_bindless(devinfo));
send_count++;
break;
@ -1018,7 +1018,7 @@ brw_generator::generate_code(const brw_shader &s,
src[SEND_GATHER_SRC_DESC], src[SEND_GATHER_SRC_EX_DESC],
src[SEND_GATHER_SRC_SCALAR], brw_null_reg(),
inst->as_send()->bindless_surface &&
compiler->extended_bindless_surface_offset);
intel_has_extended_bindless(devinfo));
send_count++;
break;

View file

@ -23,7 +23,7 @@ build_ex_desc(const brw_builder &bld, unsigned reg_size, bool unspill)
brw_imm_ud(INTEL_MASK(31, 10)));
const intel_device_info *devinfo = bld.shader->devinfo;
if (devinfo->verx10 >= 200 || bld.shader->compiler->extended_bindless_surface_offset) {
if (devinfo->ver >= 20 || intel_has_extended_bindless(devinfo)) {
ubld.SHR(ex_desc, ex_desc, brw_imm_ud(4));
} else {
if (unspill) {

View file

@ -2529,8 +2529,7 @@ brw_lower_send_descriptors(brw_shader &s)
(ex_desc_imm & INTEL_MASK(15, 12)) != 0)
needs_addr_reg = true;
if (send->bindless_surface &&
s.compiler->extended_bindless_surface_offset) {
if (send->bindless_surface && intel_has_extended_bindless(devinfo)) {
needs_addr_reg = true;
/* When using the extended bindless offset, the whole extended
* descriptor is the surface handle.

View file

@ -214,6 +214,19 @@ enum intel_wa_steppings intel_device_info_wa_stepping(struct intel_device_info *
uint32_t intel_device_info_get_max_slm_size(const struct intel_device_info *devinfo);
uint32_t intel_device_info_get_max_preferred_slm_size(const struct intel_device_info *devinfo);
/**
 * Reports whether the device can use the Extended Bindless Surface Offset
 * (ExBSO) mode.
 *
 * With ExBSO, surface handles are 26 bits wide rather than 20 bits, which
 * raises the amount of addressable bindless surface descriptors from 64MB
 * to 4GB.  On Gfx12.5 the mode is selected through an "ExBSO" bit in the
 * SEND instruction.
 *
 * \param devinfo  device description to query; must not be NULL, as it is
 *                 dereferenced unconditionally.
 * \return true when devinfo->verx10 is at least 125, false otherwise.
 */
static inline bool
intel_has_extended_bindless(const struct intel_device_info *devinfo)
{
   /* A verx10 of 125 is the lowest version for which support is reported. */
   const bool has_ex_bso = devinfo->verx10 >= 125;

   return has_ex_bso;
}
/**
* Whether indirect UBO loads should use the sampler or go through the
* data/constant cache. For the sampler, UBO surface states have to be set

View file

@ -198,7 +198,7 @@ anv_direct_descriptor_data_for_type(const struct anv_physical_device *device,
* table. On previous generations, they are only reachable through
* the binding table.
*/
if (device->uses_ex_bso) {
if (intel_has_extended_bindless(&device->info)) {
data &= ~(ANV_DESCRIPTOR_BTI_SURFACE_STATE |
ANV_DESCRIPTOR_BTI_SAMPLER_STATE);
}
@ -414,7 +414,7 @@ anv_descriptor_data_supports_bindless(const struct anv_physical_device *pdevice,
* bindless offset, all push descriptors have to go through the binding
* tables.
*/
if (!pdevice->uses_ex_bso &&
if (!intel_has_extended_bindless(&pdevice->info) &&
(set_flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)) {
return data & (ANV_DESCRIPTOR_INDIRECT_ADDRESS_RANGE |
ANV_DESCRIPTOR_INDIRECT_SAMPLED_IMAGE |
@ -1852,10 +1852,10 @@ anv_push_descriptor_set_init(struct anv_cmd_buffer *cmd_buffer,
uint64_t push_base_address;
if (layout->vk.flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) {
push_stream = pdevice->uses_ex_bso ?
push_stream = intel_has_extended_bindless(&pdevice->info) ?
&cmd_buffer->push_descriptor_buffer_stream :
&cmd_buffer->surface_state_stream;
push_base_address = pdevice->uses_ex_bso ?
push_base_address = intel_has_extended_bindless(&pdevice->info) ?
pdevice->va.push_descriptor_buffer_pool.addr :
pdevice->va.internal_surface_state_pool.addr;
} else {

View file

@ -601,7 +601,7 @@ build_desc_address64(nir_builder *b, nir_def *set_idx, unsigned set_idx_imm,
anv_load_driver_uniform(b, 1, desc_surface_offsets[set_idx_imm]);
desc_offset = nir_iand_imm(b, desc_offset, ANV_DESCRIPTOR_SET_OFFSET_MASK);
if (state->bind_map->layout_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER &&
!state->pdevice->uses_ex_bso) {
!intel_has_extended_bindless(&state->pdevice->info)) {
nir_def *bindless_base_offset =
anv_load_driver_uniform(b, 1, surfaces_base_offset);
desc_offset = nir_iadd(b, bindless_base_offset, desc_offset);

View file

@ -122,7 +122,7 @@ lower_resource_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
*/
if (state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT ||
state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER) {
if (!state->device->uses_ex_bso) {
if (!intel_has_extended_bindless(&state->device->info)) {
/* We're trying to reduce the number of instructions in the shaders
* to compute surface handles. The assumption is that we're using
* more surface handles than sampler handles (UBO, SSBO, images,

View file

@ -1675,7 +1675,7 @@ get_properties(const struct anv_physical_device *pdevice,
props->allowSamplerImageViewPostSubmitCreation = true;
props->descriptorBufferOffsetAlignment = ANV_SURFACE_STATE_SIZE;
if (pdevice->uses_ex_bso) {
if (intel_has_extended_bindless(devinfo)) {
props->maxDescriptorBufferBindings = MAX_SETS;
props->maxResourceDescriptorBufferBindings = MAX_SETS;
props->maxSamplerDescriptorBufferBindings = MAX_SETS;
@ -2780,10 +2780,8 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
device->vk.pipeline_cache_import_ops = anv_cache_import_ops;
device->uses_ex_bso = device->info.verx10 >= 125;
device->indirect_descriptors =
!device->uses_ex_bso ||
!intel_has_extended_bindless(&devinfo) ||
driQueryOptionb(&instance->dri_options, "force_indirect_descriptors");
device->alloc_aux_tt_mem =
@ -2823,7 +2821,6 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
}
device->compiler->shader_debug_log = compiler_debug_log;
device->compiler->shader_perf_log = compiler_perf_log;
device->compiler->extended_bindless_surface_offset = device->uses_ex_bso;
device->compiler->use_bindless_sampler_offset = false;
device->compiler->spilling_rate =
driQueryOptioni(&instance->dri_options, "shader_spilling_rate");

View file

@ -1523,7 +1523,6 @@ struct anv_physical_device {
/* true if FCV optimization should be disabled. */
bool disable_fcv;
/**/
bool uses_ex_bso;
bool always_flush_cache;
@ -1735,7 +1734,7 @@ anv_physical_device_bindless_heap_size(const struct anv_physical_device *device,
* but we have some workarounds that require 2 heaps to overlap, so the
* size is dictated by our VA allocation.
*/
return device->uses_ex_bso ?
return intel_has_extended_bindless(&device->info) ?
(descriptor_buffer ?
device->va.dynamic_visible_pool.size :
device->va.bindless_surface_state_pool.size) :
@ -2841,7 +2840,7 @@ anv_surface_state_to_handle(struct anv_physical_device *device,
*/
assert(state.offset >= 0);
uint32_t offset = state.offset;
if (device->uses_ex_bso) {
if (intel_has_extended_bindless(&device->info)) {
assert(util_is_aligned(offset, 64));
return offset;
} else {

View file

@ -3284,7 +3284,7 @@ compute_descriptor_set_surface_offset(const struct anv_cmd_buffer *cmd_buffer,
{
const struct anv_physical_device *device = cmd_buffer->device->physical;
if (device->uses_ex_bso) {
if (intel_has_extended_bindless(&device->info)) {
int32_t buffer_index =
pipe_state->descriptor_buffers[set_idx].buffer_index;
uint64_t buffer_address =