mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-10 05:38:18 +02:00
anv, brw: Consolidate ex_bso bits to a static devinfo inline
If we have extended bindless surface offset (ExBSO) support, we want to use it. Consolidate the anv_physical_device and brw_compiler bits into a single static inline that takes devinfo. Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39839>
This commit is contained in:
parent
14c64a88c7
commit
4bdef9824a
13 changed files with 29 additions and 30 deletions
|
|
@ -91,8 +91,6 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
|
|||
|
||||
compiler->precise_trig = debug_get_bool_option("INTEL_PRECISE_TRIG", false);
|
||||
|
||||
compiler->extended_bindless_surface_offset = devinfo->verx10 >= 125;
|
||||
|
||||
compiler->lower_dpas = !devinfo->has_systolic ||
|
||||
debug_get_bool_option("INTEL_LOWER_DPAS", false);
|
||||
|
||||
|
|
|
|||
|
|
@ -73,13 +73,6 @@ struct brw_compiler {
|
|||
*/
|
||||
bool precise_trig;
|
||||
|
||||
/**
|
||||
* Gfx12.5+ has a bit in the SEND instruction extending the bindless
|
||||
* surface offset range from 20 to 26 bits, effectively giving us 4Gb of
|
||||
* bindless surface descriptors instead of 64Mb previously.
|
||||
*/
|
||||
bool extended_bindless_surface_offset;
|
||||
|
||||
/**
|
||||
* Gfx11+ has a bit in the dword 3 of the sampler message header that
|
||||
* indicates whether the sampler handle is relative to the dynamic state
|
||||
|
|
|
|||
|
|
@ -6108,7 +6108,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
|
|||
const brw_builder ubld = bld.exec_all().group(8 * reg_unit(devinfo), 0);
|
||||
brw_reg bind = ubld.AND(retype(brw_vec1_grf(0, 5), BRW_TYPE_UD),
|
||||
brw_imm_ud(INTEL_MASK(31, 10)));
|
||||
if (devinfo->ver >= 20 || bld.shader->compiler->extended_bindless_surface_offset)
|
||||
if (devinfo->ver >= 20 || intel_has_extended_bindless(devinfo))
|
||||
bind = ubld.SHR(bind, brw_imm_ud(4));
|
||||
|
||||
/* load_scratch / store_scratch cannot be is_scalar yet. */
|
||||
|
|
|
|||
|
|
@ -1009,7 +1009,7 @@ brw_generator::generate_code(const brw_shader &s,
|
|||
generate_send(inst->as_send(), dst, src[SEND_SRC_DESC], src[SEND_SRC_EX_DESC],
|
||||
src[SEND_SRC_PAYLOAD1], src[SEND_SRC_PAYLOAD2],
|
||||
inst->as_send()->bindless_surface &&
|
||||
compiler->extended_bindless_surface_offset);
|
||||
intel_has_extended_bindless(devinfo));
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
|
|
@ -1018,7 +1018,7 @@ brw_generator::generate_code(const brw_shader &s,
|
|||
src[SEND_GATHER_SRC_DESC], src[SEND_GATHER_SRC_EX_DESC],
|
||||
src[SEND_GATHER_SRC_SCALAR], brw_null_reg(),
|
||||
inst->as_send()->bindless_surface &&
|
||||
compiler->extended_bindless_surface_offset);
|
||||
intel_has_extended_bindless(devinfo));
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ build_ex_desc(const brw_builder &bld, unsigned reg_size, bool unspill)
|
|||
brw_imm_ud(INTEL_MASK(31, 10)));
|
||||
|
||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||
if (devinfo->verx10 >= 200 || bld.shader->compiler->extended_bindless_surface_offset) {
|
||||
if (devinfo->ver >= 20 || intel_has_extended_bindless(devinfo)) {
|
||||
ubld.SHR(ex_desc, ex_desc, brw_imm_ud(4));
|
||||
} else {
|
||||
if (unspill) {
|
||||
|
|
|
|||
|
|
@ -2529,8 +2529,7 @@ brw_lower_send_descriptors(brw_shader &s)
|
|||
(ex_desc_imm & INTEL_MASK(15, 12)) != 0)
|
||||
needs_addr_reg = true;
|
||||
|
||||
if (send->bindless_surface &&
|
||||
s.compiler->extended_bindless_surface_offset) {
|
||||
if (send->bindless_surface && intel_has_extended_bindless(devinfo)) {
|
||||
needs_addr_reg = true;
|
||||
/* When using the extended bindless offset, the whole extended
|
||||
* descriptor is the surface handle.
|
||||
|
|
|
|||
|
|
@ -214,6 +214,19 @@ enum intel_wa_steppings intel_device_info_wa_stepping(struct intel_device_info *
|
|||
uint32_t intel_device_info_get_max_slm_size(const struct intel_device_info *devinfo);
|
||||
uint32_t intel_device_info_get_max_preferred_slm_size(const struct intel_device_info *devinfo);
|
||||
|
||||
/**
|
||||
* True if this device supports the Extended Bindless Surface Offset mode,
|
||||
* which offers 26-bit surface handles, instead of 20-bit. This effectively
|
||||
* gives us 4GB of bindless surface descriptors instead of only 64MB.
|
||||
*
|
||||
* On Gfx12.5 this is enabled via an "ExBSO" bit in the SEND instruction.
|
||||
*/
|
||||
static inline bool
|
||||
intel_has_extended_bindless(const struct intel_device_info *devinfo)
|
||||
{
|
||||
return devinfo->verx10 >= 125;
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether indirect UBO loads should use the sampler or go through the
|
||||
* data/constant cache. For the sampler, UBO surface states have to be set
|
||||
|
|
|
|||
|
|
@ -198,7 +198,7 @@ anv_direct_descriptor_data_for_type(const struct anv_physical_device *device,
|
|||
* table. On previous generations, they are only reachable through
|
||||
* the binding table.
|
||||
*/
|
||||
if (device->uses_ex_bso) {
|
||||
if (intel_has_extended_bindless(&device->info)) {
|
||||
data &= ~(ANV_DESCRIPTOR_BTI_SURFACE_STATE |
|
||||
ANV_DESCRIPTOR_BTI_SAMPLER_STATE);
|
||||
}
|
||||
|
|
@ -414,7 +414,7 @@ anv_descriptor_data_supports_bindless(const struct anv_physical_device *pdevice,
|
|||
* bindless offset, all push descriptors have to go through the binding
|
||||
* tables.
|
||||
*/
|
||||
if (!pdevice->uses_ex_bso &&
|
||||
if (!intel_has_extended_bindless(&pdevice->info) &&
|
||||
(set_flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)) {
|
||||
return data & (ANV_DESCRIPTOR_INDIRECT_ADDRESS_RANGE |
|
||||
ANV_DESCRIPTOR_INDIRECT_SAMPLED_IMAGE |
|
||||
|
|
@ -1852,10 +1852,10 @@ anv_push_descriptor_set_init(struct anv_cmd_buffer *cmd_buffer,
|
|||
uint64_t push_base_address;
|
||||
|
||||
if (layout->vk.flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) {
|
||||
push_stream = pdevice->uses_ex_bso ?
|
||||
push_stream = intel_has_extended_bindless(&pdevice->info) ?
|
||||
&cmd_buffer->push_descriptor_buffer_stream :
|
||||
&cmd_buffer->surface_state_stream;
|
||||
push_base_address = pdevice->uses_ex_bso ?
|
||||
push_base_address = intel_has_extended_bindless(&pdevice->info) ?
|
||||
pdevice->va.push_descriptor_buffer_pool.addr :
|
||||
pdevice->va.internal_surface_state_pool.addr;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -601,7 +601,7 @@ build_desc_address64(nir_builder *b, nir_def *set_idx, unsigned set_idx_imm,
|
|||
anv_load_driver_uniform(b, 1, desc_surface_offsets[set_idx_imm]);
|
||||
desc_offset = nir_iand_imm(b, desc_offset, ANV_DESCRIPTOR_SET_OFFSET_MASK);
|
||||
if (state->bind_map->layout_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER &&
|
||||
!state->pdevice->uses_ex_bso) {
|
||||
!intel_has_extended_bindless(&state->pdevice->info)) {
|
||||
nir_def *bindless_base_offset =
|
||||
anv_load_driver_uniform(b, 1, surfaces_base_offset);
|
||||
desc_offset = nir_iadd(b, bindless_base_offset, desc_offset);
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ lower_resource_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
|
|||
*/
|
||||
if (state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT ||
|
||||
state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER) {
|
||||
if (!state->device->uses_ex_bso) {
|
||||
if (!intel_has_extended_bindless(&state->device->info)) {
|
||||
/* We're trying to reduce the number of instructions in the shaders
|
||||
* to compute surface handles. The assumption is that we're using
|
||||
* more surface handles than sampler handles (UBO, SSBO, images,
|
||||
|
|
|
|||
|
|
@ -1675,7 +1675,7 @@ get_properties(const struct anv_physical_device *pdevice,
|
|||
props->allowSamplerImageViewPostSubmitCreation = true;
|
||||
props->descriptorBufferOffsetAlignment = ANV_SURFACE_STATE_SIZE;
|
||||
|
||||
if (pdevice->uses_ex_bso) {
|
||||
if (intel_has_extended_bindless(devinfo)) {
|
||||
props->maxDescriptorBufferBindings = MAX_SETS;
|
||||
props->maxResourceDescriptorBufferBindings = MAX_SETS;
|
||||
props->maxSamplerDescriptorBufferBindings = MAX_SETS;
|
||||
|
|
@ -2780,10 +2780,8 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
|
|||
|
||||
device->vk.pipeline_cache_import_ops = anv_cache_import_ops;
|
||||
|
||||
device->uses_ex_bso = device->info.verx10 >= 125;
|
||||
|
||||
device->indirect_descriptors =
|
||||
!device->uses_ex_bso ||
|
||||
!intel_has_extended_bindless(&devinfo) ||
|
||||
driQueryOptionb(&instance->dri_options, "force_indirect_descriptors");
|
||||
|
||||
device->alloc_aux_tt_mem =
|
||||
|
|
@ -2823,7 +2821,6 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
|
|||
}
|
||||
device->compiler->shader_debug_log = compiler_debug_log;
|
||||
device->compiler->shader_perf_log = compiler_perf_log;
|
||||
device->compiler->extended_bindless_surface_offset = device->uses_ex_bso;
|
||||
device->compiler->use_bindless_sampler_offset = false;
|
||||
device->compiler->spilling_rate =
|
||||
driQueryOptioni(&instance->dri_options, "shader_spilling_rate");
|
||||
|
|
|
|||
|
|
@ -1523,7 +1523,6 @@ struct anv_physical_device {
|
|||
/* true if FCV optimization should be disabled. */
|
||||
bool disable_fcv;
|
||||
/**/
|
||||
bool uses_ex_bso;
|
||||
|
||||
bool always_flush_cache;
|
||||
|
||||
|
|
@ -1735,7 +1734,7 @@ anv_physical_device_bindless_heap_size(const struct anv_physical_device *device,
|
|||
* but we have some workarounds that require 2 heaps to overlap, so the
|
||||
* size is dictated by our VA allocation.
|
||||
*/
|
||||
return device->uses_ex_bso ?
|
||||
return intel_has_extended_bindless(&device->info) ?
|
||||
(descriptor_buffer ?
|
||||
device->va.dynamic_visible_pool.size :
|
||||
device->va.bindless_surface_state_pool.size) :
|
||||
|
|
@ -2841,7 +2840,7 @@ anv_surface_state_to_handle(struct anv_physical_device *device,
|
|||
*/
|
||||
assert(state.offset >= 0);
|
||||
uint32_t offset = state.offset;
|
||||
if (device->uses_ex_bso) {
|
||||
if (intel_has_extended_bindless(&device->info)) {
|
||||
assert(util_is_aligned(offset, 64));
|
||||
return offset;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -3284,7 +3284,7 @@ compute_descriptor_set_surface_offset(const struct anv_cmd_buffer *cmd_buffer,
|
|||
{
|
||||
const struct anv_physical_device *device = cmd_buffer->device->physical;
|
||||
|
||||
if (device->uses_ex_bso) {
|
||||
if (intel_has_extended_bindless(&device->info)) {
|
||||
int32_t buffer_index =
|
||||
pipe_state->descriptor_buffers[set_idx].buffer_index;
|
||||
uint64_t buffer_address =
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue