Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2025-12-25 02:10:11 +01:00)
anv: use 2 different buffers for surfaces/samplers in descriptor sets
We made the unfortunate discovery on a recent platform that the bindless sampler heap is not functioning as expected. Nowhere in the documentation is the size of the heap written down, so most people assumed it was the maximum we can program (4Gb). The reality is that it is only 64Mb.

Although it appears to work properly over the whole 4Gb range for most apps, this is only because the HW bounds checking applied is broken: instead of clamping anything beyond 64Mb, it only clamps the last 4Kb of each 64Mb region. This makes the heap useless for building a 4Gb region holding both sampler & surface states.

This change essentially turns off the bindless sampler heap on DG2+. The only location where we can put SAMPLER_STATE elements is the dynamic state heap. Unfortunately we cannot align the dynamic state heap with the bindless surface state heap, so the solution is to allocate sampler & surface states separately, each from its own heap in the descriptor pool. We now have to provide 2 sets of offsets, for surfaces & samplers.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Rohan Garg <rohan.garg@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25897>
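The failure mode is easier to see with numbers. Below is a minimal standalone model of the bounds-checking behavior described above — illustration only, not driver code; the 64Mb and 4Kb figures come from the commit message.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MB(x) ((uint64_t)(x) << 20)
#define KB(x) ((uint64_t)(x) << 10)

/* The hardware was expected to clamp any access beyond the real 64Mb
 * heap. Instead it only clamps the last 4Kb of each 64Mb region. */
static bool broken_hw_clamps(uint64_t offset)
{
   return (offset % MB(64)) >= (MB(64) - KB(4));
}

static bool expected_hw_clamps(uint64_t offset)
{
   return offset >= MB(64);
}

int main(void)
{
   uint64_t offset = MB(100); /* well past the real 64Mb heap */
   printf("expected clamp: %d, actual clamp: %d\n",
          expected_hw_clamps(offset), broken_hw_clamps(offset));
   /* Prints "expected clamp: 1, actual clamp: 0": out-of-heap accesses
    * sail through, which is why most apps appeared to work on the full
    * 4Gb range. */
   return 0;
}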
commit 7c76125db2
parent 09a3a93372
11 changed files with 779 additions and 380 deletions
@@ -673,10 +673,11 @@ void anv_CmdBindPipeline(
       assert(layout->set[s].dynamic_offset_start < MAX_DYNAMIC_BUFFERS);
       if (layout->set[s].layout->dynamic_offset_count > 0 &&
-          (push->desc_offsets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) != layout->set[s].dynamic_offset_start) {
-         push->desc_offsets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
-         push->desc_offsets[s] |= (layout->set[s].dynamic_offset_start &
-                                   ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
+          (push->desc_surface_offsets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) !=
+          layout->set[s].dynamic_offset_start) {
+         push->desc_surface_offsets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
+         push->desc_surface_offsets[s] |= (layout->set[s].dynamic_offset_start &
+                                           ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
          modified = true;
       }
    }
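For reference, each 32-bit entry in these arrays packs two values using the masks that appear as context in the anv_private.h hunk below (ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK / ANV_DESCRIPTOR_SET_OFFSET_MASK). A minimal sketch of that packing, assuming ANV_UBO_ALIGNMENT is 64:

#include <assert.h>
#include <stdint.h>

/* Constants mirroring anv_private.h; ANV_UBO_ALIGNMENT is assumed to
 * be 64 here. */
#define ANV_UBO_ALIGNMENT 64u
#define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint32_t)ANV_UBO_ALIGNMENT - 1)
#define ANV_DESCRIPTOR_SET_OFFSET_MASK (~(uint32_t)(ANV_UBO_ALIGNMENT - 1))

/* Pack a heap-relative offset (64-byte aligned) together with a dynamic
 * offset index, the way the hunk above updates
 * push->desc_surface_offsets[s]. */
static uint32_t
pack_desc_offset(uint32_t heap_offset, uint32_t dynamic_index)
{
   assert((heap_offset & ~ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0);
   assert((dynamic_index & ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) == 0);
   return heap_offset | dynamic_index;
}

int main(void)
{
   uint32_t packed = pack_desc_offset(0x1000, 5);
   assert((packed & ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0x1000);
   assert((packed & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) == 5);
   return 0;
}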
@@ -788,16 +789,16 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
    /* When using indirect descriptors, stages that have access to the HW
     * binding tables, never need to access the
-    * anv_push_constants::desc_offsets fields, because any data they need
-    * from the descriptor buffer is accessible through a binding table
-    * entry. For stages that are "bindless" (Mesh/Task/RT), we need to
-    * provide anv_push_constants::desc_offsets matching the bound
+    * anv_push_constants::desc_surface_offsets fields, because any data
+    * they need from the descriptor buffer is accessible through a binding
+    * table entry. For stages that are "bindless" (Mesh/Task/RT), we need
+    * to provide anv_push_constants::desc_surface_offsets matching the bound
     * descriptor so that shaders can access the descriptor buffer through
     * A64 messages.
     *
     * With direct descriptors, the shaders can use the
-    * anv_push_constants::desc_offsets to build bindless offsets. So it's
-    * we always need to update the push constant data.
+    * anv_push_constants::desc_surface_offsets to build bindless offsets.
+    * So it's we always need to update the push constant data.
     */
    bool update_desc_sets =
       !cmd_buffer->device->physical->indirect_descriptors ||
@@ -813,18 +814,20 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
    if (update_desc_sets) {
       struct anv_push_constants *push = &pipe_state->push_constants;

-      struct anv_address set_addr = anv_descriptor_set_address(set);
       uint64_t offset =
-         anv_address_physical(set_addr) -
-         cmd_buffer->device->physical->va.binding_table_pool.addr;
+         anv_address_physical(set->desc_surface_addr) -
+         cmd_buffer->device->physical->va.internal_surface_state_pool.addr;
       assert((offset & ~ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0);
-      push->desc_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK;
-      push->desc_offsets[set_index] |= offset;
+      push->desc_surface_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK;
+      push->desc_surface_offsets[set_index] |= offset;
+      push->desc_sampler_offsets[set_index] |=
+         anv_address_physical(set->desc_sampler_addr) -
+         cmd_buffer->device->physical->va.dynamic_state_pool.addr;

-      if (set_addr.bo) {
-         anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
-                               set_addr.bo);
-      }
+      anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
+                            set->desc_surface_addr.bo);
+      anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
+                            set->desc_sampler_addr.bo);
    }

    dirty_stages |= stages;
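The hunk above is the core of the change: the surface half of the set is referenced by an offset relative to the internal surface state pool, the sampler half by an offset relative to the dynamic state pool. The sketch below models that derivation with hypothetical pool base addresses; in the driver the bases come from the physical device's va layout, and the high 32 bits reach shaders through relocations such as BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH (see the anv_shader_bin_create hunk further down).

#include <assert.h>
#include <stdint.h>

/* Hypothetical pool base addresses standing in for
 * physical->va.internal_surface_state_pool.addr and
 * physical->va.dynamic_state_pool.addr. */
static const uint64_t surface_pool_base = 0x000200000000ull;
static const uint64_t dynamic_pool_base = 0x000100000000ull;

/* A shader-visible 32-bit offset is the descriptor buffer's VA made
 * relative to the heap it was allocated from; the consumer adds the
 * heap base back to rebuild the full address. */
static uint32_t heap_relative(uint64_t va, uint64_t pool_base)
{
   assert(va >= pool_base && va - pool_base <= UINT32_MAX);
   return (uint32_t)(va - pool_base);
}

int main(void)
{
   uint64_t desc_surface_va = surface_pool_base + 0x4000;
   uint64_t desc_sampler_va = dynamic_pool_base + 0x2000;

   uint32_t surface_offset = heap_relative(desc_surface_va, surface_pool_base);
   uint32_t sampler_offset = heap_relative(desc_sampler_va, dynamic_pool_base);

   /* Reconstruct the full addresses the way a shader would. */
   assert(surface_pool_base + surface_offset == desc_surface_va);
   assert(dynamic_pool_base + sampler_offset == desc_sampler_va);
   return 0;
}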
(File diff suppressed because it is too large.)
@@ -1479,7 +1479,7 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
    device->compiler->supports_shader_constants = true;
    device->compiler->indirect_ubos_use_sampler = device->info.ver < 12;
    device->compiler->extended_bindless_surface_offset = device->uses_ex_bso;
-   device->compiler->use_bindless_sampler_offset = !device->indirect_descriptors;
+   device->compiler->use_bindless_sampler_offset = false;
    device->compiler->spilling_rate =
       driQueryOptioni(&instance->dri_options, "shader_spilling_rate");
@@ -3324,6 +3324,9 @@ VkResult anv_CreateDevice(
                          device->physical->va.bindless_surface_state_pool.size);
    }

+   util_vma_heap_init(&device->vma_samplers,
+                      device->physical->va.sampler_state_pool.addr,
+                      device->physical->va.sampler_state_pool.size);
    util_vma_heap_init(&device->vma_trtt,
                       device->physical->va.trtt.addr,
                       device->physical->va.trtt.size);
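util_vma_heap is mesa's generic address-space allocator (src/util/vma.h). A minimal usage sketch of a dedicated sampler heap like the vma_samplers one added above — meant to build inside the mesa tree, with placeholder address and size values:

#include <stdint.h>
#include "util/vma.h"

/* Allocate a VA block for samplers from a dedicated heap, mirroring the
 * vma_samplers heap. 4Kb alignment like a typical BO allocation. */
static uint64_t
alloc_sampler_va(struct util_vma_heap *heap, uint64_t size)
{
   /* Returns 0 when the heap has no room. */
   return util_vma_heap_alloc(heap, size, 4096);
}

void example(void)
{
   struct util_vma_heap vma_samplers;
   util_vma_heap_init(&vma_samplers,
                      0x000180000000ull, /* stand-in for sampler_state_pool.addr */
                      2ull << 30);       /* stand-in for sampler_state_pool.size */

   uint64_t addr = alloc_sampler_va(&vma_samplers, 64 * 1024);
   if (addr != 0)
      util_vma_heap_free(&vma_samplers, addr, 64 * 1024);

   util_vma_heap_finish(&vma_samplers);
}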
@@ -3789,6 +3792,8 @@ VkResult anv_CreateDevice(
    pthread_mutex_destroy(&device->mutex);
 fail_vmas:
    util_vma_heap_finish(&device->vma_trtt);
+   if (!device->physical->indirect_descriptors)
+      util_vma_heap_finish(&device->vma_samplers);
    util_vma_heap_finish(&device->vma_desc);
    util_vma_heap_finish(&device->vma_hi);
    util_vma_heap_finish(&device->vma_lo);
@@ -3903,6 +3908,8 @@ void anv_DestroyDevice(
    anv_bo_cache_finish(&device->bo_cache);

    util_vma_heap_finish(&device->vma_trtt);
+   if (!device->physical->indirect_descriptors)
+      util_vma_heap_finish(&device->vma_samplers);
    util_vma_heap_finish(&device->vma_desc);
    util_vma_heap_finish(&device->vma_hi);
    util_vma_heap_finish(&device->vma_lo);
@@ -3970,6 +3977,9 @@ anv_vma_heap_for_flags(struct anv_device *device,
    if (alloc_flags & ANV_BO_ALLOC_DESCRIPTOR_POOL)
       return &device->vma_desc;

+   if (alloc_flags & ANV_BO_ALLOC_SAMPLER_POOL)
+      return &device->vma_samplers;
+
    return &device->vma_hi;
 }
@@ -4022,6 +4032,7 @@ anv_vma_free(struct anv_device *device,
    assert(vma_heap == &device->vma_lo ||
           vma_heap == &device->vma_hi ||
           vma_heap == &device->vma_desc ||
+          vma_heap == &device->vma_samplers ||
           vma_heap == &device->vma_trtt);

    const uint64_t addr_48b = intel_48b_address(address);
@@ -136,7 +136,7 @@ add_binding(struct apply_pipeline_layout_state *state,
     * this binding. This lets us be lazy and call this function constantly
     * without worrying about unnecessarily enabling the buffer.
     */
-   if (bind_layout->descriptor_stride)
+   if (bind_layout->descriptor_surface_stride)
      state->set[set].desc_buffer_used = true;

    if (bind_layout->dynamic_offset_index >= 0)
@@ -556,7 +556,8 @@ build_res_index(nir_builder *b,
    case nir_address_format_64bit_global_32bit_offset:
       /* Descriptor set buffer accesses will go through A64 messages, so the
        * index to get the descriptor set buffer address is located in the
-       * anv_push_constants::desc_offsets and it's indexed by the set number.
+       * anv_push_constants::desc_surface_offsets and it's indexed by the set
+       * number.
        */
       set_idx = set;
       break;
@@ -593,8 +594,8 @@ build_res_index(nir_builder *b,
    }

    const uint32_t desc_bti = state->set[set].binding[binding].surface_offset;
-   assert(bind_layout->descriptor_stride % 8 == 0);
-   const uint32_t desc_stride = bind_layout->descriptor_stride / 8;
+   assert(bind_layout->descriptor_surface_stride % 8 == 0);
+   const uint32_t desc_stride = bind_layout->descriptor_surface_stride / 8;

    nir_def *packed =
       nir_ior_imm(b,
@@ -605,7 +606,7 @@ build_res_index(nir_builder *b,

    return nir_vec4(b, packed,
-                   nir_imm_int(b, bind_layout->descriptor_offset),
+                   nir_imm_int(b, bind_layout->descriptor_surface_offset),
                    nir_imm_int(b, array_size - 1),
                    array_index);
 }
@@ -748,8 +749,8 @@ build_desc_addr_for_binding(nir_builder *b,
          nir_iadd_imm(b,
                       nir_imul_imm(b,
                                    array_index,
-                                   bind_layout->descriptor_stride),
-                      bind_layout->descriptor_offset);
+                                   bind_layout->descriptor_surface_stride),
+                      bind_layout->descriptor_surface_offset);

       return nir_vec4(b, nir_unpack_64_2x32_split_x(b, set_addr),
                       nir_unpack_64_2x32_split_y(b, set_addr),
@@ -763,14 +764,38 @@ build_desc_addr_for_binding(nir_builder *b,
                      nir_iadd_imm(b,
                                   nir_imul_imm(b,
                                                array_index,
-                                               bind_layout->descriptor_stride),
-                                  bind_layout->descriptor_offset));
+                                               bind_layout->descriptor_surface_stride),
+                                  bind_layout->descriptor_surface_offset));

    default:
       unreachable("Unhandled address format");
    }
 }

+static unsigned
+binding_descriptor_offset(const struct apply_pipeline_layout_state *state,
+                          const struct anv_descriptor_set_binding_layout *bind_layout,
+                          bool sampler)
+{
+   if (sampler &&
+       state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT)
+      return bind_layout->descriptor_sampler_offset;
+
+   return bind_layout->descriptor_surface_offset;
+}
+
+static unsigned
+binding_descriptor_stride(const struct apply_pipeline_layout_state *state,
+                          const struct anv_descriptor_set_binding_layout *bind_layout,
+                          bool sampler)
+{
+   if (sampler &&
+       state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT)
+      return bind_layout->descriptor_sampler_stride;
+
+   return bind_layout->descriptor_surface_stride;
+}
+
 static nir_def *
 build_surface_index_for_binding(nir_builder *b,
                                 unsigned set, unsigned binding,
@@ -781,6 +806,10 @@ build_surface_index_for_binding(nir_builder *b,
 {
    const struct anv_descriptor_set_binding_layout *bind_layout =
       &state->layout->set[set].layout->binding[binding];
+   const unsigned descriptor_offset =
+      binding_descriptor_offset(state, bind_layout, false /* sampler */);
+   const unsigned descriptor_stride =
+      binding_descriptor_stride(state, bind_layout, false /* sampler */);
    const bool is_bindless =
       is_binding_bindless(set, binding, false /* sampler */, state);
@@ -797,23 +826,25 @@ build_surface_index_for_binding(nir_builder *b,
       } else {
          set_offset =
             nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0),
-                                   .base = offsetof(struct anv_push_constants, desc_offsets[set]),
-                                   .range = sizeof_field(struct anv_push_constants, desc_offsets[set]));
+                                   .base = offsetof(struct anv_push_constants,
+                                                    desc_surface_offsets[set]),
+                                   .range = sizeof_field(struct anv_push_constants,
+                                                         desc_surface_offsets[set]));

          /* With bindless indexes are offsets in the descriptor buffer */
          surface_index =
             nir_iadd_imm(b,
-                         nir_imul_imm(b, array_index, bind_layout->descriptor_stride),
-                         bind_layout->descriptor_offset);
+                         nir_imul_imm(b, array_index, descriptor_stride),
+                         descriptor_offset);
          if (plane != 0) {
             assert(plane < bind_layout->max_plane_count);
             surface_index = nir_iadd_imm(b, surface_index,
-                                         plane * (bind_layout->descriptor_stride /
+                                         plane * (descriptor_stride /
                                                   bind_layout->max_plane_count));
          }

-         assert(bind_layout->descriptor_offset % 64 == 0);
-         assert(bind_layout->descriptor_stride % 64 == 0);
+         assert(descriptor_offset % 64 == 0);
+         assert(descriptor_stride % 64 == 0);
       }
    } else {
       /* Unused */
@@ -854,14 +885,17 @@ build_sampler_handle_for_binding(nir_builder *b,
                                  bool non_uniform,
                                  const struct apply_pipeline_layout_state *state)
 {
+   const struct anv_descriptor_set_binding_layout *bind_layout =
+      &state->layout->set[set].layout->binding[binding];
+   const unsigned descriptor_offset =
+      binding_descriptor_offset(state, bind_layout, true /* sampler */);
+   const unsigned descriptor_stride =
+      binding_descriptor_stride(state, bind_layout, true /* sampler */);
    const bool is_bindless =
       is_binding_bindless(set, binding, true /* sampler */, state);
    nir_def *set_offset, *sampler_index;

    if (is_bindless) {
-      const struct anv_descriptor_set_binding_layout *bind_layout =
-         &state->layout->set[set].layout->binding[binding];
-
       if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) {
          set_offset = nir_imm_int(b, 0xdeaddead);
@@ -878,10 +912,12 @@ build_sampler_handle_for_binding(nir_builder *b,
       } else {
          set_offset =
             nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0),
-                                   .base = offsetof(struct anv_push_constants, desc_offsets[set]),
-                                   .range = sizeof_field(struct anv_push_constants, desc_offsets[set]));
+                                   .base = offsetof(struct anv_push_constants,
+                                                    desc_sampler_offsets[set]),
+                                   .range = sizeof_field(struct anv_push_constants,
+                                                         desc_sampler_offsets[set]));

-         uint32_t base_offset = bind_layout->descriptor_offset;
+         uint32_t base_offset = descriptor_offset;

          /* The SAMPLER_STATE can only be located at a 64 byte in the combined
           * image/sampler case. Combined image/sampler is not supported to be
@@ -892,13 +928,13 @@ build_sampler_handle_for_binding(nir_builder *b,

          if (plane != 0) {
             assert(plane < bind_layout->max_plane_count);
-            base_offset += plane * (bind_layout->descriptor_stride /
+            base_offset += plane * (descriptor_stride /
                                     bind_layout->max_plane_count);
          }

          sampler_index =
             nir_iadd_imm(b,
-                         nir_imul_imm(b, array_index, bind_layout->descriptor_stride),
+                         nir_imul_imm(b, array_index, descriptor_stride),
                          base_offset);
       }
    } else {
@@ -1095,7 +1131,7 @@ build_buffer_addr_for_binding(nir_builder *b,
          &state->layout->set[set].layout->binding[binding];
       return nir_vec2(b,
                       nir_imm_int(b, state->set[set].desc_offset),
-                      nir_imm_int(b, bind_layout->descriptor_offset));
+                      nir_imm_int(b, bind_layout->descriptor_surface_offset));
    }

    struct res_index_defs res = unpack_res_index(b, res_index);
@@ -1875,9 +1911,9 @@ add_bti_entry(struct anv_pipeline_bind_map *map,
       .set = set,
       .binding = binding,
       .index = bind_layout->descriptor_index + element,
-      .set_offset = bind_layout->descriptor_offset +
-                    element * bind_layout->descriptor_stride +
-                    plane * bind_layout->descriptor_data_size,
+      .set_offset = bind_layout->descriptor_surface_offset +
+                    element * bind_layout->descriptor_surface_stride +
+                    plane * bind_layout->descriptor_data_surface_size,
       .plane = plane,
    };
    assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
@@ -1896,8 +1932,8 @@ add_dynamic_bti_entry(struct anv_pipeline_bind_map *map,
       .set = set,
       .binding = binding,
       .index = bind_layout->descriptor_index + element,
-      .set_offset = bind_layout->descriptor_offset +
-                    element * bind_layout->descriptor_stride,
+      .set_offset = bind_layout->descriptor_surface_offset +
+                    element * bind_layout->descriptor_surface_stride,
       .dynamic_offset_index = bind_layout->dynamic_offset_index + element,
    };
    assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
@@ -68,10 +68,12 @@ anv_nir_compute_push_layout(nir_shader *nir,

          case nir_intrinsic_load_desc_set_address_intel:
          case nir_intrinsic_load_desc_set_dynamic_index_intel: {
-            unsigned base = offsetof(struct anv_push_constants, desc_offsets);
+            unsigned base = offsetof(struct anv_push_constants,
+                                     desc_surface_offsets);
             push_start = MIN2(push_start, base);
             push_end = MAX2(push_end, base +
-               sizeof_field(struct anv_push_constants, desc_offsets));
+               sizeof_field(struct anv_push_constants,
+                            desc_surface_offsets));
             break;
          }
@@ -175,8 +177,10 @@ anv_nir_compute_push_layout(nir_shader *nir,
             b->cursor = nir_before_instr(&intrin->instr);
             nir_def *pc_load = nir_load_uniform(b, 1, 32,
                nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint32_t)),
-               .base = offsetof(struct anv_push_constants, desc_offsets),
-               .range = sizeof_field(struct anv_push_constants, desc_offsets),
+               .base = offsetof(struct anv_push_constants,
+                                desc_surface_offsets),
+               .range = sizeof_field(struct anv_push_constants,
+                                     desc_surface_offsets),
                .dest_type = nir_type_uint32);
             pc_load = nir_iand_imm(b, pc_load, ANV_DESCRIPTOR_SET_OFFSET_MASK);
             nir_def *desc_addr =
@@ -192,8 +196,10 @@ anv_nir_compute_push_layout(nir_shader *nir,
             b->cursor = nir_before_instr(&intrin->instr);
             nir_def *pc_load = nir_load_uniform(b, 1, 32,
                nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint32_t)),
-               .base = offsetof(struct anv_push_constants, desc_offsets),
-               .range = sizeof_field(struct anv_push_constants, desc_offsets),
+               .base = offsetof(struct anv_push_constants,
+                                desc_surface_offsets),
+               .range = sizeof_field(struct anv_push_constants,
+                                     desc_surface_offsets),
                .dest_type = nir_type_uint32);
             pc_load = nir_iand_imm(
                b, pc_load, ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
@@ -135,9 +135,7 @@ lower_resource_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
        * set, resource_intel::src[0] has to be shifted right by 6 (bringing
        * it back in bytes).
        */
-      if (is_sampler)
-         set_offset = nir_ushr_imm(b, set_offset, 6);
-      else
+      if (!is_sampler)
          binding_offset = nir_ishl_imm(b, binding_offset, 6);
    }
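The shift by 6 works because the state elements are 64-byte aligned (see the "% 64 == 0" asserts in the apply_pipeline_layout hunks above), so an offset can be carried in 64-byte units and converted back to bytes with a shift. A self-contained illustration of that encoding — not the NIR lowering itself:

#include <assert.h>
#include <stdint.h>

/* 64-byte-aligned state offsets fit in fewer bits when stored in units
 * of 64 bytes; shifting by 6 converts between bytes and units. */
static uint32_t to_64b_units(uint32_t byte_offset)
{
   assert(byte_offset % 64 == 0);
   return byte_offset >> 6;
}

static uint32_t to_bytes(uint32_t units)
{
   return units << 6;
}

int main(void)
{
   uint32_t byte_offset = 0x1240; /* 64-byte aligned */
   assert(to_bytes(to_64b_units(byte_offset)) == byte_offset);
   return 0;
}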
@@ -123,13 +123,15 @@ anv_shader_bin_create(struct anv_device *device,

    int rv_count = 0;
    struct brw_shader_reloc_value reloc_values[6];
-   assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
    assert((device->physical->va.indirect_descriptor_pool.addr & 0xffffffff) == 0);
+   assert((device->physical->va.internal_surface_state_pool.addr & 0xffffffff) == 0);
    reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
       .id = BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
       .value = device->physical->indirect_descriptors ?
                (device->physical->va.indirect_descriptor_pool.addr >> 32) :
-               (device->physical->va.binding_table_pool.addr >> 32),
+               (device->physical->va.internal_surface_state_pool.addr >> 32),
    };
+   assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
    reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
       .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
       .value = shader_data_addr,
@@ -432,6 +432,9 @@ enum anv_bo_alloc_flags {
     * set it will allocate a coherent BO.
     **/
    ANV_BO_ALLOC_HOST_CACHED = (1 << 16),
+
+   /** For sampler pools */
+   ANV_BO_ALLOC_SAMPLER_POOL = (1 << 17),
 };

 struct anv_bo {
@@ -1634,6 +1637,7 @@ struct anv_device {
    struct util_vma_heap vma_lo;
    struct util_vma_heap vma_hi;
    struct util_vma_heap vma_desc;
+   struct util_vma_heap vma_samplers;
    struct util_vma_heap vma_trtt;

    /** List of all anv_device_memory objects */
@@ -2370,18 +2374,30 @@ struct anv_descriptor_set_binding_layout {
     */
    int16_t dynamic_offset_index;

-   /* Computed size from data */
-   uint16_t descriptor_data_size;
+   /* Computed surface size from data (for one plane) */
+   uint16_t descriptor_data_surface_size;
+
+   /* Computed sampler size from data (for one plane) */
+   uint16_t descriptor_data_sampler_size;

    /* Index into the descriptor set buffer views */
    int32_t buffer_view_index;

-   /* Offset into the descriptor buffer where this descriptor lives */
-   uint32_t descriptor_offset;
+   /* Offset into the descriptor buffer where the surface descriptor lives */
+   uint32_t descriptor_surface_offset;

-   /* Pre computed stride (with multiplane descriptor, the descriptor includes
-    * all the planes) */
-   unsigned descriptor_stride;
+   /* Offset into the descriptor buffer where the sampler descriptor lives */
+   uint16_t descriptor_sampler_offset;
+
+   /* Pre computed surface stride (with multiplane descriptor, the descriptor
+    * includes all the planes)
+    */
+   uint16_t descriptor_surface_stride;
+
+   /* Pre computed sampler stride (with multiplane descriptor, the descriptor
+    * includes all the planes)
+    */
+   uint16_t descriptor_sampler_stride;

    /* Immutable samplers (or NULL if no immutable samplers) */
    struct anv_sampler **immutable_samplers;
@@ -2433,8 +2449,15 @@ struct anv_descriptor_set_layout {
     */
    VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];

-   /* Size of the descriptor buffer for this descriptor set */
-   uint32_t descriptor_buffer_size;
+   /* Size of the descriptor buffer dedicated to surface states for this
+    * descriptor set
+    */
+   uint32_t descriptor_buffer_surface_size;
+
+   /* Size of the descriptor buffer dedicated to sampler states for this
+    * descriptor set
+    */
+   uint32_t descriptor_buffer_sampler_size;

    /* Bindings in this descriptor set */
    struct anv_descriptor_set_binding_layout binding[0];
@@ -2506,13 +2529,20 @@ struct anv_descriptor_set {
     */
    uint32_t generate_surface_states;

-   /* State relative to anv_descriptor_pool::bo */
-   struct anv_state desc_mem;
+   /* State relative to anv_descriptor_pool::surface_bo */
+   struct anv_state desc_surface_mem;
+   /* State relative to anv_descriptor_pool::sampler_bo */
+   struct anv_state desc_sampler_mem;
    /* Surface state for the descriptor buffer */
    struct anv_state desc_surface_state;

-   /* Descriptor set address. */
-   struct anv_address desc_addr;
+   /* Descriptor set address pointing to desc_surface_mem (we don't need one
+    * for sampler because they're never accessed other than by the HW through
+    * the shader sampler handle).
+    */
+   struct anv_address desc_surface_addr;
+
+   struct anv_address desc_sampler_addr;

    /* Descriptor offset from the
     * device->va.internal_surface_state_pool.addr
@@ -2592,15 +2622,28 @@ anv_descriptor_set_address(struct anv_descriptor_set *set)
       push_set->set_used_on_gpu = true;
    }

-   return set->desc_addr;
+   return set->desc_surface_addr;
 }

+struct anv_descriptor_pool_heap {
+   /* BO allocated to back the pool (unused for host pools) */
+   struct anv_bo *bo;
+
+   /* Host memory allocated to back a host pool */
+   void *host_mem;
+
+   /* Heap tracking allocations in bo/host_mem */
+   struct util_vma_heap heap;
+
+   /* Size of the heap */
+   uint32_t size;
+};
+
 struct anv_descriptor_pool {
    struct vk_object_base base;

-   struct anv_bo *bo;
-   void *host_bo;
-   struct util_vma_heap bo_heap;
+   struct anv_descriptor_pool_heap surfaces;
+   struct anv_descriptor_pool_heap samplers;

    struct anv_state_stream surface_state_stream;
    void *surface_state_free_list;
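A simplified, self-contained model of what the split means for allocation: a descriptor set now has to be carved out of both pool heaps, one for surface states and one for sampler states. The bump allocator below is a stand-in for the real util_vma_heap, and everything besides the field layout shown above is hypothetical:

#include <stdbool.h>
#include <stdint.h>

/* Simplified model of anv_descriptor_pool_heap: a size-bounded heap from
 * which set memory is carved. */
struct pool_heap {
   uint32_t size;
   uint32_t next; /* bump pointer standing in for util_vma_heap */
};

struct set_mem {
   uint32_t surface_offset;
   uint32_t sampler_offset;
};

static bool heap_alloc(struct pool_heap *h, uint32_t size, uint32_t *out)
{
   if (h->next + size > h->size)
      return false;
   *out = h->next;
   h->next += size;
   return true;
}

/* A set allocation now has to succeed on both heaps. */
static bool alloc_set(struct pool_heap *surfaces, struct pool_heap *samplers,
                      uint32_t surface_size, uint32_t sampler_size,
                      struct set_mem *out)
{
   if (!heap_alloc(surfaces, surface_size, &out->surface_offset))
      return false;
   if (!heap_alloc(samplers, sampler_size, &out->sampler_offset)) {
      surfaces->next -= surface_size; /* roll back the first allocation */
      return false;
   }
   return true;
}

int main(void)
{
   struct pool_heap surfaces = { .size = 1 << 20, .next = 0 };
   struct pool_heap samplers = { .size = 1 << 16, .next = 0 };
   struct set_mem set;
   return alloc_set(&surfaces, &samplers, 4096, 256, &set) ? 0 : 1;
}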
@@ -2614,9 +2657,6 @@ struct anv_descriptor_pool {
    /** Allocated size of host_mem */
    uint32_t host_mem_size;

-   /** Allocated size of descriptor bo (should be equal to bo->size) */
-   uint32_t bo_mem_size;
-
    /**
     * VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT. If set, then
     * surface_state_stream is unused.
@@ -3265,15 +3305,6 @@ struct anv_push_constants {
    /** Push constant data provided by the client through vkPushConstants */
    uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];

-   /** Dynamic offsets for dynamic UBOs and SSBOs */
-   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
-
-   /* Robust access pushed registers. */
-   uint64_t push_reg_mask[MESA_SHADER_STAGES];
-
-   /** Ray query globals (RT_DISPATCH_GLOBALS) */
-   uint64_t ray_query_globals;
-
 #define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint32_t)ANV_UBO_ALIGNMENT - 1)
 #define ANV_DESCRIPTOR_SET_OFFSET_MASK (~(uint32_t)(ANV_UBO_ALIGNMENT - 1))
@@ -3285,7 +3316,15 @@ struct anv_push_constants {
     *
     * In bits [6:63] : descriptor set address
     */
-   uint32_t desc_offsets[MAX_SETS];
+   uint32_t desc_surface_offsets[MAX_SETS];
+
+   /**
+    * Base offsets for descriptor sets from
+    */
+   uint32_t desc_sampler_offsets[MAX_SETS];
+
+   /** Dynamic offsets for dynamic UBOs and SSBOs */
+   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];

    union {
       struct {
@@ -3311,6 +3350,12 @@ struct anv_push_constants {
          uint32_t subgroup_id;
       } cs;
    };
+
+   /* Robust access pushed registers. */
+   uint64_t push_reg_mask[MESA_SHADER_STAGES];
+
+   /** Ray query globals (RT_DISPATCH_GLOBALS) */
+   uint64_t ray_query_globals;
 };

 struct anv_surface_state {
@@ -102,30 +102,29 @@ anv_physical_device_init_va_ranges(struct anv_physical_device *device)
                     _1Gb - address);

    address = va_add(&device->va.low_heap, address, _1Gb);

-   /* PRMs & simulation disagrees on the actual size of this heap. Take the
-    * smallest (simulation) so that it works everywhere.
-    */
-   address = va_add(&device->va.dynamic_state_pool, address, _1Gb);
-   address = va_add(&device->va.sampler_state_pool, address, 2 * _1Gb);

-   /* The following addresses have to be located in a 4Gb range so that the
-    * binding tables can address internal surface states & bindless surface
-    * states.
+   /* The binding table pool has to be located directly in front of the
+    * surface states.
     */
-   address = align64(address, _4Gb);
+   address += _1Gb;
    address = va_add(&device->va.binding_table_pool, address, _1Gb);
    address = va_add(&device->va.internal_surface_state_pool, address, 1 * _1Gb);
+   assert(device->va.internal_surface_state_pool.addr ==
+          align64(device->va.internal_surface_state_pool.addr, 2 * _1Gb));
    /* Scratch surface state overlaps with the internal surface state */
    va_at(&device->va.scratch_surface_state_pool,
          device->va.internal_surface_state_pool.addr,
          8 * _1Mb);

-   /* The bindless surface state heap has be in the same 4Gb range from the
-    * binding table pool start so they can be addressed from binding table
-    * entries.
-    */
    address = va_add(&device->va.bindless_surface_state_pool, address, 2 * _1Gb);
+
+   /* PRMs & simulation disagrees on the actual size of this heap. Take the
+    * smallest (simulation) so that it works everywhere.
+    */
+   address = align64(address, _4Gb);
+   address = va_add(&device->va.dynamic_state_pool, address, _1Gb);
+   address = va_add(&device->va.sampler_state_pool, address, 2 * _1Gb);

    if (device->indirect_descriptors) {
       /* With indirect descriptors, descriptor buffers can go anywhere, they
        * just need to be in a 4Gb aligned range, so all shader accesses can
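va_add()/align64() behave like a cursor walking the address space: each call assigns [address, address + size) to a range and advances the cursor. A toy model of the pattern — the helper definitions and the starting address are assumptions, not the driver's code:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define _1Gb (1ull << 30)
#define _4Gb (4 * _1Gb)

struct va_range { uint64_t addr, size; };

/* Model of the va_add() pattern above: assign a range at the current
 * address and return the advanced cursor. */
static uint64_t va_add(struct va_range *r, uint64_t addr, uint64_t size)
{
   r->addr = addr;
   r->size = size;
   return addr + size;
}

static uint64_t align64(uint64_t v, uint64_t a)
{
   return (v + a - 1) & ~(a - 1); /* a must be a power of two */
}

int main(void)
{
   struct va_range binding_table, internal_surfaces, bindless_surfaces;
   struct va_range dynamic_state, sampler_state;
   uint64_t address = 2 * _1Gb; /* hypothetical cursor after the low heaps */

   /* Binding tables directly in front of the surface states... */
   address = va_add(&binding_table, address, _1Gb);
   address = va_add(&internal_surfaces, address, _1Gb);
   address = va_add(&bindless_surfaces, address, 2 * _1Gb);

   /* ...then the dynamic/sampler state heaps on their own 4Gb boundary,
    * which is what lets SAMPLER_STATE live in the dynamic state heap. */
   address = align64(address, _4Gb);
   address = va_add(&dynamic_state, address, _1Gb);
   address = va_add(&sampler_state, address, 2 * _1Gb);

   printf("dynamic state at 0x%" PRIx64 "\n", dynamic_state.addr);
   return 0;
}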
@@ -171,33 +171,34 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)

      sba.GeneralStateBufferSize = 0xfffff;
      sba.IndirectObjectBufferSize = 0xfffff;
-     sba.DynamicStateBufferSize = device->physical->va.dynamic_state_pool.size / 4096;
+     sba.DynamicStateBufferSize = (device->physical->va.dynamic_state_pool.size +
+                                   device->physical->va.sampler_state_pool.size) / 4096;
      sba.InstructionBufferSize = device->physical->va.instruction_state_pool.size / 4096;
      sba.GeneralStateBufferSizeModifyEnable = true;
      sba.IndirectObjectBufferSizeModifyEnable = true;
      sba.DynamicStateBufferSizeModifyEnable = true;
      sba.InstructionBuffersizeModifyEnable = true;

+#if GFX_VER >= 11
+     sba.BindlessSamplerStateBaseAddress = ANV_NULL_ADDRESS;
+     sba.BindlessSamplerStateBufferSize = 0;
+     sba.BindlessSamplerStateMOCS = mocs;
+     sba.BindlessSamplerStateBaseAddressModifyEnable = true;
+#endif
+
      if (!device->physical->indirect_descriptors) {
#if GFX_VERx10 >= 125
-        /* Bindless Surface State & Bindless Sampler State are aligned to the
-         * same heap
-         */
         sba.BindlessSurfaceStateBaseAddress =
-           sba.BindlessSamplerStateBaseAddress =
           (struct anv_address) { .offset =
              device->physical->va.binding_table_pool.addr, };
         sba.BindlessSurfaceStateSize =
-           (device->physical->va.binding_table_pool.size +
-            device->physical->va.internal_surface_state_pool.size +
-            device->physical->va.descriptor_pool.size) - 1;
-        sba.BindlessSamplerStateBufferSize =
-           (device->physical->va.binding_table_pool.size +
-            device->physical->va.internal_surface_state_pool.size +
-            device->physical->va.descriptor_pool.size) / 4096 - 1;
-        sba.BindlessSurfaceStateMOCS = sba.BindlessSamplerStateMOCS = mocs;
-        sba.BindlessSurfaceStateBaseAddressModifyEnable =
-           sba.BindlessSamplerStateBaseAddressModifyEnable = true;
+           (device->physical->va.internal_surface_state_pool.size +
+            device->physical->va.bindless_surface_state_pool.size) - 1;
+        sba.BindlessSurfaceStateMOCS = mocs;
+        sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
#else
         unreachable("Direct descriptor not supported");
#endif
@@ -210,12 +211,6 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
            anv_physical_device_bindless_heap_size(device->physical) / ANV_SURFACE_STATE_SIZE - 1;
         sba.BindlessSurfaceStateMOCS = mocs;
         sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
-#if GFX_VER >= 11
-        sba.BindlessSamplerStateBaseAddress = (struct anv_address) { NULL, 0 };
-        sba.BindlessSamplerStateMOCS = mocs;
-        sba.BindlessSamplerStateBaseAddressModifyEnable = true;
-        sba.BindlessSamplerStateBufferSize = 0;
-#endif
      }

#if GFX_VERx10 >= 125
@@ -2115,7 +2110,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
          /* This is a descriptor set buffer so the set index is actually
           * given by binding->binding. (Yes, that's confusing.)
           */
-         assert(set->desc_mem.alloc_size);
+         assert(set->desc_surface_mem.alloc_size);
          assert(set->desc_surface_state.alloc_size);
          bt_map[s] = set->desc_surface_state.offset + state_offset;
          add_surface_reloc(cmd_buffer, anv_descriptor_set_address(set));
@@ -2349,8 +2344,8 @@ flush_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
                             set->desc_surface_state.map,
                             format, ISL_SWIZZLE_IDENTITY,
                             ISL_SURF_USAGE_CONSTANT_BUFFER_BIT,
-                            set->desc_addr,
-                            layout->descriptor_buffer_size, 1);
+                            set->desc_surface_addr,
+                            layout->descriptor_buffer_surface_size, 1);
    }

    state->push_descriptor.set_used_on_gpu = true;
@@ -2480,9 +2475,10 @@ get_push_range_bound_size(struct anv_cmd_buffer *cmd_buffer,
    case ANV_DESCRIPTOR_SET_DESCRIPTORS: {
       struct anv_descriptor_set *set =
          gfx_state->base.descriptors[range->index];
-      assert(range->start * 32 < set->desc_mem.alloc_size);
-      assert((range->start + range->length) * 32 <= set->desc_mem.alloc_size);
-      return set->desc_mem.alloc_size;
+      struct anv_state state = set->desc_surface_mem;
+      assert(range->start * 32 < state.alloc_size);
+      assert((range->start + range->length) * 32 <= state.alloc_size);
+      return state.alloc_size;
    }

    case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
@@ -250,7 +250,8 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch)
         (struct anv_address) { .offset =
            device->physical->va.dynamic_state_pool.addr,
         };
-     sba.DynamicStateBufferSize = device->physical->va.dynamic_state_pool.size / 4096;
+     sba.DynamicStateBufferSize = (device->physical->va.dynamic_state_pool.size +
+                                   device->physical->va.sampler_state_pool.size) / 4096;
      sba.DynamicStateMOCS = mocs;
      sba.DynamicStateBaseAddressModifyEnable = true;
      sba.DynamicStateBufferSizeModifyEnable = true;
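These BufferSize fields are programmed in 4Kb pages, so covering both heaps is just a matter of summing the pool sizes before dividing. A quick arithmetic check, using the 1Gb + 2Gb sizes from the va layout hunk above:

#include <assert.h>
#include <stdint.h>

int main(void)
{
   /* Sizes from the va layout above: 1Gb dynamic + 2Gb sampler state. */
   const uint64_t dynamic_state_pool_size = 1ull << 30;
   const uint64_t sampler_state_pool_size = 2ull << 30;

   /* The size fields are expressed in 4Kb pages (hence the / 4096). */
   const uint64_t pages =
      (dynamic_state_pool_size + sampler_state_pool_size) / 4096;

   assert(pages == (3ull << 30) / 4096); /* 786432 pages */
   return 0;
}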
@@ -270,6 +271,13 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch)
      sba.InstructionBaseAddressModifyEnable = true;
      sba.InstructionBuffersizeModifyEnable = true;

+#if GFX_VER >= 11
+     sba.BindlessSamplerStateBaseAddress = ANV_NULL_ADDRESS;
+     sba.BindlessSamplerStateBufferSize = 0;
+     sba.BindlessSamplerStateMOCS = mocs;
+     sba.BindlessSamplerStateBaseAddressModifyEnable = true;
+#endif
+
      if (device->physical->indirect_descriptors) {
         sba.BindlessSurfaceStateBaseAddress =
            (struct anv_address) { .offset =
@@ -279,29 +287,18 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch)
            anv_physical_device_bindless_heap_size(device->physical) / ANV_SURFACE_STATE_SIZE - 1;
         sba.BindlessSurfaceStateMOCS = mocs;
         sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
-
-        sba.BindlessSamplerStateBaseAddress = (struct anv_address) { NULL, 0 };
-        sba.BindlessSamplerStateMOCS = mocs;
-        sba.BindlessSamplerStateBaseAddressModifyEnable = true;
-        sba.BindlessSamplerStateBufferSize = 0;
      } else {
-        /* Bindless Surface State & Bindless Sampler State are aligned to the
-         * same heap
-         */
-        sba.BindlessSurfaceStateBaseAddress =
-           sba.BindlessSamplerStateBaseAddress =
-           (struct anv_address) { .offset = device->physical->va.binding_table_pool.addr, };
+        sba.BindlessSurfaceStateBaseAddress = (struct anv_address) {
+           .offset = device->physical->va.internal_surface_state_pool.addr,
+        };
         sba.BindlessSurfaceStateSize =
-           (device->physical->va.binding_table_pool.size +
-            device->physical->va.internal_surface_state_pool.size +
+           (device->physical->va.internal_surface_state_pool.size +
            device->physical->va.bindless_surface_state_pool.size) - 1;
-        sba.BindlessSamplerStateBufferSize =
-           (device->physical->va.binding_table_pool.size +
-            device->physical->va.internal_surface_state_pool.size +
-            device->physical->va.bindless_surface_state_pool.size) / 4096 - 1;
-        sba.BindlessSurfaceStateMOCS = sba.BindlessSamplerStateMOCS = mocs;
-        sba.BindlessSurfaceStateBaseAddressModifyEnable =
-           sba.BindlessSamplerStateBaseAddressModifyEnable = true;
+        sba.BindlessSurfaceStateMOCS = mocs;
+        sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
      }

#if GFX_VERx10 >= 125