mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 11:28:05 +02:00
anv: add apply_layout support for device bindable shaders/pipelines
We consider them like bindless stages (no binding table) as much as possible. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31384>
This commit is contained in:
parent
af8c85b5bd
commit
6f5d30c0a2
5 changed files with 224 additions and 55 deletions
|
|
@ -604,6 +604,7 @@ enum intel_shader_reloc_id {
|
|||
BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
|
||||
BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
|
||||
BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH,
|
||||
BRW_SHADER_RELOC_PUSH_DESCRIPTORS_BUFFER_ADDR_HIGH,
|
||||
BRW_SHADER_RELOC_DESCRIPTORS_VIEW_HANDLE,
|
||||
BRW_SHADER_RELOC_DESCRIPTORS_BUFFERS_VIEW_HANDLE,
|
||||
BRW_SHADER_RELOC_INSTRUCTION_BASE_ADDR_HIGH,
|
||||
|
|
|
|||
|
|
@ -113,6 +113,7 @@ bool anv_nir_apply_pipeline_layout(nir_shader *shader,
|
|||
struct anv_descriptor_set_layout * const *set_layouts,
|
||||
uint32_t set_count,
|
||||
const uint32_t *dynamic_offset_start,
|
||||
bool device_bindable,
|
||||
struct anv_pipeline_bind_map *map,
|
||||
struct anv_pipeline_push_map *push_map,
|
||||
void *push_map_mem_ctx);
|
||||
|
|
|
|||
|
|
@ -64,9 +64,11 @@ struct apply_pipeline_layout_state {
|
|||
|
||||
bool uses_constants;
|
||||
bool has_dynamic_buffers;
|
||||
bool is_device_bindable;
|
||||
uint8_t constants_offset;
|
||||
|
||||
nir_variable *set_idx_to_bti;
|
||||
nir_variable *set_idx_to_base_addr;
|
||||
nir_variable *set_idx_to_offset;
|
||||
|
||||
struct {
|
||||
|
|
@ -505,8 +507,6 @@ build_load_descriptor_mem_from_res_index(nir_builder *b,
|
|||
{
|
||||
struct res_index_defs res = unpack_res_index(b, res_index);
|
||||
|
||||
nir_def *surface_index = nir_load_array_var(b, state->set_idx_to_bti, res.set);
|
||||
|
||||
nir_def *offset32 = nir_iadd_imm(
|
||||
b,
|
||||
nir_iadd(b,
|
||||
|
|
@ -515,12 +515,24 @@ build_load_descriptor_mem_from_res_index(nir_builder *b,
|
|||
nir_imul(b, res.array_index, res.desc_stride))),
|
||||
imm_offset);
|
||||
|
||||
return nir_load_ubo(b, num_components, bit_size,
|
||||
surface_index, offset32,
|
||||
.align_mul = 8,
|
||||
.align_offset = imm_offset % 8,
|
||||
.range_base = 0,
|
||||
.range = num_components * bit_size / 8);
|
||||
if (state->pdevice->info.has_lsc || !state->is_device_bindable) {
|
||||
nir_def *surface_index = nir_load_array_var(b, state->set_idx_to_bti, res.set);
|
||||
|
||||
return nir_load_ubo(b, num_components, bit_size,
|
||||
surface_index, offset32,
|
||||
.align_mul = 8,
|
||||
.align_offset = imm_offset % 8,
|
||||
.range_base = 0,
|
||||
.range = num_components * bit_size / 8);
|
||||
} else {
|
||||
return nir_load_global_constant(
|
||||
b, num_components, bit_size,
|
||||
nir_pack_64_2x32_split(b, offset32,
|
||||
nir_load_array_var(b, state->set_idx_to_base_addr,
|
||||
res.set)),
|
||||
.align_mul = 8,
|
||||
.align_offset = imm_offset % 8);
|
||||
}
|
||||
}
|
||||
|
||||
/* When using direct descriptor, we do not have a structure to read in memory
|
||||
|
|
@ -704,30 +716,6 @@ build_load_storage_3d_image_depth(nir_builder *b,
|
|||
}
|
||||
}
|
||||
|
||||
/** Build a 64bit_global_32bit_offset address for a descriptor set */
|
||||
static nir_def *
|
||||
build_desc_address64(nir_builder *b, nir_def *set_idx, unsigned set_idx_imm,
|
||||
const struct apply_pipeline_layout_state *state)
|
||||
{
|
||||
nir_def *desc_offset = set_idx != NULL ?
|
||||
anv_load_driver_uniform_indexed(b, 1, desc_surface_offsets, set_idx) :
|
||||
anv_load_driver_uniform(b, 1, desc_surface_offsets[set_idx_imm]);
|
||||
desc_offset = nir_iand_imm(b, desc_offset, ANV_DESCRIPTOR_SET_OFFSET_MASK);
|
||||
if (state->bind_map->layout_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER &&
|
||||
!intel_has_extended_bindless(&state->pdevice->info)) {
|
||||
nir_def *bindless_base_offset =
|
||||
anv_load_driver_uniform(b, 1, surfaces_base_offset);
|
||||
desc_offset = nir_iadd(b, bindless_base_offset, desc_offset);
|
||||
}
|
||||
return nir_pack_64_2x32_split(
|
||||
b, desc_offset,
|
||||
nir_load_reloc_const_intel(
|
||||
b,
|
||||
state->bind_map->layout_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER ?
|
||||
BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH :
|
||||
BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH));
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
build_descriptor_set_bti(nir_builder *b,
|
||||
uint32_t set,
|
||||
|
|
@ -769,12 +757,38 @@ build_descriptor_set_bti(nir_builder *b,
|
|||
}
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
build_descriptor_set_base_address(nir_builder *b,
|
||||
uint32_t set,
|
||||
const struct apply_pipeline_layout_state *state)
|
||||
{
|
||||
const bool is_push_set = set < state->set_count &&
|
||||
state->set_layouts[set] != NULL &&
|
||||
(state->set_layouts[set]->vk.flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
|
||||
enum intel_shader_reloc_id reloc_id;
|
||||
|
||||
if (state->pdevice->info.has_lsc) {
|
||||
reloc_id =
|
||||
state->bind_map->layout_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER ?
|
||||
BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH :
|
||||
BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH;
|
||||
} else {
|
||||
reloc_id =
|
||||
state->bind_map->layout_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER ?
|
||||
is_push_set ? BRW_SHADER_RELOC_PUSH_DESCRIPTORS_BUFFER_ADDR_HIGH :
|
||||
BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH :
|
||||
BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH;
|
||||
}
|
||||
|
||||
return nir_load_reloc_const_intel(b, reloc_id);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
build_descriptor_set_offset(nir_builder *b,
|
||||
uint32_t set,
|
||||
const struct apply_pipeline_layout_state *state)
|
||||
{
|
||||
if (state->pdevice->info.has_lsc) {
|
||||
if (state->pdevice->info.has_lsc || state->is_device_bindable) {
|
||||
return nir_iand_imm(b,
|
||||
anv_load_driver_uniform(b, 1, desc_surface_offsets[set]),
|
||||
ANV_DESCRIPTOR_SET_OFFSET_MASK /* array_index */);
|
||||
|
|
@ -801,6 +815,27 @@ build_desc_address32(nir_builder *b,
|
|||
offset));
|
||||
}
|
||||
|
||||
/** Build a 64bit_bounded_global address for a descriptor set */
|
||||
static nir_def *
|
||||
build_desc_address64(nir_builder *b,
|
||||
nir_def *set_idx, unsigned set,
|
||||
nir_def *offset,
|
||||
uint32_t range,
|
||||
const struct apply_pipeline_layout_state *state)
|
||||
{
|
||||
return nir_vec4(b,
|
||||
nir_iadd(b,
|
||||
nir_load_array_var(b, state->set_idx_to_offset,
|
||||
set < MAX_SETS ?
|
||||
nir_imm_int(b, set) : set_idx),
|
||||
offset),
|
||||
nir_load_array_var(b, state->set_idx_to_base_addr,
|
||||
set < MAX_SETS ?
|
||||
nir_imm_int(b, set) : set_idx),
|
||||
nir_imm_int(b, range),
|
||||
nir_imm_int(b, 0));
|
||||
}
|
||||
|
||||
/** Whether a surface is accessed through the bindless surface state heap */
|
||||
static bool
|
||||
is_binding_bindless(unsigned set, unsigned binding, bool sampler,
|
||||
|
|
@ -880,19 +915,6 @@ build_desc_addr_for_binding(nir_builder *b,
|
|||
return build_desc_address32(b, NULL, set, desc_offset, state);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
build_inline_desc_addr32(nir_builder *b,
|
||||
unsigned set,
|
||||
const struct anv_descriptor_set_binding_layout *bind_layout,
|
||||
const struct apply_pipeline_layout_state *state)
|
||||
{
|
||||
return nir_vec2(
|
||||
b,
|
||||
nir_load_array_var_imm(b, state->set_idx_to_bti, set),
|
||||
nir_iadd_imm(b, nir_load_array_var_imm(b, state->set_idx_to_offset, set),
|
||||
bind_layout->descriptor_surface_offset));
|
||||
}
|
||||
|
||||
static unsigned
|
||||
binding_descriptor_offset(const struct apply_pipeline_layout_state *state,
|
||||
const struct anv_descriptor_set_binding_layout *bind_layout,
|
||||
|
|
@ -1237,9 +1259,16 @@ build_buffer_addr_for_binding(nir_builder *b,
|
|||
&state->set_layouts[set]->binding[binding];
|
||||
|
||||
if (bind_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
|
||||
return build_desc_address32(b, NULL, set,
|
||||
nir_imm_int(b, bind_layout->descriptor_surface_offset),
|
||||
state);
|
||||
if (!state->pdevice->info.has_lsc && state->is_device_bindable) {
|
||||
return build_desc_address64(b, NULL, set,
|
||||
nir_imm_int(b, bind_layout->descriptor_surface_offset),
|
||||
bind_layout->array_size,
|
||||
state);
|
||||
} else {
|
||||
return build_desc_address32(b, NULL, set,
|
||||
nir_imm_int(b, bind_layout->descriptor_surface_offset),
|
||||
state);
|
||||
}
|
||||
}
|
||||
|
||||
if (addr_format != nir_address_format_32bit_index_offset)
|
||||
|
|
@ -1428,9 +1457,6 @@ try_lower_direct_buffer_intrinsic(nir_builder *b,
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Rewrite to 32bit_index_offset whenever we can */
|
||||
addr_format = nir_address_format_32bit_index_offset;
|
||||
|
||||
/* If a dynamic has not been assigned a binding table entry, we need to
|
||||
* bail here.
|
||||
*/
|
||||
|
|
@ -1438,6 +1464,12 @@ try_lower_direct_buffer_intrinsic(nir_builder *b,
|
|||
!descriptor_has_bti(desc, state))
|
||||
return false;
|
||||
|
||||
/* Rewrite to 32bit_index_offset */
|
||||
addr_format =
|
||||
(!state->pdevice->info.has_lsc && state->is_device_bindable) ?
|
||||
nir_address_format_64bit_bounded_global :
|
||||
nir_address_format_32bit_index_offset;
|
||||
|
||||
nir_def *addr =
|
||||
build_buffer_addr_for_deref(b, deref, addr_format, state);
|
||||
|
||||
|
|
@ -2190,6 +2222,115 @@ binding_should_use_sampler_binding_table(const struct apply_pipeline_layout_stat
|
|||
return true;
|
||||
}
|
||||
|
||||
/* This builds a binding table based on the push descriptor layout, so all the
|
||||
* shaders using a compatible layout can share the same binding table. For LSC
|
||||
* platforms the binding table should already by empty (except render
|
||||
* targets), so it's already compatible.
|
||||
*/
|
||||
static void
|
||||
build_device_bindable_binding_table(struct apply_pipeline_layout_state *state,
|
||||
nir_shader *shader,
|
||||
struct anv_pipeline_bind_map *map,
|
||||
struct anv_pipeline_push_map *push_map,
|
||||
void *push_map_mem_ctx)
|
||||
{
|
||||
/* Compute the amount of push block items required. */
|
||||
unsigned push_block_count = map->surface_count + MAX_SETS;
|
||||
for (unsigned s = 0; s < state->set_count; s++) {
|
||||
const struct anv_descriptor_set_layout *set_layout =
|
||||
state->set_layouts[s];
|
||||
if (!set_layout)
|
||||
continue;
|
||||
|
||||
for (unsigned b = 0; b < set_layout->binding_count; b++) {
|
||||
if (set_layout->binding[b].type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
|
||||
push_block_count += set_layout->binding[b].array_size;
|
||||
}
|
||||
}
|
||||
|
||||
/* Assign a block index for each surface */
|
||||
push_map->block_to_descriptor = rzalloc_array(push_map_mem_ctx,
|
||||
struct anv_pipeline_binding,
|
||||
push_block_count);
|
||||
|
||||
memcpy(push_map->block_to_descriptor,
|
||||
map->surface_to_descriptor,
|
||||
sizeof(push_map->block_to_descriptor[0]) * map->surface_count);
|
||||
push_map->block_count = map->surface_count;
|
||||
|
||||
/* No BTI allowed for descriptor sets, we'll use A64 messages. */
|
||||
for (unsigned s = 0; s < state->set_count; s++) {
|
||||
state->set[s].desc_offset = BINDLESS_OFFSET;
|
||||
|
||||
if (brw_shader_stage_requires_bindless_resources(shader->info.stage)) {
|
||||
state->set[s].push_block = UINT32_MAX;
|
||||
} else {
|
||||
state->set[s].push_block = push_map->block_count;
|
||||
add_descriptor_push_entry(push_map, s, state->bind_map);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t set = 0; set < state->set_count; set++) {
|
||||
struct anv_descriptor_set_layout *set_layout =
|
||||
state->set_layouts[set];
|
||||
if (!set_layout)
|
||||
continue;
|
||||
|
||||
for (unsigned b = 0; b < set_layout->binding_count; b++) {
|
||||
const struct anv_descriptor_set_binding_layout *bind_layout =
|
||||
&set_layout->binding[b];
|
||||
|
||||
/* Assume bindless by default */
|
||||
state->set[set].binding[b].surface_offset = BINDLESS_OFFSET;
|
||||
state->set[set].binding[b].sampler_offset = BINDLESS_OFFSET;
|
||||
|
||||
if (binding_is_promotable_to_push(set_layout, bind_layout)) {
|
||||
if (bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
|
||||
state->set[set].binding[b].push_block = push_map->block_count;
|
||||
for (unsigned i = 0; i < bind_layout->array_size; i++)
|
||||
add_push_entry(push_map, set, b, i, bind_layout);
|
||||
}
|
||||
}
|
||||
|
||||
if (!state->pdevice->info.has_lsc &&
|
||||
(set_layout->vk.flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) &&
|
||||
(set_layout->vk.flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT)) {
|
||||
/* Embedded sampler have an array size limited to 1 */
|
||||
if (state->set[set].binding[b].properties & BINDING_PROPERTY_EMBEDDED_SAMPLER)
|
||||
add_embedded_sampler_entry(state, map, set, b);
|
||||
|
||||
const uint8_t max_planes = bti_multiplier(state, set, b);
|
||||
|
||||
for (unsigned i = 0; i < bind_layout->array_size; i++) {
|
||||
if (bind_layout->data & ANV_DESCRIPTOR_BTI_SURFACE_STATE) {
|
||||
const uint8_t max_sampler_planes =
|
||||
(bind_layout->samplers &&
|
||||
bind_layout->samplers[i].has_ycbcr_conversion) ?
|
||||
vk_format_get_plane_count(
|
||||
bind_layout->samplers[i].ycbcr_conversion_state.format) :
|
||||
1;
|
||||
for (uint8_t p = 0; p < max_planes; p++) {
|
||||
if (p < max_sampler_planes) {
|
||||
add_bti_entry(map, set, b, i, p, bind_layout);
|
||||
} else {
|
||||
add_null_bti_entry(map);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!(state->set[set].binding[b].properties & BINDING_PROPERTY_EMBEDDED_SAMPLER) &&
|
||||
(bind_layout->data & ANV_DESCRIPTOR_BTI_SAMPLER_STATE)) {
|
||||
for (unsigned i = 0; i < bind_layout->array_size; i++) {
|
||||
for (uint8_t p = 0; p < max_planes; p++)
|
||||
add_sampler_entry(map, set, b, i, p, bind_layout);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
build_packed_binding_table(struct apply_pipeline_layout_state *state,
|
||||
nir_shader *shader,
|
||||
|
|
@ -2429,6 +2570,22 @@ build_descriptor_sets_bti_array(nir_builder *b,
|
|||
return set_to_bti;
|
||||
}
|
||||
|
||||
static nir_variable *
|
||||
build_descriptor_sets_base_addr_array(nir_builder *b,
|
||||
const struct apply_pipeline_layout_state *state)
|
||||
{
|
||||
nir_variable *set_to_base_addr = nir_local_variable_create(
|
||||
b->impl, glsl_array_type(glsl_uint_type(), MAX_SETS, 0),
|
||||
"set_to_base_addr");
|
||||
|
||||
for (uint32_t i = 0; i < MAX_SETS; i++) {
|
||||
nir_store_array_var(b, set_to_base_addr, nir_imm_int(b, i),
|
||||
build_descriptor_set_base_address(b, i, state), 0x1);
|
||||
}
|
||||
|
||||
return set_to_base_addr;
|
||||
}
|
||||
|
||||
static nir_variable *
|
||||
build_descriptor_sets_offset_array(nir_builder *b,
|
||||
const struct apply_pipeline_layout_state *state)
|
||||
|
|
@ -2452,6 +2609,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
|
|||
struct anv_descriptor_set_layout * const *set_layouts,
|
||||
uint32_t set_count,
|
||||
const uint32_t *dynamic_offset_start,
|
||||
bool device_bindable,
|
||||
struct anv_pipeline_bind_map *map,
|
||||
struct anv_pipeline_push_map *push_map,
|
||||
void *push_map_mem_ctx)
|
||||
|
|
@ -2473,6 +2631,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
|
|||
.dynamic_offset_start = dynamic_offset_start,
|
||||
.ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_flags),
|
||||
.ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_flags),
|
||||
.is_device_bindable = device_bindable,
|
||||
};
|
||||
state.lowered_instrs = _mesa_pointer_set_create(state.mem_ctx);
|
||||
|
||||
|
|
@ -2491,8 +2650,10 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
|
|||
progress |= nir_shader_instructions_pass(shader, get_used_bindings,
|
||||
nir_metadata_all, &state);
|
||||
|
||||
/* Build the binding table */
|
||||
build_packed_binding_table(&state, shader, map, push_map, push_map_mem_ctx);
|
||||
if (device_bindable)
|
||||
build_device_bindable_binding_table(&state, shader, map, push_map, push_map_mem_ctx);
|
||||
else
|
||||
build_packed_binding_table(&state, shader, map, push_map, push_map_mem_ctx);
|
||||
|
||||
/* Before we do the normal lowering, we look for any SSBO operations
|
||||
* that we can lower to the BTI model and lower them up-front. The BTI
|
||||
|
|
@ -2527,6 +2688,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
|
|||
nir_foreach_function_impl(impl, shader) {
|
||||
nir_builder _b = nir_builder_at(nir_before_impl(impl)), *b = &_b;
|
||||
state.set_idx_to_bti = build_descriptor_sets_bti_array(b, &state);
|
||||
state.set_idx_to_base_addr = build_descriptor_sets_base_addr_array(b, &state);
|
||||
state.set_idx_to_offset = build_descriptor_sets_offset_array(b, &state);
|
||||
progress |= nir_function_instructions_pass(impl,
|
||||
lower_direct_buffer_instr,
|
||||
|
|
|
|||
|
|
@ -564,6 +564,10 @@ anv_shader_set_relocs(struct anv_device *device,
|
|||
.id = BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH,
|
||||
.value = device->physical->va.dynamic_visible_pool.addr >> 32,
|
||||
};
|
||||
reloc_values[rv_count++] = (struct intel_shader_reloc_value) {
|
||||
.id = BRW_SHADER_RELOC_PUSH_DESCRIPTORS_BUFFER_ADDR_HIGH,
|
||||
.value = device->physical->va.internal_surface_state_pool.addr >> 32,
|
||||
};
|
||||
assert((device->physical->va.indirect_descriptor_pool.addr & 0xffffffff) == 0);
|
||||
assert((device->physical->va.internal_surface_state_pool.addr & 0xffffffff) == 0);
|
||||
reloc_values[rv_count++] = (struct intel_shader_reloc_value) {
|
||||
|
|
|
|||
|
|
@ -1613,8 +1613,9 @@ anv_shader_lower_nir(struct anv_device *device,
|
|||
pdevice, shader_data->key.base.robust_flags,
|
||||
set_layouts, set_layout_count,
|
||||
(shader_data->info->flags &
|
||||
VK_SHADER_CREATE_INDEPENDENT_SETS_BIT_MESA) ? NULL:
|
||||
VK_SHADER_CREATE_INDEPENDENT_SETS_BIT_MESA) ? NULL :
|
||||
dynamic_descriptors_offsets,
|
||||
shader_data->info->flags & VK_SHADER_CREATE_INDIRECT_BINDABLE_BIT_EXT,
|
||||
&shader_data->bind_map, &shader_data->push_map, mem_ctx);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue