anv: add pipeline/shader support for descriptor buffers

Lowering/layout is pretty much the same as direct descriptors. The
caveat is that, since the descriptor buffers are not visible from the
binding tables, we can't promote anything to the binding table (except
push descriptors).

The reason for this is that there is nothing that prevents an
application from using both types of descriptors, and because descriptor
buffers have visible addresses + capture/replay, we can't merge the 2
types in the same virtual address space location (limited to 4Gb max,
limited to 2Gb with binding tables).

If we had the guarantee that both are not going to be used at the same
time, we could consider a 2Gb VA for descriptor buffers.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22151>
This commit is contained in:
Lionel Landwerlin 2023-03-27 19:42:31 +03:00 committed by Marge Bot
parent 8090bd78b8
commit 1de44b1951
9 changed files with 97 additions and 30 deletions

View file

@ -476,6 +476,7 @@ enum brw_shader_reloc_id {
BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH,
BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE,
BRW_SHADER_RELOC_LAST_EMBEDDED_SAMPLER_HANDLE =
BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE + BRW_MAX_EMBEDDED_SAMPLERS - 1,

View file

@ -94,6 +94,7 @@ void anv_nir_compute_push_layout(nir_shader *nir,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map,
const struct anv_pipeline_push_map *push_map,
enum anv_descriptor_set_layout_type desc_type,
void *mem_ctx);
void anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,

View file

@ -154,6 +154,10 @@ add_binding(struct apply_pipeline_layout_state *state,
state->set[set].binding[binding].properties |= BINDING_PROPERTY_EMBEDDED_SAMPLER;
}
const VkDescriptorSetLayoutCreateFlags non_pushable_set_flags =
VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT |
VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT;
const VkDescriptorBindingFlags non_pushable_binding_flags =
VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT |
VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT |
@ -165,8 +169,19 @@ add_binding_type(struct apply_pipeline_layout_state *state,
{
add_binding(state, set, binding);
if ((state->layout->set[set].layout->binding[binding].flags &
non_pushable_binding_flags) == 0 &&
const struct anv_descriptor_set_layout *set_layout =
state->layout->set[set].layout;
const struct anv_descriptor_set_binding_layout *bind_layout =
&set_layout->binding[binding];
/* We can't push descriptor buffers but we can for push descriptors */
const bool is_set_pushable =
(set_layout->flags & non_pushable_set_flags) == 0 ||
set_layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR;
const bool is_binding_pushable =
(bind_layout->flags & non_pushable_binding_flags) == 0;
if (is_set_pushable && is_binding_pushable &&
(state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ||
@ -1912,8 +1927,16 @@ anv_validate_pipeline_layout(const struct anv_pipeline_sets_layout *layout,
#endif
static bool
binding_is_promotable_to_push(const struct anv_descriptor_set_binding_layout *bind_layout)
binding_is_promotable_to_push(const struct anv_descriptor_set_layout *set_layout,
const struct anv_descriptor_set_binding_layout *bind_layout)
{
if (set_layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)
return true;
if (set_layout->flags & (VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT |
VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT))
return false;
return (bind_layout->flags & non_pushable_binding_flags) == 0;
}
@ -2124,7 +2147,9 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
} else if (state.set[s].desc_buffer_used) {
map->surface_to_descriptor[map->surface_count] =
(struct anv_pipeline_binding) {
.set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
.set = (layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER) ?
ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER :
ANV_DESCRIPTOR_SET_DESCRIPTORS,
.binding = UINT32_MAX,
.index = s,
};
@ -2163,7 +2188,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
if (state.set[set].binding[b].properties & BINDING_PROPERTY_EMBEDDED_SAMPLER)
add_embedded_sampler_entry(&state, map, set, b);
if (binding_is_promotable_to_push(bind_layout)) {
if (binding_is_promotable_to_push(set_layout, bind_layout)) {
if (bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
state.set[set].binding[b].push_block = push_map->block_count;
for (unsigned i = 0; i < bind_layout->array_size; i++)

View file

@ -36,6 +36,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map,
const struct anv_pipeline_push_map *push_map,
enum anv_descriptor_set_layout_type desc_type,
void *mem_ctx)
{
const struct brw_compiler *compiler = pdevice->compiler;
@ -74,6 +75,16 @@ anv_nir_compute_push_layout(nir_shader *nir,
push_end = MAX2(push_end, base +
sizeof_field(struct anv_push_constants,
desc_surface_offsets));
if (desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER &&
!pdevice->uses_ex_bso) {
base = offsetof(struct anv_push_constants,
surfaces_base_offset);
push_start = MIN2(push_start, base);
push_end = MAX2(push_end, base +
sizeof_field(struct anv_push_constants,
surfaces_base_offset));
}
break;
}
@ -173,19 +184,34 @@ anv_nir_compute_push_layout(nir_shader *nir,
case nir_intrinsic_load_desc_set_address_intel: {
assert(brw_shader_stage_requires_bindless_resources(nir->info.stage));
b->cursor = nir_before_instr(&intrin->instr);
nir_def *pc_load = nir_load_uniform(b, 1, 32,
nir_def *desc_offset = nir_load_uniform(b, 1, 32,
nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint32_t)),
.base = offsetof(struct anv_push_constants,
desc_surface_offsets),
.range = sizeof_field(struct anv_push_constants,
desc_surface_offsets),
.dest_type = nir_type_uint32);
pc_load = nir_iand_imm(b, pc_load, ANV_DESCRIPTOR_SET_OFFSET_MASK);
desc_offset = nir_iand_imm(b, desc_offset, ANV_DESCRIPTOR_SET_OFFSET_MASK);
if (desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER &&
!pdevice->uses_ex_bso) {
nir_def *bindless_base_offset = nir_load_uniform(
b, 1, 32,
nir_imm_int(b, 0),
.base = offsetof(struct anv_push_constants,
surfaces_base_offset),
.range = sizeof_field(struct anv_push_constants,
surfaces_base_offset),
.dest_type = nir_type_uint32);
desc_offset = nir_iadd(b, bindless_base_offset, desc_offset);
}
nir_def *desc_addr =
nir_pack_64_2x32_split(
b, pc_load,
b, desc_offset,
nir_load_reloc_const_intel(
b, BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH));
b,
desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER ?
BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH :
BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH));
nir_def_rewrite_uses(&intrin->def, desc_addr);
break;
}

View file

@ -113,12 +113,12 @@ lower_resource_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
nir_def *set_offset = intrin->src[0].ssa;
nir_def *binding_offset = intrin->src[1].ssa;
nir_def *sampler_base_offset = intrin->src[3].ssa;
/* When using indirect descriptor, the surface handles are loaded from the
* descriptor buffer and do not need any offset.
*/
if (state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT) {
if (state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT ||
state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER) {
if (!state->device->uses_ex_bso) {
/* We're trying to reduce the number of instructions in the shaders
* to compute surface handles. The assumption is that we're using
@ -138,19 +138,9 @@ lower_resource_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
* done for the push constant value provided in
* resource_intel::src[0]. That way the shader can just do a single
* ADD and get the surface handle.
*
* Samplers have a 4Gb heap and in the message they're in bits 31:6
* of the component 3 of the sampler message header. But since we
* push only a single offset for the base offset of the descriptor
* set, resource_intel::src[0] has to be shifted right by 6 (bringing
* it back in bytes).
*/
if (is_sampler) {
set_offset = nir_ushr_imm(b, set_offset, 6);
set_offset = nir_iadd(b, set_offset, sampler_base_offset);
} else {
if (!is_sampler)
binding_offset = nir_ishl_imm(b, binding_offset, 6);
}
}
nir_src_rewrite(&intrin->src[1],

View file

@ -133,7 +133,8 @@ anv_nir_loads_push_desc_buffer(nir_shader *nir,
const struct anv_pipeline_binding *binding =
&bind_map->surface_to_descriptor[bt_idx];
if (binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS &&
if ((binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS ||
binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER) &&
binding->index == push_set) {
return true;
}

View file

@ -1131,7 +1131,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
NIR_PASS_V(nir, anv_nir_compute_push_layout,
pdevice, stage->key.base.robust_flags,
anv_graphics_pipeline_stage_fragment_dynamic(stage),
prog_data, &stage->bind_map, &push_map, mem_ctx);
prog_data, &stage->bind_map, &push_map,
pipeline->layout.type, mem_ctx);
NIR_PASS_V(nir, anv_nir_lower_resource_intel, pdevice,
pipeline->layout.type);
@ -1633,6 +1634,12 @@ anv_pipeline_add_executable(struct anv_pipeline *pipeline,
fprintf(stream, "Vulkan push constants and API params");
break;
case ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER:
fprintf(stream, "Descriptor buffer (desc buffer) for set %d (start=%dB)",
stage->bind_map.push_ranges[i].index,
stage->bind_map.push_ranges[i].start * 32);
break;
case ANV_DESCRIPTOR_SET_DESCRIPTORS:
fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
stage->bind_map.push_ranges[i].index,

View file

@ -162,7 +162,12 @@ anv_shader_bin_create(struct anv_device *device,
prog_data_in->const_data_offset;
int rv_count = 0;
struct brw_shader_reloc_value reloc_values[6];
struct brw_shader_reloc_value reloc_values[7];
assert((device->physical->va.descriptor_buffer_pool.addr & 0xffffffff) == 0);
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH,
.value = device->physical->va.descriptor_buffer_pool.addr >> 32,
};
assert((device->physical->va.indirect_descriptor_pool.addr & 0xffffffff) == 0);
assert((device->physical->va.internal_surface_state_pool.addr & 0xffffffff) == 0);
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {

View file

@ -2838,11 +2838,12 @@ anv_descriptor_set_write_template(struct anv_device *device,
const struct vk_descriptor_update_template *template,
const void *data);
#define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 4)
#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 3)
#define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 2)
#define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS (UINT8_MAX - 1)
#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
#define ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER (UINT8_MAX - 5)
#define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 4)
#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 3)
#define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 2)
#define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS (UINT8_MAX - 1)
#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
struct anv_pipeline_binding {
/** Index in the descriptor set
@ -3295,6 +3296,16 @@ struct anv_push_constants {
/** Dynamic offsets for dynamic UBOs and SSBOs */
uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
/** Surface buffer base offset
*
* Only used prior to DG2 with descriptor buffers.
*
* (surfaces_base_offset + desc_offsets[set_index]) is relative to
* device->va.descriptor_buffer_pool and can be used to compute a 64bit
* address to the descriptor buffer (using load_desc_set_address_intel).
*/
uint32_t surfaces_base_offset;
union {
struct {
/** Dynamic MSAA value */