mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 07:18:17 +02:00
anv: add pipeline/shader support for descriptor buffers
Lowering/layout is pretty much the same as direct descriptors. The caveat is that since the descriptor buffers are not visible from the binding tables, we can't promote anything to the binding table (except push descriptors). The reason for this is that there is nothing that prevents an application from using both types of descriptors, and because descriptor buffers have visible addresses + capture replay, we can't merge the 2 types in the same virtual address space location (limited to 4Gb max, limited to 2Gb with binding tables). If we had the guarantee that both are not going to be used at the same time, we could consider a 2Gb VA for descriptor buffers. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Ivan Briano <ivan.briano@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22151>
This commit is contained in:
parent
8090bd78b8
commit
1de44b1951
9 changed files with 97 additions and 30 deletions
|
|
@ -476,6 +476,7 @@ enum brw_shader_reloc_id {
|
|||
BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
|
||||
BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
|
||||
BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
|
||||
BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH,
|
||||
BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE,
|
||||
BRW_SHADER_RELOC_LAST_EMBEDDED_SAMPLER_HANDLE =
|
||||
BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE + BRW_MAX_EMBEDDED_SAMPLERS - 1,
|
||||
|
|
|
|||
|
|
@ -94,6 +94,7 @@ void anv_nir_compute_push_layout(nir_shader *nir,
|
|||
struct brw_stage_prog_data *prog_data,
|
||||
struct anv_pipeline_bind_map *map,
|
||||
const struct anv_pipeline_push_map *push_map,
|
||||
enum anv_descriptor_set_layout_type desc_type,
|
||||
void *mem_ctx);
|
||||
|
||||
void anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,
|
||||
|
|
|
|||
|
|
@ -154,6 +154,10 @@ add_binding(struct apply_pipeline_layout_state *state,
|
|||
state->set[set].binding[binding].properties |= BINDING_PROPERTY_EMBEDDED_SAMPLER;
|
||||
}
|
||||
|
||||
const VkDescriptorSetLayoutCreateFlags non_pushable_set_flags =
|
||||
VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT |
|
||||
VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT;
|
||||
|
||||
const VkDescriptorBindingFlags non_pushable_binding_flags =
|
||||
VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT |
|
||||
VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT |
|
||||
|
|
@ -165,8 +169,19 @@ add_binding_type(struct apply_pipeline_layout_state *state,
|
|||
{
|
||||
add_binding(state, set, binding);
|
||||
|
||||
if ((state->layout->set[set].layout->binding[binding].flags &
|
||||
non_pushable_binding_flags) == 0 &&
|
||||
const struct anv_descriptor_set_layout *set_layout =
|
||||
state->layout->set[set].layout;
|
||||
const struct anv_descriptor_set_binding_layout *bind_layout =
|
||||
&set_layout->binding[binding];
|
||||
|
||||
/* We can't push descriptor buffers but we can for push descriptors */
|
||||
const bool is_set_pushable =
|
||||
(set_layout->flags & non_pushable_set_flags) == 0 ||
|
||||
set_layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR;
|
||||
const bool is_binding_pushable =
|
||||
(bind_layout->flags & non_pushable_binding_flags) == 0;
|
||||
|
||||
if (is_set_pushable && is_binding_pushable &&
|
||||
(state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
|
||||
state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
|
||||
state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ||
|
||||
|
|
@ -1912,8 +1927,16 @@ anv_validate_pipeline_layout(const struct anv_pipeline_sets_layout *layout,
|
|||
#endif
|
||||
|
||||
static bool
|
||||
binding_is_promotable_to_push(const struct anv_descriptor_set_binding_layout *bind_layout)
|
||||
binding_is_promotable_to_push(const struct anv_descriptor_set_layout *set_layout,
|
||||
const struct anv_descriptor_set_binding_layout *bind_layout)
|
||||
{
|
||||
if (set_layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)
|
||||
return true;
|
||||
|
||||
if (set_layout->flags & (VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT |
|
||||
VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT))
|
||||
return false;
|
||||
|
||||
return (bind_layout->flags & non_pushable_binding_flags) == 0;
|
||||
}
|
||||
|
||||
|
|
@ -2124,7 +2147,9 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
|
|||
} else if (state.set[s].desc_buffer_used) {
|
||||
map->surface_to_descriptor[map->surface_count] =
|
||||
(struct anv_pipeline_binding) {
|
||||
.set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
|
||||
.set = (layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER) ?
|
||||
ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER :
|
||||
ANV_DESCRIPTOR_SET_DESCRIPTORS,
|
||||
.binding = UINT32_MAX,
|
||||
.index = s,
|
||||
};
|
||||
|
|
@ -2163,7 +2188,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
|
|||
if (state.set[set].binding[b].properties & BINDING_PROPERTY_EMBEDDED_SAMPLER)
|
||||
add_embedded_sampler_entry(&state, map, set, b);
|
||||
|
||||
if (binding_is_promotable_to_push(bind_layout)) {
|
||||
if (binding_is_promotable_to_push(set_layout, bind_layout)) {
|
||||
if (bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
|
||||
state.set[set].binding[b].push_block = push_map->block_count;
|
||||
for (unsigned i = 0; i < bind_layout->array_size; i++)
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||
struct brw_stage_prog_data *prog_data,
|
||||
struct anv_pipeline_bind_map *map,
|
||||
const struct anv_pipeline_push_map *push_map,
|
||||
enum anv_descriptor_set_layout_type desc_type,
|
||||
void *mem_ctx)
|
||||
{
|
||||
const struct brw_compiler *compiler = pdevice->compiler;
|
||||
|
|
@ -74,6 +75,16 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||
push_end = MAX2(push_end, base +
|
||||
sizeof_field(struct anv_push_constants,
|
||||
desc_surface_offsets));
|
||||
|
||||
if (desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER &&
|
||||
!pdevice->uses_ex_bso) {
|
||||
base = offsetof(struct anv_push_constants,
|
||||
surfaces_base_offset);
|
||||
push_start = MIN2(push_start, base);
|
||||
push_end = MAX2(push_end, base +
|
||||
sizeof_field(struct anv_push_constants,
|
||||
surfaces_base_offset));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -173,19 +184,34 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||
case nir_intrinsic_load_desc_set_address_intel: {
|
||||
assert(brw_shader_stage_requires_bindless_resources(nir->info.stage));
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
nir_def *pc_load = nir_load_uniform(b, 1, 32,
|
||||
nir_def *desc_offset = nir_load_uniform(b, 1, 32,
|
||||
nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint32_t)),
|
||||
.base = offsetof(struct anv_push_constants,
|
||||
desc_surface_offsets),
|
||||
.range = sizeof_field(struct anv_push_constants,
|
||||
desc_surface_offsets),
|
||||
.dest_type = nir_type_uint32);
|
||||
pc_load = nir_iand_imm(b, pc_load, ANV_DESCRIPTOR_SET_OFFSET_MASK);
|
||||
desc_offset = nir_iand_imm(b, desc_offset, ANV_DESCRIPTOR_SET_OFFSET_MASK);
|
||||
if (desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER &&
|
||||
!pdevice->uses_ex_bso) {
|
||||
nir_def *bindless_base_offset = nir_load_uniform(
|
||||
b, 1, 32,
|
||||
nir_imm_int(b, 0),
|
||||
.base = offsetof(struct anv_push_constants,
|
||||
surfaces_base_offset),
|
||||
.range = sizeof_field(struct anv_push_constants,
|
||||
surfaces_base_offset),
|
||||
.dest_type = nir_type_uint32);
|
||||
desc_offset = nir_iadd(b, bindless_base_offset, desc_offset);
|
||||
}
|
||||
nir_def *desc_addr =
|
||||
nir_pack_64_2x32_split(
|
||||
b, pc_load,
|
||||
b, desc_offset,
|
||||
nir_load_reloc_const_intel(
|
||||
b, BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH));
|
||||
b,
|
||||
desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER ?
|
||||
BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH :
|
||||
BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH));
|
||||
nir_def_rewrite_uses(&intrin->def, desc_addr);
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -113,12 +113,12 @@ lower_resource_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
|
|||
|
||||
nir_def *set_offset = intrin->src[0].ssa;
|
||||
nir_def *binding_offset = intrin->src[1].ssa;
|
||||
nir_def *sampler_base_offset = intrin->src[3].ssa;
|
||||
|
||||
/* When using indirect descriptor, the surface handles are loaded from the
|
||||
* descriptor buffer and do not need any offset.
|
||||
*/
|
||||
if (state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT) {
|
||||
if (state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT ||
|
||||
state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER) {
|
||||
if (!state->device->uses_ex_bso) {
|
||||
/* We're trying to reduce the number of instructions in the shaders
|
||||
* to compute surface handles. The assumption is that we're using
|
||||
|
|
@ -138,19 +138,9 @@ lower_resource_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
|
|||
* done for the push constant value provided in
|
||||
* resource_intel::src[0]. That way the shader can just do a single
|
||||
* ADD and get the surface handle.
|
||||
*
|
||||
* Samplers have a 4Gb heap and in the message they're in bits 31:6
|
||||
* of the component 3 of the sampler message header. But since we
|
||||
* push only a single offset for the base offset of the descriptor
|
||||
* set, resource_intel::src[0] has to be shifted right by 6 (bringing
|
||||
* it back in bytes).
|
||||
*/
|
||||
if (is_sampler) {
|
||||
set_offset = nir_ushr_imm(b, set_offset, 6);
|
||||
set_offset = nir_iadd(b, set_offset, sampler_base_offset);
|
||||
} else {
|
||||
if (!is_sampler)
|
||||
binding_offset = nir_ishl_imm(b, binding_offset, 6);
|
||||
}
|
||||
}
|
||||
|
||||
nir_src_rewrite(&intrin->src[1],
|
||||
|
|
|
|||
|
|
@ -133,7 +133,8 @@ anv_nir_loads_push_desc_buffer(nir_shader *nir,
|
|||
|
||||
const struct anv_pipeline_binding *binding =
|
||||
&bind_map->surface_to_descriptor[bt_idx];
|
||||
if (binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS &&
|
||||
if ((binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS ||
|
||||
binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER) &&
|
||||
binding->index == push_set) {
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1131,7 +1131,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
|
|||
NIR_PASS_V(nir, anv_nir_compute_push_layout,
|
||||
pdevice, stage->key.base.robust_flags,
|
||||
anv_graphics_pipeline_stage_fragment_dynamic(stage),
|
||||
prog_data, &stage->bind_map, &push_map, mem_ctx);
|
||||
prog_data, &stage->bind_map, &push_map,
|
||||
pipeline->layout.type, mem_ctx);
|
||||
|
||||
NIR_PASS_V(nir, anv_nir_lower_resource_intel, pdevice,
|
||||
pipeline->layout.type);
|
||||
|
|
@ -1633,6 +1634,12 @@ anv_pipeline_add_executable(struct anv_pipeline *pipeline,
|
|||
fprintf(stream, "Vulkan push constants and API params");
|
||||
break;
|
||||
|
||||
case ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER:
|
||||
fprintf(stream, "Descriptor buffer (desc buffer) for set %d (start=%dB)",
|
||||
stage->bind_map.push_ranges[i].index,
|
||||
stage->bind_map.push_ranges[i].start * 32);
|
||||
break;
|
||||
|
||||
case ANV_DESCRIPTOR_SET_DESCRIPTORS:
|
||||
fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
|
||||
stage->bind_map.push_ranges[i].index,
|
||||
|
|
|
|||
|
|
@ -162,7 +162,12 @@ anv_shader_bin_create(struct anv_device *device,
|
|||
prog_data_in->const_data_offset;
|
||||
|
||||
int rv_count = 0;
|
||||
struct brw_shader_reloc_value reloc_values[6];
|
||||
struct brw_shader_reloc_value reloc_values[7];
|
||||
assert((device->physical->va.descriptor_buffer_pool.addr & 0xffffffff) == 0);
|
||||
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
|
||||
.id = BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH,
|
||||
.value = device->physical->va.descriptor_buffer_pool.addr >> 32,
|
||||
};
|
||||
assert((device->physical->va.indirect_descriptor_pool.addr & 0xffffffff) == 0);
|
||||
assert((device->physical->va.internal_surface_state_pool.addr & 0xffffffff) == 0);
|
||||
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
|
||||
|
|
|
|||
|
|
@ -2838,11 +2838,12 @@ anv_descriptor_set_write_template(struct anv_device *device,
|
|||
const struct vk_descriptor_update_template *template,
|
||||
const void *data);
|
||||
|
||||
#define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 4)
|
||||
#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 3)
|
||||
#define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 2)
|
||||
#define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS (UINT8_MAX - 1)
|
||||
#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
|
||||
#define ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER (UINT8_MAX - 5)
|
||||
#define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 4)
|
||||
#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 3)
|
||||
#define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 2)
|
||||
#define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS (UINT8_MAX - 1)
|
||||
#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
|
||||
|
||||
struct anv_pipeline_binding {
|
||||
/** Index in the descriptor set
|
||||
|
|
@ -3295,6 +3296,16 @@ struct anv_push_constants {
|
|||
/** Dynamic offsets for dynamic UBOs and SSBOs */
|
||||
uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
|
||||
|
||||
/** Surface buffer base offset
|
||||
*
|
||||
* Only used prior to DG2 with descriptor buffers.
|
||||
*
|
||||
* (surfaces_base_offset + desc_offsets[set_index]) is relative to
|
||||
* device->va.descriptor_buffer_pool and can be used to compute a 64bit
|
||||
* address to the descriptor buffer (using load_desc_set_address_intel).
|
||||
*/
|
||||
uint32_t surfaces_base_offset;
|
||||
|
||||
union {
|
||||
struct {
|
||||
/** Dynamic MSAA value */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue