mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-29 03:40:10 +01:00
tu: Rework dynamic offset handling
With shader objects, we won't have the pipeline layout available. This means that our current implementation of dynamic offset descriptors in combination with fast-linking and independent descriptor sets — where fast-linking uses a pipeline layout that has pre-computed offsets for each descriptor set — won't work. Instead we need to piece together the sizes of the dynamic descriptors in each descriptor set from the shaders themselves. This is effectively what we already do when we stitch together the pipeline layout while fast-linking, but now it has to work with just the shaders. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25679>
This commit is contained in:
parent
af691ec8e4
commit
88db736472
7 changed files with 95 additions and 50 deletions
|
|
@@ -2475,6 +2475,11 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
|
|||
descriptors_state->max_sets_bound =
|
||||
MAX2(descriptors_state->max_sets_bound, firstSet + descriptorSetCount);
|
||||
|
||||
unsigned dynamic_offset_offset = 0;
|
||||
for (unsigned i = 0; i < firstSet; i++) {
|
||||
dynamic_offset_offset += layout->set[i].layout->dynamic_offset_size;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < descriptorSetCount; ++i) {
|
||||
unsigned idx = i + firstSet;
|
||||
TU_FROM_HANDLE(tu_descriptor_set, set, pDescriptorSets[i]);
|
||||
|
|
@@ -2494,7 +2499,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
|
|||
|
||||
uint32_t *src = set->dynamic_descriptors;
|
||||
uint32_t *dst = descriptors_state->dynamic_descriptors +
|
||||
layout->set[idx].dynamic_offset_start / 4;
|
||||
dynamic_offset_offset / 4;
|
||||
for (unsigned j = 0; j < set->layout->binding_count; j++) {
|
||||
struct tu_descriptor_set_binding_layout *binding =
|
||||
&set->layout->binding[j];
|
||||
|
|
@@ -2550,15 +2555,17 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
dynamic_offset_offset += layout->set[idx].layout->dynamic_offset_size;
|
||||
}
|
||||
assert(dyn_idx == dynamicOffsetCount);
|
||||
|
||||
if (layout->dynamic_offset_size) {
|
||||
if (dynamic_offset_offset) {
|
||||
/* allocate and fill out dynamic descriptor set */
|
||||
struct tu_cs_memory dynamic_desc_set;
|
||||
int reserved_set_idx = cmd->device->physical_device->reserved_set_idx;
|
||||
VkResult result = tu_cs_alloc(&cmd->sub_cs,
|
||||
layout->dynamic_offset_size / (4 * A6XX_TEX_CONST_DWORDS),
|
||||
dynamic_offset_offset / (4 * A6XX_TEX_CONST_DWORDS),
|
||||
A6XX_TEX_CONST_DWORDS, &dynamic_desc_set);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_command_buffer_set_error(&cmd->vk, result);
|
||||
|
|
@@ -2566,7 +2573,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
|
|||
}
|
||||
|
||||
memcpy(dynamic_desc_set.map, descriptors_state->dynamic_descriptors,
|
||||
layout->dynamic_offset_size);
|
||||
dynamic_offset_offset);
|
||||
assert(reserved_set_idx >= 0); /* reserved set must be bound */
|
||||
descriptors_state->set_iova[reserved_set_idx] = dynamic_desc_set.iova | BINDLESS_DESCRIPTOR_64B;
|
||||
descriptors_state->dynamic_bound = true;
|
||||
|
|
|
|||
|
|
@@ -488,39 +488,15 @@ sha1_update_descriptor_set_layout(struct mesa_sha1 *ctx,
|
|||
void
|
||||
tu_pipeline_layout_init(struct tu_pipeline_layout *layout)
|
||||
{
|
||||
unsigned dynamic_offset_size = 0;
|
||||
|
||||
for (uint32_t set = 0; set < layout->num_sets; set++) {
|
||||
layout->set[set].dynamic_offset_start = dynamic_offset_size;
|
||||
|
||||
if (layout->set[set].layout)
|
||||
dynamic_offset_size += layout->set[set].layout->dynamic_offset_size;
|
||||
}
|
||||
|
||||
layout->dynamic_offset_size = dynamic_offset_size;
|
||||
|
||||
/* We only care about INDEPENDENT_SETS for dynamic-offset descriptors,
|
||||
* where all the descriptors from all the sets are combined into one set
|
||||
* and we have to provide the dynamic_offset_start dynamically with fast
|
||||
* linking.
|
||||
*/
|
||||
if (dynamic_offset_size == 0) {
|
||||
layout->independent_sets = false;
|
||||
}
|
||||
|
||||
struct mesa_sha1 ctx;
|
||||
_mesa_sha1_init(&ctx);
|
||||
for (unsigned s = 0; s < layout->num_sets; s++) {
|
||||
if (layout->set[s].layout)
|
||||
sha1_update_descriptor_set_layout(&ctx, layout->set[s].layout);
|
||||
_mesa_sha1_update(&ctx, &layout->set[s].dynamic_offset_start,
|
||||
sizeof(layout->set[s].dynamic_offset_start));
|
||||
}
|
||||
_mesa_sha1_update(&ctx, &layout->num_sets, sizeof(layout->num_sets));
|
||||
_mesa_sha1_update(&ctx, &layout->push_constant_size,
|
||||
sizeof(layout->push_constant_size));
|
||||
_mesa_sha1_update(&ctx, &layout->independent_sets,
|
||||
sizeof(layout->independent_sets));
|
||||
_mesa_sha1_final(&ctx, layout->sha1);
|
||||
}
|
||||
|
||||
|
|
@@ -562,8 +538,6 @@ tu_CreatePipelineLayout(VkDevice _device,
|
|||
}
|
||||
|
||||
layout->push_constant_size = align(layout->push_constant_size, 16);
|
||||
layout->independent_sets =
|
||||
pCreateInfo->flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT;
|
||||
|
||||
tu_pipeline_layout_init(layout);
|
||||
|
||||
|
|
|
|||
|
|
@@ -93,14 +93,10 @@ struct tu_pipeline_layout
|
|||
{
|
||||
struct tu_descriptor_set_layout *layout;
|
||||
uint32_t size;
|
||||
uint32_t dynamic_offset_start;
|
||||
} set[MAX_SETS];
|
||||
|
||||
bool independent_sets;
|
||||
|
||||
uint32_t num_sets;
|
||||
uint32_t push_constant_size;
|
||||
uint32_t dynamic_offset_size;
|
||||
|
||||
unsigned char sha1[20];
|
||||
};
|
||||
|
|
|
|||
|
|
@@ -168,7 +168,7 @@ tu6_emit_load_state(struct tu_device *device,
|
|||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
|
||||
assert(device->physical_device->reserved_set_idx >= 0);
|
||||
base = device->physical_device->reserved_set_idx;
|
||||
offset = (layout->set[i].dynamic_offset_start +
|
||||
offset = (pipeline->program.dynamic_descriptor_offsets[i] +
|
||||
binding->dynamic_offset_offset) / 4;
|
||||
FALLTHROUGH;
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
|
||||
|
|
@@ -205,7 +205,7 @@ tu6_emit_load_state(struct tu_device *device,
|
|||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
||||
assert(device->physical_device->reserved_set_idx >= 0);
|
||||
base = device->physical_device->reserved_set_idx;
|
||||
offset = (layout->set[i].dynamic_offset_start +
|
||||
offset = (pipeline->program.dynamic_descriptor_offsets[i] +
|
||||
binding->dynamic_offset_offset) / 4;
|
||||
FALLTHROUGH;
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
|
||||
|
|
@@ -405,7 +405,7 @@ static void
|
|||
tu6_emit_dynamic_offset(struct tu_cs *cs,
|
||||
const struct ir3_shader_variant *xs,
|
||||
const struct tu_shader *shader,
|
||||
struct tu_pipeline_builder *builder)
|
||||
const struct tu_program_state *program)
|
||||
{
|
||||
const struct tu_physical_device *phys_dev = cs->device->physical_device;
|
||||
if (!xs || shader->const_state.dynamic_offset_loc == UINT32_MAX)
|
||||
|
|
@@ -422,8 +422,8 @@ tu6_emit_dynamic_offset(struct tu_cs *cs,
|
|||
|
||||
for (unsigned i = 0; i < phys_dev->usable_sets; i++) {
|
||||
unsigned dynamic_offset_start =
|
||||
builder->layout.set[i].dynamic_offset_start / (A6XX_TEX_CONST_DWORDS * 4);
|
||||
tu_cs_emit(cs, i < builder->layout.num_sets ? dynamic_offset_start : 0);
|
||||
program->dynamic_descriptor_offsets[i] / (A6XX_TEX_CONST_DWORDS * 4);
|
||||
tu_cs_emit(cs, dynamic_offset_start);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@@ -1151,14 +1151,14 @@ tu6_emit_geom_tess_consts(struct tu_cs *cs,
|
|||
template <chip CHIP>
|
||||
static void
|
||||
tu6_emit_program_config(struct tu_cs *cs,
|
||||
struct tu_pipeline *pipeline,
|
||||
struct tu_pipeline_builder *builder,
|
||||
const struct tu_program_state *prog,
|
||||
struct tu_shader **shaders,
|
||||
const struct ir3_shader_variant **variants)
|
||||
{
|
||||
STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
|
||||
|
||||
bool shared_consts_enable =
|
||||
pipeline->program.shared_consts.type == IR3_PUSH_CONSTS_SHARED;
|
||||
prog->shared_consts.type == IR3_PUSH_CONSTS_SHARED;
|
||||
tu6_emit_shared_consts_enable<CHIP>(cs, shared_consts_enable);
|
||||
|
||||
tu_cs_emit_regs(cs, HLSQ_INVALIDATE_CMD(CHIP,
|
||||
|
|
@@ -1178,7 +1178,7 @@ tu6_emit_program_config(struct tu_cs *cs,
|
|||
for (size_t stage_idx = MESA_SHADER_VERTEX;
|
||||
stage_idx <= MESA_SHADER_FRAGMENT; stage_idx++) {
|
||||
gl_shader_stage stage = (gl_shader_stage) stage_idx;
|
||||
tu6_emit_dynamic_offset(cs, variants[stage], pipeline->shaders[stage], builder);
|
||||
tu6_emit_dynamic_offset(cs, variants[stage], shaders[stage], prog);
|
||||
}
|
||||
|
||||
const struct ir3_shader_variant *vs = variants[MESA_SHADER_VERTEX];
|
||||
|
|
@@ -2245,7 +2245,6 @@ tu_pipeline_builder_parse_layout(struct tu_pipeline_builder *builder,
|
|||
}
|
||||
|
||||
builder->layout.push_constant_size = library->push_constant_size;
|
||||
builder->layout.independent_sets |= library->independent_sets;
|
||||
}
|
||||
|
||||
tu_pipeline_layout_init(&builder->layout);
|
||||
|
|
@@ -2261,7 +2260,6 @@ tu_pipeline_builder_parse_layout(struct tu_pipeline_builder *builder,
|
|||
vk_descriptor_set_layout_ref(&library->layouts[i]->vk);
|
||||
}
|
||||
library->push_constant_size = builder->layout.push_constant_size;
|
||||
library->independent_sets = builder->layout.independent_sets;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@@ -2294,6 +2292,8 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
|
|||
uint32_t safe_variants =
|
||||
ir3_trim_constlen(variants, builder->device->compiler);
|
||||
|
||||
unsigned dynamic_descriptor_sizes[MAX_SETS] = { };
|
||||
|
||||
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
|
||||
stage < ARRAY_SIZE(variants); stage = (gl_shader_stage) (stage+1)) {
|
||||
if (pipeline->shaders[stage]) {
|
||||
|
|
@@ -2303,6 +2303,13 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
|
|||
} else {
|
||||
draw_states[stage] = pipeline->shaders[stage]->state;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < MAX_SETS; i++) {
|
||||
if (pipeline->shaders[stage]->dynamic_descriptor_sizes[i] >= 0) {
|
||||
dynamic_descriptor_sizes[i] =
|
||||
pipeline->shaders[stage]->dynamic_descriptor_sizes[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@@ -2322,6 +2329,13 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
|
|||
}
|
||||
}
|
||||
|
||||
unsigned dynamic_descriptor_offset = 0;
|
||||
for (unsigned i = 0; i < MAX_SETS; i++) {
|
||||
pipeline->program.dynamic_descriptor_offsets[i] =
|
||||
dynamic_descriptor_offset;
|
||||
dynamic_descriptor_offset += dynamic_descriptor_sizes[i];
|
||||
}
|
||||
|
||||
/* Emit HLSQ_xS_CNTL/HLSQ_SP_xS_CONFIG *first*, before emitting anything
|
||||
* else that could depend on that state (like push constants)
|
||||
*
|
||||
|
|
@@ -2334,7 +2348,8 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
|
|||
* and draw passes.
|
||||
*/
|
||||
tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs);
|
||||
tu6_emit_program_config<CHIP>(&prog_cs, pipeline, builder, variants);
|
||||
tu6_emit_program_config<CHIP>(&prog_cs, &pipeline->program,
|
||||
pipeline->shaders, variants);
|
||||
pipeline->program.config_state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
|
||||
|
||||
pipeline->program.vs_state = draw_states[MESA_SHADER_VERTEX];
|
||||
|
|
|
|||
|
|
@@ -99,6 +99,8 @@ struct tu_program_state
|
|||
|
||||
struct tu_program_descriptor_linkage link[MESA_SHADER_STAGES];
|
||||
|
||||
unsigned dynamic_descriptor_offsets[MAX_SETS];
|
||||
|
||||
bool per_view_viewport;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@@ -188,8 +188,18 @@ lower_vulkan_resource_index(struct tu_device *dev, nir_builder *b,
|
|||
|
||||
switch (binding_layout->type) {
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
|
||||
if (layout->independent_sets) {
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
|
||||
int offset = 0;
|
||||
for (unsigned i = 0; i < set; i++) {
|
||||
if (shader->dynamic_descriptor_sizes[i] >= 0) {
|
||||
offset += shader->dynamic_descriptor_sizes[i];
|
||||
} else {
|
||||
offset = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (offset < 0) {
|
||||
/* With independent sets, we don't know
|
||||
* layout->set[set].dynamic_offset_start until after link time which
|
||||
* with fast linking means after the shader is compiled. We have to
|
||||
|
|
@@ -201,12 +211,13 @@ lower_vulkan_resource_index(struct tu_device *dev, nir_builder *b,
|
|||
.base = shader->const_state.dynamic_offset_loc + set);
|
||||
base = nir_iadd(b, base, dynamic_offset_start);
|
||||
} else {
|
||||
base = nir_imm_int(b, (layout->set[set].dynamic_offset_start +
|
||||
base = nir_imm_int(b, (offset +
|
||||
binding_layout->dynamic_offset_offset) / (4 * A6XX_TEX_CONST_DWORDS));
|
||||
}
|
||||
assert(dev->physical_device->reserved_set_idx >= 0);
|
||||
set = dev->physical_device->reserved_set_idx;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
base = nir_imm_int(b, binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS));
|
||||
break;
|
||||
|
|
@@ -749,7 +760,21 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
|
|||
align(DIV_ROUND_UP(const_state->push_consts.dwords, 4),
|
||||
dev->compiler->const_upload_unit);
|
||||
|
||||
if (layout->independent_sets) {
|
||||
bool unknown_dynamic_size = false;
|
||||
bool unknown_dynamic_offset = false;
|
||||
for (unsigned i = 0; i < layout->num_sets; i++) {
|
||||
if (tu_shader->dynamic_descriptor_sizes[i] == -1) {
|
||||
unknown_dynamic_size = true;
|
||||
} else if (unknown_dynamic_size &&
|
||||
tu_shader->dynamic_descriptor_sizes[i] > 0) {
|
||||
/* If there is an unknown size followed by a known size, then we may
|
||||
* need to dynamically determine the offset when linking.
|
||||
*/
|
||||
unknown_dynamic_offset = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (unknown_dynamic_offset) {
|
||||
const_state->dynamic_offset_loc = reserved_consts_vec4 * 4;
|
||||
assert(dev->physical_device->reserved_set_idx >= 0);
|
||||
reserved_consts_vec4 += DIV_ROUND_UP(dev->physical_device->reserved_set_idx, 4);
|
||||
|
|
@@ -2121,6 +2146,8 @@ tu_shader_serialize(struct vk_pipeline_cache_object *object,
|
|||
container_of(object, struct tu_shader, base);
|
||||
|
||||
blob_write_bytes(blob, &shader->const_state, sizeof(shader->const_state));
|
||||
blob_write_bytes(blob, &shader->dynamic_descriptor_sizes,
|
||||
sizeof(shader->dynamic_descriptor_sizes));
|
||||
blob_write_uint32(blob, shader->view_mask);
|
||||
blob_write_uint8(blob, shader->active_desc_sets);
|
||||
|
||||
|
|
@@ -2133,6 +2160,8 @@ tu_shader_serialize(struct vk_pipeline_cache_object *object,
|
|||
blob_write_uint8(blob, 0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
switch (shader->variant->type) {
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
blob_write_bytes(blob, &shader->tes, sizeof(shader->tes));
|
||||
|
|
@@ -2162,6 +2191,8 @@ tu_shader_deserialize(struct vk_pipeline_cache *cache,
|
|||
return NULL;
|
||||
|
||||
blob_copy_bytes(blob, &shader->const_state, sizeof(shader->const_state));
|
||||
blob_copy_bytes(blob, &shader->dynamic_descriptor_sizes,
|
||||
sizeof(shader->dynamic_descriptor_sizes));
|
||||
shader->view_mask = blob_read_uint32(blob);
|
||||
shader->active_desc_sets = blob_read_uint8(blob);
|
||||
|
||||
|
|
@@ -2305,6 +2336,15 @@ tu_shader_create(struct tu_device *dev,
|
|||
nir->info.stage == MESA_SHADER_GEOMETRY)
|
||||
tu_gather_xfb_info(nir, &so_info);
|
||||
|
||||
for (unsigned i = 0; i < layout->num_sets; i++) {
|
||||
if (layout->set[i].layout) {
|
||||
shader->dynamic_descriptor_sizes[i] =
|
||||
layout->set[i].layout->dynamic_offset_size;
|
||||
} else {
|
||||
shader->dynamic_descriptor_sizes[i] = -1;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned reserved_consts_vec4 = 0;
|
||||
NIR_PASS_V(nir, tu_lower_io, dev, shader, layout, &reserved_consts_vec4);
|
||||
|
||||
|
|
@@ -2464,6 +2504,9 @@ tu_empty_fs_create(struct tu_device *dev, struct tu_shader **shader,
|
|||
if (fragment_density_map)
|
||||
(*shader)->fs.lrz.status = TU_LRZ_FORCE_DISABLE_LRZ;
|
||||
|
||||
for (unsigned i = 0; i < MAX_SETS; i++)
|
||||
(*shader)->dynamic_descriptor_sizes[i] = -1;
|
||||
|
||||
struct ir3_shader *ir3_shader =
|
||||
ir3_shader_from_nir(dev->compiler, fs_b.shader, &options, &so_info);
|
||||
(*shader)->variant = ir3_shader_create_variant(ir3_shader, &key, false);
|
||||
|
|
|
|||
|
|
@@ -13,6 +13,7 @@
|
|||
#include "tu_common.h"
|
||||
#include "tu_cs.h"
|
||||
#include "tu_suballoc.h"
|
||||
#include "tu_descriptor_set.h"
|
||||
|
||||
struct tu_inline_ubo
|
||||
{
|
||||
|
|
@@ -69,6 +70,13 @@ struct tu_shader
|
|||
uint32_t view_mask;
|
||||
uint8_t active_desc_sets;
|
||||
|
||||
/* The dynamic buffer descriptor size for descriptor sets that we know
|
||||
* about. This is used when linking to piece together the sizes and from
|
||||
* there calculate the offsets. It's -1 if we don't know because the
|
||||
* descriptor set layout is NULL.
|
||||
*/
|
||||
int dynamic_descriptor_sizes[MAX_SETS];
|
||||
|
||||
union {
|
||||
struct {
|
||||
unsigned patch_type;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue