mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-19 07:50:37 +01:00
anv: use internal surface state on Gfx12.5+ to access descriptor buffers
As a result on Gfx12.5+ we're not holding any binding table entry to access descriptor buffers. This should reduce the amount of binding table allocations. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10711 Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35160>
This commit is contained in:
parent
87abf57764
commit
e94cb92cb0
9 changed files with 865 additions and 299 deletions
|
|
@ -649,24 +649,10 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
|
|||
cmd_buffer->state.descriptors_dirty |= stages;
|
||||
cmd_buffer->state.descriptor_buffers.offsets_dirty |= stages;
|
||||
} else {
|
||||
/* When using indirect descriptors, stages that have access to the HW
|
||||
* binding tables, never need to access the
|
||||
* anv_push_constants::desc_offsets fields, because any data they
|
||||
* need from the descriptor buffer is accessible through a binding
|
||||
* table entry. For stages that are "bindless" (Mesh/Task/RT), we
|
||||
* need to provide anv_push_constants::desc_offsets matching the
|
||||
* bound descriptor so that shaders can access the descriptor buffer
|
||||
* through A64 messages.
|
||||
*
|
||||
* With direct descriptors, the shaders can use the
|
||||
* anv_push_constants::desc_offsets to build bindless offsets. So
|
||||
* we always need to update the push constant data.
|
||||
/* Platforms with LSC will use descriptor buffer push constant
|
||||
* offsets
|
||||
*/
|
||||
bool update_desc_sets =
|
||||
!cmd_buffer->device->physical->indirect_descriptors ||
|
||||
(stages & (VK_SHADER_STAGE_TASK_BIT_EXT |
|
||||
VK_SHADER_STAGE_MESH_BIT_EXT |
|
||||
ANV_RT_STAGE_BITS));
|
||||
bool update_desc_sets = cmd_buffer->device->info->has_lsc;
|
||||
|
||||
if (update_desc_sets) {
|
||||
struct anv_push_constants *push = &pipe_state->push_constants;
|
||||
|
|
@ -679,14 +665,15 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
|
|||
push->desc_sampler_offsets[set_index] =
|
||||
anv_address_physical(set->desc_sampler_addr) -
|
||||
cmd_buffer->device->physical->va.dynamic_state_pool.addr;
|
||||
|
||||
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
|
||||
set->desc_surface_addr.bo);
|
||||
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
|
||||
set->desc_sampler_addr.bo);
|
||||
}
|
||||
}
|
||||
|
||||
/* Always add a reference to the buffers */
|
||||
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
|
||||
set->desc_surface_addr.bo);
|
||||
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
|
||||
set->desc_sampler_addr.bo);
|
||||
|
||||
dirty_stages |= stages;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -133,6 +133,9 @@ void anv_nir_validate_push_layout(const struct anv_physical_device *pdevice,
|
|||
|
||||
bool anv_nir_update_resource_intel_block(nir_shader *shader);
|
||||
|
||||
bool anv_nir_lower_desc_address(nir_shader *shader,
|
||||
const struct anv_pipeline_bind_map *map);
|
||||
|
||||
bool anv_nir_lower_unaligned_dispatch(nir_shader *shader);
|
||||
|
||||
bool anv_nir_lower_resource_intel(nir_shader *shader,
|
||||
|
|
@ -159,6 +162,40 @@ void anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir,
|
|||
struct nir_shader *fs_nir,
|
||||
struct anv_device *device);
|
||||
|
||||
/* Tells whether a UBO binding source can be promoted to push constants.
*
* Returns true when `src` is produced by a resource_intel intrinsic whose
* resource_access_intel flags include nir_resource_intel_pushable. Returns
* false otherwise, including when nir_src_as_intrinsic() yields NULL.
*/
static inline bool
|
||||
anv_nir_is_promotable_ubo_binding(nir_src src)
|
||||
{
|
||||
nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);
|
||||
|
||||
return intrin && intrin->intrinsic == nir_intrinsic_resource_intel &&
|
||||
(nir_intrinsic_resource_access_intel(intrin) &
|
||||
nir_resource_intel_pushable);
|
||||
}
|
||||
|
||||
/* Tells whether a UBO binding source is flagged as an internal resource.
*
* Returns true when `src` is produced by a resource_intel intrinsic whose
* resource_access_intel flags include nir_resource_intel_internal. Returns
* false otherwise, including when nir_src_as_intrinsic() yields NULL.
*/
static inline bool
|
||||
anv_nir_is_internal_ubo(nir_src src)
|
||||
{
|
||||
nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);
|
||||
|
||||
return intrin && intrin->intrinsic == nir_intrinsic_resource_intel &&
|
||||
(nir_intrinsic_resource_access_intel(intrin) &
|
||||
nir_resource_intel_internal);
|
||||
}
|
||||
|
||||
/* Returns the resource_block_intel index carried by the resource_intel
* intrinsic that produces `src`.
*
* Only an assert guards against `src` not coming from a resource_intel
* intrinsic, so callers must have checked that beforehand.
*/
static inline unsigned
|
||||
anv_nir_get_ubo_binding_push_block(nir_src src)
|
||||
{
|
||||
nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);
|
||||
assert(intrin && intrin->intrinsic == nir_intrinsic_resource_intel);
|
||||
|
||||
return nir_intrinsic_resource_block_intel(intrin);
|
||||
}
|
||||
|
||||
void anv_nir_analyze_push_constants_ranges(nir_shader *nir,
|
||||
const struct intel_device_info *devinfo,
|
||||
const struct anv_pipeline_push_map *push_map,
|
||||
struct anv_push_range out_ranges[4]);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -72,6 +72,7 @@ struct apply_pipeline_layout_state {
|
|||
struct {
|
||||
bool desc_buffer_used;
|
||||
uint8_t desc_offset;
|
||||
uint32_t push_block;
|
||||
|
||||
struct anv_binding_apply_layout {
|
||||
uint8_t use_count;
|
||||
|
|
@ -389,35 +390,17 @@ build_load_descriptor_mem(nir_builder *b,
|
|||
const struct apply_pipeline_layout_state *state)
|
||||
|
||||
{
|
||||
switch (state->desc_addr_format) {
|
||||
case nir_address_format_64bit_global_32bit_offset: {
|
||||
nir_def *base_addr =
|
||||
nir_pack_64_2x32(b, nir_trim_vector(b, desc_addr, 2));
|
||||
nir_def *offset32 =
|
||||
nir_iadd_imm(b, nir_channel(b, desc_addr, 3), desc_offset);
|
||||
assert(state->desc_addr_format == nir_address_format_32bit_index_offset);
|
||||
|
||||
return nir_load_global_constant_offset(b, num_components, bit_size,
|
||||
base_addr, offset32,
|
||||
.align_mul = 8,
|
||||
.align_offset = desc_offset % 8);
|
||||
}
|
||||
nir_def *surface_index = nir_channel(b, desc_addr, 0);
|
||||
nir_def *offset32 = nir_iadd_imm(b, nir_channel(b, desc_addr, 1), desc_offset);
|
||||
|
||||
case nir_address_format_32bit_index_offset: {
|
||||
nir_def *surface_index = nir_channel(b, desc_addr, 0);
|
||||
nir_def *offset32 =
|
||||
nir_iadd_imm(b, nir_channel(b, desc_addr, 1), desc_offset);
|
||||
|
||||
return nir_load_ubo(b, num_components, bit_size,
|
||||
surface_index, offset32,
|
||||
.align_mul = 8,
|
||||
.align_offset = desc_offset % 8,
|
||||
.range_base = 0,
|
||||
.range = num_components * bit_size / 8);
|
||||
}
|
||||
|
||||
default:
|
||||
UNREACHABLE("Unsupported address format");
|
||||
}
|
||||
return nir_load_ubo(b, num_components, bit_size,
|
||||
surface_index, offset32,
|
||||
.align_mul = 8,
|
||||
.align_offset = desc_offset % 8,
|
||||
.range_base = 0,
|
||||
.range = num_components * bit_size / 8);
|
||||
}
|
||||
|
||||
/* When using direct descriptor, we do not have a structure to read in memory
|
||||
|
|
@ -635,11 +618,14 @@ build_desc_address64(nir_builder *b, nir_def *set_idx, unsigned set_idx_imm,
|
|||
/** Build a 32bit_index_offset address for a descriptor set */
|
||||
static nir_def *
|
||||
build_desc_address32(nir_builder *b,
|
||||
nir_def *set_idx, nir_def *offset,
|
||||
nir_def *set_idx, unsigned set,
|
||||
nir_def *offset,
|
||||
const struct apply_pipeline_layout_state *state)
|
||||
{
|
||||
return nir_vec2(b,
|
||||
nir_vector_extract(b, state->set_idx_to_bti, set_idx),
|
||||
nir_vector_extract(
|
||||
b, state->set_idx_to_bti,
|
||||
set < MAX_SETS ? nir_imm_int(b, set) : set_idx),
|
||||
offset);
|
||||
}
|
||||
|
||||
|
|
@ -820,7 +806,7 @@ build_desc_addr_for_res_index(nir_builder *b,
|
|||
}
|
||||
|
||||
case nir_address_format_32bit_index_offset:
|
||||
return build_desc_address32(b, res.set_idx, desc_offset, state);
|
||||
return build_desc_address32(b, res.set_idx, UINT32_MAX, desc_offset, state);
|
||||
|
||||
default:
|
||||
UNREACHABLE("Unhandled address format");
|
||||
|
|
@ -830,7 +816,7 @@ build_desc_addr_for_res_index(nir_builder *b,
|
|||
case nir_address_format_32bit_index_offset:
|
||||
assert(desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK);
|
||||
assert(state->desc_addr_format == nir_address_format_32bit_index_offset);
|
||||
return build_desc_address32(b, res.set_idx, desc_offset, state);
|
||||
return build_desc_address32(b, res.set_idx, UINT32_MAX, desc_offset, state);
|
||||
|
||||
default:
|
||||
UNREACHABLE("Unhandled address format");
|
||||
|
|
@ -878,7 +864,7 @@ build_desc_addr_for_binding(nir_builder *b,
|
|||
desc_offset = nir_iadd_imm(
|
||||
b, desc_offset, plane * bind_layout->descriptor_data_surface_size);
|
||||
}
|
||||
return build_desc_address32(b, nir_imm_int(b, set), desc_offset, state);
|
||||
return build_desc_address32(b, NULL, set, desc_offset, state);
|
||||
}
|
||||
|
||||
default:
|
||||
|
|
@ -1245,10 +1231,10 @@ build_buffer_addr_for_binding(nir_builder *b,
|
|||
if (addr_format != nir_address_format_32bit_index_offset)
|
||||
return build_buffer_addr_for_res_index(b, desc_type, res_index, addr_format, state);
|
||||
|
||||
if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
|
||||
const struct anv_descriptor_set_binding_layout *bind_layout =
|
||||
&state->set_layouts[set]->binding[binding];
|
||||
return build_desc_address32(b, nir_imm_int(b, set),
|
||||
const struct anv_descriptor_set_binding_layout *bind_layout =
|
||||
&state->set_layouts[set]->binding[binding];
|
||||
if (bind_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
|
||||
return build_desc_address32(b, NULL, set,
|
||||
nir_imm_int(b, bind_layout->descriptor_surface_offset),
|
||||
state);
|
||||
}
|
||||
|
|
@ -1426,9 +1412,6 @@ try_lower_direct_buffer_intrinsic(nir_builder *b,
|
|||
if (state->bind_map->layout_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT &&
|
||||
!descriptor_has_bti(desc, state))
|
||||
return false;
|
||||
|
||||
/* Rewrite to 32bit_index_offset whenever we can */
|
||||
addr_format = nir_address_format_32bit_index_offset;
|
||||
} else {
|
||||
assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
|
||||
|
||||
|
|
@ -1444,15 +1427,11 @@ try_lower_direct_buffer_intrinsic(nir_builder *b,
|
|||
bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK &&
|
||||
!descriptor_has_bti(desc, state))
|
||||
return false;
|
||||
|
||||
/* If this is an inline uniform and the shader stage is bindless, we
|
||||
* can't switch to 32bit_index_offset.
|
||||
*/
|
||||
if (bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ||
|
||||
!brw_shader_stage_requires_bindless_resources(b->shader->info.stage))
|
||||
addr_format = nir_address_format_32bit_index_offset;
|
||||
}
|
||||
|
||||
/* Rewrite to 32bit_index_offset whenever we can */
|
||||
addr_format = nir_address_format_32bit_index_offset;
|
||||
|
||||
/* If a dynamic has not been assigned a binding table entry, we need to
|
||||
* bail here.
|
||||
*/
|
||||
|
|
@ -2072,7 +2051,7 @@ binding_is_promotable_to_push(const struct anv_descriptor_set_layout *set_layout
|
|||
return (bind_layout->flags & non_pushable_binding_flags) == 0;
|
||||
}
|
||||
|
||||
static void
|
||||
static uint32_t
|
||||
add_null_bti_entry(struct anv_pipeline_bind_map *map)
|
||||
{
|
||||
map->surface_to_descriptor[map->surface_count++] =
|
||||
|
|
@ -2080,9 +2059,25 @@ add_null_bti_entry(struct anv_pipeline_bind_map *map)
|
|||
.set = ANV_DESCRIPTOR_SET_NULL,
|
||||
};
|
||||
assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
|
||||
return map->surface_count - 1;
|
||||
}
|
||||
|
||||
static void
|
||||
/* Appends a binding table entry referring to the descriptor buffer of
* descriptor set `set` (tagged ANV_DESCRIPTOR_SET_DESCRIPTORS, with
* binding = UINT32_MAX and index = set).
*
* Returns the index of the entry just added in map->surface_to_descriptor.
*/
static uint32_t
|
||||
add_desc_bti_entry(struct anv_pipeline_bind_map *map,
|
||||
uint32_t set)
|
||||
{
|
||||
map->surface_to_descriptor[map->surface_count++] =
|
||||
(struct anv_pipeline_binding) {
|
||||
.set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
|
||||
.binding = UINT32_MAX,
|
||||
.index = set,
|
||||
};
|
||||
/* NOTE(review): the bound is only asserted after the entry has been
* stored, and not at all when asserts are compiled out. Confirm callers
* guarantee there is room in surface_to_descriptor.
*/
assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
|
||||
|
||||
/* surface_count was post-incremented above, so the new entry is last. */
return map->surface_count - 1;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
add_bti_entry(struct anv_pipeline_bind_map *map,
|
||||
uint32_t set,
|
||||
uint32_t binding,
|
||||
|
|
@ -2101,9 +2096,11 @@ add_bti_entry(struct anv_pipeline_bind_map *map,
|
|||
.plane = plane,
|
||||
};
|
||||
assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
|
||||
|
||||
return map->surface_count - 1;
|
||||
}
|
||||
|
||||
static void
|
||||
static uint32_t
|
||||
add_dynamic_bti_entry(struct anv_pipeline_bind_map *map,
|
||||
uint32_t set,
|
||||
uint32_t binding,
|
||||
|
|
@ -2120,6 +2117,8 @@ add_dynamic_bti_entry(struct anv_pipeline_bind_map *map,
|
|||
.dynamic_offset_index = bind_layout->dynamic_offset_index + element,
|
||||
};
|
||||
assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
|
||||
|
||||
return map->surface_count - 1;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -2139,6 +2138,19 @@ add_sampler_entry(struct anv_pipeline_bind_map *map,
|
|||
};
|
||||
}
|
||||
|
||||
/* Appends a push block entry referring to the descriptor buffer of
* descriptor set `set` (tagged ANV_DESCRIPTOR_SET_DESCRIPTORS, with
* binding = UINT32_MAX and index = set) and bumps push_map->block_count.
*
* No bound check is done here; the caller must have sized
* push_map->block_to_descriptor appropriately.
*
* NOTE(review): `map` is not used by the body.
*/
static void
|
||||
add_descriptor_push_entry(struct anv_pipeline_push_map *push_map,
|
||||
uint32_t set,
|
||||
struct anv_pipeline_bind_map *map)
|
||||
{
|
||||
push_map->block_to_descriptor[push_map->block_count++] =
|
||||
(struct anv_pipeline_binding) {
|
||||
.set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
|
||||
.binding = UINT32_MAX,
|
||||
.index = set,
|
||||
};
|
||||
}
|
||||
|
||||
static void
|
||||
add_push_entry(struct anv_pipeline_push_map *push_map,
|
||||
uint32_t set,
|
||||
|
|
@ -2218,7 +2230,7 @@ build_packed_binding_table(struct apply_pipeline_layout_state *state,
|
|||
void *push_map_mem_ctx)
|
||||
{
|
||||
/* Compute the amount of push block items required. */
|
||||
unsigned push_block_count = 0;
|
||||
unsigned push_block_count = map->surface_count + MAX_SETS;
|
||||
for (unsigned s = 0; s < state->set_count; s++) {
|
||||
const struct anv_descriptor_set_layout *set_layout =
|
||||
state->set_layouts[s];
|
||||
|
|
@ -2231,31 +2243,37 @@ build_packed_binding_table(struct apply_pipeline_layout_state *state,
|
|||
}
|
||||
}
|
||||
|
||||
/* Assign a BTI to each used descriptor set */
|
||||
for (unsigned s = 0; s < state->set_count; s++) {
|
||||
if (state->desc_addr_format != nir_address_format_32bit_index_offset) {
|
||||
state->set[s].desc_offset = BINDLESS_OFFSET;
|
||||
} else if (state->set[s].desc_buffer_used) {
|
||||
map->surface_to_descriptor[map->surface_count] =
|
||||
(struct anv_pipeline_binding) {
|
||||
.set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
|
||||
.binding = UINT32_MAX,
|
||||
.index = s,
|
||||
};
|
||||
state->set[s].desc_offset = map->surface_count++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Assign a block index for each surface */
|
||||
push_map->block_to_descriptor =
|
||||
rzalloc_array(push_map_mem_ctx, struct anv_pipeline_binding,
|
||||
map->surface_count + push_block_count);
|
||||
push_map->block_to_descriptor = rzalloc_array(push_map_mem_ctx,
|
||||
struct anv_pipeline_binding,
|
||||
push_block_count);
|
||||
|
||||
memcpy(push_map->block_to_descriptor,
|
||||
map->surface_to_descriptor,
|
||||
sizeof(push_map->block_to_descriptor[0]) * map->surface_count);
|
||||
push_map->block_count = map->surface_count;
|
||||
|
||||
/* Assign a BTI to each used descriptor set */
|
||||
for (unsigned s = 0; s < state->set_count; s++) {
|
||||
if (state->set[s].desc_buffer_used) {
|
||||
/* Only add a binding table entry on platforms that cannot use
|
||||
* LSC_ADDR_SURFTYPE_SS.
|
||||
*/
|
||||
if (!state->pdevice->info.has_lsc)
|
||||
state->set[s].desc_offset = add_desc_bti_entry(map, s);
|
||||
|
||||
if (brw_shader_stage_requires_bindless_resources(shader->info.stage)) {
|
||||
state->set[s].push_block = UINT32_MAX;
|
||||
} else {
|
||||
state->set[s].push_block = push_map->block_count;
|
||||
add_descriptor_push_entry(push_map, s, state->bind_map);
|
||||
}
|
||||
} else {
|
||||
state->set[s].desc_offset = BINDLESS_OFFSET;
|
||||
state->set[s].push_block = UINT32_MAX;
|
||||
}
|
||||
}
|
||||
|
||||
/* Count used bindings, assign embedded sampler indices & add push blocks
|
||||
* for promotion to push constants
|
||||
*/
|
||||
|
|
@ -2428,19 +2446,59 @@ build_packed_binding_table(struct apply_pipeline_layout_state *state,
|
|||
}
|
||||
|
||||
static nir_def *
|
||||
build_descriptor_bti_vec(nir_builder *b,
|
||||
build_descriptor_set_bti(nir_builder *b,
|
||||
uint32_t set,
|
||||
const struct apply_pipeline_layout_state *state)
|
||||
{
|
||||
if (state->pdevice->info.has_lsc) {
|
||||
nir_def *surface_handle =
|
||||
nir_load_reloc_const_intel(
|
||||
b,
|
||||
state->bind_map->layout_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER ?
|
||||
BRW_SHADER_RELOC_DESCRIPTORS_BUFFERS_VIEW_HANDLE :
|
||||
BRW_SHADER_RELOC_DESCRIPTORS_VIEW_HANDLE);
|
||||
|
||||
return nir_resource_intel(
|
||||
b,
|
||||
nir_imm_int(b, set),
|
||||
surface_handle,
|
||||
nir_iand_imm(b,
|
||||
anv_load_driver_uniform(b, 1, desc_surface_offsets[set]),
|
||||
ANV_DESCRIPTOR_SET_OFFSET_MASK) /* array_index */,
|
||||
nir_imm_int(b, 0) /* bindless_base_offset */,
|
||||
.desc_set = set,
|
||||
.binding = -1,
|
||||
.resource_block_intel = state->set[set].push_block,
|
||||
.resource_access_intel = nir_resource_intel_pushable |
|
||||
nir_resource_intel_internal);
|
||||
} else {
|
||||
return nir_resource_intel(
|
||||
b,
|
||||
nir_imm_int(b, set),
|
||||
nir_imm_int(b, state->set[set].desc_offset),
|
||||
nir_imm_int(b, 0) /* array_index */,
|
||||
nir_imm_int(b, 0) /* bindless_base_offset */,
|
||||
.desc_set = set,
|
||||
.binding = -1,
|
||||
.resource_block_intel = state->set[set].desc_offset,
|
||||
.resource_access_intel = nir_resource_intel_pushable);
|
||||
}
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
build_descriptor_sets_bti_vec(nir_builder *b,
|
||||
const struct apply_pipeline_layout_state *state)
|
||||
{
|
||||
STATIC_ASSERT(MAX_SETS == 8);
|
||||
return nir_vec8(b,
|
||||
nir_imm_int(b, state->set[0].desc_offset),
|
||||
nir_imm_int(b, state->set[1].desc_offset),
|
||||
nir_imm_int(b, state->set[2].desc_offset),
|
||||
nir_imm_int(b, state->set[3].desc_offset),
|
||||
nir_imm_int(b, state->set[4].desc_offset),
|
||||
nir_imm_int(b, state->set[5].desc_offset),
|
||||
nir_imm_int(b, state->set[6].desc_offset),
|
||||
nir_imm_int(b, state->set[7].desc_offset));
|
||||
build_descriptor_set_bti(b, 0, state),
|
||||
build_descriptor_set_bti(b, 1, state),
|
||||
build_descriptor_set_bti(b, 2, state),
|
||||
build_descriptor_set_bti(b, 3, state),
|
||||
build_descriptor_set_bti(b, 4, state),
|
||||
build_descriptor_set_bti(b, 5, state),
|
||||
build_descriptor_set_bti(b, 6, state),
|
||||
build_descriptor_set_bti(b, 7, state));
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
@ -2462,8 +2520,6 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
|
|||
anv_validate_pipeline_layout(set_layouts, set_count, shader);
|
||||
#endif
|
||||
|
||||
const bool bindless_stage =
|
||||
brw_shader_stage_requires_bindless_resources(shader->info.stage);
|
||||
struct apply_pipeline_layout_state state = {
|
||||
.mem_ctx = ralloc_context(NULL),
|
||||
.pdevice = pdevice,
|
||||
|
|
@ -2471,9 +2527,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
|
|||
.set_layouts = set_layouts,
|
||||
.set_count = set_count,
|
||||
.dynamic_offset_start = dynamic_offset_start,
|
||||
.desc_addr_format = bindless_stage ?
|
||||
nir_address_format_64bit_global_32bit_offset :
|
||||
nir_address_format_32bit_index_offset,
|
||||
.desc_addr_format = nir_address_format_32bit_index_offset,
|
||||
.ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_flags),
|
||||
.ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_flags),
|
||||
};
|
||||
|
|
@ -2529,7 +2583,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
|
|||
*/
|
||||
nir_foreach_function_impl(impl, shader) {
|
||||
nir_builder _b = nir_builder_at(nir_before_impl(impl)), *b = &_b;
|
||||
state.set_idx_to_bti = build_descriptor_bti_vec(b, &state);
|
||||
state.set_idx_to_bti = build_descriptor_sets_bti_vec(b, &state);
|
||||
progress |= nir_function_instructions_pass(impl,
|
||||
lower_direct_buffer_instr,
|
||||
nir_metadata_control_flow,
|
||||
|
|
@ -2543,7 +2597,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
|
|||
|
||||
nir_foreach_function_impl(impl, shader) {
|
||||
nir_builder _b = nir_builder_at(nir_before_impl(impl)), *b = &_b;
|
||||
state.set_idx_to_bti = build_descriptor_bti_vec(b, &state);
|
||||
state.set_idx_to_bti = build_descriptor_sets_bti_vec(b, &state);
|
||||
progress |= nir_function_instructions_pass(impl,
|
||||
apply_pipeline_layout,
|
||||
nir_metadata_control_flow,
|
||||
|
|
|
|||
|
|
@ -25,111 +25,90 @@
|
|||
#include "nir_builder.h"
|
||||
#include "compiler/brw/brw_nir.h"
|
||||
#include "util/mesa-sha1.h"
|
||||
#include "util/set.h"
|
||||
|
||||
struct lower_to_push_data_intel_state {
|
||||
const struct anv_pipeline_bind_map *bind_map;
|
||||
const struct anv_pipeline_push_map *push_map;
|
||||
struct push_data {
|
||||
bool push_ubo_ranges;
|
||||
bool needs_wa_18019110168;
|
||||
bool needs_dyn_tess_config;
|
||||
unsigned app_start, app_end;
|
||||
unsigned driver_start, driver_end;
|
||||
};
|
||||
|
||||
static bool
|
||||
lower_to_push_data_intel(nir_builder *b,
|
||||
nir_intrinsic_instr *intrin,
|
||||
void *data)
|
||||
static void
|
||||
adjust_driver_push_values(nir_shader *nir,
|
||||
enum brw_robustness_flags robust_flags,
|
||||
const struct anv_nir_push_layout_info *push_info,
|
||||
struct brw_base_prog_key *prog_key,
|
||||
const struct intel_device_info *devinfo,
|
||||
struct push_data *data)
|
||||
{
|
||||
const struct lower_to_push_data_intel_state *state = data;
|
||||
/* With bindless shaders we load uniforms with SEND messages. All the push
|
||||
* constants are located after the RT_DISPATCH_GLOBALS. We just need to add
|
||||
* the offset to the address right after RT_DISPATCH_GLOBALS (see
|
||||
* brw_nir_lower_rt_intrinsics.c).
|
||||
*/
|
||||
const unsigned base_offset =
|
||||
brw_shader_stage_is_bindless(b->shader->info.stage) ?
|
||||
0 : state->bind_map->push_ranges[0].start * 32;
|
||||
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_push_data_intel: {
|
||||
nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) - base_offset);
|
||||
return true;
|
||||
if (data->push_ubo_ranges && (robust_flags & BRW_ROBUSTNESS_UBO)) {
|
||||
/* We can't on-the-fly adjust our push ranges because doing so would
|
||||
* mess up the layout in the shader. When robustBufferAccess is
|
||||
* enabled, we push a mask into the shader indicating which pushed
|
||||
* registers are valid and we zero out the invalid ones at the top of
|
||||
* the shader.
|
||||
*/
|
||||
const uint32_t push_reg_mask_start =
|
||||
anv_drv_const_offset(gfx.push_reg_mask[nir->info.stage]);
|
||||
const uint32_t push_reg_mask_end =
|
||||
push_reg_mask_start +
|
||||
anv_drv_const_size(gfx.push_reg_mask[nir->info.stage]);
|
||||
data->driver_start = MIN2(data->driver_start, push_reg_mask_start);
|
||||
data->driver_end = MAX2(data->driver_end, push_reg_mask_end);
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_push_constant: {
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
nir_def *data = nir_load_push_data_intel(
|
||||
b,
|
||||
intrin->def.num_components,
|
||||
intrin->def.bit_size,
|
||||
intrin->src[0].ssa,
|
||||
.base = nir_intrinsic_base(intrin) - base_offset,
|
||||
.range = nir_intrinsic_range(intrin));
|
||||
nir_def_replace(&intrin->def, data);
|
||||
return true;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_ubo: {
|
||||
if (!brw_nir_ubo_surface_index_is_pushable(intrin->src[0]) ||
|
||||
!nir_src_is_const(intrin->src[1]))
|
||||
return false;
|
||||
|
||||
const int block = brw_nir_ubo_surface_index_get_push_block(intrin->src[0]);
|
||||
const unsigned byte_offset = nir_src_as_uint(intrin->src[1]);
|
||||
const unsigned num_components =
|
||||
nir_def_last_component_read(&intrin->def) + 1;
|
||||
const int bytes = num_components * (intrin->def.bit_size / 8);
|
||||
|
||||
const struct anv_pipeline_binding *binding =
|
||||
&state->push_map->block_to_descriptor[block];
|
||||
|
||||
uint32_t range_offset = 0;
|
||||
const struct anv_push_range *push_range = NULL;
|
||||
for (uint32_t i = 0; i < 4; i++) {
|
||||
if (state->bind_map->push_ranges[i].set == binding->set &&
|
||||
state->bind_map->push_ranges[i].index == binding->index &&
|
||||
byte_offset >= state->bind_map->push_ranges[i].start * 32 &&
|
||||
(byte_offset + bytes) <= (state->bind_map->push_ranges[i].start +
|
||||
state->bind_map->push_ranges[i].length) * 32) {
|
||||
push_range = &state->bind_map->push_ranges[i];
|
||||
break;
|
||||
} else {
|
||||
range_offset += state->bind_map->push_ranges[i].length * 32;
|
||||
}
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
if (push_info->fragment_dynamic) {
|
||||
const uint32_t fs_config_start = anv_drv_const_offset(gfx.fs_config);
|
||||
const uint32_t fs_config_end = fs_config_start +
|
||||
anv_drv_const_size(gfx.fs_config);
|
||||
data->driver_start = MIN2(data->driver_start, fs_config_start);
|
||||
data->driver_end = MAX2(data->driver_end, fs_config_end);
|
||||
}
|
||||
|
||||
if (push_range == NULL)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
nir_def *data = nir_load_push_data_intel(
|
||||
b,
|
||||
nir_def_last_component_read(&intrin->def) + 1,
|
||||
intrin->def.bit_size,
|
||||
nir_imm_int(b, 0),
|
||||
.base = range_offset + byte_offset - push_range->start * 32,
|
||||
.range = nir_intrinsic_range(intrin));
|
||||
nir_def_replace(&intrin->def, data);
|
||||
return true;
|
||||
if (data->needs_wa_18019110168) {
|
||||
const uint32_t fs_per_prim_remap_start =
|
||||
anv_drv_const_offset(gfx.fs_per_prim_remap_offset);
|
||||
const uint32_t fs_per_prim_remap_end =
|
||||
fs_per_prim_remap_start +
|
||||
anv_drv_const_size(gfx.fs_per_prim_remap_offset);
|
||||
data->driver_start = MIN2(data->driver_start, fs_per_prim_remap_start);
|
||||
data->driver_end = MAX2(data->driver_end, fs_per_prim_remap_end);
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
return false;
|
||||
data->needs_dyn_tess_config =
|
||||
(nir->info.stage == MESA_SHADER_TESS_CTRL &&
|
||||
(container_of(prog_key, struct brw_tcs_prog_key, base)->input_vertices == 0 ||
|
||||
push_info->separate_tessellation)) ||
|
||||
(nir->info.stage == MESA_SHADER_TESS_EVAL &&
|
||||
push_info->separate_tessellation);
|
||||
if (data->needs_dyn_tess_config) {
|
||||
const uint32_t tess_config_start = anv_drv_const_offset(gfx.tess_config);
|
||||
const uint32_t tess_config_end = tess_config_start +
|
||||
anv_drv_const_size(gfx.tess_config);
|
||||
data->driver_start = MIN2(data->driver_start, tess_config_start);
|
||||
data->driver_end = MAX2(data->driver_end, tess_config_end);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
anv_nir_compute_push_layout(nir_shader *nir,
|
||||
const struct anv_physical_device *pdevice,
|
||||
enum brw_robustness_flags robust_flags,
|
||||
const struct anv_nir_push_layout_info *push_info,
|
||||
struct brw_base_prog_key *prog_key,
|
||||
struct brw_stage_prog_data *prog_data,
|
||||
struct anv_pipeline_bind_map *map,
|
||||
const struct anv_pipeline_push_map *push_map)
|
||||
static struct push_data
|
||||
gather_push_data(nir_shader *nir,
|
||||
enum brw_robustness_flags robust_flags,
|
||||
const struct intel_device_info *devinfo,
|
||||
const struct anv_nir_push_layout_info *push_info,
|
||||
struct brw_base_prog_key *prog_key,
|
||||
struct anv_pipeline_bind_map *map,
|
||||
struct set *lowered_ubo_instrs)
|
||||
{
|
||||
const struct brw_compiler *compiler = pdevice->compiler;
|
||||
const struct intel_device_info *devinfo = compiler->devinfo;
|
||||
memset(map->push_ranges, 0, sizeof(map->push_ranges));
|
||||
|
||||
bool has_const_ubo = false;
|
||||
unsigned push_start = UINT_MAX, push_end = 0;
|
||||
struct push_data data = {
|
||||
.app_start = UINT_MAX, .app_end = 0,
|
||||
.driver_start = UINT_MAX, .driver_end = 0,
|
||||
};
|
||||
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
|
|
@ -144,12 +123,26 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||
has_const_ubo = true;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_push_constant:
|
||||
case nir_intrinsic_load_push_data_intel: {
|
||||
|
||||
case nir_intrinsic_load_push_constant: {
|
||||
unsigned base = nir_intrinsic_base(intrin);
|
||||
unsigned range = nir_intrinsic_range(intrin);
|
||||
push_start = MIN2(push_start, base);
|
||||
push_end = MAX2(push_end, base + range);
|
||||
data.app_start = MIN2(data.app_start, base);
|
||||
data.app_end = MAX2(data.app_end, base + range);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_push_data_intel: {
|
||||
if (lowered_ubo_instrs &&
|
||||
_mesa_set_search(lowered_ubo_instrs, intrin)) {
|
||||
has_const_ubo = true;
|
||||
break;
|
||||
}
|
||||
|
||||
unsigned base = nir_intrinsic_base(intrin);
|
||||
unsigned range = nir_intrinsic_range(intrin);
|
||||
data.driver_start = MIN2(data.driver_start, base);
|
||||
data.driver_end = MAX2(data.driver_end, base + range);
|
||||
/* We need to retain this information to update the push
|
||||
* constant on vkCmdDispatch*().
|
||||
*/
|
||||
|
|
@ -167,67 +160,161 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||
}
|
||||
}
|
||||
|
||||
const bool push_ubo_ranges =
|
||||
data.push_ubo_ranges =
|
||||
has_const_ubo && nir->info.stage != MESA_SHADER_COMPUTE &&
|
||||
!brw_shader_stage_requires_bindless_resources(nir->info.stage);
|
||||
|
||||
const bool needs_wa_18019110168 =
|
||||
data.needs_wa_18019110168 =
|
||||
nir->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
brw_nir_fragment_shader_needs_wa_18019110168(
|
||||
devinfo, push_info->mesh_dynamic ? INTEL_SOMETIMES : INTEL_NEVER, nir);
|
||||
|
||||
if (push_ubo_ranges && (robust_flags & BRW_ROBUSTNESS_UBO)) {
|
||||
/* We can't on-the-fly adjust our push ranges because doing so would
|
||||
* mess up the layout in the shader. When robustBufferAccess is
|
||||
* enabled, we push a mask into the shader indicating which pushed
|
||||
* registers are valid and we zero out the invalid ones at the top of
|
||||
* the shader.
|
||||
adjust_driver_push_values(nir, robust_flags, push_info,
|
||||
prog_key, devinfo, &data);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
/* State shared by the load_ubo -> load_push_data_intel lowering callbacks. */
struct lower_to_push_data_intel_state {
|
||||
/* Provides the push_ranges[] that UBO loads are matched against. */
const struct anv_pipeline_bind_map *bind_map;
|
||||
/* Maps a push block index to its binding (block_to_descriptor[]). */
const struct anv_pipeline_push_map *push_map;
|
||||
|
||||
/* Set of load_push_data_intel intrinsics created from UBO loads. */
struct set *lowered_ubo_instrs;
|
||||
|
||||
/* Amount that should be subtracted from UBO loads converted to
|
||||
* push_data_intel (in lowered_ubo_instrs)
|
||||
*/
|
||||
unsigned reduced_push_ranges;
|
||||
};
|
||||
|
||||
/* Lower internal UBOs, only used for descriptor buffer loads when the offset
|
||||
* is dynamic. We need to add the base offset of the descriptor buffer to the
|
||||
* offset relative to the descriptor set.
*
* Returns false, leaving the load untouched, when src[0] is not an internal
* resource (see anv_nir_is_internal_ubo()). Otherwise rewrites the offset
* source src[1] in place and returns true.
|
||||
*/
|
||||
static bool
|
||||
lower_internal_ubo(nir_builder *b,
|
||||
nir_intrinsic_instr *intrin)
|
||||
{
|
||||
if (!anv_nir_is_internal_ubo(intrin->src[0]))
|
||||
return false;
|
||||
|
||||
/* Emit the offset addition right before the load. */
b->cursor = nir_before_instr(&intrin->instr);
|
||||
|
||||
/* Non-NULL here: anv_nir_is_internal_ubo() checked it above. */
nir_intrinsic_instr *resource = nir_src_as_intrinsic(intrin->src[0]);
|
||||
|
||||
/* Add the descriptor offset from the resource array_index source to the
|
||||
* relative offset.
|
||||
*/
|
||||
nir_src_rewrite(&intrin->src[1],
|
||||
nir_iadd(b, resource->src[2].ssa, intrin->src[1].ssa));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Intrinsic callback that tries to turn a load_ubo into a
* load_push_data_intel.
*
* `_data` is a struct lower_to_push_data_intel_state. The load is converted
* only when its binding is pushable, its offset is constant, the stage does
* not require bindless resources, and the bytes read fall entirely inside
* one of the 4 bind_map->push_ranges that matches the binding's set/index.
* In every other case the load is handed to lower_internal_ubo().
*
* Returns true when the instruction was changed.
*/
static bool
|
||||
lower_ubo_to_push_data_intel(nir_builder *b,
|
||||
nir_intrinsic_instr *intrin,
|
||||
void *_data)
|
||||
{
|
||||
if (intrin->intrinsic != nir_intrinsic_load_ubo)
|
||||
return false;
|
||||
|
||||
/* Not promotable: still apply the internal UBO offset fixup if needed. */
if (!anv_nir_is_promotable_ubo_binding(intrin->src[0]) ||
|
||||
!nir_src_is_const(intrin->src[1]) ||
|
||||
brw_shader_stage_requires_bindless_resources(b->shader->info.stage))
|
||||
return lower_internal_ubo(b, intrin);
|
||||
|
||||
const struct lower_to_push_data_intel_state *state = _data;
|
||||
const int block = anv_nir_get_ubo_binding_push_block(intrin->src[0]);
|
||||
assert(block < state->push_map->block_count);
|
||||
const struct anv_pipeline_binding *binding =
|
||||
&state->push_map->block_to_descriptor[block];
|
||||
const unsigned byte_offset = nir_src_as_uint(intrin->src[1]);
|
||||
/* Only the components actually read need to be covered by a range. */
const unsigned num_components =
|
||||
nir_def_last_component_read(&intrin->def) + 1;
|
||||
const int bytes = num_components * (intrin->def.bit_size / 8);
|
||||
|
||||
/* Byte offset of the matching range within the pushed data: the sum of
* the sizes of all ranges preceding it. Range start/length are scaled by
* 32 to be compared against byte offsets.
*/
uint32_t range_offset = 0;
|
||||
const struct anv_push_range *push_range = NULL;
|
||||
for (uint32_t i = 0; i < 4; i++) {
|
||||
if (state->bind_map->push_ranges[i].set == binding->set &&
|
||||
state->bind_map->push_ranges[i].index == binding->index &&
|
||||
byte_offset >= state->bind_map->push_ranges[i].start * 32 &&
|
||||
(byte_offset + bytes) <= (state->bind_map->push_ranges[i].start +
|
||||
state->bind_map->push_ranges[i].length) * 32) {
|
||||
push_range = &state->bind_map->push_ranges[i];
|
||||
break;
|
||||
} else {
|
||||
range_offset += state->bind_map->push_ranges[i].length * 32;
|
||||
}
|
||||
}
|
||||
|
||||
/* The load is not covered by any pushed range: keep it as a UBO load. */
if (push_range == NULL)
|
||||
return lower_internal_ubo(b, intrin);
|
||||
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
nir_def *data = nir_load_push_data_intel(
|
||||
b,
|
||||
nir_def_last_component_read(&intrin->def) + 1,
|
||||
intrin->def.bit_size,
|
||||
nir_imm_int(b, 0),
|
||||
.base = range_offset + byte_offset - push_range->start * 32,
|
||||
.range = nir_intrinsic_range(intrin));
|
||||
nir_def_replace(&intrin->def, data);
|
||||
|
||||
/* Record the new load so it can be told apart from other
* load_push_data_intel intrinsics later on.
*/
_mesa_set_add(state->lowered_ubo_instrs, nir_def_as_intrinsic(data));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_to_push_data_intel(nir_builder *b,
|
||||
nir_intrinsic_instr *intrin,
|
||||
void *_data)
|
||||
{
|
||||
const struct lower_to_push_data_intel_state *state = _data;
|
||||
/* With bindless shaders we load uniforms with SEND messages. All the push
|
||||
* constants are located after the RT_DISPATCH_GLOBALS. We just need to add
|
||||
* the offset to the address right after RT_DISPATCH_GLOBALS (see
|
||||
* brw_nir_lower_rt_intrinsics.c).
|
||||
*/
|
||||
const unsigned base_offset =
|
||||
brw_shader_stage_is_bindless(b->shader->info.stage) ?
|
||||
0 : state->bind_map->push_ranges[0].start * 32;
|
||||
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_push_data_intel:
|
||||
/* For lowered UBOs to push constants, shrink the base by the amount we
|
||||
* shrunk the driver push constants.
|
||||
*/
|
||||
const uint32_t push_reg_mask_start =
|
||||
anv_drv_const_offset(gfx.push_reg_mask[nir->info.stage]);
|
||||
const uint32_t push_reg_mask_end =
|
||||
push_reg_mask_start +
|
||||
anv_drv_const_size(gfx.push_reg_mask[nir->info.stage]);
|
||||
push_start = MIN2(push_start, push_reg_mask_start);
|
||||
push_end = MAX2(push_end, push_reg_mask_end);
|
||||
if (_mesa_set_search(state->lowered_ubo_instrs, intrin))
|
||||
nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) - state->reduced_push_ranges);
|
||||
else
|
||||
nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) - base_offset);
|
||||
return true;
|
||||
|
||||
case nir_intrinsic_load_push_constant: {
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
nir_def *data = nir_load_push_data_intel(
|
||||
b,
|
||||
intrin->def.num_components,
|
||||
intrin->def.bit_size,
|
||||
intrin->src[0].ssa,
|
||||
.base = nir_intrinsic_base(intrin) - base_offset,
|
||||
.range = nir_intrinsic_range(intrin));
|
||||
nir_def_replace(&intrin->def, data);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
if (push_info->fragment_dynamic) {
|
||||
const uint32_t fs_config_start =
|
||||
anv_drv_const_offset(gfx.fs_config);
|
||||
const uint32_t fs_config_end =
|
||||
fs_config_start +
|
||||
anv_drv_const_size(gfx.fs_config);
|
||||
push_start = MIN2(push_start, fs_config_start);
|
||||
push_end = MAX2(push_end, fs_config_end);
|
||||
}
|
||||
|
||||
if (needs_wa_18019110168) {
|
||||
const uint32_t fs_per_prim_remap_start =
|
||||
anv_drv_const_offset(gfx.fs_per_prim_remap_offset);
|
||||
const uint32_t fs_per_prim_remap_end =
|
||||
fs_per_prim_remap_start +
|
||||
anv_drv_const_size(gfx.fs_per_prim_remap_offset);
|
||||
push_start = MIN2(push_start, fs_per_prim_remap_start);
|
||||
push_end = MAX2(push_end, fs_per_prim_remap_end);
|
||||
}
|
||||
}
|
||||
|
||||
const bool needs_dyn_tess_config =
|
||||
(nir->info.stage == MESA_SHADER_TESS_CTRL &&
|
||||
(container_of(prog_key, struct brw_tcs_prog_key, base)->input_vertices == 0 ||
|
||||
push_info->separate_tessellation)) ||
|
||||
(nir->info.stage == MESA_SHADER_TESS_EVAL &&
|
||||
push_info->separate_tessellation);
|
||||
if (needs_dyn_tess_config) {
|
||||
const uint32_t tess_config_start = anv_drv_const_offset(gfx.tess_config);
|
||||
const uint32_t tess_config_end = tess_config_start +
|
||||
anv_drv_const_size(gfx.tess_config);
|
||||
push_start = MIN2(push_start, tess_config_start);
|
||||
push_end = MAX2(push_end, tess_config_end);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static struct anv_push_range
|
||||
compute_final_push_range(const struct intel_device_info *devinfo,
|
||||
const struct push_data *data)
|
||||
{
|
||||
/* Align push_start down to a 32B (for 3DSTATE_CONSTANT) and make it no
|
||||
* larger than push_end (no push constants is indicated by push_start =
|
||||
* UINT_MAX).
|
||||
|
|
@ -254,14 +341,50 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||
* (unlike all Gfx stages) and so we can bound+align the allocation there
|
||||
* (see anv_cmd_buffer_cs_push_constants).
|
||||
*/
|
||||
push_start = MIN2(push_start, push_end);
|
||||
unsigned push_start = UINT32_MAX;
|
||||
|
||||
if (data->app_end != 0)
|
||||
push_start = MIN2(push_start, data->app_start);
|
||||
if (data->driver_end != 0)
|
||||
push_start = MIN2(push_start, data->driver_start);
|
||||
|
||||
if (push_start == UINT32_MAX) {
|
||||
return (struct anv_push_range) {
|
||||
.set = ANV_DESCRIPTOR_SET_PUSH_CONSTANTS,
|
||||
};
|
||||
}
|
||||
|
||||
push_start = ROUND_DOWN_TO(push_start, 32);
|
||||
|
||||
const struct anv_push_range push_constant_range = {
|
||||
const unsigned push_size = align(
|
||||
MAX2(data->app_end, data->driver_end) - push_start, devinfo->grf_size);
|
||||
|
||||
return (struct anv_push_range) {
|
||||
.set = ANV_DESCRIPTOR_SET_PUSH_CONSTANTS,
|
||||
.start = push_start / 32,
|
||||
.length = align(push_end - push_start, devinfo->grf_size) / 32,
|
||||
.length = push_size / 32,
|
||||
};
|
||||
}
|
||||
|
||||
bool
|
||||
anv_nir_compute_push_layout(nir_shader *nir,
|
||||
const struct anv_physical_device *pdevice,
|
||||
enum brw_robustness_flags robust_flags,
|
||||
const struct anv_nir_push_layout_info *push_info,
|
||||
struct brw_base_prog_key *prog_key,
|
||||
struct brw_stage_prog_data *prog_data,
|
||||
struct anv_pipeline_bind_map *map,
|
||||
const struct anv_pipeline_push_map *push_map)
|
||||
{
|
||||
const struct brw_compiler *compiler = pdevice->compiler;
|
||||
const struct intel_device_info *devinfo = compiler->devinfo;
|
||||
memset(map->push_ranges, 0, sizeof(map->push_ranges));
|
||||
|
||||
struct push_data data =
|
||||
gather_push_data(nir, robust_flags, devinfo, push_info, prog_key, map, NULL);
|
||||
|
||||
struct anv_push_range push_constant_range =
|
||||
compute_final_push_range(devinfo, &data);
|
||||
|
||||
/* When platforms support Mesh and the fragment shader is not fully linked
|
||||
* to the previous shader, payload format can change if the preceding
|
||||
|
|
@ -288,54 +411,40 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||
* dynamic bit in fs_config_intel.
|
||||
*/
|
||||
const bool needs_padding_per_primitive =
|
||||
needs_wa_18019110168 ||
|
||||
data.needs_wa_18019110168 ||
|
||||
(push_info->mesh_dynamic &&
|
||||
(nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID));
|
||||
|
||||
unsigned n_push_ranges = 0;
|
||||
unsigned total_push_regs = 0;
|
||||
|
||||
if (push_constant_range.length > 0)
|
||||
if (push_constant_range.length > 0) {
|
||||
map->push_ranges[n_push_ranges++] = push_constant_range;
|
||||
total_push_regs += push_constant_range.length;
|
||||
}
|
||||
|
||||
if (push_ubo_ranges) {
|
||||
struct brw_ubo_range ubo_ranges[4] = {};
|
||||
struct anv_push_range analysis_ranges[4] = {};
|
||||
if (data.push_ubo_ranges) {
|
||||
anv_nir_analyze_push_constants_ranges(nir, devinfo, push_map,
|
||||
analysis_ranges);
|
||||
}
|
||||
|
||||
brw_nir_analyze_ubo_ranges(compiler, nir, ubo_ranges);
|
||||
const unsigned max_push_buffers = needs_padding_per_primitive ? 3 : 4;
|
||||
const unsigned max_push_regs = needs_padding_per_primitive ? 63 : 64;
|
||||
|
||||
const unsigned max_push_regs = 64;
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
struct anv_push_range *candidate_range = &analysis_ranges[i];
|
||||
if (n_push_ranges >= max_push_buffers)
|
||||
break;
|
||||
|
||||
unsigned total_push_regs = push_constant_range.length;
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
if (total_push_regs + ubo_ranges[i].length > max_push_regs)
|
||||
ubo_ranges[i].length = max_push_regs - total_push_regs;
|
||||
total_push_regs += ubo_ranges[i].length;
|
||||
}
|
||||
assert(total_push_regs <= max_push_regs);
|
||||
if (candidate_range->length + total_push_regs > max_push_regs)
|
||||
candidate_range->length = max_push_regs - total_push_regs;
|
||||
|
||||
const unsigned max_push_buffers = needs_padding_per_primitive ? 3 : 4;
|
||||
if (candidate_range->length == 0)
|
||||
break;
|
||||
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
struct brw_ubo_range *ubo_range = &ubo_ranges[i];
|
||||
if (ubo_range->length == 0)
|
||||
continue;
|
||||
|
||||
if (n_push_ranges >= max_push_buffers) {
|
||||
memset(ubo_range, 0, sizeof(*ubo_range));
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(ubo_range->block < push_map->block_count);
|
||||
const struct anv_pipeline_binding *binding =
|
||||
&push_map->block_to_descriptor[ubo_range->block];
|
||||
|
||||
map->push_ranges[n_push_ranges++] = (struct anv_push_range) {
|
||||
.set = binding->set,
|
||||
.index = binding->index,
|
||||
.dynamic_offset_index = binding->dynamic_offset_index,
|
||||
.start = ubo_range->start,
|
||||
.length = ubo_range->length,
|
||||
};
|
||||
}
|
||||
map->push_ranges[n_push_ranges++] = *candidate_range;
|
||||
total_push_regs += candidate_range->length;
|
||||
}
|
||||
|
||||
/* Pass a single-register push constant payload for the PS stage even if
|
||||
|
|
@ -366,13 +475,44 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||
|
||||
assert(n_push_ranges <= 4);
|
||||
|
||||
struct lower_to_push_data_intel_state lower_state = {
|
||||
.bind_map = map,
|
||||
.push_map = push_map,
|
||||
.lowered_ubo_instrs = _mesa_pointer_set_create(NULL),
|
||||
};
|
||||
|
||||
bool progress = nir_shader_intrinsics_pass(
|
||||
nir, lower_ubo_to_push_data_intel,
|
||||
nir_metadata_control_flow, &lower_state);
|
||||
|
||||
if (progress && nir_opt_dce(nir)) {
|
||||
/* Regather the push data */
|
||||
data = gather_push_data(nir, robust_flags, devinfo, push_info, prog_key,
|
||||
map, lower_state.lowered_ubo_instrs);
|
||||
|
||||
/* Update the ranges */
|
||||
struct anv_push_range shrinked_push_constant_range =
|
||||
compute_final_push_range(devinfo, &data);
|
||||
assert(shrinked_push_constant_range.length <= push_constant_range.length);
|
||||
|
||||
if (shrinked_push_constant_range.length > 0) {
|
||||
map->push_ranges[0] = shrinked_push_constant_range;
|
||||
} else if (map->push_ranges[0].set == shrinked_push_constant_range.set) {
|
||||
memmove(&map->push_ranges[0], &map->push_ranges[1], 3 * sizeof(map->push_ranges[0]));
|
||||
memset(&map->push_ranges[3], 0, sizeof(map->push_ranges[3]));
|
||||
}
|
||||
|
||||
lower_state.reduced_push_ranges = 32 *
|
||||
(push_constant_range.length - shrinked_push_constant_range.length);
|
||||
push_constant_range = shrinked_push_constant_range;
|
||||
}
|
||||
|
||||
/* Finally lower the application's push constants & the driver's push data */
|
||||
progress |= nir_shader_intrinsics_pass(
|
||||
nir, lower_to_push_data_intel,
|
||||
nir_metadata_control_flow,
|
||||
&(struct lower_to_push_data_intel_state) {
|
||||
.bind_map = map,
|
||||
.push_map = push_map,
|
||||
});
|
||||
nir_metadata_control_flow, &lower_state);
|
||||
|
||||
ralloc_free(lower_state.lowered_ubo_instrs);
|
||||
|
||||
/* Do this before calling brw_cs_fill_push_const_info(), it uses the data
|
||||
* in prog_data->push_sizes[].
|
||||
|
|
@ -390,17 +530,17 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||
prog_data->push_sizes[i] = map->push_ranges[i].length * 32;
|
||||
}
|
||||
|
||||
unsigned push_start = push_constant_range.start * 32;
|
||||
if (prog_data->robust_ubo_ranges) {
|
||||
const uint32_t push_reg_mask_offset =
|
||||
anv_drv_const_offset(gfx.push_reg_mask[nir->info.stage]);
|
||||
assert(push_reg_mask_offset >= push_start);
|
||||
prog_data->push_reg_mask_param =
|
||||
(push_reg_mask_offset - push_start) / 4;
|
||||
prog_data->push_reg_mask_param = (push_reg_mask_offset - push_start) / 4;
|
||||
}
|
||||
|
||||
switch (nir->info.stage) {
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
if (needs_dyn_tess_config) {
|
||||
if (data.needs_dyn_tess_config) {
|
||||
struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
|
||||
|
||||
const uint32_t tess_config_offset = anv_drv_const_offset(gfx.tess_config);
|
||||
|
|
@ -429,7 +569,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||
assert(fs_config_offset >= push_start);
|
||||
fs_prog_data->fs_config_param = fs_config_offset - push_start;
|
||||
}
|
||||
if (needs_wa_18019110168) {
|
||||
if (data.needs_wa_18019110168) {
|
||||
const uint32_t fs_per_prim_remap_offset =
|
||||
anv_drv_const_offset(gfx.fs_per_prim_remap_offset);
|
||||
assert(fs_per_prim_remap_offset >= push_start);
|
||||
|
|
@ -441,8 +581,8 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||
|
||||
case MESA_SHADER_COMPUTE: {
|
||||
const int subgroup_id_index =
|
||||
push_end == (anv_drv_const_offset(cs.subgroup_id) +
|
||||
anv_drv_const_size(cs.subgroup_id)) ?
|
||||
data.driver_end == (anv_drv_const_offset(cs.subgroup_id) +
|
||||
anv_drv_const_size(cs.subgroup_id)) ?
|
||||
(anv_drv_const_offset(cs.subgroup_id) - push_start) / 4 : -1;
|
||||
struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data);
|
||||
brw_cs_fill_push_const_info(devinfo, cs_prog_data, subgroup_id_index);
|
||||
|
|
|
|||
|
|
@ -36,6 +36,9 @@ update_resource_intel_block(nir_builder *b, nir_intrinsic_instr *intrin,
|
|||
if (intrin->intrinsic != nir_intrinsic_resource_intel)
|
||||
return false;
|
||||
|
||||
if (nir_intrinsic_resource_access_intel(intrin) & nir_resource_intel_internal)
|
||||
return false;
|
||||
|
||||
/* If the array index in the descriptor binding is not const, we won't be
|
||||
* able to turn this load_ubo into a push constant.
|
||||
*
|
||||
|
|
|
|||
336
src/intel/vulkan/anv_nir_push_constants_analysis.c
Normal file
336
src/intel/vulkan/anv_nir_push_constants_analysis.c
Normal file
|
|
@ -0,0 +1,336 @@
|
|||
/* Copyright © 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "anv_nir.h"
|
||||
#include "util/u_dynarray.h"
|
||||
|
||||
struct push_range_entry
|
||||
{
|
||||
struct anv_push_range range;
|
||||
int benefit;
|
||||
};
|
||||
|
||||
static int
|
||||
set_score(uint8_t set)
|
||||
{
|
||||
/* UBO bindings */
|
||||
if (set < MAX_SETS)
|
||||
return 1;
|
||||
|
||||
/* Promotion of descriptor data, higher score than UBOs because of inline
|
||||
* uniforms or data from the descriptor that can be used for later resource
|
||||
* access.
|
||||
*/
|
||||
switch (set) {
|
||||
case ANV_DESCRIPTOR_SET_DESCRIPTORS: return 3;
|
||||
default: UNREACHABLE("unexpected push set");
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
score(const struct push_range_entry *entry)
|
||||
{
|
||||
return 2 * entry->benefit - entry->range.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares score for two UBO range entries.
|
||||
*
|
||||
* For a descending qsort().
|
||||
*/
|
||||
static int
|
||||
cmp_push_range_entry(const void *va, const void *vb)
|
||||
{
|
||||
const struct push_range_entry *a = va;
|
||||
const struct push_range_entry *b = vb;
|
||||
|
||||
/* Rank based on scores, descending order */
|
||||
int delta = score(b) - score(a);
|
||||
|
||||
/* Then use promotion type, descending order */
|
||||
if (delta == 0)
|
||||
delta = set_score(b->range.set) - set_score(a->range.set);
|
||||
|
||||
/* Then use the set index as a tie-breaker, descending order */
|
||||
if (delta == 0)
|
||||
delta = b->range.set - a->range.set;
|
||||
|
||||
/* Then use the UBO block index as a tie-breaker, descending order */
|
||||
if (delta == 0)
|
||||
delta = b->range.index - a->range.index;
|
||||
|
||||
/* Finally use the start offset as a second tie-breaker, ascending order */
|
||||
if (delta == 0)
|
||||
delta = a->range.start - b->range.start;
|
||||
|
||||
return delta;
|
||||
}
|
||||
|
||||
/* Kind of block tracked by the analysis; UBOs are the only kind so far. */
enum push_block_type {
   PUSH_BLOCK_TYPE_UBO = 1,
};
|
||||
|
||||
struct push_block_key
|
||||
{
|
||||
enum push_block_type type;
|
||||
uint32_t index;
|
||||
};
|
||||
|
||||
struct push_block_info
|
||||
{
|
||||
struct push_block_key key;
|
||||
|
||||
/* Each bit in the offsets bitfield represents a 32-byte section of data.
|
||||
* If it's set to one, there is interesting UBO data at that offset. If
|
||||
* not, there's a "hole" - padding between data - or just nothing at all.
|
||||
*/
|
||||
uint64_t offsets;
|
||||
uint8_t uses[64];
|
||||
};
|
||||
|
||||
/* Context passed around while analyzing a shader */
struct push_analysis_state
{
   /* Device description, used for the register (GRF) size */
   const struct intel_device_info *devinfo;

   /* struct push_block_key -> struct push_block_info */
   struct hash_table *blocks;
};
|
||||
|
||||
static uint32_t
|
||||
push_block_key_hash(const void *key)
|
||||
{
|
||||
return _mesa_hash_data(key, sizeof(struct push_block_key));
|
||||
}
|
||||
|
||||
static bool
|
||||
push_block_key_compare(const void *key1, const void *key2)
|
||||
{
|
||||
return memcmp(key1, key2, sizeof(struct push_block_key)) == 0;
|
||||
}
|
||||
|
||||
static struct push_block_info *
|
||||
get_block_info(struct push_analysis_state *state,
|
||||
enum push_block_type type, uint32_t index)
|
||||
{
|
||||
struct push_block_key key = { .type = type, .index = index, };
|
||||
struct hash_entry *entry =
|
||||
_mesa_hash_table_search(state->blocks, &key);
|
||||
if (entry)
|
||||
return (struct push_block_info *) entry->data;
|
||||
|
||||
struct push_block_info *info =
|
||||
rzalloc(state->blocks, struct push_block_info);
|
||||
info->key = key;
|
||||
_mesa_hash_table_insert(state->blocks, &info->key, info);
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
static void
|
||||
maybe_add_pushable_ubo(struct push_analysis_state *state,
|
||||
nir_intrinsic_instr *intrin)
|
||||
{
|
||||
const int block = anv_nir_get_ubo_binding_push_block(intrin->src[0]);
|
||||
const unsigned byte_offset = nir_src_as_uint(intrin->src[1]);
|
||||
const int offset = byte_offset / state->devinfo->grf_size;
|
||||
|
||||
/* Avoid shifting by larger than the width of our bitfield, as this
|
||||
* is undefined in C. Even if we require multiple bits to represent
|
||||
* the entire value, it's OK to record a partial value - the backend
|
||||
* is capable of falling back to pull loads for later components of
|
||||
* vectors, as it has to shrink ranges for other reasons anyway.
|
||||
*/
|
||||
if (offset >= 64)
|
||||
return;
|
||||
|
||||
/* The value might span multiple GRFs. */
|
||||
const unsigned num_components =
|
||||
nir_def_last_component_read(&intrin->def) + 1;
|
||||
const int bytes = num_components * (intrin->def.bit_size / 8);
|
||||
const int start = ROUND_DOWN_TO(byte_offset, state->devinfo->grf_size);
|
||||
const int end = align(byte_offset + bytes, state->devinfo->grf_size);
|
||||
const int chunks = (end - start) / state->devinfo->grf_size;
|
||||
|
||||
/* TODO: should we count uses in loops as higher benefit? */
|
||||
|
||||
struct push_block_info *info =
|
||||
get_block_info(state, PUSH_BLOCK_TYPE_UBO, block);
|
||||
info->offsets |= ((1ull << chunks) - 1) << offset;
|
||||
info->uses[offset]++;
|
||||
}
|
||||
|
||||
static void
|
||||
analyze_pushable_block(struct push_analysis_state *state, nir_block *block)
|
||||
{
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_ubo:
|
||||
if (anv_nir_is_promotable_ubo_binding(intrin->src[0]) &&
|
||||
nir_src_is_const(intrin->src[1]))
|
||||
maybe_add_pushable_ubo(state, intrin);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_push_entry(FILE *file,
|
||||
const struct push_block_info *info,
|
||||
const struct push_range_entry *entry,
|
||||
struct push_analysis_state *state)
|
||||
{
|
||||
fprintf(file,
|
||||
"set %2d, index %2d, start %2d, length %2d, bits = %"PRIx64", "
|
||||
"benefit %2d, cost %2d, score = %2d\n",
|
||||
entry->range.set, entry->range.index,
|
||||
entry->range.start, entry->range.length,
|
||||
info ? info->offsets : 0ul, entry->benefit, entry->range.length, score(entry));
|
||||
}
|
||||
|
||||
void
|
||||
anv_nir_analyze_push_constants_ranges(nir_shader *nir,
|
||||
const struct intel_device_info *devinfo,
|
||||
const struct anv_pipeline_push_map *push_map,
|
||||
struct anv_push_range out_ranges[4])
|
||||
{
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
|
||||
struct push_analysis_state state = {
|
||||
.devinfo = devinfo,
|
||||
.blocks = _mesa_hash_table_create(mem_ctx,
|
||||
push_block_key_hash,
|
||||
push_block_key_compare),
|
||||
};
|
||||
|
||||
/* Walk the IR, recording how many times each UBO block/offset is used. */
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
nir_foreach_block(block, impl) {
|
||||
analyze_pushable_block(&state, block);
|
||||
}
|
||||
}
|
||||
|
||||
/* Find ranges: a block, starting register-size aligned byte offset, and
|
||||
* length.
|
||||
*/
|
||||
struct util_dynarray ranges;
|
||||
util_dynarray_init(&ranges, mem_ctx);
|
||||
|
||||
hash_table_foreach(state.blocks, entry) {
|
||||
const struct push_block_info *info = entry->data;
|
||||
uint64_t offsets = info->offsets;
|
||||
|
||||
/* Walk through the offsets bitfield, finding contiguous regions of
|
||||
* set bits:
|
||||
*
|
||||
* 0000000001111111111111000000000000111111111111110000000011111100
|
||||
* ^^^^^^^^^^^^^ ^^^^^^^^^^^^^^ ^^^^^^
|
||||
*
|
||||
* Each of these will become a UBO range.
|
||||
*/
|
||||
while (offsets != 0) {
|
||||
/* Find the first 1 in the offsets bitfield. This represents the
|
||||
* start of a range of interesting UBO data. Make it zero-indexed.
|
||||
*/
|
||||
int first_bit = ffsll(offsets) - 1;
|
||||
|
||||
/* Find the first 0 bit in offsets beyond first_bit. To find the
|
||||
* first zero bit, we find the first 1 bit in the complement. In
|
||||
* order to ignore bits before first_bit, we mask off those bits.
|
||||
*/
|
||||
int first_hole = ffsll(~offsets & ~((1ull << first_bit) - 1)) - 1;
|
||||
|
||||
if (first_hole == -1) {
|
||||
/* If we didn't find a hole, then set it to the end of the
|
||||
* bitfield. There are no more ranges to process.
|
||||
*/
|
||||
first_hole = 64;
|
||||
offsets = 0;
|
||||
} else {
|
||||
/* We've processed all bits before first_hole. Mask them off. */
|
||||
offsets &= ~((1ull << first_hole) - 1);
|
||||
}
|
||||
|
||||
struct push_range_entry *entry =
|
||||
util_dynarray_grow(&ranges, struct push_range_entry, 1);
|
||||
|
||||
assert(info->key.index < push_map->block_count);
|
||||
const struct anv_pipeline_binding *binding =
|
||||
&push_map->block_to_descriptor[info->key.index];
|
||||
entry->range.set = binding->set;
|
||||
entry->range.index = binding->index;
|
||||
entry->range.dynamic_offset_index = binding->dynamic_offset_index;
|
||||
entry->range.start = first_bit;
|
||||
/* first_hole is one beyond the end, so we don't need to add 1 */
|
||||
entry->range.length = first_hole - first_bit;
|
||||
entry->benefit = 0;
|
||||
|
||||
for (int i = 0; i < entry->range.length; i++)
|
||||
entry->benefit += info->uses[first_bit + i];
|
||||
|
||||
if (false)
|
||||
print_push_entry(stderr, info, entry, &state);
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO: Consider combining ranges.
|
||||
*
|
||||
* We can only push 4 ranges via 3DSTATE_CONSTANT_XS. If there are
|
||||
* more ranges, and two are close by with only a small hole, it may be
|
||||
* worth combining them. The holes will waste register space, but the
|
||||
* benefit of removing pulls may outweigh that cost.
|
||||
*/
|
||||
|
||||
/* Sort the list so the most beneficial ranges are at the front. */
|
||||
int nr_entries = ranges.size / sizeof(struct push_range_entry);
|
||||
if (nr_entries > 0) {
|
||||
qsort(ranges.data, nr_entries, sizeof(struct push_range_entry),
|
||||
cmp_push_range_entry);
|
||||
}
|
||||
|
||||
if (false) {
|
||||
util_dynarray_foreach(&ranges, struct push_range_entry, entry) {
|
||||
print_push_entry(stderr, NULL, entry, &state);
|
||||
}
|
||||
}
|
||||
|
||||
struct push_range_entry *entries = ranges.data;
|
||||
|
||||
for (unsigned i = 0; i < nr_entries; i++) {
|
||||
entries[i].range.start *= devinfo->grf_size / 32;
|
||||
entries[i].range.length *= devinfo->grf_size / 32;
|
||||
}
|
||||
|
||||
/* Return the top 4, limited to the maximum number of push registers.
|
||||
*
|
||||
* The Vulkan driver sets up additional non-UBO push constants, so it may
|
||||
* need to shrink these ranges further (see anv_nir_compute_push_layout.c).
|
||||
* The OpenGL driver treats legacy uniforms as a UBO, so this is enough.
|
||||
*
|
||||
* To limit further, simply drop the tail of the list, as that's the least
|
||||
* valuable portion.
|
||||
*/
|
||||
const int max_ubos = 4;
|
||||
nr_entries = MIN2(nr_entries, max_ubos);
|
||||
|
||||
const unsigned max_push = 64;
|
||||
unsigned total_push = 0;
|
||||
|
||||
for (unsigned i = 0; i < nr_entries; i++) {
|
||||
if (total_push + entries[i].range.length > max_push)
|
||||
entries[i].range.length = max_push - total_push;
|
||||
total_push += entries[i].range.length;
|
||||
}
|
||||
|
||||
for (int i = 0; i < nr_entries; i++)
|
||||
out_ranges[i] = entries[i].range;
|
||||
for (int i = nr_entries; i < 4; i++)
|
||||
out_ranges[i] = (struct anv_push_range) {};
|
||||
|
||||
ralloc_free(ranges.mem_ctx);
|
||||
}
|
||||
|
|
@ -203,6 +203,10 @@ anv_nir_push_desc_ubo_fully_promoted(nir_shader *nir,
|
|||
if (nir_intrinsic_desc_set(resource) != push_set)
|
||||
continue;
|
||||
|
||||
/* Skip load_ubo loading the descriptor buffer (not a binding) */
|
||||
if (nir_intrinsic_binding(resource) == UINT32_MAX)
|
||||
continue;
|
||||
|
||||
uint32_t binding = nir_intrinsic_binding(resource);
|
||||
|
||||
/* If we have indirect indexing in the binding, no push promotion
|
||||
|
|
|
|||
|
|
@ -2687,6 +2687,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
|||
break;
|
||||
|
||||
case ANV_DESCRIPTOR_SET_DESCRIPTORS:
|
||||
/* We have LSC_SS surface states for this, binding table isn't
|
||||
* needed.
|
||||
*/
|
||||
assert(!cmd_buffer->device->info->has_lsc);
|
||||
if (shader->bind_map.layout_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER) {
|
||||
assert(pipe_state->descriptor_buffers[binding->index].state.alloc_size);
|
||||
bt_map[s] = pipe_state->descriptor_buffers[binding->index].state.offset +
|
||||
|
|
|
|||
|
|
@ -181,6 +181,7 @@ libanv_files = files(
|
|||
'anv_nir_lower_ubo_loads.c',
|
||||
'anv_nir_lower_resource_intel.c',
|
||||
'anv_nir_lower_unaligned_dispatch.c',
|
||||
'anv_nir_push_constants_analysis.c',
|
||||
'anv_nir_push_descriptor_analysis.c',
|
||||
'anv_perf.c',
|
||||
'anv_physical_device.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue