mesa/src/intel/vulkan/anv_cmd_buffer.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1842 lines
72 KiB
C
Raw Normal View History

/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
#include "anv_measure.h"
#include "vk_common_entrypoints.h"
#include "vk_util.h"
/** \file anv_cmd_buffer.c
*
* This file contains all of the stuff for emitting commands into a command
* buffer. This includes implementations of most of the vkCmd*
* entrypoints. This file is concerned entirely with state emission and
* not with the command buffer data structure itself. As far as this file
* is concerned, most of anv_cmd_buffer is magic.
*/
/**
 * Put the command buffer's tracked state into its initial configuration.
 *
 * The whole anv_cmd_state is zeroed first; afterwards the fields that are
 * given an explicit starting value are filled in, and the dynamic-state
 * pack_dirty bitset is seeded from the device's gfx_dirty_state.
 *
 * \param cmd_buffer  Command buffer whose ::state is (re)initialized.
 */
static void
anv_cmd_state_init(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_cmd_state *cs = &cmd_buffer->state;

   memset(cs, 0, sizeof(*cs));

   cs->current_pipeline = UINT32_MAX;

   /* Graphics state. */
   cs->gfx.restart_index = UINT32_MAX;
   cs->gfx.object_preemption = true;
   cs->gfx.dirty = 0;
   cs->gfx.streamout_stage = MESA_SHADER_NONE;
   BITSET_COPY(cs->gfx.dyn_state.pack_dirty,
               cmd_buffer->device->gfx_dirty_state);

   /* Compute state. */
   cs->compute.pixel_async_compute_thread_limit = UINT8_MAX;
   cs->compute.z_pass_async_compute_thread_limit = UINT8_MAX;
   cs->compute.np_z_async_throttle_settings = UINT8_MAX;
}
/**
 * Release what a single pipeline-state slot owns.
 *
 * Only the push descriptor set of \p pipe_state is finished here.
 *
 * \param cmd_buffer  Owning command buffer (not used by this function).
 * \param pipe_state  Pipeline state whose push descriptor set is finished.
 */
static void
anv_cmd_pipeline_state_finish(struct anv_cmd_buffer *cmd_buffer,
                              struct anv_cmd_pipeline_state *pipe_state)
{
   anv_push_descriptor_set_finish(&pipe_state->push_descriptor);
}
/**
 * Finish the per-bind-point pipeline states held in the command buffer.
 *
 * The graphics and compute base pipeline states are finished, in that
 * order.
 *
 * NOTE(review): state.rt.base is also an anv_cmd_pipeline_state but is not
 * finished here -- confirm that this is intentional.
 */
static void
anv_cmd_state_finish(struct anv_cmd_buffer *cmd_buffer)
{
   anv_cmd_pipeline_state_finish(cmd_buffer, &cmd_buffer->state.gfx.base);
   anv_cmd_pipeline_state_finish(cmd_buffer, &cmd_buffer->state.compute.base);
}
/**
 * Reset the tracked command state: finish the current state, then
 * initialize it again from scratch.
 */
static void
anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer)
{
   /* Finish must come first: init memsets the state that finish reads. */
   anv_cmd_state_finish(cmd_buffer);
   anv_cmd_state_init(cmd_buffer);
}
2015-07-29 14:05:06 -07:00
/**
 * Make sure \p cmd_buffer has a companion RCS command buffer.
 *
 * If a companion already exists nothing is done. Otherwise one is created
 * from the device's companion_rcs_cmd_pool (with the same level as
 * \p cmd_buffer), stored in cmd_buffer->companion_rcs_cmd_buffer and begun
 * through the genX cmd_buffer_begin_companion hook. Creation and begin run
 * with the device mutex held.
 *
 * \return VK_SUCCESS if a companion exists on return, otherwise the error
 *         reported by the pool's create callback.
 */
VkResult
anv_cmd_buffer_ensure_rcs_companion(struct anv_cmd_buffer *cmd_buffer)
{
   if (cmd_buffer->companion_rcs_cmd_buffer)
      return VK_SUCCESS;

   VkResult result = VK_SUCCESS;

   pthread_mutex_lock(&cmd_buffer->device->mutex);

   VK_FROM_HANDLE(vk_command_pool, pool,
                  cmd_buffer->device->companion_rcs_cmd_pool);
   assert(pool != NULL);

   struct vk_command_buffer *companion_vk = NULL;
   result = pool->command_buffer_ops->create(pool, cmd_buffer->vk.level,
                                             &companion_vk);
   if (result == VK_SUCCESS) {
      cmd_buffer->companion_rcs_cmd_buffer =
         container_of(companion_vk, struct anv_cmd_buffer, vk);

      anv_genX(cmd_buffer->device->info, cmd_buffer_begin_companion)(
         cmd_buffer->companion_rcs_cmd_buffer, cmd_buffer->vk.level);
   }

   pthread_mutex_unlock(&cmd_buffer->device->mutex);

   return result;
}
/**
 * vk_command_buffer_ops::create hook: allocate and initialize an
 * anv_cmd_buffer out of \p pool.
 *
 * \param pool            Command pool providing the allocator, the device
 *                        and the queue family index.
 * \param level           Level passed on to vk_command_buffer_init().
 * \param cmd_buffer_out  On success, receives the vk_command_buffer embedded
 *                        in the new anv_cmd_buffer.
 *
 * \return VK_SUCCESS, or an error code. On failure every step already
 *         performed is undone, the allocation is freed and
 *         \p cmd_buffer_out is not written.
 */
static VkResult
anv_create_cmd_buffer(struct vk_command_pool *pool,
                      VkCommandBufferLevel level,
                      struct vk_command_buffer **cmd_buffer_out)
{
   struct anv_device *device =
      container_of(pool->base.device, struct anv_device, vk);
   struct anv_cmd_buffer *cmd_buffer;
   VkResult result;

   cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cmd_buffer == NULL)
      return vk_error(pool, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = vk_command_buffer_init(pool, &cmd_buffer->vk,
                                   &anv_cmd_buffer_ops, level);
   if (result != VK_SUCCESS)
      goto fail_alloc;

   /* Point the common dynamic graphics state at storage owned by our
    * graphics state.
    */
   cmd_buffer->vk.dynamic_graphics_state.ms.sample_locations =
      &cmd_buffer->state.gfx.sample_locations;
   cmd_buffer->vk.dynamic_graphics_state.vi =
      &cmd_buffer->state.gfx.vertex_input;

   cmd_buffer->batch.status = VK_SUCCESS;
   cmd_buffer->generation.batch.status = VK_SUCCESS;

   cmd_buffer->device = device;

   assert(pool->queue_family_index < device->physical->queue.family_count);
   cmd_buffer->queue_family =
      &device->physical->queue.families[pool->queue_family_index];

   result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer);
   if (result != VK_SUCCESS)
      goto fail_vk;

   anv_state_stream_init(&cmd_buffer->surface_state_stream,
                         &device->internal_surface_state_pool, 4096);
   anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
                         &device->dynamic_state_pool, 16384);
   anv_state_stream_init(&cmd_buffer->general_state_stream,
                         &device->general_state_pool, 16384);
   anv_state_stream_init(&cmd_buffer->indirect_push_descriptor_stream,
                         &device->indirect_push_descriptor_pool, 4096);
   anv_state_stream_init(&cmd_buffer->push_descriptor_buffer_stream,
                         &device->push_descriptor_buffer_pool, 4096);

   int success = u_vector_init_pow2(&cmd_buffer->dynamic_bos, 8,
                                    sizeof(struct anv_bo *));
   if (!success) {
      /* result still holds VK_SUCCESS from the batch BO chain setup above.
       * Without an explicit error the caller would be told creation
       * succeeded although the object is freed below and *cmd_buffer_out
       * is never written. Report it as a host allocation failure.
       */
      result = vk_error(pool, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_batch_bo;
   }

   cmd_buffer->self_mod_locations = NULL;
   cmd_buffer->companion_rcs_cmd_buffer = NULL;
   cmd_buffer->is_companion_rcs_cmd_buffer = false;

   cmd_buffer->generation.jump_addr = ANV_NULL_ADDRESS;
   cmd_buffer->generation.return_addr = ANV_NULL_ADDRESS;

   memset(&cmd_buffer->generation.shader_state, 0,
          sizeof(cmd_buffer->generation.shader_state));

   anv_cmd_state_init(cmd_buffer);

   anv_measure_init(cmd_buffer);

   u_trace_init(&cmd_buffer->trace, &device->ds.trace_context);

   list_inithead(&cmd_buffer->bvh_dumps);

   *cmd_buffer_out = &cmd_buffer->vk;

   return VK_SUCCESS;

 fail_batch_bo:
   anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);
 fail_vk:
   vk_command_buffer_finish(&cmd_buffer->vk);
 fail_alloc:
   vk_free2(&device->vk.alloc, &pool->alloc, cmd_buffer);

   return result;
}
/**
 * Tear down one command buffer and free its memory.
 *
 * Finishes tracing, measurement, the batch BO chain and all state streams,
 * returns every BO still recorded in dynamic_bos to a BO pool, releases the
 * BVH dump BOs, finishes the tracked command state and the common
 * vk_command_buffer, and finally frees \p cmd_buffer itself.
 *
 * \p cmd_buffer must not be used after this call.
 */
static void
destroy_cmd_buffer(struct anv_cmd_buffer *cmd_buffer)
{
   u_trace_fini(&cmd_buffer->trace);

   anv_measure_destroy(cmd_buffer);

   anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);

   anv_state_stream_finish(&cmd_buffer->surface_state_stream);
   anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
   anv_state_stream_finish(&cmd_buffer->general_state_stream);
   anv_state_stream_finish(&cmd_buffer->indirect_push_descriptor_stream);
   anv_state_stream_finish(&cmd_buffer->push_descriptor_buffer_stream);

   while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) {
      struct anv_bo **slot = u_vector_remove(&cmd_buffer->dynamic_bos);
      struct anv_bo *bo = *slot;

      ANV_DMR_BO_FREE(&cmd_buffer->vk.base, bo);

      /* BOs with a non-NULL map go back to batch_bo_pool, the others to
       * bvh_bo_pool.
       */
      if (bo->map != NULL)
         anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, bo);
      else
         anv_bo_pool_free(&cmd_buffer->device->bvh_bo_pool, bo);
   }
   u_vector_finish(&cmd_buffer->dynamic_bos);

   list_for_each_entry_safe(struct anv_bvh_dump, dump,
                            &cmd_buffer->bvh_dumps, link) {
      anv_device_release_bo(cmd_buffer->device, dump->bo);
      free(dump);
   }

   anv_cmd_state_finish(cmd_buffer);

   vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer->self_mod_locations);

   vk_command_buffer_finish(&cmd_buffer->vk);
   vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer);
}
/**
 * vk_command_buffer_ops::destroy hook.
 *
 * Destroys the companion RCS command buffer (if there is one) and then the
 * command buffer itself, all while holding the device mutex.
 */
static void
anv_cmd_buffer_destroy(struct vk_command_buffer *vk_cmd_buffer)
{
   struct anv_cmd_buffer *cmd_buffer =
      container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);

   /* Keep our own copy of the device pointer: cmd_buffer is freed by
    * destroy_cmd_buffer() before the mutex is unlocked.
    */
   struct anv_device *device = cmd_buffer->device;

   pthread_mutex_lock(&device->mutex);

   struct anv_cmd_buffer *companion = cmd_buffer->companion_rcs_cmd_buffer;
   if (companion != NULL) {
      destroy_cmd_buffer(companion);
      cmd_buffer->companion_rcs_cmd_buffer = NULL;
   }

   ANV_RMV(cmd_buffer_destroy, cmd_buffer->device, cmd_buffer);

   destroy_cmd_buffer(cmd_buffer);

   pthread_mutex_unlock(&device->mutex);
}
/**
 * Bring a single command buffer back to a freshly-created state without
 * freeing the object itself.
 *
 * Resets the common vk_command_buffer, the batch BO chain and the tracked
 * command state, re-creates every state stream, releases the BOs recorded
 * in dynamic_bos and in bvh_dumps, and restarts measurement and tracing.
 *
 * \param cmd_buffer  Command buffer to reset.
 * \param flags       Not used by this function.
 */
static void
reset_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
                 UNUSED VkCommandBufferResetFlags flags)
{
   struct anv_device *device = cmd_buffer->device;

   vk_command_buffer_reset(&cmd_buffer->vk);

   cmd_buffer->usage_flags = 0;
   cmd_buffer->perf_query_pool = NULL;
   cmd_buffer->is_companion_rcs_cmd_buffer = false;

   anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);
   anv_cmd_state_reset(cmd_buffer);

   memset(&cmd_buffer->generation.shader_state, 0,
          sizeof(cmd_buffer->generation.shader_state));
   cmd_buffer->generation.jump_addr = ANV_NULL_ADDRESS;
   cmd_buffer->generation.return_addr = ANV_NULL_ADDRESS;

   /* Each state stream is finished and immediately set up again with the
    * same pool and block size that were used at creation time.
    */
   anv_state_stream_finish(&cmd_buffer->surface_state_stream);
   anv_state_stream_init(&cmd_buffer->surface_state_stream,
                         &device->internal_surface_state_pool, 4096);

   anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
   anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
                         &device->dynamic_state_pool, 16384);

   anv_state_stream_finish(&cmd_buffer->general_state_stream);
   anv_state_stream_init(&cmd_buffer->general_state_stream,
                         &device->general_state_pool, 16384);

   anv_state_stream_finish(&cmd_buffer->indirect_push_descriptor_stream);
   anv_state_stream_init(&cmd_buffer->indirect_push_descriptor_stream,
                         &device->indirect_push_descriptor_pool,
                         4096);

   anv_state_stream_finish(&cmd_buffer->push_descriptor_buffer_stream);
   anv_state_stream_init(&cmd_buffer->push_descriptor_buffer_stream,
                         &device->push_descriptor_buffer_pool, 4096);

   /* NOTE(review): destroy_cmd_buffer() hands these BOs back with
    * anv_bo_pool_free() (and ANV_DMR_BO_FREE), whereas here they are
    * released straight to the device -- confirm the difference is intended.
    */
   while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) {
      struct anv_bo **slot = u_vector_remove(&cmd_buffer->dynamic_bos);

      anv_device_release_bo(device, *slot);
   }

   anv_measure_reset(cmd_buffer);

   u_trace_fini(&cmd_buffer->trace);
   u_trace_init(&cmd_buffer->trace, &device->ds.trace_context);

   list_for_each_entry_safe(struct anv_bvh_dump, dump,
                            &cmd_buffer->bvh_dumps, link) {
      anv_device_release_bo(device, dump->bo);
      free(dump);
   }
   /* Every entry was freed above; start over with an empty list head. */
   list_inithead(&cmd_buffer->bvh_dumps);
}
/**
 * vk_command_buffer_ops::reset hook.
 *
 * A companion RCS command buffer, if present, is reset and then destroyed
 * and the pointer to it cleared; afterwards the command buffer itself is
 * reset.
 *
 * \param vk_cmd_buffer  Common command buffer embedded in an anv_cmd_buffer.
 * \param flags          Forwarded to reset_cmd_buffer().
 */
void
anv_cmd_buffer_reset(struct vk_command_buffer *vk_cmd_buffer,
                     UNUSED VkCommandBufferResetFlags flags)
{
   struct anv_cmd_buffer *cmd_buffer =
      container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
   struct anv_cmd_buffer *companion = cmd_buffer->companion_rcs_cmd_buffer;

   if (companion != NULL) {
      reset_cmd_buffer(companion, flags);
      destroy_cmd_buffer(companion);
      cmd_buffer->companion_rcs_cmd_buffer = NULL;
   }

   ANV_RMV(cmd_buffer_destroy, cmd_buffer->device, cmd_buffer);

   reset_cmd_buffer(cmd_buffer, flags);
}
/* Command buffer callbacks: creation, destruction and reset are routed to
 * the anv implementations defined in this file.
 */
const struct vk_command_buffer_ops anv_cmd_buffer_ops = {
   .create  = anv_create_cmd_buffer,
   .destroy = anv_cmd_buffer_destroy,
   .reset   = anv_cmd_buffer_reset,
};
/**
 * Forward to the cmd_buffer_emit_bt_pool_base_address implementation that
 * anv_genX() selects for this device's intel_device_info.
 */
void
anv_cmd_buffer_emit_bt_pool_base_address(struct anv_cmd_buffer *cmd_buffer)
{
   anv_genX(cmd_buffer->device->info,
            cmd_buffer_emit_bt_pool_base_address)(cmd_buffer);
}
/**
 * Forward to the cmd_buffer_mark_image_written implementation that
 * anv_genX() selects for this device's intel_device_info.
 *
 * All arguments are passed through unchanged.
 */
void
anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
                                  const struct anv_image *image,
                                  VkImageAspectFlagBits aspect,
                                  enum isl_aux_usage aux_usage,
                                  uint32_t level,
                                  uint32_t base_layer,
                                  uint32_t layer_count)
{
   const struct intel_device_info *info = cmd_buffer->device->info;

   anv_genX(info, cmd_buffer_mark_image_written)(
      cmd_buffer, image, aspect, aux_usage, level, base_layer, layer_count);
}
/**
 * Forward to the set_fast_clear_state implementation that anv_genX()
 * selects for this device's intel_device_info.
 *
 * All arguments are passed through unchanged.
 */
void
anv_cmd_buffer_mark_image_fast_cleared(struct anv_cmd_buffer *cmd_buffer,
                                       const struct anv_image *image,
                                       const enum isl_format format,
                                       const struct isl_swizzle swizzle,
                                       union isl_color_value clear_color)
{
   const struct intel_device_info *info = cmd_buffer->device->info;

   anv_genX(info, set_fast_clear_state)(
      cmd_buffer, image, format, swizzle, clear_color);
}
/**
 * Forward to the cmd_buffer_load_clear_color implementation that
 * anv_genX() selects for this device's intel_device_info.
 *
 * All arguments are passed through unchanged.
 */
void
anv_cmd_buffer_load_clear_color(struct anv_cmd_buffer *cmd_buffer,
                                struct anv_state state,
                                const struct anv_image_view *iview)
{
   anv_genX(cmd_buffer->device->info,
            cmd_buffer_load_clear_color)(cmd_buffer, state, iview);
}
anv: Implement VK_EXT_conditional_rendering for gen 7.5+ Conditional rendering affects next functions: - vkCmdDraw, vkCmdDrawIndexed, vkCmdDrawIndirect, vkCmdDrawIndexedIndirect - vkCmdDrawIndirectCountKHR, vkCmdDrawIndexedIndirectCountKHR - vkCmdDispatch, vkCmdDispatchIndirect, vkCmdDispatchBase - vkCmdClearAttachments Value from conditional buffer is cached into designated register, MI_PREDICATE is emitted every time conditional rendering is enabled and command requires it. v2: by Jason Ekstrand - Use vk_find_struct_const instead of manually looping - Move draw count loading to prepare function - Zero the top 32-bits of MI_ALU_REG15 v3: Apply pipeline flush before accessing conditional buffer (The issue was found by Samuel Iglesias) v4: - Remove support of Haswell due to possible hardware bug - Made TMP_REG_PREDICATE and TMP_REG_DRAW_COUNT defines to define registers in one place. v5: thanks to Jason Ekstrand and Lionel Landwerlin - Workaround the fact that MI_PREDICATE_RESULT is not accessible on Haswell by manually calculating MI_PREDICATE_RESULT and re-emitting MI_PREDICATE when necessary. v6: suggested by Lionel Landwerlin - Instead of calculating the result of predicate once - re-emit MI_PREDICATE to make it easier to investigate error states. v7: suggested by Jason - Make anv_pipe_invalidate_bits_for_access_flag add CS_STALL if VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT is set. v8: suggested by Lionel - Precompute conditional predicate's result to support secondary command buffers. - Make prepare_for_draw_count_predicate more readable. Signed-off-by: Danylo Piliaiev <danylo.piliaiev@globallogic.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2018-10-05 17:54:07 +03:00
/**
 * Forward to the cmd_emit_conditional_render_predicate implementation that
 * anv_genX() selects for this device's intel_device_info.
 */
void
anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer)
{
   anv_genX(cmd_buffer->device->info,
            cmd_emit_conditional_render_predicate)(cmd_buffer);
}
/**
 * Drop the pending-flush requirements in \p query_bits that are satisfied
 * by a flush of \p flushed_bits.
 *
 * - RT_FLUSH is cleared by a render target cache flush.
 * - TILE_FLUSH is cleared by a tile cache flush.
 * - DATA_FLUSH is cleared only when the data cache, HDC pipeline and
 *   untyped dataport cache flush bits are all present.
 * - CS_STALL is cleared by an end-of-pipe sync or CS stall, but only once
 *   none of the three flush requirements above remain pending.
 *
 * \param query_bits    In/out set of pending query write requirements.
 * \param flushed_bits  Pipe bits that have just been flushed.
 */
static void
clear_pending_query_bits(enum anv_query_bits *query_bits,
                         enum anv_pipe_bits flushed_bits)
{
   if (flushed_bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
      *query_bits &= ~ANV_QUERY_WRITES_RT_FLUSH;

   if (flushed_bits & ANV_PIPE_TILE_CACHE_FLUSH_BIT)
      *query_bits &= ~ANV_QUERY_WRITES_TILE_FLUSH;

   /* This branch used to clear TILE_FLUSH a second time, which left
    * DATA_FLUSH impossible to clear (and therefore CS_STALL stuck whenever
    * DATA_FLUSH was pending) and wrongly dropped a pending tile flush on a
    * data-cache-only flush.
    */
   if ((flushed_bits & ANV_PIPE_DATA_CACHE_FLUSH_BIT) &&
       (flushed_bits & ANV_PIPE_HDC_PIPELINE_FLUSH_BIT) &&
       (flushed_bits & ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT))
      *query_bits &= ~ANV_QUERY_WRITES_DATA_FLUSH;

   /* Once RT/TILE/DATA have been flushed, we can consider the CS_STALL
    * flush.
    */
   if ((*query_bits & (ANV_QUERY_WRITES_TILE_FLUSH |
                       ANV_QUERY_WRITES_RT_FLUSH |
                       ANV_QUERY_WRITES_DATA_FLUSH)) == 0 &&
       (flushed_bits & (ANV_PIPE_END_OF_PIPE_SYNC_BIT | ANV_PIPE_CS_STALL_BIT)))
      *query_bits &= ~ANV_QUERY_WRITES_CS_STALL;
}
/**
 * Apply a completed flush to both sets of pending query bits tracked by the
 * command buffer (queries.clear_bits and queries.buffer_write_bits).
 *
 * \param cmd_buffer    Command buffer holding the pending bits.
 * \param flushed_bits  Pipe bits that have just been flushed.
 */
void
anv_cmd_buffer_update_pending_query_bits(struct anv_cmd_buffer *cmd_buffer,
                                         enum anv_pipe_bits flushed_bits)
{
   enum anv_query_bits *const pending[] = {
      &cmd_buffer->state.queries.clear_bits,
      &cmd_buffer->state.queries.buffer_write_bits,
   };

   for (unsigned i = 0; i < ARRAY_SIZE(pending); i++)
      clear_pending_query_bits(pending[i], flushed_bits);
}
/**
 * Copy \p size bytes from \p src to \p dst, but only if they differ.
 *
 * \return true when the destination was modified, false when both buffers
 *         already held identical bytes (nothing is written in that case).
 */
static bool
mem_update(void *dst, const void *src, size_t size)
{
   const bool changed = memcmp(dst, src, size) != 0;

   if (changed)
      memcpy(dst, src, size);

   return changed;
}
/**
 * Record the bind map hashes of \p stage and flag what changed.
 *
 * The surface, sampler and push BLAKE3 values stored in the command state
 * for \p stage are compared with those of \p map and updated when they
 * differ. A surface or sampler change dirties the descriptors of that
 * stage; a push change marks the stage's push constants dirty.
 *
 * \param cmd_buffer  Command buffer whose cached hashes are updated.
 * \param stage       Shader stage the bind map belongs to.
 * \param map         Bind map providing the new hashes.
 */
static void
set_dirty_for_bind_map(struct anv_cmd_buffer *cmd_buffer,
                       mesa_shader_stage stage,
                       const struct anv_pipeline_bind_map *map)
{
   struct anv_cmd_state *cs = &cmd_buffer->state;

   assert(stage < ARRAY_SIZE(cmd_buffer->state.surface_blake3s));
   const bool surfaces_changed =
      mem_update(cs->surface_blake3s[stage],
                 map->surface_blake3, sizeof(map->surface_blake3));
   if (surfaces_changed) {
      anv_cmd_buffer_dirty_descriptors(cmd_buffer,
                                       mesa_to_vk_shader_stage(stage),
                                       "shader surfaces change");
   }

   assert(stage < ARRAY_SIZE(cmd_buffer->state.sampler_blake3s));
   const bool samplers_changed =
      mem_update(cs->sampler_blake3s[stage],
                 map->sampler_blake3, sizeof(map->sampler_blake3));
   if (samplers_changed) {
      anv_cmd_buffer_dirty_descriptors(cmd_buffer,
                                       mesa_to_vk_shader_stage(stage),
                                       "shader samplers change");
   }

   assert(stage < ARRAY_SIZE(cmd_buffer->state.push_blake3s));
   const bool push_changed =
      mem_update(cs->push_blake3s[stage],
                 map->push_blake3, sizeof(map->push_blake3));
   if (push_changed)
      cs->push_constants_dirty |= mesa_to_vk_shader_stage(stage);
}
/**
 * Make the ray query buffers available to the command buffer and publish
 * the ray query globals address through the push constants.
 *
 * If \p ray_queries requires shadow stack space and the command buffer has
 * no shadow BO yet (or one that is too small), a BO is taken from the
 * device's per-index, per-size-bucket cache, allocating and installing a
 * new one when the slot is still empty. The shadow BO and the device's ray
 * query BO are added to the batch's BO list.
 *
 * On allocation failure the error is recorded on the batch and the function
 * returns without touching the push constants.
 *
 * \param cmd_buffer      Command buffer being recorded.
 * \param pipeline_state  Pipeline state whose push constants receive the
 *                        ray query globals address.
 * \param ray_queries     Value passed to
 *                        brw_rt_ray_queries_shadow_stacks_size().
 * \param stages          Stages whose push constants are marked dirty.
 */
static void
anv_cmd_buffer_set_rt_query_buffer(struct anv_cmd_buffer *cmd_buffer,
                                   struct anv_cmd_pipeline_state *pipeline_state,
                                   uint32_t ray_queries,
                                   VkShaderStageFlags stages)
{
   struct anv_device *device = cmd_buffer->device;
   uint8_t idx = anv_get_ray_query_bo_index(cmd_buffer);

   uint64_t ray_shadow_size =
      align64(brw_rt_ray_queries_shadow_stacks_size(device->info, ray_queries),
              4096);
   if (ray_shadow_size > 0 &&
       (!cmd_buffer->state.ray_query_shadow_bo ||
        cmd_buffer->state.ray_query_shadow_bo->size < ray_shadow_size)) {
      /* Sizes are rounded up to a power of two of at least 1 << 16; bucket 0
       * corresponds to that minimum.
       */
      unsigned shadow_size_log2 = MAX2(util_logbase2_ceil(ray_shadow_size), 16);
      unsigned bucket = shadow_size_log2 - 16;
      assert(bucket < ARRAY_SIZE(device->ray_query_shadow_bos[0]));

      struct anv_bo *bo = p_atomic_read(&device->ray_query_shadow_bos[idx][bucket]);
      if (bo == NULL) {
         struct anv_bo *new_bo;
         /* Shift in 64 bits: an int shift overflows once the log2 reaches
          * 31, and the result is used as a 64-bit BO size.
          */
         VkResult result = anv_device_alloc_bo(device, "RT queries shadow",
                                               (uint64_t)1 << shadow_size_log2,
                                               ANV_BO_ALLOC_INTERNAL, /* alloc_flags */
                                               0, /* explicit_address */
                                               &new_bo);
         ANV_DMR_BO_ALLOC(&cmd_buffer->vk.base, new_bo, result);
         if (result != VK_SUCCESS) {
            anv_batch_set_error(&cmd_buffer->batch, result);
            return;
         }

         /* Install our BO only if the slot is still empty. A non-NULL
          * return means another BO is already installed: release ours and
          * use that one instead.
          */
         bo = p_atomic_cmpxchg(&device->ray_query_shadow_bos[idx][bucket], NULL, new_bo);
         if (bo != NULL) {
            ANV_DMR_BO_FREE(&device->vk.base, new_bo);
            anv_device_release_bo(device, new_bo);
         } else {
            bo = new_bo;
         }
      }
      cmd_buffer->state.ray_query_shadow_bo = bo;

      /* Add the ray query buffers to the batch list. */
      anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
                            cmd_buffer->state.ray_query_shadow_bo);
   }

   /* Add the HW buffer to the list of BO used. */
   assert(device->ray_query_bo[idx]);
   anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
                         device->ray_query_bo[idx]);

   /* Fill the push constants & mark them dirty. */
   struct anv_address ray_query_globals_addr =
      anv_genX(device->info, cmd_buffer_ray_query_globals)(cmd_buffer);
   pipeline_state->push_constants.ray_query_globals =
      anv_address_physical(ray_query_globals_addr);
   cmd_buffer->state.push_constants_dirty |= stages;
   pipeline_state->push_constants_data_dirty = true;
}
static void
update_push_descriptor_flags(struct anv_cmd_pipeline_state *state,
struct anv_shader ** const shaders,
uint32_t shader_count)
{
state->push_buffer_stages = 0;
state->push_descriptor_stages = 0;
for (uint32_t i = 0; i < shader_count; i++) {
if (shaders[i] == NULL)
continue;
VkShaderStageFlags stage = mesa_to_vk_shader_stage(shaders[i]->vk.stage);
if (shaders[i]->push_desc_info.used_descriptors)
state->push_descriptor_stages |= stage;
if (shaders[i]->push_desc_info.push_set_buffer)
state->push_buffer_stages |= stage;
}
}
/**
 * Store per-set dynamic buffer indices into the push constants.
 *
 * For each of the MAX_SETS sets, the bits of desc_surface_offsets[] covered
 * by ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK are replaced with offsets[set]
 * when they differ from it; the other bits are left alone.
 *
 * \param state    Pipeline state holding the push constants.
 * \param offsets  Array of at least MAX_SETS dynamic indices.
 *
 * \return true if any entry of the push constants was rewritten.
 */
static bool
maybe_update_dynamic_buffers_indices(struct anv_cmd_pipeline_state *state,
                                     const uint8_t *offsets)
{
   struct anv_push_constants *pc = &state->push_constants;
   bool any_change = false;

   for (uint32_t set = 0; set < MAX_SETS; set++) {
      /* Already up to date for this set? */
      if ((pc->desc_surface_offsets[set] &
           ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) == offsets[set])
         continue;

      pc->desc_surface_offsets[set] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
      pc->desc_surface_offsets[set] |= offsets[set];
      any_change = true;
   }

   return any_change;
}
/**
 * Look up the pipeline state for a bind point and compute which of a set
 * layout's shader stages apply to it.
 *
 * \param cmd_buffer  Command buffer holding the per-bind-point states.
 * \param bind_point  Graphics, compute or ray tracing bind point; any other
 *                    value is treated as unreachable.
 * \param set_layout  Descriptor set layout providing shader_stages.
 * \param out_stages  Receives set_layout->shader_stages restricted to the
 *                    stages of \p bind_point. For graphics, the task and
 *                    mesh stages are included only when EXT_mesh_shader is
 *                    enabled on the device.
 *
 * \return The base pipeline state of the given bind point.
 */
static struct anv_cmd_pipeline_state *
anv_cmd_buffer_get_pipeline_layout_state(struct anv_cmd_buffer *cmd_buffer,
                                         VkPipelineBindPoint bind_point,
                                         const struct anv_descriptor_set_layout *set_layout,
                                         VkShaderStageFlags *out_stages)
{
   struct anv_cmd_pipeline_state *pipe_state;
   VkShaderStageFlags bind_point_stages;

   switch (bind_point) {
   case VK_PIPELINE_BIND_POINT_GRAPHICS:
      bind_point_stages = VK_SHADER_STAGE_ALL_GRAPHICS;
      if (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader) {
         bind_point_stages |= VK_SHADER_STAGE_TASK_BIT_EXT |
                              VK_SHADER_STAGE_MESH_BIT_EXT;
      }
      pipe_state = &cmd_buffer->state.gfx.base;
      break;

   case VK_PIPELINE_BIND_POINT_COMPUTE:
      bind_point_stages = VK_SHADER_STAGE_COMPUTE_BIT;
      pipe_state = &cmd_buffer->state.compute.base;
      break;

   case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
      bind_point_stages = ANV_RT_STAGE_BITS;
      pipe_state = &cmd_buffer->state.rt.base;
      break;

   default:
      UNREACHABLE("invalid bind point");
   }

   *out_stages = set_layout->shader_stages;
   *out_stages &= bind_point_stages;

   return pipe_state;
}
/**
 * Record a change of descriptor buffer mode.
 *
 * Nothing happens when \p new_mode already is the pending mode. Otherwise
 * the pending mode is updated and the descriptor buffers are flagged dirty.
 *
 * \param cmd_buffer  Command buffer whose pending mode is tracked.
 * \param new_mode    Mode required by what is about to be bound.
 */
static void
anv_cmd_buffer_maybe_dirty_descriptor_mode(struct anv_cmd_buffer *cmd_buffer,
                                           enum anv_cmd_descriptor_buffer_mode new_mode)
{
   if (cmd_buffer->state.pending_db_mode != new_mode) {
      cmd_buffer->state.pending_db_mode = new_mode;

      /* Ensure we program the STATE_BASE_ADDRESS properly at least once */
      cmd_buffer->state.descriptor_buffers.dirty = true;
   }
}
static void
anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
VkPipelineBindPoint bind_point,
struct vk_pipeline_layout *layout,
uint32_t set_index,
struct anv_descriptor_set *set,
uint32_t *dynamic_offset_count,
const uint32_t **dynamic_offsets)
{
/* Either we have no pool because it's a push descriptor or the pool is not
* host only :
*
* VUID-vkCmdBindDescriptorSets-pDescriptorSets-04616:
*
* "Each element of pDescriptorSets must not have been allocated from a
* VkDescriptorPool with the
* VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT flag set"
*/
assert(!set->pool || !set->pool->host_only);
struct anv_descriptor_set_layout *set_layout = set->layout;
anv_cmd_buffer_maybe_dirty_descriptor_mode(
cmd_buffer,
(set->layout->vk.flags &
VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) != 0 ?
ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER :
ANV_CMD_DESCRIPTOR_BUFFER_MODE_LEGACY);
VkShaderStageFlags stages;
struct anv_cmd_pipeline_state *pipe_state =
anv_cmd_buffer_get_pipeline_layout_state(cmd_buffer, bind_point,
set_layout, &stages);
VkShaderStageFlags dirty_stages = 0;
/* If it's a push descriptor set, we have to flag things as dirty
* regardless of whether or not the CPU-side data structure changed as we
* may have edited in-place.
*/
if (pipe_state->descriptors[set_index] != set ||
anv_descriptor_set_is_push(set)) {
pipe_state->descriptors[set_index] = set;
if (set->layout->vk.flags &
VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) {
assert(set->is_push);
pipe_state->descriptor_buffers[set_index].buffer_index = -1;
pipe_state->descriptor_buffers[set_index].buffer_offset = set->desc_offset;
pipe_state->descriptor_buffers[set_index].bound = true;
anv_cmd_buffer_dirty_descriptors(cmd_buffer, stages, "push descriptor bind");
cmd_buffer->state.descriptor_buffers.offsets_dirty |= stages;
} else {
/* Platforms with LSC will use descriptor buffer push constant
* offsets
*/
bool update_desc_sets = cmd_buffer->device->info->has_lsc;
if (update_desc_sets) {
struct anv_push_constants *push = &pipe_state->push_constants;
uint64_t offset =
anv_address_physical(set->desc_surface_addr) -
cmd_buffer->device->physical->va.internal_surface_state_pool.addr;
assert((offset & ~ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0);
push->desc_surface_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK;
push->desc_surface_offsets[set_index] |= offset;
push->desc_sampler_offsets[set_index] =
anv_address_physical(set->desc_sampler_addr) -
cmd_buffer->device->physical->va.dynamic_state_pool.addr;
}
}
/* Always add a reference to the buffers */
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
set->desc_surface_addr.bo);
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
set->desc_sampler_addr.bo);
dirty_stages |= stages;
}
if (dynamic_offsets) {
if (set_layout->vk.dynamic_descriptor_count > 0) {
struct anv_push_constants *push = &pipe_state->push_constants;
assert(layout != NULL);
uint32_t dynamic_offset_start =
layout->dynamic_descriptor_offset[set_index];
uint32_t *push_offsets =
&push->dynamic_offsets[dynamic_offset_start];
anv: add dynamic buffer offsets support with independent sets With independent sets, we're not able to compute immediate values for the index at which to read anv_push_constants::dynamic_offsets to get the offset of a dynamic buffer. This is because the pipeline layout may not have all the descriptor set layouts when we compile the shader. To solve that issue, we insert a layer of indirection. This reworks the dynamic buffer offset storage with a 2D array in anv_cmd_pipeline_state : dynamic_offsets[MAX_SETS][MAX_DYN_BUFFERS] When the pipeline or the dynamic buffer offsets are updated, we flatten that array into the anv_push_constants::dynamic_offsets[MAX_DYN_BUFFERS] array. For shaders compiled with independent sets, the bottom 6 bits of element X in anv_push_constants::desc_sets[] is used to specify the base offsets into the anv_push_constants::dynamic_offsets[] for the set X. The computation in the shader is now something like : base_dyn_buffer_set_idx = anv_push_constants::desc_sets[set_idx] & 0x3f dyn_buffer_offset = anv_push_constants::dynamic_offsets[base_dyn_buffer_set_idx + dynamic_buffer_idx] It was suggested by Faith to use a different push constant buffer with dynamic_offsets prepared for each stage when using independent sets instead, but it feels easier to understand this way. And there is some room for optimization if you are set X and that you know all the sets in the range [0, X], then you can still avoid the indirection. Separate push constant allocations per stage do have a CPU cost. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Emma Anholt <emma@anholt.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15637>
2022-04-06 18:12:02 +03:00
memcpy(pipe_state->dynamic_offsets[set_index].offsets,
*dynamic_offsets,
sizeof(uint32_t) * MIN2(*dynamic_offset_count,
set_layout->vk.dynamic_descriptor_count));
anv: add dynamic buffer offsets support with independent sets With independent sets, we're not able to compute immediate values for the index at which to read anv_push_constants::dynamic_offsets to get the offset of a dynamic buffer. This is because the pipeline layout may not have all the descriptor set layouts when we compile the shader. To solve that issue, we insert a layer of indirection. This reworks the dynamic buffer offset storage with a 2D array in anv_cmd_pipeline_state : dynamic_offsets[MAX_SETS][MAX_DYN_BUFFERS] When the pipeline or the dynamic buffer offsets are updated, we flatten that array into the anv_push_constants::dynamic_offsets[MAX_DYN_BUFFERS] array. For shaders compiled with independent sets, the bottom 6 bits of element X in anv_push_constants::desc_sets[] is used to specify the base offsets into the anv_push_constants::dynamic_offsets[] for the set X. The computation in the shader is now something like : base_dyn_buffer_set_idx = anv_push_constants::desc_sets[set_idx] & 0x3f dyn_buffer_offset = anv_push_constants::dynamic_offsets[base_dyn_buffer_set_idx + dynamic_buffer_idx] It was suggested by Faith to use a different push constant buffer with dynamic_offsets prepared for each stage when using independent sets instead, but it feels easier to understand this way. And there is some room for optimization if you are set X and that you know all the sets in the range [0, X], then you can still avoid the indirection. Separate push constant allocations per stage do have a CPU cost. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Emma Anholt <emma@anholt.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15637>
2022-04-06 18:12:02 +03:00
/* Assert that everything is in range */
assert(set_layout->vk.dynamic_descriptor_count <= *dynamic_offset_count);
assert(dynamic_offset_start + set_layout->vk.dynamic_descriptor_count <=
ARRAY_SIZE(push->dynamic_offsets));
for (uint32_t i = 0; i < set_layout->vk.dynamic_descriptor_count; i++) {
if (push_offsets[i] != (*dynamic_offsets)[i]) {
anv: add dynamic buffer offsets support with independent sets With independent sets, we're not able to compute immediate values for the index at which to read anv_push_constants::dynamic_offsets to get the offset of a dynamic buffer. This is because the pipeline layout may not have all the descriptor set layouts when we compile the shader. To solve that issue, we insert a layer of indirection. This reworks the dynamic buffer offset storage with a 2D array in anv_cmd_pipeline_state : dynamic_offsets[MAX_SETS][MAX_DYN_BUFFERS] When the pipeline or the dynamic buffer offsets are updated, we flatten that array into the anv_push_constants::dynamic_offsets[MAX_DYN_BUFFERS] array. For shaders compiled with independent sets, the bottom 6 bits of element X in anv_push_constants::desc_sets[] is used to specify the base offsets into the anv_push_constants::dynamic_offsets[] for the set X. The computation in the shader is now something like : base_dyn_buffer_set_idx = anv_push_constants::desc_sets[set_idx] & 0x3f dyn_buffer_offset = anv_push_constants::dynamic_offsets[base_dyn_buffer_set_idx + dynamic_buffer_idx] It was suggested by Faith to use a different push constant buffer with dynamic_offsets prepared for each stage when using independent sets instead, but it feels easier to understand this way. And there is some room for optimization if you are set X and that you know all the sets in the range [0, X], then you can still avoid the indirection. Separate push constant allocations per stage do have a CPU cost. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Emma Anholt <emma@anholt.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15637>
2022-04-06 18:12:02 +03:00
pipe_state->dynamic_offsets[set_index].offsets[i] =
push_offsets[i] = (*dynamic_offsets)[i];
/* dynamic_offset_stages[] elements could contain blanket
* values like VK_SHADER_STAGE_ALL, so limit this to the
* binding point's bits.
*/
dirty_stages |= set_layout->dynamic_offset_stages[i] & stages;
}
}
*dynamic_offsets += set_layout->vk.dynamic_descriptor_count;
*dynamic_offset_count -= set_layout->vk.dynamic_descriptor_count;
}
}
/* Update the push descriptor index tracking */
if (anv_descriptor_set_is_push(set))
pipe_state->push_descriptor_index = set_index;
else if (pipe_state->push_descriptor_index == set_index)
pipe_state->push_descriptor_index = UINT8_MAX;
anv: reduce BT emissions & surface state writes with push descriptors Zink on Anv running Gfxbench gl_driver2 is significantly slower than Iris. The reason is simple, whereas Iris implements uniform updates using push constants and only has to emit 3DSTATE_CONSTANT_* packets, Zink uses push descriptors with a uniform buffer, which on our implementation use both push constants & binding tables. Anv ends up doing the following for each uniform update : - allocate 2 surface states : - one for the uniform buffer as the offset specify by zink - one for the descriptor set buffer - pack the 2 RENDER_SURFACE_STATE - re-emit binding tables - re-emit push constants Of all of those operations, only the last one ends up being useful in this benchmark because all the uniforms have been promoted to push constants. This change defers the 3 first operations at draw time and executes them only if the pipeline needs them. Vkoverhead before / after : descriptor_template_1ubo_push: 40670 / 85786 descriptor_template_12ubo_push: 4050 / 13820 descriptor_template_1combined_sampler_push, 34410 / 34043 descriptor_template_16combined_sampler_push, 2746 / 2711 descriptor_template_1sampled_image_push, 34765 / 34089 descriptor_template_16sampled_image_push, 2794 / 2649 descriptor_template_1texelbuffer_push, 108537 / 111342 descriptor_template_16texelbuffer_push, 20619 / 20166 descriptor_template_1ssbo_push, 41506 / 85976 descriptor_template_8ssbo_push, 6036 / 18703 descriptor_template_1image_push, 88932 / 89610 descriptor_template_16image_push, 20937 / 20959 descriptor_template_1imagebuffer_push, 108407 / 113240 descriptor_template_16imagebuffer_push, 32661 / 34651 Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Emma Anholt <emma@anholt.net> Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19050>
2022-10-02 19:24:40 +03:00
if (set->is_push)
cmd_buffer->state.push_descriptors_dirty |= dirty_stages;
else
anv_cmd_buffer_dirty_descriptors(cmd_buffer, dirty_stages, "descriptor bind");
cmd_buffer->state.push_constants_dirty |= dirty_stages;
pipe_state->push_constants_data_dirty = true;
}
/**
 * vkCmdBindDescriptorSets2KHR entrypoint.
 *
 * Binds pInfo->descriptorSetCount descriptor sets, starting at set index
 * pInfo->firstSet, on every pipeline bind point selected by
 * pInfo->stageFlags.  Bind points are processed in the order compute,
 * graphics, ray tracing.  NULL descriptor set handles are skipped.
 */
void anv_CmdBindDescriptorSets2KHR(
    VkCommandBuffer                             commandBuffer,
    const VkBindDescriptorSetsInfoKHR*          pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_pipeline_layout, layout, pInfo->layout);

   assert(pInfo->firstSet + pInfo->descriptorSetCount <= MAX_SETS);

   const struct {
      VkShaderStageFlags  stage_mask;
      VkPipelineBindPoint bind_point;
   } targets[] = {
      { VK_SHADER_STAGE_COMPUTE_BIT, VK_PIPELINE_BIND_POINT_COMPUTE },
      { ANV_GRAPHICS_STAGE_BITS,     VK_PIPELINE_BIND_POINT_GRAPHICS },
      { ANV_RT_STAGE_BITS,           VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR },
   };

   for (uint32_t t = 0; t < ARRAY_SIZE(targets); t++) {
      if (!(pInfo->stageFlags & targets[t].stage_mask))
         continue;

      /* Every bind point walks the dynamic offsets from the start, so each
       * one gets its own cursor/counter pair for the bind helper to update.
       */
      uint32_t offsets_left = pInfo->dynamicOffsetCount;
      const uint32_t *offsets_cursor = pInfo->pDynamicOffsets;

      for (uint32_t s = 0; s < pInfo->descriptorSetCount; s++) {
         ANV_FROM_HANDLE(anv_descriptor_set, set, pInfo->pDescriptorSets[s]);
         if (set != NULL) {
            anv_cmd_buffer_bind_descriptor_set(cmd_buffer,
                                               targets[t].bind_point,
                                               layout, pInfo->firstSet + s,
                                               set,
                                               &offsets_left,
                                               &offsets_cursor);
         }
      }
   }
}
/**
 * vkCmdBindDescriptorBuffersEXT entrypoint.
 *
 * Records the address of each bound descriptor buffer.  When the address in
 * slot i changes, the surface and/or sampler base address is updated
 * according to the binding's usage flags and the descriptor buffer state
 * (including all stage offsets) is flagged dirty.
 */
void anv_CmdBindDescriptorBuffersEXT(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    bufferCount,
    const VkDescriptorBufferBindingInfoEXT*     pBindingInfos)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_cmd_state *state = &cmd_buffer->state;

   for (uint32_t i = 0; i < bufferCount; i++) {
      const VkDescriptorBufferBindingInfoEXT *binding = &pBindingInfos[i];

      /* The buffer must live inside the dynamic visible pool VA range. */
      assert(binding->address >= cmd_buffer->device->physical->va.dynamic_visible_pool.addr &&
             binding->address < (cmd_buffer->device->physical->va.dynamic_visible_pool.addr +
                                 cmd_buffer->device->physical->va.dynamic_visible_pool.size));

      /* Nothing to do when the slot already points at this address. */
      if (state->descriptor_buffers.address[i] == binding->address)
         continue;

      state->descriptor_buffers.address[i] = binding->address;

      if (binding->usage & VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT)
         state->descriptor_buffers.surfaces_address = binding->address;
      if (binding->usage & VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT)
         state->descriptor_buffers.samplers_address = binding->address;

      state->descriptor_buffers.dirty = true;
      state->descriptor_buffers.offsets_dirty = ~0;
   }
}
/**
 * Record descriptor buffer index/offset pairs for a range of sets on one
 * pipeline bind point.
 *
 * For each of the set_count sets starting at first_set, the (buffer index,
 * buffer offset) pair is stored in the pipeline state.  Descriptors and
 * descriptor buffer offsets are flagged dirty for the affected stages only
 * when the pair differs from the stored one or the set was not bound yet.
 * The set is always marked bound afterwards.
 */
static void
anv_cmd_buffer_set_descriptor_buffer_offsets(struct anv_cmd_buffer *cmd_buffer,
                                             VkPipelineBindPoint bind_point,
                                             struct vk_pipeline_layout *layout,
                                             uint32_t first_set,
                                             uint32_t set_count,
                                             const VkDeviceSize *buffer_offsets,
                                             const uint32_t *buffer_indices)
{
   for (uint32_t n = 0; n < set_count; n++) {
      const uint32_t set_idx = first_set + n;
      const struct anv_descriptor_set_layout *set_layout =
         container_of(layout->set_layouts[set_idx],
                      const struct anv_descriptor_set_layout, vk);

      VkShaderStageFlags stages;
      struct anv_cmd_pipeline_state *pipe_state =
         anv_cmd_buffer_get_pipeline_layout_state(cmd_buffer, bind_point,
                                                  set_layout, &stages);

      const bool up_to_date =
         pipe_state->descriptor_buffers[set_idx].bound &&
         buffer_offsets[n] == pipe_state->descriptor_buffers[set_idx].buffer_offset &&
         buffer_indices[n] == pipe_state->descriptor_buffers[set_idx].buffer_index;

      if (!up_to_date) {
         pipe_state->descriptor_buffers[set_idx].buffer_index = buffer_indices[n];
         pipe_state->descriptor_buffers[set_idx].buffer_offset = buffer_offsets[n];

         anv_cmd_buffer_dirty_descriptors(cmd_buffer, stages, "EXT_DB offset");
         cmd_buffer->state.descriptor_buffers.offsets_dirty |= stages;
      }

      pipe_state->descriptor_buffers[set_idx].bound = true;
   }
}
/**
 * vkCmdSetDescriptorBufferOffsets2EXT entrypoint.
 *
 * Applies the descriptor buffer offsets to every pipeline bind point
 * selected by the stage flags (compute, graphics, ray tracing, in that
 * order), then notifies the command buffer that descriptor-buffer mode is
 * in use.
 */
void anv_CmdSetDescriptorBufferOffsets2EXT(
    VkCommandBuffer                             commandBuffer,
    const VkSetDescriptorBufferOffsetsInfoEXT*  pSetDescriptorBufferOffsetsInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   const VkSetDescriptorBufferOffsetsInfoEXT *info =
      pSetDescriptorBufferOffsetsInfo;
   VK_FROM_HANDLE(vk_pipeline_layout, layout, info->layout);

   const struct {
      VkShaderStageFlags  stage_mask;
      VkPipelineBindPoint bind_point;
   } targets[] = {
      { VK_SHADER_STAGE_COMPUTE_BIT, VK_PIPELINE_BIND_POINT_COMPUTE },
      { ANV_GRAPHICS_STAGE_BITS,     VK_PIPELINE_BIND_POINT_GRAPHICS },
      { ANV_RT_STAGE_BITS,           VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR },
   };

   for (uint32_t t = 0; t < ARRAY_SIZE(targets); t++) {
      if (info->stageFlags & targets[t].stage_mask) {
         anv_cmd_buffer_set_descriptor_buffer_offsets(cmd_buffer,
                                                      targets[t].bind_point,
                                                      layout,
                                                      info->firstSet,
                                                      info->setCount,
                                                      info->pOffsets,
                                                      info->pBufferIndices);
      }
   }

   anv_cmd_buffer_maybe_dirty_descriptor_mode(cmd_buffer,
                                              ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER);
}
/**
 * vkCmdBindDescriptorBufferEmbeddedSamplers2EXT entrypoint.
 *
 * Intentionally empty: this driver records nothing in the command buffer
 * for this command.
 */
void anv_CmdBindDescriptorBufferEmbeddedSamplers2EXT(
    VkCommandBuffer commandBuffer,
    const VkBindDescriptorBufferEmbeddedSamplersInfoEXT* pBindDescriptorBufferEmbeddedSamplersInfo)
{
   /* no-op */
}
/**
 * vkCmdBindVertexBuffers2 entrypoint.
 *
 * Stores address, size and MOCS for bindings
 * [firstBinding, firstBinding + bindingCount) in the command buffer state
 * and marks each of them dirty.  A NULL buffer handle clears the binding.
 * pSizes may be NULL, in which case VK_WHOLE_SIZE is used for every binding.
 * When pStrides is non-NULL the strides are forwarded to the common dynamic
 * state code.
 */
void anv_CmdBindVertexBuffers2(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    firstBinding,
    uint32_t                                    bindingCount,
    const VkBuffer*                             pBuffers,
    const VkDeviceSize*                         pOffsets,
    const VkDeviceSize*                         pSizes,
    const VkDeviceSize*                         pStrides)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings;

   /* We have to defer setting up vertex buffer since we need the buffer
    * stride from the pipeline. */

   assert(firstBinding + bindingCount <= get_max_vbs(cmd_buffer->device->info));
   for (uint32_t i = 0; i < bindingCount; i++) {
      ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]);

      if (buffer == NULL) {
         vb[firstBinding + i] = (struct anv_vertex_binding) { 0 };
      } else {
         vb[firstBinding + i] = (struct anv_vertex_binding) {
            .addr = anv_address_physical(
               anv_address_add(buffer->address, pOffsets[i])),
            .size = vk_buffer_range(&buffer->vk, pOffsets[i],
                                    pSizes ? pSizes[i] : VK_WHOLE_SIZE),
            .mocs = anv_mocs(cmd_buffer->device, buffer->address.bo,
                             ISL_SURF_USAGE_VERTEX_BUFFER_BIT),
         };
      }

      /* Unsigned literal: shifting a signed 1 into bit 31 is undefined
       * behavior, so build the mask in unsigned arithmetic.
       */
      cmd_buffer->state.gfx.vb_dirty |= 1u << (firstBinding + i);
   }

   if (pStrides != NULL) {
      vk_cmd_set_vertex_binding_strides(&cmd_buffer->vk, firstBinding,
                                        bindingCount, pStrides);
   }
}
/**
 * vkCmdBindIndexBuffer2KHR entrypoint.
 *
 * Updates the tracked index type, and the index buffer address, size and
 * MOCS, setting the matching dirty bits only when a value actually changes.
 * A NULL buffer handle is accepted and results in a zero address and size.
 */
void anv_CmdBindIndexBuffer2KHR(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    VkDeviceSize                                size,
    VkIndexType                                 indexType)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   if (cmd_buffer->state.gfx.index_type != indexType) {
      cmd_buffer->state.gfx.index_type = indexType;
      cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_INDEX_TYPE;
   }

   uint64_t index_addr = buffer ?
      anv_address_physical(anv_address_add(buffer->address, offset)) : 0;
   uint32_t index_size = buffer ? vk_buffer_range(&buffer->vk, offset, size) : 0;

   if (cmd_buffer->state.gfx.index_addr != index_addr ||
       cmd_buffer->state.gfx.index_size != index_size) {
      cmd_buffer->state.gfx.index_addr = index_addr;
      cmd_buffer->state.gfx.index_size = index_size;
      /* buffer may be NULL here (e.g. unbinding a previously bound index
       * buffer), so it must not be dereferenced unconditionally.
       * NOTE(review): relies on anv_mocs() accepting a NULL bo - confirm.
       */
      cmd_buffer->state.gfx.index_mocs =
         anv_mocs(cmd_buffer->device, buffer ? buffer->address.bo : NULL,
                  ISL_SURF_USAGE_INDEX_BUFFER_BIT);
      cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
   }
}
/**
 * vkCmdBindTransformFeedbackBuffersEXT entrypoint.
 *
 * Stores address, size and MOCS for transform feedback bindings
 * [firstBinding, firstBinding + bindingCount) in the command buffer state.
 * A VK_NULL_HANDLE buffer clears the binding.  pSizes may be NULL, in which
 * case VK_WHOLE_SIZE is used for every binding.
 */
void anv_CmdBindTransformFeedbackBuffersEXT(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    firstBinding,
    uint32_t                                    bindingCount,
    const VkBuffer*                             pBuffers,
    const VkDeviceSize*                         pOffsets,
    const VkDeviceSize*                         pSizes)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_xfb_binding *xfb = cmd_buffer->state.xfb_bindings;

   /* Only the binding parameters are recorded here; nothing is emitted. */
   assert(firstBinding + bindingCount <= MAX_XFB_BUFFERS);

   for (uint32_t i = 0; i < bindingCount; i++) {
      struct anv_xfb_binding *slot = &xfb[firstBinding + i];

      if (pBuffers[i] == VK_NULL_HANDLE) {
         *slot = (struct anv_xfb_binding) { 0 };
         continue;
      }

      ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]);
      const VkDeviceSize bind_offset = pOffsets[i];
      const VkDeviceSize bind_size = pSizes ? pSizes[i] : VK_WHOLE_SIZE;

      *slot = (struct anv_xfb_binding) {
         .addr = anv_address_physical(
            anv_address_add(buffer->address, bind_offset)),
         .size = vk_buffer_range(&buffer->vk, bind_offset, bind_size),
         .mocs = anv_mocs(cmd_buffer->device, buffer->address.bo,
                          ISL_SURF_USAGE_STREAM_OUT_BIT),
      };
   }
}
/**
 * Return the ISL format used for buffer descriptors of the given type.
 *
 * Uniform buffers (static and dynamic) use ISL_FORMAT_R32G32B32A32_FLOAT
 * when intel_indirect_ubos_use_sampler() reports true for the device and
 * ISL_FORMAT_RAW otherwise.  Storage buffers (static and dynamic) always use
 * ISL_FORMAT_RAW.  Any other descriptor type is invalid here.
 */
enum isl_format
anv_isl_format_for_descriptor_type(const struct anv_device *device,
                                   VkDescriptorType type)
{
   switch (type) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
      return intel_indirect_ubos_use_sampler(device->info) ?
             ISL_FORMAT_R32G32B32A32_FLOAT : ISL_FORMAT_RAW;

   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
      return ISL_FORMAT_RAW;

   default:
      UNREACHABLE("Invalid descriptor type");
   }
}
struct anv_state
anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
2015-12-01 15:37:12 -08:00
const void *data, uint32_t size, uint32_t alignment)
{
struct anv_state state;
2015-12-01 15:37:12 -08:00
state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);
memcpy(state.map, data, size);
VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size));
return state;
}
struct anv_state
anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
uint32_t *a, uint32_t *b,
uint32_t dwords, uint32_t alignment)
{
struct anv_state state;
uint32_t *p;
state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
dwords * 4, alignment);
p = state.map;
for (uint32_t i = 0; i < dwords; i++) {
assert((a[i] & b[i]) == 0);
p[i] = a[i] | b[i];
}
VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
return state;
}
/**
 * Snapshot the graphics push constants into freshly allocated temporary
 * state.
 *
 * Only push_constants_client_size bytes of the client data are copied;
 * everything in struct anv_push_constants from desc_surface_offsets onward
 * (the driver-managed part) is copied in full.
 *
 * @return the allocated state, or the empty state unchanged when the
 *         allocation failed (alloc_size == 0)
 */
struct anv_state
anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer)
{
   const struct anv_push_constants *data =
      &cmd_buffer->state.gfx.base.push_constants;

   /* For Mesh/Task shaders the 3DSTATE_(MESH|TASK)_SHADER_DATA require a 64B
    * alignment.
    *
    * ATMS PRMs Volume 2d: Command Reference: Structures,
    * 3DSTATE_MESH_SHADER_DATA_BODY::Indirect Data Start Address:
    *
    *    "This pointer is relative to the General State Base Address. It is
    *     the 64-byte aligned address of the indirect data."
    *
    * A 64B alignment also satisfies consumers that only need the bottom 5
    * bits of the address to be zero.
    */
   struct anv_state state =
      anv_cmd_buffer_alloc_temporary_state(cmd_buffer,
                                           sizeof(struct anv_push_constants),
                                           64);
   if (state.alloc_size == 0)
      return state;

   /* Client-visible part: copy only what the application has written. */
   memcpy(state.map, data->client_data,
          cmd_buffer->state.gfx.base.push_constants_client_size);

   /* Driver part: everything following client_data in the structure. */
   memcpy(state.map + sizeof(data->client_data),
          &data->desc_surface_offsets,
          sizeof(struct anv_push_constants) - sizeof(data->client_data));

   return state;
}
struct anv_state
anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
{
const struct intel_device_info *devinfo = cmd_buffer->device->info;
struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute;
struct anv_cmd_pipeline_state *pipe_state = &comp_state->base;
anv: add dynamic buffer offsets support with independent sets With independent sets, we're not able to compute immediate values for the index at which to read anv_push_constants::dynamic_offsets to get the offset of a dynamic buffer. This is because the pipeline layout may not have all the descriptor set layouts when we compile the shader. To solve that issue, we insert a layer of indirection. This reworks the dynamic buffer offset storage with a 2D array in anv_cmd_pipeline_state : dynamic_offsets[MAX_SETS][MAX_DYN_BUFFERS] When the pipeline or the dynamic buffer offsets are updated, we flatten that array into the anv_push_constants::dynamic_offsets[MAX_DYN_BUFFERS] array. For shaders compiled with independent sets, the bottom 6 bits of element X in anv_push_constants::desc_sets[] is used to specify the base offsets into the anv_push_constants::dynamic_offsets[] for the set X. The computation in the shader is now something like : base_dyn_buffer_set_idx = anv_push_constants::desc_sets[set_idx] & 0x3f dyn_buffer_offset = anv_push_constants::dynamic_offsets[base_dyn_buffer_set_idx + dynamic_buffer_idx] It was suggested by Faith to use a different push constant buffer with dynamic_offsets prepared for each stage when using independent sets instead, but it feels easier to understand this way. And there is some room for optimization if you are set X and that you know all the sets in the range [0, X], then you can still avoid the indirection. Separate push constant allocations per stage do have a CPU cost. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Emma Anholt <emma@anholt.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15637>
2022-04-06 18:12:02 +03:00
struct anv_push_constants *data = &pipe_state->push_constants;
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(comp_state);
const struct anv_push_range *range = &comp_state->shader->bind_map.push_ranges[0];
const struct intel_cs_dispatch_info dispatch =
brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
const unsigned total_push_constants_size =
brw_cs_push_const_total_size(cs_prog_data, dispatch.threads);
if (total_push_constants_size == 0)
return (struct anv_state) { .offset = 0 };
const unsigned push_constant_alignment = 64;
const unsigned aligned_total_push_constants_size =
align(total_push_constants_size, push_constant_alignment);
struct anv_state state;
if (devinfo->verx10 >= 125) {
state = anv_cmd_buffer_alloc_general_state(cmd_buffer,
aligned_total_push_constants_size,
push_constant_alignment);
} else {
state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
aligned_total_push_constants_size,
push_constant_alignment);
}
if (state.map == NULL)
return state;
void *dst = state.map;
const void *src = (char *)data + (range->start * 32);
if (cs_prog_data->push.cross_thread.size > 0) {
memcpy(dst, src, cs_prog_data->push.cross_thread.size);
dst += cs_prog_data->push.cross_thread.size;
src += cs_prog_data->push.cross_thread.size;
}
if (cs_prog_data->push.per_thread.size > 0) {
for (unsigned t = 0; t < dispatch.threads; t++) {
memcpy(dst, src, cs_prog_data->push.per_thread.size);
uint32_t *subgroup_id = dst +
offsetof(struct anv_push_constants, cs.subgroup_id) -
(range->start * 32 + cs_prog_data->push.cross_thread.size);
*subgroup_id = t;
dst += cs_prog_data->push.per_thread.size;
}
}
return state;
}
/**
 * vkCmdPushConstants2KHR entrypoint.
 *
 * Copies pInfo->size bytes from pInfo->pValues into the client push
 * constant storage, at byte pInfo->offset, of every pipeline state selected
 * by pInfo->stageFlags (graphics, compute, ray tracing).  Each updated state
 * has its data flagged dirty and its client size grown to cover the written
 * range.  The stage flags are then added to the command buffer's push
 * constant dirty mask.
 */
void anv_CmdPushConstants2KHR(
    VkCommandBuffer                             commandBuffer,
    const VkPushConstantsInfoKHR*               pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   const struct {
      VkShaderStageFlags             stage_mask;
      struct anv_cmd_pipeline_state *pipe_state;
   } targets[] = {
      { ANV_GRAPHICS_STAGE_BITS,     &cmd_buffer->state.gfx.base },
      { VK_SHADER_STAGE_COMPUTE_BIT, &cmd_buffer->state.compute.base },
      { ANV_RT_STAGE_BITS,           &cmd_buffer->state.rt.base },
   };

   for (uint32_t t = 0; t < ARRAY_SIZE(targets); t++) {
      if (!(pInfo->stageFlags & targets[t].stage_mask))
         continue;

      struct anv_cmd_pipeline_state *pipe_state = targets[t].pipe_state;

      memcpy(pipe_state->push_constants.client_data + pInfo->offset,
             pInfo->pValues, pInfo->size);
      pipe_state->push_constants_data_dirty = true;
      pipe_state->push_constants_client_size = MAX2(
         pipe_state->push_constants_client_size, pInfo->offset + pInfo->size);
   }

   cmd_buffer->state.push_constants_dirty |= pInfo->stageFlags;
}
/**
 * Return the base pipeline state tracked by the command buffer for the
 * given bind point (graphics, compute or ray tracing).  Any other bind
 * point is invalid.
 */
static struct anv_cmd_pipeline_state *
anv_cmd_buffer_get_pipe_state(struct anv_cmd_buffer *cmd_buffer,
                              VkPipelineBindPoint bind_point)
{
   switch (bind_point) {
   case VK_PIPELINE_BIND_POINT_GRAPHICS:
      return &cmd_buffer->state.gfx.base;
   case VK_PIPELINE_BIND_POINT_COMPUTE:
      return &cmd_buffer->state.compute.base;
   case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
      return &cmd_buffer->state.rt.base;
   default:
      UNREACHABLE("invalid bind point");
   }
}
/**
 * Write a push descriptor set for one bind point and bind it.
 *
 * (Re)initializes the bind point's push descriptor set for the layout of
 * set pInfo->set, applies the descriptor writes from pInfo, then binds the
 * result at index pInfo->set without dynamic offsets.  Returns early,
 * binding nothing, when the push descriptor set cannot be initialized.
 */
static void
anv_cmd_buffer_push_descriptor_sets(struct anv_cmd_buffer *cmd_buffer,
                                    VkPipelineBindPoint bind_point,
                                    const VkPushDescriptorSetInfoKHR *pInfo)
{
   VK_FROM_HANDLE(vk_pipeline_layout, layout, pInfo->layout);

   assert(pInfo->set < MAX_SETS);

   struct anv_descriptor_set_layout *set_layout =
      container_of(layout->set_layouts[pInfo->set],
                   struct anv_descriptor_set_layout, vk);

   struct anv_cmd_pipeline_state *pipe_state =
      anv_cmd_buffer_get_pipe_state(cmd_buffer, bind_point);
   struct anv_push_descriptor_set *push_set = &pipe_state->push_descriptor;

   const bool ready =
      anv_push_descriptor_set_init(cmd_buffer, push_set, set_layout);
   if (!ready)
      return;

   anv_descriptor_set_write(cmd_buffer->device, &push_set->set,
                            pInfo->descriptorWriteCount,
                            pInfo->pDescriptorWrites);

   anv_cmd_buffer_bind_descriptor_set(cmd_buffer, bind_point,
                                      layout, pInfo->set, &push_set->set,
                                      NULL, NULL);
}
/**
 * vkCmdPushDescriptorSet2KHR entrypoint.
 *
 * Pushes the descriptor set on every pipeline bind point selected by
 * pInfo->stageFlags, in the order compute, graphics, ray tracing.
 */
void anv_CmdPushDescriptorSet2KHR(
    VkCommandBuffer                            commandBuffer,
    const VkPushDescriptorSetInfoKHR*          pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   const struct {
      VkShaderStageFlags  stage_mask;
      VkPipelineBindPoint bind_point;
   } targets[] = {
      { VK_SHADER_STAGE_COMPUTE_BIT, VK_PIPELINE_BIND_POINT_COMPUTE },
      { ANV_GRAPHICS_STAGE_BITS,     VK_PIPELINE_BIND_POINT_GRAPHICS },
      { ANV_RT_STAGE_BITS,           VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR },
   };

   for (uint32_t t = 0; t < ARRAY_SIZE(targets); t++) {
      if (pInfo->stageFlags & targets[t].stage_mask) {
         anv_cmd_buffer_push_descriptor_sets(cmd_buffer,
                                             targets[t].bind_point,
                                             pInfo);
      }
   }
}
/**
 * vkCmdPushDescriptorSetWithTemplate2KHR entrypoint.
 *
 * (Re)initializes the push descriptor set of the template's bind point for
 * the layout of set pInfo->set, fills it from pInfo->pData using the update
 * template, then binds it at index pInfo->set without dynamic offsets.
 * Returns early, binding nothing, when the push descriptor set cannot be
 * initialized.
 */
void anv_CmdPushDescriptorSetWithTemplate2KHR(
    VkCommandBuffer                                commandBuffer,
    const VkPushDescriptorSetWithTemplateInfoKHR*  pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_descriptor_update_template, template,
                  pInfo->descriptorUpdateTemplate);
   VK_FROM_HANDLE(vk_pipeline_layout, layout, pInfo->layout);

   /* pInfo->set is a descriptor set index, so it is bounded by MAX_SETS
    * (as in anv_cmd_buffer_push_descriptor_sets()), not by the number of
    * push descriptors.
    */
   assert(pInfo->set < MAX_SETS);

   struct anv_descriptor_set_layout *set_layout =
      container_of(layout->set_layouts[pInfo->set],
                   struct anv_descriptor_set_layout, vk);

   /* Only the pipeline state is needed here; the stage mask is ignored. */
   UNUSED VkShaderStageFlags stages;
   struct anv_cmd_pipeline_state *pipe_state =
      anv_cmd_buffer_get_pipeline_layout_state(cmd_buffer, template->bind_point,
                                               set_layout, &stages);
   struct anv_push_descriptor_set *push_set = &pipe_state->push_descriptor;

   if (!anv_push_descriptor_set_init(cmd_buffer, push_set, set_layout))
      return;

   anv_descriptor_set_write_template(cmd_buffer->device, &push_set->set,
                                     template,
                                     pInfo->pData);

   anv_cmd_buffer_bind_descriptor_set(cmd_buffer, template->bind_point,
                                      layout, pInfo->set, &push_set->set,
                                      NULL, NULL);
}
/**
 * Update the ray tracing state tracked by the command buffer.
 *
 * @param vk_cmd_buffer               common command buffer embedded in an
 *                                    anv_cmd_buffer
 * @param scratch_size                scratch size request; the tracked value
 *                                    only ever grows
 * @param ray_queries                 ray query count; when non-zero the ray
 *                                    query buffer is set up for the RT stages
 * @param dynamic_descriptor_offsets  forwarded to
 *                                    maybe_update_dynamic_buffers_indices();
 *                                    when that reports a change the RT push
 *                                    constants are flagged dirty
 */
void
anv_cmd_buffer_set_rt_state(struct vk_command_buffer *vk_cmd_buffer,
                            VkDeviceSize scratch_size,
                            uint32_t ray_queries,
                            const uint8_t *dynamic_descriptor_offsets)
{
   struct anv_cmd_buffer *cmd_buffer =
      container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
   struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;

   rt->scratch_size = MAX2(rt->scratch_size, scratch_size);

   if (ray_queries != 0) {
      anv_cmd_buffer_set_rt_query_buffer(cmd_buffer, &rt->base, ray_queries,
                                         ANV_RT_STAGE_BITS);
   }

   const bool indices_changed =
      maybe_update_dynamic_buffers_indices(&rt->base,
                                           dynamic_descriptor_offsets);
   if (!indices_changed)
      return;

   cmd_buffer->state.push_constants_dirty |= ANV_RT_STAGE_BITS;
   rt->base.push_constants_data_dirty = true;
}
/**
 * Select the ray-tracing scratch layout and scratch BO for a stack size.
 *
 * The requested size is rounded up to a power of two (at least 2^10), the
 * scratch layout is recomputed for it, and a per-device BO for that size
 * bucket is looked up, or allocated and published with a compare-and-swap.
 * On allocation failure the error is recorded on the batch and
 * layout.total_size is zeroed. Does nothing if the batch already carries an
 * error or the software stack size is unchanged.
 */
void
anv_cmd_buffer_set_stack_size(struct vk_command_buffer *vk_cmd_buffer,
                              VkDeviceSize stack_size)
{
   struct anv_cmd_buffer *cmd_buffer =
      container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
   struct anv_device *device = cmd_buffer->device;
   struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;

   if (anv_batch_has_error(&cmd_buffer->batch))
      return;

   uint32_t stack_ids_per_dss =
      brw_rt_ray_queries_stack_ids_per_dss(device->info);

   /* Round up to a power of two, with a floor of 2^10. */
   const unsigned requested_log2 = util_logbase2_ceil(stack_size);
   const unsigned stack_size_log2 = MAX2(requested_log2, 10);
   const int sw_stack_size = 1 << stack_size_log2;

   if (rt->scratch.layout.sw_stack_size == sw_stack_size)
      return;

   brw_rt_compute_scratch_layout(&rt->scratch.layout, device->info,
                                 stack_ids_per_dss, sw_stack_size);

   /* One cached BO per power-of-two size, bucket 0 being 2^10. */
   const unsigned bucket = stack_size_log2 - 10;
   assert(bucket < ARRAY_SIZE(device->rt_scratch_bos));

   struct anv_bo *bo = p_atomic_read(&device->rt_scratch_bos[bucket]);
   if (bo != NULL) {
      rt->scratch.bo = bo;
      return;
   }

   struct anv_bo *new_bo;
   VkResult result = anv_device_alloc_bo(device, "RT scratch",
                                         rt->scratch.layout.total_size,
                                         ANV_BO_ALLOC_INTERNAL, /* alloc_flags */
                                         0, /* explicit_address */
                                         &new_bo);
   ANV_DMR_BO_ALLOC(&device->vk.base, new_bo, result);
   if (result != VK_SUCCESS) {
      rt->scratch.layout.total_size = 0;
      anv_batch_set_error(&cmd_buffer->batch, result);
      return;
   }

   /* Publish our BO unless another one was installed in the meantime, in
    * which case ours is released and the installed one is used.
    */
   struct anv_bo *installed =
      p_atomic_cmpxchg(&device->rt_scratch_bos[bucket], NULL, new_bo);
   if (installed == NULL) {
      bo = new_bo;
   } else {
      ANV_DMR_BO_FREE(&device->vk.base, new_bo);
      anv_device_release_bo(device, new_bo);
      bo = installed;
   }

   rt->scratch.bo = bo;
}
void
anv_cmd_buffer_save_state(struct anv_cmd_buffer *cmd_buffer,
uint32_t flags,
struct anv_cmd_saved_state *state)
{
state->flags = flags;
/* we only support the compute pipeline at the moment */
assert(state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE);
const struct anv_cmd_pipeline_state *pipe_state =
&cmd_buffer->state.compute.base;
if (state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE)
state->shader = &cmd_buffer->state.compute.shader->vk;
if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0)
state->descriptor_set[0] = pipe_state->descriptors[0];
if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL) {
for (uint32_t i = 0; i < MAX_SETS; i++) {
state->descriptor_set[i] = pipe_state->descriptors[i];
}
}
if (state->flags & ANV_CMD_SAVED_STATE_PUSH_CONSTANTS) {
memcpy(state->push_constants, pipe_state->push_constants.client_data,
sizeof(state->push_constants));
}
}
/* Re-bind compute descriptor set `index` from a saved snapshot, or clear the
 * slot when the snapshot holds no set for it.
 */
static void
restore_saved_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
                                     struct anv_cmd_saved_state *state,
                                     uint32_t index)
{
   if (state->descriptor_set[index] == NULL) {
      cmd_buffer->state.compute.base.descriptors[index] = NULL;
      return;
   }

   anv_cmd_buffer_bind_descriptor_set(cmd_buffer,
                                      VK_PIPELINE_BIND_POINT_COMPUTE,
                                      NULL, index,
                                      state->descriptor_set[index],
                                      NULL, NULL);
}

/**
 * Re-apply a snapshot taken by anv_cmd_buffer_save_state().
 *
 * Only the members selected by state->flags are restored, in this order:
 * compute shader, descriptor set 0, all descriptor sets, push constants.
 * ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE must be set in state->flags.
 */
void
anv_cmd_buffer_restore_state(struct anv_cmd_buffer *cmd_buffer,
                             struct anv_cmd_saved_state *state)
{
   assert(state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE);

   if (state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE) {
      if (state->shader == NULL) {
         cmd_buffer->state.compute.shader = NULL;
      } else {
         mesa_shader_stage stage = MESA_SHADER_COMPUTE;
         anv_cmd_buffer_bind_shaders(&cmd_buffer->vk, 1, &stage,
                                     &state->shader);
      }
   }

   if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0)
      restore_saved_compute_descriptor_set(cmd_buffer, state, 0);

   if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL) {
      for (uint32_t set = 0; set < MAX_SETS; set++)
         restore_saved_compute_descriptor_set(cmd_buffer, state, set);
   }

   if (state->flags & ANV_CMD_SAVED_STATE_PUSH_CONSTANTS) {
      /* Push the whole saved block back through the regular entrypoint. */
      VkPushConstantsInfoKHR push_info = {
         .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
         .layout = VK_NULL_HANDLE,
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .offset = 0,
         .size = sizeof(state->push_constants),
         .pValues = state->push_constants,
      };
      anv_CmdPushConstants2KHR(anv_cmd_buffer_to_handle(cmd_buffer),
                               &push_info);
   }
}
/**
 * Forward a buffer write to the generation-specific cmd_write_buffer_cp
 * implementation selected from the device info.
 *
 * \param commandBuffer  command buffer handle
 * \param dstAddr        destination device address
 * \param data           source data pointer
 * \param size           size passed through to the implementation
 */
void
anv_cmd_write_buffer_cp(VkCommandBuffer commandBuffer,
                        VkDeviceAddress dstAddr,
                        void *data,
                        uint32_t size)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd, commandBuffer);
   const struct intel_device_info *devinfo = cmd->device->info;

   anv_genX(devinfo, cmd_write_buffer_cp)(cmd, dstAddr, data, size);
}
/**
 * Queue the cache flushes needed after buffer writes.
 *
 * Adds pending HDC-pipeline and untyped-dataport-cache flush bits to the
 * command buffer.
 */
void
anv_cmd_flush_buffer_write_cp(VkCommandBuffer commandBuffer)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd, commandBuffer);

   /* The IR header is written by a compute shader through the BLORP code
    * path, hence the HDC and untyped dataport cache flushes.
    */
   anv_add_pending_pipe_bits(
      cmd,
      VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
      VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
      ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
      ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT,
      "Flush buffer write cp");
}
/**
 * Forward an unaligned dispatch to the generation-specific
 * cmd_dispatch_unaligned implementation selected from the device info.
 *
 * The implementation receives the original command buffer handle together
 * with the three invocation counts.
 */
void
anv_cmd_dispatch_unaligned(VkCommandBuffer commandBuffer,
                           uint32_t invocations_x,
                           uint32_t invocations_y,
                           uint32_t invocations_z)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd, commandBuffer);
   const struct intel_device_info *devinfo = cmd->device->info;

   anv_genX(devinfo, cmd_dispatch_unaligned)(commandBuffer,
                                             invocations_x,
                                             invocations_y,
                                             invocations_z);
}
/* Make `shader` the current compute shader.
 *
 * A NULL shader only clears the binding. Otherwise the compute pipeline is
 * flagged dirty, bind-map dependent state is dirtied, push descriptor flags
 * are refreshed and, if the shader uses ray queries, a ray query buffer is
 * set up for the compute stage.
 */
static void
bind_compute_shader(struct anv_cmd_buffer *cmd_buffer,
                    struct anv_shader *shader)
{
   struct anv_cmd_compute_state *compute = &cmd_buffer->state.compute;

   compute->shader = shader;
   if (!shader)
      return;

   compute->pipeline_dirty = true;
   set_dirty_for_bind_map(cmd_buffer, MESA_SHADER_COMPUTE, &shader->bind_map);

   /* The compute state holds a single shader, passed as a 1-entry array. */
   update_push_descriptor_flags(&compute->base, &compute->shader, 1);

   const uint32_t ray_queries = shader->vk.ray_queries;
   if (ray_queries > 0) {
      assert(cmd_buffer->device->info->verx10 >= 125);
      anv_cmd_buffer_set_rt_query_buffer(cmd_buffer, &compute->base,
                                         ray_queries,
                                         VK_SHADER_STAGE_COMPUTE_BIT);
   }
}
/* Bind a new set of graphics shaders and flag the state that has to be
 * re-emitted as a result.
 *
 * new_shaders is indexed by mesa_shader_stage; a NULL entry means no shader
 * is bound for that stage.
 *
 * In order, this function:
 *  - rebuilds gfx->active_stages and gfx->instance_multiplier,
 *  - selects the streamout stage and diffs its streamout state against the
 *    previously selected streamout stage,
 *  - for every stage whose shader changed, sets the stage dirty bit, diffs
 *    the packed per-stage state and stores the new shader in gfx->shaders[],
 *  - refreshes the push descriptor flags,
 *  - recomputes the per-set dynamic descriptor offsets,
 *  - sets up a ray query buffer if any bound shader uses ray queries.
 */
static void
bind_graphics_shaders(struct anv_cmd_buffer *cmd_buffer,
struct anv_shader *new_shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT])
{
   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
   struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
   uint32_t ray_queries = 0;
   /* Dirty bit to set in gfx->dirty when the shader of a stage changes. */
   static const enum anv_cmd_dirty_bits mesa_stage_to_dirty_bit[] = {
      [MESA_SHADER_VERTEX] = ANV_CMD_DIRTY_VS,
      [MESA_SHADER_TESS_CTRL] = ANV_CMD_DIRTY_HS,
      [MESA_SHADER_TESS_EVAL] = ANV_CMD_DIRTY_DS,
      [MESA_SHADER_GEOMETRY] = ANV_CMD_DIRTY_GS,
      [MESA_SHADER_TASK] = ANV_CMD_DIRTY_TASK,
      [MESA_SHADER_MESH] = ANV_CMD_DIRTY_MESH,
      [MESA_SHADER_FRAGMENT] = ANV_CMD_DIRTY_PS,
   };
   /* Both values are rebuilt from new_shaders in the loop below. */
   gfx->active_stages = 0;
   gfx->instance_multiplier = 0;
   mesa_shader_stage new_streamout_stage = MESA_SHADER_NONE;
   /* Find the last pre-rasterization stage */
   /* Stages are visited from the highest index down. Every bound stage
    * contributes to active_stages and instance_multiplier (the assert checks
    * that all bound shaders agree on the multiplier), but fragment, task and
    * tessellation control shaders are never picked as the streamout stage.
    */
   for (uint32_t i = 0; i < ANV_GRAPHICS_SHADER_STAGE_COUNT; i++) {
      mesa_shader_stage s = ANV_GRAPHICS_SHADER_STAGE_COUNT - i - 1;
      if (new_shaders[s] == NULL)
         continue;
      assert(gfx->instance_multiplier == 0 ||
      gfx->instance_multiplier == new_shaders[s]->instance_multiplier);
      gfx->active_stages |= mesa_to_vk_shader_stage(s);
      gfx->instance_multiplier = new_shaders[s]->instance_multiplier;
      if (s == MESA_SHADER_FRAGMENT ||
      s == MESA_SHADER_TASK ||
      s == MESA_SHADER_TESS_CTRL)
         continue;
      new_streamout_stage = MAX2(new_streamout_stage, s);
   }
/* Diff helpers comparing the packed state `name` of `shader` against the
 * shader currently stored in gfx->shaders[old_stage], and setting
 * ANV_GFX_STATE_<bit> in hw_state->pack_dirty when there is no old shader or
 * the data differs. The "fix" variant asserts both states have the same
 * length; the "var" variant also treats a length mismatch as a difference and
 * never marks dirty when the new state is empty. The length is scaled by 4
 * for memcmp — presumably `len` counts dwords (TODO confirm).
 */
#define diff_fix_state_stage(bit, name, old_stage, shader) \
do { \
/* Fixed states should always have matching sizes */ \
assert(old_stage == MESA_SHADER_NONE || \
gfx->shaders[old_stage] == NULL || \
gfx->shaders[old_stage]->name.len == shader->name.len); \
/* Don't bother memcmp if the state is already dirty */ \
if (!BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_##bit) && \
(old_stage == MESA_SHADER_NONE || \
gfx->shaders[old_stage] == NULL || \
memcmp(&gfx->shaders[old_stage]->cmd_data[ \
gfx->shaders[old_stage]->name.offset], \
&shader->cmd_data[ \
shader->name.offset], \
4 * shader->name.len) != 0)) \
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \
} while (0)
#define diff_var_state_stage(bit, name, old_stage, shader) \
do { \
/* Don't bother memcmp if the state is already dirty */ \
/* Also if the new state is empty, avoid marking dirty */ \
if (!BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_##bit) && \
shader->name.len != 0 && \
(old_stage == MESA_SHADER_NONE || \
gfx->shaders[old_stage] == NULL || \
gfx->shaders[old_stage]->name.len != shader->name.len || \
memcmp(&gfx->shaders[old_stage]->cmd_data[ \
gfx->shaders[old_stage]->name.offset], \
&shader->cmd_data[shader->name.offset], \
4 * shader->name.len) != 0)) \
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \
} while (0)
   if (new_streamout_stage != MESA_SHADER_NONE) {
      /* Compare the stream instructions first because we go through the
       * stages of shaders and update gfx->shaders[], we can't compare the old
       * streamout configuration from the old vertex shader with the new
       * configuration of the tessellation shader.
       */
      diff_fix_state_stage(STREAMOUT, so, gfx->streamout_stage, new_shaders[new_streamout_stage]);
      diff_var_state_stage(SO_DECL_LIST, so_decl_list, gfx->streamout_stage, new_shaders[new_streamout_stage]);
      gfx->streamout_stage = new_streamout_stage;
   }
#undef diff_fix_state_stage
#undef diff_var_state_stage
   /* Per-set dynamic descriptor counts gathered from the bound shaders. */
   uint8_t dynamic_descriptors[MAX_SETS] = {};
   for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
      struct anv_shader *shader = new_shaders[s];
      if (shader != NULL) {
         gfx->active_stages |= mesa_to_vk_shader_stage(s);
         ray_queries = MAX2(ray_queries, shader->vk.ray_queries);
         if (gfx->shaders[s] != shader)
            set_dirty_for_bind_map(cmd_buffer, s, &shader->bind_map);
         /* For each set the shader's binding mask references, record its
          * dynamic descriptor count; the assert checks that all stages
          * referencing the same set report the same count.
          */
         for (uint32_t i = 0; i < MAX_SETS; i++) {
            if (shader->bind_map.binding_mask & ANV_PIPELINE_BIND_MASK_SET(i)) {
               assert(dynamic_descriptors[i] == 0 ||
               dynamic_descriptors[i] ==
               shader->bind_map.dynamic_descriptors[i]);
               dynamic_descriptors[i] = shader->bind_map.dynamic_descriptors[i];
            }
         }
      }
      /* Nothing more to do for a stage whose shader did not change. */
      if (gfx->shaders[s] != shader)
         gfx->dirty |= mesa_stage_to_dirty_bit[s];
      else
         continue;
/* Same-stage diff helpers: compare the packed state `name` of the new shader
 * with the one in gfx->shaders[s] (still the old shader at this point) and
 * set ANV_GFX_STATE_<bit> in hw_state->pack_dirty when they differ or when
 * there was no old shader. Both expect `shader` to be non-NULL.
 */
#define diff_fix_state(bit, name) \
do { \
/* Fixed states should always have matching sizes */ \
assert(gfx->shaders[s] == NULL || \
gfx->shaders[s]->name.len == shader->name.len); \
/* Don't bother memcmp if the state is already dirty */ \
if (!BITSET_TEST(hw_state->pack_dirty, \
ANV_GFX_STATE_##bit) && \
(gfx->shaders[s] == NULL || \
memcmp(&gfx->shaders[s]->cmd_data[ \
gfx->shaders[s]->name.offset], \
&shader->cmd_data[ \
shader->name.offset], \
4 * shader->name.len) != 0)) \
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \
} while (0)
#define diff_var_state(bit, name) \
do { \
/* Don't bother memcmp if the state is already dirty */ \
/* Also if the new state is empty, avoid marking dirty */ \
if (!BITSET_TEST(hw_state->pack_dirty, \
ANV_GFX_STATE_##bit) && \
shader->name.len != 0 && \
(gfx->shaders[s] == NULL || \
gfx->shaders[s]->name.len != shader->name.len || \
memcmp(&gfx->shaders[s]->cmd_data[ \
gfx->shaders[s]->name.offset], \
&shader->cmd_data[shader->name.offset], \
4 * shader->name.len) != 0)) \
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \
} while (0)
      /* With a new shader, diff each packed state of the stage; when the
       * stage is being unbound, mark the listed state(s) dirty outright.
       */
      switch (s) {
      case MESA_SHADER_VERTEX:
         if (shader != NULL) {
            diff_fix_state(VS, vs.vs);
            diff_fix_state(VF_SGVS, vs.vf_sgvs);
            /* VF_SGVS_2 is only diffed on ver >= 11 devices. */
            if (cmd_buffer->device->info->ver >= 11)
               diff_fix_state(VF_SGVS_2, vs.vf_sgvs_2);
            diff_fix_state(VF_COMPONENT_PACKING, vs.vf_component_packing);
            diff_var_state(VF_SGVS_INSTANCING, vs.vf_sgvs_instancing);
            gfx->vs_source_hash = shader->prog_data->source_hash;
         } else {
            BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_VS);
         }
         break;
      case MESA_SHADER_TESS_CTRL:
         if (shader != NULL)
            diff_fix_state(HS, hs.hs);
         else
            BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_HS);
         break;
      case MESA_SHADER_TESS_EVAL:
         if (shader != NULL) {
            diff_fix_state(DS, ds.ds);
            diff_fix_state(TE, ds.te);
         } else {
            BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_DS);
            BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_TE);
         }
         break;
      case MESA_SHADER_GEOMETRY:
         if (shader != NULL)
            diff_fix_state(GS, gs.gs);
         else
            BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_GS);
         break;
      case MESA_SHADER_MESH:
         if (shader != NULL) {
            diff_fix_state(MESH_CONTROL, ms.control);
            diff_fix_state(MESH_SHADER, ms.shader);
            diff_fix_state(MESH_DISTRIB, ms.distrib);
            diff_fix_state(CLIP_MESH, ms.clip);
         } else {
            BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_MESH_CONTROL);
         }
         break;
      case MESA_SHADER_TASK:
         if (shader != NULL) {
            diff_fix_state(TASK_CONTROL, ts.control);
            diff_fix_state(TASK_SHADER, ts.shader);
            diff_fix_state(TASK_REDISTRIB, ts.redistrib);
         } else {
            BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_TASK_CONTROL);
         }
         break;
      case MESA_SHADER_FRAGMENT:
         if (shader != NULL) {
            diff_fix_state(WM, ps.wm);
            diff_fix_state(PS, ps.ps);
            diff_fix_state(PS_EXTRA, ps.ps_extra);
            gfx->fs_source_hash = shader->prog_data->source_hash;
         } else {
            BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_PS_EXTRA);
         }
         break;
      default:
         UNREACHABLE("Invalid shader stage");
      }
      /* Only now replace the old shader: the diffs above read it. */
      gfx->shaders[s] = shader;
   }
#undef diff_fix_state
#undef diff_var_state
   update_push_descriptor_flags(&gfx->base,
   cmd_buffer->state.gfx.shaders,
   ARRAY_SIZE(cmd_buffer->state.gfx.shaders));
   /* Turn the per-set counts into running offsets: each set starts where the
    * dynamic descriptors of the previous sets end.
    */
   uint8_t dynamic_descriptor_count = 0;
   uint8_t dynamic_descriptor_offsets[MAX_SETS] = {};
   for (uint32_t i = 0; i < MAX_SETS; i++) {
      dynamic_descriptor_offsets[i] = dynamic_descriptor_count;
      dynamic_descriptor_count += dynamic_descriptors[i];
   }
   /* Push constants are only re-flagged when the helper reports a change. */
   if (maybe_update_dynamic_buffers_indices(&gfx->base,
   dynamic_descriptor_offsets)) {
      cmd_buffer->state.push_constants_dirty |= gfx->active_stages;
      gfx->base.push_constants_data_dirty = true;
   }
   /* ray_queries holds the maximum requested by any bound shader. */
   if (ray_queries > 0) {
      assert(cmd_buffer->device->info->verx10 >= 125);
      anv_cmd_buffer_set_rt_query_buffer(cmd_buffer, &gfx->base, ray_queries,
      gfx->active_stages);
   }
}
/**
 * Bind a batch of shaders to the command buffer.
 *
 * \param vk_cmd_buffer  common command buffer embedded in an anv_cmd_buffer
 * \param stage_count    number of entries in \p stages and \p vk_shaders
 * \param stages         stage of each entry
 * \param vk_shaders     shader for each entry; NULL unbinds that stage
 *
 * The relocations of every non-NULL shader are appended to the batch. The
 * compute and graphics bindings are then each re-bound only if the requested
 * shaders differ from what is currently bound.
 */
void
anv_cmd_buffer_bind_shaders(struct vk_command_buffer *vk_cmd_buffer,
                            uint32_t stage_count,
                            const mesa_shader_stage *stages,
                            struct vk_shader ** const vk_shaders)
{
   struct anv_cmd_buffer *cmd_buffer =
      container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
   struct anv_shader ** const shaders = (struct anv_shader ** const)vk_shaders;

   /* Append any scratch surface used by the shaders */
   for (uint32_t i = 0; i < stage_count; i++) {
      struct anv_shader *shader = shaders[i];
      if (shader == NULL)
         continue;

      anv_reloc_list_append(cmd_buffer->batch.relocs, &shader->relocs);
   }

   /* Start from the current bindings and overlay the requested changes. */
   struct anv_shader *wanted_cs = cmd_buffer->state.compute.shader;
   struct anv_shader *wanted_gfx[ANV_GRAPHICS_SHADER_STAGE_COUNT];
   memcpy(wanted_gfx, cmd_buffer->state.gfx.shaders, sizeof(wanted_gfx));

   for (uint32_t i = 0; i < stage_count; i++) {
      const mesa_shader_stage stage = stages[i];

      if (mesa_shader_stage_is_compute(stage))
         wanted_cs = shaders[i];
      else
         wanted_gfx[stage] = shaders[i];
   }

   if (wanted_cs != cmd_buffer->state.compute.shader)
      bind_compute_shader(cmd_buffer, wanted_cs);

   if (memcmp(wanted_gfx, cmd_buffer->state.gfx.shaders,
              sizeof(wanted_gfx)) != 0)
      bind_graphics_shaders(cmd_buffer, wanted_gfx);
}
struct anv_companion_prev_cmd_buffer_helper
anv_begin_companion_cmd_buffer_helper(struct anv_cmd_buffer **cmd_buffer,
bool needs_companion)
{
if (likely(!needs_companion))
return (struct anv_companion_prev_cmd_buffer_helper) { 0 };
struct anv_cmd_buffer* prev_cmd_buffer = *cmd_buffer;
const struct intel_device_info *info = prev_cmd_buffer->device->info;
const VkResult result = anv_cmd_buffer_ensure_rcs_companion(prev_cmd_buffer);
if (result != VK_SUCCESS) {
anv_batch_set_error(&prev_cmd_buffer->batch, result);
return (struct anv_companion_prev_cmd_buffer_helper) { 0 };
}
assert(prev_cmd_buffer->companion_rcs_cmd_buffer != NULL);
/* Re-emit the aux table register in every command buffer. This way we're
* ensured that we have the table even if this command buffer doesn't
* initialize any images.
*/
if (prev_cmd_buffer->device->info->has_aux_map) {
anv_add_pending_pipe_bits(prev_cmd_buffer->companion_rcs_cmd_buffer,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_AUX_TABLE_INVALIDATE_BIT,
"new cmd buffer with aux-tt");
}
struct anv_state syncpoint =
anv_genX(info, cmd_buffer_begin_companion_rcs_syncpoint)(prev_cmd_buffer);
*cmd_buffer = prev_cmd_buffer->companion_rcs_cmd_buffer;
return (struct anv_companion_prev_cmd_buffer_helper) {
.prev_cmd_buffer = prev_cmd_buffer,
.syncpoint = syncpoint,
};
}
/**
 * Undo anv_begin_companion_cmd_buffer_helper().
 *
 * A helper with no previous command buffer means nothing was redirected, and
 * the call is a no-op. Otherwise the companion syncpoint is ended on the
 * original command buffer (only if one was allocated) and *cmd_buffer is
 * pointed back at the original command buffer.
 */
void
anv_end_companion_cmd_buffer_helper(struct anv_cmd_buffer **cmd_buffer,
                                    struct anv_companion_prev_cmd_buffer_helper prev_cmd_buffer)
{
   struct anv_cmd_buffer *main_cmd_buffer = prev_cmd_buffer.prev_cmd_buffer;

   if (likely(main_cmd_buffer == NULL))
      return;

   if (prev_cmd_buffer.syncpoint.alloc_size != 0) {
      /* The device info is read through the currently active command
       * buffer.
       */
      const struct intel_device_info *devinfo = (*cmd_buffer)->device->info;
      anv_genX(devinfo, cmd_buffer_end_companion_rcs_syncpoint)(main_cmd_buffer,
                                                                prev_cmd_buffer.syncpoint);
   }

   *cmd_buffer = main_cmd_buffer;
}