anv: add infrastructure for common vk_pipeline

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34872>
This commit is contained in:
Lionel Landwerlin 2024-08-07 23:32:23 +03:00 committed by Marge Bot
parent 7cbabcad36
commit d39e443ef8
7 changed files with 2560 additions and 154 deletions

View file

@ -980,11 +980,6 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
pipe_state->push_constants_data_dirty = true;
}
#define ANV_GRAPHICS_STAGE_BITS \
(VK_SHADER_STAGE_ALL_GRAPHICS | \
VK_SHADER_STAGE_MESH_BIT_EXT | \
VK_SHADER_STAGE_TASK_BIT_EXT)
void anv_CmdBindDescriptorSets2KHR(
VkCommandBuffer commandBuffer,
const VkBindDescriptorSetsInfoKHR* pInfo)

View file

@ -2013,6 +2013,11 @@ get_properties(const struct anv_physical_device *pdevice,
/* We support 4k/64k tiling alignments on most platforms */
props->supportedImageAlignmentMask = (1 << 12) | (1 << 16);
}
/* For the runtime common code (even though we don't support
 * VK_EXT_shader_object)
 */
memcpy(props->shaderBinaryUUID, pdevice->shader_binary_uuid, VK_UUID_SIZE);
}
/* This function restricts the maximum size of system memory heap. The
@ -2761,6 +2766,8 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
anv_physical_device_init_perf(device, fd);
anv_shader_init_uuid(device);
/* Gather major/minor before WSI. */
struct stat st;

View file

@ -97,6 +97,7 @@
#include "vk_pipeline_layout.h"
#include "vk_physical_device.h"
#include "vk_sampler.h"
#include "vk_shader.h"
#include "vk_shader_module.h"
#include "vk_sync.h"
#include "vk_texcompress_astc.h"
@ -151,6 +152,10 @@ struct intel_perf_query_result;
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
#endif
#define ANV_GRAPHICS_STAGE_BITS (VK_SHADER_STAGE_ALL_GRAPHICS | \
VK_SHADER_STAGE_MESH_BIT_EXT | \
VK_SHADER_STAGE_TASK_BIT_EXT)
#define ANV_RT_STAGE_BITS (VK_SHADER_STAGE_RAYGEN_BIT_KHR | \
VK_SHADER_STAGE_ANY_HIT_BIT_KHR | \
VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | \
@ -158,6 +163,11 @@ struct intel_perf_query_result;
VK_SHADER_STAGE_INTERSECTION_BIT_KHR | \
VK_SHADER_STAGE_CALLABLE_BIT_KHR)
#define ANV_VK_STAGE_MASK (ANV_GRAPHICS_STAGE_BITS | \
ANV_RT_STAGE_BITS | \
VK_SHADER_STAGE_COMPUTE_BIT)
#define NSEC_PER_SEC 1000000000ull
#define BINDING_TABLE_POOL_BLOCK_SIZE (65536)
@ -245,6 +255,7 @@ get_max_vbs(const struct intel_device_info *devinfo) {
#define ANV_BINDLESS_SURFACE_BASE_ADDR_REG 0x2668 /* MI_ALU_REG13 */
#define ANV_GRAPHICS_SHADER_STAGE_COUNT (MESA_SHADER_MESH + 1)
#define ANV_RT_SHADER_STAGE_COUNT (MESA_SHADER_CALLABLE - MESA_SHADER_RAYGEN + 1)
/* Defines where various values are defined in the inline parameter register.
*/
@ -1022,6 +1033,188 @@ VkResult anv_bo_cache_init(struct anv_bo_cache *cache,
struct anv_device *device);
void anv_bo_cache_finish(struct anv_bo_cache *cache);
/* Relocations */
/* Tracks the BOs a batch (or other GPU data structure) depends on. */
struct anv_reloc_list {
/* Whether actual relocations are recorded; when false,
 * anv_reloc_list_add_bo() is a no-op (softpin-style fixed addresses —
 * NOTE(review): inferred from usage, confirm).
 */
bool uses_relocs;
/* Number of BITSET_WORDs in deps */
uint32_t dep_words;
/* Dependency bitset (presumably indexed by BO — TODO confirm) */
BITSET_WORD * deps;
/* Allocator used for the list's own storage */
const VkAllocationCallbacks *alloc;
};
VkResult anv_reloc_list_init(struct anv_reloc_list *list,
const VkAllocationCallbacks *alloc,
bool uses_relocs);
void anv_reloc_list_finish(struct anv_reloc_list *list);
VkResult
anv_reloc_list_add_bo_impl(struct anv_reloc_list *list, struct anv_bo *target_bo);
/* Record a dependency on target_bo; no-op when the list doesn't use
 * relocations.
 */
static inline VkResult
anv_reloc_list_add_bo(struct anv_reloc_list *list, struct anv_bo *target_bo)
{
return list->uses_relocs ? anv_reloc_list_add_bo_impl(list, target_bo) : VK_SUCCESS;
}
/* Merge the contents of other into list. */
VkResult anv_reloc_list_append(struct anv_reloc_list *list,
struct anv_reloc_list *other);
/* Shaders */
#define ANV_DESCRIPTOR_SET_PER_PRIM_PADDING (UINT8_MAX - 5)
#define ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER (UINT8_MAX - 4)
#define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 3)
#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 2)
#define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 1)
#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
/* Maps one binding-table or sampler-table entry back to its source: either
 * a normal descriptor set binding, or one of the special
 * ANV_DESCRIPTOR_SET_* pseudo-sets above.
 */
struct anv_pipeline_binding {
/** Index in the descriptor set
 *
 * This is a flattened index; the descriptor set layout is already taken
 * into account.
 */
uint32_t index;
/** Binding in the descriptor set. Not valid for any of the
 * ANV_DESCRIPTOR_SET_*
 */
uint32_t binding;
/** Offset in the descriptor buffer
 *
 * Relative to anv_descriptor_set::desc_addr. This is useful for
 * ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT, to generate the binding
 * table entry.
 */
uint32_t set_offset;
/** The descriptor set this surface corresponds to.
 *
 * The special ANV_DESCRIPTOR_SET_* values above indicates that this
 * binding is not a normal descriptor set but something else.
 */
uint8_t set;
union {
/** Plane in the binding index for images */
uint8_t plane;
/** Input attachment index (relative to the subpass) */
uint8_t input_attachment_index;
/** Dynamic offset index
 *
 * For dynamic UBOs and SSBOs, relative to set.
 */
uint8_t dynamic_offset_index;
};
};
/* Hash key uniquely identifying an embedded sampler's configuration. */
struct anv_embedded_sampler_key {
/** No need to track binding elements for embedded samplers as :
 *
 * VUID-VkDescriptorSetLayoutBinding-flags-08006:
 *
 * "If VkDescriptorSetLayoutCreateInfo:flags contains
 * VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT,
 * descriptorCount must: be less than or equal to 1"
 *
 * The following struct can be safely hashed as it doesn't include any
 * address/offset.
 */
uint32_t sampler[4];
uint32_t color[4];
};
/* Identifies one embedded sampler used by a shader: its set/binding
 * location plus the key data configuring it.
 */
struct anv_pipeline_embedded_sampler_binding {
/** The descriptor set this sampler belongs to */
uint8_t set;
/** The binding in the set this sampler belongs to */
uint32_t binding;
/** The data configuring the sampler */
struct anv_embedded_sampler_key key;
};
/* A range of data pushed to the shader as push constants, sourced from a
 * descriptor set binding.
 */
struct anv_push_range {
/** Index in the descriptor set */
uint32_t index;
/** Descriptor set index */
uint8_t set;
/** Dynamic offset index (for dynamic UBOs), relative to set. */
uint8_t dynamic_offset_index;
/** Start offset in units of 32B */
uint8_t start;
/** Range in units of 32B */
uint8_t length;
};
/* Describes how a compiled shader's binding table, sampler table and push
 * constants map back to the descriptor set layouts.
 */
struct anv_pipeline_bind_map {
/* Content hashes of the tables below (presumably used as cache keys —
 * NOTE(review): confirm exact use).
 */
unsigned char surface_sha1[20];
unsigned char sampler_sha1[20];
unsigned char push_sha1[20];
/* enum anv_descriptor_set_layout_type */
uint32_t layout_type;
uint32_t surface_count;
uint32_t sampler_count;
uint32_t embedded_sampler_count;
/* Arrays of surface_count / sampler_count / embedded_sampler_count
 * entries respectively.
 */
struct anv_pipeline_binding * surface_to_descriptor;
struct anv_pipeline_binding * sampler_to_descriptor;
struct anv_pipeline_embedded_sampler_binding* embedded_sampler_to_binding;
/* Bitset of input attachments referenced by the shader */
BITSET_DECLARE(input_attachments, MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS + 1);
struct anv_push_range push_ranges[4];
};
/* Tracks how a shader uses push descriptors. */
struct anv_push_descriptor_info {
/* A bitfield of descriptors used. */
uint32_t used_descriptors;
/* A bitfield of UBOs bindings fully promoted to push constants. */
uint32_t fully_promoted_ubo_descriptors;
/* A bitfield with one bit set indicating the push descriptor set used. */
uint8_t push_set_buffer;
};
/* A compiled shader, wrapping the common runtime's vk_shader object. */
struct anv_shader {
struct vk_shader vk;
/* GPU code, allocated out of the device's instruction state pool */
struct anv_state kernel;
/* Stage-specific compiler output; points into this object's trailing
 * multi-allocation (see anv_shader_create()).
 */
const struct brw_stage_prog_data *prog_data;
uint32_t prog_data_size;
/* Compile statistics (up to 3 entries — presumably one per dispatch
 * width, TODO confirm).
 */
struct brw_compile_stats stats[3];
uint32_t num_stats;
/* Transform feedback layout; NULL when the shader has none */
struct nir_xfb_info *xfb_info;
struct anv_push_descriptor_info push_desc_info;
struct anv_pipeline_bind_map bind_map;
uint32_t instance_multiplier;
/* Not saved in the pipeline cache.
 *
 * Array of pointers of length bind_map.embedded_sampler_count
 */
struct anv_embedded_sampler **embedded_samplers;
};
extern struct vk_device_shader_ops anv_device_shader_ops;
/* Physical device */
struct anv_queue_family {
/* Standard bits passed on to the client */
VkQueueFlags queueFlags;
@ -1274,6 +1467,7 @@ struct anv_physical_device {
struct anv_memregion vram_non_mappable;
struct anv_memregion sys;
uint8_t driver_build_sha1[20];
uint8_t shader_binary_uuid[VK_UUID_SIZE];
uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
uint8_t driver_uuid[VK_UUID_SIZE];
uint8_t device_uuid[VK_UUID_SIZE];
@ -1458,7 +1652,6 @@ struct anv_queue {
};
struct nir_xfb_info;
struct anv_pipeline_bind_map;
struct anv_pipeline_sets_layout;
struct anv_push_descriptor_info;
@ -2598,30 +2791,6 @@ anv_bo_is_small_heap(enum anv_bo_alloc_flags alloc_flags)
ANV_BO_ALLOC_32BIT_ADDRESS);
}
struct anv_reloc_list {
bool uses_relocs;
uint32_t dep_words;
BITSET_WORD * deps;
const VkAllocationCallbacks *alloc;
};
VkResult anv_reloc_list_init(struct anv_reloc_list *list,
const VkAllocationCallbacks *alloc,
bool uses_relocs);
void anv_reloc_list_finish(struct anv_reloc_list *list);
VkResult
anv_reloc_list_add_bo_impl(struct anv_reloc_list *list, struct anv_bo *target_bo);
static inline VkResult
anv_reloc_list_add_bo(struct anv_reloc_list *list, struct anv_bo *target_bo)
{
return list->uses_relocs ? anv_reloc_list_add_bo_impl(list, target_bo) : VK_SUCCESS;
}
VkResult anv_reloc_list_append(struct anv_reloc_list *list,
struct anv_reloc_list *other);
struct anv_batch_bo {
/* Link in the anv_cmd_buffer.owned_batch_bos list */
struct list_head link;
@ -3057,22 +3226,6 @@ enum anv_descriptor_data {
ANV_DESCRIPTOR_SURFACE_SAMPLER = BITFIELD_BIT(9),
};
struct anv_embedded_sampler_key {
/** No need to track binding elements for embedded samplers as :
*
* VUID-VkDescriptorSetLayoutBinding-flags-08006:
*
* "If VkDescriptorSetLayoutCreateInfo:flags contains
* VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT,
* descriptorCount must: less than or equal to 1"
*
* The following struct can be safely hash as it doesn't include in
* address/offset.
*/
uint32_t sampler[4];
uint32_t color[4];
};
struct anv_descriptor_set_layout_sampler {
/* Immutable sampler used to populate descriptor sets on allocation */
struct anv_sampler *immutable_sampler;
@ -3450,84 +3603,6 @@ anv_descriptor_set_write_template(struct anv_device *device,
const struct vk_descriptor_update_template *template,
const void *data);
#define ANV_DESCRIPTOR_SET_PER_PRIM_PADDING (UINT8_MAX - 5)
#define ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER (UINT8_MAX - 4)
#define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 3)
#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 2)
#define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 1)
#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
struct anv_pipeline_binding {
/** Index in the descriptor set
*
* This is a flattened index; the descriptor set layout is already taken
* into account.
*/
uint32_t index;
/** Binding in the descriptor set. Not valid for any of the
* ANV_DESCRIPTOR_SET_*
*/
uint32_t binding;
/** Offset in the descriptor buffer
*
* Relative to anv_descriptor_set::desc_addr. This is useful for
* ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT, to generate the binding
* table entry.
*/
uint32_t set_offset;
/** The descriptor set this surface corresponds to.
*
* The special ANV_DESCRIPTOR_SET_* values above indicates that this
* binding is not a normal descriptor set but something else.
*/
uint8_t set;
union {
/** Plane in the binding index for images */
uint8_t plane;
/** Input attachment index (relative to the subpass) */
uint8_t input_attachment_index;
/** Dynamic offset index
*
* For dynamic UBOs and SSBOs, relative to set.
*/
uint8_t dynamic_offset_index;
};
};
struct anv_pipeline_embedded_sampler_binding {
/** The descriptor set this sampler belongs to */
uint8_t set;
/** The binding in the set this sampler belongs to */
uint32_t binding;
/** The data configuring the sampler */
struct anv_embedded_sampler_key key;
};
struct anv_push_range {
/** Index in the descriptor set */
uint32_t index;
/** Descriptor set index */
uint8_t set;
/** Dynamic offset index (for dynamic UBOs), relative to set. */
uint8_t dynamic_offset_index;
/** Start offset in units of 32B */
uint8_t start;
/** Range in units of 32B */
uint8_t length;
};
struct anv_pipeline_sets_layout {
struct anv_device *device;
@ -4909,36 +4984,11 @@ struct anv_event {
stage = __builtin_ffs(__tmp) - 1, __tmp; \
__tmp &= ~(1 << (stage)))
struct anv_pipeline_bind_map {
unsigned char surface_sha1[20];
unsigned char sampler_sha1[20];
unsigned char push_sha1[20];
/* enum anv_descriptor_set_layout_type */
uint32_t layout_type;
uint32_t surface_count;
uint32_t sampler_count;
uint32_t embedded_sampler_count;
struct anv_pipeline_binding * surface_to_descriptor;
struct anv_pipeline_binding * sampler_to_descriptor;
struct anv_pipeline_embedded_sampler_binding* embedded_sampler_to_binding;
BITSET_DECLARE(input_attachments, MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS + 1);
struct anv_push_range push_ranges[4];
};
struct anv_push_descriptor_info {
/* A bitfield of descriptors used. */
uint32_t used_descriptors;
/* A bitfield of UBOs bindings fully promoted to push constants. */
uint32_t fully_promoted_ubo_descriptors;
/* A bitfield with one bit set indicating the push descriptor set used. */
uint8_t push_set_buffer;
};
#define anv_foreach_vk_stage(stage, stage_bits) \
for (VkShaderStageFlags stage, \
__tmp = (stage_bits & ANV_VK_STAGE_MASK); \
stage = BITFIELD_BIT(__builtin_ffs(__tmp) - 1), __tmp; \
__tmp &= ~(stage))
struct anv_shader_upload_params {
mesa_shader_stage stage;
@ -5046,6 +5096,8 @@ enum anv_pipeline_type {
ANV_PIPELINE_RAY_TRACING,
};
void anv_shader_init_uuid(struct anv_physical_device *device);
struct anv_pipeline {
struct vk_pipeline vk;

View file

@ -0,0 +1,427 @@
/* Copyright © 2024 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "anv_private.h"
#include "anv_shader.h"
#include "nir/nir_serialize.h"
/* vk_shader_ops::destroy — release everything owned by an anv_shader:
 * the embedded sampler references, the GPU code allocation, and finally
 * the vk_shader allocation itself.
 */
static void
anv_shader_destroy(struct vk_device *vk_device,
                   struct vk_shader *vk_shader,
                   const VkAllocationCallbacks *pAllocator)
{
   struct anv_device *dev = container_of(vk_device, struct anv_device, vk);
   struct anv_shader *s = container_of(vk_shader, struct anv_shader, vk);

   /* Drop the references taken on the embedded samplers at creation. */
   const uint32_t n_samplers = s->bind_map.embedded_sampler_count;
   for (uint32_t idx = 0; idx < n_samplers; idx++)
      anv_embedded_sampler_unref(dev, s->embedded_samplers[idx]);

   anv_state_pool_free(&dev->instruction_state_pool, s->kernel);
   vk_shader_free(vk_device, pAllocator, vk_shader);
}
/* Counterpart of anv_shader_serialize(): reads the fields back in exactly
 * the order they were written.
 *
 * The pointers stashed in `data` (code, relocs, bind map arrays, xfb_info)
 * alias the blob's memory; anv_shader_create() copies everything it needs
 * into its own allocation.
 *
 * Returns VK_ERROR_UNKNOWN when the blob is truncated or contains
 * out-of-range counts.
 */
VkResult
anv_shader_deserialize(struct vk_device *vk_device,
                       struct blob_reader *blob,
                       uint32_t binary_version,
                       const VkAllocationCallbacks* pAllocator,
                       struct vk_shader **shader_out)
{
   struct anv_device *device = container_of(vk_device, struct anv_device, vk);
   struct anv_shader_data data = {};

   mesa_shader_stage stage = blob_read_uint32(blob);

   uint32_t code_len = blob_read_uint32(blob);
   data.code = (void *)blob_read_bytes(blob, code_len);

   blob_copy_bytes(blob, &data.prog_data, brw_prog_data_size(stage));
   data.prog_data.base.relocs =
      blob_read_bytes(blob, data.prog_data.base.num_relocs *
                            sizeof(data.prog_data.base.relocs[0]));

   data.num_stats = blob_read_uint32(blob);
   /* num_stats comes from external binary data: bound it before copying
    * into the fixed-size stats array, otherwise a corrupted/hostile blob
    * could overflow the stack buffer.
    */
   if (data.num_stats > ARRAY_SIZE(data.stats))
      return vk_error(device, VK_ERROR_UNKNOWN);
   blob_copy_bytes(blob, data.stats, data.num_stats * sizeof(data.stats[0]));

   uint32_t xfb_size = blob_read_uint32(blob);
   /* A size of 0 marks the absence of xfb info (see the serialize side). */
   if (xfb_size)
      data.xfb_info = blob_read_bytes(blob, xfb_size);

   data.instance_multiplier = blob_read_uint32(blob);

   data.push_desc_info.used_descriptors = blob_read_uint32(blob);
   data.push_desc_info.fully_promoted_ubo_descriptors = blob_read_uint32(blob);
   data.push_desc_info.push_set_buffer = blob_read_uint8(blob);

   blob_copy_bytes(blob, data.bind_map.surface_sha1, sizeof(data.bind_map.surface_sha1));
   blob_copy_bytes(blob, data.bind_map.sampler_sha1, sizeof(data.bind_map.sampler_sha1));
   blob_copy_bytes(blob, data.bind_map.push_sha1, sizeof(data.bind_map.push_sha1));
   data.bind_map.layout_type = blob_read_uint32(blob);
   data.bind_map.surface_count = blob_read_uint32(blob);
   data.bind_map.sampler_count = blob_read_uint32(blob);
   data.bind_map.embedded_sampler_count = blob_read_uint32(blob);
   data.bind_map.surface_to_descriptor = (void *)
      blob_read_bytes(blob, data.bind_map.surface_count *
                            sizeof(*data.bind_map.surface_to_descriptor));
   data.bind_map.sampler_to_descriptor = (void *)
      blob_read_bytes(blob, data.bind_map.sampler_count *
                            sizeof(*data.bind_map.sampler_to_descriptor));
   data.bind_map.embedded_sampler_to_binding = (void *)
      blob_read_bytes(blob, data.bind_map.embedded_sampler_count *
                            sizeof(*data.bind_map.embedded_sampler_to_binding));
   blob_copy_bytes(blob, data.bind_map.input_attachments,
                   sizeof(data.bind_map.input_attachments));
   blob_copy_bytes(blob, data.bind_map.push_ranges, sizeof(data.bind_map.push_ranges));

   /* The reads above are individually unchecked; reject the binary if any
    * of them ran past the end of the blob.
    */
   if (blob->overrun)
      return vk_error(device, VK_ERROR_UNKNOWN);

   return anv_shader_create(device, stage, &data, pAllocator, shader_out);
}
/* vk_shader_ops::serialize implementation.
 *
 * Writes the shader in the exact order anv_shader_deserialize() reads it
 * back: stage, code, prog_data (+ relocs), stats, xfb info, instance
 * multiplier, push descriptor info and finally the bind map.
 */
static bool
anv_shader_serialize(struct vk_device *device,
const struct vk_shader *vk_shader,
struct blob *blob)
{
struct anv_shader *shader =
container_of(vk_shader, struct anv_shader, vk);
blob_write_uint32(blob, vk_shader->stage);
/* Code length followed by the code bytes themselves. */
blob_write_uint32(blob, shader->prog_data->program_size);
blob_write_bytes(blob, shader->kernel.map,
shader->prog_data->program_size);
/* Serialize a copy of prog_data with the host pointers cleared: relocs
 * are appended separately right after, and param is not needed at
 * runtime.
 */
union brw_any_prog_data prog_data;
memcpy(&prog_data, shader->prog_data, brw_prog_data_size(vk_shader->stage));
prog_data.base.relocs = NULL;
prog_data.base.param = NULL;
blob_write_bytes(blob, &prog_data, brw_prog_data_size(vk_shader->stage));
blob_write_bytes(blob, shader->prog_data->relocs,
shader->prog_data->num_relocs *
sizeof(shader->prog_data->relocs[0]));
blob_write_uint32(blob, shader->num_stats);
blob_write_bytes(blob, shader->stats,
shader->num_stats * sizeof(shader->stats[0]));
if (shader->xfb_info) {
uint32_t xfb_info_size =
nir_xfb_info_size(shader->xfb_info->output_count);
blob_write_uint32(blob, xfb_info_size);
blob_write_bytes(blob, shader->xfb_info, xfb_info_size);
} else {
/* A size of 0 marks the absence of xfb info. */
blob_write_uint32(blob, 0);
}
blob_write_uint32(blob, shader->instance_multiplier);
blob_write_uint32(blob, shader->push_desc_info.used_descriptors);
blob_write_uint32(blob, shader->push_desc_info.fully_promoted_ubo_descriptors);
blob_write_uint8(blob, shader->push_desc_info.push_set_buffer);
/* Bind map: fixed-size fields first, then the variable-length arrays
 * sized by the counts written just above.
 */
blob_write_bytes(blob, shader->bind_map.surface_sha1,
sizeof(shader->bind_map.surface_sha1));
blob_write_bytes(blob, shader->bind_map.sampler_sha1,
sizeof(shader->bind_map.sampler_sha1));
blob_write_bytes(blob, shader->bind_map.push_sha1,
sizeof(shader->bind_map.push_sha1));
blob_write_uint32(blob, shader->bind_map.layout_type);
blob_write_uint32(blob, shader->bind_map.surface_count);
blob_write_uint32(blob, shader->bind_map.sampler_count);
blob_write_uint32(blob, shader->bind_map.embedded_sampler_count);
blob_write_bytes(blob, shader->bind_map.surface_to_descriptor,
shader->bind_map.surface_count *
sizeof(*shader->bind_map.surface_to_descriptor));
blob_write_bytes(blob, shader->bind_map.sampler_to_descriptor,
shader->bind_map.sampler_count *
sizeof(*shader->bind_map.sampler_to_descriptor));
blob_write_bytes(blob, shader->bind_map.embedded_sampler_to_binding,
shader->bind_map.embedded_sampler_count *
sizeof(*shader->bind_map.embedded_sampler_to_binding));
blob_write_bytes(blob, shader->bind_map.input_attachments,
sizeof(shader->bind_map.input_attachments));
blob_write_bytes(blob, shader->bind_map.push_ranges,
sizeof(shader->bind_map.push_ranges));
return !blob->out_of_memory;
}
/* vk_shader_ops::get_executable_properties.
 *
 * No executables are reported yet. Still write a count of 0: per the
 * vkGetPipelineExecutablePropertiesKHR contract, the in/out count must
 * always be written, otherwise a caller querying the count reads an
 * unmodified (possibly uninitialized) value.
 */
static VkResult
anv_shader_get_executable_properties(struct vk_device *device,
                                     const struct vk_shader *shader,
                                     uint32_t *executable_count,
                                     VkPipelineExecutablePropertiesKHR *properties)
{
   *executable_count = 0;
   return VK_SUCCESS;
}
/* vk_shader_ops::get_executable_statistics.
 *
 * No statistics are reported yet; write a count of 0 so the in/out count
 * is always defined for the caller (same contract as the other
 * VK_KHR_pipeline_executable_properties queries).
 */
static VkResult
anv_shader_get_executable_statistics(struct vk_device *device,
                                     const struct vk_shader *shader,
                                     uint32_t executable_index,
                                     uint32_t *statistic_count,
                                     VkPipelineExecutableStatisticKHR *statistics)
{
   *statistic_count = 0;
   return VK_SUCCESS;
}
/* vk_shader_ops::get_executable_internal_representations.
 *
 * No internal representations are reported yet; write a count of 0 so the
 * in/out count is always defined for the caller.
 */
static VkResult
anv_shader_get_executable_internal_representations(
   struct vk_device *device,
   const struct vk_shader *shader,
   uint32_t executable_index,
   uint32_t *internal_representation_count,
   VkPipelineExecutableInternalRepresentationKHR *internal_representations)
{
   *internal_representation_count = 0;
   return VK_SUCCESS;
}
static struct vk_shader_ops anv_shader_ops = {
.destroy = anv_shader_destroy,
.serialize = anv_shader_serialize,
.get_executable_properties = anv_shader_get_executable_properties,
.get_executable_statistics = anv_shader_get_executable_statistics,
.get_executable_internal_representations =
anv_shader_get_executable_internal_representations,
};
/* Patch the compiler relocations into the shader code.
 *
 * `code` is the CPU copy of the shader binary; anv_shader_create() calls
 * this before uploading it to shader->kernel.map. All the values resolved
 * here (pool base addresses, kernel offset, printf buffer, embedded
 * sampler offsets) are fixed for the lifetime of the device, so they can
 * be baked into the code once.
 */
static VkResult
anv_shader_reloc(struct anv_device *device,
void *code,
struct anv_shader *shader,
const VkAllocationCallbacks *pAllocator)
{
/* Address of the shader's constant data within the instruction pool. */
uint64_t shader_data_addr =
device->physical->va.instruction_state_pool.addr +
shader->kernel.offset +
shader->prog_data->const_data_offset;
/* One slot per fixed reloc ID plus one per embedded sampler (the
 * embedded sampler IDs start at BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE
 * — see the loop at the bottom).
 */
const uint32_t max_relocs =
BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE +
shader->bind_map.embedded_sampler_count;
uint32_t rv_count = 0;
struct brw_shader_reloc_value *reloc_values =
vk_zalloc2(&device->vk.alloc, pAllocator,
sizeof(struct brw_shader_reloc_value) * max_relocs, 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (reloc_values == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
/* The asserts below check that each pool is aligned such that only the
 * high 32 bits vary, so patching just ADDR_HIGH is sufficient.
 */
assert((device->physical->va.dynamic_visible_pool.addr & 0xffffffff) == 0);
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_INSTRUCTION_BASE_ADDR_HIGH,
.value = device->physical->va.instruction_state_pool.addr >> 32,
};
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH,
.value = device->physical->va.dynamic_visible_pool.addr >> 32,
};
assert((device->physical->va.indirect_descriptor_pool.addr & 0xffffffff) == 0);
assert((device->physical->va.internal_surface_state_pool.addr & 0xffffffff) == 0);
/* Descriptor base depends on the descriptor model of the device. */
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
.value = device->physical->indirect_descriptors ?
(device->physical->va.indirect_descriptor_pool.addr >> 32) :
(device->physical->va.internal_surface_state_pool.addr >> 32),
};
assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
.value = shader_data_addr,
};
assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
assert(shader_data_addr >> 32 == device->physical->va.instruction_state_pool.addr >> 32);
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
.value = device->physical->va.instruction_state_pool.addr >> 32,
};
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_SHADER_START_OFFSET,
.value = shader->kernel.offset,
};
/* Bindless (ray tracing) stages also get the resume SBT address. */
if (brw_shader_stage_is_bindless(shader->vk.stage)) {
const struct brw_bs_prog_data *bs_prog_data =
brw_bs_prog_data_const(shader->prog_data);
uint64_t resume_sbt_addr =
device->physical->va.instruction_state_pool.addr +
shader->kernel.offset +
bs_prog_data->resume_sbt_offset;
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
.value = resume_sbt_addr,
};
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
.value = resume_sbt_addr >> 32,
};
}
/* Shader printf debugging: point the code at the device printf BO. */
if (INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
struct anv_bo *bo = device->printf.bo;
assert(bo != NULL);
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_PRINTF_BUFFER_ADDR_LOW,
.value = bo->offset & 0xffffffff,
};
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_PRINTF_BUFFER_ADDR_HIGH,
.value = bo->offset >> 32,
};
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_PRINTF_BUFFER_SIZE,
.value = anv_printf_buffer_size(),
};
}
/* One reloc per embedded sampler, giving its sampler state offset. */
for (uint32_t i = 0; i < shader->bind_map.embedded_sampler_count; i++) {
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE + i,
.value = shader->embedded_samplers[i]->sampler_state.offset,
};
}
assert(rv_count <= max_relocs);
brw_write_shader_relocs(&device->physical->compiler->isa,
code, shader->prog_data,
reloc_values, rv_count);
vk_free2(&device->vk.alloc, pAllocator, reloc_values);
return VK_SUCCESS;
}
/* Create an anv_shader from compiled shader data.
 *
 * All variable-length data (prog_data, relocs, xfb info, bind map arrays,
 * embedded sampler pointers) is copied into a single multi-allocation
 * owned by the vk_shader, so shader_data may point into caches/blobs that
 * go away afterwards. The shader code is relocated in place in
 * shader_data->code, then uploaded into the instruction state pool.
 *
 * On failure nothing is leaked (goto-based cleanup).
 */
VkResult
anv_shader_create(struct anv_device *device,
                  mesa_shader_stage stage,
                  struct anv_shader_data *shader_data,
                  const VkAllocationCallbacks *pAllocator,
                  struct vk_shader **shader_out)
{
   /* We never need this at runtime */
   shader_data->prog_data.base.param = NULL;

   VK_MULTIALLOC(ma);
   VK_MULTIALLOC_DECL(&ma, struct anv_shader, shader, 1);
   VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, brw_prog_key_size(stage));
   VK_MULTIALLOC_DECL_SIZE(&ma, struct brw_stage_prog_data, prog_data,
                           brw_prog_data_size(stage));
   VK_MULTIALLOC_DECL(&ma, struct brw_shader_reloc, prog_data_relocs,
                      shader_data->prog_data.base.num_relocs);
   VK_MULTIALLOC_DECL_SIZE(&ma, nir_xfb_info, xfb_info,
                           shader_data->xfb_info == NULL ? 0 :
                           nir_xfb_info_size(shader_data->xfb_info->output_count));
   VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, surface_to_descriptor,
                      shader_data->bind_map.surface_count);
   VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, sampler_to_descriptor,
                      shader_data->bind_map.sampler_count);
   VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_embedded_sampler_binding,
                      embedded_sampler_to_binding,
                      shader_data->bind_map.embedded_sampler_count);
   VK_MULTIALLOC_DECL(&ma, struct anv_embedded_sampler *, embedded_samplers,
                      shader_data->bind_map.embedded_sampler_count);

   /* Zero-allocates, so every field not explicitly set below is 0/NULL. */
   if (!vk_shader_multizalloc(&device->vk, &ma, &anv_shader_ops,
                              stage, pAllocator))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result;

   /* Take references on the embedded samplers used by this shader. */
   if (shader_data->bind_map.embedded_sampler_count > 0) {
      shader->embedded_samplers = embedded_samplers;
      result = anv_device_get_embedded_samplers(
         device, embedded_samplers, &shader_data->bind_map);
      if (result != VK_SUCCESS)
         goto error_shader;
   }

   shader->kernel =
      anv_state_pool_alloc(&device->instruction_state_pool,
                           shader_data->prog_data.base.program_size, 64);
   if (shader->kernel.alloc_size == 0) {
      result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
      goto error_embedded_samplers;
   }

   /* Copy prog_data and repoint its relocs at our own copy. */
   memcpy(prog_data, &shader_data->prog_data, brw_prog_data_size(stage));
   typed_memcpy(prog_data_relocs,
                shader_data->prog_data.base.relocs,
                shader_data->prog_data.base.num_relocs);
   prog_data->relocs = prog_data_relocs;
   shader->prog_data = prog_data;
   shader->num_stats = shader_data->num_stats;
   typed_memcpy(shader->stats, shader_data->stats, shader_data->num_stats);

   if (shader_data->xfb_info) {
      *xfb_info = *shader_data->xfb_info;
      typed_memcpy(xfb_info->outputs, shader_data->xfb_info->outputs,
                   shader_data->xfb_info->output_count);
      shader->xfb_info = xfb_info;
   }

   typed_memcpy(&shader->push_desc_info, &shader_data->push_desc_info, 1);

   /* Copy the bind map, then repoint its arrays at our own storage. */
   typed_memcpy(&shader->bind_map, &shader_data->bind_map, 1);
   typed_memcpy(surface_to_descriptor,
                shader_data->bind_map.surface_to_descriptor,
                shader_data->bind_map.surface_count);
   typed_memcpy(sampler_to_descriptor,
                shader_data->bind_map.sampler_to_descriptor,
                shader_data->bind_map.sampler_count);
   typed_memcpy(embedded_sampler_to_binding,
                shader_data->bind_map.embedded_sampler_to_binding,
                shader_data->bind_map.embedded_sampler_count);
   typed_memcpy(shader->bind_map.input_attachments,
                shader_data->bind_map.input_attachments,
                ARRAY_SIZE(shader_data->bind_map.input_attachments));
   shader->bind_map.surface_to_descriptor = surface_to_descriptor;
   shader->bind_map.sampler_to_descriptor = sampler_to_descriptor;
   shader->bind_map.embedded_sampler_to_binding = embedded_sampler_to_binding;
   shader->instance_multiplier = shader_data->instance_multiplier;

   /* Apply relocations to the CPU copy, then upload the final code. */
   result = anv_shader_reloc(device, shader_data->code, shader, pAllocator);
   if (result != VK_SUCCESS)
      goto error_embedded_samplers;

   memcpy(shader->kernel.map, shader_data->code,
          shader_data->prog_data.base.program_size);

   if (mesa_shader_stage_is_rt(shader->vk.stage)) {
      const struct brw_bs_prog_data *bs_prog_data =
         (const struct brw_bs_prog_data *)shader->prog_data;
      shader->vk.stack_size = bs_prog_data->max_stack_size;
   }
   shader->vk.scratch_size = shader->prog_data->total_scratch;
   shader->vk.ray_queries = shader->prog_data->ray_queries;

   *shader_out = &shader->vk;

   return VK_SUCCESS;

 error_embedded_samplers:
   /* Fix: use the count from shader_data here. If the kernel allocation
    * failed, shader->bind_map has not been copied yet (still zeroed), so
    * iterating on shader->bind_map.embedded_sampler_count would skip the
    * unrefs and leak the sampler references taken above.
    */
   for (uint32_t s = 0; s < shader_data->bind_map.embedded_sampler_count; s++)
      anv_embedded_sampler_unref(device, shader->embedded_samplers[s]);
   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
 error_shader:
   vk_shader_free(&device->vk, pAllocator, &shader->vk);
   return result;
}

View file

@ -0,0 +1,59 @@
/* Copyright © 2024 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#pragma once
#include "anv_private.h"
#include "anv_nir.h"
#include "nir/nir_xfb_info.h"
/* Transient bundle of compiler outputs consumed by anv_shader_create()
 * (which copies everything it needs into the anv_shader allocation) and
 * filled by anv_shader_deserialize() when loading a serialized binary.
 */
struct anv_shader_data {
struct vk_shader_compile_info *info;
struct vk_shader **shader_out;
union brw_any_prog_key key;
uint32_t key_size;
union brw_any_prog_data prog_data;
uint32_t source_hash;
/* Transform feedback layout; NULL when the shader has none */
const nir_xfb_info *xfb_info;
/* Up to 3 compile statistics entries */
uint32_t num_stats;
struct brw_compile_stats stats[3];
char *disasm[3];
bool use_primitive_replication;
uint32_t instance_multiplier;
/* For fragment shaders only (NOTE(review): presumably the mesh output
 * map the FS inputs are matched against — confirm with the compile path)
 */
struct brw_mue_map *mue_map;
struct anv_push_descriptor_info push_desc_info;
struct anv_pipeline_bind_map bind_map;
struct anv_pipeline_push_map push_map;
bool uses_bt_for_push_descs;
/* Shader binary; relocated in place by anv_shader_create() before upload */
unsigned *code;
};
VkResult anv_shader_create(struct anv_device *device,
mesa_shader_stage stage,
struct anv_shader_data *shader_data,
const VkAllocationCallbacks *pAllocator,
struct vk_shader **shader_out);
VkResult anv_shader_deserialize(struct vk_device *device,
struct blob_reader *blob,
uint32_t binary_version,
const VkAllocationCallbacks* pAllocator,
struct vk_shader **shader_out);
extern struct vk_device_shader_ops anv_device_shader_ops;

File diff suppressed because it is too large Load diff

View file

@ -187,6 +187,8 @@ libanv_files = files(
'anv_rmv.c',
'anv_rmv.h',
'anv_sampler.c',
'anv_shader.c',
'anv_shader_compile.c',
'anv_slab_bo.c',
'anv_slab_bo.h',
'anv_sparse.c',