mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 15:48:36 +02:00
anv: add support for indirect execution set
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31384>
This commit is contained in:
parent
c6dc2df9e5
commit
68885511d2
7 changed files with 485 additions and 0 deletions
|
|
@ -51,6 +51,8 @@ genX_bits_included_symbols = [
|
|||
'3DSTATE_STENCIL_BUFFER::Surface Pitch',
|
||||
'3DSTATE_HIER_DEPTH_BUFFER::Surface Base Address',
|
||||
'3DSTATE_HIER_DEPTH_BUFFER::Surface Pitch',
|
||||
'3DSTATE_DS',
|
||||
'3DSTATE_HS',
|
||||
'3DSTATE_CLEAR_PARAMS',
|
||||
'3DSTATE_SO_BUFFER::Surface Base Address',
|
||||
'3DSTATE_SO_BUFFER::Stream Offset',
|
||||
|
|
|
|||
336
src/intel/vulkan/anv_dgc_set.c
Normal file
336
src/intel/vulkan/anv_dgc_set.c
Normal file
|
|
@ -0,0 +1,336 @@
|
|||
/*
|
||||
* Copyright 2024 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "genxml/genX_bits.h"
|
||||
|
||||
#include "anv_private.h"
|
||||
|
||||
enum anv_dgc_stage
|
||||
anv_vk_stage_to_dgc_stage(VkShaderStageFlags vk_stage)
|
||||
{
|
||||
switch (vk_stage) {
|
||||
case VK_SHADER_STAGE_VERTEX_BIT:
|
||||
return ANV_DGC_STAGE_VERTEX;
|
||||
case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
|
||||
return ANV_DGC_STAGE_TESS_CTRL;
|
||||
case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
|
||||
return ANV_DGC_STAGE_TESS_EVAL;
|
||||
case VK_SHADER_STAGE_GEOMETRY_BIT:
|
||||
return ANV_DGC_STAGE_GEOMETRY;
|
||||
case VK_SHADER_STAGE_FRAGMENT_BIT:
|
||||
return ANV_DGC_STAGE_FRAGMENT;
|
||||
case VK_SHADER_STAGE_TASK_BIT_EXT:
|
||||
return ANV_DGC_STAGE_TASK;
|
||||
case VK_SHADER_STAGE_MESH_BIT_EXT:
|
||||
return ANV_DGC_STAGE_MESH;
|
||||
case VK_SHADER_STAGE_COMPUTE_BIT:
|
||||
return ANV_DGC_STAGE_COMPUTE;
|
||||
default:
|
||||
UNREACHABLE("Unhandled stage");
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t
|
||||
anv_vk_stages_to_generated_stages(VkShaderStageFlags vk_stages)
|
||||
{
|
||||
uint32_t gen_stages = 0;
|
||||
anv_foreach_vk_stage(stage, vk_stages)
|
||||
gen_stages |= BITFIELD_BIT(anv_vk_stage_to_dgc_stage(stage));
|
||||
return gen_stages;
|
||||
}
|
||||
|
||||
/* Fill a graphics generated-commands descriptor from the current graphics
 * state: the extra 3DSTATE_HS/3DSTATE_DS commands needed by hardware
 * workarounds, and the per-stage push-constant layout for every graphics
 * stage (active stages get their real layout, inactive stages are zeroed).
 */
void
anv_write_gfx_indirect_descriptor(struct anv_device *device,
                                  struct anv_dgc_gfx_descriptor *descriptor,
                                  struct anv_cmd_graphics_state *gfx)
{
   /* Used to zero out the push state of stages that carry no data. */
   struct anv_dgc_push_stage_state empty_push = {};

   /* Wa_16011107343: when tessellation control is in use, the packed
    * 3DSTATE_HS must be re-emitted as part of the final commands.
    */
   if (intel_needs_workaround(device->info, 16011107343) &&
       gfx->shaders[MESA_SHADER_TESS_CTRL] != NULL) {
      memcpy(&descriptor->final_commands[descriptor->final_commands_size],
             gfx->dyn_state.packed.hs,
             _3DSTATE_HS_length(device->info) * 4);
      descriptor->final_commands_size += _3DSTATE_HS_length(device->info) * 4;
   }

   /* Wa_22018402687: same treatment for 3DSTATE_DS when tessellation
    * evaluation is in use.
    */
   if (intel_needs_workaround(device->info, 22018402687) &&
       gfx->shaders[MESA_SHADER_TESS_EVAL] != NULL) {
      memcpy(&descriptor->final_commands[descriptor->final_commands_size],
             gfx->dyn_state.packed.ds,
             _3DSTATE_DS_length(device->info) * 4);
      descriptor->final_commands_size += _3DSTATE_DS_length(device->info) * 4;
   }
   assert(descriptor->final_commands_size <= sizeof(descriptor->final_commands));

   anv_foreach_vk_stage(vk_stage, ANV_GRAPHICS_STAGE_BITS) {
      enum anv_dgc_stage gen_stage = anv_vk_stage_to_dgc_stage(vk_stage);
      enum mesa_shader_stage stage = vk_to_mesa_shader_stage(vk_stage);

      /* Inactive stage: make sure no stale data is left behind. */
      if ((gfx->active_stages & vk_stage) == 0) {
         descriptor->push_constants.stages[gen_stage] = empty_push;
         continue;
      }

      const struct anv_pipeline_bind_map *bind_map =
         &gfx->shaders[stage]->bind_map;
      /* Stage active but with nothing to push: also zero it out. */
      if ((bind_map->push_ranges[0].length == 0 ||
           bind_map->push_ranges[0].set != ANV_DESCRIPTOR_SET_PUSH_CONSTANTS) &&
          bind_map->inline_dwords_count == 0) {
         descriptor->push_constants.stages[gen_stage] = empty_push;
         continue;
      }

      /* Wa_18019110168: record where the mesh shader's remapping table
       * lives relative to the kernel.
       */
      if (stage == MESA_SHADER_MESH &&
          intel_needs_workaround(device->info, 18019110168)) {
         const struct brw_mesh_prog_data *mesh_prog_data = get_gfx_mesh_prog_data(gfx);
         descriptor->wa_18019110168_remapping_table_offset =
            gfx->shaders[MESA_SHADER_MESH]->kernel.offset +
            mesh_prog_data->wa_18019110168_mapping_offset;
      }

      if (stage == MESA_SHADER_MESH || stage == MESA_SHADER_TASK) {
         /* Mesh/task use the bindless path: copy the inline dwords. */
         descriptor->push_constants.stages[gen_stage].bindless.inline_dwords_count =
            bind_map->inline_dwords_count;
         assert(sizeof(bind_map->inline_dwords) ==
                sizeof(descriptor->push_constants.stages[gen_stage].bindless.inline_dwords));
         memcpy(descriptor->push_constants.stages[gen_stage].bindless.inline_dwords,
                bind_map->inline_dwords, sizeof(bind_map->inline_dwords));
      } else {
         /* Legacy path: translate each push range into a slot. Ranges are
          * packed at the front of push_ranges, so stop at the first empty
          * one.
          */
         for (uint32_t i = 0; i < ARRAY_SIZE(bind_map->push_ranges); i++) {
            const struct anv_push_range *range = &bind_map->push_ranges[i];
            if (range->length == 0)
               break;

            /* We should have compiled all the indirectly bindable shaders
             * in such a way that these are the only types of push constants
             * we should see.
             */
            assert(range->set == ANV_DESCRIPTOR_SET_PUSH_CONSTANTS ||
                   range->set == ANV_DESCRIPTOR_SET_DESCRIPTORS ||
                   range->set == ANV_DESCRIPTOR_SET_NULL ||
                   range->set == ANV_DESCRIPTOR_SET_PER_PRIM_PADDING);

            struct anv_dgc_push_stage_slot *slot =
               &descriptor->push_constants.stages[gen_stage].legacy.slots[i];

            /* range->length/start are in 32-byte units. */
            slot->push_data_size = 32 * range->length;

            slot->push_data_offset = 32 * range->start;
            slot->type = ANV_DGC_PUSH_SLOT_TYPE_PUSH_CONSTANTS;
            descriptor->push_constants.stages[gen_stage].legacy.n_slots++;
         }
      }
      descriptor->push_constants.active_stages |= 1u << gen_stage;
   }
}
|
||||
|
||||
static void
|
||||
write_cs_set_entry(struct anv_device *device,
|
||||
struct anv_indirect_execution_set *indirect_set,
|
||||
uint32_t entry, struct anv_shader *shader)
|
||||
{
|
||||
struct anv_dgc_cs_descriptor descriptor;
|
||||
anv_genX(device->info, write_cs_descriptor)(&descriptor, device, shader);
|
||||
|
||||
const struct brw_cs_prog_data *prog_data =
|
||||
brw_cs_prog_data_const(shader->prog_data);
|
||||
|
||||
if (device->info->verx10 < 125)
|
||||
anv_reloc_list_append(&indirect_set->relocs, &shader->relocs);
|
||||
|
||||
memcpy(indirect_set->bo->map + entry * indirect_set->stride,
|
||||
&descriptor, sizeof(descriptor));
|
||||
|
||||
indirect_set->uses_systolic |= prog_data->uses_systolic;
|
||||
indirect_set->max_scratch = MAX2(indirect_set->max_scratch,
|
||||
prog_data->base.total_scratch);
|
||||
indirect_set->max_ray_queries = MAX2(indirect_set->max_ray_queries,
|
||||
shader->prog_data->ray_queries);
|
||||
}
|
||||
|
||||
static void
|
||||
write_rt_set_entry(struct anv_indirect_execution_set *indirect_set,
|
||||
uint32_t entry, struct vk_pipeline *pipeline)
|
||||
{
|
||||
indirect_set->max_scratch = MAX2(indirect_set->max_scratch,
|
||||
vk_pipeline_get_rt_scratch_size(pipeline));
|
||||
indirect_set->max_ray_queries = MAX2(indirect_set->max_ray_queries,
|
||||
vk_pipeline_get_rt_ray_queries(pipeline));
|
||||
}
|
||||
|
||||
/* Create an indirect execution set (VK_EXT_device_generated_commands).
 *
 * Compute sets get a BO holding one anv_dgc_cs_descriptor per entry plus a
 * clone of the initial shader's bind map; ray-tracing sets only track
 * scratch/ray-query maxima and allocate no BO. On failure, everything
 * allocated so far is unwound through the goto cleanup chain.
 */
VkResult anv_CreateIndirectExecutionSetEXT(
    VkDevice _device,
    const VkIndirectExecutionSetCreateInfoEXT* pCreateInfo,
    const VkAllocationCallbacks* pAllocator,
    VkIndirectExecutionSetEXT* pIndirectExecutionSet)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   struct anv_indirect_execution_set *indirect_set =
      vk_object_zalloc(&device->vk, pAllocator,
                       sizeof(struct anv_indirect_execution_set),
                       VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT);
   if (indirect_set == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result =
      anv_reloc_list_init(&indirect_set->relocs,
                          pAllocator ? pAllocator : &device->vk.alloc,
                          device->physical->uses_relocs);
   if (result != VK_SUCCESS)
      goto fail_object;

   /* Work out the entry count, the bind point and (for compute) the initial
    * shader, from either the pipeline-type or shader-type create info.
    */
   struct vk_pipeline *vk_pipeline = NULL;
   struct vk_shader *vk_shader = NULL;
   VkPipelineBindPoint bind_point;
   uint32_t entry_count;
   if (pCreateInfo->type == VK_INDIRECT_EXECUTION_SET_INFO_TYPE_PIPELINES_EXT) {
      entry_count = pCreateInfo->info.pPipelineInfo->maxPipelineCount;
      vk_pipeline =
         vk_pipeline_from_handle(pCreateInfo->info.pPipelineInfo->initialPipeline);
      bind_point = vk_pipeline->bind_point;
      if (vk_pipeline->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE)
         vk_shader = vk_pipeline_get_shader(vk_pipeline, MESA_SHADER_COMPUTE);
   } else {
      /* Shader-object sets are only supported for compute here. */
      entry_count = pCreateInfo->info.pShaderInfo->maxShaderCount;
      vk_shader =
         vk_shader_from_handle(pCreateInfo->info.pShaderInfo->pInitialShaders[0]);
      bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
   }

   /* The BO is both read by the GPU and written by the CPU on every
    * update, hence mapped + cached-coherent.
    */
   enum anv_bo_alloc_flags alloc_flags =
      ANV_BO_ALLOC_CAPTURE |
      ANV_BO_ALLOC_MAPPED |
      ANV_BO_ALLOC_HOST_CACHED_COHERENT;

   switch (bind_point) {
   case VK_PIPELINE_BIND_POINT_COMPUTE: {
      struct anv_shader *shader = container_of(vk_shader, struct anv_shader, vk);

      /* Alignment required for
       * MEDIA_INTERFACE_DESCRIPTOR_LOAD::InterfaceDescriptorDataStartAddress
       */
      STATIC_ASSERT(sizeof(struct anv_dgc_cs_descriptor) % 64 == 0);

      indirect_set->stride = sizeof(struct anv_dgc_cs_descriptor);

      uint32_t size = align(entry_count * indirect_set->stride, 4096);

      /* Generations up to Gfx12.0 have structures describing the compute
       * shader that needs to live in the dynamic state heap.
       */
      if (device->info->verx10 <= 120)
         alloc_flags |= ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL;

      result = anv_device_alloc_bo(device, "indirect-exec-set", size,
                                   alloc_flags, 0 /* explicit_address */,
                                   &indirect_set->bo);
      if (result != VK_SUCCESS)
         goto fail_relocs;

      /* Keep a private copy of the bind map: the initial shader may be
       * destroyed while the set is still alive.
       */
      indirect_set->bind_map = anv_pipeline_bind_map_clone(
         device, pAllocator, &shader->bind_map);
      if (indirect_set->bind_map == NULL) {
         result = vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY,
                            "Fail to allocate bind map");
         goto fail_bo;
      }

      /* Entry 0 is always seeded with the initial shader. */
      write_cs_set_entry(device, indirect_set, 0, shader);
      break;
   }

   case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: {
      VK_FROM_HANDLE(vk_pipeline, pipeline,
                     pCreateInfo->info.pPipelineInfo->initialPipeline);
      write_rt_set_entry(indirect_set, 0, pipeline);
      break;
   }

   default:
      UNREACHABLE("Unsupported indirect pipeline type");
   }

   *pIndirectExecutionSet = anv_indirect_execution_set_to_handle(indirect_set);

   return VK_SUCCESS;

 fail_bo:
   anv_device_release_bo(device, indirect_set->bo);
 fail_relocs:
   anv_reloc_list_finish(&indirect_set->relocs);
 fail_object:
   vk_object_free(&device->vk, pAllocator, indirect_set);

   return result;
}
|
||||
|
||||
void anv_DestroyIndirectExecutionSetEXT(
|
||||
VkDevice _device,
|
||||
VkIndirectExecutionSetEXT indirectExecutionSet,
|
||||
const VkAllocationCallbacks* pAllocator)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, _device);
|
||||
ANV_FROM_HANDLE(anv_indirect_execution_set, indirect_set, indirectExecutionSet);
|
||||
|
||||
vk_free2(&device->vk.alloc, pAllocator, indirect_set->bind_map);
|
||||
anv_reloc_list_finish(&indirect_set->relocs);
|
||||
if (indirect_set->bo)
|
||||
anv_device_release_bo(device, indirect_set->bo);
|
||||
vk_object_free(&device->vk, pAllocator, indirect_set);
|
||||
}
|
||||
|
||||
void anv_UpdateIndirectExecutionSetPipelineEXT(
|
||||
VkDevice _device,
|
||||
VkIndirectExecutionSetEXT indirectExecutionSet,
|
||||
uint32_t executionSetWriteCount,
|
||||
const VkWriteIndirectExecutionSetPipelineEXT* pExecutionSetWrites)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, _device);
|
||||
ANV_FROM_HANDLE(anv_indirect_execution_set, indirect_set, indirectExecutionSet);
|
||||
|
||||
for (uint32_t i = 0; i < executionSetWriteCount; i++) {
|
||||
VK_FROM_HANDLE(vk_pipeline, pipeline, pExecutionSetWrites[i].pipeline);
|
||||
|
||||
switch (pipeline->bind_point) {
|
||||
case VK_PIPELINE_BIND_POINT_COMPUTE: {
|
||||
struct vk_shader *vk_shader =
|
||||
vk_pipeline_get_shader(pipeline, MESA_SHADER_COMPUTE);
|
||||
struct anv_shader *shader = container_of(vk_shader, struct anv_shader, vk);
|
||||
write_cs_set_entry(device, indirect_set,
|
||||
pExecutionSetWrites[i].index, shader);
|
||||
break;
|
||||
}
|
||||
|
||||
case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
|
||||
write_rt_set_entry(indirect_set, pExecutionSetWrites[i].index, pipeline);
|
||||
break;
|
||||
|
||||
default:
|
||||
UNREACHABLE("Unsupported indirect pipeline type");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void anv_UpdateIndirectExecutionSetShaderEXT(
|
||||
VkDevice _device,
|
||||
VkIndirectExecutionSetEXT indirectExecutionSet,
|
||||
uint32_t executionSetWriteCount,
|
||||
const VkWriteIndirectExecutionSetShaderEXT* pExecutionSetWrites)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, _device);
|
||||
ANV_FROM_HANDLE(anv_indirect_execution_set, indirect_set, indirectExecutionSet);
|
||||
|
||||
for (uint32_t i = 0; i < executionSetWriteCount; i++) {
|
||||
VK_FROM_HANDLE(vk_shader, vk_shader, pExecutionSetWrites[i].shader);
|
||||
assert(vk_shader->stage == MESA_SHADER_COMPUTE);
|
||||
struct anv_shader *shader = container_of(vk_shader, struct anv_shader, vk);
|
||||
write_cs_set_entry(device, indirect_set,
|
||||
pExecutionSetWrites[i].index, shader);
|
||||
}
|
||||
}
|
||||
|
|
@ -548,6 +548,10 @@ void genX(write_rt_shader_group)(struct anv_device *device,
|
|||
uint32_t shader_count,
|
||||
void *output);
|
||||
|
||||
void genX(write_cs_descriptor)(struct anv_dgc_cs_descriptor *desc,
|
||||
struct anv_device *device,
|
||||
struct anv_shader *shader);
|
||||
|
||||
uint32_t genX(shader_cmd_size)(struct anv_device *device,
|
||||
mesa_shader_stage stage);
|
||||
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@
|
|||
#include "compiler/brw/brw_rt.h"
|
||||
#include "ds/intel_driver_ds.h"
|
||||
#include "dev/virtio/intel_virtio.h"
|
||||
#include "shaders/libintel_shaders.h"
|
||||
#include "util/bitset.h"
|
||||
#include "util/bitscan.h"
|
||||
#include "util/cache_ops.h"
|
||||
|
|
@ -1116,6 +1117,11 @@ struct anv_pipeline_bind_map {
|
|||
uint8_t inferred_behavior;
|
||||
};
|
||||
|
||||
struct anv_pipeline_bind_map *
|
||||
anv_pipeline_bind_map_clone(struct anv_device *device,
|
||||
const VkAllocationCallbacks *alloc,
|
||||
const struct anv_pipeline_bind_map *src);
|
||||
|
||||
struct anv_push_descriptor_info {
|
||||
/* A bitfield of descriptors used. */
|
||||
uint32_t used_descriptors;
|
||||
|
|
@ -5193,6 +5199,10 @@ struct anv_event {
|
|||
|
||||
#define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)
|
||||
|
||||
#define ANV_VK_STAGE_MASK (ANV_GRAPHICS_STAGE_BITS | \
|
||||
ANV_RT_STAGE_BITS | \
|
||||
VK_SHADER_STAGE_COMPUTE_BIT)
|
||||
|
||||
#define anv_foreach_stage(stage, stage_bits) \
|
||||
u_foreach_bit(stage, (stage_bits & ANV_STAGE_MASK))
|
||||
|
||||
|
|
@ -6539,6 +6549,36 @@ static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_poo
|
|||
pool->khr_perf_preamble_stride * pass;
|
||||
}
|
||||
|
||||
/* Backing object for VkIndirectExecutionSetEXT
 * (VK_EXT_device_generated_commands).
 */
struct anv_indirect_execution_set {
   struct vk_object_base base;

   /* Clone of the initial compute shader's bind map, owned by the set and
    * freed in anv_DestroyIndirectExecutionSetEXT; NULL for ray-tracing
    * sets.
    */
   struct anv_pipeline_bind_map *bind_map;

   /** List of all the scratch buffers on < Gfx12.5 */
   struct anv_reloc_list relocs;

   /* Mapped BO holding one anv_dgc_cs_descriptor per entry (compute sets
    * only; ray-tracing sets allocate no BO).
    */
   struct anv_bo *bo;

   /* OR of brw_cs_prog_data::uses_systolic across all written entries. */
   bool uses_systolic;

   /* Byte stride between consecutive entries in bo. */
   uint32_t stride;

   /* NOTE(review): not written anywhere in this change — presumably filled
    * by the command generation code; confirm against users.
    */
   uint32_t max_final_commands_size;

   /** Maximum scratch space for shaders */
   uint32_t max_scratch;
   /** Maximum number of ray queries used by shaders */
   uint32_t max_ray_queries;
};
|
||||
|
||||
void anv_write_gfx_indirect_descriptor(struct anv_device *device,
|
||||
struct anv_dgc_gfx_descriptor *descriptor,
|
||||
struct anv_cmd_graphics_state *gfx);
|
||||
|
||||
enum anv_dgc_stage anv_vk_stage_to_dgc_stage(VkShaderStageFlags vk_stage);
|
||||
|
||||
uint32_t anv_vk_stages_to_generated_stages(VkShaderStageFlags vk_stages);
|
||||
|
||||
struct anv_vid_mem {
|
||||
struct anv_device_memory *mem;
|
||||
VkDeviceSize offset;
|
||||
|
|
@ -6949,6 +6989,9 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
|
|||
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_video_session, vk.base,
|
||||
VkVideoSessionKHR,
|
||||
VK_OBJECT_TYPE_VIDEO_SESSION_KHR)
|
||||
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_indirect_execution_set, base,
|
||||
VkIndirectExecutionSetEXT,
|
||||
VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT)
|
||||
|
||||
#define anv_genX(devinfo, thing) ({ \
|
||||
__typeof(&gfx9_##thing) genX_thing; \
|
||||
|
|
|
|||
|
|
@ -574,3 +574,33 @@ anv_device_finish_rt_shaders(struct anv_device *device)
|
|||
if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Deep-copy a bind map into a single allocation.
 *
 * The struct and its three trailing arrays (surfaces, samplers, embedded
 * samplers) are multialloc'd together, so the whole clone is released with
 * one vk_free2/vk_free call by the owner. Returns NULL on allocation
 * failure.
 */
struct anv_pipeline_bind_map *
anv_pipeline_bind_map_clone(struct anv_device *device,
                            const VkAllocationCallbacks *alloc,
                            const struct anv_pipeline_bind_map *src)
{
   VK_MULTIALLOC(ma);
   VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_bind_map, bind_map, 1);
   VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, surfaces, src->surface_count);
   VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, samplers, src->sampler_count);
   VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_embedded_sampler_binding, embedded_samplers, src->embedded_sampler_count);

   if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, alloc,
                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
      return NULL;

   /* Copy all scalar fields; the array pointers copied here still point
    * into src and are fixed up below.
    */
   memcpy(bind_map, src, sizeof(*src));

   memcpy(surfaces, src->surface_to_descriptor,
          sizeof(*surfaces) * src->surface_count);
   bind_map->surface_to_descriptor = surfaces;
   memcpy(samplers, src->sampler_to_descriptor,
          sizeof(*samplers) * src->sampler_count);
   bind_map->sampler_to_descriptor = samplers;
   memcpy(embedded_samplers, src->embedded_sampler_to_binding,
          sizeof(*embedded_samplers) * src->embedded_sampler_count);
   bind_map->embedded_sampler_to_binding = embedded_samplers;

   return bind_map;
}
|
||||
|
|
|
|||
|
|
@ -1272,6 +1272,75 @@ emit_cs_shader(struct anv_batch *batch,
|
|||
#endif
|
||||
}
|
||||
|
||||
/* Fill a generated-commands compute descriptor for one shader.
 *
 * Common dispatch parameters are written first, then the per-generation
 * part: a pre-packed COMPUTE_WALKER on Gfx12.5+, or MEDIA_VFE_STATE +
 * INTERFACE_DESCRIPTOR_DATA + a GPGPU_WALKER template on older gens.
 */
void
genX(write_cs_descriptor)(struct anv_dgc_cs_descriptor *desc,
                          struct anv_device *device,
                          struct anv_shader *shader)
{
   const struct anv_pipeline_bind_map *bind_map = &shader->bind_map;
   const struct anv_push_range *push_range = &bind_map->push_ranges[0];

   /* push_range->start is in 32-byte units; only a real push-constant range
    * contributes an offset.
    */
   *desc = (struct anv_dgc_cs_descriptor) {
      .push_data_offset = 32 * (push_range->set == ANV_DESCRIPTOR_SET_PUSH_CONSTANTS ?
                                push_range->start : 0),
   };

   const struct brw_cs_prog_data *prog_data =
      brw_cs_prog_data_const(shader->prog_data);
   const struct intel_cs_dispatch_info dispatch =
      brw_cs_get_dispatch_info(device->info, prog_data, NULL);

   desc->right_mask = dispatch.right_mask;
   desc->threads = dispatch.threads;
   desc->simd_size = dispatch.simd_size;

#if GFX_VERx10 >= 125
   /* Pack a baseline COMPUTE_WALKER carrying only what the shader itself
    * doesn't determine (here the PostSync MOCS) ...
    */
   GENX(COMPUTE_WALKER_pack)(NULL, desc->gfx125.compute_walker,
                             &(struct GENX(COMPUTE_WALKER)) {
                                GENX(COMPUTE_WALKER_header),
                                .body = {
                                   .PostSync.MOCS = anv_mocs(device, NULL, 0),
                                },
                             });

   /* ... then OR in the shader's pre-packed walker body. The +1 skips the
    * instruction header dword, hence the strict > in the size check.
    */
   assert(sizeof(desc->gfx125.compute_walker) >
          sizeof(shader->cs.gfx125.compute_walker_body));
   for (uint32_t i = 0; i < ARRAY_SIZE(shader->cs.gfx125.compute_walker_body); i++)
      desc->gfx125.compute_walker[1 + i] |= shader->cs.gfx125.compute_walker_body[i];
   desc->gfx125.inline_dwords_count = bind_map->inline_dwords_count;
   assert(sizeof(desc->gfx125.inline_dwords) ==
          sizeof(bind_map->inline_dwords));
   memcpy(desc->gfx125.inline_dwords,
          bind_map->inline_dwords,
          sizeof(bind_map->inline_dwords));

#else
   /* Legacy path: copy the pre-packed MEDIA_VFE_STATE and
    * INTERFACE_DESCRIPTOR_DATA out of the shader's command data.
    */
   assert(sizeof(desc->gfx9.media_vfe_state) ==
          shader->cs.gfx9.vfe.len * 4);
   assert(sizeof(desc->gfx9.interface_descriptor_data) ==
          sizeof(shader->cs.gfx9.idd));

   memcpy(desc->gfx9.media_vfe_state,
          &shader->cmd_data[shader->cs.gfx9.vfe.offset],
          shader->cs.gfx9.vfe.len * 4);
   memcpy(desc->gfx9.interface_descriptor_data,
          shader->cs.gfx9.idd,
          sizeof(desc->gfx9.interface_descriptor_data));

   desc->gfx9.n_threads = dispatch.threads;
   desc->gfx9.cross_thread_push_size = prog_data->push.cross_thread.size;
   desc->gfx9.per_thread_push_size = prog_data->push.per_thread.size;
   /* Where the subgroup id lands within the per-thread push data, relative
    * to the start of the pushed range.
    */
   desc->gfx9.subgroup_id_offset =
      offsetof(struct anv_push_constants, cs.subgroup_id) -
      (32 * push_range->start + prog_data->push.cross_thread.size);

   /* Template GPGPU_WALKER; the generation shader fills in dispatch
    * dimensions at execution time.
    */
   GENX(GPGPU_WALKER_pack)(NULL, desc->gfx9.gpgpu_walker,
                           &(struct GENX(GPGPU_WALKER)) {
                              GENX(GPGPU_WALKER_header),
                           });
#endif
}
|
||||
|
||||
void
|
||||
genX(init_instructions)(struct anv_physical_device *device)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -160,6 +160,7 @@ libanv_files = files(
|
|||
'anv_cmd_buffer.c',
|
||||
'anv_descriptor_set.c',
|
||||
'anv_device.c',
|
||||
'anv_dgc_set.c',
|
||||
'anv_embedded_sampler.c',
|
||||
'anv_event.c',
|
||||
'anv_formats.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue