hasvk: remove ray tracing code

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Jason Ekstrand <jason.ekstrand@collabora.com>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Acked-by: Jason Ekstrand <jason.ekstrand@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18208>
Lionel Landwerlin 2022-08-22 10:01:10 +03:00 committed by Marge Bot
parent 4488253570
commit 6cbaaf27ab
7 changed files with 2 additions and 1540 deletions

View file

@@ -309,71 +309,6 @@ ilog2_round_up(uint32_t value)
return 32 - __builtin_clz(value - 1);
}
static void
anv_cmd_buffer_set_ray_query_buffer(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipeline_state,
struct anv_pipeline *pipeline,
VkShaderStageFlags stages)
{
struct anv_device *device = cmd_buffer->device;
uint64_t ray_shadow_size =
align_u64(brw_rt_ray_queries_shadow_stacks_size(device->info,
pipeline->ray_queries),
4096);
if (ray_shadow_size > 0 &&
(!cmd_buffer->state.ray_query_shadow_bo ||
cmd_buffer->state.ray_query_shadow_bo->size < ray_shadow_size)) {
unsigned shadow_size_log2 = MAX2(ilog2_round_up(ray_shadow_size), 16);
unsigned bucket = shadow_size_log2 - 16;
assert(bucket < ARRAY_SIZE(device->ray_query_shadow_bos));
struct anv_bo *bo = p_atomic_read(&device->ray_query_shadow_bos[bucket]);
if (bo == NULL) {
struct anv_bo *new_bo;
VkResult result = anv_device_alloc_bo(device, "RT queries shadow",
ray_shadow_size,
0, /* alloc_flags */
0, /* explicit_address */
&new_bo);
if (result != VK_SUCCESS) {
anv_batch_set_error(&cmd_buffer->batch, result);
return;
}
bo = p_atomic_cmpxchg(&device->ray_query_shadow_bos[bucket], NULL, new_bo);
if (bo != NULL) {
anv_device_release_bo(device, bo);
} else {
bo = new_bo;
}
}
cmd_buffer->state.ray_query_shadow_bo = bo;
/* Add the ray query buffers to the batch list. */
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
cmd_buffer->batch.alloc,
cmd_buffer->state.ray_query_shadow_bo);
}
/* Add the HW buffer to the list of BOs used. */
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
cmd_buffer->batch.alloc,
device->ray_query_bo);
/* Fill the push constants & mark them dirty. */
struct anv_state ray_query_global_state =
anv_genX(device->info, cmd_buffer_ray_query_globals)(cmd_buffer);
struct anv_address ray_query_globals_addr = (struct anv_address) {
.bo = device->dynamic_state_pool.block_pool.bo,
.offset = ray_query_global_state.offset,
};
pipeline_state->push_constants.ray_query_globals =
anv_address_physical(ray_query_globals_addr);
cmd_buffer->state.push_constants_dirty |= stages;
}
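/* A standalone sketch (GCC/Clang builtins, illustration only) of the
 * power-of-two bucketing above: the shadow size is rounded up to the next
 * power of two, clamped to a 64KB (2^16) minimum, and the bucket index is
 * that log2 minus 16, indexing into ray_query_shadow_bos[].
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t
ilog2_round_up(uint32_t value)
{
   return 32 - __builtin_clz(value - 1);
}

int
main(void)
{
   const uint32_t sizes[] = { 4096, 65536, 100000, 1u << 20 };
   for (unsigned i = 0; i < 4; i++) {
      unsigned shadow_size_log2 = ilog2_round_up(sizes[i]);
      if (shadow_size_log2 < 16)
         shadow_size_log2 = 16; /* MAX2(ilog2_round_up(size), 16) */
      unsigned bucket = shadow_size_log2 - 16;
      printf("size=%7u -> bucket %u (BO of %u bytes)\n",
             sizes[i], bucket, 1u << shadow_size_log2);
   }
   return 0;
}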
void anv_CmdBindPipeline(
VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
@@ -381,8 +316,6 @@ void anv_CmdBindPipeline(
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
struct anv_cmd_pipeline_state *state;
VkShaderStageFlags stages = 0;
switch (pipelineBindPoint) {
case VK_PIPELINE_BIND_POINT_COMPUTE: {
@@ -395,9 +328,6 @@ void anv_CmdBindPipeline(
cmd_buffer->state.compute.pipeline_dirty = true;
set_dirty_for_bind_map(cmd_buffer, MESA_SHADER_COMPUTE,
&compute_pipeline->cs->bind_map);
state = &cmd_buffer->state.compute.base;
stages = VK_SHADER_STAGE_COMPUTE_BIT;
break;
}
@@ -419,27 +349,6 @@ void anv_CmdBindPipeline(
/* Apply the non-dynamic state from the pipeline */
vk_cmd_set_dynamic_graphics_state(&cmd_buffer->vk,
&gfx_pipeline->dynamic_state);
state = &cmd_buffer->state.gfx.base;
stages = gfx_pipeline->active_stages;
break;
}
case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: {
struct anv_ray_tracing_pipeline *rt_pipeline =
anv_pipeline_to_ray_tracing(pipeline);
if (cmd_buffer->state.rt.pipeline == rt_pipeline)
return;
cmd_buffer->state.rt.pipeline = rt_pipeline;
cmd_buffer->state.rt.pipeline_dirty = true;
if (rt_pipeline->stack_size > 0) {
anv_CmdSetRayTracingPipelineStackSizeKHR(commandBuffer,
rt_pipeline->stack_size);
}
state = &cmd_buffer->state.rt.base;
break;
}
@@ -447,9 +356,6 @@ void anv_CmdBindPipeline(
unreachable("invalid bind point");
break;
}
if (pipeline->ray_queries > 0)
anv_cmd_buffer_set_ray_query_buffer(cmd_buffer, state, pipeline, stages);
}
static void
@@ -492,16 +398,6 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
pipe_state = &cmd_buffer->state.compute.base;
break;
case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
stages &= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
VK_SHADER_STAGE_MISS_BIT_KHR |
VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
VK_SHADER_STAGE_CALLABLE_BIT_KHR;
pipe_state = &cmd_buffer->state.rt.base;
break;
default:
unreachable("invalid bind point");
}
@@ -520,13 +416,7 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
* as a 64-bit address in the push constants.
*/
bool update_desc_sets = stages & (VK_SHADER_STAGE_TASK_BIT_NV |
VK_SHADER_STAGE_MESH_BIT_NV |
VK_SHADER_STAGE_RAYGEN_BIT_KHR |
VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
VK_SHADER_STAGE_MISS_BIT_KHR |
VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
VK_SHADER_STAGE_CALLABLE_BIT_KHR);
VK_SHADER_STAGE_MESH_BIT_NV);
if (update_desc_sets) {
struct anv_push_constants *push = &pipe_state->push_constants;
@@ -818,17 +708,6 @@ void anv_CmdPushConstants(
memcpy(pipe_state->push_constants.client_data + offset, pValues, size);
}
if (stageFlags & (VK_SHADER_STAGE_RAYGEN_BIT_KHR |
VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
VK_SHADER_STAGE_MISS_BIT_KHR |
VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
VK_SHADER_STAGE_CALLABLE_BIT_KHR)) {
struct anv_cmd_pipeline_state *pipe_state =
&cmd_buffer->state.rt.base;
memcpy(pipe_state->push_constants.client_data + offset, pValues, size);
}
cmd_buffer->state.push_constants_dirty |= stageFlags;
}
@@ -850,10 +729,6 @@ anv_cmd_buffer_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
pipe_state = &cmd_buffer->state.compute.base;
break;
case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
pipe_state = &cmd_buffer->state.rt.base;
break;
default:
unreachable("invalid bind point");
}
@@ -1043,54 +918,3 @@ void anv_CmdSetDeviceMask(
{
/* No-op */
}
void anv_CmdSetRayTracingPipelineStackSizeKHR(
VkCommandBuffer commandBuffer,
uint32_t pipelineStackSize)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;
struct anv_device *device = cmd_buffer->device;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
uint32_t stack_ids_per_dss = 2048; /* TODO */
unsigned stack_size_log2 = ilog2_round_up(pipelineStackSize);
if (stack_size_log2 < 10)
stack_size_log2 = 10;
if (rt->scratch.layout.total_size == 1 << stack_size_log2)
return;
brw_rt_compute_scratch_layout(&rt->scratch.layout, device->info,
stack_ids_per_dss, 1 << stack_size_log2);
unsigned bucket = stack_size_log2 - 10;
assert(bucket < ARRAY_SIZE(device->rt_scratch_bos));
struct anv_bo *bo = p_atomic_read(&device->rt_scratch_bos[bucket]);
if (bo == NULL) {
struct anv_bo *new_bo;
VkResult result = anv_device_alloc_bo(device, "RT scratch",
rt->scratch.layout.total_size,
0, /* alloc_flags */
0, /* explicit_address */
&new_bo);
if (result != VK_SUCCESS) {
rt->scratch.layout.total_size = 0;
anv_batch_set_error(&cmd_buffer->batch, result);
return;
}
bo = p_atomic_cmpxchg(&device->rt_scratch_bos[bucket], NULL, new_bo);
if (bo != NULL) {
anv_device_release_bo(device, bo);
} else {
bo = new_bo;
}
}
rt->scratch.bo = bo;
}
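/* Both bucket caches above (ray_query_shadow_bos[] and rt_scratch_bos[])
 * use the same lock-free lazy-init idiom: any thread may allocate, only
 * the first compare-and-swap wins, and losers release their copy.  A
 * minimal C11 sketch of that idiom with illustrative names, not driver
 * API (p_atomic_cmpxchg returns the old value; C11's
 * atomic_compare_exchange_strong reports it through "expected"):
 */
#include <stdatomic.h>
#include <stdlib.h>

struct demo_bo { size_t size; };

static struct demo_bo *
get_or_create_bo(_Atomic(struct demo_bo *) *slot, size_t size)
{
   struct demo_bo *bo = atomic_load(slot);
   if (bo == NULL) {
      struct demo_bo *new_bo = malloc(sizeof(*new_bo));
      if (new_bo == NULL)
         return NULL;
      new_bo->size = size;

      struct demo_bo *expected = NULL;
      if (atomic_compare_exchange_strong(slot, &expected, new_bo)) {
         bo = new_bo;   /* we won the race */
      } else {
         free(new_bo);  /* someone beat us to it; use their BO */
         bo = expected;
      }
   }
   return bo;
}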

View file

@@ -226,7 +226,6 @@ get_device_extensions(const struct anv_physical_device *device,
device->use_call_secondary,
.KHR_pipeline_executable_properties = true,
.KHR_push_descriptor = true,
.KHR_ray_query = device->info.has_ray_tracing,
.KHR_relaxed_block_layout = true,
.KHR_sampler_mirror_clamp_to_edge = true,
.KHR_sampler_ycbcr_conversion = true,
@@ -1562,12 +1561,6 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR: {
VkPhysicalDeviceRayQueryFeaturesKHR *features = (void *)ext;
features->rayQuery = pdevice->info.has_ray_tracing;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
features->robustBufferAccess2 = true;
@@ -1939,14 +1932,6 @@ anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice,
if (pdevice->compiler->scalar_stage[stage])
scalar_stages |= mesa_to_vk_shader_stage(stage);
}
if (pdevice->vk.supported_extensions.KHR_ray_tracing_pipeline) {
scalar_stages |= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
VK_SHADER_STAGE_MISS_BIT_KHR |
VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
VK_SHADER_STAGE_CALLABLE_BIT_KHR;
}
if (pdevice->vk.supported_extensions.NV_mesh_shader) {
scalar_stages |= VK_SHADER_STAGE_TASK_BIT_NV |
VK_SHADER_STAGE_MESH_BIT_NV;
@@ -3314,22 +3299,9 @@ VkResult anv_CreateDevice(
device->workaround_bo->size,
INTEL_DEBUG_BLOCK_TYPE_FRAME);
if (device->vk.enabled_extensions.KHR_ray_query) {
uint32_t ray_queries_size =
align_u32(brw_rt_ray_queries_hw_stacks_size(device->info), 4096);
result = anv_device_alloc_bo(device, "ray queries",
ray_queries_size,
0,
0 /* explicit_address */,
&device->ray_query_bo);
if (result != VK_SUCCESS)
goto fail_workaround_bo;
}
result = anv_device_init_trivial_batch(device);
if (result != VK_SUCCESS)
goto fail_ray_query_bo;
goto fail_workaround_bo;
if (device->info->ver >= 12 &&
device->vk.enabled_extensions.KHR_fragment_shading_rate) {
@@ -3367,9 +3339,6 @@ VkResult anv_CreateDevice(
anv_scratch_pool_init(device, &device->scratch_pool);
/* TODO(RT): Do we want some sort of data structure for this? */
memset(device->rt_scratch_bos, 0, sizeof(device->rt_scratch_bos));
result = anv_genX(device->info, init_device_state)(device);
if (result != VK_SUCCESS)
goto fail_trivial_batch_bo_and_scratch_pool;
@@ -3395,12 +3364,6 @@ VkResult anv_CreateDevice(
goto fail_default_pipeline_cache;
}
result = anv_device_init_rt_shaders(device);
if (result != VK_SUCCESS) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_internal_cache;
}
anv_device_init_blorp(device);
anv_device_init_border_colors(device);
@@ -3413,17 +3376,12 @@ VkResult anv_CreateDevice(
return VK_SUCCESS;
fail_internal_cache:
vk_pipeline_cache_destroy(device->internal_cache, NULL);
fail_default_pipeline_cache:
vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
fail_trivial_batch_bo_and_scratch_pool:
anv_scratch_pool_finish(device, &device->scratch_pool);
fail_trivial_batch:
anv_device_release_bo(device, device->trivial_batch_bo);
fail_ray_query_bo:
if (device->ray_query_bo)
anv_device_release_bo(device, device->ray_query_bo);
fail_workaround_bo:
anv_device_release_bo(device, device->workaround_bo);
fail_surface_aux_map_pool:
@@ -3486,8 +3444,6 @@ void anv_DestroyDevice(
anv_device_finish_blorp(device);
anv_device_finish_rt_shaders(device);
vk_pipeline_cache_destroy(device->internal_cache, NULL);
vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
@@ -3502,20 +3458,8 @@ void anv_DestroyDevice(
anv_state_pool_free(&device->dynamic_state_pool, device->cps_states);
#endif
for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) {
if (device->rt_scratch_bos[i] != NULL)
anv_device_release_bo(device, device->rt_scratch_bos[i]);
}
anv_scratch_pool_finish(device, &device->scratch_pool);
if (device->vk.enabled_extensions.KHR_ray_query) {
for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_shadow_bos); i++) {
if (device->ray_query_shadow_bos[i] != NULL)
anv_device_release_bo(device, device->ray_query_shadow_bos[i]);
}
anv_device_release_bo(device, device->ray_query_bo);
}
anv_device_release_bo(device, device->workaround_bo);
anv_device_release_bo(device, device->trivial_batch_bo);

View file

@@ -175,6 +175,3 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
void
genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline);
void
genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline);

View file

@@ -95,8 +95,6 @@ anv_shader_stage_to_nir(struct anv_device *device,
.post_depth_coverage = pdevice->info.ver >= 9,
.runtime_descriptor_array = true,
.float_controls = pdevice->info.ver >= 8,
.ray_query = pdevice->info.has_ray_tracing,
.ray_tracing = pdevice->info.has_ray_tracing,
.shader_clock = true,
.shader_viewport_index_layer = true,
.stencil_export = pdevice->info.ver >= 9,
@@ -251,17 +249,6 @@ void anv_DestroyPipeline(
break;
}
case ANV_PIPELINE_RAY_TRACING: {
struct anv_ray_tracing_pipeline *rt_pipeline =
anv_pipeline_to_ray_tracing(pipeline);
util_dynarray_foreach(&rt_pipeline->shaders,
struct anv_shader_bin *, shader) {
anv_shader_bin_unref(device, *shader);
}
break;
}
default:
unreachable("invalid pipeline type");
}
@@ -600,51 +587,6 @@ anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
_mesa_sha1_final(&ctx, sha1_out);
}
static void
anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline,
struct anv_pipeline_layout *layout,
struct anv_pipeline_stage *stage,
unsigned char *sha1_out)
{
struct mesa_sha1 ctx;
_mesa_sha1_init(&ctx);
if (layout != NULL)
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
const bool rba = pipeline->base.device->robust_buffer_access;
_mesa_sha1_update(&ctx, &rba, sizeof(rba));
_mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
_mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs));
_mesa_sha1_final(&ctx, sha1_out);
}
static void
anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline,
struct anv_pipeline_layout *layout,
struct anv_pipeline_stage *intersection,
struct anv_pipeline_stage *any_hit,
unsigned char *sha1_out)
{
struct mesa_sha1 ctx;
_mesa_sha1_init(&ctx);
if (layout != NULL)
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
const bool rba = pipeline->base.device->robust_buffer_access;
_mesa_sha1_update(&ctx, &rba, sizeof(rba));
_mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1));
_mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs));
_mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1));
_mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs));
_mesa_sha1_final(&ctx, sha1_out);
}
static nir_shader *
anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
struct vk_pipeline_cache *cache,
@@ -2230,757 +2172,6 @@ VkResult anv_CreateGraphicsPipelines(
return result;
}
static VkResult
compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
struct vk_pipeline_cache *cache,
nir_shader *nir,
struct anv_pipeline_stage *stage,
struct anv_shader_bin **shader_out,
void *mem_ctx)
{
const struct brw_compiler *compiler =
pipeline->base.device->physical->compiler;
const struct intel_device_info *devinfo = compiler->devinfo;
nir_shader **resume_shaders = NULL;
uint32_t num_resume_shaders = 0;
if (nir->info.stage != MESA_SHADER_COMPUTE) {
NIR_PASS(_, nir, nir_lower_shader_calls,
nir_address_format_64bit_global,
BRW_BTD_STACK_ALIGN,
&resume_shaders, &num_resume_shaders, mem_ctx);
NIR_PASS(_, nir, brw_nir_lower_shader_calls);
NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);
}
for (unsigned i = 0; i < num_resume_shaders; i++) {
NIR_PASS(_, resume_shaders[i], brw_nir_lower_shader_calls);
NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo);
}
struct brw_compile_bs_params params = {
.nir = nir,
.key = &stage->key.bs,
.prog_data = &stage->prog_data.bs,
.num_resume_shaders = num_resume_shaders,
.resume_shaders = resume_shaders,
.stats = stage->stats,
.log_data = pipeline->base.device,
};
stage->code = brw_compile_bs(compiler, mem_ctx, &params);
if (stage->code == NULL)
return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
/* Ray-tracing shaders don't have a "real" bind map */
struct anv_pipeline_bind_map empty_bind_map = {};
const unsigned code_size = stage->prog_data.base.program_size;
struct anv_shader_bin *bin =
anv_device_upload_kernel(pipeline->base.device,
cache,
stage->stage,
&stage->cache_key, sizeof(stage->cache_key),
stage->code, code_size,
&stage->prog_data.base,
sizeof(stage->prog_data.bs),
stage->stats, 1,
NULL, &empty_bind_map);
if (bin == NULL)
return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
/* TODO: Figure out executables for resume shaders */
anv_pipeline_add_executables(&pipeline->base, stage, bin);
util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin);
*shader_out = bin;
return VK_SUCCESS;
}
static bool
is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
{
if (info->pDynamicState == NULL)
return false;
for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
if (info->pDynamicState->pDynamicStates[i] ==
VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
return true;
}
return false;
}
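/* Client-side counterpart, as a hedged sketch: this is the create-info
 * arrangement that makes the loop above return true, followed by the
 * record-time call handled by anv_CmdSetRayTracingPipelineStackSizeKHR.
 * (Standard VK_KHR_ray_tracing_pipeline API; error handling elided.)
 */
#include <vulkan/vulkan.h>

static const VkDynamicState rt_dynamic_states[] = {
   VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR,
};

static const VkPipelineDynamicStateCreateInfo rt_dynamic_state_info = {
   .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
   .dynamicStateCount = 1,
   .pDynamicStates = rt_dynamic_states,
};

/* Point VkRayTracingPipelineCreateInfoKHR::pDynamicState at
 * rt_dynamic_state_info when creating the pipeline, then at record time:
 */
static void
set_rt_stack_size(VkCommandBuffer cmd, uint32_t stack_size_bytes)
{
   vkCmdSetRayTracingPipelineStackSizeKHR(cmd, stack_size_bytes);
}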
static void
anv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline,
const VkRayTracingPipelineCreateInfoKHR *info,
uint32_t *stack_max)
{
if (is_rt_stack_size_dynamic(info)) {
pipeline->stack_size = 0; /* 0 means dynamic */
} else {
/* From the Vulkan spec:
*
* "If the stack size is not set explicitly, the stack size for a
* pipeline is:
*
* rayGenStackMax +
* min(1, maxPipelineRayRecursionDepth) ×
* max(closestHitStackMax, missStackMax,
* intersectionStackMax + anyHitStackMax) +
* max(0, maxPipelineRayRecursionDepth-1) ×
* max(closestHitStackMax, missStackMax) +
* 2 × callableStackMax"
*/
pipeline->stack_size =
stack_max[MESA_SHADER_RAYGEN] +
MIN2(1, info->maxPipelineRayRecursionDepth) *
MAX4(stack_max[MESA_SHADER_CLOSEST_HIT],
stack_max[MESA_SHADER_MISS],
stack_max[MESA_SHADER_INTERSECTION],
stack_max[MESA_SHADER_ANY_HIT]) +
MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) *
MAX2(stack_max[MESA_SHADER_CLOSEST_HIT],
stack_max[MESA_SHADER_MISS]) +
2 * stack_max[MESA_SHADER_CALLABLE];
/* This is an extremely unlikely case but we need to set it to some
* non-zero value so that we don't accidentally think it's dynamic.
* Our minimum stack size is 2KB anyway so we could set it to any small
* value we like.
*/
if (pipeline->stack_size == 0)
pipeline->stack_size = 1;
}
}
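/* A checkable version of the spec formula quoted above, with made-up
 * per-stage maxima (illustration only; note the driver code above takes
 * the max of the four hit-group values rather than intersection plus
 * any-hit as the spec text states):
 */
#include <stdint.h>
#include <stdio.h>

#define XMAX2(a, b) ((a) > (b) ? (a) : (b))
#define XMIN2(a, b) ((a) < (b) ? (a) : (b))

static uint32_t
default_rt_stack_size(uint32_t raygen, uint32_t chit, uint32_t miss,
                      uint32_t isec, uint32_t ahit, uint32_t callable,
                      uint32_t recursion_depth)
{
   return raygen +
          XMIN2(1u, recursion_depth) *
             XMAX2(XMAX2(chit, miss), isec + ahit) +
          XMAX2(0, (int)recursion_depth - 1) * XMAX2(chit, miss) +
          2 * callable;
}

int
main(void)
{
   /* 1024 + 1 * max(2048, 512, 1024 + 512)
    *      + 1 * max(2048, 512) + 2 * 256 = 5632 bytes */
   printf("%u\n", default_rt_stack_size(1024, 2048, 512,
                                        1024, 512, 256, 2));
   return 0;
}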
static struct anv_pipeline_stage *
anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
const VkRayTracingPipelineCreateInfoKHR *info,
void *pipeline_ctx)
{
ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
/* Create enough stage entries for all shader modules plus potential
* combinations in the groups.
*/
struct anv_pipeline_stage *stages =
rzalloc_array(pipeline_ctx, struct anv_pipeline_stage, info->stageCount);
for (uint32_t i = 0; i < info->stageCount; i++) {
const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
if (vk_pipeline_shader_stage_is_null(sinfo))
continue;
int64_t stage_start = os_time_get_nano();
stages[i] = (struct anv_pipeline_stage) {
.stage = vk_to_mesa_shader_stage(sinfo->stage),
.info = sinfo,
.cache_key = {
.stage = vk_to_mesa_shader_stage(sinfo->stage),
},
.feedback = {
.flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
},
};
populate_bs_prog_key(pipeline->base.device,
pipeline->base.device->robust_buffer_access,
&stages[i].key.bs);
vk_pipeline_hash_shader_stage(sinfo, stages[i].shader_sha1);
if (stages[i].stage != MESA_SHADER_INTERSECTION) {
anv_pipeline_hash_ray_tracing_shader(pipeline, layout, &stages[i],
stages[i].cache_key.sha1);
}
stages[i].feedback.duration += os_time_get_nano() - stage_start;
}
for (uint32_t i = 0; i < info->groupCount; i++) {
const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR)
continue;
int64_t stage_start = os_time_get_nano();
uint32_t intersection_idx = ginfo->intersectionShader;
assert(intersection_idx < info->stageCount);
uint32_t any_hit_idx = ginfo->anyHitShader;
if (any_hit_idx != VK_SHADER_UNUSED_KHR) {
assert(any_hit_idx < info->stageCount);
anv_pipeline_hash_ray_tracing_combined_shader(pipeline,
layout,
&stages[intersection_idx],
&stages[any_hit_idx],
stages[intersection_idx].cache_key.sha1);
} else {
anv_pipeline_hash_ray_tracing_shader(pipeline, layout,
&stages[intersection_idx],
stages[intersection_idx].cache_key.sha1);
}
stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start;
}
return stages;
}
static bool
anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
struct vk_pipeline_cache *cache,
const VkRayTracingPipelineCreateInfoKHR *info,
struct anv_pipeline_stage *stages,
uint32_t *stack_max)
{
uint32_t shaders = 0, cache_hits = 0;
for (uint32_t i = 0; i < info->stageCount; i++) {
if (stages[i].info == NULL)
continue;
shaders++;
int64_t stage_start = os_time_get_nano();
bool cache_hit;
stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache,
&stages[i].cache_key,
sizeof(stages[i].cache_key),
&cache_hit);
if (cache_hit) {
cache_hits++;
stages[i].feedback.flags |=
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
}
if (stages[i].bin != NULL) {
anv_pipeline_add_executables(&pipeline->base, &stages[i], stages[i].bin);
util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, stages[i].bin);
uint32_t stack_size =
brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
stack_max[stages[i].stage] =
MAX2(stack_max[stages[i].stage], stack_size);
}
stages[i].feedback.duration += os_time_get_nano() - stage_start;
}
return cache_hits == shaders;
}
static VkResult
anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
struct vk_pipeline_cache *cache,
const VkRayTracingPipelineCreateInfoKHR *info)
{
const struct intel_device_info *devinfo = pipeline->base.device->info;
VkResult result;
VkPipelineCreationFeedback pipeline_feedback = {
.flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
};
int64_t pipeline_start = os_time_get_nano();
void *pipeline_ctx = ralloc_context(NULL);
struct anv_pipeline_stage *stages =
anv_pipeline_init_ray_tracing_stages(pipeline, info, pipeline_ctx);
ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
const bool skip_cache_lookup =
(pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {};
if (!skip_cache_lookup &&
anv_pipeline_load_cached_shaders(pipeline, cache, info, stages, stack_max)) {
pipeline_feedback.flags |=
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
goto done;
}
if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) {
ralloc_free(pipeline_ctx);
return VK_PIPELINE_COMPILE_REQUIRED;
}
for (uint32_t i = 0; i < info->stageCount; i++) {
if (stages[i].info == NULL)
continue;
int64_t stage_start = os_time_get_nano();
stages[i].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
pipeline_ctx, &stages[i]);
if (stages[i].nir == NULL) {
ralloc_free(pipeline_ctx);
return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
}
anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i],
layout, false /* use_primitive_replication */);
stages[i].feedback.duration += os_time_get_nano() - stage_start;
}
for (uint32_t i = 0; i < info->stageCount; i++) {
if (stages[i].info == NULL)
continue;
/* Shader found in cache already. */
if (stages[i].bin != NULL)
continue;
/* We handle intersection shaders as part of the group */
if (stages[i].stage == MESA_SHADER_INTERSECTION)
continue;
int64_t stage_start = os_time_get_nano();
void *stage_ctx = ralloc_context(pipeline_ctx);
nir_shader *nir = nir_shader_clone(stage_ctx, stages[i].nir);
switch (stages[i].stage) {
case MESA_SHADER_RAYGEN:
brw_nir_lower_raygen(nir);
break;
case MESA_SHADER_ANY_HIT:
brw_nir_lower_any_hit(nir, devinfo);
break;
case MESA_SHADER_CLOSEST_HIT:
brw_nir_lower_closest_hit(nir);
break;
case MESA_SHADER_MISS:
brw_nir_lower_miss(nir);
break;
case MESA_SHADER_INTERSECTION:
unreachable("These are handled later");
case MESA_SHADER_CALLABLE:
brw_nir_lower_callable(nir);
break;
default:
unreachable("Invalid ray-tracing shader stage");
}
result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i],
&stages[i].bin, stage_ctx);
if (result != VK_SUCCESS) {
ralloc_free(pipeline_ctx);
return result;
}
uint32_t stack_size =
brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
stack_max[stages[i].stage] = MAX2(stack_max[stages[i].stage], stack_size);
ralloc_free(stage_ctx);
stages[i].feedback.duration += os_time_get_nano() - stage_start;
}
for (uint32_t i = 0; i < info->groupCount; i++) {
const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
struct anv_rt_shader_group *group = &pipeline->groups[i];
group->type = ginfo->type;
switch (ginfo->type) {
case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
assert(ginfo->generalShader < info->stageCount);
group->general = stages[ginfo->generalShader].bin;
break;
case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
if (ginfo->anyHitShader < info->stageCount)
group->any_hit = stages[ginfo->anyHitShader].bin;
if (ginfo->closestHitShader < info->stageCount)
group->closest_hit = stages[ginfo->closestHitShader].bin;
break;
case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
if (ginfo->closestHitShader < info->stageCount)
group->closest_hit = stages[ginfo->closestHitShader].bin;
uint32_t intersection_idx = info->pGroups[i].intersectionShader;
assert(intersection_idx < info->stageCount);
/* Only compile this stage if not already found in the cache. */
if (stages[intersection_idx].bin == NULL) {
/* The any-hit and intersection shader have to be combined */
uint32_t any_hit_idx = info->pGroups[i].anyHitShader;
const nir_shader *any_hit = NULL;
if (any_hit_idx < info->stageCount)
any_hit = stages[any_hit_idx].nir;
void *group_ctx = ralloc_context(pipeline_ctx);
nir_shader *intersection =
nir_shader_clone(group_ctx, stages[intersection_idx].nir);
brw_nir_lower_combined_intersection_any_hit(intersection, any_hit,
devinfo);
result = compile_upload_rt_shader(pipeline, cache,
intersection,
&stages[intersection_idx],
&group->intersection,
group_ctx);
ralloc_free(group_ctx);
if (result != VK_SUCCESS)
return result;
} else {
group->intersection = stages[intersection_idx].bin;
}
uint32_t stack_size =
brw_bs_prog_data_const(group->intersection->prog_data)->max_stack_size;
stack_max[MESA_SHADER_INTERSECTION] =
MAX2(stack_max[MESA_SHADER_INTERSECTION], stack_size);
break;
}
default:
unreachable("Invalid ray tracing shader group type");
}
}
done:
ralloc_free(pipeline_ctx);
anv_pipeline_compute_ray_tracing_stacks(pipeline, info, stack_max);
pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
const VkPipelineCreationFeedbackCreateInfo *create_feedback =
vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
if (create_feedback) {
*create_feedback->pPipelineCreationFeedback = pipeline_feedback;
assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
for (uint32_t i = 0; i < info->stageCount; i++) {
gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
}
}
return VK_SUCCESS;
}
VkResult
anv_device_init_rt_shaders(struct anv_device *device)
{
if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
return VK_SUCCESS;
bool cache_hit;
struct brw_rt_trampoline {
char name[16];
struct brw_cs_prog_key key;
} trampoline_key = {
.name = "rt-trampoline",
};
device->rt_trampoline =
anv_device_search_for_kernel(device, device->internal_cache,
&trampoline_key, sizeof(trampoline_key),
&cache_hit);
if (device->rt_trampoline == NULL) {
void *tmp_ctx = ralloc_context(NULL);
nir_shader *trampoline_nir =
brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);
trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;
struct anv_pipeline_bind_map bind_map = {
.surface_count = 0,
.sampler_count = 0,
};
uint32_t dummy_params[4] = { 0, };
struct brw_cs_prog_data trampoline_prog_data = {
.base.nr_params = 4,
.base.param = dummy_params,
.uses_inline_data = true,
.uses_btd_stack_ids = true,
};
struct brw_compile_cs_params params = {
.nir = trampoline_nir,
.key = &trampoline_key.key,
.prog_data = &trampoline_prog_data,
.log_data = device,
};
const unsigned *tramp_data =
brw_compile_cs(device->physical->compiler, tmp_ctx, &params);
device->rt_trampoline =
anv_device_upload_kernel(device, device->internal_cache,
MESA_SHADER_COMPUTE,
&trampoline_key, sizeof(trampoline_key),
tramp_data,
trampoline_prog_data.base.program_size,
&trampoline_prog_data.base,
sizeof(trampoline_prog_data),
NULL, 0, NULL, &bind_map);
ralloc_free(tmp_ctx);
if (device->rt_trampoline == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
/* The cache already has a reference and it's not going anywhere so there
* is no need to hold a second reference.
*/
anv_shader_bin_unref(device, device->rt_trampoline);
struct brw_rt_trivial_return {
char name[16];
struct brw_bs_prog_key key;
} return_key = {
.name = "rt-trivial-ret",
};
device->rt_trivial_return =
anv_device_search_for_kernel(device, device->internal_cache,
&return_key, sizeof(return_key),
&cache_hit);
if (device->rt_trivial_return == NULL) {
void *tmp_ctx = ralloc_context(NULL);
nir_shader *trivial_return_nir =
brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx);
NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, device->info);
struct anv_pipeline_bind_map bind_map = {
.surface_count = 0,
.sampler_count = 0,
};
struct brw_bs_prog_data return_prog_data = { 0, };
struct brw_compile_bs_params params = {
.nir = trivial_return_nir,
.key = &return_key.key,
.prog_data = &return_prog_data,
.log_data = device,
};
const unsigned *return_data =
brw_compile_bs(device->physical->compiler, tmp_ctx, &params);
device->rt_trivial_return =
anv_device_upload_kernel(device, device->internal_cache,
MESA_SHADER_CALLABLE,
&return_key, sizeof(return_key),
return_data, return_prog_data.base.program_size,
&return_prog_data.base, sizeof(return_prog_data),
NULL, 0, NULL, &bind_map);
ralloc_free(tmp_ctx);
if (device->rt_trivial_return == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
/* The cache already has a reference and it's not going anywhere so there
* is no need to hold a second reference.
*/
anv_shader_bin_unref(device, device->rt_trivial_return);
return VK_SUCCESS;
}
void
anv_device_finish_rt_shaders(struct anv_device *device)
{
if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
return;
}
static VkResult
anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
struct anv_device *device,
struct vk_pipeline_cache *cache,
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *alloc)
{
VkResult result;
util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx);
result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo);
if (result != VK_SUCCESS)
goto fail;
anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false);
return VK_SUCCESS;
fail:
util_dynarray_foreach(&pipeline->shaders,
struct anv_shader_bin *, shader) {
anv_shader_bin_unref(device, *shader);
}
return result;
}
static void
assert_rt_stage_index_valid(const VkRayTracingPipelineCreateInfoKHR* pCreateInfo,
uint32_t stage_idx,
VkShaderStageFlags valid_stages)
{
if (stage_idx == VK_SHADER_UNUSED_KHR)
return;
assert(stage_idx < pCreateInfo->stageCount);
assert(util_bitcount(pCreateInfo->pStages[stage_idx].stage) == 1);
assert(pCreateInfo->pStages[stage_idx].stage & valid_stages);
}
static VkResult
anv_ray_tracing_pipeline_create(
VkDevice _device,
struct vk_pipeline_cache * cache,
const VkRayTracingPipelineCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkPipeline* pPipeline)
{
ANV_FROM_HANDLE(anv_device, device, _device);
VkResult result;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR);
VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct anv_ray_tracing_pipeline, pipeline, 1);
VK_MULTIALLOC_DECL(&ma, struct anv_rt_shader_group, groups, pCreateInfo->groupCount);
if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
result = anv_pipeline_init(&pipeline->base, device,
ANV_PIPELINE_RAY_TRACING, pCreateInfo->flags,
pAllocator);
if (result != VK_SUCCESS) {
vk_free2(&device->vk.alloc, pAllocator, pipeline);
return result;
}
pipeline->group_count = pCreateInfo->groupCount;
pipeline->groups = groups;
ASSERTED const VkShaderStageFlags ray_tracing_stages =
VK_SHADER_STAGE_RAYGEN_BIT_KHR |
VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
VK_SHADER_STAGE_MISS_BIT_KHR |
VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
VK_SHADER_STAGE_CALLABLE_BIT_KHR;
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
assert((pCreateInfo->pStages[i].stage & ~ray_tracing_stages) == 0);
for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) {
const VkRayTracingShaderGroupCreateInfoKHR *ginfo =
&pCreateInfo->pGroups[i];
assert_rt_stage_index_valid(pCreateInfo, ginfo->generalShader,
VK_SHADER_STAGE_RAYGEN_BIT_KHR |
VK_SHADER_STAGE_MISS_BIT_KHR |
VK_SHADER_STAGE_CALLABLE_BIT_KHR);
assert_rt_stage_index_valid(pCreateInfo, ginfo->closestHitShader,
VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR);
assert_rt_stage_index_valid(pCreateInfo, ginfo->anyHitShader,
VK_SHADER_STAGE_ANY_HIT_BIT_KHR);
assert_rt_stage_index_valid(pCreateInfo, ginfo->intersectionShader,
VK_SHADER_STAGE_INTERSECTION_BIT_KHR);
switch (ginfo->type) {
case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
assert(ginfo->generalShader < pCreateInfo->stageCount);
assert(ginfo->anyHitShader == VK_SHADER_UNUSED_KHR);
assert(ginfo->closestHitShader == VK_SHADER_UNUSED_KHR);
assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR);
break;
case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR);
assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR);
break;
case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR);
break;
default:
unreachable("Invalid ray-tracing shader group type");
}
}
result = anv_ray_tracing_pipeline_init(pipeline, device, cache,
pCreateInfo, pAllocator);
if (result != VK_SUCCESS) {
anv_pipeline_finish(&pipeline->base, device, pAllocator);
vk_free2(&device->vk.alloc, pAllocator, pipeline);
return result;
}
anv_genX(device->info, ray_tracing_pipeline_emit)(pipeline);
*pPipeline = anv_pipeline_to_handle(&pipeline->base);
return pipeline->base.batch.status;
}
VkResult
anv_CreateRayTracingPipelinesKHR(
VkDevice _device,
VkDeferredOperationKHR deferredOperation,
VkPipelineCache pipelineCache,
uint32_t createInfoCount,
const VkRayTracingPipelineCreateInfoKHR* pCreateInfos,
const VkAllocationCallbacks* pAllocator,
VkPipeline* pPipelines)
{
ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
VkResult result = VK_SUCCESS;
unsigned i;
for (i = 0; i < createInfoCount; i++) {
VkResult res = anv_ray_tracing_pipeline_create(_device, pipeline_cache,
&pCreateInfos[i],
pAllocator, &pPipelines[i]);
if (res == VK_SUCCESS)
continue;
/* Bail out on the first error as it is not obvious what error should be
* reported upon two different failures. */
result = res;
if (result != VK_PIPELINE_COMPILE_REQUIRED)
break;
pPipelines[i] = VK_NULL_HANDLE;
if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
break;
}
for (; i < createInfoCount; i++)
pPipelines[i] = VK_NULL_HANDLE;
return result;
}
#define WRITE_STR(field, ...) ({ \
memset(field, 0, sizeof(field)); \
UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
@@ -3058,14 +2249,6 @@ VkResult anv_GetPipelineExecutableStatisticsKHR(
prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
break;
}
case ANV_PIPELINE_RAY_TRACING: {
struct anv_shader_bin **shader =
util_dynarray_element(&anv_pipeline_to_ray_tracing(pipeline)->shaders,
struct anv_shader_bin *,
pExecutableInfo->executableIndex);
prog_data = (*shader)->prog_data;
break;
}
default:
unreachable("invalid pipeline type");
}
@@ -3214,87 +2397,3 @@ VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
}
VkResult
anv_GetRayTracingShaderGroupHandlesKHR(
VkDevice _device,
VkPipeline _pipeline,
uint32_t firstGroup,
uint32_t groupCount,
size_t dataSize,
void* pData)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
if (pipeline->type != ANV_PIPELINE_RAY_TRACING)
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
struct anv_ray_tracing_pipeline *rt_pipeline =
anv_pipeline_to_ray_tracing(pipeline);
for (uint32_t i = 0; i < groupCount; i++) {
struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i];
memcpy(pData, group->handle, sizeof(group->handle));
pData += sizeof(group->handle);
}
return VK_SUCCESS;
}
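/* Hedged usage sketch from the application side (assumes the extension
 * entry point has been resolved, e.g. via vkGetDeviceProcAddr): pulls all
 * of the 32-byte handles this function writes, with handle_size taken
 * from the device's shaderGroupHandleSize property.
 */
#include <stdlib.h>
#include <vulkan/vulkan.h>

static void *
get_group_handles(VkDevice device, VkPipeline pipeline,
                  uint32_t group_count, uint32_t handle_size)
{
   size_t size = (size_t)group_count * handle_size;
   void *data = malloc(size);
   if (data != NULL &&
       vkGetRayTracingShaderGroupHandlesKHR(device, pipeline,
                                            0 /* firstGroup */,
                                            group_count, size,
                                            data) != VK_SUCCESS) {
      free(data);
      return NULL;
   }
   return data;
}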
VkResult
anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(
VkDevice _device,
VkPipeline pipeline,
uint32_t firstGroup,
uint32_t groupCount,
size_t dataSize,
void* pData)
{
ANV_FROM_HANDLE(anv_device, device, _device);
unreachable("Unimplemented");
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}
VkDeviceSize
anv_GetRayTracingShaderGroupStackSizeKHR(
VkDevice device,
VkPipeline _pipeline,
uint32_t group,
VkShaderGroupShaderKHR groupShader)
{
ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
assert(pipeline->type == ANV_PIPELINE_RAY_TRACING);
struct anv_ray_tracing_pipeline *rt_pipeline =
anv_pipeline_to_ray_tracing(pipeline);
assert(group < rt_pipeline->group_count);
struct anv_shader_bin *bin;
switch (groupShader) {
case VK_SHADER_GROUP_SHADER_GENERAL_KHR:
bin = rt_pipeline->groups[group].general;
break;
case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR:
bin = rt_pipeline->groups[group].closest_hit;
break;
case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
bin = rt_pipeline->groups[group].any_hit;
break;
case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
bin = rt_pipeline->groups[group].intersection;
break;
default:
unreachable("Invalid VkShaderGroupShader enum");
}
if (bin == NULL)
return 0;
return brw_bs_prog_data_const(bin->prog_data)->max_stack_size;
}
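/* Hedged usage sketch: with a dynamic stack size, an application queries
 * each group's maxima like this and then feeds a bound built from the
 * spec formula into vkCmdSetRayTracingPipelineStackSizeKHR.
 */
#include <vulkan/vulkan.h>

static VkDeviceSize
max_general_stack_size(VkDevice device, VkPipeline pipeline,
                       uint32_t group_count)
{
   VkDeviceSize max_size = 0;
   for (uint32_t g = 0; g < group_count; g++) {
      VkDeviceSize s = vkGetRayTracingShaderGroupStackSizeKHR(
         device, pipeline, g, VK_SHADER_GROUP_SHADER_GENERAL_KHR);
      if (s > max_size)
         max_size = s;
   }
   return max_size;
}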

View file

@@ -1215,25 +1215,6 @@ struct anv_device {
struct anv_queue * queues;
struct anv_scratch_pool scratch_pool;
struct anv_bo *rt_scratch_bos[16];
/** Shadow ray query BO
*
* The ray_query_bo only holds the current ray being traced. When using
* more than 1 ray query per thread, we cannot fit all the queries in
* there, so we need another buffer to hold query data that is not
* currently being used by the HW for tracing, similar to a scratch space.
*
* The size of the shadow buffer depends on the number of queries per
* shader.
*/
struct anv_bo *ray_query_shadow_bos[16];
/** Ray query buffer used to communicate with the HW unit.
*/
struct anv_bo *ray_query_bo;
struct anv_shader_bin *rt_trampoline;
struct anv_shader_bin *rt_trivial_return;
pthread_mutex_t mutex;
pthread_cond_t queue_submit;
@@ -2690,19 +2671,6 @@ struct anv_cmd_compute_state {
struct anv_address num_workgroups;
};
struct anv_cmd_ray_tracing_state {
struct anv_cmd_pipeline_state base;
struct anv_ray_tracing_pipeline *pipeline;
bool pipeline_dirty;
struct {
struct anv_bo *bo;
struct brw_rt_scratch_layout layout;
} scratch;
};
/** State required while building cmd buffer */
struct anv_cmd_state {
/* PIPELINE_SELECT.PipelineSelection */
@@ -2712,7 +2680,6 @@ struct anv_cmd_state {
struct anv_cmd_graphics_state gfx;
struct anv_cmd_compute_state compute;
struct anv_cmd_ray_tracing_state rt;
enum anv_pipe_bits pending_pipe_bits;
VkShaderStageFlags descriptors_dirty;
@@ -3034,20 +3001,6 @@ anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
vk_pipeline_cache_object_unref(&shader->base);
}
#define anv_shader_bin_get_bsr(bin, local_arg_offset) ({ \
assert((local_arg_offset) % 8 == 0); \
const struct brw_bs_prog_data *prog_data = \
brw_bs_prog_data_const(bin->prog_data); \
assert(prog_data->simd_size == 8 || prog_data->simd_size == 16); \
\
(struct GFX_BINDLESS_SHADER_RECORD) { \
.OffsetToLocalArguments = (local_arg_offset) / 8, \
.BindlessShaderDispatchMode = \
prog_data->simd_size == 16 ? RT_SIMD16 : RT_SIMD8, \
.KernelStartPointer = bin->kernel.offset, \
}; \
})
struct anv_pipeline_executable {
gl_shader_stage stage;
@@ -3060,7 +3013,6 @@ struct anv_pipeline_executable {
enum anv_pipeline_type {
ANV_PIPELINE_GRAPHICS,
ANV_PIPELINE_COMPUTE,
ANV_PIPELINE_RAY_TRACING,
};
struct anv_pipeline {
@@ -3155,34 +3107,6 @@ struct anv_compute_pipeline {
uint32_t interface_descriptor_data[8];
};
struct anv_rt_shader_group {
VkRayTracingShaderGroupTypeKHR type;
struct anv_shader_bin *general;
struct anv_shader_bin *closest_hit;
struct anv_shader_bin *any_hit;
struct anv_shader_bin *intersection;
/* VK_KHR_ray_tracing_pipeline requires shaderGroupHandleSize == 32 */
uint32_t handle[8];
};
struct anv_ray_tracing_pipeline {
struct anv_pipeline base;
/* All shaders in the pipeline */
struct util_dynarray shaders;
uint32_t group_count;
struct anv_rt_shader_group * groups;
/* If non-zero, this is the default computed stack size as per the stack
* size computation in the Vulkan spec. If zero, that indicates that the
* client has requested a dynamic stack size.
*/
uint32_t stack_size;
};
#define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \
static inline struct anv_##pipe_type##_pipeline * \
anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline) \
@@ -3193,7 +3117,6 @@ struct anv_ray_tracing_pipeline {
ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING)
static inline bool
anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
@@ -3273,12 +3196,6 @@ anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline
return &get_vs_prog_data(pipeline)->base;
}
VkResult
anv_device_init_rt_shaders(struct anv_device *device);
void
anv_device_finish_rt_shaders(struct anv_device *device);
VkResult
anv_pipeline_init(struct anv_pipeline *pipeline,
struct anv_device *device,

View file

@@ -5610,274 +5610,6 @@ genX(cmd_buffer_ray_query_globals)(struct anv_cmd_buffer *cmd_buffer)
#endif
}
#if GFX_VERx10 >= 125
static void
calc_local_trace_size(uint8_t local_shift[3], const uint32_t global[3])
{
unsigned total_shift = 0;
memset(local_shift, 0, 3);
bool progress;
do {
progress = false;
for (unsigned i = 0; i < 3; i++) {
assert(global[i] > 0);
if ((1 << local_shift[i]) < global[i]) {
progress = true;
local_shift[i]++;
total_shift++;
}
if (total_shift == 3)
return;
}
} while(progress);
/* Assign whatever's left to x */
local_shift[0] += 3 - total_shift;
}
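/* Standalone harness around the function above (verbatim copy, for
 * illustration): it hands out at most three doublings (2^3 = 8
 * invocations, matching the SIMD8 COMPUTE_WALKER below), round-robin
 * across the dimensions that still need threads.  For a 1920x1080x1
 * launch it settles on shifts {2, 1, 0}, i.e. a 4x2x1 local group.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void
calc_local_trace_size(uint8_t local_shift[3], const uint32_t global[3])
{
   unsigned total_shift = 0;
   memset(local_shift, 0, 3);
   bool progress;
   do {
      progress = false;
      for (unsigned i = 0; i < 3; i++) {
         assert(global[i] > 0);
         if ((1 << local_shift[i]) < global[i]) {
            progress = true;
            local_shift[i]++;
            total_shift++;
         }
         if (total_shift == 3)
            return;
      }
   } while(progress);
   /* Assign whatever's left to x */
   local_shift[0] += 3 - total_shift;
}

int
main(void)
{
   const uint32_t global[3] = { 1920, 1080, 1 };
   uint8_t shift[3];
   calc_local_trace_size(shift, global);
   printf("%u %u %u\n", shift[0], shift[1], shift[2]); /* prints 2 1 0 */
   return 0;
}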
static struct GFX_RT_SHADER_TABLE
vk_sdar_to_shader_table(const VkStridedDeviceAddressRegionKHR *region)
{
return (struct GFX_RT_SHADER_TABLE) {
.BaseAddress = anv_address_from_u64(region->deviceAddress),
.Stride = region->stride,
};
}
static void
cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
const VkStridedDeviceAddressRegionKHR *raygen_sbt,
const VkStridedDeviceAddressRegionKHR *miss_sbt,
const VkStridedDeviceAddressRegionKHR *hit_sbt,
const VkStridedDeviceAddressRegionKHR *callable_sbt,
bool is_indirect,
uint32_t launch_width,
uint32_t launch_height,
uint32_t launch_depth,
uint64_t launch_size_addr)
{
struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;
struct anv_ray_tracing_pipeline *pipeline = rt->pipeline;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
/* If we have a known degenerate launch size, just bail */
if (!is_indirect &&
(launch_width == 0 || launch_height == 0 || launch_depth == 0))
return;
genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config);
genX(flush_pipeline_select_gpgpu)(cmd_buffer);
cmd_buffer->state.rt.pipeline_dirty = false;
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
/* Add these to the reloc list as they're internal buffers that don't
* actually have relocs to pick them up manually.
*
* TODO(RT): This is a bit of a hack
*/
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
cmd_buffer->batch.alloc,
rt->scratch.bo);
/* Allocate and set up our RT_DISPATCH_GLOBALS */
struct anv_state rtdg_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
BRW_RT_PUSH_CONST_OFFSET +
sizeof(struct anv_push_constants),
64);
struct GFX_RT_DISPATCH_GLOBALS rtdg = {
.MemBaseAddress = (struct anv_address) {
.bo = rt->scratch.bo,
.offset = rt->scratch.layout.ray_stack_start,
},
.CallStackHandler =
anv_shader_bin_get_bsr(cmd_buffer->device->rt_trivial_return, 0),
.AsyncRTStackSize = rt->scratch.layout.ray_stack_stride / 64,
.NumDSSRTStacks = rt->scratch.layout.stack_ids_per_dss,
.MaxBVHLevels = BRW_RT_MAX_BVH_LEVELS,
.Flags = RT_DEPTH_TEST_LESS_EQUAL,
.HitGroupTable = vk_sdar_to_shader_table(hit_sbt),
.MissGroupTable = vk_sdar_to_shader_table(miss_sbt),
.SWStackSize = rt->scratch.layout.sw_stack_size / 64,
.LaunchWidth = launch_width,
.LaunchHeight = launch_height,
.LaunchDepth = launch_depth,
.CallableGroupTable = vk_sdar_to_shader_table(callable_sbt),
};
GFX_RT_DISPATCH_GLOBALS_pack(NULL, rtdg_state.map, &rtdg);
/* Push constants go after the RT_DISPATCH_GLOBALS */
assert(GFX_RT_DISPATCH_GLOBALS_length * 4 <= BRW_RT_PUSH_CONST_OFFSET);
memcpy(rtdg_state.map + BRW_RT_PUSH_CONST_OFFSET,
&cmd_buffer->state.rt.base.push_constants,
sizeof(struct anv_push_constants));
struct anv_address rtdg_addr = {
.bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
.offset = rtdg_state.offset,
};
uint8_t local_size_log2[3];
uint32_t global_size[3] = {};
if (is_indirect) {
/* Pick a local size that's probably ok. We assume most TraceRays calls
* will use a two-dimensional dispatch size. Worst case, our initial
* dispatch will be a little slower than it has to be.
*/
local_size_log2[0] = 2;
local_size_log2[1] = 1;
local_size_log2[2] = 0;
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
struct mi_value launch_size[3] = {
mi_mem32(anv_address_from_u64(launch_size_addr + 0)),
mi_mem32(anv_address_from_u64(launch_size_addr + 4)),
mi_mem32(anv_address_from_u64(launch_size_addr + 8)),
};
/* Store the original launch size into RT_DISPATCH_GLOBALS
*
* TODO: Pull values from genX_bits.h once RT_DISPATCH_GLOBALS gets
* moved into a genX version.
*/
mi_store(&b, mi_mem32(anv_address_add(rtdg_addr, 52)),
mi_value_ref(&b, launch_size[0]));
mi_store(&b, mi_mem32(anv_address_add(rtdg_addr, 56)),
mi_value_ref(&b, launch_size[1]));
mi_store(&b, mi_mem32(anv_address_add(rtdg_addr, 60)),
mi_value_ref(&b, launch_size[2]));
/* Compute the global dispatch size */
for (unsigned i = 0; i < 3; i++) {
if (local_size_log2[i] == 0)
continue;
/* global_size = DIV_ROUND_UP(launch_size, local_size)
*
* Fortunately for us, MI_ALU math is 64-bit and mi_ushr32_imm
* has the semantics of shifting the entire 64-bit value and taking
* the bottom 32 bits, so we don't have to worry about roll-over.
*/
uint32_t local_size = 1 << local_size_log2[i];
launch_size[i] = mi_iadd(&b, launch_size[i],
mi_imm(local_size - 1));
launch_size[i] = mi_ushr32_imm(&b, launch_size[i],
local_size_log2[i]);
}
mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMX), launch_size[0]);
mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMY), launch_size[1]);
mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMZ), launch_size[2]);
} else {
uint32_t launch_size[3] = { launch_width, launch_height, launch_depth };
calc_local_trace_size(local_size_log2, launch_size);
for (unsigned i = 0; i < 3; i++) {
/* We have to be a bit careful here because DIV_ROUND_UP adds to the
* numerator and the sum may overflow. Cast to uint64_t to avoid this.
*/
uint32_t local_size = 1 << local_size_log2[i];
global_size[i] = DIV_ROUND_UP((uint64_t)launch_size[i], local_size);
}
}
anv_batch_emit(&cmd_buffer->batch, GENX(COMPUTE_WALKER), cw) {
cw.IndirectParameterEnable = is_indirect;
cw.PredicateEnable = false;
cw.SIMDSize = SIMD8;
cw.LocalXMaximum = (1 << local_size_log2[0]) - 1;
cw.LocalYMaximum = (1 << local_size_log2[1]) - 1;
cw.LocalZMaximum = (1 << local_size_log2[2]) - 1;
cw.ThreadGroupIDXDimension = global_size[0];
cw.ThreadGroupIDYDimension = global_size[1];
cw.ThreadGroupIDZDimension = global_size[2];
cw.ExecutionMask = 0xff;
cw.EmitInlineParameter = true;
cw.PostSync.MOCS = anv_mocs(pipeline->base.device, NULL, 0);
const gl_shader_stage s = MESA_SHADER_RAYGEN;
struct anv_device *device = cmd_buffer->device;
struct anv_state *surfaces = &cmd_buffer->state.binding_tables[s];
struct anv_state *samplers = &cmd_buffer->state.samplers[s];
cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
.KernelStartPointer = device->rt_trampoline->kernel.offset,
.SamplerStatePointer = samplers->offset,
/* i965: DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4), */
.SamplerCount = 0,
.BindingTablePointer = surfaces->offset,
.NumberofThreadsinGPGPUThreadGroup = 1,
.BTDMode = true,
};
struct brw_rt_raygen_trampoline_params trampoline_params = {
.rt_disp_globals_addr = anv_address_physical(rtdg_addr),
.raygen_bsr_addr = raygen_sbt->deviceAddress,
.is_indirect = is_indirect,
.local_group_size_log2 = {
local_size_log2[0],
local_size_log2[1],
local_size_log2[2],
},
};
STATIC_ASSERT(sizeof(trampoline_params) == 32);
memcpy(cw.InlineData, &trampoline_params, sizeof(trampoline_params));
}
}
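/* A small illustration (not driver code) of the two round-up-divide
 * strategies used above: the add-then-shift form from the MI_ALU path,
 * which is safe there because MI math is 64-bit, and the uint64_t cast
 * that protects the CPU-side DIV_ROUND_UP from 32-bit wrap-around.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   const uint32_t x = UINT32_MAX;
   const uint32_t shift = 2, local = 1u << shift;

   uint32_t wrapped = (x + (local - 1)) >> shift;        /* wraps to 0 */
   uint64_t safe = ((uint64_t)x + (local - 1)) >> shift; /* 2^30 */

   printf("wrapped=%" PRIu32 " safe=%" PRIu64 "\n", wrapped, safe);
   return 0;
}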
void
genX(CmdTraceRaysKHR)(
VkCommandBuffer commandBuffer,
const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable,
uint32_t width,
uint32_t height,
uint32_t depth)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
cmd_buffer_trace_rays(cmd_buffer,
pRaygenShaderBindingTable,
pMissShaderBindingTable,
pHitShaderBindingTable,
pCallableShaderBindingTable,
false /* is_indirect */,
width, height, depth,
0 /* launch_size_addr */);
}
void
genX(CmdTraceRaysIndirectKHR)(
VkCommandBuffer commandBuffer,
const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable,
VkDeviceAddress indirectDeviceAddress)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
cmd_buffer_trace_rays(cmd_buffer,
pRaygenShaderBindingTable,
pMissShaderBindingTable,
pHitShaderBindingTable,
pCallableShaderBindingTable,
true /* is_indirect */,
0, 0, 0, /* width, height, depth, */
indirectDeviceAddress);
}
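/* Hedged client-side sketch of a direct trace call landing in
 * genX(CmdTraceRaysKHR) above.  The single-entry SBT layout (raygen,
 * miss, hit packed back to back at sbt_base with handle_size_aligned
 * stride) is an assumption for illustration, not something this driver
 * requires.
 */
#include <vulkan/vulkan.h>

static void
trace_rays(VkCommandBuffer cmd, VkDeviceAddress sbt_base,
           VkDeviceSize handle_size_aligned,
           uint32_t width, uint32_t height)
{
   const VkStridedDeviceAddressRegionKHR raygen = {
      .deviceAddress = sbt_base,
      .stride = handle_size_aligned,
      .size = handle_size_aligned,
   };
   const VkStridedDeviceAddressRegionKHR miss = {
      .deviceAddress = sbt_base + handle_size_aligned,
      .stride = handle_size_aligned,
      .size = handle_size_aligned,
   };
   const VkStridedDeviceAddressRegionKHR hit = {
      .deviceAddress = sbt_base + 2 * handle_size_aligned,
      .stride = handle_size_aligned,
      .size = handle_size_aligned,
   };
   const VkStridedDeviceAddressRegionKHR callable = { 0 };

   vkCmdTraceRaysKHR(cmd, &raygen, &miss, &hit, &callable,
                     width, height, 1 /* depth */);
}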
#endif /* GFX_VERx10 >= 125 */
static void
genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
uint32_t pipeline)

View file

@@ -2510,54 +2510,3 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
}
#endif /* #if GFX_VERx10 >= 125 */
#if GFX_VERx10 >= 125
void
genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline)
{
for (uint32_t i = 0; i < pipeline->group_count; i++) {
struct anv_rt_shader_group *group = &pipeline->groups[i];
switch (group->type) {
case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: {
struct GFX_RT_GENERAL_SBT_HANDLE sh = {};
sh.General = anv_shader_bin_get_bsr(group->general, 32);
GFX_RT_GENERAL_SBT_HANDLE_pack(NULL, group->handle, &sh);
break;
}
case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: {
struct GFX_RT_TRIANGLES_SBT_HANDLE sh = {};
if (group->closest_hit)
sh.ClosestHit = anv_shader_bin_get_bsr(group->closest_hit, 32);
if (group->any_hit)
sh.AnyHit = anv_shader_bin_get_bsr(group->any_hit, 24);
GFX_RT_TRIANGLES_SBT_HANDLE_pack(NULL, group->handle, &sh);
break;
}
case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
struct GFX_RT_PROCEDURAL_SBT_HANDLE sh = {};
if (group->closest_hit)
sh.ClosestHit = anv_shader_bin_get_bsr(group->closest_hit, 32);
sh.Intersection = anv_shader_bin_get_bsr(group->intersection, 24);
GFX_RT_PROCEDURAL_SBT_HANDLE_pack(NULL, group->handle, &sh);
break;
}
default:
unreachable("Invalid shader group type");
}
}
}
#else
void
genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline)
{
unreachable("Ray tracing not supported");
}
#endif /* GFX_VERx10 >= 125 */