Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2026-05-04 20:38:06 +02:00)
hasvk: remove ray tracing code
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Jason Ekstrand <jason.ekstrand@collabora.com>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Acked-by: Jason Ekstrand <jason.ekstrand@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18208>
parent 4488253570
commit 6cbaaf27ab

7 changed files with 2 additions and 1540 deletions
@@ -309,71 +309,6 @@ ilog2_round_up(uint32_t value)
   return 32 - __builtin_clz(value - 1);
}

static void
anv_cmd_buffer_set_ray_query_buffer(struct anv_cmd_buffer *cmd_buffer,
                                    struct anv_cmd_pipeline_state *pipeline_state,
                                    struct anv_pipeline *pipeline,
                                    VkShaderStageFlags stages)
{
   struct anv_device *device = cmd_buffer->device;

   uint64_t ray_shadow_size =
      align_u64(brw_rt_ray_queries_shadow_stacks_size(device->info,
                                                      pipeline->ray_queries),
                4096);
   if (ray_shadow_size > 0 &&
       (!cmd_buffer->state.ray_query_shadow_bo ||
        cmd_buffer->state.ray_query_shadow_bo->size < ray_shadow_size)) {
      unsigned shadow_size_log2 = MAX2(ilog2_round_up(ray_shadow_size), 16);
      unsigned bucket = shadow_size_log2 - 16;
      assert(bucket < ARRAY_SIZE(device->ray_query_shadow_bos));

      struct anv_bo *bo = p_atomic_read(&device->ray_query_shadow_bos[bucket]);
      if (bo == NULL) {
         struct anv_bo *new_bo;
         VkResult result = anv_device_alloc_bo(device, "RT queries shadow",
                                               ray_shadow_size,
                                               0, /* alloc_flags */
                                               0, /* explicit_address */
                                               &new_bo);
         if (result != VK_SUCCESS) {
            anv_batch_set_error(&cmd_buffer->batch, result);
            return;
         }

         bo = p_atomic_cmpxchg(&device->ray_query_shadow_bos[bucket], NULL, new_bo);
         if (bo != NULL) {
            anv_device_release_bo(device, bo);
         } else {
            bo = new_bo;
         }
      }
      cmd_buffer->state.ray_query_shadow_bo = bo;

      /* Add the ray query buffers to the batch list. */
      anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
                            cmd_buffer->batch.alloc,
                            cmd_buffer->state.ray_query_shadow_bo);
   }

   /* Add the HW buffer to the list of BOs used. */
   anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
                         cmd_buffer->batch.alloc,
                         device->ray_query_bo);

   /* Fill the push constants & mark them dirty. */
   struct anv_state ray_query_global_state =
      anv_genX(device->info, cmd_buffer_ray_query_globals)(cmd_buffer);

   struct anv_address ray_query_globals_addr = (struct anv_address) {
      .bo = device->dynamic_state_pool.block_pool.bo,
      .offset = ray_query_global_state.offset,
   };
   pipeline_state->push_constants.ray_query_globals =
      anv_address_physical(ray_query_globals_addr);
   cmd_buffer->state.push_constants_dirty |= stages;
}
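The removed function above uses a pattern that recurs throughout this commit: buffer sizes are rounded up to a power of two and mapped onto a small bucket array, and the first thread to need a bucket allocates its BO, with a compare-and-swap deciding the winner so no mutex is needed. Below is a minimal standalone sketch of that pattern. It is an illustration, not the driver code: plain malloc stands in for anv_device_alloc_bo, and C11 atomics stand in for Mesa's p_atomic_read/p_atomic_cmpxchg helpers (whose return-value convention differs slightly).

/* Sketch of the bucketed, lock-free one-time allocation pattern, under the
 * assumptions stated above. Bucket math matches the driver: sizes are
 * rounded up to a power of two with a 1 << 16 minimum.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NUM_BUCKETS   16
#define MIN_SIZE_LOG2 16

static _Atomic(void *) buckets[NUM_BUCKETS];

/* Smallest n such that (1ull << n) >= v; caller must pass v >= 2 because
 * __builtin_clzll(0) is undefined (same caveat as the driver's helper).
 */
static unsigned ilog2_round_up(uint64_t v) {
   return 64 - __builtin_clzll(v - 1);
}

static void *get_shadow_buffer(uint64_t size) {
   unsigned log2 = ilog2_round_up(size);
   if (log2 < MIN_SIZE_LOG2)
      log2 = MIN_SIZE_LOG2;
   unsigned bucket = log2 - MIN_SIZE_LOG2;

   void *bo = atomic_load(&buckets[bucket]);
   if (bo == NULL) {
      void *new_bo = malloc(1ull << log2);
      void *expected = NULL;
      if (atomic_compare_exchange_strong(&buckets[bucket], &expected, new_bo)) {
         bo = new_bo;  /* we won the race; our buffer is now published */
      } else {
         free(new_bo); /* another thread won; use its buffer instead */
         bo = expected;
      }
   }
   return bo;
}

int main(void) {
   /* Two requests that round up to the same bucket share one allocation. */
   printf("%p\n", get_shadow_buffer(70000));  /* rounds up to 1 << 17 */
   printf("%p\n", get_shadow_buffer(100000)); /* same bucket, same pointer */
   return 0;
}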
void anv_CmdBindPipeline(
    VkCommandBuffer                             commandBuffer,
    VkPipelineBindPoint                         pipelineBindPoint,
@@ -381,8 +316,6 @@ void anv_CmdBindPipeline(
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
   struct anv_cmd_pipeline_state *state;
   VkShaderStageFlags stages = 0;

   switch (pipelineBindPoint) {
   case VK_PIPELINE_BIND_POINT_COMPUTE: {
@@ -395,9 +328,6 @@ void anv_CmdBindPipeline(
      cmd_buffer->state.compute.pipeline_dirty = true;
      set_dirty_for_bind_map(cmd_buffer, MESA_SHADER_COMPUTE,
                             &compute_pipeline->cs->bind_map);

      state = &cmd_buffer->state.compute.base;
      stages = VK_SHADER_STAGE_COMPUTE_BIT;
      break;
   }

@@ -419,27 +349,6 @@ void anv_CmdBindPipeline(
      /* Apply the non-dynamic state from the pipeline */
      vk_cmd_set_dynamic_graphics_state(&cmd_buffer->vk,
                                        &gfx_pipeline->dynamic_state);

      state = &cmd_buffer->state.gfx.base;
      stages = gfx_pipeline->active_stages;
      break;
   }

   case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: {
      struct anv_ray_tracing_pipeline *rt_pipeline =
         anv_pipeline_to_ray_tracing(pipeline);
      if (cmd_buffer->state.rt.pipeline == rt_pipeline)
         return;

      cmd_buffer->state.rt.pipeline = rt_pipeline;
      cmd_buffer->state.rt.pipeline_dirty = true;

      if (rt_pipeline->stack_size > 0) {
         anv_CmdSetRayTracingPipelineStackSizeKHR(commandBuffer,
                                                  rt_pipeline->stack_size);
      }

      state = &cmd_buffer->state.rt.base;
      break;
   }

@@ -447,9 +356,6 @@ void anv_CmdBindPipeline(
      unreachable("invalid bind point");
      break;
   }

   if (pipeline->ray_queries > 0)
      anv_cmd_buffer_set_ray_query_buffer(cmd_buffer, state, pipeline, stages);
}

static void
@@ -492,16 +398,6 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
      pipe_state = &cmd_buffer->state.compute.base;
      break;

   case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
      stages &= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
                VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
                VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
                VK_SHADER_STAGE_MISS_BIT_KHR |
                VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
                VK_SHADER_STAGE_CALLABLE_BIT_KHR;
      pipe_state = &cmd_buffer->state.rt.base;
      break;

   default:
      unreachable("invalid bind point");
   }
@@ -520,13 +416,7 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
    * as a 64-bit address in the push constants.
    */
   bool update_desc_sets = stages & (VK_SHADER_STAGE_TASK_BIT_NV |
                                     VK_SHADER_STAGE_MESH_BIT_NV |
                                     VK_SHADER_STAGE_RAYGEN_BIT_KHR |
                                     VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
                                     VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
                                     VK_SHADER_STAGE_MISS_BIT_KHR |
                                     VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
                                     VK_SHADER_STAGE_CALLABLE_BIT_KHR);
                                     VK_SHADER_STAGE_MESH_BIT_NV);

   if (update_desc_sets) {
      struct anv_push_constants *push = &pipe_state->push_constants;
@@ -818,17 +708,6 @@ void anv_CmdPushConstants(

      memcpy(pipe_state->push_constants.client_data + offset, pValues, size);
   }
   if (stageFlags & (VK_SHADER_STAGE_RAYGEN_BIT_KHR |
                     VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
                     VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
                     VK_SHADER_STAGE_MISS_BIT_KHR |
                     VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
                     VK_SHADER_STAGE_CALLABLE_BIT_KHR)) {
      struct anv_cmd_pipeline_state *pipe_state =
         &cmd_buffer->state.rt.base;

      memcpy(pipe_state->push_constants.client_data + offset, pValues, size);
   }

   cmd_buffer->state.push_constants_dirty |= stageFlags;
}
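For context, this is the kind of application-side call that would have taken the ray-tracing branch removed above: any push-constant update whose stage flags include an RT stage lands in cmd_buffer->state.rt.base.push_constants. A hypothetical sketch follows; the command buffer, pipeline layout, and parameter block are assumed to exist already, and only the stage flags matter for routing.

/* Hypothetical app-side helper; vkCmdPushConstants is the standard Vulkan
 * entry point, everything else here is illustrative.
 */
#include <vulkan/vulkan.h>

void push_rt_constants(VkCommandBuffer cmd, VkPipelineLayout layout,
                       const float params[4])
{
   vkCmdPushConstants(cmd, layout,
                      VK_SHADER_STAGE_RAYGEN_BIT_KHR |
                      VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
                      VK_SHADER_STAGE_MISS_BIT_KHR,
                      0 /* offset */, 4 * sizeof(float), params);
}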
@@ -850,10 +729,6 @@ anv_cmd_buffer_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
      pipe_state = &cmd_buffer->state.compute.base;
      break;

   case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
      pipe_state = &cmd_buffer->state.rt.base;
      break;

   default:
      unreachable("invalid bind point");
   }
@@ -1043,54 +918,3 @@ void anv_CmdSetDeviceMask(
{
   /* No-op */
}

void anv_CmdSetRayTracingPipelineStackSizeKHR(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    pipelineStackSize)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;
   struct anv_device *device = cmd_buffer->device;

   if (anv_batch_has_error(&cmd_buffer->batch))
      return;

   uint32_t stack_ids_per_dss = 2048; /* TODO */

   unsigned stack_size_log2 = ilog2_round_up(pipelineStackSize);
   if (stack_size_log2 < 10)
      stack_size_log2 = 10;

   if (rt->scratch.layout.total_size == 1 << stack_size_log2)
      return;

   brw_rt_compute_scratch_layout(&rt->scratch.layout, device->info,
                                 stack_ids_per_dss, 1 << stack_size_log2);

   unsigned bucket = stack_size_log2 - 10;
   assert(bucket < ARRAY_SIZE(device->rt_scratch_bos));

   struct anv_bo *bo = p_atomic_read(&device->rt_scratch_bos[bucket]);
   if (bo == NULL) {
      struct anv_bo *new_bo;
      VkResult result = anv_device_alloc_bo(device, "RT scratch",
                                            rt->scratch.layout.total_size,
                                            0, /* alloc_flags */
                                            0, /* explicit_address */
                                            &new_bo);
      if (result != VK_SUCCESS) {
         rt->scratch.layout.total_size = 0;
         anv_batch_set_error(&cmd_buffer->batch, result);
         return;
      }

      bo = p_atomic_cmpxchg(&device->rt_scratch_bos[bucket], NULL, new_bo);
      if (bo != NULL) {
         anv_device_release_bo(device, bo);
      } else {
         bo = new_bo;
      }
   }

   rt->scratch.bo = bo;
}
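A quick worked example of the clamp-and-bucket arithmetic in the function above, using hypothetical pipelineStackSize values (the mapping itself is exactly what the removed code computes: round up to a power of two, clamp to at least 1 << 10 bytes, and use the exponent to pick the rt_scratch_bos slot):

/* Sketch with hypothetical inputs; stack_bucket() mirrors the two lines of
 * driver code that clamp stack_size_log2 and derive the bucket index.
 */
#include <assert.h>

static unsigned stack_bucket(unsigned stack_size_log2) {
   return (stack_size_log2 < 10 ? 10 : stack_size_log2) - 10;
}

int main(void) {
   assert(stack_bucket(8)  == 0); /* e.g. pipelineStackSize 200   -> 1 KiB  */
   assert(stack_bucket(12) == 2); /* e.g. pipelineStackSize 3000  -> 4 KiB  */
   assert(stack_bucket(16) == 6); /* e.g. pipelineStackSize 40000 -> 64 KiB */
   return 0;
}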
@@ -226,7 +226,6 @@ get_device_extensions(const struct anv_physical_device *device,
                                  device->use_call_secondary,
      .KHR_pipeline_executable_properties = true,
      .KHR_push_descriptor = true,
      .KHR_ray_query = device->info.has_ray_tracing,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
@@ -1562,12 +1561,6 @@ void anv_GetPhysicalDeviceFeatures2(
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR: {
         VkPhysicalDeviceRayQueryFeaturesKHR *features = (void *)ext;
         features->rayQuery = pdevice->info.has_ray_tracing;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
         VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
         features->robustBufferAccess2 = true;
@@ -1939,14 +1932,6 @@ anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice,
      if (pdevice->compiler->scalar_stage[stage])
         scalar_stages |= mesa_to_vk_shader_stage(stage);
   }
   if (pdevice->vk.supported_extensions.KHR_ray_tracing_pipeline) {
      scalar_stages |= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
                       VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
                       VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
                       VK_SHADER_STAGE_MISS_BIT_KHR |
                       VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
                       VK_SHADER_STAGE_CALLABLE_BIT_KHR;
   }
   if (pdevice->vk.supported_extensions.NV_mesh_shader) {
      scalar_stages |= VK_SHADER_STAGE_TASK_BIT_NV |
                       VK_SHADER_STAGE_MESH_BIT_NV;
@@ -3314,22 +3299,9 @@ VkResult anv_CreateDevice(
                                device->workaround_bo->size,
                                INTEL_DEBUG_BLOCK_TYPE_FRAME);

   if (device->vk.enabled_extensions.KHR_ray_query) {
      uint32_t ray_queries_size =
         align_u32(brw_rt_ray_queries_hw_stacks_size(device->info), 4096);

      result = anv_device_alloc_bo(device, "ray queries",
                                   ray_queries_size,
                                   0,
                                   0 /* explicit_address */,
                                   &device->ray_query_bo);
      if (result != VK_SUCCESS)
         goto fail_workaround_bo;
   }

   result = anv_device_init_trivial_batch(device);
   if (result != VK_SUCCESS)
      goto fail_ray_query_bo;
      goto fail_workaround_bo;

   if (device->info->ver >= 12 &&
       device->vk.enabled_extensions.KHR_fragment_shading_rate) {
@@ -3367,9 +3339,6 @@ VkResult anv_CreateDevice(

   anv_scratch_pool_init(device, &device->scratch_pool);

   /* TODO(RT): Do we want some sort of data structure for this? */
   memset(device->rt_scratch_bos, 0, sizeof(device->rt_scratch_bos));

   result = anv_genX(device->info, init_device_state)(device);
   if (result != VK_SUCCESS)
      goto fail_trivial_batch_bo_and_scratch_pool;
@@ -3395,12 +3364,6 @@ VkResult anv_CreateDevice(
      goto fail_default_pipeline_cache;
   }

   result = anv_device_init_rt_shaders(device);
   if (result != VK_SUCCESS) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_internal_cache;
   }

   anv_device_init_blorp(device);

   anv_device_init_border_colors(device);
@@ -3413,17 +3376,12 @@ VkResult anv_CreateDevice(

   return VK_SUCCESS;

fail_internal_cache:
   vk_pipeline_cache_destroy(device->internal_cache, NULL);
fail_default_pipeline_cache:
   vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
fail_trivial_batch_bo_and_scratch_pool:
   anv_scratch_pool_finish(device, &device->scratch_pool);
fail_trivial_batch:
   anv_device_release_bo(device, device->trivial_batch_bo);
fail_ray_query_bo:
   if (device->ray_query_bo)
      anv_device_release_bo(device, device->ray_query_bo);
fail_workaround_bo:
   anv_device_release_bo(device, device->workaround_bo);
fail_surface_aux_map_pool:
@@ -3486,8 +3444,6 @@ void anv_DestroyDevice(

   anv_device_finish_blorp(device);

   anv_device_finish_rt_shaders(device);

   vk_pipeline_cache_destroy(device->internal_cache, NULL);
   vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);

@@ -3502,20 +3458,8 @@ void anv_DestroyDevice(
   anv_state_pool_free(&device->dynamic_state_pool, device->cps_states);
#endif

   for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) {
      if (device->rt_scratch_bos[i] != NULL)
         anv_device_release_bo(device, device->rt_scratch_bos[i]);
   }

   anv_scratch_pool_finish(device, &device->scratch_pool);

   if (device->vk.enabled_extensions.KHR_ray_query) {
      for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_shadow_bos); i++) {
         if (device->ray_query_shadow_bos[i] != NULL)
            anv_device_release_bo(device, device->ray_query_shadow_bos[i]);
      }
      anv_device_release_bo(device, device->ray_query_bo);
   }
   anv_device_release_bo(device, device->workaround_bo);
   anv_device_release_bo(device, device->trivial_batch_bo);

@@ -175,6 +175,3 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,

void
genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline);

void
genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline);
@@ -95,8 +95,6 @@ anv_shader_stage_to_nir(struct anv_device *device,
      .post_depth_coverage = pdevice->info.ver >= 9,
      .runtime_descriptor_array = true,
      .float_controls = pdevice->info.ver >= 8,
      .ray_query = pdevice->info.has_ray_tracing,
      .ray_tracing = pdevice->info.has_ray_tracing,
      .shader_clock = true,
      .shader_viewport_index_layer = true,
      .stencil_export = pdevice->info.ver >= 9,
@@ -251,17 +249,6 @@ void anv_DestroyPipeline(
      break;
   }

   case ANV_PIPELINE_RAY_TRACING: {
      struct anv_ray_tracing_pipeline *rt_pipeline =
         anv_pipeline_to_ray_tracing(pipeline);

      util_dynarray_foreach(&rt_pipeline->shaders,
                            struct anv_shader_bin *, shader) {
         anv_shader_bin_unref(device, *shader);
      }
      break;
   }

   default:
      unreachable("invalid pipeline type");
   }
@@ -600,51 +587,6 @@ anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline,
                                     struct anv_pipeline_layout *layout,
                                     struct anv_pipeline_stage *stage,
                                     unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout != NULL)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
   _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs));

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline,
                                              struct anv_pipeline_layout *layout,
                                              struct anv_pipeline_stage *intersection,
                                              struct anv_pipeline_stage *any_hit,
                                              unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout != NULL)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1));
   _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs));
   _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1));
   _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs));

   _mesa_sha1_final(&ctx, sha1_out);
}

static nir_shader *
anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
                           struct vk_pipeline_cache *cache,
@@ -2230,757 +2172,6 @@ VkResult anv_CreateGraphicsPipelines(
   return result;
}

static VkResult
compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
                         struct vk_pipeline_cache *cache,
                         nir_shader *nir,
                         struct anv_pipeline_stage *stage,
                         struct anv_shader_bin **shader_out,
                         void *mem_ctx)
{
   const struct brw_compiler *compiler =
      pipeline->base.device->physical->compiler;
   const struct intel_device_info *devinfo = compiler->devinfo;

   nir_shader **resume_shaders = NULL;
   uint32_t num_resume_shaders = 0;
   if (nir->info.stage != MESA_SHADER_COMPUTE) {
      NIR_PASS(_, nir, nir_lower_shader_calls,
               nir_address_format_64bit_global,
               BRW_BTD_STACK_ALIGN,
               &resume_shaders, &num_resume_shaders, mem_ctx);
      NIR_PASS(_, nir, brw_nir_lower_shader_calls);
      NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);
   }

   for (unsigned i = 0; i < num_resume_shaders; i++) {
      NIR_PASS(_, resume_shaders[i], brw_nir_lower_shader_calls);
      NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo);
   }

   struct brw_compile_bs_params params = {
      .nir = nir,
      .key = &stage->key.bs,
      .prog_data = &stage->prog_data.bs,
      .num_resume_shaders = num_resume_shaders,
      .resume_shaders = resume_shaders,

      .stats = stage->stats,
      .log_data = pipeline->base.device,
   };

   stage->code = brw_compile_bs(compiler, mem_ctx, &params);
   if (stage->code == NULL)
      return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Ray-tracing shaders don't have a "real" bind map */
   struct anv_pipeline_bind_map empty_bind_map = {};

   const unsigned code_size = stage->prog_data.base.program_size;
   struct anv_shader_bin *bin =
      anv_device_upload_kernel(pipeline->base.device,
                               cache,
                               stage->stage,
                               &stage->cache_key, sizeof(stage->cache_key),
                               stage->code, code_size,
                               &stage->prog_data.base,
                               sizeof(stage->prog_data.bs),
                               stage->stats, 1,
                               NULL, &empty_bind_map);
   if (bin == NULL)
      return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* TODO: Figure out executables for resume shaders */
   anv_pipeline_add_executables(&pipeline->base, stage, bin);
   util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin);

   *shader_out = bin;

   return VK_SUCCESS;
}

static bool
is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
{
   if (info->pDynamicState == NULL)
      return false;

   for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
      if (info->pDynamicState->pDynamicStates[i] ==
          VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
         return true;
   }

   return false;
}

static void
anv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline,
                                        const VkRayTracingPipelineCreateInfoKHR *info,
                                        uint32_t *stack_max)
{
   if (is_rt_stack_size_dynamic(info)) {
      pipeline->stack_size = 0; /* 0 means dynamic */
   } else {
      /* From the Vulkan spec:
       *
       *    "If the stack size is not set explicitly, the stack size for a
       *    pipeline is:
       *
       *       rayGenStackMax +
       *       min(1, maxPipelineRayRecursionDepth) ×
       *       max(closestHitStackMax, missStackMax,
       *           intersectionStackMax + anyHitStackMax) +
       *       max(0, maxPipelineRayRecursionDepth-1) ×
       *       max(closestHitStackMax, missStackMax) +
       *       2 × callableStackMax"
       */
      pipeline->stack_size =
         stack_max[MESA_SHADER_RAYGEN] +
         MIN2(1, info->maxPipelineRayRecursionDepth) *
         MAX4(stack_max[MESA_SHADER_CLOSEST_HIT],
              stack_max[MESA_SHADER_MISS],
              stack_max[MESA_SHADER_INTERSECTION],
              stack_max[MESA_SHADER_ANY_HIT]) +
         MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) *
         MAX2(stack_max[MESA_SHADER_CLOSEST_HIT],
              stack_max[MESA_SHADER_MISS]) +
         2 * stack_max[MESA_SHADER_CALLABLE];

      /* This is an extremely unlikely case but we need to set it to some
       * non-zero value so that we don't accidentally think it's dynamic.
       * Our minimum stack size is 2KB anyway so we could set it to any
       * small value we like.
       */
      if (pipeline->stack_size == 0)
         pipeline->stack_size = 1;
   }
}
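To make the spec formula above concrete, here is a worked example with hypothetical per-stage stack maxima (these numbers are illustrative only; plain C ternaries stand in for the MIN2/MAX2/MAX4 macros and the stack_max[] array):

/* Worked example of the default stack-size computation, under the
 * assumptions stated above.
 */
#include <stdio.h>

static unsigned max2(unsigned a, unsigned b) { return a > b ? a : b; }

int main(void) {
   unsigned raygen = 1024, closest_hit = 2048, miss = 1536;
   unsigned intersection = 1024, any_hit = 512, callable = 0;
   unsigned recursion_depth = 2; /* maxPipelineRayRecursionDepth */

   unsigned stack_size =
      raygen +
      (recursion_depth < 1 ? recursion_depth : 1) *
         max2(max2(closest_hit, miss), intersection + any_hit) +
      (recursion_depth > 1 ? recursion_depth - 1 : 0) *
         max2(closest_hit, miss) +
      2 * callable;

   printf("%u\n", stack_size); /* 1024 + 2048 + 2048 + 0 = 5120 */
   return 0;
}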
static struct anv_pipeline_stage *
anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
                                     const VkRayTracingPipelineCreateInfoKHR *info,
                                     void *pipeline_ctx)
{
   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   /* Create enough stage entries for all shader modules plus potential
    * combinations in the groups.
    */
   struct anv_pipeline_stage *stages =
      rzalloc_array(pipeline_ctx, struct anv_pipeline_stage, info->stageCount);

   for (uint32_t i = 0; i < info->stageCount; i++) {
      const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
      if (vk_pipeline_shader_stage_is_null(sinfo))
         continue;

      int64_t stage_start = os_time_get_nano();

      stages[i] = (struct anv_pipeline_stage) {
         .stage = vk_to_mesa_shader_stage(sinfo->stage),
         .info = sinfo,
         .cache_key = {
            .stage = vk_to_mesa_shader_stage(sinfo->stage),
         },
         .feedback = {
            .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
         },
      };

      populate_bs_prog_key(pipeline->base.device,
                           pipeline->base.device->robust_buffer_access,
                           &stages[i].key.bs);

      vk_pipeline_hash_shader_stage(sinfo, stages[i].shader_sha1);

      if (stages[i].stage != MESA_SHADER_INTERSECTION) {
         anv_pipeline_hash_ray_tracing_shader(pipeline, layout, &stages[i],
                                              stages[i].cache_key.sha1);
      }

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   for (uint32_t i = 0; i < info->groupCount; i++) {
      const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];

      if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR)
         continue;

      int64_t stage_start = os_time_get_nano();

      uint32_t intersection_idx = ginfo->intersectionShader;
      assert(intersection_idx < info->stageCount);

      uint32_t any_hit_idx = ginfo->anyHitShader;
      if (any_hit_idx != VK_SHADER_UNUSED_KHR) {
         assert(any_hit_idx < info->stageCount);
         anv_pipeline_hash_ray_tracing_combined_shader(pipeline,
                                                       layout,
                                                       &stages[intersection_idx],
                                                       &stages[any_hit_idx],
                                                       stages[intersection_idx].cache_key.sha1);
      } else {
         anv_pipeline_hash_ray_tracing_shader(pipeline, layout,
                                              &stages[intersection_idx],
                                              stages[intersection_idx].cache_key.sha1);
      }

      stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start;
   }

   return stages;
}

static bool
anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
                                 struct vk_pipeline_cache *cache,
                                 const VkRayTracingPipelineCreateInfoKHR *info,
                                 struct anv_pipeline_stage *stages,
                                 uint32_t *stack_max)
{
   uint32_t shaders = 0, cache_hits = 0;
   for (uint32_t i = 0; i < info->stageCount; i++) {
      if (stages[i].info == NULL)
         continue;

      shaders++;

      int64_t stage_start = os_time_get_nano();

      bool cache_hit;
      stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache,
                                                   &stages[i].cache_key,
                                                   sizeof(stages[i].cache_key),
                                                   &cache_hit);
      if (cache_hit) {
         cache_hits++;
         stages[i].feedback.flags |=
            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
      }

      if (stages[i].bin != NULL) {
         anv_pipeline_add_executables(&pipeline->base, &stages[i], stages[i].bin);
         util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, stages[i].bin);

         uint32_t stack_size =
            brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
         stack_max[stages[i].stage] =
            MAX2(stack_max[stages[i].stage], stack_size);
      }

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   return cache_hits == shaders;
}

static VkResult
anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
                                 struct vk_pipeline_cache *cache,
                                 const VkRayTracingPipelineCreateInfoKHR *info)
{
   const struct intel_device_info *devinfo = pipeline->base.device->info;
   VkResult result;

   VkPipelineCreationFeedback pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
   };
   int64_t pipeline_start = os_time_get_nano();

   void *pipeline_ctx = ralloc_context(NULL);

   struct anv_pipeline_stage *stages =
      anv_pipeline_init_ray_tracing_stages(pipeline, info, pipeline_ctx);

   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   const bool skip_cache_lookup =
      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);

   uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {};

   if (!skip_cache_lookup &&
       anv_pipeline_load_cached_shaders(pipeline, cache, info, stages, stack_max)) {
      pipeline_feedback.flags |=
         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
      goto done;
   }

   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) {
      ralloc_free(pipeline_ctx);
      return VK_PIPELINE_COMPILE_REQUIRED;
   }

   for (uint32_t i = 0; i < info->stageCount; i++) {
      if (stages[i].info == NULL)
         continue;

      int64_t stage_start = os_time_get_nano();

      stages[i].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
                                                 pipeline_ctx, &stages[i]);
      if (stages[i].nir == NULL) {
         ralloc_free(pipeline_ctx);
         return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i],
                             layout, false /* use_primitive_replication */);

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   for (uint32_t i = 0; i < info->stageCount; i++) {
      if (stages[i].info == NULL)
         continue;

      /* Shader found in cache already. */
      if (stages[i].bin != NULL)
         continue;

      /* We handle intersection shaders as part of the group */
      if (stages[i].stage == MESA_SHADER_INTERSECTION)
         continue;

      int64_t stage_start = os_time_get_nano();

      void *stage_ctx = ralloc_context(pipeline_ctx);

      nir_shader *nir = nir_shader_clone(stage_ctx, stages[i].nir);
      switch (stages[i].stage) {
      case MESA_SHADER_RAYGEN:
         brw_nir_lower_raygen(nir);
         break;

      case MESA_SHADER_ANY_HIT:
         brw_nir_lower_any_hit(nir, devinfo);
         break;

      case MESA_SHADER_CLOSEST_HIT:
         brw_nir_lower_closest_hit(nir);
         break;

      case MESA_SHADER_MISS:
         brw_nir_lower_miss(nir);
         break;

      case MESA_SHADER_INTERSECTION:
         unreachable("These are handled later");

      case MESA_SHADER_CALLABLE:
         brw_nir_lower_callable(nir);
         break;

      default:
         unreachable("Invalid ray-tracing shader stage");
      }

      result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i],
                                        &stages[i].bin, stage_ctx);
      if (result != VK_SUCCESS) {
         ralloc_free(pipeline_ctx);
         return result;
      }

      uint32_t stack_size =
         brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
      stack_max[stages[i].stage] = MAX2(stack_max[stages[i].stage], stack_size);

      ralloc_free(stage_ctx);

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   for (uint32_t i = 0; i < info->groupCount; i++) {
      const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
      struct anv_rt_shader_group *group = &pipeline->groups[i];
      group->type = ginfo->type;
      switch (ginfo->type) {
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
         assert(ginfo->generalShader < info->stageCount);
         group->general = stages[ginfo->generalShader].bin;
         break;

      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
         if (ginfo->anyHitShader < info->stageCount)
            group->any_hit = stages[ginfo->anyHitShader].bin;

         if (ginfo->closestHitShader < info->stageCount)
            group->closest_hit = stages[ginfo->closestHitShader].bin;
         break;

      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
         if (ginfo->closestHitShader < info->stageCount)
            group->closest_hit = stages[ginfo->closestHitShader].bin;

         uint32_t intersection_idx = info->pGroups[i].intersectionShader;
         assert(intersection_idx < info->stageCount);

         /* Only compile this stage if not already found in the cache. */
         if (stages[intersection_idx].bin == NULL) {
            /* The any-hit and intersection shader have to be combined */
            uint32_t any_hit_idx = info->pGroups[i].anyHitShader;
            const nir_shader *any_hit = NULL;
            if (any_hit_idx < info->stageCount)
               any_hit = stages[any_hit_idx].nir;

            void *group_ctx = ralloc_context(pipeline_ctx);
            nir_shader *intersection =
               nir_shader_clone(group_ctx, stages[intersection_idx].nir);

            brw_nir_lower_combined_intersection_any_hit(intersection, any_hit,
                                                        devinfo);

            result = compile_upload_rt_shader(pipeline, cache,
                                              intersection,
                                              &stages[intersection_idx],
                                              &group->intersection,
                                              group_ctx);
            ralloc_free(group_ctx);
            if (result != VK_SUCCESS)
               return result;
         } else {
            group->intersection = stages[intersection_idx].bin;
         }

         uint32_t stack_size =
            brw_bs_prog_data_const(group->intersection->prog_data)->max_stack_size;
         stack_max[MESA_SHADER_INTERSECTION] =
            MAX2(stack_max[MESA_SHADER_INTERSECTION], stack_size);

         break;
      }

      default:
         unreachable("Invalid ray tracing shader group type");
      }
   }

done:
   ralloc_free(pipeline_ctx);

   anv_pipeline_compute_ray_tracing_stacks(pipeline, info, stack_max);

   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;

   const VkPipelineCreationFeedbackCreateInfo *create_feedback =
      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
   if (create_feedback) {
      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;

      assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
      for (uint32_t i = 0; i < info->stageCount; i++) {
         gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
         create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
      }
   }

   return VK_SUCCESS;
}

VkResult
anv_device_init_rt_shaders(struct anv_device *device)
{
   if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
      return VK_SUCCESS;

   bool cache_hit;

   struct brw_rt_trampoline {
      char name[16];
      struct brw_cs_prog_key key;
   } trampoline_key = {
      .name = "rt-trampoline",
   };
   device->rt_trampoline =
      anv_device_search_for_kernel(device, device->internal_cache,
                                   &trampoline_key, sizeof(trampoline_key),
                                   &cache_hit);
   if (device->rt_trampoline == NULL) {

      void *tmp_ctx = ralloc_context(NULL);
      nir_shader *trampoline_nir =
         brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);

      trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;

      struct anv_pipeline_bind_map bind_map = {
         .surface_count = 0,
         .sampler_count = 0,
      };
      uint32_t dummy_params[4] = { 0, };
      struct brw_cs_prog_data trampoline_prog_data = {
         .base.nr_params = 4,
         .base.param = dummy_params,
         .uses_inline_data = true,
         .uses_btd_stack_ids = true,
      };
      struct brw_compile_cs_params params = {
         .nir = trampoline_nir,
         .key = &trampoline_key.key,
         .prog_data = &trampoline_prog_data,
         .log_data = device,
      };
      const unsigned *tramp_data =
         brw_compile_cs(device->physical->compiler, tmp_ctx, &params);

      device->rt_trampoline =
         anv_device_upload_kernel(device, device->internal_cache,
                                  MESA_SHADER_COMPUTE,
                                  &trampoline_key, sizeof(trampoline_key),
                                  tramp_data,
                                  trampoline_prog_data.base.program_size,
                                  &trampoline_prog_data.base,
                                  sizeof(trampoline_prog_data),
                                  NULL, 0, NULL, &bind_map);

      ralloc_free(tmp_ctx);

      if (device->rt_trampoline == NULL)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, device->rt_trampoline);

   struct brw_rt_trivial_return {
      char name[16];
      struct brw_bs_prog_key key;
   } return_key = {
      .name = "rt-trivial-ret",
   };
   device->rt_trivial_return =
      anv_device_search_for_kernel(device, device->internal_cache,
                                   &return_key, sizeof(return_key),
                                   &cache_hit);
   if (device->rt_trivial_return == NULL) {
      void *tmp_ctx = ralloc_context(NULL);
      nir_shader *trivial_return_nir =
         brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx);

      NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, device->info);

      struct anv_pipeline_bind_map bind_map = {
         .surface_count = 0,
         .sampler_count = 0,
      };
      struct brw_bs_prog_data return_prog_data = { 0, };
      struct brw_compile_bs_params params = {
         .nir = trivial_return_nir,
         .key = &return_key.key,
         .prog_data = &return_prog_data,

         .log_data = device,
      };
      const unsigned *return_data =
         brw_compile_bs(device->physical->compiler, tmp_ctx, &params);

      device->rt_trivial_return =
         anv_device_upload_kernel(device, device->internal_cache,
                                  MESA_SHADER_CALLABLE,
                                  &return_key, sizeof(return_key),
                                  return_data, return_prog_data.base.program_size,
                                  &return_prog_data.base, sizeof(return_prog_data),
                                  NULL, 0, NULL, &bind_map);

      ralloc_free(tmp_ctx);

      if (device->rt_trivial_return == NULL)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, device->rt_trivial_return);

   return VK_SUCCESS;
}

void
anv_device_finish_rt_shaders(struct anv_device *device)
{
   if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
      return;
}

static VkResult
anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
                              struct anv_device *device,
                              struct vk_pipeline_cache *cache,
                              const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                              const VkAllocationCallbacks *alloc)
{
   VkResult result;

   util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx);

   result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo);
   if (result != VK_SUCCESS)
      goto fail;

   anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false);

   return VK_SUCCESS;

fail:
   util_dynarray_foreach(&pipeline->shaders,
                         struct anv_shader_bin *, shader) {
      anv_shader_bin_unref(device, *shader);
   }
   return result;
}

static void
assert_rt_stage_index_valid(const VkRayTracingPipelineCreateInfoKHR* pCreateInfo,
                            uint32_t stage_idx,
                            VkShaderStageFlags valid_stages)
{
   if (stage_idx == VK_SHADER_UNUSED_KHR)
      return;

   assert(stage_idx <= pCreateInfo->stageCount);
   assert(util_bitcount(pCreateInfo->pStages[stage_idx].stage) == 1);
   assert(pCreateInfo->pStages[stage_idx].stage & valid_stages);
}

static VkResult
anv_ray_tracing_pipeline_create(
    VkDevice                                    _device,
    struct vk_pipeline_cache *                  cache,
    const VkRayTracingPipelineCreateInfoKHR*    pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipeline)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR);

   VK_MULTIALLOC(ma);
   VK_MULTIALLOC_DECL(&ma, struct anv_ray_tracing_pipeline, pipeline, 1);
   VK_MULTIALLOC_DECL(&ma, struct anv_rt_shader_group, groups, pCreateInfo->groupCount);
   if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator,
                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = anv_pipeline_init(&pipeline->base, device,
                              ANV_PIPELINE_RAY_TRACING, pCreateInfo->flags,
                              pAllocator);
   if (result != VK_SUCCESS) {
      vk_free2(&device->vk.alloc, pAllocator, pipeline);
      return result;
   }

   pipeline->group_count = pCreateInfo->groupCount;
   pipeline->groups = groups;

   ASSERTED const VkShaderStageFlags ray_tracing_stages =
      VK_SHADER_STAGE_RAYGEN_BIT_KHR |
      VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
      VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
      VK_SHADER_STAGE_MISS_BIT_KHR |
      VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
      VK_SHADER_STAGE_CALLABLE_BIT_KHR;

   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
      assert((pCreateInfo->pStages[i].stage & ~ray_tracing_stages) == 0);

   for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) {
      const VkRayTracingShaderGroupCreateInfoKHR *ginfo =
         &pCreateInfo->pGroups[i];
      assert_rt_stage_index_valid(pCreateInfo, ginfo->generalShader,
                                  VK_SHADER_STAGE_RAYGEN_BIT_KHR |
                                  VK_SHADER_STAGE_MISS_BIT_KHR |
                                  VK_SHADER_STAGE_CALLABLE_BIT_KHR);
      assert_rt_stage_index_valid(pCreateInfo, ginfo->closestHitShader,
                                  VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR);
      assert_rt_stage_index_valid(pCreateInfo, ginfo->anyHitShader,
                                  VK_SHADER_STAGE_ANY_HIT_BIT_KHR);
      assert_rt_stage_index_valid(pCreateInfo, ginfo->intersectionShader,
                                  VK_SHADER_STAGE_INTERSECTION_BIT_KHR);
      switch (ginfo->type) {
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
         assert(ginfo->generalShader < pCreateInfo->stageCount);
         assert(ginfo->anyHitShader == VK_SHADER_UNUSED_KHR);
         assert(ginfo->closestHitShader == VK_SHADER_UNUSED_KHR);
         assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR);
         break;

      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
         assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR);
         assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR);
         break;

      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
         assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR);
         break;

      default:
         unreachable("Invalid ray-tracing shader group type");
      }
   }

   result = anv_ray_tracing_pipeline_init(pipeline, device, cache,
                                          pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      anv_pipeline_finish(&pipeline->base, device, pAllocator);
      vk_free2(&device->vk.alloc, pAllocator, pipeline);
      return result;
   }

   anv_genX(device->info, ray_tracing_pipeline_emit)(pipeline);

   *pPipeline = anv_pipeline_to_handle(&pipeline->base);

   return pipeline->base.batch.status;
}

VkResult
anv_CreateRayTracingPipelinesKHR(
    VkDevice                                    _device,
    VkDeferredOperationKHR                      deferredOperation,
    VkPipelineCache                             pipelineCache,
    uint32_t                                    createInfoCount,
    const VkRayTracingPipelineCreateInfoKHR*    pCreateInfos,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipelines)
{
   ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);

   VkResult result = VK_SUCCESS;

   unsigned i;
   for (i = 0; i < createInfoCount; i++) {
      VkResult res = anv_ray_tracing_pipeline_create(_device, pipeline_cache,
                                                     &pCreateInfos[i],
                                                     pAllocator, &pPipelines[i]);

      if (res == VK_SUCCESS)
         continue;

      /* Bail out on the first error as it is not obvious what error should
       * be reported upon 2 different failures. */
      result = res;
      if (result != VK_PIPELINE_COMPILE_REQUIRED)
         break;

      pPipelines[i] = VK_NULL_HANDLE;

      if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
         break;
   }

   for (; i < createInfoCount; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

#define WRITE_STR(field, ...) ({ \
   memset(field, 0, sizeof(field)); \
   UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
@@ -3058,14 +2249,6 @@ VkResult anv_GetPipelineExecutableStatisticsKHR(
      prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
      break;
   }
   case ANV_PIPELINE_RAY_TRACING: {
      struct anv_shader_bin **shader =
         util_dynarray_element(&anv_pipeline_to_ray_tracing(pipeline)->shaders,
                               struct anv_shader_bin *,
                               pExecutableInfo->executableIndex);
      prog_data = (*shader)->prog_data;
      break;
   }
   default:
      unreachable("invalid pipeline type");
   }
@@ -3214,87 +2397,3 @@ VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(

   return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
}

VkResult
anv_GetRayTracingShaderGroupHandlesKHR(
    VkDevice                                    _device,
    VkPipeline                                  _pipeline,
    uint32_t                                    firstGroup,
    uint32_t                                    groupCount,
    size_t                                      dataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);

   if (pipeline->type != ANV_PIPELINE_RAY_TRACING)
      return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);

   struct anv_ray_tracing_pipeline *rt_pipeline =
      anv_pipeline_to_ray_tracing(pipeline);

   for (uint32_t i = 0; i < groupCount; i++) {
      struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i];
      memcpy(pData, group->handle, sizeof(group->handle));
      pData += sizeof(group->handle);
   }

   return VK_SUCCESS;
}

VkResult
anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(
    VkDevice                                    _device,
    VkPipeline                                  pipeline,
    uint32_t                                    firstGroup,
    uint32_t                                    groupCount,
    size_t                                      dataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkDeviceSize
anv_GetRayTracingShaderGroupStackSizeKHR(
    VkDevice                                    device,
    VkPipeline                                  _pipeline,
    uint32_t                                    group,
    VkShaderGroupShaderKHR                      groupShader)
{
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
   assert(pipeline->type == ANV_PIPELINE_RAY_TRACING);

   struct anv_ray_tracing_pipeline *rt_pipeline =
      anv_pipeline_to_ray_tracing(pipeline);

   assert(group < rt_pipeline->group_count);

   struct anv_shader_bin *bin;
   switch (groupShader) {
   case VK_SHADER_GROUP_SHADER_GENERAL_KHR:
      bin = rt_pipeline->groups[group].general;
      break;

   case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR:
      bin = rt_pipeline->groups[group].closest_hit;
      break;

   case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
      bin = rt_pipeline->groups[group].any_hit;
      break;

   case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
      bin = rt_pipeline->groups[group].intersection;
      break;

   default:
      unreachable("Invalid VkShaderGroupShader enum");
   }

   if (bin == NULL)
      return 0;

   return brw_bs_prog_data_const(bin->prog_data)->max_stack_size;
}
@@ -1215,25 +1215,6 @@ struct anv_device {
   struct anv_queue *queues;

   struct anv_scratch_pool scratch_pool;
   struct anv_bo *rt_scratch_bos[16];

   /** Shadow ray query BO
    *
    * The ray_query_bo only holds the current ray being traced. When using
    * more than 1 ray query per thread, we cannot fit all the queries in
    * there, so we need another buffer to hold query data that is not
    * currently being used by the HW for tracing, similar to a scratch space.
    *
    * The size of the shadow buffer depends on the number of queries per
    * shader.
    */
   struct anv_bo *ray_query_shadow_bos[16];
   /** Ray query buffer used to communicate with the HW unit. */
   struct anv_bo *ray_query_bo;

   struct anv_shader_bin *rt_trampoline;
   struct anv_shader_bin *rt_trivial_return;

   pthread_mutex_t mutex;
   pthread_cond_t queue_submit;
@@ -2690,19 +2671,6 @@ struct anv_cmd_compute_state {
   struct anv_address num_workgroups;
};

struct anv_cmd_ray_tracing_state {
   struct anv_cmd_pipeline_state base;

   struct anv_ray_tracing_pipeline *pipeline;

   bool pipeline_dirty;

   struct {
      struct anv_bo *bo;
      struct brw_rt_scratch_layout layout;
   } scratch;
};

/** State required while building cmd buffer */
struct anv_cmd_state {
   /* PIPELINE_SELECT.PipelineSelection */
@@ -2712,7 +2680,6 @@ struct anv_cmd_state {

   struct anv_cmd_graphics_state gfx;
   struct anv_cmd_compute_state compute;
   struct anv_cmd_ray_tracing_state rt;

   enum anv_pipe_bits pending_pipe_bits;
   VkShaderStageFlags descriptors_dirty;
@@ -3034,20 +3001,6 @@ anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
   vk_pipeline_cache_object_unref(&shader->base);
}

#define anv_shader_bin_get_bsr(bin, local_arg_offset) ({            \
   assert((local_arg_offset) % 8 == 0);                             \
   const struct brw_bs_prog_data *prog_data =                       \
      brw_bs_prog_data_const(bin->prog_data);                       \
   assert(prog_data->simd_size == 8 || prog_data->simd_size == 16); \
                                                                    \
   (struct GFX_BINDLESS_SHADER_RECORD) {                            \
      .OffsetToLocalArguments = (local_arg_offset) / 8,             \
      .BindlessShaderDispatchMode =                                 \
         prog_data->simd_size == 16 ? RT_SIMD16 : RT_SIMD8,         \
      .KernelStartPointer = bin->kernel.offset,                     \
   };                                                               \
})

struct anv_pipeline_executable {
   gl_shader_stage stage;

@@ -3060,7 +3013,6 @@ struct anv_pipeline_executable {
enum anv_pipeline_type {
   ANV_PIPELINE_GRAPHICS,
   ANV_PIPELINE_COMPUTE,
   ANV_PIPELINE_RAY_TRACING,
};

struct anv_pipeline {
@@ -3155,34 +3107,6 @@ struct anv_compute_pipeline {
   uint32_t interface_descriptor_data[8];
};

struct anv_rt_shader_group {
   VkRayTracingShaderGroupTypeKHR type;

   struct anv_shader_bin *general;
   struct anv_shader_bin *closest_hit;
   struct anv_shader_bin *any_hit;
   struct anv_shader_bin *intersection;

   /* VK_KHR_ray_tracing requires shaderGroupHandleSize == 32 */
   uint32_t handle[8];
};

struct anv_ray_tracing_pipeline {
   struct anv_pipeline base;

   /* All shaders in the pipeline */
   struct util_dynarray shaders;

   uint32_t group_count;
   struct anv_rt_shader_group *groups;

   /* If non-zero, this is the default computed stack size as per the stack
    * size computation in the Vulkan spec. If zero, that indicates that the
    * client has requested a dynamic stack size.
    */
   uint32_t stack_size;
};

#define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum)      \
   static inline struct anv_##pipe_type##_pipeline *          \
   anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline) \
@@ -3193,7 +3117,6 @@ struct anv_ray_tracing_pipeline {

ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING)

static inline bool
anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
@@ -3273,12 +3196,6 @@ anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline
   return &get_vs_prog_data(pipeline)->base;
}

VkResult
anv_device_init_rt_shaders(struct anv_device *device);

void
anv_device_finish_rt_shaders(struct anv_device *device);

VkResult
anv_pipeline_init(struct anv_pipeline *pipeline,
                  struct anv_device *device,
@@ -5610,274 +5610,6 @@ genX(cmd_buffer_ray_query_globals)(struct anv_cmd_buffer *cmd_buffer)
#endif
}

#if GFX_VERx10 >= 125
static void
calc_local_trace_size(uint8_t local_shift[3], const uint32_t global[3])
{
   unsigned total_shift = 0;
   memset(local_shift, 0, 3);

   bool progress;
   do {
      progress = false;
      for (unsigned i = 0; i < 3; i++) {
         assert(global[i] > 0);
         if ((1 << local_shift[i]) < global[i]) {
            progress = true;
            local_shift[i]++;
            total_shift++;
         }

         if (total_shift == 3)
            return;
      }
   } while (progress);

   /* Assign whatever's left to x */
   local_shift[0] += 3 - total_shift;
}
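The function above distributes a budget of three doublings (a local size of at most 2^3 = 8 invocations, i.e. one SIMD8 group) round-robin across the dimensions that can still use them. Here is a standalone trace of it for a hypothetical 1920x1080x1 launch; the function body is copied verbatim so the example compiles on its own, and the launch size is an assumption for illustration.

/* Minimal sketch: trace calc_local_trace_size() for a 1920x1080x1 launch. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

static void
calc_local_trace_size(uint8_t local_shift[3], const uint32_t global[3])
{
   unsigned total_shift = 0;
   memset(local_shift, 0, 3);

   bool progress;
   do {
      progress = false;
      for (unsigned i = 0; i < 3; i++) {
         assert(global[i] > 0);
         if ((1u << local_shift[i]) < global[i]) {
            progress = true;
            local_shift[i]++;
            total_shift++;
         }

         if (total_shift == 3)
            return;
      }
   } while (progress);

   local_shift[0] += 3 - total_shift;
}

int main(void)
{
   uint8_t shift[3];
   const uint32_t launch[3] = { 1920, 1080, 1 };

   calc_local_trace_size(shift, launch);
   /* Pass 1 grows x and y once each; pass 2 grows x again, exhausting the
    * 3-doubling budget: local size 4x2x1.
    */
   assert(shift[0] == 2 && shift[1] == 1 && shift[2] == 0);
   return 0;
}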
static struct GFX_RT_SHADER_TABLE
|
||||
vk_sdar_to_shader_table(const VkStridedDeviceAddressRegionKHR *region)
|
||||
{
|
||||
return (struct GFX_RT_SHADER_TABLE) {
|
||||
.BaseAddress = anv_address_from_u64(region->deviceAddress),
|
||||
.Stride = region->stride,
|
||||
};
|
||||
}
|
||||
|
||||
static void
|
||||
cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
|
||||
const VkStridedDeviceAddressRegionKHR *raygen_sbt,
|
||||
const VkStridedDeviceAddressRegionKHR *miss_sbt,
|
||||
const VkStridedDeviceAddressRegionKHR *hit_sbt,
|
||||
const VkStridedDeviceAddressRegionKHR *callable_sbt,
|
||||
bool is_indirect,
|
||||
uint32_t launch_width,
|
||||
uint32_t launch_height,
|
||||
uint32_t launch_depth,
|
||||
uint64_t launch_size_addr)
|
||||
{
|
||||
struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;
|
||||
struct anv_ray_tracing_pipeline *pipeline = rt->pipeline;
|
||||
|
||||
if (anv_batch_has_error(&cmd_buffer->batch))
|
||||
return;
|
||||
|
||||
/* If we have a known degenerate launch size, just bail */
|
||||
if (!is_indirect &&
|
||||
(launch_width == 0 || launch_height == 0 || launch_depth == 0))
|
||||
return;
|
||||
|
||||
genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config);
|
||||
genX(flush_pipeline_select_gpgpu)(cmd_buffer);
|
||||
|
||||
cmd_buffer->state.rt.pipeline_dirty = false;
|
||||
|
||||
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||
|
||||
/* Add these to the reloc list as they're internal buffers that don't
|
||||
* actually have relocs to pick them up manually.
|
||||
*
|
||||
* TODO(RT): This is a bit of a hack
|
||||
*/
|
||||
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
|
||||
cmd_buffer->batch.alloc,
|
||||
rt->scratch.bo);
|
||||
|
||||
/* Allocate and set up our RT_DISPATCH_GLOBALS */
|
||||
struct anv_state rtdg_state =
|
||||
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
|
||||
BRW_RT_PUSH_CONST_OFFSET +
|
||||
sizeof(struct anv_push_constants),
|
||||
64);
|
||||
|
||||
struct GFX_RT_DISPATCH_GLOBALS rtdg = {
|
||||
.MemBaseAddress = (struct anv_address) {
|
||||
.bo = rt->scratch.bo,
|
||||
.offset = rt->scratch.layout.ray_stack_start,
|
||||
},
|
||||
.CallStackHandler =
|
||||
anv_shader_bin_get_bsr(cmd_buffer->device->rt_trivial_return, 0),
|
||||
.AsyncRTStackSize = rt->scratch.layout.ray_stack_stride / 64,
|
||||
.NumDSSRTStacks = rt->scratch.layout.stack_ids_per_dss,
|
||||
.MaxBVHLevels = BRW_RT_MAX_BVH_LEVELS,
|
||||
.Flags = RT_DEPTH_TEST_LESS_EQUAL,
|
||||
.HitGroupTable = vk_sdar_to_shader_table(hit_sbt),
|
||||
.MissGroupTable = vk_sdar_to_shader_table(miss_sbt),
|
||||
.SWStackSize = rt->scratch.layout.sw_stack_size / 64,
|
||||
.LaunchWidth = launch_width,
|
||||
.LaunchHeight = launch_height,
|
||||
.LaunchDepth = launch_depth,
|
||||
.CallableGroupTable = vk_sdar_to_shader_table(callable_sbt),
|
||||
};
|
||||
GFX_RT_DISPATCH_GLOBALS_pack(NULL, rtdg_state.map, &rtdg);
|
||||
|
||||
/* Push constants go after the RT_DISPATCH_GLOBALS */
|
||||
assert(GFX_RT_DISPATCH_GLOBALS_length * 4 <= BRW_RT_PUSH_CONST_OFFSET);
|
||||
memcpy(rtdg_state.map + BRW_RT_PUSH_CONST_OFFSET,
|
||||
&cmd_buffer->state.rt.base.push_constants,
|
||||
sizeof(struct anv_push_constants));

   struct anv_address rtdg_addr = {
      .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
      .offset = rtdg_state.offset,
   };

   uint8_t local_size_log2[3];
   uint32_t global_size[3] = {};
   if (is_indirect) {
      /* Pick a local size that's probably ok. We assume most TraceRays calls
       * will use a two-dimensional dispatch size. Worst case, our initial
       * dispatch will be a little slower than it has to be.
       */
      local_size_log2[0] = 2;
      local_size_log2[1] = 1;
      local_size_log2[2] = 0;
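
      /* Note that (4, 2, 1) gives 4 * 2 * 1 = 8 invocations per group,
       * which lines up with the SIMDSize = SIMD8 programmed into the
       * COMPUTE_WALKER below.
       */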

      struct mi_builder b;
      mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);

      struct mi_value launch_size[3] = {
         mi_mem32(anv_address_from_u64(launch_size_addr + 0)),
         mi_mem32(anv_address_from_u64(launch_size_addr + 4)),
         mi_mem32(anv_address_from_u64(launch_size_addr + 8)),
      };

      /* Store the original launch size into RT_DISPATCH_GLOBALS
       *
       * TODO: Pull values from genX_bits.h once RT_DISPATCH_GLOBALS gets
       * moved into a genX version.
       */
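      /* The hard-coded offsets 52, 56 and 60 below are presumably the byte
       * offsets of LaunchWidth, LaunchHeight and LaunchDepth within the
       * packed RT_DISPATCH_GLOBALS; that is an inference from this code,
       * since per the TODO above they are not yet pulled from genX_bits.h.
       */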
      mi_store(&b, mi_mem32(anv_address_add(rtdg_addr, 52)),
               mi_value_ref(&b, launch_size[0]));
      mi_store(&b, mi_mem32(anv_address_add(rtdg_addr, 56)),
               mi_value_ref(&b, launch_size[1]));
      mi_store(&b, mi_mem32(anv_address_add(rtdg_addr, 60)),
               mi_value_ref(&b, launch_size[2]));

      /* Compute the global dispatch size */
      for (unsigned i = 0; i < 3; i++) {
         if (local_size_log2[i] == 0)
            continue;

         /* global_size = DIV_ROUND_UP(launch_size, local_size)
          *
          * Fortunately for us, MI_ALU math is 64-bit and mi_ushr32_imm has
          * the semantics of shifting the entire 64-bit value and taking the
          * bottom 32 bits, so we don't have to worry about roll-over.
          */
         uint32_t local_size = 1 << local_size_log2[i];
         launch_size[i] = mi_iadd(&b, launch_size[i],
                                  mi_imm(local_size - 1));
         launch_size[i] = mi_ushr32_imm(&b, launch_size[i],
                                        local_size_log2[i]);
      }
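
      /* Worked example with illustrative numbers: for launch_size[0] = 1920
       * and local_size_log2[0] = 2, the GPU computes
       * (1920 + 3) >> 2 = 480 thread groups in X.
       */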

      mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMX), launch_size[0]);
      mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMY), launch_size[1]);
      mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMZ), launch_size[2]);
   } else {
      uint32_t launch_size[3] = { launch_width, launch_height, launch_depth };
      calc_local_trace_size(local_size_log2, launch_size);
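
      /* calc_local_trace_size() (defined earlier in this file, outside this
       * hunk) picks power-of-two local dimensions for the given launch
       * size; treat that description as a summary, since its body is not
       * shown here.
       */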

      for (unsigned i = 0; i < 3; i++) {
         /* We have to be a bit careful here because DIV_ROUND_UP adds to
          * the numerator, so the value may overflow. Cast to uint64_t to
          * avoid this.
          */
         uint32_t local_size = 1 << local_size_log2[i];
         global_size[i] = DIV_ROUND_UP((uint64_t)launch_size[i], local_size);
      }
   }

   anv_batch_emit(&cmd_buffer->batch, GENX(COMPUTE_WALKER), cw) {
      cw.IndirectParameterEnable = is_indirect;
      cw.PredicateEnable = false;
      cw.SIMDSize = SIMD8;
      cw.LocalXMaximum = (1 << local_size_log2[0]) - 1;
      cw.LocalYMaximum = (1 << local_size_log2[1]) - 1;
      cw.LocalZMaximum = (1 << local_size_log2[2]) - 1;
      cw.ThreadGroupIDXDimension = global_size[0];
      cw.ThreadGroupIDYDimension = global_size[1];
      cw.ThreadGroupIDZDimension = global_size[2];
      cw.ExecutionMask = 0xff;
      cw.EmitInlineParameter = true;
      cw.PostSync.MOCS = anv_mocs(pipeline->base.device, NULL, 0);

      const gl_shader_stage s = MESA_SHADER_RAYGEN;
      struct anv_device *device = cmd_buffer->device;
      struct anv_state *surfaces = &cmd_buffer->state.binding_tables[s];
      struct anv_state *samplers = &cmd_buffer->state.samplers[s];
      cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
         .KernelStartPointer = device->rt_trampoline->kernel.offset,
         .SamplerStatePointer = samplers->offset,
         /* i965: DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4), */
         .SamplerCount = 0,
         .BindingTablePointer = surfaces->offset,
         .NumberofThreadsinGPGPUThreadGroup = 1,
         .BTDMode = true,
      };
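
      /* KernelStartPointer above points at device->rt_trampoline rather
       * than at the application's raygen shader. The trampoline presumably
       * uses the inline parameters packed below to locate the real raygen
       * BSR and spawn it through the bindless thread dispatcher (hence
       * BTDMode above).
       */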

      struct brw_rt_raygen_trampoline_params trampoline_params = {
         .rt_disp_globals_addr = anv_address_physical(rtdg_addr),
         .raygen_bsr_addr = raygen_sbt->deviceAddress,
         .is_indirect = is_indirect,
         .local_group_size_log2 = {
            local_size_log2[0],
            local_size_log2[1],
            local_size_log2[2],
         },
      };
      STATIC_ASSERT(sizeof(trampoline_params) == 32);
      memcpy(cw.InlineData, &trampoline_params, sizeof(trampoline_params));
   }
}

void
genX(CmdTraceRaysKHR)(
    VkCommandBuffer                             commandBuffer,
    const VkStridedDeviceAddressRegionKHR*      pRaygenShaderBindingTable,
    const VkStridedDeviceAddressRegionKHR*      pMissShaderBindingTable,
    const VkStridedDeviceAddressRegionKHR*      pHitShaderBindingTable,
    const VkStridedDeviceAddressRegionKHR*      pCallableShaderBindingTable,
    uint32_t                                    width,
    uint32_t                                    height,
    uint32_t                                    depth)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   cmd_buffer_trace_rays(cmd_buffer,
                         pRaygenShaderBindingTable,
                         pMissShaderBindingTable,
                         pHitShaderBindingTable,
                         pCallableShaderBindingTable,
                         false /* is_indirect */,
                         width, height, depth,
                         0 /* launch_size_addr */);
}
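
/* For reference, an application reaches this entry point with a call along
 * the lines of the following sketch (SBT buffer setup omitted; stride and
 * size values are illustrative):
 *
 *    VkStridedDeviceAddressRegionKHR raygen = {
 *       .deviceAddress = raygen_sbt_addr,
 *       .stride = 64,
 *       .size = 64,
 *    };
 *    vkCmdTraceRaysKHR(cmd, &raygen, &miss, &hit, &callable,
 *                      1920, 1080, 1);
 */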

void
genX(CmdTraceRaysIndirectKHR)(
    VkCommandBuffer                             commandBuffer,
    const VkStridedDeviceAddressRegionKHR*      pRaygenShaderBindingTable,
    const VkStridedDeviceAddressRegionKHR*      pMissShaderBindingTable,
    const VkStridedDeviceAddressRegionKHR*      pHitShaderBindingTable,
    const VkStridedDeviceAddressRegionKHR*      pCallableShaderBindingTable,
    VkDeviceAddress                             indirectDeviceAddress)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   cmd_buffer_trace_rays(cmd_buffer,
                         pRaygenShaderBindingTable,
                         pMissShaderBindingTable,
                         pHitShaderBindingTable,
                         pCallableShaderBindingTable,
                         true /* is_indirect */,
                         0, 0, 0, /* width, height, depth */
                         indirectDeviceAddress);
}
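
/* The three dwords that cmd_buffer_trace_rays() reads at
 * indirectDeviceAddress (+0, +4, +8) correspond to the width, height and
 * depth members of VkTraceRaysIndirectCommandKHR, which the application is
 * required to place at that address.
 */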

#endif /* GFX_VERx10 >= 125 */

static void
genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
                            uint32_t pipeline)

@@ -2510,54 +2510,3 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
}

#endif /* #if GFX_VERx10 >= 125 */

#if GFX_VERx10 >= 125

void
genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline)
{
   for (uint32_t i = 0; i < pipeline->group_count; i++) {
      struct anv_rt_shader_group *group = &pipeline->groups[i];

      switch (group->type) {
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: {
         struct GFX_RT_GENERAL_SBT_HANDLE sh = {};
         sh.General = anv_shader_bin_get_bsr(group->general, 32);
         GFX_RT_GENERAL_SBT_HANDLE_pack(NULL, group->handle, &sh);
         break;
      }

      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: {
         struct GFX_RT_TRIANGLES_SBT_HANDLE sh = {};
         if (group->closest_hit)
            sh.ClosestHit = anv_shader_bin_get_bsr(group->closest_hit, 32);
         if (group->any_hit)
            sh.AnyHit = anv_shader_bin_get_bsr(group->any_hit, 24);
         GFX_RT_TRIANGLES_SBT_HANDLE_pack(NULL, group->handle, &sh);
         break;
      }

      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
         struct GFX_RT_PROCEDURAL_SBT_HANDLE sh = {};
         if (group->closest_hit)
            sh.ClosestHit = anv_shader_bin_get_bsr(group->closest_hit, 32);
         sh.Intersection = anv_shader_bin_get_bsr(group->intersection, 24);
         GFX_RT_PROCEDURAL_SBT_HANDLE_pack(NULL, group->handle, &sh);
         break;
      }

      default:
         unreachable("Invalid shader group type");
      }
   }
}
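
/* Each group->handle packed above is the opaque handle the driver hands
 * back for the group (presumably via vkGetRayTracingShaderGroupHandlesKHR),
 * which applications then copy into their shader binding table buffers. The
 * 32/24 arguments to anv_shader_bin_get_bsr() appear to be per-record
 * offsets, but that is an inference from this code alone.
 */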

#else

void
genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline)
{
   unreachable("Ray tracing not supported");
}

#endif /* GFX_VERx10 >= 125 */