diff --git a/src/intel/vulkan_hasvk/anv_cmd_buffer.c b/src/intel/vulkan_hasvk/anv_cmd_buffer.c index e53df922296..fc2bf738d18 100644 --- a/src/intel/vulkan_hasvk/anv_cmd_buffer.c +++ b/src/intel/vulkan_hasvk/anv_cmd_buffer.c @@ -309,71 +309,6 @@ ilog2_round_up(uint32_t value) return 32 - __builtin_clz(value - 1); } -static void -anv_cmd_buffer_set_ray_query_buffer(struct anv_cmd_buffer *cmd_buffer, - struct anv_cmd_pipeline_state *pipeline_state, - struct anv_pipeline *pipeline, - VkShaderStageFlags stages) -{ - struct anv_device *device = cmd_buffer->device; - - uint64_t ray_shadow_size = - align_u64(brw_rt_ray_queries_shadow_stacks_size(device->info, - pipeline->ray_queries), - 4096); - if (ray_shadow_size > 0 && - (!cmd_buffer->state.ray_query_shadow_bo || - cmd_buffer->state.ray_query_shadow_bo->size < ray_shadow_size)) { - unsigned shadow_size_log2 = MAX2(ilog2_round_up(ray_shadow_size), 16); - unsigned bucket = shadow_size_log2 - 16; - assert(bucket < ARRAY_SIZE(device->ray_query_shadow_bos)); - - struct anv_bo *bo = p_atomic_read(&device->ray_query_shadow_bos[bucket]); - if (bo == NULL) { - struct anv_bo *new_bo; - VkResult result = anv_device_alloc_bo(device, "RT queries shadow", - ray_shadow_size, - 0, /* alloc_flags */ - 0, /* explicit_address */ - &new_bo); - if (result != VK_SUCCESS) { - anv_batch_set_error(&cmd_buffer->batch, result); - return; - } - - bo = p_atomic_cmpxchg(&device->ray_query_shadow_bos[bucket], NULL, new_bo); - if (bo != NULL) { - anv_device_release_bo(device, bo); - } else { - bo = new_bo; - } - } - cmd_buffer->state.ray_query_shadow_bo = bo; - - /* Add the ray query buffers to the batch list. */ - anv_reloc_list_add_bo(cmd_buffer->batch.relocs, - cmd_buffer->batch.alloc, - cmd_buffer->state.ray_query_shadow_bo); - } - - /* Add the HW buffer to the list of BO used. */ - anv_reloc_list_add_bo(cmd_buffer->batch.relocs, - cmd_buffer->batch.alloc, - device->ray_query_bo); - - /* Fill the push constants & mark them dirty. 
*/ - struct anv_state ray_query_global_state = - anv_genX(device->info, cmd_buffer_ray_query_globals)(cmd_buffer); - - struct anv_address ray_query_globals_addr = (struct anv_address) { - .bo = device->dynamic_state_pool.block_pool.bo, - .offset = ray_query_global_state.offset, - }; - pipeline_state->push_constants.ray_query_globals = - anv_address_physical(ray_query_globals_addr); - cmd_buffer->state.push_constants_dirty |= stages; -} - void anv_CmdBindPipeline( VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, @@ -381,8 +316,6 @@ void anv_CmdBindPipeline( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - struct anv_cmd_pipeline_state *state; - VkShaderStageFlags stages = 0; switch (pipelineBindPoint) { case VK_PIPELINE_BIND_POINT_COMPUTE: { @@ -395,9 +328,6 @@ void anv_CmdBindPipeline( cmd_buffer->state.compute.pipeline_dirty = true; set_dirty_for_bind_map(cmd_buffer, MESA_SHADER_COMPUTE, &compute_pipeline->cs->bind_map); - - state = &cmd_buffer->state.compute.base; - stages = VK_SHADER_STAGE_COMPUTE_BIT; break; } @@ -419,27 +349,6 @@ void anv_CmdBindPipeline( /* Apply the non dynamic state from the pipeline */ vk_cmd_set_dynamic_graphics_state(&cmd_buffer->vk, &gfx_pipeline->dynamic_state); - - state = &cmd_buffer->state.gfx.base; - stages = gfx_pipeline->active_stages; - break; - } - - case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: { - struct anv_ray_tracing_pipeline *rt_pipeline = - anv_pipeline_to_ray_tracing(pipeline); - if (cmd_buffer->state.rt.pipeline == rt_pipeline) - return; - - cmd_buffer->state.rt.pipeline = rt_pipeline; - cmd_buffer->state.rt.pipeline_dirty = true; - - if (rt_pipeline->stack_size > 0) { - anv_CmdSetRayTracingPipelineStackSizeKHR(commandBuffer, - rt_pipeline->stack_size); - } - - state = &cmd_buffer->state.rt.base; break; } @@ -447,9 +356,6 @@ void anv_CmdBindPipeline( unreachable("invalid bind point"); break; } - - if (pipeline->ray_queries > 0) - 
anv_cmd_buffer_set_ray_query_buffer(cmd_buffer, state, pipeline, stages); } static void @@ -492,16 +398,6 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer, pipe_state = &cmd_buffer->state.compute.base; break; - case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: - stages &= VK_SHADER_STAGE_RAYGEN_BIT_KHR | - VK_SHADER_STAGE_ANY_HIT_BIT_KHR | - VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | - VK_SHADER_STAGE_MISS_BIT_KHR | - VK_SHADER_STAGE_INTERSECTION_BIT_KHR | - VK_SHADER_STAGE_CALLABLE_BIT_KHR; - pipe_state = &cmd_buffer->state.rt.base; - break; - default: unreachable("invalid bind point"); } @@ -520,13 +416,7 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer, * as an 64-bit address in the push constants. */ bool update_desc_sets = stages & (VK_SHADER_STAGE_TASK_BIT_NV | - VK_SHADER_STAGE_MESH_BIT_NV | - VK_SHADER_STAGE_RAYGEN_BIT_KHR | - VK_SHADER_STAGE_ANY_HIT_BIT_KHR | - VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | - VK_SHADER_STAGE_MISS_BIT_KHR | - VK_SHADER_STAGE_INTERSECTION_BIT_KHR | - VK_SHADER_STAGE_CALLABLE_BIT_KHR); + VK_SHADER_STAGE_MESH_BIT_NV); if (update_desc_sets) { struct anv_push_constants *push = &pipe_state->push_constants; @@ -818,17 +708,6 @@ void anv_CmdPushConstants( memcpy(pipe_state->push_constants.client_data + offset, pValues, size); } - if (stageFlags & (VK_SHADER_STAGE_RAYGEN_BIT_KHR | - VK_SHADER_STAGE_ANY_HIT_BIT_KHR | - VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | - VK_SHADER_STAGE_MISS_BIT_KHR | - VK_SHADER_STAGE_INTERSECTION_BIT_KHR | - VK_SHADER_STAGE_CALLABLE_BIT_KHR)) { - struct anv_cmd_pipeline_state *pipe_state = - &cmd_buffer->state.rt.base; - - memcpy(pipe_state->push_constants.client_data + offset, pValues, size); - } cmd_buffer->state.push_constants_dirty |= stageFlags; } @@ -850,10 +729,6 @@ anv_cmd_buffer_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer, pipe_state = &cmd_buffer->state.compute.base; break; - case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: - pipe_state = &cmd_buffer->state.rt.base; - 
break; - default: unreachable("invalid bind point"); } @@ -1043,54 +918,3 @@ void anv_CmdSetDeviceMask( { /* No-op */ } - -void anv_CmdSetRayTracingPipelineStackSizeKHR( - VkCommandBuffer commandBuffer, - uint32_t pipelineStackSize) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt; - struct anv_device *device = cmd_buffer->device; - - if (anv_batch_has_error(&cmd_buffer->batch)) - return; - - uint32_t stack_ids_per_dss = 2048; /* TODO */ - - unsigned stack_size_log2 = ilog2_round_up(pipelineStackSize); - if (stack_size_log2 < 10) - stack_size_log2 = 10; - - if (rt->scratch.layout.total_size == 1 << stack_size_log2) - return; - - brw_rt_compute_scratch_layout(&rt->scratch.layout, device->info, - stack_ids_per_dss, 1 << stack_size_log2); - - unsigned bucket = stack_size_log2 - 10; - assert(bucket < ARRAY_SIZE(device->rt_scratch_bos)); - - struct anv_bo *bo = p_atomic_read(&device->rt_scratch_bos[bucket]); - if (bo == NULL) { - struct anv_bo *new_bo; - VkResult result = anv_device_alloc_bo(device, "RT scratch", - rt->scratch.layout.total_size, - 0, /* alloc_flags */ - 0, /* explicit_address */ - &new_bo); - if (result != VK_SUCCESS) { - rt->scratch.layout.total_size = 0; - anv_batch_set_error(&cmd_buffer->batch, result); - return; - } - - bo = p_atomic_cmpxchg(&device->rt_scratch_bos[bucket], NULL, new_bo); - if (bo != NULL) { - anv_device_release_bo(device, bo); - } else { - bo = new_bo; - } - } - - rt->scratch.bo = bo; -} diff --git a/src/intel/vulkan_hasvk/anv_device.c b/src/intel/vulkan_hasvk/anv_device.c index f1cfe83a74a..e167a5c671d 100644 --- a/src/intel/vulkan_hasvk/anv_device.c +++ b/src/intel/vulkan_hasvk/anv_device.c @@ -226,7 +226,6 @@ get_device_extensions(const struct anv_physical_device *device, device->use_call_secondary, .KHR_pipeline_executable_properties = true, .KHR_push_descriptor = true, - .KHR_ray_query = device->info.has_ray_tracing, .KHR_relaxed_block_layout = 
true, .KHR_sampler_mirror_clamp_to_edge = true, .KHR_sampler_ycbcr_conversion = true, @@ -1562,12 +1561,6 @@ void anv_GetPhysicalDeviceFeatures2( break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR: { - VkPhysicalDeviceRayQueryFeaturesKHR *features = (void *)ext; - features->rayQuery = pdevice->info.has_ray_tracing; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: { VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext; features->robustBufferAccess2 = true; @@ -1939,14 +1932,6 @@ anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice, if (pdevice->compiler->scalar_stage[stage]) scalar_stages |= mesa_to_vk_shader_stage(stage); } - if (pdevice->vk.supported_extensions.KHR_ray_tracing_pipeline) { - scalar_stages |= VK_SHADER_STAGE_RAYGEN_BIT_KHR | - VK_SHADER_STAGE_ANY_HIT_BIT_KHR | - VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | - VK_SHADER_STAGE_MISS_BIT_KHR | - VK_SHADER_STAGE_INTERSECTION_BIT_KHR | - VK_SHADER_STAGE_CALLABLE_BIT_KHR; - } if (pdevice->vk.supported_extensions.NV_mesh_shader) { scalar_stages |= VK_SHADER_STAGE_TASK_BIT_NV | VK_SHADER_STAGE_MESH_BIT_NV; @@ -3314,22 +3299,9 @@ VkResult anv_CreateDevice( device->workaround_bo->size, INTEL_DEBUG_BLOCK_TYPE_FRAME); - if (device->vk.enabled_extensions.KHR_ray_query) { - uint32_t ray_queries_size = - align_u32(brw_rt_ray_queries_hw_stacks_size(device->info), 4096); - - result = anv_device_alloc_bo(device, "ray queries", - ray_queries_size, - 0, - 0 /* explicit_address */, - &device->ray_query_bo); - if (result != VK_SUCCESS) - goto fail_workaround_bo; - } - result = anv_device_init_trivial_batch(device); if (result != VK_SUCCESS) - goto fail_ray_query_bo; + goto fail_workaround_bo; if (device->info->ver >= 12 && device->vk.enabled_extensions.KHR_fragment_shading_rate) { @@ -3367,9 +3339,6 @@ VkResult anv_CreateDevice( anv_scratch_pool_init(device, &device->scratch_pool); - /* TODO(RT): Do we want some sort of data structure for this? 
*/ - memset(device->rt_scratch_bos, 0, sizeof(device->rt_scratch_bos)); - result = anv_genX(device->info, init_device_state)(device); if (result != VK_SUCCESS) goto fail_trivial_batch_bo_and_scratch_pool; @@ -3395,12 +3364,6 @@ VkResult anv_CreateDevice( goto fail_default_pipeline_cache; } - result = anv_device_init_rt_shaders(device); - if (result != VK_SUCCESS) { - result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_internal_cache; - } - anv_device_init_blorp(device); anv_device_init_border_colors(device); @@ -3413,17 +3376,12 @@ VkResult anv_CreateDevice( return VK_SUCCESS; - fail_internal_cache: - vk_pipeline_cache_destroy(device->internal_cache, NULL); fail_default_pipeline_cache: vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL); fail_trivial_batch_bo_and_scratch_pool: anv_scratch_pool_finish(device, &device->scratch_pool); fail_trivial_batch: anv_device_release_bo(device, device->trivial_batch_bo); - fail_ray_query_bo: - if (device->ray_query_bo) - anv_device_release_bo(device, device->ray_query_bo); fail_workaround_bo: anv_device_release_bo(device, device->workaround_bo); fail_surface_aux_map_pool: @@ -3486,8 +3444,6 @@ void anv_DestroyDevice( anv_device_finish_blorp(device); - anv_device_finish_rt_shaders(device); - vk_pipeline_cache_destroy(device->internal_cache, NULL); vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL); @@ -3502,20 +3458,8 @@ void anv_DestroyDevice( anv_state_pool_free(&device->dynamic_state_pool, device->cps_states); #endif - for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) { - if (device->rt_scratch_bos[i] != NULL) - anv_device_release_bo(device, device->rt_scratch_bos[i]); - } - anv_scratch_pool_finish(device, &device->scratch_pool); - if (device->vk.enabled_extensions.KHR_ray_query) { - for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_shadow_bos); i++) { - if (device->ray_query_shadow_bos[i] != NULL) - anv_device_release_bo(device, device->ray_query_shadow_bos[i]); - } 
- anv_device_release_bo(device, device->ray_query_bo); - } anv_device_release_bo(device, device->workaround_bo); anv_device_release_bo(device, device->trivial_batch_bo); diff --git a/src/intel/vulkan_hasvk/anv_genX.h b/src/intel/vulkan_hasvk/anv_genX.h index 102514d5e7d..64bfbf01793 100644 --- a/src/intel/vulkan_hasvk/anv_genX.h +++ b/src/intel/vulkan_hasvk/anv_genX.h @@ -175,6 +175,3 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline, void genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline); - -void -genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline); diff --git a/src/intel/vulkan_hasvk/anv_pipeline.c b/src/intel/vulkan_hasvk/anv_pipeline.c index 1765b33070d..66dd6f37a96 100644 --- a/src/intel/vulkan_hasvk/anv_pipeline.c +++ b/src/intel/vulkan_hasvk/anv_pipeline.c @@ -95,8 +95,6 @@ anv_shader_stage_to_nir(struct anv_device *device, .post_depth_coverage = pdevice->info.ver >= 9, .runtime_descriptor_array = true, .float_controls = pdevice->info.ver >= 8, - .ray_query = pdevice->info.has_ray_tracing, - .ray_tracing = pdevice->info.has_ray_tracing, .shader_clock = true, .shader_viewport_index_layer = true, .stencil_export = pdevice->info.ver >= 9, @@ -251,17 +249,6 @@ void anv_DestroyPipeline( break; } - case ANV_PIPELINE_RAY_TRACING: { - struct anv_ray_tracing_pipeline *rt_pipeline = - anv_pipeline_to_ray_tracing(pipeline); - - util_dynarray_foreach(&rt_pipeline->shaders, - struct anv_shader_bin *, shader) { - anv_shader_bin_unref(device, *shader); - } - break; - } - default: unreachable("invalid pipeline type"); } @@ -600,51 +587,6 @@ anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline, _mesa_sha1_final(&ctx, sha1_out); } -static void -anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline, - struct anv_pipeline_layout *layout, - struct anv_pipeline_stage *stage, - unsigned char *sha1_out) -{ - struct mesa_sha1 ctx; - _mesa_sha1_init(&ctx); - - if (layout != NULL) - 
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); - - const bool rba = pipeline->base.device->robust_buffer_access; - _mesa_sha1_update(&ctx, &rba, sizeof(rba)); - - _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1)); - _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs)); - - _mesa_sha1_final(&ctx, sha1_out); -} - -static void -anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline, - struct anv_pipeline_layout *layout, - struct anv_pipeline_stage *intersection, - struct anv_pipeline_stage *any_hit, - unsigned char *sha1_out) -{ - struct mesa_sha1 ctx; - _mesa_sha1_init(&ctx); - - if (layout != NULL) - _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); - - const bool rba = pipeline->base.device->robust_buffer_access; - _mesa_sha1_update(&ctx, &rba, sizeof(rba)); - - _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1)); - _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs)); - _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1)); - _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs)); - - _mesa_sha1_final(&ctx, sha1_out); -} - static nir_shader * anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline, struct vk_pipeline_cache *cache, @@ -2230,757 +2172,6 @@ VkResult anv_CreateGraphicsPipelines( return result; } -static VkResult -compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline, - struct vk_pipeline_cache *cache, - nir_shader *nir, - struct anv_pipeline_stage *stage, - struct anv_shader_bin **shader_out, - void *mem_ctx) -{ - const struct brw_compiler *compiler = - pipeline->base.device->physical->compiler; - const struct intel_device_info *devinfo = compiler->devinfo; - - nir_shader **resume_shaders = NULL; - uint32_t num_resume_shaders = 0; - if (nir->info.stage != MESA_SHADER_COMPUTE) { - NIR_PASS(_, nir, nir_lower_shader_calls, - nir_address_format_64bit_global, - 
BRW_BTD_STACK_ALIGN, - &resume_shaders, &num_resume_shaders, mem_ctx); - NIR_PASS(_, nir, brw_nir_lower_shader_calls); - NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo); - } - - for (unsigned i = 0; i < num_resume_shaders; i++) { - NIR_PASS(_,resume_shaders[i], brw_nir_lower_shader_calls); - NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo); - } - - struct brw_compile_bs_params params = { - .nir = nir, - .key = &stage->key.bs, - .prog_data = &stage->prog_data.bs, - .num_resume_shaders = num_resume_shaders, - .resume_shaders = resume_shaders, - - .stats = stage->stats, - .log_data = pipeline->base.device, - }; - - stage->code = brw_compile_bs(compiler, mem_ctx, &params); - if (stage->code == NULL) - return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY); - - /* Ray-tracing shaders don't have a "real" bind map */ - struct anv_pipeline_bind_map empty_bind_map = {}; - - const unsigned code_size = stage->prog_data.base.program_size; - struct anv_shader_bin *bin = - anv_device_upload_kernel(pipeline->base.device, - cache, - stage->stage, - &stage->cache_key, sizeof(stage->cache_key), - stage->code, code_size, - &stage->prog_data.base, - sizeof(stage->prog_data.bs), - stage->stats, 1, - NULL, &empty_bind_map); - if (bin == NULL) - return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY); - - /* TODO: Figure out executables for resume shaders */ - anv_pipeline_add_executables(&pipeline->base, stage, bin); - util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin); - - *shader_out = bin; - - return VK_SUCCESS; -} - -static bool -is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info) -{ - if (info->pDynamicState == NULL) - return false; - - for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) { - if (info->pDynamicState->pDynamicStates[i] == - VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR) - return true; - } - - return false; -} - -static void -anv_pipeline_compute_ray_tracing_stacks(struct 
anv_ray_tracing_pipeline *pipeline, - const VkRayTracingPipelineCreateInfoKHR *info, - uint32_t *stack_max) -{ - if (is_rt_stack_size_dynamic(info)) { - pipeline->stack_size = 0; /* 0 means dynamic */ - } else { - /* From the Vulkan spec: - * - * "If the stack size is not set explicitly, the stack size for a - * pipeline is: - * - * rayGenStackMax + - * min(1, maxPipelineRayRecursionDepth) × - * max(closestHitStackMax, missStackMax, - * intersectionStackMax + anyHitStackMax) + - * max(0, maxPipelineRayRecursionDepth-1) × - * max(closestHitStackMax, missStackMax) + - * 2 × callableStackMax" - */ - pipeline->stack_size = - stack_max[MESA_SHADER_RAYGEN] + - MIN2(1, info->maxPipelineRayRecursionDepth) * - MAX4(stack_max[MESA_SHADER_CLOSEST_HIT], - stack_max[MESA_SHADER_MISS], - stack_max[MESA_SHADER_INTERSECTION], - stack_max[MESA_SHADER_ANY_HIT]) + - MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) * - MAX2(stack_max[MESA_SHADER_CLOSEST_HIT], - stack_max[MESA_SHADER_MISS]) + - 2 * stack_max[MESA_SHADER_CALLABLE]; - - /* This is an extremely unlikely case but we need to set it to some - * non-zero value so that we don't accidentally think it's dynamic. - * Our minimum stack size is 2KB anyway so we could set to any small - * value we like. - */ - if (pipeline->stack_size == 0) - pipeline->stack_size = 1; - } -} - -static struct anv_pipeline_stage * -anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline, - const VkRayTracingPipelineCreateInfoKHR *info, - void *pipeline_ctx) -{ - ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); - - /* Create enough stage entries for all shader modules plus potential - * combinaisons in the groups. 
- */ - struct anv_pipeline_stage *stages = - rzalloc_array(pipeline_ctx, struct anv_pipeline_stage, info->stageCount); - - for (uint32_t i = 0; i < info->stageCount; i++) { - const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i]; - if (vk_pipeline_shader_stage_is_null(sinfo)) - continue; - - int64_t stage_start = os_time_get_nano(); - - stages[i] = (struct anv_pipeline_stage) { - .stage = vk_to_mesa_shader_stage(sinfo->stage), - .info = sinfo, - .cache_key = { - .stage = vk_to_mesa_shader_stage(sinfo->stage), - }, - .feedback = { - .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, - }, - }; - - populate_bs_prog_key(pipeline->base.device, - pipeline->base.device->robust_buffer_access, - &stages[i].key.bs); - - vk_pipeline_hash_shader_stage(sinfo, stages[i].shader_sha1); - - if (stages[i].stage != MESA_SHADER_INTERSECTION) { - anv_pipeline_hash_ray_tracing_shader(pipeline, layout, &stages[i], - stages[i].cache_key.sha1); - } - - stages[i].feedback.duration += os_time_get_nano() - stage_start; - } - - for (uint32_t i = 0; i < info->groupCount; i++) { - const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i]; - - if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR) - continue; - - int64_t stage_start = os_time_get_nano(); - - uint32_t intersection_idx = ginfo->intersectionShader; - assert(intersection_idx < info->stageCount); - - uint32_t any_hit_idx = ginfo->anyHitShader; - if (any_hit_idx != VK_SHADER_UNUSED_KHR) { - assert(any_hit_idx < info->stageCount); - anv_pipeline_hash_ray_tracing_combined_shader(pipeline, - layout, - &stages[intersection_idx], - &stages[any_hit_idx], - stages[intersection_idx].cache_key.sha1); - } else { - anv_pipeline_hash_ray_tracing_shader(pipeline, layout, - &stages[intersection_idx], - stages[intersection_idx].cache_key.sha1); - } - - stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start; - } - - return stages; -} - -static bool 
-anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline, - struct vk_pipeline_cache *cache, - const VkRayTracingPipelineCreateInfoKHR *info, - struct anv_pipeline_stage *stages, - uint32_t *stack_max) -{ - uint32_t shaders = 0, cache_hits = 0; - for (uint32_t i = 0; i < info->stageCount; i++) { - if (stages[i].info == NULL) - continue; - - shaders++; - - int64_t stage_start = os_time_get_nano(); - - bool cache_hit; - stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache, - &stages[i].cache_key, - sizeof(stages[i].cache_key), - &cache_hit); - if (cache_hit) { - cache_hits++; - stages[i].feedback.flags |= - VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; - } - - if (stages[i].bin != NULL) { - anv_pipeline_add_executables(&pipeline->base, &stages[i], stages[i].bin); - util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, stages[i].bin); - - uint32_t stack_size = - brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size; - stack_max[stages[i].stage] = - MAX2(stack_max[stages[i].stage], stack_size); - } - - stages[i].feedback.duration += os_time_get_nano() - stage_start; - } - - return cache_hits == shaders; -} - -static VkResult -anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline, - struct vk_pipeline_cache *cache, - const VkRayTracingPipelineCreateInfoKHR *info) -{ - const struct intel_device_info *devinfo = pipeline->base.device->info; - VkResult result; - - VkPipelineCreationFeedback pipeline_feedback = { - .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, - }; - int64_t pipeline_start = os_time_get_nano(); - - void *pipeline_ctx = ralloc_context(NULL); - - struct anv_pipeline_stage *stages = - anv_pipeline_init_ray_tracing_stages(pipeline, info, pipeline_ctx); - - ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); - - const bool skip_cache_lookup = - (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR); - - uint32_t 
stack_max[MESA_VULKAN_SHADER_STAGES] = {}; - - if (!skip_cache_lookup && - anv_pipeline_load_cached_shaders(pipeline, cache, info, stages, stack_max)) { - pipeline_feedback.flags |= - VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; - goto done; - } - - if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) { - ralloc_free(pipeline_ctx); - return VK_PIPELINE_COMPILE_REQUIRED; - } - - for (uint32_t i = 0; i < info->stageCount; i++) { - if (stages[i].info == NULL) - continue; - - int64_t stage_start = os_time_get_nano(); - - stages[i].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, - pipeline_ctx, &stages[i]); - if (stages[i].nir == NULL) { - ralloc_free(pipeline_ctx); - return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY); - } - - anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i], - layout, false /* use_primitive_replication */); - - stages[i].feedback.duration += os_time_get_nano() - stage_start; - } - - for (uint32_t i = 0; i < info->stageCount; i++) { - if (stages[i].info == NULL) - continue; - - /* Shader found in cache already. 
*/ - if (stages[i].bin != NULL) - continue; - - /* We handle intersection shaders as part of the group */ - if (stages[i].stage == MESA_SHADER_INTERSECTION) - continue; - - int64_t stage_start = os_time_get_nano(); - - void *stage_ctx = ralloc_context(pipeline_ctx); - - nir_shader *nir = nir_shader_clone(stage_ctx, stages[i].nir); - switch (stages[i].stage) { - case MESA_SHADER_RAYGEN: - brw_nir_lower_raygen(nir); - break; - - case MESA_SHADER_ANY_HIT: - brw_nir_lower_any_hit(nir, devinfo); - break; - - case MESA_SHADER_CLOSEST_HIT: - brw_nir_lower_closest_hit(nir); - break; - - case MESA_SHADER_MISS: - brw_nir_lower_miss(nir); - break; - - case MESA_SHADER_INTERSECTION: - unreachable("These are handled later"); - - case MESA_SHADER_CALLABLE: - brw_nir_lower_callable(nir); - break; - - default: - unreachable("Invalid ray-tracing shader stage"); - } - - result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i], - &stages[i].bin, stage_ctx); - if (result != VK_SUCCESS) { - ralloc_free(pipeline_ctx); - return result; - } - - uint32_t stack_size = - brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size; - stack_max[stages[i].stage] = MAX2(stack_max[stages[i].stage], stack_size); - - ralloc_free(stage_ctx); - - stages[i].feedback.duration += os_time_get_nano() - stage_start; - } - - for (uint32_t i = 0; i < info->groupCount; i++) { - const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i]; - struct anv_rt_shader_group *group = &pipeline->groups[i]; - group->type = ginfo->type; - switch (ginfo->type) { - case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: - assert(ginfo->generalShader < info->stageCount); - group->general = stages[ginfo->generalShader].bin; - break; - - case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: - if (ginfo->anyHitShader < info->stageCount) - group->any_hit = stages[ginfo->anyHitShader].bin; - - if (ginfo->closestHitShader < info->stageCount) - group->closest_hit = stages[ginfo->closestHitShader].bin; 
- break; - - case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: { - if (ginfo->closestHitShader < info->stageCount) - group->closest_hit = stages[ginfo->closestHitShader].bin; - - uint32_t intersection_idx = info->pGroups[i].intersectionShader; - assert(intersection_idx < info->stageCount); - - /* Only compile this stage if not already found in the cache. */ - if (stages[intersection_idx].bin == NULL) { - /* The any-hit and intersection shader have to be combined */ - uint32_t any_hit_idx = info->pGroups[i].anyHitShader; - const nir_shader *any_hit = NULL; - if (any_hit_idx < info->stageCount) - any_hit = stages[any_hit_idx].nir; - - void *group_ctx = ralloc_context(pipeline_ctx); - nir_shader *intersection = - nir_shader_clone(group_ctx, stages[intersection_idx].nir); - - brw_nir_lower_combined_intersection_any_hit(intersection, any_hit, - devinfo); - - result = compile_upload_rt_shader(pipeline, cache, - intersection, - &stages[intersection_idx], - &group->intersection, - group_ctx); - ralloc_free(group_ctx); - if (result != VK_SUCCESS) - return result; - } else { - group->intersection = stages[intersection_idx].bin; - } - - uint32_t stack_size = - brw_bs_prog_data_const(group->intersection->prog_data)->max_stack_size; - stack_max[MESA_SHADER_INTERSECTION] = - MAX2(stack_max[MESA_SHADER_INTERSECTION], stack_size); - - break; - } - - default: - unreachable("Invalid ray tracing shader group type"); - } - } - - done: - ralloc_free(pipeline_ctx); - - anv_pipeline_compute_ray_tracing_stacks(pipeline, info, stack_max); - - pipeline_feedback.duration = os_time_get_nano() - pipeline_start; - - const VkPipelineCreationFeedbackCreateInfo *create_feedback = - vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); - if (create_feedback) { - *create_feedback->pPipelineCreationFeedback = pipeline_feedback; - - assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount); - for (uint32_t i = 0; i < info->stageCount; i++) { - 
gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage); - create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback; - } - } - - return VK_SUCCESS; -} - -VkResult -anv_device_init_rt_shaders(struct anv_device *device) -{ - if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline) - return VK_SUCCESS; - - bool cache_hit; - - struct brw_rt_trampoline { - char name[16]; - struct brw_cs_prog_key key; - } trampoline_key = { - .name = "rt-trampoline", - }; - device->rt_trampoline = - anv_device_search_for_kernel(device, device->internal_cache, - &trampoline_key, sizeof(trampoline_key), - &cache_hit); - if (device->rt_trampoline == NULL) { - - void *tmp_ctx = ralloc_context(NULL); - nir_shader *trampoline_nir = - brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx); - - trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8; - - struct anv_pipeline_bind_map bind_map = { - .surface_count = 0, - .sampler_count = 0, - }; - uint32_t dummy_params[4] = { 0, }; - struct brw_cs_prog_data trampoline_prog_data = { - .base.nr_params = 4, - .base.param = dummy_params, - .uses_inline_data = true, - .uses_btd_stack_ids = true, - }; - struct brw_compile_cs_params params = { - .nir = trampoline_nir, - .key = &trampoline_key.key, - .prog_data = &trampoline_prog_data, - .log_data = device, - }; - const unsigned *tramp_data = - brw_compile_cs(device->physical->compiler, tmp_ctx, &params); - - device->rt_trampoline = - anv_device_upload_kernel(device, device->internal_cache, - MESA_SHADER_COMPUTE, - &trampoline_key, sizeof(trampoline_key), - tramp_data, - trampoline_prog_data.base.program_size, - &trampoline_prog_data.base, - sizeof(trampoline_prog_data), - NULL, 0, NULL, &bind_map); - - ralloc_free(tmp_ctx); - - if (device->rt_trampoline == NULL) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - } - - /* The cache already has a reference and it's not going anywhere so there - * is no need to hold a second reference. 
- */ - anv_shader_bin_unref(device, device->rt_trampoline); - - struct brw_rt_trivial_return { - char name[16]; - struct brw_bs_prog_key key; - } return_key = { - .name = "rt-trivial-ret", - }; - device->rt_trivial_return = - anv_device_search_for_kernel(device, device->internal_cache, - &return_key, sizeof(return_key), - &cache_hit); - if (device->rt_trivial_return == NULL) { - void *tmp_ctx = ralloc_context(NULL); - nir_shader *trivial_return_nir = - brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx); - - NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, device->info); - - struct anv_pipeline_bind_map bind_map = { - .surface_count = 0, - .sampler_count = 0, - }; - struct brw_bs_prog_data return_prog_data = { 0, }; - struct brw_compile_bs_params params = { - .nir = trivial_return_nir, - .key = &return_key.key, - .prog_data = &return_prog_data, - - .log_data = device, - }; - const unsigned *return_data = - brw_compile_bs(device->physical->compiler, tmp_ctx, &params); - - device->rt_trivial_return = - anv_device_upload_kernel(device, device->internal_cache, - MESA_SHADER_CALLABLE, - &return_key, sizeof(return_key), - return_data, return_prog_data.base.program_size, - &return_prog_data.base, sizeof(return_prog_data), - NULL, 0, NULL, &bind_map); - - ralloc_free(tmp_ctx); - - if (device->rt_trivial_return == NULL) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - } - - /* The cache already has a reference and it's not going anywhere so there - * is no need to hold a second reference. 
- */ - anv_shader_bin_unref(device, device->rt_trivial_return); - - return VK_SUCCESS; -} - -void -anv_device_finish_rt_shaders(struct anv_device *device) -{ - if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline) - return; -} - -static VkResult -anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline, - struct anv_device *device, - struct vk_pipeline_cache *cache, - const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, - const VkAllocationCallbacks *alloc) -{ - VkResult result; - - util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx); - - result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo); - if (result != VK_SUCCESS) - goto fail; - - anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false); - - return VK_SUCCESS; - -fail: - util_dynarray_foreach(&pipeline->shaders, - struct anv_shader_bin *, shader) { - anv_shader_bin_unref(device, *shader); - } - return result; -} - -static void -assert_rt_stage_index_valid(const VkRayTracingPipelineCreateInfoKHR* pCreateInfo, - uint32_t stage_idx, - VkShaderStageFlags valid_stages) -{ - if (stage_idx == VK_SHADER_UNUSED_KHR) - return; - - assert(stage_idx <= pCreateInfo->stageCount); - assert(util_bitcount(pCreateInfo->pStages[stage_idx].stage) == 1); - assert(pCreateInfo->pStages[stage_idx].stage & valid_stages); -} - -static VkResult -anv_ray_tracing_pipeline_create( - VkDevice _device, - struct vk_pipeline_cache * cache, - const VkRayTracingPipelineCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipeline) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - VkResult result; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR); - - VK_MULTIALLOC(ma); - VK_MULTIALLOC_DECL(&ma, struct anv_ray_tracing_pipeline, pipeline, 1); - VK_MULTIALLOC_DECL(&ma, struct anv_rt_shader_group, groups, pCreateInfo->groupCount); - if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, - 
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - - result = anv_pipeline_init(&pipeline->base, device, - ANV_PIPELINE_RAY_TRACING, pCreateInfo->flags, - pAllocator); - if (result != VK_SUCCESS) { - vk_free2(&device->vk.alloc, pAllocator, pipeline); - return result; - } - - pipeline->group_count = pCreateInfo->groupCount; - pipeline->groups = groups; - - ASSERTED const VkShaderStageFlags ray_tracing_stages = - VK_SHADER_STAGE_RAYGEN_BIT_KHR | - VK_SHADER_STAGE_ANY_HIT_BIT_KHR | - VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | - VK_SHADER_STAGE_MISS_BIT_KHR | - VK_SHADER_STAGE_INTERSECTION_BIT_KHR | - VK_SHADER_STAGE_CALLABLE_BIT_KHR; - - for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) - assert((pCreateInfo->pStages[i].stage & ~ray_tracing_stages) == 0); - - for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) { - const VkRayTracingShaderGroupCreateInfoKHR *ginfo = - &pCreateInfo->pGroups[i]; - assert_rt_stage_index_valid(pCreateInfo, ginfo->generalShader, - VK_SHADER_STAGE_RAYGEN_BIT_KHR | - VK_SHADER_STAGE_MISS_BIT_KHR | - VK_SHADER_STAGE_CALLABLE_BIT_KHR); - assert_rt_stage_index_valid(pCreateInfo, ginfo->closestHitShader, - VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR); - assert_rt_stage_index_valid(pCreateInfo, ginfo->anyHitShader, - VK_SHADER_STAGE_ANY_HIT_BIT_KHR); - assert_rt_stage_index_valid(pCreateInfo, ginfo->intersectionShader, - VK_SHADER_STAGE_INTERSECTION_BIT_KHR); - switch (ginfo->type) { - case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: - assert(ginfo->generalShader < pCreateInfo->stageCount); - assert(ginfo->anyHitShader == VK_SHADER_UNUSED_KHR); - assert(ginfo->closestHitShader == VK_SHADER_UNUSED_KHR); - assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR); - break; - - case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: - assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR); - assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR); - break; - - case 
VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: - assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR); - break; - - default: - unreachable("Invalid ray-tracing shader group type"); - } - } - - result = anv_ray_tracing_pipeline_init(pipeline, device, cache, - pCreateInfo, pAllocator); - if (result != VK_SUCCESS) { - anv_pipeline_finish(&pipeline->base, device, pAllocator); - vk_free2(&device->vk.alloc, pAllocator, pipeline); - return result; - } - - anv_genX(device->info, ray_tracing_pipeline_emit)(pipeline); - - *pPipeline = anv_pipeline_to_handle(&pipeline->base); - - return pipeline->base.batch.status; -} - -VkResult -anv_CreateRayTracingPipelinesKHR( - VkDevice _device, - VkDeferredOperationKHR deferredOperation, - VkPipelineCache pipelineCache, - uint32_t createInfoCount, - const VkRayTracingPipelineCreateInfoKHR* pCreateInfos, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipelines) -{ - ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache); - - VkResult result = VK_SUCCESS; - - unsigned i; - for (i = 0; i < createInfoCount; i++) { - VkResult res = anv_ray_tracing_pipeline_create(_device, pipeline_cache, - &pCreateInfos[i], - pAllocator, &pPipelines[i]); - - if (res == VK_SUCCESS) - continue; - - /* Bail out on the first error as it is not obvious what error should be - * report upon 2 different failures. */ - result = res; - if (result != VK_PIPELINE_COMPILE_REQUIRED) - break; - - pPipelines[i] = VK_NULL_HANDLE; - - if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) - break; - } - - for (; i < createInfoCount; i++) - pPipelines[i] = VK_NULL_HANDLE; - - return result; -} - #define WRITE_STR(field, ...) 
({ \ memset(field, 0, sizeof(field)); \ UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \ @@ -3058,14 +2249,6 @@ VkResult anv_GetPipelineExecutableStatisticsKHR( prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data; break; } - case ANV_PIPELINE_RAY_TRACING: { - struct anv_shader_bin **shader = - util_dynarray_element(&anv_pipeline_to_ray_tracing(pipeline)->shaders, - struct anv_shader_bin *, - pExecutableInfo->executableIndex); - prog_data = (*shader)->prog_data; - break; - } default: unreachable("invalid pipeline type"); } @@ -3214,87 +2397,3 @@ VkResult anv_GetPipelineExecutableInternalRepresentationsKHR( return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out); } - -VkResult -anv_GetRayTracingShaderGroupHandlesKHR( - VkDevice _device, - VkPipeline _pipeline, - uint32_t firstGroup, - uint32_t groupCount, - size_t dataSize, - void* pData) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - - if (pipeline->type != ANV_PIPELINE_RAY_TRACING) - return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); - - struct anv_ray_tracing_pipeline *rt_pipeline = - anv_pipeline_to_ray_tracing(pipeline); - - for (uint32_t i = 0; i < groupCount; i++) { - struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i]; - memcpy(pData, group->handle, sizeof(group->handle)); - pData += sizeof(group->handle); - } - - return VK_SUCCESS; -} - -VkResult -anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR( - VkDevice _device, - VkPipeline pipeline, - uint32_t firstGroup, - uint32_t groupCount, - size_t dataSize, - void* pData) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - unreachable("Unimplemented"); - return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); -} - -VkDeviceSize -anv_GetRayTracingShaderGroupStackSizeKHR( - VkDevice device, - VkPipeline _pipeline, - uint32_t group, - VkShaderGroupShaderKHR groupShader) -{ - ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - 
assert(pipeline->type == ANV_PIPELINE_RAY_TRACING); - - struct anv_ray_tracing_pipeline *rt_pipeline = - anv_pipeline_to_ray_tracing(pipeline); - - assert(group < rt_pipeline->group_count); - - struct anv_shader_bin *bin; - switch (groupShader) { - case VK_SHADER_GROUP_SHADER_GENERAL_KHR: - bin = rt_pipeline->groups[group].general; - break; - - case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR: - bin = rt_pipeline->groups[group].closest_hit; - break; - - case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR: - bin = rt_pipeline->groups[group].any_hit; - break; - - case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR: - bin = rt_pipeline->groups[group].intersection; - break; - - default: - unreachable("Invalid VkShaderGroupShader enum"); - } - - if (bin == NULL) - return 0; - - return brw_bs_prog_data_const(bin->prog_data)->max_stack_size; -} diff --git a/src/intel/vulkan_hasvk/anv_private.h b/src/intel/vulkan_hasvk/anv_private.h index 0e23081dc99..a66e57e0542 100644 --- a/src/intel/vulkan_hasvk/anv_private.h +++ b/src/intel/vulkan_hasvk/anv_private.h @@ -1215,25 +1215,6 @@ struct anv_device { struct anv_queue * queues; struct anv_scratch_pool scratch_pool; - struct anv_bo *rt_scratch_bos[16]; - - /** Shadow ray query BO - * - * The ray_query_bo only holds the current ray being traced. When using - * more than 1 ray query per thread, we cannot fit all the queries in - * there, so we need a another buffer to hold query data that is not - * currently being used by the HW for tracing, similar to a scratch space. - * - * The size of the shadow buffer depends on the number of queries per - * shader. - */ - struct anv_bo *ray_query_shadow_bos[16]; - /** Ray query buffer used to communicated with HW unit. 
- */ - struct anv_bo *ray_query_bo; - - struct anv_shader_bin *rt_trampoline; - struct anv_shader_bin *rt_trivial_return; pthread_mutex_t mutex; pthread_cond_t queue_submit; @@ -2690,19 +2671,6 @@ struct anv_cmd_compute_state { struct anv_address num_workgroups; }; -struct anv_cmd_ray_tracing_state { - struct anv_cmd_pipeline_state base; - - struct anv_ray_tracing_pipeline *pipeline; - - bool pipeline_dirty; - - struct { - struct anv_bo *bo; - struct brw_rt_scratch_layout layout; - } scratch; -}; - /** State required while building cmd buffer */ struct anv_cmd_state { /* PIPELINE_SELECT.PipelineSelection */ @@ -2712,7 +2680,6 @@ struct anv_cmd_state { struct anv_cmd_graphics_state gfx; struct anv_cmd_compute_state compute; - struct anv_cmd_ray_tracing_state rt; enum anv_pipe_bits pending_pipe_bits; VkShaderStageFlags descriptors_dirty; @@ -3034,20 +3001,6 @@ anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader) vk_pipeline_cache_object_unref(&shader->base); } -#define anv_shader_bin_get_bsr(bin, local_arg_offset) ({ \ - assert((local_arg_offset) % 8 == 0); \ - const struct brw_bs_prog_data *prog_data = \ - brw_bs_prog_data_const(bin->prog_data); \ - assert(prog_data->simd_size == 8 || prog_data->simd_size == 16); \ - \ - (struct GFX_BINDLESS_SHADER_RECORD) { \ - .OffsetToLocalArguments = (local_arg_offset) / 8, \ - .BindlessShaderDispatchMode = \ - prog_data->simd_size == 16 ? 
RT_SIMD16 : RT_SIMD8, \ - .KernelStartPointer = bin->kernel.offset, \ - }; \ -}) - struct anv_pipeline_executable { gl_shader_stage stage; @@ -3060,7 +3013,6 @@ struct anv_pipeline_executable { enum anv_pipeline_type { ANV_PIPELINE_GRAPHICS, ANV_PIPELINE_COMPUTE, - ANV_PIPELINE_RAY_TRACING, }; struct anv_pipeline { @@ -3155,34 +3107,6 @@ struct anv_compute_pipeline { uint32_t interface_descriptor_data[8]; }; -struct anv_rt_shader_group { - VkRayTracingShaderGroupTypeKHR type; - - struct anv_shader_bin *general; - struct anv_shader_bin *closest_hit; - struct anv_shader_bin *any_hit; - struct anv_shader_bin *intersection; - - /* VK_KHR_ray_tracing requires shaderGroupHandleSize == 32 */ - uint32_t handle[8]; -}; - -struct anv_ray_tracing_pipeline { - struct anv_pipeline base; - - /* All shaders in the pipeline */ - struct util_dynarray shaders; - - uint32_t group_count; - struct anv_rt_shader_group * groups; - - /* If non-zero, this is the default computed stack size as per the stack - * size computation in the Vulkan spec. If zero, that indicates that the - * client has requested a dynamic stack size. 
- */ - uint32_t stack_size; -}; - #define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \ static inline struct anv_##pipe_type##_pipeline * \ anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline) \ @@ -3193,7 +3117,6 @@ struct anv_ray_tracing_pipeline { ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS) ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE) -ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING) static inline bool anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline, @@ -3273,12 +3196,6 @@ anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline return &get_vs_prog_data(pipeline)->base; } -VkResult -anv_device_init_rt_shaders(struct anv_device *device); - -void -anv_device_finish_rt_shaders(struct anv_device *device); - VkResult anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, diff --git a/src/intel/vulkan_hasvk/genX_cmd_buffer.c b/src/intel/vulkan_hasvk/genX_cmd_buffer.c index 89a695a470e..147d256cff5 100644 --- a/src/intel/vulkan_hasvk/genX_cmd_buffer.c +++ b/src/intel/vulkan_hasvk/genX_cmd_buffer.c @@ -5610,274 +5610,6 @@ genX(cmd_buffer_ray_query_globals)(struct anv_cmd_buffer *cmd_buffer) #endif } -#if GFX_VERx10 >= 125 -static void -calc_local_trace_size(uint8_t local_shift[3], const uint32_t global[3]) -{ - unsigned total_shift = 0; - memset(local_shift, 0, 3); - - bool progress; - do { - progress = false; - for (unsigned i = 0; i < 3; i++) { - assert(global[i] > 0); - if ((1 << local_shift[i]) < global[i]) { - progress = true; - local_shift[i]++; - total_shift++; - } - - if (total_shift == 3) - return; - } - } while(progress); - - /* Assign whatever's left to x */ - local_shift[0] += 3 - total_shift; -} - -static struct GFX_RT_SHADER_TABLE -vk_sdar_to_shader_table(const VkStridedDeviceAddressRegionKHR *region) -{ - return (struct GFX_RT_SHADER_TABLE) { - .BaseAddress = anv_address_from_u64(region->deviceAddress), - .Stride = region->stride, - }; 
-} - -static void -cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer, - const VkStridedDeviceAddressRegionKHR *raygen_sbt, - const VkStridedDeviceAddressRegionKHR *miss_sbt, - const VkStridedDeviceAddressRegionKHR *hit_sbt, - const VkStridedDeviceAddressRegionKHR *callable_sbt, - bool is_indirect, - uint32_t launch_width, - uint32_t launch_height, - uint32_t launch_depth, - uint64_t launch_size_addr) -{ - struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt; - struct anv_ray_tracing_pipeline *pipeline = rt->pipeline; - - if (anv_batch_has_error(&cmd_buffer->batch)) - return; - - /* If we have a known degenerate launch size, just bail */ - if (!is_indirect && - (launch_width == 0 || launch_height == 0 || launch_depth == 0)) - return; - - genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config); - genX(flush_pipeline_select_gpgpu)(cmd_buffer); - - cmd_buffer->state.rt.pipeline_dirty = false; - - genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); - - /* Add these to the reloc list as they're internal buffers that don't - * actually have relocs to pick them up manually. 
- * - * TODO(RT): This is a bit of a hack - */ - anv_reloc_list_add_bo(cmd_buffer->batch.relocs, - cmd_buffer->batch.alloc, - rt->scratch.bo); - - /* Allocate and set up our RT_DISPATCH_GLOBALS */ - struct anv_state rtdg_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - BRW_RT_PUSH_CONST_OFFSET + - sizeof(struct anv_push_constants), - 64); - - struct GFX_RT_DISPATCH_GLOBALS rtdg = { - .MemBaseAddress = (struct anv_address) { - .bo = rt->scratch.bo, - .offset = rt->scratch.layout.ray_stack_start, - }, - .CallStackHandler = - anv_shader_bin_get_bsr(cmd_buffer->device->rt_trivial_return, 0), - .AsyncRTStackSize = rt->scratch.layout.ray_stack_stride / 64, - .NumDSSRTStacks = rt->scratch.layout.stack_ids_per_dss, - .MaxBVHLevels = BRW_RT_MAX_BVH_LEVELS, - .Flags = RT_DEPTH_TEST_LESS_EQUAL, - .HitGroupTable = vk_sdar_to_shader_table(hit_sbt), - .MissGroupTable = vk_sdar_to_shader_table(miss_sbt), - .SWStackSize = rt->scratch.layout.sw_stack_size / 64, - .LaunchWidth = launch_width, - .LaunchHeight = launch_height, - .LaunchDepth = launch_depth, - .CallableGroupTable = vk_sdar_to_shader_table(callable_sbt), - }; - GFX_RT_DISPATCH_GLOBALS_pack(NULL, rtdg_state.map, &rtdg); - - /* Push constants go after the RT_DISPATCH_GLOBALS */ - assert(GFX_RT_DISPATCH_GLOBALS_length * 4 <= BRW_RT_PUSH_CONST_OFFSET); - memcpy(rtdg_state.map + BRW_RT_PUSH_CONST_OFFSET, - &cmd_buffer->state.rt.base.push_constants, - sizeof(struct anv_push_constants)); - - struct anv_address rtdg_addr = { - .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo, - .offset = rtdg_state.offset, - }; - - uint8_t local_size_log2[3]; - uint32_t global_size[3] = {}; - if (is_indirect) { - /* Pick a local size that's probably ok. We assume most TraceRays calls - * will use a two-dimensional dispatch size. Worst case, our initial - * dispatch will be a little slower than it has to be. 
- */ - local_size_log2[0] = 2; - local_size_log2[1] = 1; - local_size_log2[2] = 0; - - struct mi_builder b; - mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch); - - struct mi_value launch_size[3] = { - mi_mem32(anv_address_from_u64(launch_size_addr + 0)), - mi_mem32(anv_address_from_u64(launch_size_addr + 4)), - mi_mem32(anv_address_from_u64(launch_size_addr + 8)), - }; - - /* Store the original launch size into RT_DISPATCH_GLOBALS - * - * TODO: Pull values from genX_bits.h once RT_DISPATCH_GLOBALS gets - * moved into a genX version. - */ - mi_store(&b, mi_mem32(anv_address_add(rtdg_addr, 52)), - mi_value_ref(&b, launch_size[0])); - mi_store(&b, mi_mem32(anv_address_add(rtdg_addr, 56)), - mi_value_ref(&b, launch_size[1])); - mi_store(&b, mi_mem32(anv_address_add(rtdg_addr, 60)), - mi_value_ref(&b, launch_size[2])); - - /* Compute the global dispatch size */ - for (unsigned i = 0; i < 3; i++) { - if (local_size_log2[i] == 0) - continue; - - /* global_size = DIV_ROUND_UP(launch_size, local_size) - * - * Fortunately for us MI_ALU math is 64-bit and , mi_ushr32_imm - * has the semantics of shifting the enture 64-bit value and taking - * the bottom 32 so we don't have to worry about roll-over. - */ - uint32_t local_size = 1 << local_size_log2[i]; - launch_size[i] = mi_iadd(&b, launch_size[i], - mi_imm(local_size - 1)); - launch_size[i] = mi_ushr32_imm(&b, launch_size[i], - local_size_log2[i]); - } - - mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMX), launch_size[0]); - mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMY), launch_size[1]); - mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMZ), launch_size[2]); - } else { - uint32_t launch_size[3] = { launch_width, launch_height, launch_depth }; - calc_local_trace_size(local_size_log2, launch_size); - - for (unsigned i = 0; i < 3; i++) { - /* We have to be a bit careful here because DIV_ROUND_UP adds to the - * numerator value may overflow. Cast to uint64_t to avoid this. 
- */ - uint32_t local_size = 1 << local_size_log2[i]; - global_size[i] = DIV_ROUND_UP((uint64_t)launch_size[i], local_size); - } - } - - anv_batch_emit(&cmd_buffer->batch, GENX(COMPUTE_WALKER), cw) { - cw.IndirectParameterEnable = is_indirect; - cw.PredicateEnable = false; - cw.SIMDSize = SIMD8; - cw.LocalXMaximum = (1 << local_size_log2[0]) - 1; - cw.LocalYMaximum = (1 << local_size_log2[1]) - 1; - cw.LocalZMaximum = (1 << local_size_log2[2]) - 1; - cw.ThreadGroupIDXDimension = global_size[0]; - cw.ThreadGroupIDYDimension = global_size[1]; - cw.ThreadGroupIDZDimension = global_size[2]; - cw.ExecutionMask = 0xff; - cw.EmitInlineParameter = true; - cw.PostSync.MOCS = anv_mocs(pipeline->base.device, NULL, 0); - - const gl_shader_stage s = MESA_SHADER_RAYGEN; - struct anv_device *device = cmd_buffer->device; - struct anv_state *surfaces = &cmd_buffer->state.binding_tables[s]; - struct anv_state *samplers = &cmd_buffer->state.samplers[s]; - cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) { - .KernelStartPointer = device->rt_trampoline->kernel.offset, - .SamplerStatePointer = samplers->offset, - /* i965: DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4), */ - .SamplerCount = 0, - .BindingTablePointer = surfaces->offset, - .NumberofThreadsinGPGPUThreadGroup = 1, - .BTDMode = true, - }; - - struct brw_rt_raygen_trampoline_params trampoline_params = { - .rt_disp_globals_addr = anv_address_physical(rtdg_addr), - .raygen_bsr_addr = raygen_sbt->deviceAddress, - .is_indirect = is_indirect, - .local_group_size_log2 = { - local_size_log2[0], - local_size_log2[1], - local_size_log2[2], - }, - }; - STATIC_ASSERT(sizeof(trampoline_params) == 32); - memcpy(cw.InlineData, &trampoline_params, sizeof(trampoline_params)); - } -} - -void -genX(CmdTraceRaysKHR)( - VkCommandBuffer commandBuffer, - const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, - const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, - const 
VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, - const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, - uint32_t width, - uint32_t height, - uint32_t depth) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - cmd_buffer_trace_rays(cmd_buffer, - pRaygenShaderBindingTable, - pMissShaderBindingTable, - pHitShaderBindingTable, - pCallableShaderBindingTable, - false /* is_indirect */, - width, height, depth, - 0 /* launch_size_addr */); -} - -void -genX(CmdTraceRaysIndirectKHR)( - VkCommandBuffer commandBuffer, - const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, - const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, - const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, - const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, - VkDeviceAddress indirectDeviceAddress) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - cmd_buffer_trace_rays(cmd_buffer, - pRaygenShaderBindingTable, - pMissShaderBindingTable, - pHitShaderBindingTable, - pCallableShaderBindingTable, - true /* is_indirect */, - 0, 0, 0, /* width, height, depth, */ - indirectDeviceAddress); -} -#endif /* GFX_VERx10 >= 125 */ - static void genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer, uint32_t pipeline) diff --git a/src/intel/vulkan_hasvk/genX_pipeline.c b/src/intel/vulkan_hasvk/genX_pipeline.c index a28f34a0efa..65b8e25f568 100644 --- a/src/intel/vulkan_hasvk/genX_pipeline.c +++ b/src/intel/vulkan_hasvk/genX_pipeline.c @@ -2510,54 +2510,3 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline) } #endif /* #if GFX_VERx10 >= 125 */ - -#if GFX_VERx10 >= 125 - -void -genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline) -{ - for (uint32_t i = 0; i < pipeline->group_count; i++) { - struct anv_rt_shader_group *group = &pipeline->groups[i]; - - switch (group->type) { - case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: { - struct GFX_RT_GENERAL_SBT_HANDLE sh 
= {}; - sh.General = anv_shader_bin_get_bsr(group->general, 32); - GFX_RT_GENERAL_SBT_HANDLE_pack(NULL, group->handle, &sh); - break; - } - - case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: { - struct GFX_RT_TRIANGLES_SBT_HANDLE sh = {}; - if (group->closest_hit) - sh.ClosestHit = anv_shader_bin_get_bsr(group->closest_hit, 32); - if (group->any_hit) - sh.AnyHit = anv_shader_bin_get_bsr(group->any_hit, 24); - GFX_RT_TRIANGLES_SBT_HANDLE_pack(NULL, group->handle, &sh); - break; - } - - case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: { - struct GFX_RT_PROCEDURAL_SBT_HANDLE sh = {}; - if (group->closest_hit) - sh.ClosestHit = anv_shader_bin_get_bsr(group->closest_hit, 32); - sh.Intersection = anv_shader_bin_get_bsr(group->intersection, 24); - GFX_RT_PROCEDURAL_SBT_HANDLE_pack(NULL, group->handle, &sh); - break; - } - - default: - unreachable("Invalid shader group type"); - } - } -} - -#else - -void -genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline) -{ - unreachable("Ray tracing not supported"); -} - -#endif /* GFX_VERx10 >= 125 */