diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 9ff2d0b56da..c24eb89cc7d 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -2244,6 +2244,7 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device, pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT; break; case VK_ACCESS_2_UNIFORM_READ_BIT: + case VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR: /* We transitioning a buffer to be used as uniform data. Because * uniform is accessed through the data port & sampler, we need to * invalidate the texture cache (sampler) & constant cache (data diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index f4496342d37..d04b4b5c81e 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -5679,6 +5679,96 @@ cmd_buffer_emit_rt_dispatch_globals(struct anv_cmd_buffer *cmd_buffer, return rtdg_state; } +static struct mi_value +mi_build_sbt_entry(struct mi_builder *b, + uint64_t addr_field_addr, + uint64_t stride_field_addr) +{ + return mi_ior(b, + mi_iand(b, mi_mem64(anv_address_from_u64(addr_field_addr)), + mi_imm(0xffffffffff)), + mi_ishl_imm(b, mi_mem32(anv_address_from_u64(stride_field_addr)), + 48)); +} + +static struct anv_state +cmd_buffer_emit_rt_dispatch_globals_indirect(struct anv_cmd_buffer *cmd_buffer, + struct trace_params *params) +{ + struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt; + + struct anv_state rtdg_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + BRW_RT_PUSH_CONST_OFFSET + + sizeof(struct anv_push_constants), + 64); + + struct GENX(RT_DISPATCH_GLOBALS) rtdg = { + .MemBaseAddress = (struct anv_address) { + .bo = rt->scratch.bo, + .offset = rt->scratch.layout.ray_stack_start, + }, + .CallStackHandler = anv_shader_bin_get_bsr( + cmd_buffer->device->rt_trivial_return, 0), + .AsyncRTStackSize = rt->scratch.layout.ray_stack_stride / 64, + .NumDSSRTStacks = rt->scratch.layout.stack_ids_per_dss, + .MaxBVHLevels = BRW_RT_MAX_BVH_LEVELS, + .Flags = RT_DEPTH_TEST_LESS_EQUAL, + .SWStackSize = rt->scratch.layout.sw_stack_size / 64, + }; + GENX(RT_DISPATCH_GLOBALS_pack)(NULL, rtdg_state.map, &rtdg); + + struct anv_address rtdg_addr = + anv_state_pool_state_address( + &cmd_buffer->device->dynamic_state_pool, + rtdg_state); + + struct mi_builder b; + mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch); + + /* Fill the MissGroupTable, HitGroupTable & CallableGroupTable fields of + * RT_DISPATCH_GLOBALS using the mi_builder. + */ + mi_store(&b, + mi_mem64( + anv_address_add( + rtdg_addr, + GENX(RT_DISPATCH_GLOBALS_MissGroupTable_start) / 8)), + mi_build_sbt_entry(&b, + params->indirect_sbts_addr + + offsetof(VkTraceRaysIndirectCommand2KHR, + missShaderBindingTableAddress), + params->indirect_sbts_addr + + offsetof(VkTraceRaysIndirectCommand2KHR, + missShaderBindingTableStride))); + mi_store(&b, + mi_mem64( + anv_address_add( + rtdg_addr, + GENX(RT_DISPATCH_GLOBALS_HitGroupTable_start) / 8)), + mi_build_sbt_entry(&b, + params->indirect_sbts_addr + + offsetof(VkTraceRaysIndirectCommand2KHR, + hitShaderBindingTableAddress), + params->indirect_sbts_addr + + offsetof(VkTraceRaysIndirectCommand2KHR, + hitShaderBindingTableStride))); + mi_store(&b, + mi_mem64( + anv_address_add( + rtdg_addr, + GENX(RT_DISPATCH_GLOBALS_CallableGroupTable_start) / 8)), + mi_build_sbt_entry(&b, + params->indirect_sbts_addr + + offsetof(VkTraceRaysIndirectCommand2KHR, + callableShaderBindingTableAddress), + params->indirect_sbts_addr + + offsetof(VkTraceRaysIndirectCommand2KHR, + callableShaderBindingTableStride))); + + return rtdg_state; +} + static void cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer, struct trace_params *params) @@ -5718,6 +5808,8 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer, /* Allocate and set up our RT_DISPATCH_GLOBALS */ struct anv_state rtdg_state = + params->is_sbt_indirect ? + cmd_buffer_emit_rt_dispatch_globals_indirect(cmd_buffer, params) : cmd_buffer_emit_rt_dispatch_globals(cmd_buffer, params); assert(rtdg_state.alloc_size >= (BRW_RT_PUSH_CONST_OFFSET + @@ -5860,8 +5952,13 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer, struct brw_rt_raygen_trampoline_params trampoline_params = { .rt_disp_globals_addr = anv_address_physical(rtdg_addr), - .raygen_bsr_addr = params->raygen_sbt->deviceAddress, - .is_indirect = false, /* Only for raygen_bsr_addr */ + .raygen_bsr_addr = + params->is_sbt_indirect ? + (params->indirect_sbts_addr + + offsetof(VkTraceRaysIndirectCommand2KHR, + raygenShaderRecordAddress)) : + params->raygen_sbt->deviceAddress, + .is_indirect = params->is_sbt_indirect, .local_group_size_log2 = { local_size_log2[0], local_size_log2[1], @@ -5924,6 +6021,24 @@ genX(CmdTraceRaysIndirectKHR)( cmd_buffer_trace_rays(cmd_buffer, ¶ms); } + +void +genX(CmdTraceRaysIndirect2KHR)( + VkCommandBuffer commandBuffer, + VkDeviceAddress indirectDeviceAddress) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct trace_params params = { + .is_sbt_indirect = true, + .indirect_sbts_addr = indirectDeviceAddress, + .is_launch_size_indirect = true, + .launch_size_addr = indirectDeviceAddress + + offsetof(VkTraceRaysIndirectCommand2KHR, width), + }; + + cmd_buffer_trace_rays(cmd_buffer, ¶ms); +} + #endif /* GFX_VERx10 >= 125 */ static void