From 5ad803840d8479412d859db4594e2cbb7ba77300 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 17 Aug 2021 14:51:12 +0300 Subject: [PATCH] anv: setup scratch space correctly for RT shaders Things are a bit confusing because we use the term "scratch" for two different things: * the buffer for register allocation spilling * the buffer for storing live values between split shaders around shader calls Here we're fixing the missing register allocation spilling buffer. v2: update comments (Caio) fix scratch bo size computation with pipeline libraries (Lionel) Signed-off-by: Lionel Landwerlin Reviewed-by: Caio Oliveira Part-of: --- src/intel/vulkan/anv_pipeline.c | 17 +++++++++++++++++ src/intel/vulkan/anv_private.h | 3 +++ src/intel/vulkan/genX_cmd_buffer.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+) diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index df23f1a3eaa..16f001c3786 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -2474,6 +2474,9 @@ compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline, anv_pipeline_add_executables(&pipeline->base, stage, bin); util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin); + pipeline->scratch_size = + MAX2(pipeline->scratch_size, bin->prog_data->total_scratch); + *shader_out = bin; return VK_SUCCESS; @@ -3131,6 +3134,14 @@ anv_ray_tracing_pipeline_create( return result; } + /* Compute the size of the scratch BO (for register spilling) by taking the + * max of all the shaders in the pipeline. 
+ */ + util_dynarray_foreach(&pipeline->shaders, struct anv_shader_bin *, shader) { + pipeline->scratch_size = + MAX2(pipeline->scratch_size, (*shader)->prog_data->total_scratch); + } + if (pCreateInfo->pLibraryInfo) { uint32_t g = pCreateInfo->groupCount; for (uint32_t l = 0; l < pCreateInfo->pLibraryInfo->libraryCount; l++) { @@ -3140,6 +3151,12 @@ anv_ray_tracing_pipeline_create( anv_pipeline_to_ray_tracing(library); for (uint32_t lg = 0; lg < rt_library->group_count; lg++) pipeline->groups[g++] = rt_library->groups[lg]; + + /* Also account for all the pipeline libraries for the size of the + * scratch BO. + */ + pipeline->scratch_size = + MAX2(pipeline->scratch_size, rt_library->scratch_size); } } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 6c7407742d5..1d07fd3b1c8 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -3016,6 +3016,9 @@ struct anv_ray_tracing_pipeline { * client has requested a dynamic stack size. */ uint32_t stack_size; + + /* Maximum scratch size for all shaders in this pipeline. */ + uint32_t scratch_size; }; #define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \ diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 565a80e0d3e..a0f0e9cf121 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -5478,6 +5478,7 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer, uint32_t launch_depth, uint64_t launch_size_addr) { + struct anv_device *device = cmd_buffer->device; struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt; struct anv_ray_tracing_pipeline *pipeline = rt->pipeline; @@ -5613,6 +5614,35 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer, } } + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BTD), btd) { + /* TODO: This is the timeout after which the bucketed thread dispatcher + * will kick off a wave of threads. We go with the lowest value + * for now. 
It could be tweaked on a per application basis + * (drirc). + */ + btd.DispatchTimeoutCounter = _64clocks; + /* BSpec 43851: "This field must be programmed to 6h i.e. memory backed + * buffer must be 128KB." + */ + btd.PerDSSMemoryBackedBufferSize = 6; + btd.MemoryBackedBufferBasePointer = (struct anv_address) { .bo = device->btd_fifo_bo }; + if (pipeline->scratch_size > 0) { + struct anv_bo *scratch_bo = + anv_scratch_pool_alloc(device, + &device->scratch_pool, + MESA_SHADER_COMPUTE, + pipeline->scratch_size); + anv_reloc_list_add_bo(cmd_buffer->batch.relocs, + cmd_buffer->batch.alloc, + scratch_bo); + uint32_t scratch_surf = + anv_scratch_pool_get_surf(cmd_buffer->device, + &device->scratch_pool, + pipeline->scratch_size); + btd.ScratchSpaceBuffer = scratch_surf >> 4; + } + } + anv_batch_emit(&cmd_buffer->batch, GENX(COMPUTE_WALKER), cw) { cw.IndirectParameterEnable = is_indirect; cw.PredicateEnable = false;