radv: Use compute for transfer operations unsupported by SDMA
For transfer queue operations that aren't supported by SDMA, implement
them with ACE (Async Compute Engine) using the pre-existing compute copy
functions.

Add a helper radv_get_pm4_cs that returns the ACE gang CS for transfer
command buffers and the main CS for graphics/compute command buffers, and
use it to make sure the compute commands are emitted to the correct
command stream.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25594>
parent aa6c8b8953
commit 0638fa5156
2 changed files with 51 additions and 13 deletions
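
The new fallback is easiest to read end to end. Below is a minimal sketch of the transfer-queue path, assembled from the hunks that follow; the radv_sdma_supports_image guard is an assumption for illustration and does not appear in this diff:

    /* Illustrative sketch only: how the transfer queue falls back to the
     * ACE gang when SDMA cannot handle an image operation. The SDMA-support
     * check is an assumed placeholder; all other helpers appear in the
     * hunks below. */
    static void
    transfer_copy_memory_image_sketch(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_va, uint64_t buffer_size,
                                      enum radv_copy_flags buffer_flags, struct radv_image *image,
                                      const VkImageLayout layout, const VkBufferImageCopy2 *region, bool to_image)
    {
       struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

       if (!radv_sdma_supports_image(device, image)) { /* assumed guard */
          /* Stall the ACE gang behind prior SDMA work, then flush its
           * caches before reusing the compute copy paths. */
          radv_wait_gang_leader(cmd_buffer);
          radv_gang_cache_flush(cmd_buffer);

          if (to_image)
             gfx_or_compute_copy_memory_to_image(cmd_buffer, buffer_va, buffer_size, buffer_flags, image, layout,
                                                 region, true /* use_compute */);
          else
             compute_copy_image_to_memory(cmd_buffer, buffer_va, buffer_size, buffer_flags, image, layout, region);
          return;
       }

       /* ...otherwise, record the copy with plain SDMA packets... */
    }

The key point is that the gang ACE stream must be synchronized with prior SDMA work (radv_wait_gang_leader) and have its caches flushed (radv_gang_cache_flush) before the compute copy functions run on it.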
@@ -74,6 +74,14 @@ alloc_transfer_temp_bo(struct radv_cmd_buffer *cmd_buffer)
    return true;
 }
 
+static void gfx_or_compute_copy_memory_to_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_addr,
+                                                uint64_t buffer_size, enum radv_copy_flags src_copy_flags,
+                                                struct radv_image *image, VkImageLayout layout,
+                                                const VkBufferImageCopy2 *region, const bool use_compute);
+
+static void compute_copy_image_to_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_addr, uint64_t buffer_size,
+                                         enum radv_copy_flags dst_copy_flags, struct radv_image *image,
+                                         VkImageLayout layout, const VkBufferImageCopy2 *region);
 static void
 transfer_copy_memory_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_va, uint64_t buffer_size,
                            enum radv_copy_flags buffer_flags, struct radv_image *image, const VkImageLayout layout,
@@ -90,6 +98,14 @@ transfer_copy_memory_image(struct radv_cmd_buffer *cmd_buffer, uint64_t buffer_va, uint64_t buffer_size,
       radv_wait_gang_leader(cmd_buffer);
 
       radv_gang_cache_flush(cmd_buffer);
 
+      if (to_image) {
+         gfx_or_compute_copy_memory_to_image(cmd_buffer, buffer_va, buffer_size, buffer_flags, image, layout, region,
+                                             true);
+      } else {
+         compute_copy_image_to_memory(cmd_buffer, buffer_va, buffer_size, buffer_flags, image, layout, region);
+      }
+
+      return;
    }
 
@@ -390,6 +406,11 @@ radv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
    radv_resume_conditional_rendering(cmd_buffer);
 }
 
+static void gfx_or_compute_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+                                      VkImageLayout src_image_layout, struct radv_image *dst_image,
+                                      VkImageLayout dst_image_layout, const VkImageCopy2 *region,
+                                      const bool use_compute);
+
 static void
 transfer_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkImageLayout src_image_layout,
                     struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageCopy2 *region)
@@ -406,6 +427,9 @@ transfer_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkImageLayout src_image_layout,
       radv_wait_gang_leader(cmd_buffer);
 
      radv_gang_cache_flush(cmd_buffer);
 
+      gfx_or_compute_copy_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region, true);
+
+      return;
    }
 

@@ -66,6 +66,16 @@ static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                                          uint32_t dst_family_index, const VkImageSubresourceRange *range,
                                          struct radv_sample_locations_state *sample_locs);
 
+static struct radv_cmd_stream *
+radv_get_pm4_cs(struct radv_cmd_buffer *cmd_buffer)
+{
+   if (cmd_buffer->cs->hw_ip == AMD_IP_GFX || cmd_buffer->cs->hw_ip == AMD_IP_COMPUTE)
+      return cmd_buffer->cs;
+
+   assert(cmd_buffer->cs->hw_ip == AMD_IP_SDMA);
+   return cmd_buffer->gang.cs;
+}
+
 ALWAYS_INLINE static void
 radv_cmd_set_line_width(struct radv_cmd_buffer *cmd_buffer, float line_width)
 {
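
The remaining hunks all apply the same mechanical conversion to PM4-emitting helpers. A minimal sketch of the pattern; the emit_example name and the packet count are hypothetical placeholders:

    /* Illustrative pattern only: helpers that previously assumed
     * cmd_buffer->cs now fetch the PM4-capable stream, so the same code
     * works when recording on a transfer (SDMA) command buffer, where PM4
     * packets must go to the ACE gang CS instead. */
    static void
    emit_example(struct radv_cmd_buffer *cmd_buffer)
    {
       struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
       struct radv_cmd_stream *cs = radv_get_pm4_cs(cmd_buffer);

       /* Reserve space and emit on the selected stream, never on
        * cmd_buffer->cs directly (placeholder packet count). */
       radeon_check_space(device->ws, cs->b, 2);
       /* ... emit PM4 packets to cs ... */
    }

The one exception is cache flushing at dispatch time: radv_emit_cache_flush operates on the main CS, so radv_before_dispatch below switches to radv_gang_cache_flush when the PM4 stream is the gang CS.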
@@ -1661,7 +1671,7 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flush_bits flush_bits, bool dgc)
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_instance *instance = radv_physical_device_instance(pdev);
-   struct radv_cmd_stream *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = radv_get_pm4_cs(cmd_buffer);
 
    if (unlikely(device->sqtt.bo) && !dgc) {
       radeon_check_space(device->ws, cs->b, 2);
@@ -2640,14 +2650,14 @@ radv_emit_binning_state(struct radv_cmd_buffer *cmd_buffer)
 static void
 radv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *shader)
 {
-   uint64_t va;
-
    if (!shader)
       return;
 
-   va = radv_shader_get_va(shader);
+   struct radv_device *const device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = radv_get_pm4_cs(cmd_buffer);
+   const uint64_t va = radv_shader_get_va(shader);
 
-   radv_cp_dma_prefetch(cmd_buffer, va, shader->code_size);
+   radv_cs_cp_dma_prefetch(device, cs, va, shader->code_size, cmd_buffer->state.predicating);
 }
 
 ALWAYS_INLINE static void
@@ -6423,7 +6433,7 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages, VkPipelineBindPoint bind_point)
 {
    struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point);
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
-   struct radv_cmd_stream *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = radv_get_pm4_cs(cmd_buffer);
 
    if (descriptors_state->need_indirect_descriptors)
       radv_upload_indirect_descriptor_sets(cmd_buffer, descriptors_state);
@@ -6525,7 +6535,7 @@ static void
 radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages, VkPipelineBindPoint bind_point)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
-   struct radv_cmd_stream *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = radv_get_pm4_cs(cmd_buffer);
    const struct radv_push_constant_state *push_constants = radv_get_push_constants_state(cmd_buffer, bind_point);
    uint64_t va = 0;
    uint32_t internal_stages = stages;
@@ -6597,7 +6607,7 @@ radv_flush_dynamic_descriptors(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages, VkPipelineBindPoint bind_point)
 {
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point);
-   struct radv_cmd_stream *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = radv_get_pm4_cs(cmd_buffer);
    uint64_t va = 0;
 
    radv_upload_dynamic_descriptors(cmd_buffer, descriptors_state, &va);
@@ -8316,7 +8326,7 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_compute_pipeline *pipeline)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
-   struct radv_cmd_stream *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = radv_get_pm4_cs(cmd_buffer);
 
    if (pipeline == cmd_buffer->state.emitted_compute_pipeline)
       return;
@@ -8752,7 +8762,7 @@ radv_bind_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *shader, gl_shader_stage stage)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
-   struct radv_cmd_stream *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = radv_get_pm4_cs(cmd_buffer);
 
    if (!shader) {
       cmd_buffer->state.shaders[stage] = NULL;
@@ -13601,7 +13611,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *compute_shader,
    unsigned dispatch_initiator = device->dispatch_initiator;
    struct radeon_winsys *ws = device->ws;
    bool predicating = cmd_buffer->state.predicating;
-   struct radv_cmd_stream *cs = cmd_buffer->cs;
+   struct radv_cmd_stream *cs = radv_get_pm4_cs(cmd_buffer);
    const uint32_t grid_size_offset = radv_get_user_sgpr_loc(compute_shader, AC_UD_CS_GRID_SIZE);
 
    radv_describe_dispatch(cmd_buffer, info);
@@ -13875,6 +13885,7 @@ static void
 radv_before_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radv_compute_pipeline *pipeline)
 {
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   struct radv_cmd_stream *cs = radv_get_pm4_cs(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    const bool pipeline_is_dirty = pipeline != cmd_buffer->state.emitted_compute_pipeline;
    const struct radv_shader *compute_shader = cmd_buffer->state.shaders[MESA_SHADER_COMPUTE];
@@ -13889,9 +13900,12 @@ radv_before_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radv_compute_pipeline *pipeline)
    radv_upload_compute_shader_descriptors(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE);
 
    if (pdev->info.gfx_level >= GFX12)
-      radv_gfx12_emit_buffered_regs(device, cmd_buffer->cs);
+      radv_gfx12_emit_buffered_regs(device, cs);
 
-   radv_emit_cache_flush(cmd_buffer);
+   if (cs == cmd_buffer->cs)
+      radv_emit_cache_flush(cmd_buffer);
+   else
+      radv_gang_cache_flush(cmd_buffer);
 
    /* <-- CUs are idle here if shaders are synchronized. */
 