mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-29 23:10:11 +01:00
radv: optimize NOPs padding with DGC
There is two different alignment requirements: a) IB VA must be aligned to ib_alignment b) IB size must be aligned to ib_pad_dw_mask Though RADV was aligning DGC cmdbuf to ib_alignment always, but this is unnecessary. Using the optimal padding size for DGC cmdbuf removes a bunch of useless NOPs. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30768>
This commit is contained in:
parent
a7547a9781
commit
7f7ecaf08c
1 changed files with 41 additions and 13 deletions
|
|
@ -237,7 +237,16 @@ radv_get_sequence_size(const struct radv_indirect_command_layout *layout, struct
|
|||
}
|
||||
|
||||
static uint32_t
|
||||
radv_align_cmdbuf_size(const struct radv_device *device, uint32_t size, enum amd_ip_type ip_type)
|
||||
radv_pad_cmdbuf(const struct radv_device *device, uint32_t size, enum amd_ip_type ip_type)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const uint32_t ib_alignment = (pdev->info.ip[ip_type].ib_pad_dw_mask + 1) * 4;
|
||||
|
||||
return align(size, ib_alignment);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
radv_align_cmdbuf(const struct radv_device *device, uint32_t size, enum amd_ip_type ip_type)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const uint32_t ib_alignment = pdev->info.ip[ip_type].ib_alignment;
|
||||
|
|
@ -248,7 +257,15 @@ radv_align_cmdbuf_size(const struct radv_device *device, uint32_t size, enum amd
|
|||
static unsigned
|
||||
radv_dgc_preamble_cmdbuf_size(const struct radv_device *device, enum amd_ip_type ip_type)
|
||||
{
|
||||
return radv_align_cmdbuf_size(device, 16, ip_type);
|
||||
return radv_pad_cmdbuf(device, 16, ip_type);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
radv_dgc_main_cmdbuf_offset(const struct radv_device *device, enum amd_ip_type ip_type)
|
||||
{
|
||||
const uint32_t preamble_size = radv_dgc_preamble_cmdbuf_size(device, ip_type);
|
||||
|
||||
return radv_align_cmdbuf(device, preamble_size, ip_type);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -270,7 +287,7 @@ radv_get_indirect_cmdbuf_sequence_size(const VkGeneratedCommandsInfoNV *cmd_info
|
|||
radv_get_sequence_size(layout, pipeline, &gfx_cmd_size, &ace_cmd_size, &upload_size);
|
||||
|
||||
const uint32_t cmd_size = ip_type == AMD_IP_GFX ? gfx_cmd_size : ace_cmd_size;
|
||||
return radv_align_cmdbuf_size(device, cmd_size * cmd_info->sequencesCount, ip_type);
|
||||
return radv_pad_cmdbuf(device, cmd_size * cmd_info->sequencesCount, ip_type);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
|
|
@ -290,11 +307,13 @@ radv_get_indirect_ace_cmdbuf_offset(const VkGeneratedCommandsInfoNV *cmd_info)
|
|||
{
|
||||
VK_FROM_HANDLE(radv_indirect_command_layout, layout, cmd_info->indirectCommandsLayout);
|
||||
const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk);
|
||||
|
||||
uint32_t offset = radv_get_indirect_cmdbuf_sequence_size(cmd_info, AMD_IP_GFX);
|
||||
uint32_t offset = 0;
|
||||
|
||||
if (radv_dgc_use_preamble(cmd_info))
|
||||
offset += radv_dgc_preamble_cmdbuf_size(device, AMD_IP_GFX);
|
||||
offset += radv_dgc_main_cmdbuf_offset(device, AMD_IP_GFX);
|
||||
offset += radv_get_indirect_cmdbuf_sequence_size(cmd_info, AMD_IP_GFX);
|
||||
|
||||
offset = radv_align_cmdbuf(device, offset, AMD_IP_GFX);
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
|
@ -828,7 +847,7 @@ dgc_cmd_buf_size(nir_builder *b, nir_def *sequence_count, bool is_ace, const str
|
|||
|
||||
nir_def *use_preamble = nir_ine_imm(b, load_param8(b, use_preamble), 0);
|
||||
nir_def *size = nir_imul(b, cmd_buf_stride, sequence_count);
|
||||
unsigned align_mask = radv_align_cmdbuf_size(device, 1, ip_type) - 1;
|
||||
unsigned align_mask = radv_pad_cmdbuf(device, 1, ip_type) - 1;
|
||||
|
||||
size = nir_iand_imm(b, nir_iadd_imm(b, size, align_mask), ~align_mask);
|
||||
|
||||
|
|
@ -2296,12 +2315,14 @@ radv_GetGeneratedCommandsMemoryRequirementsNV(VkDevice _device,
|
|||
uint32_t cmd_stride, ace_cmd_stride, upload_stride;
|
||||
radv_get_sequence_size(layout, pipeline, &cmd_stride, &ace_cmd_stride, &upload_stride);
|
||||
|
||||
VkDeviceSize cmd_buf_size = radv_align_cmdbuf_size(device, cmd_stride * pInfo->maxSequencesCount, AMD_IP_GFX) +
|
||||
radv_dgc_preamble_cmdbuf_size(device, AMD_IP_GFX);
|
||||
VkDeviceSize cmd_buf_size = radv_dgc_main_cmdbuf_offset(device, AMD_IP_GFX) +
|
||||
radv_pad_cmdbuf(device, cmd_stride * pInfo->maxSequencesCount, AMD_IP_GFX);
|
||||
|
||||
if (ace_cmd_stride) {
|
||||
cmd_buf_size += radv_align_cmdbuf_size(device, ace_cmd_stride * pInfo->maxSequencesCount, AMD_IP_COMPUTE) +
|
||||
radv_dgc_preamble_cmdbuf_size(device, AMD_IP_COMPUTE);
|
||||
cmd_buf_size = radv_align_cmdbuf(device, cmd_buf_size, AMD_IP_GFX);
|
||||
|
||||
cmd_buf_size += radv_dgc_main_cmdbuf_offset(device, AMD_IP_COMPUTE) +
|
||||
radv_pad_cmdbuf(device, ace_cmd_stride * pInfo->maxSequencesCount, AMD_IP_COMPUTE);
|
||||
}
|
||||
|
||||
VkDeviceSize upload_buf_size = upload_stride * pInfo->maxSequencesCount;
|
||||
|
|
@ -2555,6 +2576,7 @@ radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsIn
|
|||
VK_FROM_HANDLE(radv_buffer, stream_buffer, pGeneratedCommandsInfo->pStreams[0].buffer);
|
||||
VK_FROM_HANDLE(radv_buffer, sequence_count_buffer, pGeneratedCommandsInfo->sequencesCountBuffer);
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_meta_saved_state saved_state;
|
||||
unsigned upload_offset, upload_size;
|
||||
struct radv_buffer token_buffer;
|
||||
|
|
@ -2585,20 +2607,26 @@ radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsIn
|
|||
uint32_t offset = 0;
|
||||
|
||||
if (use_preamble)
|
||||
offset += radv_dgc_preamble_cmdbuf_size(device, AMD_IP_GFX);
|
||||
offset += radv_dgc_main_cmdbuf_offset(device, AMD_IP_GFX);
|
||||
cmd_buf_main_offset = offset;
|
||||
|
||||
offset += cmd_buf_size;
|
||||
|
||||
offset = radv_align_cmdbuf(device, offset, AMD_IP_GFX);
|
||||
|
||||
ace_cmd_buf_preamble_offset = offset;
|
||||
|
||||
if (use_preamble)
|
||||
offset += radv_dgc_preamble_cmdbuf_size(device, AMD_IP_COMPUTE);
|
||||
offset += radv_dgc_main_cmdbuf_offset(device, AMD_IP_COMPUTE);
|
||||
ace_cmd_buf_main_offset = offset;
|
||||
|
||||
uint32_t upload_main_offset = cmd_buf_main_offset + cmd_buf_size;
|
||||
if (radv_dgc_with_task_shader(pGeneratedCommandsInfo))
|
||||
upload_main_offset = ace_cmd_buf_main_offset + ace_cmd_buf_size;
|
||||
|
||||
assert((cmd_buf_main_offset + upload_addr) % pdev->info.ip[AMD_IP_GFX].ib_alignment == 0);
|
||||
assert((ace_cmd_buf_main_offset + upload_addr) % pdev->info.ip[AMD_IP_COMPUTE].ib_alignment == 0);
|
||||
|
||||
struct radv_dgc_params params = {
|
||||
.cmd_buf_main_offset = cmd_buf_main_offset,
|
||||
.cmd_buf_stride = cmd_stride,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue