radv/winsys: fix executing huge secondary command buffers on GFX6

If the secondary has a list of CS buffers, it should be copied to
the primary.

Fixes dEQP-VK.api.command_buffers.record_many_draws_secondary_2.

Cc: 21.1 mesa-stable
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10547>
(cherry picked from commit 12a00da800)
This commit is contained in:
Samuel Pitoiset 2021-04-30 14:50:57 +02:00 committed by Eric Engestrom
parent 37c88be5ea
commit 5f48cd7041
2 changed files with 54 additions and 1 deletion

View file

@@ -292,7 +292,7 @@
"description": "radv/winsys: fix executing huge secondary command buffers on GFX6",
"nominated": true,
"nomination_type": 0,
"resolution": 0,
"resolution": 1,
"master_sha": null,
"because_sha": null
},

View file

@@ -565,6 +565,59 @@ radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, struct radeon_cm
radeon_emit(&parent->base, child->ib.ib_mc_address >> 32);
radeon_emit(&parent->base, child->ib.size);
} else {
/* When the secondary command buffer is huge we have to copy the list of CS buffers to the
* parent to submit multiple IBs.
*/
if (child->num_old_cs_buffers > 0) {
unsigned num_cs_buffers;
uint32_t *new_buf;
/* Compute the total number of CS buffers needed. */
num_cs_buffers = parent->num_old_cs_buffers + child->num_old_cs_buffers + 1;
struct radeon_cmdbuf *old_cs_buffers =
realloc(parent->old_cs_buffers, num_cs_buffers * sizeof(*parent->old_cs_buffers));
if (!old_cs_buffers) {
parent->status = VK_ERROR_OUT_OF_HOST_MEMORY;
parent->base.cdw = 0;
return;
}
parent->old_cs_buffers = old_cs_buffers;
/* Copy the parent CS to its list of CS buffers, so submission ordering is maintained. */
new_buf = malloc(parent->base.max_dw * 4);
if (!new_buf) {
parent->status = VK_ERROR_OUT_OF_HOST_MEMORY;
parent->base.cdw = 0;
return;
}
memcpy(new_buf, parent->base.buf, parent->base.max_dw * 4);
parent->old_cs_buffers[parent->num_old_cs_buffers].cdw = parent->base.cdw;
parent->old_cs_buffers[parent->num_old_cs_buffers].max_dw = parent->base.max_dw;
parent->old_cs_buffers[parent->num_old_cs_buffers].buf = new_buf;
parent->num_old_cs_buffers++;
/* Then, copy all child CS buffers to the parent list. */
for (unsigned i = 0; i < child->num_old_cs_buffers; i++) {
new_buf = malloc(child->old_cs_buffers[i].max_dw * 4);
if (!new_buf) {
parent->status = VK_ERROR_OUT_OF_HOST_MEMORY;
parent->base.cdw = 0;
return;
}
memcpy(new_buf, child->old_cs_buffers[i].buf, child->old_cs_buffers[i].max_dw * 4);
parent->old_cs_buffers[parent->num_old_cs_buffers].cdw = child->old_cs_buffers[i].cdw;
parent->old_cs_buffers[parent->num_old_cs_buffers].max_dw = child->old_cs_buffers[i].max_dw;
parent->old_cs_buffers[parent->num_old_cs_buffers].buf = new_buf;
parent->num_old_cs_buffers++;
}
/* Reset the parent CS before copying the child CS into it. */
parent->base.cdw = 0;
}
if (parent->base.cdw + child->base.cdw > parent->base.max_dw)
radv_amdgpu_cs_grow(&parent->base, child->base.cdw);