radv/amdgpu: allow cs_execute_ib() to pass a VA instead of a BO

DGC IBs are considered external IBs because they aren't managed by
the winsys, and the BO handle itself isn't really needed. Passing a
VA instead will help with future work.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29600>
Authored by Samuel Pitoiset on 2024-06-06 16:20:00 +02:00, committed by Marge Bot
parent e51ae61a4d
commit 030d6e6280
3 changed files with 26 additions and 30 deletions
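To make the interface change concrete, here is a condensed before/after view of the DGC call site, taken from the hunks below (surrounding code elided):

   /* Before: an external IB was described by a BO handle plus a byte offset. */
   device->ws->cs_execute_ib(cmd_buffer->cs, ib_bo, ib_offset, cmdbuf_size >> 2, cmd_buffer->state.predicating);

   /* After: the caller passes the GPU VA directly and NULL for the BO; a NULL
    * BO is what now marks the IB as external (not owned by the CS object). */
   device->ws->cs_execute_ib(cmd_buffer->cs, NULL, ib_va, cmdbuf_size >> 2, cmd_buffer->state.predicating);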


@@ -11582,8 +11582,8 @@ radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPre
    }
 
    uint32_t cmdbuf_size = radv_get_indirect_cmdbuf_size(pGeneratedCommandsInfo);
-   struct radeon_winsys_bo *ib_bo = prep_buffer->bo;
-   const uint64_t ib_offset = prep_buffer->offset + pGeneratedCommandsInfo->preprocessOffset;
+   const uint64_t ib_va =
+      radv_buffer_get_va(prep_buffer->bo) + prep_buffer->offset + pGeneratedCommandsInfo->preprocessOffset;
    const uint32_t view_mask = cmd_buffer->state.render.view_mask;
 
    if (!radv_cmd_buffer_uses_mec(cmd_buffer)) {
@@ -11594,12 +11594,12 @@ radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPre
    radv_cs_add_buffer(device->ws, cmd_buffer->cs, prep_buffer->bo);
 
    if (compute || !view_mask) {
-      device->ws->cs_execute_ib(cmd_buffer->cs, ib_bo, ib_offset, cmdbuf_size >> 2, cmd_buffer->state.predicating);
+      device->ws->cs_execute_ib(cmd_buffer->cs, NULL, ib_va, cmdbuf_size >> 2, cmd_buffer->state.predicating);
    } else {
       u_foreach_bit (view, view_mask) {
          radv_emit_view_index(&cmd_buffer->state, cmd_buffer->cs, view);
 
-         device->ws->cs_execute_ib(cmd_buffer->cs, ib_bo, ib_offset, cmdbuf_size >> 2, cmd_buffer->state.predicating);
+         device->ws->cs_execute_ib(cmd_buffer->cs, NULL, ib_va, cmdbuf_size >> 2, cmd_buffer->state.predicating);
       }
    }
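One subtlety, visible in the unchanged context line above: residency is handled separately from IB addressing. Even though cs_execute_ib() now receives NULL for the BO, the preprocess buffer still has to be registered with the CS so it stays resident at submit time:

   /* Still required; passing NULL to cs_execute_ib() does not track residency. */
   radv_cs_add_buffer(device->ws, cmd_buffer->cs, prep_buffer->bo);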


@@ -289,8 +289,8 @@ struct radeon_winsys {
    void (*cs_execute_secondary)(struct radeon_cmdbuf *parent, struct radeon_cmdbuf *child, bool allow_ib2);
 
-   void (*cs_execute_ib)(struct radeon_cmdbuf *cs, struct radeon_winsys_bo *bo, const uint64_t offset,
-                         const uint32_t cdw, const bool predicate);
+   void (*cs_execute_ib)(struct radeon_cmdbuf *cs, struct radeon_winsys_bo *bo, const uint64_t va, const uint32_t cdw,
+                         const bool predicate);
 
    void (*cs_dump)(struct radeon_cmdbuf *cs, FILE *file, const int *trace_ids, int trace_id_count,
                    enum radv_cs_dump_type type);
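A backend implementing this callback resolves the address the way the amdgpu winsys does below: the BO's VA wins when a BO is given, otherwise the caller-supplied VA is used as-is:

   const uint64_t ib_va = bo ? bo->va : va;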


@@ -42,10 +42,9 @@
 enum { VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024 };
 
 struct radv_amdgpu_ib {
-   struct radeon_winsys_bo *bo;
+   struct radeon_winsys_bo *bo; /* NULL when not owned by the current CS object */
+   uint64_t va;
    unsigned cdw;
-   unsigned offset; /* VA offset */
-   bool is_external; /* Not owned by the current CS object. */
 };
 
 struct radv_amdgpu_cs_ib_info {
@@ -174,7 +173,7 @@ radv_amdgpu_cs_ib_to_info(struct radv_amdgpu_cs *cs, struct radv_amdgpu_ib ib)
    struct radv_amdgpu_cs_ib_info info = {
       .flags = 0,
       .ip_type = cs->hw_ip,
-      .ib_mc_address = radv_amdgpu_winsys_bo(ib.bo)->base.va + ib.offset,
+      .ib_mc_address = ib.va,
       .size = ib.cdw,
    };
    return info;
@@ -197,7 +196,7 @@ radv_amdgpu_cs_destroy(struct radeon_cmdbuf *rcs)
       cs->ws->base.buffer_destroy(&cs->ws->base, cs->ib_buffer);
 
    for (unsigned i = 0; i < cs->num_ib_buffers; ++i) {
-      if (cs->ib_buffers[i].is_external)
+      if (!cs->ib_buffers[i].bo)
          continue;
 
       cs->ws->base.buffer_destroy(&cs->ws->base, cs->ib_buffers[i].bo);
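With the is_external flag gone, a NULL BO is the sentinel for external IBs throughout this file. A small helper along these lines (hypothetical, not part of this commit) captures the convention behind the reworked checks:

   static inline bool
   radv_amdgpu_ib_is_external(const struct radv_amdgpu_ib *ib)
   {
      /* External IBs are exactly those without a winsys-owned BO. */
      return ib->bo == NULL;
   }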
@@ -342,8 +341,7 @@ get_nop_packet(struct radv_amdgpu_cs *cs)
 }
 
 static void
-radv_amdgpu_cs_add_ib_buffer(struct radv_amdgpu_cs *cs, struct radeon_winsys_bo *bo, uint32_t offset, uint32_t cdw,
-                             bool is_external)
+radv_amdgpu_cs_add_ib_buffer(struct radv_amdgpu_cs *cs, struct radeon_winsys_bo *bo, uint64_t va, uint32_t cdw)
 {
    if (cs->num_ib_buffers == cs->max_num_ib_buffers) {
       unsigned max_num_ib_buffers = MAX2(1, cs->max_num_ib_buffers * 2);
@@ -357,8 +355,7 @@ radv_amdgpu_cs_add_ib_buffer(struct radv_amdgpu_cs *cs, struct radeon_winsys_bo
    }
 
    cs->ib_buffers[cs->num_ib_buffers].bo = bo;
-   cs->ib_buffers[cs->num_ib_buffers].offset = offset;
-   cs->ib_buffers[cs->num_ib_buffers].is_external = is_external;
+   cs->ib_buffers[cs->num_ib_buffers].va = va;
    cs->ib_buffers[cs->num_ib_buffers++].cdw = cdw;
 }
@@ -366,7 +363,7 @@ static void
 radv_amdgpu_restore_last_ib(struct radv_amdgpu_cs *cs)
 {
    struct radv_amdgpu_ib *ib = &cs->ib_buffers[--cs->num_ib_buffers];
-   assert(!ib->is_external);
+   assert(ib->bo);
    cs->ib_buffer = ib->bo;
 }
@@ -467,8 +464,8 @@ radv_amdgpu_cs_finalize(struct radeon_cmdbuf *_cs)
    }
 
    /* Append the current (last) IB to the array of IB buffers. */
-   radv_amdgpu_cs_add_ib_buffer(cs, cs->ib_buffer, 0, cs->use_ib ? G_3F2_IB_SIZE(*cs->ib_size_ptr) : cs->base.cdw,
-                                false);
+   radv_amdgpu_cs_add_ib_buffer(cs, cs->ib_buffer, cs->ib_buffer->va,
+                                cs->use_ib ? G_3F2_IB_SIZE(*cs->ib_size_ptr) : cs->base.cdw);
 
    /* Prevent freeing this BO twice. */
    cs->ib_buffer = NULL;
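After this change, radv_amdgpu_cs_add_ib_buffer() has exactly two kinds of callers, condensed here with cdw standing in for the real size expressions:

   /* Owned IB: finalize records the CS's own buffer together with its VA. */
   radv_amdgpu_cs_add_ib_buffer(cs, cs->ib_buffer, cs->ib_buffer->va, cdw);

   /* External IB: cs_execute_ib() forwards the caller's BO (NULL for DGC)
    * together with the resolved VA. */
   radv_amdgpu_cs_add_ib_buffer(cs, bo, ib_va, cdw);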
@@ -509,7 +506,7 @@ radv_amdgpu_cs_reset(struct radeon_cmdbuf *_cs)
    cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer);
 
    for (unsigned i = 0; i < cs->num_ib_buffers; ++i) {
-      if (cs->ib_buffers[i].is_external)
+      if (!cs->ib_buffers[i].bo)
          continue;
 
       cs->ws->base.buffer_destroy(&cs->ws->base, cs->ib_buffers[i].bo);
@@ -737,7 +734,7 @@ radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, struct radeon_cm
       if (child->use_ib)
          cdw -= 4;
 
-      assert(!ib->is_external);
+      assert(ib->bo);
 
       if (parent->base.cdw + cdw > parent->base.max_dw)
         radv_amdgpu_cs_grow(&parent->base, cdw);
@@ -757,19 +754,19 @@ radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, struct radeon_cm
 }
 
 static void
-radv_amdgpu_cs_execute_ib(struct radeon_cmdbuf *_cs, struct radeon_winsys_bo *bo, const uint64_t offset,
-                          const uint32_t cdw, const bool predicate)
+radv_amdgpu_cs_execute_ib(struct radeon_cmdbuf *_cs, struct radeon_winsys_bo *bo, uint64_t va, const uint32_t cdw,
+                          const bool predicate)
 {
    struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
-   const uint64_t va = bo->va + offset;
+   const uint64_t ib_va = bo ? bo->va : va;
 
    if (cs->status != VK_SUCCESS)
       return;
 
    if (cs->hw_ip == AMD_IP_GFX && cs->use_ib) {
       radeon_emit(&cs->base, PKT3(PKT3_INDIRECT_BUFFER, 2, predicate));
-      radeon_emit(&cs->base, va);
-      radeon_emit(&cs->base, va >> 32);
+      radeon_emit(&cs->base, ib_va);
+      radeon_emit(&cs->base, ib_va >> 32);
       radeon_emit(&cs->base, cdw);
    } else {
       const uint32_t ib_size = radv_amdgpu_cs_get_initial_size(cs->ws, cs->hw_ip);
@@ -778,7 +775,7 @@ radv_amdgpu_cs_execute_ib(struct radeon_cmdbuf *_cs, struct radeon_winsys_bo *bo
       /* Finalize the current CS without chaining to execute the external IB. */
       radv_amdgpu_cs_finalize(_cs);
 
-      radv_amdgpu_cs_add_ib_buffer(cs, bo, offset, cdw, true);
+      radv_amdgpu_cs_add_ib_buffer(cs, bo, ib_va, cdw);
 
       /* Start a new CS which isn't chained to any previous CS. */
       result = radv_amdgpu_cs_get_new_ib(_cs, ib_size);
@@ -956,7 +953,7 @@ static bool
 radv_amdgpu_cs_has_external_ib(const struct radv_amdgpu_cs *cs)
 {
    for (unsigned i = 0; i < cs->num_ib_buffers; i++) {
-      if (cs->ib_buffers[i].is_external)
+      if (!cs->ib_buffers[i].bo)
         return true;
    }
@@ -972,7 +969,7 @@ radv_amdgpu_get_num_ibs_per_cs(const struct radv_amdgpu_cs *cs)
    unsigned num_external_ibs = 0;
 
    for (unsigned i = 0; i < cs->num_ib_buffers; i++) {
-      if (cs->ib_buffers[i].is_external)
+      if (!cs->ib_buffers[i].bo)
         num_external_ibs++;
    }
@@ -1098,8 +1095,7 @@ radv_amdgpu_winsys_cs_submit_internal(struct radv_amdgpu_ctx *ctx, int queue_idx
          ib = radv_amdgpu_cs_ib_to_info(cs, cs->ib_buffers[cs_ib_idx++]);
 
          /* Loop until the next external IB is found. */
-         while (!cs->ib_buffers[cur_ib_idx].is_external && !cs->ib_buffers[cs_ib_idx].is_external &&
-                cs_ib_idx < cs->num_ib_buffers) {
+         while (cs->ib_buffers[cur_ib_idx].bo && cs->ib_buffers[cs_ib_idx].bo && cs_ib_idx < cs->num_ib_buffers) {
            cs_ib_idx++;
         }
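The reworked condition expresses the same walk through the new sentinel: runs of consecutive winsys-owned IBs (bo != NULL) can be merged into a single submission, and an external IB (bo == NULL) on either side ends the run. Condensed:

   /* Merge adjacent owned IBs; stop at the first external (NULL-BO) one. */
   while (cs->ib_buffers[cur_ib_idx].bo && cs->ib_buffers[cs_ib_idx].bo && cs_ib_idx < cs->num_ib_buffers)
      cs_ib_idx++;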