winsys/amdgpu: add a parallel compute IB coupled with a gfx IB
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
Acked-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
parent eda281e977
commit b19884e08e
8 changed files with 204 additions and 10 deletions
@@ -397,6 +397,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
                                  info->drm_minor >= 13;
    info->has_2d_tiling = true;
    info->has_read_registers_query = true;
+   info->has_scheduled_fence_dependency = info->drm_minor >= 28;
 
    info->num_render_backends = amdinfo->rb_pipes;
    /* The value returned by the kernel driver was wrong. */
@@ -470,6 +471,10 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
       else
          info->use_display_dcc_with_retile_blit = true;
    }
+
+   info->has_gds_ordered_append = info->chip_class >= GFX7 &&
+                                  info->drm_minor >= 29 &&
+                                  HAVE_LLVM >= 0x0800;
    return true;
 }
 
@@ -572,6 +577,8 @@ void ac_print_gpu_info(struct radeon_info *info)
    printf(" has_sparse_vm_mappings = %u\n", info->has_sparse_vm_mappings);
    printf(" has_2d_tiling = %u\n", info->has_2d_tiling);
    printf(" has_read_registers_query = %u\n", info->has_read_registers_query);
+   printf(" has_gds_ordered_append = %u\n", info->has_gds_ordered_append);
+   printf(" has_scheduled_fence_dependency = %u\n", info->has_scheduled_fence_dependency);
 
    printf("Shader core info:\n");
    printf(" max_shader_clock = %i\n", info->max_shader_clock);
@@ -119,6 +119,8 @@ struct radeon_info {
    bool has_sparse_vm_mappings;
    bool has_2d_tiling;
    bool has_read_registers_query;
+   bool has_gds_ordered_append;
+   bool has_scheduled_fence_dependency;
 
    /* Shader cores. */
    uint32_t r600_max_quad_pipes; /* wave size / 16 */
@@ -351,8 +351,8 @@ static void r600_add_fence_dependency(struct r600_common_context *rctx,
    struct radeon_winsys *ws = rctx->ws;
 
    if (rctx->dma.cs)
-      ws->cs_add_fence_dependency(rctx->dma.cs, fence);
-   ws->cs_add_fence_dependency(rctx->gfx.cs, fence);
+      ws->cs_add_fence_dependency(rctx->dma.cs, fence, 0);
+   ws->cs_add_fence_dependency(rctx->gfx.cs, fence, 0);
 }
 
 static void r600_fence_server_sync(struct pipe_context *ctx,
@@ -67,6 +67,16 @@ enum radeon_bo_flag { /* bitfield */
    RADEON_FLAG_32BIT = (1 << 6),
 };
 
+enum radeon_dependency_flag {
+   /* Add the dependency to the parallel compute IB only. */
+   RADEON_DEPENDENCY_PARALLEL_COMPUTE_ONLY = 1 << 0,
+
+   /* Instead of waiting for a job to finish execution, the dependency will
+    * be signaled when the job starts execution.
+    */
+   RADEON_DEPENDENCY_START_FENCE = 1 << 1,
+};
+
 enum radeon_bo_usage { /* bitfield */
    RADEON_USAGE_READ = 2,
    RADEON_USAGE_WRITE = 4,
@@ -492,6 +502,23 @@ struct radeon_winsys {
                      void *flush_ctx,
                      bool stop_exec_on_failure);
 
+   /**
+    * Add a parallel compute IB to a gfx IB. It will share the buffer list
+    * and fence dependencies with the gfx IB. The gfx flush call will submit
+    * both IBs at the same time.
+    *
+    * The compute IB doesn't have an output fence, so the primary IB has
+    * to use a wait packet for synchronization.
+    *
+    * The returned IB is only a stream for writing packets to the new
+    * IB. Calling other winsys functions with it is not allowed, not even
+    * "cs_destroy". Use the gfx IB instead.
+    *
+    * \param cs  Gfx IB
+    */
+   struct radeon_cmdbuf *(*cs_add_parallel_compute_ib)(struct radeon_cmdbuf *cs,
+                                                       bool uses_gds_ordered_append);
+
    /**
     * Destroy a command stream.
     *
@@ -614,9 +641,12 @@ struct radeon_winsys {
    /**
     * Add a fence dependency to the CS, so that the CS will wait for
     * the fence before execution.
+    *
+    * \param dependency_flags  Bitmask of RADEON_DEPENDENCY_*
    */
    void (*cs_add_fence_dependency)(struct radeon_cmdbuf *cs,
-                                   struct pipe_fence_handle *fence);
+                                   struct pipe_fence_handle *fence,
+                                   unsigned dependency_flags);
 
    /**
    * Signal a syncobj when the CS finishes execution.
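For context (not part of the patch): a minimal sketch of how a gallium driver could drive the two entry points added above. The helper name and the prev_frame_fence argument are hypothetical; only the winsys callbacks, the dependency flags and the types come from this interface.

/* Hypothetical driver-side helper illustrating the intended call sequence. */
static void
setup_parallel_compute_example(struct radeon_winsys *ws,
                               struct radeon_cmdbuf *gfx_cs,
                               struct pipe_fence_handle *prev_frame_fence)
{
   /* Couple a compute IB to the gfx IB. The amdgpu winsys returns NULL on
    * non-GFX rings or when a compute IB was already added. */
   struct radeon_cmdbuf *compute_cs =
      ws->cs_add_parallel_compute_ib(gfx_cs, false);
   if (!compute_cs)
      return; /* fall back to a single IB */

   /* Make only the compute IB wait, and only until the previous job has
    * started executing, using the new dependency flags. */
   ws->cs_add_fence_dependency(gfx_cs, prev_frame_fence,
                               RADEON_DEPENDENCY_PARALLEL_COMPUTE_ONLY |
                               RADEON_DEPENDENCY_START_FENCE);

   /* Packets are written into compute_cs like any other IB stream. Flushing
    * gfx_cs submits both IBs together; compute_cs must never be passed to
    * cs_destroy. */
}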
@@ -177,8 +177,8 @@ static void si_add_fence_dependency(struct si_context *sctx,
    struct radeon_winsys *ws = sctx->ws;
 
    if (sctx->dma_cs)
-      ws->cs_add_fence_dependency(sctx->dma_cs, fence);
-   ws->cs_add_fence_dependency(sctx->gfx_cs, fence);
+      ws->cs_add_fence_dependency(sctx->dma_cs, fence, 0);
+   ws->cs_add_fence_dependency(sctx->gfx_cs, fence, 0);
 }
 
 static void si_add_syncobj_signal(struct si_context *sctx,
@@ -35,6 +35,14 @@
 
 DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)
 
+#ifndef AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
+#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
+#endif
+
+#ifndef AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
+#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07
+#endif
+
 /* FENCES */
 
 static struct pipe_fence_handle *
@@ -717,6 +725,7 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib,
 
 static unsigned amdgpu_ib_max_submit_dwords(enum ib_type ib_type)
 {
+   /* The maximum IB size including all chained IBs. */
    switch (ib_type) {
    case IB_MAIN:
       /* Smaller submits means the GPU gets busy sooner and there is less
@@ -724,6 +733,9 @@ static unsigned amdgpu_ib_max_submit_dwords(enum ib_type ib_type)
       *    http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
       */
      return 20 * 1024;
+   case IB_PARALLEL_COMPUTE:
+      /* Always chain this IB. */
+      return UINT_MAX;
    default:
      unreachable("bad ib_type");
    }
@@ -739,12 +751,15 @@ static bool amdgpu_get_new_ib(struct radeon_winsys *ws, struct amdgpu_cs *cs,
    */
   struct amdgpu_ib *ib = NULL;
   struct drm_amdgpu_cs_chunk_ib *info = &cs->csc->ib[ib_type];
-   unsigned ib_size = 0;
+   /* This is the minimum size of a contiguous IB. */
+   unsigned ib_size = 4 * 1024 * 4;
 
    switch (ib_type) {
+   case IB_PARALLEL_COMPUTE:
+      ib = &cs->compute_ib;
+      break;
    case IB_MAIN:
       ib = &cs->main;
-      ib_size = 4 * 1024 * 4;
       break;
    default:
       unreachable("unhandled IB type");
@@ -866,6 +881,9 @@ static bool amdgpu_init_cs_context(struct amdgpu_winsys *ws,
       assert(0);
    }
 
+   cs->ib[IB_PARALLEL_COMPUTE].ip_type = AMDGPU_HW_IP_COMPUTE;
+   cs->ib[IB_PARALLEL_COMPUTE].flags = AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE;
+
    memset(cs->buffer_indices_hashlist, -1, sizeof(cs->buffer_indices_hashlist));
    cs->last_added_bo = NULL;
    return true;
@@ -897,6 +915,8 @@ static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs)
    cleanup_fence_list(&cs->fence_dependencies);
    cleanup_fence_list(&cs->syncobj_dependencies);
    cleanup_fence_list(&cs->syncobj_to_signal);
+   cleanup_fence_list(&cs->compute_fence_dependencies);
+   cleanup_fence_list(&cs->compute_start_fence_dependencies);
 
    cs->num_real_buffers = 0;
    cs->num_slab_buffers = 0;
@@ -916,6 +936,8 @@ static void amdgpu_destroy_cs_context(struct amdgpu_cs_context *cs)
    FREE(cs->fence_dependencies.list);
    FREE(cs->syncobj_dependencies.list);
    FREE(cs->syncobj_to_signal.list);
+   FREE(cs->compute_fence_dependencies.list);
+   FREE(cs->compute_start_fence_dependencies.list);
 }
 
 
@@ -949,6 +971,7 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
    amdgpu_cs_chunk_fence_info_to_data(&fence_info, (void*)&cs->fence_chunk);
 
    cs->main.ib_type = IB_MAIN;
+   cs->compute_ib.ib_type = IB_PARALLEL_COMPUTE;
 
    if (!amdgpu_init_cs_context(ctx->ws, &cs->csc1, ring_type)) {
       FREE(cs);
@@ -976,6 +999,33 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
    return &cs->main.base;
 }
 
+static struct radeon_cmdbuf *
+amdgpu_cs_add_parallel_compute_ib(struct radeon_cmdbuf *ib,
+                                  bool uses_gds_ordered_append)
+{
+   struct amdgpu_cs *cs = (struct amdgpu_cs*)ib;
+   struct amdgpu_winsys *ws = cs->ctx->ws;
+
+   if (cs->ring_type != RING_GFX)
+      return NULL;
+
+   /* only one secondary IB can be added */
+   if (cs->compute_ib.ib_mapped)
+      return NULL;
+
+   /* Allocate the compute IB. */
+   if (!amdgpu_get_new_ib(&ws->base, cs, IB_PARALLEL_COMPUTE))
+      return NULL;
+
+   if (uses_gds_ordered_append) {
+      cs->csc1.ib[IB_PARALLEL_COMPUTE].flags |=
+         AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID;
+      cs->csc2.ib[IB_PARALLEL_COMPUTE].flags |=
+         AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID;
+   }
+   return &cs->compute_ib.base;
+}
+
 static bool amdgpu_cs_validate(struct radeon_cmdbuf *rcs)
 {
    return true;
@@ -1104,6 +1154,11 @@ static void add_fence_to_list(struct amdgpu_fence_list *fences,
    amdgpu_fence_reference(&fences->list[idx], (struct pipe_fence_handle*)fence);
 }
 
+/* TODO: recognizing dependencies as no-ops doesn't take the parallel
+ * compute IB into account. The compute IB won't wait for these.
+ * Also, the scheduler can execute compute and SDMA IBs on any rings.
+ * Should we always insert dependencies?
+ */
 static bool is_noop_fence_dependency(struct amdgpu_cs *acs,
                                      struct amdgpu_fence *fence)
 {
@@ -1120,7 +1175,8 @@ static bool is_noop_fence_dependency(struct amdgpu_cs *acs,
 }
 
 static void amdgpu_cs_add_fence_dependency(struct radeon_cmdbuf *rws,
-                                           struct pipe_fence_handle *pfence)
+                                           struct pipe_fence_handle *pfence,
+                                           unsigned dependency_flags)
 {
    struct amdgpu_cs *acs = amdgpu_cs(rws);
    struct amdgpu_cs_context *cs = acs->csc;
@@ -1128,6 +1184,21 @@ static void amdgpu_cs_add_fence_dependency(struct radeon_cmdbuf *rws,
 
    util_queue_fence_wait(&fence->submitted);
 
+   if (dependency_flags & RADEON_DEPENDENCY_PARALLEL_COMPUTE_ONLY) {
+      /* Syncobjs are not needed here. */
+      assert(!amdgpu_fence_is_syncobj(fence));
+
+      if (acs->ctx->ws->info.has_scheduled_fence_dependency &&
+          dependency_flags & RADEON_DEPENDENCY_START_FENCE)
+         add_fence_to_list(&cs->compute_start_fence_dependencies, fence);
+      else
+         add_fence_to_list(&cs->compute_fence_dependencies, fence);
+      return;
+   }
+
+   /* Start fences are not needed here. */
+   assert(!(dependency_flags & RADEON_DEPENDENCY_START_FENCE));
+
    if (is_noop_fence_dependency(acs, fence))
       return;
 
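Not part of the patch: the routing decision above, restated as a standalone sketch. The helper name is hypothetical, has_scheduled_fence_dep stands in for ws->info.has_scheduled_fence_dependency, and the syncobj and no-op handling of the real function is left out.

/* Hypothetical helper: pick the fence list a dependency lands on. */
static struct amdgpu_fence_list *
pick_dependency_list(struct amdgpu_cs_context *cs,
                     bool has_scheduled_fence_dep,
                     unsigned dependency_flags)
{
   if (dependency_flags & RADEON_DEPENDENCY_PARALLEL_COMPUTE_ONLY) {
      /* Start fences need the kernel's scheduled-dependencies chunk
       * (DRM 3.28+); otherwise degrade to a normal completion wait. */
      if (has_scheduled_fence_dep &&
          (dependency_flags & RADEON_DEPENDENCY_START_FENCE))
         return &cs->compute_start_fence_dependencies;
      return &cs->compute_fence_dependencies;
   }
   /* Everything else keeps using the existing gfx dependency list. */
   return &cs->fence_dependencies;
}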
@@ -1324,7 +1395,7 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
    }
 
    struct drm_amdgpu_bo_list_entry *list =
-      alloca(cs->num_real_buffers * sizeof(struct drm_amdgpu_bo_list_entry));
+      alloca((cs->num_real_buffers + 2) * sizeof(struct drm_amdgpu_bo_list_entry));
 
    unsigned num_handles = 0;
    for (i = 0; i < cs->num_real_buffers; ++i) {
@@ -1417,6 +1488,66 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
       num_chunks++;
    }
 
+   /* Submit the parallel compute IB first. */
+   if (cs->ib[IB_PARALLEL_COMPUTE].ib_bytes > 0) {
+      unsigned old_num_chunks = num_chunks;
+
+      /* Add compute fence dependencies. */
+      unsigned num_dependencies = cs->compute_fence_dependencies.num;
+      if (num_dependencies) {
+         struct drm_amdgpu_cs_chunk_dep *dep_chunk =
+            alloca(num_dependencies * sizeof(*dep_chunk));
+
+         for (unsigned i = 0; i < num_dependencies; i++) {
+            struct amdgpu_fence *fence =
+               (struct amdgpu_fence*)cs->compute_fence_dependencies.list[i];
+
+            assert(util_queue_fence_is_signalled(&fence->submitted));
+            amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[i]);
+         }
+
+         chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
+         chunks[num_chunks].length_dw = sizeof(dep_chunk[0]) / 4 * num_dependencies;
+         chunks[num_chunks].chunk_data = (uintptr_t)dep_chunk;
+         num_chunks++;
+      }
+
+      /* Add compute start fence dependencies. */
+      unsigned num_start_dependencies = cs->compute_start_fence_dependencies.num;
+      if (num_start_dependencies) {
+         struct drm_amdgpu_cs_chunk_dep *dep_chunk =
+            alloca(num_start_dependencies * sizeof(*dep_chunk));
+
+         for (unsigned i = 0; i < num_start_dependencies; i++) {
+            struct amdgpu_fence *fence =
+               (struct amdgpu_fence*)cs->compute_start_fence_dependencies.list[i];
+
+            assert(util_queue_fence_is_signalled(&fence->submitted));
+            amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[i]);
+         }
+
+         chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES;
+         chunks[num_chunks].length_dw = sizeof(dep_chunk[0]) / 4 * num_start_dependencies;
+         chunks[num_chunks].chunk_data = (uintptr_t)dep_chunk;
+         num_chunks++;
+      }
+
+      /* Convert from dwords to bytes. */
+      cs->ib[IB_PARALLEL_COMPUTE].ib_bytes *= 4;
+      chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
+      chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
+      chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_PARALLEL_COMPUTE];
+      num_chunks++;
+
+      r = amdgpu_cs_submit_raw2(ws->dev, acs->ctx->ctx, bo_list,
+                                num_chunks, chunks, NULL);
+      if (r)
+         goto finalize;
+
+      /* Back off the compute chunks. */
+      num_chunks = old_num_chunks;
+   }
+
    /* Syncobj signals. */
    unsigned num_syncobj_to_signal = cs->syncobj_to_signal.num;
    if (num_syncobj_to_signal) {
@@ -1458,6 +1589,7 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
       r = amdgpu_cs_submit_raw2(ws->dev, acs->ctx->ctx, bo_list,
                                 num_chunks, chunks, &seq_no);
    }
+finalize:
 
    if (r) {
       if (r == -ENOMEM)
@@ -1543,6 +1675,12 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
       }
       if (cs->ring_type == RING_GFX)
          ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4;
+
+      /* Also pad secondary IBs. */
+      if (cs->compute_ib.ib_mapped) {
+         while (cs->compute_ib.base.current.cdw & 7)
+            radeon_emit(&cs->compute_ib.base, 0xffff1000); /* type3 nop packet */
+      }
       break;
    case RING_UVD:
    case RING_UVD_ENC:
@@ -1578,6 +1716,9 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
    /* Set IB sizes. */
    amdgpu_ib_finalize(ws, &cs->main);
 
+   if (cs->compute_ib.ib_mapped)
+      amdgpu_ib_finalize(ws, &cs->compute_ib);
+
    /* Create a fence. */
    amdgpu_fence_reference(&cur->fence, NULL);
    if (cs->next_fence) {
@@ -1623,6 +1764,8 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
    }
 
    amdgpu_get_new_ib(&ws->base, cs, IB_MAIN);
+   if (cs->compute_ib.ib_mapped)
+      amdgpu_get_new_ib(&ws->base, cs, IB_PARALLEL_COMPUTE);
 
    cs->main.base.used_gart = 0;
    cs->main.base.used_vram = 0;
@@ -1644,6 +1787,8 @@ static void amdgpu_cs_destroy(struct radeon_cmdbuf *rcs)
    p_atomic_dec(&cs->ctx->ws->num_cs);
    pb_reference(&cs->main.big_ib_buffer, NULL);
    FREE(cs->main.base.prev);
+   pb_reference(&cs->compute_ib.big_ib_buffer, NULL);
+   FREE(cs->compute_ib.base.prev);
    amdgpu_destroy_cs_context(&cs->csc1);
    amdgpu_destroy_cs_context(&cs->csc2);
    amdgpu_fence_reference(&cs->next_fence, NULL);
@@ -1666,6 +1811,7 @@ void amdgpu_cs_init_functions(struct amdgpu_winsys *ws)
    ws->base.ctx_destroy = amdgpu_ctx_destroy;
    ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
    ws->base.cs_create = amdgpu_cs_create;
+   ws->base.cs_add_parallel_compute_ib = amdgpu_cs_add_parallel_compute_ib;
    ws->base.cs_destroy = amdgpu_cs_destroy;
    ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
    ws->base.cs_validate = amdgpu_cs_validate;
@@ -57,6 +57,7 @@ struct amdgpu_cs_buffer {
 
 enum ib_type {
    IB_MAIN,
+   IB_PARALLEL_COMPUTE,
    IB_NUM,
 };
 
@@ -113,6 +114,10 @@ struct amdgpu_cs_context {
    struct amdgpu_fence_list syncobj_dependencies;
    struct amdgpu_fence_list syncobj_to_signal;
 
+   /* The compute IB uses the dependencies above + these: */
+   struct amdgpu_fence_list compute_fence_dependencies;
+   struct amdgpu_fence_list compute_start_fence_dependencies;
+
    struct pipe_fence_handle *fence;
 
    /* the error returned from cs_flush for non-async submissions */
@@ -121,6 +126,7 @@ struct amdgpu_cs_context {
 
 struct amdgpu_cs {
    struct amdgpu_ib main; /* must be first because this is inherited */
+   struct amdgpu_ib compute_ib; /* optional parallel compute IB */
    struct amdgpu_ctx *ctx;
    enum ring_type ring_type;
    struct drm_amdgpu_cs_chunk_fence fence_chunk;
@@ -220,6 +226,8 @@ amdgpu_cs_from_ib(struct amdgpu_ib *ib)
    switch (ib->ib_type) {
    case IB_MAIN:
       return get_container(ib, struct amdgpu_cs, main);
+   case IB_PARALLEL_COMPUTE:
+      return get_container(ib, struct amdgpu_cs, compute_ib);
    default:
       unreachable("bad ib_type");
    }
@@ -798,7 +798,8 @@ radeon_drm_cs_get_next_fence(struct radeon_cmdbuf *rcs)
 
 static void
 radeon_drm_cs_add_fence_dependency(struct radeon_cmdbuf *cs,
-                                   struct pipe_fence_handle *fence)
+                                   struct pipe_fence_handle *fence,
+                                   unsigned dependency_flags)
 {
    /* TODO: Handle the following unlikely multi-threaded scenario:
     *