mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
Pierre-Eric Pelloux-Prayer d25201903d winsys/amdgpu: use syncobj rather than amdgpu fence
syncobjs provide the same features and allow us to unify code
paths because we don't need to handle imported syncobjs
separately.

This simplifies the code and doesn't seem to have any perf
impact.

Syncobjs have been supported in amdgpu since kernel commit 660e855813f78
during the 4.12 cycle, but the minor version wasn't bumped, so use
the next bump value as the minimum supported version.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24724>
2024-01-26 16:02:26 +00:00
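For reference, a minimal sketch of what waiting on a syncobj-backed fence looks
like through the generic libdrm syncobj API (drm_fd and abs_timeout_ns are
placeholder names; presumably this is roughly what amdgpu_fence_wait, declared
in this header, does under the hood after this change):

   #include <xf86drm.h>

   uint32_t syncobj;
   drmSyncobjCreate(drm_fd, 0, &syncobj);        /* created with the fence */
   /* ... the CS ioctl is pointed at this handle so the kernel signals it ... */
   int r = drmSyncobjWait(drm_fd, &syncobj, 1, abs_timeout_ns,
                          DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL);
   drmSyncobjDestroy(drm_fd, syncobj);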

/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */
#ifndef AMDGPU_CS_H
#define AMDGPU_CS_H
#include "amdgpu_bo.h"
#include "util/u_memory.h"
#include "drm-uapi/amdgpu_drm.h"
/* Smaller submits mean the GPU gets busy sooner and there is less
 * waiting for buffers and fences. Proof:
 *    http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
 */
#define IB_MAX_SUBMIT_BYTES (80 * 1024)
struct amdgpu_ctx {
   struct pipe_reference reference;
   struct amdgpu_winsys *ws;
   amdgpu_context_handle ctx;
   amdgpu_bo_handle user_fence_bo;
   uint64_t *user_fence_cpu_address_base;

   /* If true, report lost contexts and skip command submission.
    * If false, terminate the process.
    */
   bool allow_context_lost;

   /* Lost context status due to ioctl and allocation failures. */
   enum pipe_reset_status sw_status;
};

struct amdgpu_cs_buffer {
   struct amdgpu_winsys_bo *bo;
   unsigned slab_real_idx; /* index of underlying real BO, used by slab buffers only */
   unsigned usage;
};

enum ib_type {
   IB_PREAMBLE,
   IB_MAIN,
   IB_NUM,
};

struct amdgpu_ib {
   /* A buffer out of which new IBs are allocated. */
   struct pb_buffer_lean *big_buffer;
   uint8_t *big_buffer_cpu_ptr;
   uint64_t gpu_address;
   unsigned used_ib_space;

   /* The maximum seen size from cs_check_space. If the driver does
    * cs_check_space and flush, the newly allocated IB should have at least
    * this size.
    */
   unsigned max_check_space_size;
   unsigned max_ib_bytes;

   /* ptr_ib_size initially points to cs->csc->chunk_ib->ib_bytes. If IB chaining
    * is required in amdgpu_cs_check_space(), ptr_ib_size is redirected to the
    * size field of the indirect buffer packet instead.
    */
   uint32_t *ptr_ib_size;
   bool is_chained_ib;
};

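/* Illustrative sketch (hypothetical variable names, simplified from the
 * chaining path in amdgpu_cs.c): when the current IB runs out of space and
 * chaining is supported, an INDIRECT_BUFFER packet pointing at the new IB is
 * emitted at the end of the old one, and ptr_ib_size is left pointing at its
 * size dword so it can be patched once the new IB's final size is known:
 *
 *    *cur++ = PKT3(PKT3_INDIRECT_BUFFER, 2, 0);
 *    *cur++ = new_ib_va;                 // low 32 bits of the new IB address
 *    *cur++ = new_ib_va >> 32;           // high 32 bits
 *    *ib->ptr_ib_size = old_ib_dw_count | chain_flags;  // finalize the old size
 *    ib->ptr_ib_size = cur++;            // patched later with the new IB's size
 *    ib->is_chained_ib = true;
 */
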
struct amdgpu_fence_list {
   struct pipe_fence_handle **list;
   unsigned num;
   unsigned max;
};

struct amdgpu_buffer_list {
   unsigned max_buffers;
   unsigned num_buffers;
   struct amdgpu_cs_buffer *buffers;
};

struct amdgpu_cs_context {
   struct drm_amdgpu_cs_chunk_ib chunk_ib[IB_NUM];
   uint32_t *ib_main_addr; /* the beginning of IB before chaining */

   struct amdgpu_winsys *ws;

   /* Buffers. */
   struct amdgpu_buffer_list buffer_lists[NUM_BO_LIST_TYPES];
   int16_t *buffer_indices_hashlist;

   struct amdgpu_winsys_bo *last_added_bo;
   unsigned last_added_bo_usage;

   struct amdgpu_seq_no_fences seq_no_dependencies;

   struct amdgpu_fence_list syncobj_dependencies;
   struct amdgpu_fence_list syncobj_to_signal;

   struct pipe_fence_handle *fence;

   /* the error returned from cs_flush for non-async submissions */
   int error_code;

   /* TMZ: will this command be submitted using the TMZ flag */
   bool secure;
};

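/* Illustrative sketch (assumed, simplified from the submit path): at flush time
 * the two syncobj lists above are translated into CS chunks from the
 * amdgpu_drm.h UAPI, roughly:
 *
 *    struct drm_amdgpu_cs_chunk_sem *sems = ...;   // one entry per fence
 *    for (unsigned i = 0; i < csc->syncobj_dependencies.num; i++)
 *       sems[i].handle =
 *          ((struct amdgpu_fence *)csc->syncobj_dependencies.list[i])->syncobj;
 *    chunk.chunk_id   = AMDGPU_CHUNK_ID_SYNCOBJ_IN;  // _OUT for syncobj_to_signal
 *    chunk.length_dw  = csc->syncobj_dependencies.num * sizeof(*sems) / 4;
 *    chunk.chunk_data = (uintptr_t)sems;
 */
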
/* This high limit is needed for viewperf2020/catia. */
#define BUFFER_HASHLIST_SIZE 32768
struct amdgpu_cs {
   struct amdgpu_ib main_ib; /* must be first because this is inherited */
   struct amdgpu_winsys *ws;
   struct amdgpu_ctx *ctx;

   /*
    * Ensure a 64-bit alignment for drm_amdgpu_cs_chunk_fence.
    */
   struct drm_amdgpu_cs_chunk_fence fence_chunk;
   enum amd_ip_type ip_type;
   unsigned queue_index;

   /* We flip between these two CS. While one is being consumed
    * by the kernel in another thread, the other one is being filled
    * by the pipe driver. */
   struct amdgpu_cs_context csc1;
   struct amdgpu_cs_context csc2;

   /* The currently-used CS. */
   struct amdgpu_cs_context *csc;
   /* The CS currently owned by the other thread. */
   struct amdgpu_cs_context *cst;

   /* buffer_indices_hashlist[hash(bo)] returns -1 if the bo isn't part of any
    * buffer list, or the index where the bo could be found. Since 1) hash
    * collisions of 2 different BOs can happen and 2) we use a single hashlist
    * for the 3 buffer lists, this is only a hint. amdgpu_lookup_buffer uses
    * this hint to speed up buffer lookups.
    */
   int16_t buffer_indices_hashlist[BUFFER_HASHLIST_SIZE];

   /* Flush CS. */
   void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
   void *flush_data;
   bool noop;
   bool has_chaining;

   struct util_queue_fence flush_completed;
   struct pipe_fence_handle *next_fence;
   struct pb_buffer_lean *preamble_ib_bo;

   struct drm_amdgpu_cs_chunk_cp_gfx_shadow mcbp_fw_shadow_chunk;
};

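/* Illustrative sketch (assumed flow, simplified from the flush path): the two
 * contexts swap roles when a submission is handed to the submit thread, so the
 * pipe driver can keep recording while the kernel consumes the other one:
 *
 *    struct amdgpu_cs_context *tmp = cs->csc;
 *    cs->csc = cs->cst;      // start filling the context that is now idle
 *    cs->cst = tmp;          // the submit thread owns the one just recorded
 *    util_queue_add_job(&ws->cs_queue, cs, &cs->flush_completed,
 *                       amdgpu_cs_submit_ib, NULL, 0);
 */
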
struct amdgpu_fence {
   struct pipe_reference reference;
   uint32_t syncobj;

   struct amdgpu_winsys *ws;

   /* The following fields aren't set for imported fences. */
   struct amdgpu_ctx *ctx; /* submission context */
   uint32_t ip_type;
   uint64_t *user_fence_cpu_address;
   uint64_t seq_no;

   /* Whether the fence has been submitted. This is unsignalled for deferred
    * fences (cs->next_fence) and while an IB is still being submitted in the
    * submit thread. */
   struct util_queue_fence submitted;

   volatile int signalled; /* bool (int for atomicity) */
   bool imported;
   uint8_t queue_index; /* for non-imported fences */
   uint_seq_no queue_seq_no; /* winsys-generated sequence number */
};

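/* Illustrative sketch (assumed; drm_fd and abs_timeout_ns are placeholders): a
 * waiter first waits for the submit thread to actually submit the fence, then
 * checks the cached result before falling back to a syncobj wait in the kernel:
 *
 *    util_queue_fence_wait(&fence->submitted);
 *    if (!fence->signalled) {
 *       if (drmSyncobjWait(drm_fd, &fence->syncobj, 1, abs_timeout_ns,
 *                          DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL) == 0)
 *          fence->signalled = true;
 *    }
 */
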
void amdgpu_fence_destroy(struct amdgpu_fence *fence);
static inline bool amdgpu_fence_is_syncobj(struct amdgpu_fence *fence)
{
   return fence->ctx == NULL;
}

static inline void amdgpu_ctx_reference(struct amdgpu_ctx **dst, struct amdgpu_ctx *src)
{
   struct amdgpu_ctx *old_dst = *dst;

   if (pipe_reference(old_dst ? &old_dst->reference : NULL,
                      src ? &src->reference : NULL)) {
      amdgpu_cs_ctx_free(old_dst->ctx);
      amdgpu_bo_free(old_dst->user_fence_bo);
      FREE(old_dst);
   }
   *dst = src;
}

static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst,
                                          struct pipe_fence_handle *src)
{
   struct amdgpu_fence **adst = (struct amdgpu_fence **)dst;
   struct amdgpu_fence *asrc = (struct amdgpu_fence *)src;

   if (pipe_reference(&(*adst)->reference, &asrc->reference))
      amdgpu_fence_destroy(*adst);

   *adst = asrc;
}

/* Same as amdgpu_fence_reference, but ignore the value in *dst. */
static inline void amdgpu_fence_set_reference(struct pipe_fence_handle **dst,
                                              struct pipe_fence_handle *src)
{
   *dst = src;
   pipe_reference(NULL, &((struct amdgpu_fence *)src)->reference); /* only increment refcount */
}

/* Unreference dst, but don't assign anything. */
static inline void amdgpu_fence_drop_reference(struct pipe_fence_handle *dst)
{
   struct amdgpu_fence *adst = (struct amdgpu_fence *)dst;

   if (pipe_reference(&adst->reference, NULL)) /* only decrement refcount */
      amdgpu_fence_destroy(adst);
}

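/* Example usage (illustrative only): take an extra reference to a fence and
 * release it later without assigning anything in its place:
 *
 *    struct pipe_fence_handle *f = NULL;
 *    amdgpu_fence_set_reference(&f, cs->next_fence);  // f now holds a reference
 *    ...
 *    amdgpu_fence_drop_reference(f);                  // release it again
 */
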
struct amdgpu_cs_buffer *
amdgpu_lookup_buffer_any_type(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo *bo);
static inline struct amdgpu_cs *
amdgpu_cs(struct radeon_cmdbuf *rcs)
{
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs->priv;
   assert(cs);
   return cs;
}

#define get_container(member_ptr, container_type, container_member) \
   (container_type *)((char *)(member_ptr) - offsetof(container_type, container_member))
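/* Example (illustrative): this is the container_of idiom, e.g. recovering the
 * containing CS from a pointer to its embedded main_ib member:
 *
 *    struct amdgpu_ib *ib = ...;
 *    struct amdgpu_cs *cs = get_container(ib, struct amdgpu_cs, main_ib);
 */
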
static inline bool
amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
                              struct amdgpu_winsys_bo *bo)
{
   return amdgpu_lookup_buffer_any_type(cs->csc, bo) != NULL;
}

static inline unsigned get_buf_list_idx(struct amdgpu_winsys_bo *bo)
{
   /* AMDGPU_BO_REAL_REUSABLE* maps to AMDGPU_BO_REAL. */
   static_assert(ARRAY_SIZE(((struct amdgpu_cs_context*)NULL)->buffer_lists) == NUM_BO_LIST_TYPES, "");
   return MIN2(bo->type, AMDGPU_BO_REAL);
}

static inline bool
amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs,
                                         struct amdgpu_winsys_bo *bo,
                                         unsigned usage)
{
   struct amdgpu_cs_buffer *buffer = amdgpu_lookup_buffer_any_type(cs->csc, bo);

   return buffer && (buffer->usage & usage) != 0;
}

bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
                       bool absolute);
void amdgpu_cs_sync_flush(struct radeon_cmdbuf *rcs);
void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws);
#endif