Merge branch 'vp/mr/add-gang-cs' into 'main'

amdgpu: add amdgpu_cs_submit_gang api

See merge request mesa/drm!344
Vitaly Prosyak 2025-03-27 23:29:38 +00:00
commit 69641927c6
3 changed files with 113 additions and 8 deletions

amdgpu/amdgpu-symbols.txt

@@ -82,3 +82,4 @@ amdgpu_va_get_start_addr
amdgpu_va_range_query
amdgpu_vm_reserve_vmid
amdgpu_vm_unreserve_vmid
amdgpu_cs_submit_gang

amdgpu/amdgpu.h

@@ -312,6 +312,36 @@ struct amdgpu_cs_ib_info {
uint32_t size;
};
/**
* Structure describing a gang IB, used for submitting IBs to multiple HW IPs.
*
* \sa amdgpu_cs_request, amdgpu_cs_submit_gang()
*
*/
struct amdgpu_cs_ib_info_gang {
/** Special flags */
uint64_t flags;
/** Virtual MC address of the command buffer */
uint64_t ib_mc_address;
/**
* Size of Command Buffer to be submitted.
* - The size is in units of dwords (4 bytes).
* - Could be 0
*/
uint32_t size;
/** HW IP type to which the IB belongs */
uint32_t ip_type;
/** IP instance index if there are several IPs of the same type. */
uint32_t ip_instance;
/** Ring index of the HW IP */
uint32_t ring;
};
/**
* Structure describing fence information
*
@@ -383,6 +413,12 @@ struct amdgpu_cs_request {
* The fence information
*/
struct amdgpu_cs_fence_info fence_info;
/**
* Use ibs_gang below instead of ibs for gang submission. Gang submission
* allows IBs from different HW IPs to be submitted as a single entity.
*/
struct amdgpu_cs_ib_info_gang *ibs_gang;
};
/**
@@ -1051,6 +1087,45 @@ int amdgpu_cs_submit(amdgpu_context_handle context,
struct amdgpu_cs_request *ibs_request,
uint32_t number_of_requests);
/**
* Send request to submit command buffers to hardware.
*
* The kernel driver may use the GPU scheduler to decide when to physically
* send this request to the hardware. Accordingly, the request may be queued
* and sent for execution later. The only guarantee is that requests from
* the same GPU context will be executed in order.
*
* The caller can specify the user fence buffer/location with the fence_info
* in the cs_request. The sequence number is returned via the 'seq_no' field
* in the ibs_request structure.
*
* \param context - \c [in] GPU Context
* \param flags - \c [in] Global submission flags
* \param ibs_request - \c [in/out] Pointer to submission requests.
* We can submit to several
* engines/rings simultaneously as
* an 'atomic' operation
* \param number_of_requests - \c [in] Number of submission requests
*
* \return 0 on success\n
* <0 - Negative POSIX Error code
*
* \note It is required to pass a correct resource list with the buffer
* handles that will be accessed by the command buffers in the submission.
* This allows the kernel driver to correctly implement "paging".
* Failure to do so will have unpredictable results.
*
* \sa amdgpu_cs_query_fence_status()
*
*/
int amdgpu_cs_submit_gang(amdgpu_context_handle context,
uint64_t flags,
struct amdgpu_cs_request *ibs_request,
uint32_t number_of_requests);
/**
* Query status of Command Buffer Submission
*

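For illustration only (not part of the commit), a minimal usage sketch of the
API declared above: one GFX IB and one compute IB submitted as a single gang.
The helper name and all parameters are placeholders; the context, BO list and
command buffers are assumed to be set up through the usual libdrm amdgpu calls.

#include "amdgpu.h"
#include "amdgpu_drm.h"   /* AMDGPU_HW_IP_GFX, AMDGPU_HW_IP_COMPUTE */

/* Hypothetical helper: submit a GFX IB and a compute IB as one gang. */
static int submit_gfx_compute_gang(amdgpu_context_handle context,
                                   amdgpu_bo_list_handle bo_list,
                                   uint64_t gfx_ib_va, uint32_t gfx_ib_dw,
                                   uint64_t comp_ib_va, uint32_t comp_ib_dw)
{
        struct amdgpu_cs_ib_info_gang ibs[2] = {{
                .ib_mc_address = gfx_ib_va,
                .size = gfx_ib_dw,              /* size in dwords */
                .ip_type = AMDGPU_HW_IP_GFX,
        }, {
                .ib_mc_address = comp_ib_va,
                .size = comp_ib_dw,
                .ip_type = AMDGPU_HW_IP_COMPUTE,
        }};
        struct amdgpu_cs_request req = {
                .resources = bo_list,     /* buffers the IBs will access */
                .number_of_ibs = 2,
                .ibs_gang = ibs,  /* per-IB routing; req.ip_type etc. unused */
        };

        /* On success the sequence number comes back in req.seq_no. */
        return amdgpu_cs_submit_gang(context, 0 /* flags */, &req, 1);
}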
amdgpu/amdgpu_cs.c

@@ -246,13 +246,13 @@ drm_public int amdgpu_cs_query_reset_state2(amdgpu_context_handle context,
* \param context - \c [in] GPU Context
* \param ibs_request - \c [in] Pointer to submission requests
* \param gang - \c [in] if true, IBs from different HW IPs can be passed
*
* \return 0 on success otherwise POSIX Error code
* \sa amdgpu_cs_submit()
*/
static int amdgpu_cs_submit_one(amdgpu_context_handle context,
struct amdgpu_cs_request *ibs_request, int gang)
{
struct drm_amdgpu_cs_chunk *chunks;
struct drm_amdgpu_cs_chunk_data *chunk_data;
@@ -289,19 +289,26 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
num_chunks = ibs_request->number_of_ibs;
/* IB chunks */
for (i = 0; i < ibs_request->number_of_ibs; i++) {
struct amdgpu_cs_ib_info_gang *ib;
chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
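/*
 * Gang path: each amdgpu_cs_ib_info_gang entry carries its own HW IP
 * type/instance/ring, so the IBs of one request can target different
 * HW IPs. The legacy path takes the routing from the request itself.
 */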
if (gang) {
ib = &ibs_request->ibs_gang[i];
chunk_data[i].ib_data.ip_type = ib->ip_type;
chunk_data[i].ib_data.ip_instance = ib->ip_instance;
chunk_data[i].ib_data.ring = ib->ring;
} else {
ib = (struct amdgpu_cs_ib_info_gang*)&ibs_request->ibs[i];
chunk_data[i].ib_data.ip_type = ibs_request->ip_type;
chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance;
chunk_data[i].ib_data.ring = ibs_request->ring;
}
chunk_data[i].ib_data._pad = 0;
chunk_data[i].ib_data.va_start = ib->ib_mc_address;
chunk_data[i].ib_data.ib_bytes = ib->size * 4;
chunk_data[i].ib_data.flags = ib->flags;
}
@@ -405,7 +412,29 @@ drm_public int amdgpu_cs_submit(amdgpu_context_handle context,
r = 0;
for (i = 0; i < number_of_requests; i++) {
r = amdgpu_cs_submit_one(context, ibs_request, false);
if (r)
break;
ibs_request++;
}
return r;
}
drm_public int amdgpu_cs_submit_gang(amdgpu_context_handle context,
uint64_t flags,
struct amdgpu_cs_request *ibs_request,
uint32_t number_of_requests)
{
uint32_t i;
int r;
if (!context || !ibs_request)
return -EINVAL;
r = 0;
for (i = 0; i < number_of_requests; i++) {
r = amdgpu_cs_submit_one(context, ibs_request, true);
if (r)
break;
ibs_request++;
}

return r;
}
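One subtlety in amdgpu_cs_submit_one() above: the non-gang branch reads an
amdgpu_cs_ib_info entry through an amdgpu_cs_ib_info_gang pointer (it only
touches flags, ib_mc_address and size), which relies on the two structs
sharing the same leading layout. A compile-time check in that spirit
(illustrative, not part of the commit) could document the invariant:

#include <assert.h>    /* static_assert, C11 */
#include <stddef.h>    /* offsetof */
#include "amdgpu.h"

/* The cast in amdgpu_cs_submit_one() is only valid while the leading
 * members of the two IB-info structs line up exactly. */
static_assert(offsetof(struct amdgpu_cs_ib_info_gang, flags) ==
              offsetof(struct amdgpu_cs_ib_info, flags), "flags");
static_assert(offsetof(struct amdgpu_cs_ib_info_gang, ib_mc_address) ==
              offsetof(struct amdgpu_cs_ib_info, ib_mc_address), "ib_mc_address");
static_assert(offsetof(struct amdgpu_cs_ib_info_gang, size) ==
              offsetof(struct amdgpu_cs_ib_info, size), "size");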