From 34494f6c5b10ee8d4e3eeb78482cf4fd6b1669f6 Mon Sep 17 00:00:00 2001 From: Yogesh Mohan Marimuthu Date: Tue, 14 Jun 2022 18:48:53 +0530 Subject: [PATCH] amdgpu: add amdgpu_cs_submit_gang api The amdgpu_cs_submit_gang api can be used to submit IBs from different HW IPs as a single entity. Signed-off-by: Yogesh Mohan Marimuthu Reviewed-by: Vitaly Prosyak --- amdgpu/amdgpu-symbols.txt | 1 + amdgpu/amdgpu.h | 75 +++++++++++++++++++++++++++++++++++++++ amdgpu/amdgpu_cs.c | 45 ++++++++++++++++++----- 3 files changed, 113 insertions(+), 8 deletions(-) diff --git a/amdgpu/amdgpu-symbols.txt b/amdgpu/amdgpu-symbols.txt index 530b343b..9b95badd 100644 --- a/amdgpu/amdgpu-symbols.txt +++ b/amdgpu/amdgpu-symbols.txt @@ -77,3 +77,4 @@ amdgpu_va_get_start_addr amdgpu_va_range_query amdgpu_vm_reserve_vmid amdgpu_vm_unreserve_vmid +amdgpu_cs_submit_gang \ No newline at end of file diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h index 9bdbf366..50c23ef8 100644 --- a/amdgpu/amdgpu.h +++ b/amdgpu/amdgpu.h @@ -306,6 +306,36 @@ struct amdgpu_cs_ib_info { uint32_t size; }; +/** + * Structure describing a gang IB, used for submitting IBs on multiple HW IPs. + * + * \sa amdgpu_cs_request, amdgpu_cs_submit_gang() + * +*/ +struct amdgpu_cs_ib_info_gang { + /** Special flags */ + uint64_t flags; + + /** Virtual MC address of the command buffer */ + uint64_t ib_mc_address; + + /** + * Size of Command Buffer to be submitted. + * - The size is in units of dwords (4 bytes). + * - Could be 0 + */ + uint32_t size; + + /** HW IP type to which the IB belongs */ + uint32_t ip_type; + + /** IP instance index if there are several IPs of the same type. */ + uint32_t ip_instance; + + /** Ring index of the HW IP */ + uint32_t ring; +}; + /** * Structure describing fence information * @@ -377,6 +407,12 @@ struct amdgpu_cs_request { * The fence information */ struct amdgpu_cs_fence_info fence_info; + + /** + * Use *ibs_gang below instead of *ibs for gang submission. 
Gang submission + * allows IBs from different HW IPs to be submitted as a single entity. + */ + struct amdgpu_cs_ib_info_gang *ibs_gang; }; /** @@ -1031,6 +1067,45 @@ int amdgpu_cs_submit(amdgpu_context_handle context, struct amdgpu_cs_request *ibs_request, uint32_t number_of_requests); +/** + * Send request to submit command buffers to hardware. + * + * Kernel driver could use GPU Scheduler to decide when to physically + * send this request to the hardware. Accordingly, this request could be put + * in a queue and sent for execution later. The only guarantee is that requests + * from the same GPU context will be executed in order. + * + * The caller can specify the user fence buffer/location with the fence_info in the + * cs_request. The sequence number is returned via the 'seq_no' parameter + * in the ibs_request structure. + * + * + * \param dev - \c [in] Device handle. + * See #amdgpu_device_initialize() + * \param context - \c [in] GPU Context + * \param flags - \c [in] Global submission flags + * \param ibs_request - \c [in/out] Pointer to submission requests. + * We could submit to several + * engines/rings simultaneously as an + * 'atomic' operation + * \param number_of_requests - \c [in] Number of submission requests + * + * \return 0 on success\n + * <0 - Negative POSIX Error code + * + * \note It is required to pass a correct resource list with buffer handles + * which will be accessible by command buffers from submission. + * This will allow kernel driver to correctly implement "paging". + * Failure to do so will have unpredictable results. 
+ * + * \sa amdgpu_cs_query_fence_status() + * +*/ +int amdgpu_cs_submit_gang(amdgpu_context_handle context, + uint64_t flags, + struct amdgpu_cs_request *ibs_request, + uint32_t number_of_requests); + /** * Query status of Command Buffer Submission * diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c index 49fc16c3..13b7e0ec 100644 --- a/amdgpu/amdgpu_cs.c +++ b/amdgpu/amdgpu_cs.c @@ -246,13 +246,13 @@ drm_public int amdgpu_cs_query_reset_state2(amdgpu_context_handle context, * \param dev - \c [in] Device handle * \param context - \c [in] GPU Context * \param ibs_request - \c [in] Pointer to submission requests - * \param fence - \c [out] return fence for this submission + * \param gang - \c [in] if true different IP ib's can be passed * * \return 0 on success otherwise POSIX Error code * \sa amdgpu_cs_submit() */ static int amdgpu_cs_submit_one(amdgpu_context_handle context, - struct amdgpu_cs_request *ibs_request) + struct amdgpu_cs_request *ibs_request, int gang) { struct drm_amdgpu_cs_chunk *chunks; struct drm_amdgpu_cs_chunk_data *chunk_data; @@ -289,19 +289,26 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context, num_chunks = ibs_request->number_of_ibs; /* IB chunks */ for (i = 0; i < ibs_request->number_of_ibs; i++) { - struct amdgpu_cs_ib_info *ib; + struct amdgpu_cs_ib_info_gang *ib; chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB; chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4; chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i]; - ib = &ibs_request->ibs[i]; + if (gang) { + ib = &ibs_request->ibs_gang[i]; + chunk_data[i].ib_data.ip_type = ib->ip_type; + chunk_data[i].ib_data.ip_instance = ib->ip_instance; + chunk_data[i].ib_data.ring = ib->ring; + } else { + ib = (struct amdgpu_cs_ib_info_gang*)&ibs_request->ibs[i]; + chunk_data[i].ib_data.ip_type = ibs_request->ip_type; + chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance; + chunk_data[i].ib_data.ring = ibs_request->ring; + } chunk_data[i].ib_data._pad = 0; 
chunk_data[i].ib_data.va_start = ib->ib_mc_address; chunk_data[i].ib_data.ib_bytes = ib->size * 4; - chunk_data[i].ib_data.ip_type = ibs_request->ip_type; - chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance; - chunk_data[i].ib_data.ring = ibs_request->ring; chunk_data[i].ib_data.flags = ib->flags; } @@ -405,7 +412,29 @@ drm_public int amdgpu_cs_submit(amdgpu_context_handle context, r = 0; for (i = 0; i < number_of_requests; i++) { - r = amdgpu_cs_submit_one(context, ibs_request); + r = amdgpu_cs_submit_one(context, ibs_request, false); + if (r) + break; + ibs_request++; + } + + return r; +} + +drm_public int amdgpu_cs_submit_gang(amdgpu_context_handle context, + uint64_t flags, + struct amdgpu_cs_request *ibs_request, + uint32_t number_of_requests) +{ + uint32_t i; + int r; + + if (!context || !ibs_request) + return -EINVAL; + + r = 0; + for (i = 0; i < number_of_requests; i++) { + r = amdgpu_cs_submit_one(context, ibs_request, true); if (r) break; ibs_request++;