From 43e8a3f86a8d578eb3accbca29021483d46f8479 Mon Sep 17 00:00:00 2001
From: Arvind Yadav
Date: Tue, 13 Aug 2024 19:40:33 +0530
Subject: [PATCH] amdgpu: Add amdgpu userqueue IOCTL functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds new IOCTL functions to support
userqueue create, remove, signal and wait etc.

v2:(Marek)
   - Add csa support for SDMA queue.
   - Addressed review comments.
   - Removed raw2/op2 ioctl.
   - Added syncobj_timeline_handles in amdgpu_userq_wait IOCTL.

v3:(Yogesh)
   - Rename timeline* objects as per UAPI review (Arvind).

v4: (Marek)
   - Drop AMDGPU_USERQ_BO_WRITE as this should not be a global option
     of the IOCTL, It should be option per buffer. Hence adding separate
     array for read and write BO handles. (Arun)
   - Modify num_fences to __u16, flags changed to __u16 and placed the
     num_fences next to flags for optimal padding and size. (Arun)

v5:(Marek/Pierre-Eric)
   - add more detail params description for signal and wait IOCTL calls.
   - Remove the unused structure fields in signal and wait structs.
   - Add separate array of read and write for BO handles. (Arun)
   - Removes the unused flags parameter from the amdgpu_create_userqueue
     IOCTL. (Arvind)

v6:(Pierre-Eric)
   - Remove unused headers. (Arvind)
   - Modify the function parameter names and struct field names as per
     the review comments. (Arun)

v7:(Marek)
   - Modify the structure field name and comments. (Arun)
   - Rename vm_timeline_syncobj and add comment for vm_timeline_point.
   - Remove GDS buffer support from MQD. (Arvind)

v8:(Pierre-Eric)
   - Modify the function parameter names.
   - Added new function in amdgpu-symbols.txt (Arvind)

v9:(Marek)
   - Use the drm signal/wait structure as the parameter. (Arun)

v10:
   - Report queue_id from amdgpu_create_userqueue only on success and
     reject a NULL queue_id.
   - Check the device handle in free/signal/wait like create does.
   - Convert mqd_in to a 64-bit value through uintptr_t.
   - Make the amdgpu.h parameter names and documentation match the
     function definitions.

Cc: Deucher, Alexander
Cc: Koenig, Christian
Cc: Sharma, Shashank
Reviewed-by: Marek Olšák
Acked-by: Pierre-Eric Pelloux-Prayer
Signed-off-by: Arvind Yadav
Signed-off-by: Arunpravin Paneer Selvam
---
 amdgpu/amdgpu-symbols.txt |   5 ++
 amdgpu/amdgpu.h           |  95 ++++++++++++++++++++++++++++
 amdgpu/amdgpu_bo.c        |  36 +++++++++++
 amdgpu/amdgpu_userq.c     | 128 ++++++++++++++++++++++++++++++++++++++
 amdgpu/meson.build        |   1 +
 5 files changed, 265 insertions(+)
 create mode 100644 amdgpu/amdgpu_userq.c

diff --git a/amdgpu/amdgpu-symbols.txt b/amdgpu/amdgpu-symbols.txt
index 2ba860bf..8cd5559c 100644
--- a/amdgpu/amdgpu-symbols.txt
+++ b/amdgpu/amdgpu-symbols.txt
@@ -14,6 +14,7 @@ amdgpu_bo_query_info
 amdgpu_bo_set_metadata
 amdgpu_bo_va_op
 amdgpu_bo_va_op_raw
+amdgpu_bo_va_op_raw2
 amdgpu_bo_wait_for_idle
 amdgpu_create_bo_from_user_mem
 amdgpu_cs_chunk_fence_info_to_data
@@ -83,3 +84,7 @@ amdgpu_va_get_start_addr
 amdgpu_va_range_query
 amdgpu_vm_reserve_vmid
 amdgpu_vm_unreserve_vmid
+amdgpu_create_userqueue
+amdgpu_free_userqueue
+amdgpu_userq_signal
+amdgpu_userq_wait
diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
index 2d317e93..db2cb7bd 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -44,6 +44,8 @@ extern "C" {
 struct drm_amdgpu_info_hw_ip;
 struct drm_amdgpu_info_uq_fw_areas;
 struct drm_amdgpu_bo_list_entry;
+struct drm_amdgpu_userq_signal;
+struct drm_amdgpu_userq_wait;
 
 /*--------------------------------------------------------------------------*/
 /* --------------------------- Defines ------------------------------------ */
@@ -1532,6 +1534,42 @@ int amdgpu_bo_va_op_raw(amdgpu_device_handle dev,
 			uint64_t flags,
 			uint32_t ops);
 
+/**
+ * VA mapping/unmapping of buffer object for usermode queue.
+ *
+ * This is not a simple drop-in extension for amdgpu_bo_va_op; instead, all
+ * parameters are treated "raw", i.e. size is not automatically aligned, and
+ * all flags must be specified explicitly.
+ *
+ * \param   dev		- \c [in] device handle
+ * \param   bo		- \c [in] BO handle (may be NULL)
+ * \param   offset	- \c [in] Start offset to map
+ * \param   size	- \c [in] Size to map
+ * \param   addr	- \c [in] Start virtual address.
+ * \param   flags	- \c [in] Supported flags for mapping/unmapping
+ * \param   ops		- \c [in] AMDGPU_VA_OP_MAP, AMDGPU_VA_OP_UNMAP,
+ *			  AMDGPU_VA_OP_REPLACE or AMDGPU_VA_OP_CLEAR; any other value fails with -EINVAL
+ * \param   vm_timeline_syncobj_out - \c [in] syncobj handle for PT update fence
+ * \param   vm_timeline_point - \c [in] input timeline point
+ * \param   input_fence_syncobj_handles - \c [in] Array of syncobj handles for bo unmap,
+ *			  clear and replace
+ * \param   num_syncobj_handles - \c [in] Number of syncobj handles
+ *
+ * \return   0 on success\n
+ *          <0 - Negative POSIX Error code
+ */
+int amdgpu_bo_va_op_raw2(amdgpu_device_handle dev,
+			 amdgpu_bo_handle bo,
+			 uint64_t offset,
+			 uint64_t size,
+			 uint64_t addr,
+			 uint64_t flags,
+			 uint32_t ops,
+			 uint32_t vm_timeline_syncobj_out,
+			 uint64_t vm_timeline_point,
+			 uint64_t input_fence_syncobj_handles,
+			 uint32_t num_syncobj_handles);
+
 /**
  *  create semaphore
  *
@@ -1963,6 +2001,63 @@ int amdgpu_vm_reserve_vmid(amdgpu_device_handle dev, uint32_t flags);
  */
 int amdgpu_vm_unreserve_vmid(amdgpu_device_handle dev, uint32_t flags);
 
+/**
+ * Create USERQUEUE
+ * \param   dev			- \c [in] device handle
+ * \param   ip_type		- \c [in] ip type (AMDGPU_HW_IP_GFX, AMDGPU_HW_IP_DMA or AMDGPU_HW_IP_COMPUTE)
+ * \param   doorbell_handle	- \c [in] doorbell handle
+ * \param   doorbell_offset	- \c [in] doorbell index
+ * \param   queue_va		- \c [in] Virtual address of queue
+ * \param   queue_size		- \c [in] userqueue size
+ * \param   wptr_va		- \c [in] Virtual address of wptr
+ * \param   rptr_va		- \c [in] Virtual address of rptr
+ * \param   mqd_in		- \c [in] MQD data
+ * \param   queue_id		- \c [out] queue id, written only on success (must not be NULL)
+ *
+ * \return  0 on success, negative POSIX error code on failure
+ */
+int amdgpu_create_userqueue(amdgpu_device_handle dev,
+			    uint32_t ip_type,
+			    uint32_t doorbell_handle,
+			    uint32_t doorbell_offset,
+			    uint64_t queue_va,
+			    uint64_t queue_size,
+			    uint64_t wptr_va,
+			    uint64_t rptr_va,
+			    void *mqd_in,
+			    uint32_t *queue_id);
+
+/**
+ * Free USERQUEUE
+ * \param   dev			- \c [in] device handle
+ * \param   queue_id		- \c [in] queue id
+ *
+ * \return  0 on success, negative POSIX error code on failure
+ */
+int amdgpu_free_userqueue(amdgpu_device_handle dev, uint32_t queue_id);
+
+/**
+ * Signal USERQUEUE
+ * \param   dev			- \c [in] device handle
+ * \param   signal_data		- \c [in] pointer to struct drm_amdgpu_userq_signal
+ *				  to be filled by the caller
+ *
+ * \return  0 on success, negative POSIX error code on failure
+ */
+int amdgpu_userq_signal(amdgpu_device_handle dev,
+			struct drm_amdgpu_userq_signal *signal_data);
+
+/**
+ * Wait USERQUEUE
+ * \param   dev			- \c [in] device handle
+ * \param   wait_data		- \c [in/out] pointer to struct drm_amdgpu_userq_wait
+ *				  to be filled by the caller
+ *
+ * \return  0 on success, negative POSIX error code on failure
+ */
+int amdgpu_userq_wait(amdgpu_device_handle dev,
+		      struct drm_amdgpu_userq_wait *wait_data);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c
index 672f000d..16ff35f9 100644
--- a/amdgpu/amdgpu_bo.c
+++ b/amdgpu/amdgpu_bo.c
@@ -789,3 +789,39 @@ drm_public int amdgpu_bo_va_op_raw(amdgpu_device_handle dev,
 
 	return r;
 }
+
+drm_public int amdgpu_bo_va_op_raw2(amdgpu_device_handle dev,
+				    amdgpu_bo_handle bo,
+				    uint64_t offset,
+				    uint64_t size,
+				    uint64_t addr,
+				    uint64_t flags,
+				    uint32_t ops,
+				    uint32_t vm_timeline_syncobj_out,
+				    uint64_t vm_timeline_point,
+				    uint64_t input_fence_syncobj_handles,
+				    uint32_t num_syncobj_handles)
+{
+	struct drm_amdgpu_gem_va va;
+	int r;
+
+	if (ops != AMDGPU_VA_OP_MAP && ops != AMDGPU_VA_OP_UNMAP &&
+	    ops != AMDGPU_VA_OP_REPLACE && ops != AMDGPU_VA_OP_CLEAR)
+		return -EINVAL;
+
+	memset(&va, 0, sizeof(va));
+	va.handle = bo ? bo->handle : 0;
+	va.operation = ops;
+	va.flags = flags;
+	va.va_address = addr;
+	va.offset_in_bo = offset;
+	va.map_size = size;
+	va.vm_timeline_syncobj_out = vm_timeline_syncobj_out;
+	va.vm_timeline_point = vm_timeline_point;
+	va.input_fence_syncobj_handles = input_fence_syncobj_handles;
+	va.num_syncobj_handles = num_syncobj_handles;
+
+	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_VA, &va, sizeof(va));
+
+	return r;
+}
diff --git a/amdgpu/amdgpu_userq.c b/amdgpu/amdgpu_userq.c
new file mode 100644
index 00000000..3de0bde5
--- /dev/null
+++ b/amdgpu/amdgpu_userq.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <string.h>
+#include <errno.h>
+#include "xf86drm.h"
+#include "amdgpu_drm.h"
+#include "amdgpu_internal.h"
+
+drm_public int
+amdgpu_create_userqueue(amdgpu_device_handle dev,
+			uint32_t ip_type,
+			uint32_t doorbell_handle,
+			uint32_t doorbell_offset,
+			uint64_t queue_va,
+			uint64_t queue_size,
+			uint64_t wptr_va,
+			uint64_t rptr_va,
+			void *mqd_in,
+			uint32_t *queue_id)
+{
+	int ret;
+	union drm_amdgpu_userq userq;
+	uint64_t mqd_size;
+
+	if (!dev || !queue_id)
+		return -EINVAL;
+
+	/* The size of the MQD payload is selected by the IP type. */
+	switch (ip_type) {
+	case AMDGPU_HW_IP_GFX:
+		mqd_size = sizeof(struct drm_amdgpu_userq_mqd_gfx11);
+		break;
+	case AMDGPU_HW_IP_DMA:
+		mqd_size = sizeof(struct drm_amdgpu_userq_mqd_sdma_gfx11);
+		break;
+	case AMDGPU_HW_IP_COMPUTE:
+		mqd_size = sizeof(struct drm_amdgpu_userq_mqd_compute_gfx11);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	memset(&userq, 0, sizeof(userq));
+
+	userq.in.op = AMDGPU_USERQ_OP_CREATE;
+	userq.in.ip_type = ip_type;
+
+	userq.in.doorbell_handle = doorbell_handle;
+	userq.in.doorbell_offset = doorbell_offset;
+
+	userq.in.queue_va = queue_va;
+	userq.in.queue_size = queue_size;
+	userq.in.wptr_va = wptr_va;
+	userq.in.rptr_va = rptr_va;
+
+	userq.in.mqd = (uint64_t)(uintptr_t)mqd_in;
+	userq.in.mqd_size = mqd_size;
+
+	ret = drmCommandWriteRead(dev->fd, DRM_AMDGPU_USERQ,
+				  &userq, sizeof(userq));
+	/*
+	 * userq.in and userq.out are members of the same union, so after a
+	 * failed call userq.out.queue_id may still hold request bytes rather
+	 * than a queue id; only report it on success.
+	 */
+	if (!ret)
+		*queue_id = userq.out.queue_id;
+
+	return ret;
+}
+
+drm_public int
+amdgpu_free_userqueue(amdgpu_device_handle dev, uint32_t queue_id)
+{
+	union drm_amdgpu_userq userq;
+
+	if (!dev)
+		return -EINVAL;
+
+	memset(&userq, 0, sizeof(userq));
+	userq.in.op = AMDGPU_USERQ_OP_FREE;
+	userq.in.queue_id = queue_id;
+
+	return drmCommandWriteRead(dev->fd, DRM_AMDGPU_USERQ,
+				   &userq, sizeof(userq));
+}
+
+drm_public int
+amdgpu_userq_signal(amdgpu_device_handle dev,
+		    struct drm_amdgpu_userq_signal *signal_data)
+{
+	if (!dev)
+		return -EINVAL;
+
+	return drmCommandWriteRead(dev->fd, DRM_AMDGPU_USERQ_SIGNAL,
+				   signal_data, sizeof(*signal_data));
+}
+
+drm_public int
+amdgpu_userq_wait(amdgpu_device_handle dev,
+		  struct drm_amdgpu_userq_wait *wait_data)
+{
+	if (!dev)
+		return -EINVAL;
+
+	return drmCommandWriteRead(dev->fd, DRM_AMDGPU_USERQ_WAIT,
+				   wait_data, sizeof(*wait_data));
+}
diff --git a/amdgpu/meson.build b/amdgpu/meson.build
index dd65fbb3..a98b983a 100644
--- a/amdgpu/meson.build
+++ b/amdgpu/meson.build
@@ -27,6 +27,7 @@ libdrm_amdgpu = library(
     files(
       'amdgpu_asic_id.c', 'amdgpu_bo.c', 'amdgpu_cs.c', 'amdgpu_device.c',
       'amdgpu_gpu_info.c', 'amdgpu_vamgr.c', 'amdgpu_vm.c', 'handle_table.c',
+      'amdgpu_userq.c',
     ),
     config_file,
   ],