mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 04:48:08 +02:00
clover: implement SVM functions for devices with fine grained system SVM support
all of the functionality can be mapped to malloc/free if the device supports
fine grained system SVM.
v2: fix some API bugs found with the OpenCL CTS
v3: remove validate_even_wait_list
improve implementation of clSetKernelExecInfo
make clEnqueueSVMFree spec compliant
rename can_emulate_non_system_svm to has_system_svm and make it a member method
improve validation in clEnqueueSVMMemFill
handle CL_MEM_USES_SVM_POINTER in clGetMemObjectInfo
v4: break long lines and other minor cosmetic adjustments
Signed-off-by: Karol Herbst <kherbst@redhat.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2076>
This commit is contained in:
parent
d6754eb920
commit
a218658556
6 changed files with 259 additions and 20 deletions
|
|
@ -352,7 +352,37 @@ CLOVER_API cl_int
|
|||
clSetKernelExecInfo(cl_kernel d_kern,
|
||||
cl_kernel_exec_info param_name,
|
||||
size_t param_value_size,
|
||||
const void *param_value) {
|
||||
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
|
||||
return CL_INVALID_VALUE;
|
||||
const void *param_value) try {
|
||||
auto &kern = obj(d_kern);
|
||||
const bool has_system_svm = all_of(std::mem_fn(&device::has_system_svm),
|
||||
kern.program().context().devices());
|
||||
|
||||
if (!param_value)
|
||||
return CL_INVALID_VALUE;
|
||||
|
||||
switch (param_name) {
|
||||
case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM: {
|
||||
if (param_value_size != sizeof(cl_bool))
|
||||
return CL_INVALID_VALUE;
|
||||
|
||||
cl_bool val = *static_cast<const cl_bool*>(param_value);
|
||||
if (val == CL_TRUE && !has_system_svm)
|
||||
return CL_INVALID_OPERATION;
|
||||
else
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
case CL_KERNEL_EXEC_INFO_SVM_PTRS:
|
||||
if (has_system_svm)
|
||||
return CL_SUCCESS;
|
||||
|
||||
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
|
||||
return CL_INVALID_VALUE;
|
||||
|
||||
default:
|
||||
return CL_INVALID_VALUE;
|
||||
}
|
||||
|
||||
} catch (error &e) {
|
||||
return e.get();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,15 +29,20 @@ using namespace clover;
|
|||
|
||||
namespace {
|
||||
cl_mem_flags
|
||||
validate_flags(cl_mem d_parent, cl_mem_flags d_flags) {
|
||||
validate_flags(cl_mem d_parent, cl_mem_flags d_flags, bool svm) {
|
||||
const cl_mem_flags dev_access_flags =
|
||||
CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY;
|
||||
const cl_mem_flags host_ptr_flags =
|
||||
CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR;
|
||||
const cl_mem_flags host_access_flags =
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
|
||||
const cl_mem_flags svm_flags =
|
||||
CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS;
|
||||
|
||||
const cl_mem_flags valid_flags =
|
||||
dev_access_flags | host_access_flags | (d_parent ? 0 : host_ptr_flags);
|
||||
dev_access_flags
|
||||
| (svm || d_parent ? 0 : host_ptr_flags)
|
||||
| (svm ? svm_flags : host_access_flags);
|
||||
|
||||
if ((d_flags & ~valid_flags) ||
|
||||
util_bitcount(d_flags & dev_access_flags) > 1 ||
|
||||
|
|
@ -48,6 +53,10 @@ namespace {
|
|||
(d_flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)))
|
||||
throw error(CL_INVALID_VALUE);
|
||||
|
||||
if ((d_flags & CL_MEM_SVM_ATOMICS) &&
|
||||
!(d_flags & CL_MEM_SVM_FINE_GRAIN_BUFFER))
|
||||
throw error(CL_INVALID_VALUE);
|
||||
|
||||
if (d_parent) {
|
||||
const auto &parent = obj(d_parent);
|
||||
const cl_mem_flags flags = (d_flags |
|
||||
|
|
@ -77,7 +86,7 @@ namespace {
|
|||
CLOVER_API cl_mem
|
||||
clCreateBuffer(cl_context d_ctx, cl_mem_flags d_flags, size_t size,
|
||||
void *host_ptr, cl_int *r_errcode) try {
|
||||
const cl_mem_flags flags = validate_flags(NULL, d_flags);
|
||||
const cl_mem_flags flags = validate_flags(NULL, d_flags, false);
|
||||
auto &ctx = obj(d_ctx);
|
||||
|
||||
if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
|
||||
|
|
@ -103,7 +112,7 @@ clCreateSubBuffer(cl_mem d_mem, cl_mem_flags d_flags,
|
|||
cl_buffer_create_type op,
|
||||
const void *op_info, cl_int *r_errcode) try {
|
||||
auto &parent = obj<root_buffer>(d_mem);
|
||||
const cl_mem_flags flags = validate_flags(d_mem, d_flags);
|
||||
const cl_mem_flags flags = validate_flags(d_mem, d_flags, false);
|
||||
|
||||
if (op == CL_BUFFER_CREATE_TYPE_REGION) {
|
||||
auto reg = reinterpret_cast<const cl_buffer_region *>(op_info);
|
||||
|
|
@ -163,7 +172,7 @@ clCreateImage(cl_context d_ctx, cl_mem_flags d_flags,
|
|||
CL_MEM_COPY_HOST_PTR)))
|
||||
throw error(CL_INVALID_HOST_PTR);
|
||||
|
||||
const cl_mem_flags flags = validate_flags(desc->buffer, d_flags);
|
||||
const cl_mem_flags flags = validate_flags(desc->buffer, d_flags, false);
|
||||
|
||||
if (!supported_formats(ctx, desc->image_type).count(*format))
|
||||
throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
|
||||
|
|
@ -249,7 +258,7 @@ clGetSupportedImageFormats(cl_context d_ctx, cl_mem_flags flags,
|
|||
auto &ctx = obj(d_ctx);
|
||||
auto formats = supported_formats(ctx, type);
|
||||
|
||||
validate_flags(NULL, flags);
|
||||
validate_flags(NULL, flags, false);
|
||||
|
||||
if (r_buf && !r_count)
|
||||
throw error(CL_INVALID_VALUE);
|
||||
|
|
@ -313,6 +322,15 @@ clGetMemObjectInfo(cl_mem d_mem, cl_mem_info param,
|
|||
buf.as_scalar<size_t>() = (sub ? sub->offset() : 0);
|
||||
break;
|
||||
}
|
||||
case CL_MEM_USES_SVM_POINTER: {
|
||||
// with system SVM all host ptrs are SVM pointers
|
||||
// TODO: once we support devices with lower levels of SVM, we have to
|
||||
// check the ptr in more detail
|
||||
const bool system_svm = all_of(std::mem_fn(&device::has_system_svm),
|
||||
mem.context().devices());
|
||||
buf.as_scalar<cl_bool>() = mem.host_ptr() && system_svm;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw error(CL_INVALID_VALUE);
|
||||
}
|
||||
|
|
@ -431,13 +449,48 @@ CLOVER_API void *
|
|||
clSVMAlloc(cl_context d_ctx,
|
||||
cl_svm_mem_flags flags,
|
||||
size_t size,
|
||||
unsigned int alignment) {
|
||||
unsigned int alignment) try {
|
||||
auto &ctx = obj(d_ctx);
|
||||
validate_flags(NULL, flags, true);
|
||||
|
||||
if (!size ||
|
||||
size > fold(minimum(), cl_ulong(ULONG_MAX),
|
||||
map(std::mem_fn(&device::max_mem_alloc_size), ctx.devices())))
|
||||
return nullptr;
|
||||
|
||||
if (!util_is_power_of_two_or_zero(alignment))
|
||||
return nullptr;
|
||||
|
||||
if (!alignment)
|
||||
alignment = 0x80; // sizeof(long16)
|
||||
|
||||
bool can_emulate = all_of(std::mem_fn(&device::has_system_svm), ctx.devices());
|
||||
if (can_emulate) {
|
||||
// we can ignore all the flags as it's not required to honor them.
|
||||
void *ptr = nullptr;
|
||||
if (alignment < sizeof(void*))
|
||||
alignment = sizeof(void*);
|
||||
posix_memalign(&ptr, alignment, size);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
|
||||
return nullptr;
|
||||
|
||||
} catch (error &e) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
CLOVER_API void
|
||||
clSVMFree(cl_context d_ctx,
|
||||
void *svm_pointer) {
|
||||
void *svm_pointer) try {
|
||||
auto &ctx = obj(d_ctx);
|
||||
bool can_emulate = all_of(std::mem_fn(&device::has_system_svm), ctx.devices());
|
||||
|
||||
if (can_emulate)
|
||||
return free(svm_pointer);
|
||||
|
||||
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
|
||||
|
||||
} catch (error &e) {
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@
|
|||
|
||||
#include <cstring>
|
||||
|
||||
#include "util/bitscan.h"
|
||||
|
||||
#include "api/util.hpp"
|
||||
#include "core/event.hpp"
|
||||
#include "core/memory.hpp"
|
||||
|
|
@ -769,13 +771,47 @@ CLOVER_API cl_int
|
|||
clEnqueueSVMFree(cl_command_queue d_q,
|
||||
cl_uint num_svm_pointers,
|
||||
void *svm_pointers[],
|
||||
void (CL_CALLBACK *pfn_free_func) (cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[], void *user_data),
|
||||
void (CL_CALLBACK *pfn_free_func) (
|
||||
cl_command_queue queue, cl_uint num_svm_pointers,
|
||||
void *svm_pointers[], void *user_data),
|
||||
void *user_data,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list,
|
||||
cl_event *event) {
|
||||
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
|
||||
return CL_INVALID_VALUE;
|
||||
cl_event *event) try {
|
||||
if (bool(num_svm_pointers) != bool(svm_pointers))
|
||||
return CL_INVALID_VALUE;
|
||||
|
||||
auto &q = obj(d_q);
|
||||
bool can_emulate = q.device().has_system_svm();
|
||||
auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
|
||||
|
||||
validate_common(q, deps);
|
||||
|
||||
std::vector<void *> svm_pointers_cpy(svm_pointers,
|
||||
svm_pointers + num_svm_pointers);
|
||||
if (!pfn_free_func) {
|
||||
if (!can_emulate) {
|
||||
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
|
||||
return CL_INVALID_VALUE;
|
||||
}
|
||||
pfn_free_func = [](cl_command_queue, cl_uint num_svm_pointers,
|
||||
void *svm_pointers[], void *) {
|
||||
for (void *p : range(svm_pointers, num_svm_pointers))
|
||||
free(p);
|
||||
};
|
||||
}
|
||||
|
||||
auto hev = create<hard_event>(q, CL_COMMAND_SVM_FREE, deps,
|
||||
[=](clover::event &) mutable {
|
||||
pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(),
|
||||
user_data);
|
||||
});
|
||||
|
||||
ret_object(event, hev);
|
||||
return CL_SUCCESS;
|
||||
|
||||
} catch (error &e) {
|
||||
return e.get();
|
||||
}
|
||||
|
||||
CLOVER_API cl_int
|
||||
|
|
@ -786,9 +822,38 @@ clEnqueueSVMMemcpy(cl_command_queue d_q,
|
|||
size_t size,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list,
|
||||
cl_event *event) {
|
||||
cl_event *event) try {
|
||||
|
||||
if (dst_ptr == nullptr || src_ptr == nullptr)
|
||||
return CL_INVALID_VALUE;
|
||||
|
||||
if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr) -
|
||||
reinterpret_cast<ptrdiff_t>(src_ptr))) < size)
|
||||
return CL_MEM_COPY_OVERLAP;
|
||||
|
||||
auto &q = obj(d_q);
|
||||
bool can_emulate = q.device().has_system_svm();
|
||||
auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
|
||||
|
||||
validate_common(q, deps);
|
||||
|
||||
if (can_emulate) {
|
||||
auto hev = create<hard_event>(q, CL_COMMAND_SVM_MEMCPY, deps,
|
||||
[=](clover::event &) {
|
||||
memcpy(dst_ptr, src_ptr, size);
|
||||
});
|
||||
|
||||
if (blocking_copy)
|
||||
hev().wait();
|
||||
ret_object(event, hev);
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
|
||||
return CL_INVALID_VALUE;
|
||||
|
||||
} catch (error &e) {
|
||||
return e.get();
|
||||
}
|
||||
|
||||
CLOVER_API cl_int
|
||||
|
|
@ -799,9 +864,39 @@ clEnqueueSVMMemFill(cl_command_queue d_q,
|
|||
size_t size,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list,
|
||||
cl_event *event) {
|
||||
cl_event *event) try {
|
||||
if (svm_ptr == nullptr || pattern == nullptr ||
|
||||
!util_is_power_of_two_nonzero(pattern_size) ||
|
||||
pattern_size > 128 ||
|
||||
!ptr_is_aligned(svm_ptr, pattern_size) ||
|
||||
size % pattern_size)
|
||||
return CL_INVALID_VALUE;
|
||||
|
||||
auto &q = obj(d_q);
|
||||
bool can_emulate = q.device().has_system_svm();
|
||||
auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
|
||||
|
||||
validate_common(q, deps);
|
||||
|
||||
if (can_emulate) {
|
||||
auto hev = create<hard_event>(q, CL_COMMAND_SVM_MEMFILL, deps,
|
||||
[=](clover::event &) {
|
||||
void *ptr = svm_ptr;
|
||||
for (size_t s = size; s; s -= pattern_size) {
|
||||
memcpy(ptr, pattern, pattern_size);
|
||||
ptr = static_cast<uint8_t*>(ptr) + pattern_size;
|
||||
}
|
||||
});
|
||||
|
||||
ret_object(event, hev);
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
|
||||
return CL_INVALID_VALUE;
|
||||
|
||||
} catch (error &e) {
|
||||
return e.get();
|
||||
}
|
||||
|
||||
CLOVER_API cl_int
|
||||
|
|
@ -812,9 +907,30 @@ clEnqueueSVMMap(cl_command_queue d_q,
|
|||
size_t size,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list,
|
||||
cl_event *event) {
|
||||
cl_event *event) try {
|
||||
|
||||
if (svm_ptr == nullptr || size == 0)
|
||||
return CL_INVALID_VALUE;
|
||||
|
||||
auto &q = obj(d_q);
|
||||
bool can_emulate = q.device().has_system_svm();
|
||||
auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
|
||||
|
||||
validate_common(q, deps);
|
||||
|
||||
if (can_emulate) {
|
||||
auto hev = create<hard_event>(q, CL_COMMAND_SVM_MAP, deps,
|
||||
[](clover::event &) { });
|
||||
|
||||
ret_object(event, hev);
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
|
||||
return CL_INVALID_VALUE;
|
||||
|
||||
} catch (error &e) {
|
||||
return e.get();
|
||||
}
|
||||
|
||||
CLOVER_API cl_int
|
||||
|
|
@ -822,9 +938,30 @@ clEnqueueSVMUnmap(cl_command_queue d_q,
|
|||
void *svm_ptr,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list,
|
||||
cl_event *event) {
|
||||
cl_event *event) try {
|
||||
|
||||
if (svm_ptr == nullptr)
|
||||
return CL_INVALID_VALUE;
|
||||
|
||||
auto &q = obj(d_q);
|
||||
bool can_emulate = q.device().has_system_svm();
|
||||
auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
|
||||
|
||||
validate_common(q, deps);
|
||||
|
||||
if (can_emulate) {
|
||||
auto hev = create<hard_event>(q, CL_COMMAND_SVM_UNMAP, deps,
|
||||
[](clover::event &) { });
|
||||
|
||||
ret_object(event, hev);
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
|
||||
return CL_INVALID_VALUE;
|
||||
|
||||
} catch (error &e) {
|
||||
return e.get();
|
||||
}
|
||||
|
||||
CLOVER_API cl_int
|
||||
|
|
|
|||
|
|
@ -239,7 +239,10 @@ device::svm_support() const {
|
|||
// and SVM pointer into the same kernel at the same time.
|
||||
if (pipe->get_param(pipe, PIPE_CAP_RESOURCE_FROM_USER_MEMORY) &&
|
||||
pipe->get_param(pipe, PIPE_CAP_SYSTEM_SVM))
|
||||
return CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;
|
||||
// we can emulate all lower levels if we support fine grain system
|
||||
return CL_DEVICE_SVM_FINE_GRAIN_SYSTEM |
|
||||
CL_DEVICE_SVM_COARSE_GRAIN_BUFFER |
|
||||
CL_DEVICE_SVM_FINE_GRAIN_BUFFER;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -95,6 +95,11 @@ namespace clover {
|
|||
|
||||
clover::platform &platform;
|
||||
|
||||
inline bool
|
||||
has_system_svm() const {
|
||||
return svm_support() & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;
|
||||
}
|
||||
|
||||
private:
|
||||
pipe_screen *pipe;
|
||||
pipe_loader_device *ldev;
|
||||
|
|
|
|||
|
|
@ -26,6 +26,17 @@
|
|||
#include <atomic>
|
||||
|
||||
namespace clover {
|
||||
///
|
||||
/// Some helper functions for raw pointer operations
|
||||
///
|
||||
///
/// Check whether \a ptr is a multiple of \a a, which is expected to be a
/// power of two (asserted in debug builds; zero also passes the assert).
///
template <class T>
static bool
ptr_is_aligned(const T *ptr, uintptr_t a) noexcept {
   // Isolating the lowest set bit of a power of two yields the value itself.
   assert((a & -a) == a);

   // An aligned address has none of the bits below the alignment set.
   const uintptr_t low_bits_mask = a - 1;
   return !(reinterpret_cast<uintptr_t>(ptr) & low_bits_mask);
}
|
||||
|
||||
///
|
||||
/// Base class for objects that support reference counting.
|
||||
///
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue