clover: implement SVM functions for devices with fine grained system SVM support

All of the functionality can be mapped to malloc/free if the device supports
fine-grained system SVM.

v2: fix some API bugs found with the OpenCL CTS
v3: remove validate_even_wait_list
    improve implementation of clSetKernelExecInfo
    make clEnqueueSVMFree spec compliant
    rename can_emulate_non_system_svm to has_system_svm and make it a member method
    improve validation in clEnqueueSVMMemFill
    handle CL_MEM_USES_SVM_POINTER in clGetMemObjectInfo
v4: break long lines and other minor cosmetic adjustments

Signed-off-by: Karol Herbst <kherbst@redhat.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2076>
This commit is contained in:
Karol Herbst 2019-05-22 22:34:09 +02:00 committed by Marge Bot
parent d6754eb920
commit a218658556
6 changed files with 259 additions and 20 deletions

View file

@ -352,7 +352,37 @@ CLOVER_API cl_int
clSetKernelExecInfo(cl_kernel d_kern,
cl_kernel_exec_info param_name,
size_t param_value_size,
const void *param_value) {
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
return CL_INVALID_VALUE;
const void *param_value) try {
auto &kern = obj(d_kern);
const bool has_system_svm = all_of(std::mem_fn(&device::has_system_svm),
kern.program().context().devices());
if (!param_value)
return CL_INVALID_VALUE;
switch (param_name) {
case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM: {
if (param_value_size != sizeof(cl_bool))
return CL_INVALID_VALUE;
cl_bool val = *static_cast<const cl_bool*>(param_value);
if (val == CL_TRUE && !has_system_svm)
return CL_INVALID_OPERATION;
else
return CL_SUCCESS;
}
case CL_KERNEL_EXEC_INFO_SVM_PTRS:
if (has_system_svm)
return CL_SUCCESS;
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
return CL_INVALID_VALUE;
default:
return CL_INVALID_VALUE;
}
} catch (error &e) {
return e.get();
}

View file

@ -29,15 +29,20 @@ using namespace clover;
namespace {
cl_mem_flags
validate_flags(cl_mem d_parent, cl_mem_flags d_flags) {
validate_flags(cl_mem d_parent, cl_mem_flags d_flags, bool svm) {
const cl_mem_flags dev_access_flags =
CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY;
const cl_mem_flags host_ptr_flags =
CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR;
const cl_mem_flags host_access_flags =
CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
const cl_mem_flags svm_flags =
CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS;
const cl_mem_flags valid_flags =
dev_access_flags | host_access_flags | (d_parent ? 0 : host_ptr_flags);
dev_access_flags
| (svm || d_parent ? 0 : host_ptr_flags)
| (svm ? svm_flags : host_access_flags);
if ((d_flags & ~valid_flags) ||
util_bitcount(d_flags & dev_access_flags) > 1 ||
@ -48,6 +53,10 @@ namespace {
(d_flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)))
throw error(CL_INVALID_VALUE);
if ((d_flags & CL_MEM_SVM_ATOMICS) &&
!(d_flags & CL_MEM_SVM_FINE_GRAIN_BUFFER))
throw error(CL_INVALID_VALUE);
if (d_parent) {
const auto &parent = obj(d_parent);
const cl_mem_flags flags = (d_flags |
@ -77,7 +86,7 @@ namespace {
CLOVER_API cl_mem
clCreateBuffer(cl_context d_ctx, cl_mem_flags d_flags, size_t size,
void *host_ptr, cl_int *r_errcode) try {
const cl_mem_flags flags = validate_flags(NULL, d_flags);
const cl_mem_flags flags = validate_flags(NULL, d_flags, false);
auto &ctx = obj(d_ctx);
if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
@ -103,7 +112,7 @@ clCreateSubBuffer(cl_mem d_mem, cl_mem_flags d_flags,
cl_buffer_create_type op,
const void *op_info, cl_int *r_errcode) try {
auto &parent = obj<root_buffer>(d_mem);
const cl_mem_flags flags = validate_flags(d_mem, d_flags);
const cl_mem_flags flags = validate_flags(d_mem, d_flags, false);
if (op == CL_BUFFER_CREATE_TYPE_REGION) {
auto reg = reinterpret_cast<const cl_buffer_region *>(op_info);
@ -163,7 +172,7 @@ clCreateImage(cl_context d_ctx, cl_mem_flags d_flags,
CL_MEM_COPY_HOST_PTR)))
throw error(CL_INVALID_HOST_PTR);
const cl_mem_flags flags = validate_flags(desc->buffer, d_flags);
const cl_mem_flags flags = validate_flags(desc->buffer, d_flags, false);
if (!supported_formats(ctx, desc->image_type).count(*format))
throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
@ -249,7 +258,7 @@ clGetSupportedImageFormats(cl_context d_ctx, cl_mem_flags flags,
auto &ctx = obj(d_ctx);
auto formats = supported_formats(ctx, type);
validate_flags(NULL, flags);
validate_flags(NULL, flags, false);
if (r_buf && !r_count)
throw error(CL_INVALID_VALUE);
@ -313,6 +322,15 @@ clGetMemObjectInfo(cl_mem d_mem, cl_mem_info param,
buf.as_scalar<size_t>() = (sub ? sub->offset() : 0);
break;
}
case CL_MEM_USES_SVM_POINTER: {
// with system SVM all host ptrs are SVM pointers
// TODO: once we support devices with lower levels of SVM, we have to
// check the ptr in more detail
const bool system_svm = all_of(std::mem_fn(&device::has_system_svm),
mem.context().devices());
buf.as_scalar<cl_bool>() = mem.host_ptr() && system_svm;
break;
}
default:
throw error(CL_INVALID_VALUE);
}
@ -431,13 +449,48 @@ CLOVER_API void *
clSVMAlloc(cl_context d_ctx,
cl_svm_mem_flags flags,
size_t size,
unsigned int alignment) {
unsigned int alignment) try {
auto &ctx = obj(d_ctx);
validate_flags(NULL, flags, true);
if (!size ||
size > fold(minimum(), cl_ulong(ULONG_MAX),
map(std::mem_fn(&device::max_mem_alloc_size), ctx.devices())))
return nullptr;
if (!util_is_power_of_two_or_zero(alignment))
return nullptr;
if (!alignment)
alignment = 0x80; // sizeof(long16)
bool can_emulate = all_of(std::mem_fn(&device::has_system_svm), ctx.devices());
if (can_emulate) {
// we can ignore all the flags as it's not required to honor them.
void *ptr = nullptr;
if (alignment < sizeof(void*))
alignment = sizeof(void*);
posix_memalign(&ptr, alignment, size);
return ptr;
}
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
return nullptr;
} catch (error &e) {
return nullptr;
}
CLOVER_API void
clSVMFree(cl_context d_ctx,
void *svm_pointer) {
void *svm_pointer) try {
auto &ctx = obj(d_ctx);
bool can_emulate = all_of(std::mem_fn(&device::has_system_svm), ctx.devices());
if (can_emulate)
return free(svm_pointer);
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
} catch (error &e) {
}

View file

@ -22,6 +22,8 @@
#include <cstring>
#include "util/bitscan.h"
#include "api/util.hpp"
#include "core/event.hpp"
#include "core/memory.hpp"
@ -769,13 +771,47 @@ CLOVER_API cl_int
clEnqueueSVMFree(cl_command_queue d_q,
cl_uint num_svm_pointers,
void *svm_pointers[],
void (CL_CALLBACK *pfn_free_func) (cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[], void *user_data),
void (CL_CALLBACK *pfn_free_func) (
cl_command_queue queue, cl_uint num_svm_pointers,
void *svm_pointers[], void *user_data),
void *user_data,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
cl_event *event) {
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
return CL_INVALID_VALUE;
cl_event *event) try {
if (bool(num_svm_pointers) != bool(svm_pointers))
return CL_INVALID_VALUE;
auto &q = obj(d_q);
bool can_emulate = q.device().has_system_svm();
auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
validate_common(q, deps);
std::vector<void *> svm_pointers_cpy(svm_pointers,
svm_pointers + num_svm_pointers);
if (!pfn_free_func) {
if (!can_emulate) {
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
return CL_INVALID_VALUE;
}
pfn_free_func = [](cl_command_queue, cl_uint num_svm_pointers,
void *svm_pointers[], void *) {
for (void *p : range(svm_pointers, num_svm_pointers))
free(p);
};
}
auto hev = create<hard_event>(q, CL_COMMAND_SVM_FREE, deps,
[=](clover::event &) mutable {
pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(),
user_data);
});
ret_object(event, hev);
return CL_SUCCESS;
} catch (error &e) {
return e.get();
}
CLOVER_API cl_int
@ -786,9 +822,38 @@ clEnqueueSVMMemcpy(cl_command_queue d_q,
size_t size,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
cl_event *event) {
cl_event *event) try {
if (dst_ptr == nullptr || src_ptr == nullptr)
return CL_INVALID_VALUE;
if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr) -
reinterpret_cast<ptrdiff_t>(src_ptr))) < size)
return CL_MEM_COPY_OVERLAP;
auto &q = obj(d_q);
bool can_emulate = q.device().has_system_svm();
auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
validate_common(q, deps);
if (can_emulate) {
auto hev = create<hard_event>(q, CL_COMMAND_SVM_MEMCPY, deps,
[=](clover::event &) {
memcpy(dst_ptr, src_ptr, size);
});
if (blocking_copy)
hev().wait();
ret_object(event, hev);
return CL_SUCCESS;
}
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
return CL_INVALID_VALUE;
} catch (error &e) {
return e.get();
}
CLOVER_API cl_int
@ -799,9 +864,39 @@ clEnqueueSVMMemFill(cl_command_queue d_q,
size_t size,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
cl_event *event) {
cl_event *event) try {
if (svm_ptr == nullptr || pattern == nullptr ||
!util_is_power_of_two_nonzero(pattern_size) ||
pattern_size > 128 ||
!ptr_is_aligned(svm_ptr, pattern_size) ||
size % pattern_size)
return CL_INVALID_VALUE;
auto &q = obj(d_q);
bool can_emulate = q.device().has_system_svm();
auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
validate_common(q, deps);
if (can_emulate) {
auto hev = create<hard_event>(q, CL_COMMAND_SVM_MEMFILL, deps,
[=](clover::event &) {
void *ptr = svm_ptr;
for (size_t s = size; s; s -= pattern_size) {
memcpy(ptr, pattern, pattern_size);
ptr = static_cast<uint8_t*>(ptr) + pattern_size;
}
});
ret_object(event, hev);
return CL_SUCCESS;
}
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
return CL_INVALID_VALUE;
} catch (error &e) {
return e.get();
}
CLOVER_API cl_int
@ -812,9 +907,30 @@ clEnqueueSVMMap(cl_command_queue d_q,
size_t size,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
cl_event *event) {
cl_event *event) try {
if (svm_ptr == nullptr || size == 0)
return CL_INVALID_VALUE;
auto &q = obj(d_q);
bool can_emulate = q.device().has_system_svm();
auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
validate_common(q, deps);
if (can_emulate) {
auto hev = create<hard_event>(q, CL_COMMAND_SVM_MAP, deps,
[](clover::event &) { });
ret_object(event, hev);
return CL_SUCCESS;
}
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
return CL_INVALID_VALUE;
} catch (error &e) {
return e.get();
}
CLOVER_API cl_int
@ -822,9 +938,30 @@ clEnqueueSVMUnmap(cl_command_queue d_q,
void *svm_ptr,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
cl_event *event) {
cl_event *event) try {
if (svm_ptr == nullptr)
return CL_INVALID_VALUE;
auto &q = obj(d_q);
bool can_emulate = q.device().has_system_svm();
auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
validate_common(q, deps);
if (can_emulate) {
auto hev = create<hard_event>(q, CL_COMMAND_SVM_UNMAP, deps,
[](clover::event &) { });
ret_object(event, hev);
return CL_SUCCESS;
}
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
return CL_INVALID_VALUE;
} catch (error &e) {
return e.get();
}
CLOVER_API cl_int

View file

@ -239,7 +239,10 @@ device::svm_support() const {
// and SVM pointer into the same kernel at the same time.
if (pipe->get_param(pipe, PIPE_CAP_RESOURCE_FROM_USER_MEMORY) &&
pipe->get_param(pipe, PIPE_CAP_SYSTEM_SVM))
return CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;
// we can emulate all lower levels if we support fine grain system
return CL_DEVICE_SVM_FINE_GRAIN_SYSTEM |
CL_DEVICE_SVM_COARSE_GRAIN_BUFFER |
CL_DEVICE_SVM_FINE_GRAIN_BUFFER;
return 0;
}

View file

@ -95,6 +95,11 @@ namespace clover {
clover::platform &platform;
/// Whether svm_support() includes CL_DEVICE_SVM_FINE_GRAIN_SYSTEM.
inline bool
has_system_svm() const {
   const auto caps = svm_support();
   return (caps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) != 0;
}
private:
pipe_screen *pipe;
pipe_loader_device *ldev;

View file

@ -26,6 +26,17 @@
#include <atomic>
namespace clover {
///
/// Some helper functions for raw pointer operations
///
template <class T>
static bool
ptr_is_aligned(const T *ptr, uintptr_t a) noexcept {
   // Isolating the lowest set bit leaves the value unchanged only when at
   // most one bit is set, i.e. \a a is a power of two (or zero).
   assert((a & -a) == a);

   // The address is aligned when none of the bits below \a a are set.
   const uintptr_t low_bits = a - 1;
   const auto address = reinterpret_cast<uintptr_t>(ptr);
   return !(address & low_bits);
}
///
/// Base class for objects that support reference counting.
///