pan/kmod: Cache the device props at the pan_kmod_dev level
The frontend is going to query the device props anyway, so let's just
query them at device creation time and store them in pan_kmod_dev::props.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36385>
This commit is contained in:
parent f43cff3728
commit ee172bb769
34 changed files with 302 additions and 307 deletions
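The shape of the change is easiest to see from the consumer side. A minimal before/after sketch in C (simplified for illustration; the real call sites are in the diff below):

   /* Before: each consumer queried the props itself and kept a local copy. */
   struct pan_kmod_dev_props props;
   pan_kmod_dev_query_props(dev, &props);
   unsigned arch = pan_arch(props.gpu_id);

   /* After: the kmod backend fills pan_kmod_dev::props once at device
    * creation, so consumers just read the cached copy. */
   unsigned arch = pan_arch(dev->props.gpu_id);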
@@ -1634,8 +1634,8 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
    struct pan_tls_info info = {
       .tls.size = ss->info.tls_size,
       .wls.size = ss->info.wls_size + grid->variable_shared_mem,
-      .wls.instances = pan_calc_wls_instances(&local_size, &dev->kmod.props,
-                                              grid->indirect ? NULL : &dim),
+      .wls.instances = pan_calc_wls_instances(
+         &local_size, &dev->kmod.dev->props, grid->indirect ? NULL : &dim),
    };

    if (ss->info.tls_size) {
@@ -4493,12 +4493,12 @@ GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen)
    screen->vtbl.get_conv_desc = get_conv_desc;

    pan_blend_shader_cache_init(&dev->blend_shaders, panfrost_device_gpu_id(dev),
-                               dev->kmod.props.gpu_variant,
+                               dev->kmod.dev->props.gpu_variant,
                                &screen->mempools.bin.base);

    GENX(pan_fb_preload_cache_init)
    (&dev->fb_preload_cache, panfrost_device_gpu_id(dev),
-    dev->kmod.props.gpu_variant, &dev->blend_shaders,
+    dev->kmod.dev->props.gpu_variant, &dev->blend_shaders,
     &screen->mempools.bin.base, &screen->mempools.desc.base);

    dev->precomp_cache = GENX(panfrost_precomp_cache_init)(screen);
@@ -783,7 +783,7 @@ panfrost_get_query_result(struct pipe_context *pipe, struct pipe_query *q,

    case PIPE_QUERY_TIMESTAMP_DISJOINT: {
       vresult->timestamp_disjoint.frequency =
-         dev->kmod.props.timestamp_frequency;
+         dev->kmod.dev->props.timestamp_frequency;
       vresult->timestamp_disjoint.disjoint = false;
       break;
    }
@@ -966,8 +966,8 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch,
    cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Z), 0);

    unsigned threads_per_wg = info->block[0] * info->block[1] * info->block[2];
-   unsigned max_thread_cnt =
-      pan_compute_max_thread_count(&dev->kmod.props, cs->info.work_reg_count);
+   unsigned max_thread_cnt = pan_compute_max_thread_count(
+      &dev->kmod.dev->props, cs->info.work_reg_count);

    if (info->indirect) {
       /* Load size in workgroups per dimension from memory */
@@ -60,11 +60,9 @@ panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev)
       return -1;
    }

-   pan_kmod_dev_query_props(dev->kmod.dev, &dev->kmod.props);
-
-   dev->arch = pan_arch(dev->kmod.props.gpu_id);
-   dev->model = pan_get_model(dev->kmod.props.gpu_id,
-                              dev->kmod.props.gpu_variant);
+   dev->arch = pan_arch(dev->kmod.dev->props.gpu_id);
+   dev->model = pan_get_model(dev->kmod.dev->props.gpu_id,
+                              dev->kmod.dev->props.gpu_variant);

    /* If we don't recognize the model, bail early */
    if (!dev->model)
@@ -85,14 +83,16 @@ panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev)
       goto err_free_kmod_dev;

    dev->core_count =
-      pan_query_core_count(&dev->kmod.props, &dev->core_id_range);
-   dev->thread_tls_alloc = pan_query_thread_tls_alloc(&dev->kmod.props);
+      pan_query_core_count(&dev->kmod.dev->props, &dev->core_id_range);
+   dev->thread_tls_alloc = pan_query_thread_tls_alloc(&dev->kmod.dev->props);
    dev->optimal_tib_size = pan_query_optimal_tib_size(dev->arch, dev->model);
-   dev->optimal_z_tib_size = pan_query_optimal_z_tib_size(dev->arch, dev->model);
-   dev->compressed_formats = pan_query_compressed_formats(&dev->kmod.props);
-   dev->tiler_features = pan_query_tiler_features(&dev->kmod.props);
-   dev->has_afbc = pan_query_afbc(&dev->kmod.props);
-   dev->has_afrc = pan_query_afrc(&dev->kmod.props);
+   dev->optimal_z_tib_size =
+      pan_query_optimal_z_tib_size(dev->arch, dev->model);
+   dev->compressed_formats =
+      pan_query_compressed_formats(&dev->kmod.dev->props);
+   dev->tiler_features = pan_query_tiler_features(&dev->kmod.dev->props);
+   dev->has_afbc = pan_query_afbc(&dev->kmod.dev->props);
+   dev->has_afrc = pan_query_afrc(&dev->kmod.dev->props);
    dev->formats = pan_format_table(dev->arch);
    dev->blendable_formats = pan_blendable_format_table(dev->arch);

@@ -90,9 +90,6 @@ struct panfrost_device {
       /* The pan_kmod_dev object backing this device. */
       struct pan_kmod_dev *dev;

-      /* Cached pan_kmod_dev_props properties queried at device create time. */
-      struct pan_kmod_dev_props props;
-
       /* VM attached to this device. */
       struct pan_kmod_vm *vm;
    } kmod;
@@ -194,19 +191,19 @@ panfrost_device_fd(const struct panfrost_device *dev)
 static inline uint32_t
 panfrost_device_gpu_id(const struct panfrost_device *dev)
 {
-   return dev->kmod.props.gpu_id;
+   return dev->kmod.dev->props.gpu_id;
 }

 static inline uint32_t
 panfrost_device_gpu_prod_id(const struct panfrost_device *dev)
 {
-   return dev->kmod.props.gpu_id >> 16;
+   return dev->kmod.dev->props.gpu_id >> 16;
 }

 static inline uint32_t
 panfrost_device_gpu_rev(const struct panfrost_device *dev)
 {
-   return dev->kmod.props.gpu_id & BITFIELD_MASK(16);
+   return dev->kmod.dev->props.gpu_id & BITFIELD_MASK(16);
 }

 static inline int
@@ -243,8 +240,8 @@ pan_is_bifrost(const struct panfrost_device *dev)
 static inline uint64_t
 pan_gpu_time_to_ns(struct panfrost_device *dev, uint64_t gpu_time)
 {
-   assert(dev->kmod.props.timestamp_frequency > 0);
-   return (gpu_time * NSEC_PER_SEC) / dev->kmod.props.timestamp_frequency;
+   assert(dev->kmod.dev->props.timestamp_frequency > 0);
+   return (gpu_time * NSEC_PER_SEC) / dev->kmod.dev->props.timestamp_frequency;
 }

 static inline uint32_t
@@ -197,8 +197,8 @@ emit_tls(struct panfrost_batch *batch,
    struct pan_tls_info info = {
       .tls.size = shader->info.tls_size,
       .wls.size = shader->info.wls_size,
-      .wls.instances =
-         pan_calc_wls_instances(&shader->local_size, &dev->kmod.props, dim),
+      .wls.instances = pan_calc_wls_instances(&shader->local_size,
+                                              &dev->kmod.dev->props, dim),
    };

    if (info.tls.size) {
@@ -354,7 +354,7 @@ GENX(panfrost_launch_precomp)(struct panfrost_batch *batch,
    unsigned threads_per_wg =
       shader->local_size.x * shader->local_size.y * shader->local_size.z;
    unsigned max_thread_cnt = pan_compute_max_thread_count(
-      &dev->kmod.props, shader->info.work_reg_count);
+      &dev->kmod.dev->props, shader->info.work_reg_count);

    /* Pick the task_axis and task_increment to maximize thread utilization. */
    unsigned task_axis = MALI_TASK_AXIS_X;
@@ -439,12 +439,12 @@ panfrost_walk_dmabuf_modifiers(struct pipe_screen *screen,
       for (unsigned r = 0; r < yuv_lowering.nres; r++) {
          enum pipe_format res_format = yuv_lowering.res_formats[r];

-         supported &= pan_image_test_modifier_with_format(&dev->kmod.props,
-                                                          mod, res_format);
+         supported &= pan_image_test_modifier_with_format(
+            &dev->kmod.dev->props, mod, res_format);
       }
    } else {
-      supported =
-         pan_image_test_modifier_with_format(&dev->kmod.props, mod, format);
+      supported = pan_image_test_modifier_with_format(&dev->kmod.dev->props,
+                                                      mod, format);
    }

    if (!supported)
@@ -727,10 +727,10 @@ panfrost_init_screen_caps(struct panfrost_screen *screen)

    /* Compile side is TODO for Midgard. */
    caps->shader_clock = dev->arch >= 6 &&
-                        dev->kmod.props.gpu_can_query_timestamp;
+                        dev->kmod.dev->props.gpu_can_query_timestamp;
    caps->shader_realtime_clock = dev->arch >= 6 &&
-                                 dev->kmod.props.gpu_can_query_timestamp &&
-                                 dev->kmod.props.timestamp_device_coherent;
+                                 dev->kmod.dev->props.gpu_can_query_timestamp &&
+                                 dev->kmod.dev->props.timestamp_device_coherent;

    /* pixel_local_storage is initially for valhall and bifrost only */
    caps->shader_pixel_local_storage_fast_size =
@@ -796,10 +796,9 @@ panfrost_init_screen_caps(struct panfrost_screen *screen)
    /* Must be at least 64 for correct behaviour */
    caps->texture_buffer_offset_alignment = 64;

-   caps->query_time_elapsed =
-      caps->query_timestamp =
-         dev->kmod.props.gpu_can_query_timestamp &&
-         dev->kmod.props.timestamp_frequency != 0;
+   caps->query_time_elapsed = caps->query_timestamp =
+      dev->kmod.dev->props.gpu_can_query_timestamp &&
+      dev->kmod.dev->props.timestamp_frequency != 0;

    if (caps->query_timestamp)
       caps->timer_resolution = pan_gpu_time_to_ns(dev, 1);
@@ -914,9 +913,8 @@ panfrost_init_screen_caps(struct panfrost_screen *screen)

    caps->native_fence_fd = true;

-   caps->context_priority_mask =
-      from_kmod_group_allow_priority_flags(
-         dev->kmod.props.allowed_group_priorities_mask);
+   caps->context_priority_mask = from_kmod_group_allow_priority_flags(
+      dev->kmod.dev->props.allowed_group_priorities_mask);

    caps->astc_decode_mode = dev->arch >= 9 && (dev->compressed_formats & (1 << 30));

@@ -1006,7 +1004,7 @@ get_core_mask(const struct panfrost_device *dev,
               const struct pipe_screen_config *config,
               const char *option_name, uint64_t *mask)
 {
-   uint64_t present = dev->kmod.props.shader_present;
+   uint64_t present = dev->kmod.dev->props.shader_present;
    *mask = driQueryOptionu64(config->options, option_name) & present;

    if (!*mask) {
@@ -146,7 +146,7 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,

    struct pan_compile_inputs inputs = {
       .gpu_id = panfrost_device_gpu_id(dev),
-      .gpu_variant = dev->kmod.props.gpu_variant,
+      .gpu_variant = dev->kmod.dev->props.gpu_variant,
       .get_conv_desc = screen->vtbl.get_conv_desc,
    };

@@ -688,8 +688,8 @@ panfrost_get_compute_state_info(struct pipe_context *pipe, void *cso,
    struct panfrost_compiled_shader *cs =
       util_dynarray_begin(&uncompiled->variants);

-   info->max_threads =
-      pan_compute_max_thread_count(&dev->kmod.props, cs->info.work_reg_count);
+   info->max_threads = pan_compute_max_thread_count(&dev->kmod.dev->props,
+                                                    cs->info.work_reg_count);
    info->private_memory = cs->info.tls_size;
    info->simd_sizes = pan_subgroup_size(dev->arch);
    info->preferred_simd_size = info->simd_sizes;
@@ -369,10 +369,6 @@ struct pan_kmod_ops {
    /* Destroy a pan_kmod_dev object. */
    void (*dev_destroy)(struct pan_kmod_dev *dev);

-   /* Query device properties. */
-   void (*dev_query_props)(const struct pan_kmod_dev *dev,
-                           struct pan_kmod_dev_props *props);
-
    /* Query the maxium user VA range.
     * Users are free to use a subset of this range if they need less VA space.
     * This method is optional, when not specified, kmod assumes the whole VA
@@ -466,6 +462,9 @@ struct pan_kmod_dev {
    /* KMD backing this device. */
    struct pan_kmod_driver driver;

+   /* KMD-agnostic device properties. */
+   struct pan_kmod_dev_props props;
+
    /* kmod backend ops assigned at device creation. */
    const struct pan_kmod_ops *ops;

@@ -499,25 +498,15 @@ pan_kmod_dev_create(int fd, uint32_t flags,

 void pan_kmod_dev_destroy(struct pan_kmod_dev *dev);

-static inline void
-pan_kmod_dev_query_props(const struct pan_kmod_dev *dev,
-                         struct pan_kmod_dev_props *props)
-{
-   dev->ops->dev_query_props(dev, props);
-}
-
 static inline struct pan_kmod_va_range
 pan_kmod_dev_query_user_va_range(const struct pan_kmod_dev *dev)
 {
    if (dev->ops->dev_query_user_va_range)
       return dev->ops->dev_query_user_va_range(dev);

-   struct pan_kmod_dev_props props;
-
-   pan_kmod_dev_query_props(dev, &props);
    return (struct pan_kmod_va_range){
       .start = 0,
-      .size = 1ull << MMU_FEATURES_VA_BITS(props.mmu_features),
+      .size = 1ull << MMU_FEATURES_VA_BITS(dev->props.mmu_features),
    };
 }

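With the props owned by pan_kmod_dev, the dev_query_props hook can be dropped from pan_kmod_ops, and the backend contract becomes: a dev_create implementation must populate base.props before returning. A hedged sketch of that contract for a hypothetical mykmod backend (the mykmod names are illustrative, not from the tree; the real panfrost and panthor versions follow in the diff):

   static struct pan_kmod_dev *
   mykmod_dev_create(int fd, uint32_t flags, drmVersionPtr version,
                     const struct pan_kmod_allocator *allocator)
   {
      struct mykmod_dev *mdev = pan_kmod_alloc(allocator, sizeof(*mdev));

      if (!mdev)
         return NULL;

      pan_kmod_dev_init(&mdev->base, fd, flags, version, &mykmod_ops, allocator);
      /* Fill mdev->base.props exactly once; the cached copy stays valid for
       * the device's whole lifetime. */
      mykmod_dev_query_props(mdev);
      return &mdev->base;
   }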
@@ -42,39 +42,6 @@ struct panfrost_kmod_bo {
    uint64_t offset;
 };

-static struct pan_kmod_dev *
-panfrost_kmod_dev_create(int fd, uint32_t flags, drmVersionPtr version,
-                         const struct pan_kmod_allocator *allocator)
-{
-   if (version->version_major < 1 ||
-       (version->version_major == 1 && version->version_minor < 1)) {
-      mesa_loge("kernel driver is too old (requires at least 1.1, found %d.%d)",
-                version->version_major, version->version_minor);
-      return NULL;
-   }
-
-   struct panfrost_kmod_dev *panfrost_dev =
-      pan_kmod_alloc(allocator, sizeof(*panfrost_dev));
-   if (!panfrost_dev) {
-      mesa_loge("failed to allocate a panfrost_kmod_dev object");
-      return NULL;
-   }
-
-   pan_kmod_dev_init(&panfrost_dev->base, fd, flags, version,
-                     &panfrost_kmod_ops, allocator);
-   return &panfrost_dev->base;
-}
-
-static void
-panfrost_kmod_dev_destroy(struct pan_kmod_dev *dev)
-{
-   struct panfrost_kmod_dev *panfrost_dev =
-      container_of(dev, struct panfrost_kmod_dev, base);
-
-   pan_kmod_dev_cleanup(dev);
-   pan_kmod_free(dev->allocator, panfrost_dev);
-}
-
 /* Abstraction over the raw drm_panfrost_get_param ioctl for fetching
  * information about devices.
  */
@@ -97,9 +64,10 @@ panfrost_query_raw(int fd, enum drm_panfrost_param param, bool required,
 }

 static void
-panfrost_dev_query_thread_props(const struct pan_kmod_dev *dev,
-                                struct pan_kmod_dev_props *props)
+panfrost_dev_query_thread_props(struct panfrost_kmod_dev *panfrost_dev)
 {
+   struct pan_kmod_dev_props *props = &panfrost_dev->base.props;
+   const struct pan_kmod_dev *dev = &panfrost_dev->base;
    int fd = dev->fd;

    props->max_threads_per_core =
@@ -177,9 +145,10 @@ panfrost_dev_query_thread_props(const struct pan_kmod_dev *dev,
 }

 static void
-panfrost_dev_query_props(const struct pan_kmod_dev *dev,
-                         struct pan_kmod_dev_props *props)
+panfrost_dev_query_props(struct panfrost_kmod_dev *panfrost_dev)
 {
+   struct pan_kmod_dev_props *props = &panfrost_dev->base.props;
+   const struct pan_kmod_dev *dev = &panfrost_dev->base;
    int fd = dev->fd;

    memset(props, 0, sizeof(*props));
@@ -203,7 +172,7 @@ panfrost_dev_query_props(const struct pan_kmod_dev *dev,
    props->afbc_features =
       panfrost_query_raw(fd, DRM_PANFROST_PARAM_AFBC_FEATURES, true, 0);

-   panfrost_dev_query_thread_props(dev, props);
+   panfrost_dev_query_thread_props(panfrost_dev);

    if (dev->driver.version.major > 1 || dev->driver.version.minor >= 3) {
       props->gpu_can_query_timestamp = true;
@@ -230,6 +199,41 @@ panfrost_dev_query_props(const struct pan_kmod_dev *dev,
          PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH;
 }

+static struct pan_kmod_dev *
+panfrost_kmod_dev_create(int fd, uint32_t flags, drmVersionPtr version,
+                         const struct pan_kmod_allocator *allocator)
+{
+   if (version->version_major < 1 ||
+       (version->version_major == 1 && version->version_minor < 1)) {
+      mesa_loge("kernel driver is too old (requires at least 1.1, found %d.%d)",
+                version->version_major, version->version_minor);
+      return NULL;
+   }
+
+   struct panfrost_kmod_dev *panfrost_dev =
+      pan_kmod_alloc(allocator, sizeof(*panfrost_dev));
+   if (!panfrost_dev) {
+      mesa_loge("failed to allocate a panfrost_kmod_dev object");
+      return NULL;
+   }
+
+   pan_kmod_dev_init(&panfrost_dev->base, fd, flags, version,
+                     &panfrost_kmod_ops, allocator);
+   panfrost_dev_query_props(panfrost_dev);
+
+   return &panfrost_dev->base;
+}
+
+static void
+panfrost_kmod_dev_destroy(struct pan_kmod_dev *dev)
+{
+   struct panfrost_kmod_dev *panfrost_dev =
+      container_of(dev, struct panfrost_kmod_dev, base);
+
+   pan_kmod_dev_cleanup(dev);
+   pan_kmod_free(dev->allocator, panfrost_dev);
+}
+
 static uint32_t
 to_panfrost_bo_flags(struct pan_kmod_dev *dev, uint32_t flags)
 {
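Note the ordering in the relocated panfrost_kmod_dev_create() above: the props query runs after pan_kmod_dev_init(), since the query helpers read dev->fd and dev->driver.version, which (judging by the init arguments) is state recorded by pan_kmod_dev_init(). In short:

   pan_kmod_dev_init(&panfrost_dev->base, fd, flags, version,
                     &panfrost_kmod_ops, allocator);
   panfrost_dev_query_props(panfrost_dev);  /* reads dev->fd and
                                               dev->driver.version */

This is also why the create/destroy definitions moved below the query helpers in the file.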
@@ -530,7 +534,6 @@ panfrost_kmod_bo_label(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const c
 const struct pan_kmod_ops panfrost_kmod_ops = {
    .dev_create = panfrost_kmod_dev_create,
    .dev_destroy = panfrost_kmod_dev_destroy,
-   .dev_query_props = panfrost_dev_query_props,
    .dev_query_user_va_range = panfrost_kmod_dev_query_user_va_range,
    .bo_alloc = panfrost_kmod_bo_alloc,
    .bo_free = panfrost_kmod_bo_free,
@@ -103,6 +103,90 @@ struct panthor_kmod_bo {
    } sync;
 };

+static uint32_t
+to_kmod_group_allow_priority_flags(uint32_t panthor_flags)
+{
+   uint32_t kmod_flags = 0;
+
+   if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_REALTIME))
+      kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_REALTIME;
+
+   if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_HIGH))
+      kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH;
+
+   if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_MEDIUM))
+      kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_MEDIUM;
+
+   if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_LOW))
+      kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_LOW;
+
+   return kmod_flags;
+}
+
+static void
+panthor_dev_query_thread_props(struct panthor_kmod_dev *panthor_dev)
+{
+   struct pan_kmod_dev_props *props = &panthor_dev->base.props;
+
+   props->max_threads_per_wg = panthor_dev->props.gpu.thread_max_workgroup_size;
+   props->max_threads_per_core = panthor_dev->props.gpu.max_threads;
+   props->max_tasks_per_core = panthor_dev->props.gpu.thread_features >> 24;
+   props->num_registers_per_core =
+      panthor_dev->props.gpu.thread_features & 0x3fffff;
+
+   /* We assume that all thread properties are populated. If we ever have a GPU
+    * that have one of the THREAD_xxx register that's zero, we can always add a
+    * quirk here.
+    */
+   assert(props->max_threads_per_wg && props->max_threads_per_core &&
+          props->max_tasks_per_core && props->num_registers_per_core);
+
+   /* There is no THREAD_TLS_ALLOC register on v10+, and the maximum number
+    * of TLS instance per core is assumed to be the maximum number of threads
+    * per core.
+    */
+   props->max_tls_instance_per_core = props->max_threads_per_core;
+}
+
+static void
+panthor_dev_query_props(struct panthor_kmod_dev *panthor_dev)
+{
+   struct pan_kmod_dev_props *props = &panthor_dev->base.props;
+
+   *props = (struct pan_kmod_dev_props){
+      .gpu_id = panthor_dev->props.gpu.gpu_id,
+      .gpu_variant = panthor_dev->props.gpu.core_features & 0xff,
+      .shader_present = panthor_dev->props.gpu.shader_present,
+      .tiler_features = panthor_dev->props.gpu.tiler_features,
+      .mem_features = panthor_dev->props.gpu.mem_features,
+      .mmu_features = panthor_dev->props.gpu.mmu_features,
+
+      /* This register does not exist because AFBC is no longer optional. */
+      .afbc_features = 0,
+
+      /* Access to timstamp from the GPU is always supported on Panthor. */
+      .gpu_can_query_timestamp = true,
+
+      .timestamp_frequency = panthor_dev->props.timestamp.timestamp_frequency,
+
+      .allowed_group_priorities_mask = to_kmod_group_allow_priority_flags(
+         panthor_dev->props.group_priorities.allowed_mask),
+   };
+
+   if (panthor_dev->base.driver.version.major > 1 ||
+       panthor_dev->base.driver.version.minor >= 6)
+      props->timestamp_device_coherent = true;
+
+   static_assert(sizeof(props->texture_features) ==
+                    sizeof(panthor_dev->props.gpu.texture_features),
+                 "Mismatch in texture_features array size");
+
+   memcpy(props->texture_features, panthor_dev->props.gpu.texture_features,
+          sizeof(props->texture_features));
+
+   panthor_dev_query_thread_props(panthor_dev);
+}
+
 static struct pan_kmod_dev *
 panthor_kmod_dev_create(int fd, uint32_t flags, drmVersionPtr version,
                         const struct pan_kmod_allocator *allocator)
@@ -195,8 +279,11 @@ panthor_kmod_dev_create(int fd, uint32_t flags, drmVersionPtr version,
    }

    assert(!ret);
-   pan_kmod_dev_init(&panthor_dev->base, fd, flags, version, &panthor_kmod_ops,
-                     allocator);
+
+   pan_kmod_dev_init(&panthor_dev->base, fd, flags, version,
+                     &panthor_kmod_ops, allocator);
+   panthor_dev_query_props(panthor_dev);
+
    return &panthor_dev->base;

 err_free_dev:
@@ -215,91 +302,6 @@ panthor_kmod_dev_destroy(struct pan_kmod_dev *dev)
    pan_kmod_free(dev->allocator, panthor_dev);
 }

-static uint32_t
-to_kmod_group_allow_priority_flags(uint32_t panthor_flags)
-{
-   uint32_t kmod_flags = 0;
-
-   if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_REALTIME))
-      kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_REALTIME;
-
-   if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_HIGH))
-      kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH;
-
-   if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_MEDIUM))
-      kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_MEDIUM;
-
-   if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_LOW))
-      kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_LOW;
-
-   return kmod_flags;
-}
-
-static void
-panthor_dev_query_thread_props(const struct panthor_kmod_dev *panthor_dev,
-                               struct pan_kmod_dev_props *props)
-{
-   props->max_threads_per_wg = panthor_dev->props.gpu.thread_max_workgroup_size;
-   props->max_threads_per_core = panthor_dev->props.gpu.max_threads;
-   props->max_tasks_per_core = panthor_dev->props.gpu.thread_features >> 24;
-   props->num_registers_per_core =
-      panthor_dev->props.gpu.thread_features & 0x3fffff;
-
-   /* We assume that all thread properties are populated. If we ever have a GPU
-    * that have one of the THREAD_xxx register that's zero, we can always add a
-    * quirk here.
-    */
-   assert(props->max_threads_per_wg && props->max_threads_per_core &&
-          props->max_tasks_per_core && props->num_registers_per_core);
-
-   /* There is no THREAD_TLS_ALLOC register on v10+, and the maximum number
-    * of TLS instance per core is assumed to be the maximum number of threads
-    * per core.
-    */
-   props->max_tls_instance_per_core = props->max_threads_per_core;
-}
-
-static void
-panthor_dev_query_props(const struct pan_kmod_dev *dev,
-                        struct pan_kmod_dev_props *props)
-{
-   struct panthor_kmod_dev *panthor_dev =
-      container_of(dev, struct panthor_kmod_dev, base);
-
-   *props = (struct pan_kmod_dev_props){
-      .gpu_id = panthor_dev->props.gpu.gpu_id,
-      .gpu_variant = panthor_dev->props.gpu.core_features & 0xff,
-      .shader_present = panthor_dev->props.gpu.shader_present,
-      .tiler_features = panthor_dev->props.gpu.tiler_features,
-      .mem_features = panthor_dev->props.gpu.mem_features,
-      .mmu_features = panthor_dev->props.gpu.mmu_features,
-
-      /* This register does not exist because AFBC is no longer optional. */
-      .afbc_features = 0,
-
-      /* Access to timstamp from the GPU is always supported on Panthor. */
-      .gpu_can_query_timestamp = true,
-
-      .timestamp_frequency = panthor_dev->props.timestamp.timestamp_frequency,
-
-      .allowed_group_priorities_mask = to_kmod_group_allow_priority_flags(
-         panthor_dev->props.group_priorities.allowed_mask),
-   };
-
-   if (dev->driver.version.major > 1 || dev->driver.version.minor >= 6) {
-      props->timestamp_device_coherent = true;
-   }
-
-   static_assert(sizeof(props->texture_features) ==
-                    sizeof(panthor_dev->props.gpu.texture_features),
-                 "Mismatch in texture_features array size");
-
-   memcpy(props->texture_features, panthor_dev->props.gpu.texture_features,
-          sizeof(props->texture_features));
-
-   panthor_dev_query_thread_props(panthor_dev, props);
-}
-
 static struct pan_kmod_va_range
 panthor_kmod_dev_query_user_va_range(const struct pan_kmod_dev *dev)
 {
@@ -706,10 +708,6 @@ static struct pan_kmod_vm *
 panthor_kmod_vm_create(struct pan_kmod_dev *dev, uint32_t flags,
                        uint64_t user_va_start, uint64_t user_va_range)
 {
-   struct pan_kmod_dev_props props;
-
-   panthor_dev_query_props(dev, &props);
-
    struct panthor_kmod_vm *panthor_vm =
       pan_kmod_dev_alloc(dev, sizeof(*panthor_vm));
    if (!panthor_vm) {
@@ -1236,7 +1234,6 @@ panthor_kmod_bo_label(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const ch
 const struct pan_kmod_ops panthor_kmod_ops = {
    .dev_create = panthor_kmod_dev_create,
    .dev_destroy = panthor_kmod_dev_destroy,
-   .dev_query_props = panthor_dev_query_props,
    .dev_query_user_va_range = panthor_kmod_dev_query_user_va_range,
    .bo_alloc = panthor_kmod_bo_alloc,
    .bo_free = panthor_kmod_bo_free,
@@ -82,8 +82,7 @@ pan_perf_init(struct pan_perf *perf, int fd)
    perf->dev = pan_kmod_dev_create(fd, 0, NULL);
    assert(perf->dev);

-   struct pan_kmod_dev_props props = {};
-   pan_kmod_dev_query_props(perf->dev, &props);
+   struct pan_kmod_dev_props props = perf->dev->props;

    const struct pan_model *model =
       pan_get_model(props.gpu_id, props.gpu_variant);
@@ -71,10 +71,7 @@ main(void)

    struct pan_kmod_dev *dev =
       pan_kmod_dev_create(fd, PAN_KMOD_DEV_FLAG_OWNS_FD, NULL);
-   struct pan_kmod_dev_props props;
-
-   pan_kmod_dev_query_props(dev, &props);
-
+   struct pan_kmod_dev_props props = dev->props;
    uint32_t supported = pan_query_compressed_formats(&props);
    bool all_ok = true;

@@ -288,7 +288,7 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev)
    nir_builder b = nir_builder_init_simple_shader(
       MESA_SHADER_COMPUTE,
       pan_get_nir_shader_compiler_options(
-         pan_arch(phys_dev->kmod.props.gpu_id)),
+         pan_arch(phys_dev->kmod.dev->props.gpu_id)),
       "%s", "desc_copy");

    /* We actually customize that at execution time to issue the
@@ -302,8 +302,8 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev)
    single_desc_copy(&b, desc_copy_id);

    struct pan_compile_inputs inputs = {
-      .gpu_id = phys_dev->kmod.props.gpu_id,
-      .gpu_variant = phys_dev->kmod.props.gpu_variant,
+      .gpu_id = phys_dev->kmod.dev->props.gpu_id,
+      .gpu_variant = phys_dev->kmod.dev->props.gpu_variant,
    };

    pan_preprocess_nir(b.shader, inputs.gpu_id);
@@ -677,7 +677,7 @@ panvk_per_arch(calculate_task_axis_and_increment)(
 {
    /* Pick the task_axis and task_increment to maximize thread
     * utilization. */
-   const struct pan_kmod_dev_props *props = &phys_dev->kmod.props;
+   const struct pan_kmod_dev_props *props = &phys_dev->kmod.dev->props;
    const unsigned max_thread_cnt =
       pan_compute_max_thread_count(props, shader->info.work_reg_count);
    const unsigned threads_per_wg = shader->cs.local_size.x *
@@ -64,11 +64,11 @@ emit_tls(struct panvk_cmd_buffer *cmdbuf)
    struct panvk_physical_device *phys_dev =
       to_panvk_physical_device(dev->vk.physical);
    unsigned core_id_range;
-   pan_query_core_count(&phys_dev->kmod.props, &core_id_range);
+   pan_query_core_count(&phys_dev->kmod.dev->props, &core_id_range);

    if (cmdbuf->state.tls.info.tls.size) {
       unsigned thread_tls_alloc =
-         pan_query_thread_tls_alloc(&phys_dev->kmod.props);
+         pan_query_thread_tls_alloc(&phys_dev->kmod.dev->props);
       unsigned size = pan_get_total_stack_size(cmdbuf->state.tls.info.tls.size,
                                                thread_tls_alloc, core_id_range);
@@ -823,7 +823,7 @@ init_cs_builders(struct panvk_cmd_buffer *cmdbuf)
       .nr_registers = csif_info->cs_reg_count,
       .nr_kernel_registers = MAX2(csif_info->unpreserved_cs_reg_count, 4),
       .compute_ep_limit =
-         PAN_ARCH >= 12 ? phys_dev->kmod.props.max_tasks_per_core : 0,
+         PAN_ARCH >= 12 ? phys_dev->kmod.dev->props.max_tasks_per_core : 0,
       .alloc_buffer = alloc_cs_buffer,
       .cookie = cmdbuf,
       .ls_sb_slot = SB_ID(LS),
@@ -87,10 +87,10 @@ panvk_per_arch(cmd_dispatch_prepare_tls)(

    if (tlsinfo.wls.size) {
       unsigned core_id_range;
-      pan_query_core_count(&phys_dev->kmod.props, &core_id_range);
+      pan_query_core_count(&phys_dev->kmod.dev->props, &core_id_range);

       tlsinfo.wls.instances = pan_calc_wls_instances(
-         &cs->cs.local_size, &phys_dev->kmod.props, indirect ? NULL : dim);
+         &cs->cs.local_size, &phys_dev->kmod.dev->props, indirect ? NULL : dim);

       unsigned wls_total_size = pan_calc_total_wls_size(
          tlsinfo.wls.size, tlsinfo.wls.instances, core_id_range);
@@ -156,7 +156,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
    unsigned wg_per_task = 0;
    if (indirect)
       wg_per_task = pan_calc_workgroups_per_task(&cs->cs.local_size,
-                                                 &phys_dev->kmod.props);
+                                                 &phys_dev->kmod.dev->props);

    if (compute_state_dirty(cmdbuf, DESC_STATE) ||
        compute_state_dirty(cmdbuf, CS)) {
@@ -1000,7 +1000,7 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
       to_panvk_physical_device(cmdbuf->vk.base.device->physical);
    const bool tracing_enabled = PANVK_DEBUG(TRACE);
    struct pan_tiler_features tiler_features =
-      pan_query_tiler_features(&phys_dev->kmod.props);
+      pan_query_tiler_features(&phys_dev->kmod.dev->props);
    bool simul_use =
       cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
    struct pan_ptr tiler_desc = {0};
@@ -531,7 +531,7 @@ init_subqueue(struct panvk_gpu_queue *queue, enum panvk_subqueue_id subqueue)
                        subqueue);
       pandecode_cs_binary(dev->debug.decode_ctx, qsubmit.stream_addr,
                           qsubmit.stream_size,
-                          phys_dev->kmod.props.gpu_id);
+                          phys_dev->kmod.dev->props.gpu_id);
    }

    return VK_SUCCESS;
@@ -778,7 +778,7 @@ panvk_queue_submit_init(struct panvk_queue_submit *submit,

    submit->process_utrace =
       u_trace_should_process(&submit->dev->utrace.utctx) &&
-      submit->phys_dev->kmod.props.timestamp_frequency;
+      submit->phys_dev->kmod.dev->props.timestamp_frequency;

    submit->force_sync = PANVK_DEBUG(TRACE) || PANVK_DEBUG(SYNC);
 }
@@ -1201,7 +1201,8 @@ panvk_queue_submit_process_debug(const struct panvk_queue_submit *submit)
    struct pandecode_context *decode_ctx = submit->dev->debug.decode_ctx;

    if (PANVK_DEBUG(TRACE)) {
-      const struct pan_kmod_dev_props *props = &submit->phys_dev->kmod.props;
+      const struct pan_kmod_dev_props *props =
+         &submit->phys_dev->kmod.dev->props;

       for (uint32_t i = 0; i < submit->qsubmit_count; i++) {
          const struct drm_panthor_queue_submit *qsubmit = &submit->qsubmits[i];
@@ -117,10 +117,10 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)

    if (batch->tlsinfo.tls.size) {
       unsigned thread_tls_alloc =
-         pan_query_thread_tls_alloc(&phys_dev->kmod.props);
+         pan_query_thread_tls_alloc(&phys_dev->kmod.dev->props);
       unsigned core_id_range;

-      pan_query_core_count(&phys_dev->kmod.props, &core_id_range);
+      pan_query_core_count(&phys_dev->kmod.dev->props, &core_id_range);

       unsigned size = pan_get_total_stack_size(batch->tlsinfo.tls.size,
                                                thread_tls_alloc, core_id_range);
@@ -50,9 +50,9 @@ panvk_per_arch(cmd_dispatch_prepare_tls)(
    if (batch->tlsinfo.wls.size) {
       unsigned core_id_range;

-      pan_query_core_count(&phys_dev->kmod.props, &core_id_range);
+      pan_query_core_count(&phys_dev->kmod.dev->props, &core_id_range);
       batch->tlsinfo.wls.instances = pan_calc_wls_instances(
-         &cs->cs.local_size, &phys_dev->kmod.props, indirect ? NULL : dim);
+         &cs->cs.local_size, &phys_dev->kmod.dev->props, indirect ? NULL : dim);
       batch->wls_total_size = pan_calc_total_wls_size(
          batch->tlsinfo.wls.size, batch->tlsinfo.wls.instances, core_id_range);
    }
@@ -77,7 +77,7 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc

    if (PANVK_DEBUG(TRACE)) {
       pandecode_jc(dev->debug.decode_ctx, batch->vtc_jc.first_job,
-                   phys_dev->kmod.props.gpu_id);
+                   phys_dev->kmod.dev->props.gpu_id);
    }

    if (PANVK_DEBUG(DUMP))
@@ -85,7 +85,7 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc

       if (PANVK_DEBUG(SYNC))
          pandecode_abort_on_fault(dev->debug.decode_ctx, submit.jc,
-                                  phys_dev->kmod.props.gpu_id);
+                                  phys_dev->kmod.dev->props.gpu_id);
    }

    if (batch->frag_jc.first_job) {
@@ -115,14 +115,14 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc

       if (PANVK_DEBUG(TRACE))
          pandecode_jc(dev->debug.decode_ctx, batch->frag_jc.first_job,
-                      phys_dev->kmod.props.gpu_id);
+                      phys_dev->kmod.dev->props.gpu_id);

       if (PANVK_DEBUG(DUMP))
          pandecode_dump_mappings(dev->debug.decode_ctx);

       if (PANVK_DEBUG(SYNC))
          pandecode_abort_on_fault(dev->debug.decode_ctx, submit.jc,
-                                  phys_dev->kmod.props.gpu_id);
+                                  phys_dev->kmod.dev->props.gpu_id);
    }

    if (PANVK_DEBUG(TRACE))
@@ -247,7 +247,7 @@ panvk_select_tiler_hierarchy_mask(const struct panvk_physical_device *phys_dev,
                                   unsigned bin_ptr_mem_budget)
 {
    struct pan_tiler_features tiler_features =
-      pan_query_tiler_features(&phys_dev->kmod.props);
+      pan_query_tiler_features(&phys_dev->kmod.dev->props);

    uint32_t hierarchy_mask = GENX(pan_select_tiler_hierarchy_mask)(
       state->render.fb.info.width, state->render.fb.info.height,
@@ -54,7 +54,7 @@ panvk_image_can_use_afbc(
    VkImageUsageFlags usage, VkImageType type, VkImageTiling tiling,
    VkImageCreateFlags flags)
 {
-   unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
+   unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id);
    enum pipe_format pfmt = vk_format_to_pipe_format(fmt);

    /* Disallow AFBC if either of these is true
@@ -79,7 +79,7 @@ panvk_image_can_use_afbc(
    return !PANVK_DEBUG(NO_AFBC) &&
          !(usage &
            (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_HOST_TRANSFER_BIT)) &&
-         pan_query_afbc(&phys_dev->kmod.props) &&
+         pan_query_afbc(&phys_dev->kmod.dev->props) &&
          pan_afbc_supports_format(arch, pfmt) &&
          tiling != VK_IMAGE_TILING_LINEAR && type != VK_IMAGE_TYPE_1D &&
          (type != VK_IMAGE_TYPE_3D || arch >= 7) &&
@@ -143,7 +143,7 @@ get_plane_count(struct panvk_image *image)

    struct panvk_physical_device *phys_dev =
       to_panvk_physical_device(image->vk.base.device->physical);
-   unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
+   unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id);

    /* Z32_S8X24 is not supported on v9+, and we don't want to use it
     * on v7- anyway, because it's less efficient than the multiplanar
@@ -208,7 +208,7 @@ panvk_image_can_use_mod(struct panvk_image *image,
 {
    struct panvk_physical_device *phys_dev =
       to_panvk_physical_device(image->vk.base.device->physical);
-   unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
+   unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id);
    const bool forced_linear = PANVK_DEBUG(LINEAR) ||
                               image->vk.tiling == VK_IMAGE_TILING_LINEAR ||
                               image->vk.image_type == VK_IMAGE_TYPE_1D;
@@ -298,7 +298,7 @@ panvk_image_can_use_mod(struct panvk_image *image,
    };

    enum pan_mod_support supported =
-      pan_image_test_props(&phys_dev->kmod.props, &iprops, iusage);
+      pan_image_test_props(&phys_dev->kmod.dev->props, &iprops, iusage);
    if (supported == PAN_MOD_NOT_SUPPORTED ||
        (supported == PAN_MOD_NOT_OPTIMAL && optimal_only))
       return false;
@@ -413,7 +413,7 @@ panvk_image_init_layouts(struct panvk_image *image,
    struct panvk_device *dev = to_panvk_device(image->vk.base.device);
    struct panvk_physical_device *phys_dev =
       to_panvk_physical_device(dev->vk.physical);
-   unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
+   unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id);
    const VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_info =
       vk_find_struct_const(
          pCreateInfo->pNext,
@@ -145,8 +145,8 @@ init_shader_caches(struct panvk_physical_device *device,
    _mesa_sha1_update(&sha_ctx, instance->driver_build_sha,
                      sizeof(instance->driver_build_sha));

-   _mesa_sha1_update(&sha_ctx, &device->kmod.props.gpu_id,
-                     sizeof(device->kmod.props.gpu_id));
+   _mesa_sha1_update(&sha_ctx, &device->kmod.dev->props.gpu_id,
+                     sizeof(device->kmod.dev->props.gpu_id));

    unsigned char sha[SHA1_DIGEST_LENGTH];
    _mesa_sha1_final(&sha_ctx, sha);
@@ -157,7 +157,7 @@ init_shader_caches(struct panvk_physical_device *device,
 #ifdef ENABLE_SHADER_CACHE
    char renderer[17];
    ASSERTED int len = snprintf(renderer, sizeof(renderer), "panvk_0x%08x",
-                               device->kmod.props.gpu_id);
+                               device->kmod.dev->props.gpu_id);
    assert(len == sizeof(renderer) - 1);

    char timestamp[SHA1_DIGEST_STRING_LENGTH];
@@ -186,7 +186,7 @@ get_core_mask(struct panvk_physical_device *device,
               const struct panvk_instance *instance, const char *option_name,
               uint64_t *mask)
 {
-   uint64_t present = device->kmod.props.shader_present;
+   uint64_t present = device->kmod.dev->props.shader_present;
    *mask = driQueryOptionu64(&instance->dri_options, option_name) & present;

    if (!*mask)
@@ -218,7 +218,7 @@ static VkResult
 get_device_sync_types(struct panvk_physical_device *device,
                       const struct panvk_instance *instance)
 {
-   const unsigned arch = pan_arch(device->kmod.props.gpu_id);
+   const unsigned arch = pan_arch(device->kmod.dev->props.gpu_id);
    uint32_t sync_type_count = 0;

    device->drm_syncobj_type = vk_drm_syncobj_get_type(device->kmod.dev->fd);
@@ -252,12 +252,12 @@ get_device_sync_types(struct panvk_physical_device *device,
 float
 panvk_get_gpu_system_timestamp_period(const struct panvk_physical_device *device)
 {
-   if (!device->kmod.props.gpu_can_query_timestamp ||
-       !device->kmod.props.timestamp_frequency)
+   if (!device->kmod.dev->props.gpu_can_query_timestamp ||
+       !device->kmod.dev->props.timestamp_frequency)
       return 0;

    const float ns_per_s = 1000000000.0;
-   return ns_per_s / (float)device->kmod.props.timestamp_frequency;
+   return ns_per_s / (float)device->kmod.dev->props.timestamp_frequency;
 }

 void
@@ -283,18 +283,16 @@ panvk_physical_device_init(struct panvk_physical_device *device,
    if (result != VK_SUCCESS)
       return result;

-   pan_kmod_dev_query_props(device->kmod.dev, &device->kmod.props);
-
-   device->model = pan_get_model(device->kmod.props.gpu_id,
-                                 device->kmod.props.gpu_variant);
+   device->model = pan_get_model(device->kmod.dev->props.gpu_id,
+                                 device->kmod.dev->props.gpu_variant);

-   unsigned arch = pan_arch(device->kmod.props.gpu_id);
+   unsigned arch = pan_arch(device->kmod.dev->props.gpu_id);

    if (!device->model) {
       result = panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                             "Unknown gpu_id (%#x) or variant (%#x)",
-                            device->kmod.props.gpu_id,
-                            device->kmod.props.gpu_variant);
+                            device->kmod.dev->props.gpu_id,
+                            device->kmod.dev->props.gpu_variant);
       goto fail;
    }

@@ -400,13 +398,13 @@ panvk_fill_global_priority(const struct panvk_physical_device *physical_device,
                            uint32_t family_idx,
                            VkQueueFamilyGlobalPriorityPropertiesKHR *prio)
 {
-   const unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
+   const unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id);
    uint32_t prio_idx = 0;

    switch (family_idx) {
    case PANVK_QUEUE_FAMILY_GPU: {
       enum pan_kmod_group_allow_priority_flags prio_mask =
-         physical_device->kmod.props.allowed_group_priorities_mask;
+         physical_device->kmod.dev->props.allowed_group_priorities_mask;

       /* Non-medium priority context is not hooked-up in the JM backend, even
        * though the panfrost kmod advertize it. Manually filter non-medium
@@ -445,7 +443,7 @@ panvk_GetPhysicalDeviceQueueFamilyProperties2(
    VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
    VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
                           pQueueFamilyPropertyCount);
-   unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
+   unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id);

    const VkQueueFamilyProperties qfamily_props[PANVK_QUEUE_FAMILY_COUNT] = {
       [PANVK_QUEUE_FAMILY_GPU] = {
@@ -455,7 +453,8 @@ panvk_GetPhysicalDeviceQueueFamilyProperties2(
             some CTS tests */
          .queueCount = arch >= 10 ? 2 : 1,
          .timestampValidBits =
-            arch >= 10 && physical_device->kmod.props.gpu_can_query_timestamp
+            arch >= 10 &&
+                  physical_device->kmod.dev->props.gpu_can_query_timestamp
                ? 64
                : 0,
          .minImageTransferGranularity = {1, 1, 1},
@@ -525,7 +524,7 @@ panvk_CreateDevice(VkPhysicalDevice physicalDevice,
                    const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
 {
    VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
-   unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
+   unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id);
    VkResult result = VK_ERROR_INITIALIZATION_FAILED;

    panvk_arch_dispatch_ret(arch, create_device, result, physical_device,
@@ -540,7 +539,7 @@ panvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
    VK_FROM_HANDLE(panvk_device, device, _device);
    struct panvk_physical_device *physical_device =
       to_panvk_physical_device(device->vk.physical);
-   unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
+   unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id);

    panvk_arch_dispatch(arch, destroy_device, device, pAllocator);
 }
@@ -580,7 +579,7 @@ format_is_supported(struct panvk_physical_device *physical_device,
     * the supported formats reported by the GPU. */
    if (util_format_is_compressed(pfmt)) {
       uint32_t supported_compr_fmts =
-         pan_query_compressed_formats(&physical_device->kmod.props);
+         pan_query_compressed_formats(&physical_device->kmod.dev->props);

       if (!(BITFIELD_BIT(fmt.texfeat_bit) & supported_compr_fmts))
          return false;
@@ -596,7 +595,7 @@ get_image_plane_format_features(struct panvk_physical_device *physical_device,
    VkFormatFeatureFlags2 features = 0;
    enum pipe_format pfmt = vk_format_to_pipe_format(format);
    const struct pan_format fmt = physical_device->formats.all[pfmt];
-   unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
+   unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id);

    if (!format_is_supported(physical_device, fmt, pfmt))
       return 0;
@@ -736,7 +735,7 @@ static VkFormatFeatureFlags2
 get_image_format_sample_counts(struct panvk_physical_device *physical_device,
                                VkFormat format)
 {
-   unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
+   unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id);
    unsigned max_tib_size = pan_query_tib_size(physical_device->model);
    unsigned max_cbuf_atts = pan_get_max_cbufs(arch, max_tib_size);

@@ -782,7 +781,7 @@ panvk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,
                                          VkFormatProperties2 *pFormatProperties)
 {
    VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
-   const unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
+   const unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id);

    VkFormatFeatureFlags2 tex =
       get_image_format_features(physical_device, format);
@@ -846,7 +845,7 @@ panvk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,
 static VkExtent3D
 get_max_2d_image_size(struct panvk_physical_device *phys_dev, VkFormat format)
 {
-   const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
+   const unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id);
    const uint64_t max_img_size_B =
       arch <= 10 ? u_uintN_max(32) : u_uintN_max(48);
    const enum pipe_format pfmt = vk_format_to_pipe_format(format);
@@ -869,7 +868,7 @@ get_max_2d_image_size(struct panvk_physical_device *phys_dev, VkFormat format)
 static VkExtent3D
 get_max_3d_image_size(struct panvk_physical_device *phys_dev, VkFormat format)
 {
-   const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
+   const unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id);
    const uint64_t max_img_size_B =
       arch <= 10 ? u_uintN_max(32) : u_uintN_max(48);
    enum pipe_format pfmt = vk_format_to_pipe_format(format);
@@ -30,7 +30,6 @@ struct panvk_physical_device {

    struct {
       struct pan_kmod_dev *dev;
-      struct pan_kmod_dev_props props;
    } kmod;

    const struct pan_model *model;
@@ -76,7 +76,7 @@ panvk_utrace_read_ts(struct u_trace_context *utctx, void *timestamps,
    struct panvk_device *dev = to_dev(utctx);
    const struct panvk_physical_device *pdev =
       to_panvk_physical_device(dev->vk.physical);
-   const struct pan_kmod_dev_props *props = &pdev->kmod.props;
+   const struct pan_kmod_dev_props *props = &pdev->kmod.dev->props;
    const struct panvk_utrace_buf *buf = timestamps;
    struct panvk_utrace_flush_data *data = flush_data;

@@ -93,7 +93,7 @@ get_gpu_time_ns(struct panvk_device *dev)
 {
    const struct panvk_physical_device *pdev =
       to_panvk_physical_device(dev->vk.physical);
-   const struct pan_kmod_dev_props *props = &pdev->kmod.props;
+   const struct pan_kmod_dev_props *props = &pdev->kmod.dev->props;

    const uint64_t ts = pan_kmod_query_timestamp(dev->kmod.dev);
    return ts * NSEC_PER_SEC / props->timestamp_frequency;
@@ -332,7 +332,7 @@ panvk_utrace_perfetto_init(struct panvk_device *dev, uint32_t queue_count)
 {
    const struct panvk_physical_device *pdev =
       to_panvk_physical_device(dev->vk.physical);
-   const struct pan_kmod_dev_props *props = &pdev->kmod.props;
+   const struct pan_kmod_dev_props *props = &pdev->kmod.dev->props;
    struct panvk_utrace_perfetto *utp = &dev->utrace.utp;

    if (queue_count > PANVK_UTRACE_PERFETTO_QUEUE_COUNT) {
@@ -87,8 +87,8 @@ get_blend_shader(struct panvk_device *dev,

    /* Compile the NIR shader */
    struct pan_compile_inputs inputs = {
-      .gpu_id = pdev->kmod.props.gpu_id,
-      .gpu_variant = pdev->kmod.props.gpu_variant,
+      .gpu_id = pdev->kmod.dev->props.gpu_id,
+      .gpu_variant = pdev->kmod.dev->props.gpu_variant,
       .is_blend = true,
       .blend = {
          .nr_samples = key.info.nr_samples,
@@ -149,8 +149,8 @@ get_preload_shader(struct panvk_device *dev,
    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

    struct pan_compile_inputs inputs = {
-      .gpu_id = phys_dev->kmod.props.gpu_id,
-      .gpu_variant = phys_dev->kmod.props.gpu_variant,
+      .gpu_id = phys_dev->kmod.dev->props.gpu_id,
+      .gpu_variant = phys_dev->kmod.dev->props.gpu_variant,
       .is_blit = true,
    };

@@ -512,7 +512,7 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
     */
    struct panvk_physical_device *pdev =
       to_panvk_physical_device(dev->vk.physical);
-   unsigned gpu_prod_id = pdev->kmod.props.gpu_id >> 16;
+   unsigned gpu_prod_id = pdev->kmod.dev->props.gpu_id >> 16;

    /* the PAN_ARCH check is redundant but allows compiler optimization
       when PAN_ARCH <= 6 */
@@ -204,7 +204,7 @@ static VkResult
 check_global_priority(const struct panvk_physical_device *phys_dev,
                       const VkDeviceQueueCreateInfo *create_info)
 {
-   const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
+   const unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id);
    const VkDeviceQueueGlobalPriorityCreateInfoKHR *priority_info =
       vk_find_struct_const(create_info->pNext,
                            DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
@@ -217,7 +217,7 @@ check_global_priority(const struct panvk_physical_device *phys_dev,
    enum pan_kmod_group_allow_priority_flags requested_prio =
       global_priority_to_group_allow_priority_flag(priority);
    enum pan_kmod_group_allow_priority_flags allowed_prio_mask =
-      phys_dev->kmod.props.allowed_group_priorities_mask;
+      phys_dev->kmod.dev->props.allowed_group_priorities_mask;

    /* Non-medium priority context is not hooked-up in the JM backend, even
     * though the panfrost kmod advertize it. Manually filter non-medium
|
|||
|
|
@ -48,7 +48,8 @@ panvk_per_arch(get_physical_device_extensions)(
|
|||
.KHR_shader_atomic_int64 = PAN_ARCH >= 9,
|
||||
.KHR_bind_memory2 = true,
|
||||
.KHR_buffer_device_address = true,
|
||||
.KHR_calibrated_timestamps = device->kmod.props.gpu_can_query_timestamp,
|
||||
.KHR_calibrated_timestamps =
|
||||
device->kmod.dev->props.gpu_can_query_timestamp,
|
||||
.KHR_copy_commands2 = true,
|
||||
.KHR_create_renderpass2 = true,
|
||||
.KHR_dedicated_allocation = true,
|
||||
|
|
@@ -126,7 +127,8 @@ panvk_per_arch(get_physical_device_extensions)(
    .EXT_4444_formats = true,
    .EXT_border_color_swizzle = true,
    .EXT_buffer_device_address = true,
-   .EXT_calibrated_timestamps = device->kmod.props.gpu_can_query_timestamp,
+   .EXT_calibrated_timestamps =
+      device->kmod.dev->props.gpu_can_query_timestamp,
    .EXT_custom_border_color = true,
    .EXT_depth_bias_control = true,
    .EXT_depth_clamp_zero_one = true,
@@ -198,7 +200,7 @@ has_compressed_formats(const struct panvk_physical_device *physical_device,
                        const uint32_t required_formats)
 {
    uint32_t supported_compr_fmts =
-      pan_query_compressed_formats(&physical_device->kmod.props);
+      pan_query_compressed_formats(&physical_device->kmod.dev->props);

    return (supported_compr_fmts & required_formats) == required_formats;
 }
@@ -275,8 +277,8 @@ panvk_per_arch(get_physical_device_features)(
       .vertexPipelineStoresAndAtomics =
         (PAN_ARCH >= 13 && instance->enable_vertex_pipeline_stores_atomics) ||
         instance->force_enable_shader_atomics,
-      .fragmentStoresAndAtomics = (PAN_ARCH >= 10) ||
-                                  instance->force_enable_shader_atomics,
+      .fragmentStoresAndAtomics =
+         (PAN_ARCH >= 10) || instance->force_enable_shader_atomics,
       .shaderTessellationAndGeometryPointSize = false,
       .shaderImageGatherExtended = true,
       .shaderStorageImageExtendedFormats = true,
@@ -492,8 +494,8 @@ panvk_per_arch(get_physical_device_features)(
       .nullDescriptor = PAN_ARCH >= 10,

       /* VK_KHR_shader_clock */
-      .shaderSubgroupClock = device->kmod.props.gpu_can_query_timestamp,
-      .shaderDeviceClock = device->kmod.props.timestamp_device_coherent,
+      .shaderSubgroupClock = device->kmod.dev->props.gpu_can_query_timestamp,
+      .shaderDeviceClock = device->kmod.dev->props.timestamp_device_coherent,

       /* VK_KHR_shader_quad_control */
       .shaderQuadControl = true,
@@ -590,7 +592,7 @@ panvk_per_arch(get_physical_device_properties)(
    const bool has_disk_cache = device->vk.disk_cache != NULL;

    /* Ensure that the max threads count per workgroup is valid for Bifrost */
-   assert(PAN_ARCH > 8 || device->kmod.props.max_threads_per_wg <= 1024);
+   assert(PAN_ARCH > 8 || device->kmod.dev->props.max_threads_per_wg <= 1024);

    float pointSizeRangeMin;
    float pointSizeRangeMax;
@@ -607,14 +609,14 @@ panvk_per_arch(get_physical_device_properties)(
    *properties = (struct vk_properties){
       .apiVersion = get_api_version(),
       .driverVersion = vk_get_driver_version(),
-      .vendorID = instance->force_vk_vendor ? instance->force_vk_vendor :
-                  ARM_VENDOR_ID,
+      .vendorID =
+         instance->force_vk_vendor ? instance->force_vk_vendor : ARM_VENDOR_ID,

       /* Collect arch_major, arch_minor, arch_rev and product_major,
        * as done by the Arm driver.
        */
-      .deviceID =
-         device->kmod.props.gpu_id & (ARCH_MAJOR | ARCH_MINOR | ARCH_REV | PRODUCT_MAJOR),
+      .deviceID = device->kmod.dev->props.gpu_id &
+                  (ARCH_MAJOR | ARCH_MINOR | ARCH_REV | PRODUCT_MAJOR),
       .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,

       /* Vulkan 1.0 limits */
@@ -740,10 +742,11 @@ panvk_per_arch(get_physical_device_properties)(
       /* We could also split into serveral jobs but this has many limitations.
        * As such we limit to the max threads per workgroup supported by the GPU.
        */
-      .maxComputeWorkGroupInvocations = device->kmod.props.max_threads_per_wg,
-      .maxComputeWorkGroupSize = {device->kmod.props.max_threads_per_wg,
-                                  device->kmod.props.max_threads_per_wg,
-                                  device->kmod.props.max_threads_per_wg},
+      .maxComputeWorkGroupInvocations =
+         device->kmod.dev->props.max_threads_per_wg,
+      .maxComputeWorkGroupSize = {device->kmod.dev->props.max_threads_per_wg,
+                                  device->kmod.dev->props.max_threads_per_wg,
+                                  device->kmod.dev->props.max_threads_per_wg},
       /* 8-bit subpixel precision. */
       .subPixelPrecisionBits = 8,
       .subTexelPrecisionBits = 8,
@@ -789,8 +792,10 @@ panvk_per_arch(get_physical_device_properties)(
       .sampledImageStencilSampleCounts = sample_counts,
       .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
       .maxSampleMaskWords = 1,
-      .timestampComputeAndGraphics = PAN_ARCH >= 10 && device->kmod.props.gpu_can_query_timestamp,
-      .timestampPeriod = PAN_ARCH >= 10 ? panvk_get_gpu_system_timestamp_period(device) : 0,
+      .timestampComputeAndGraphics =
+         PAN_ARCH >= 10 && device->kmod.dev->props.gpu_can_query_timestamp,
+      .timestampPeriod =
+         PAN_ARCH >= 10 ? panvk_get_gpu_system_timestamp_period(device) : 0,
       .maxClipDistances = 0,
       .maxCullDistances = 0,
       .maxCombinedClipAndCullDistances = 0,

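timestampPeriod is the number of nanoseconds per timestamp tick, derived from the frequency the kernel reports in the cached props. A sketch of the conversion panvk_get_gpu_system_timestamp_period presumably performs; only the helper name comes from the diff, the body here is an assumption:

#include <stdint.h>

static float
timestamp_period_ns(uint64_t timestamp_frequency)
{
   if (timestamp_frequency == 0)
      return 0.0f; /* no usable timestamp source */

   /* ns per tick = 1e9 / ticks per second. */
   return 1000000000.0f / (float)timestamp_frequency;
}

/* Example: a 100 MHz counter gives a 10 ns period. */
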
@@ -824,14 +829,11 @@ panvk_per_arch(get_physical_device_properties)(
       .subgroupSupportedStages =
          VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT,
       .subgroupSupportedOperations =
-         VK_SUBGROUP_FEATURE_BASIC_BIT |
-         VK_SUBGROUP_FEATURE_VOTE_BIT |
-         VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
-         VK_SUBGROUP_FEATURE_BALLOT_BIT |
+         VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
+         VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
          VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
          VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
-         VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
-         VK_SUBGROUP_FEATURE_QUAD_BIT |
+         VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
          VK_SUBGROUP_FEATURE_ROTATE_BIT |
          VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT,
       .subgroupQuadOperationsInAllStages = false,

@@ -844,11 +846,20 @@ panvk_per_arch(get_physical_device_properties)(
       .maxMemoryAllocationSize = UINT32_MAX,
 
+      /* Vulkan 1.2 properties */
+      .supportedDepthResolveModes =
+         VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT |
+         VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT,
+      .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
+                                      VK_RESOLVE_MODE_MIN_BIT |
+                                      VK_RESOLVE_MODE_MAX_BIT,
+      .independentResolveNone = true,
+      .independentResolve = true,
       /* VK_KHR_driver_properties */
       .driverID = VK_DRIVER_ID_MESA_PANVK,
       .conformanceVersion = get_conformance_version(),
-      .denormBehaviorIndependence = PAN_ARCH >= 9 ?
-                                    VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE :
-                                    VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
+      .denormBehaviorIndependence =
+         PAN_ARCH >= 9 ? VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE
+                       : VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
       .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
       .shaderSignedZeroInfNanPreserveFloat16 = true,
       .shaderSignedZeroInfNanPreserveFloat32 = true,

@@ -867,6 +878,8 @@ panvk_per_arch(get_physical_device_properties)(
       .shaderRoundingModeRTZFloat64 = false,
-      .maxUpdateAfterBindDescriptorsInAllPools =
-         PAN_ARCH >= 9 ? UINT32_MAX : 0,
+
+      /* VK_EXT_descriptor_indexing */
+      .maxUpdateAfterBindDescriptorsInAllPools = PAN_ARCH >= 9 ? UINT32_MAX : 0,
+
       .shaderUniformBufferArrayNonUniformIndexingNative = false,
       .shaderSampledImageArrayNonUniformIndexingNative = false,
       .shaderStorageBufferArrayNonUniformIndexingNative = false,

@@ -922,7 +935,8 @@ panvk_per_arch(get_physical_device_properties)(
       .minSubgroupSize = pan_subgroup_size(PAN_ARCH),
       .maxSubgroupSize = pan_subgroup_size(PAN_ARCH),
       .maxComputeWorkgroupSubgroups =
-         device->kmod.props.max_threads_per_wg / pan_subgroup_size(PAN_ARCH),
+         device->kmod.dev->props.max_threads_per_wg /
+            pan_subgroup_size(PAN_ARCH),
       .requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT,
       .maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE,
       .maxPerStageDescriptorInlineUniformBlocks =

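maxComputeWorkgroupSubgroups is straight division: the largest workgroup the GPU accepts over the fixed subgroup width. A worked example with assumed numbers (a 1024-thread workgroup limit and 16-wide subgroups):

#include <assert.h>

int main(void)
{
   unsigned max_threads_per_wg = 1024; /* assumed example value */
   unsigned subgroup_size = 16;        /* assumed subgroup width */

   unsigned max_wg_subgroups = max_threads_per_wg / subgroup_size;
   assert(max_wg_subgroups == 64);
   return 0;
}
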
@@ -1054,9 +1068,9 @@ panvk_per_arch(get_physical_device_properties)(
       .fmaRate = device->model->rates.fma,
 
       /* VK_ARM_shader_core_builtins */
-      .shaderCoreMask = device->kmod.props.shader_present,
-      .shaderCoreCount = util_bitcount(device->kmod.props.shader_present),
-      .shaderWarpsPerCore = device->kmod.props.max_threads_per_core /
+      .shaderCoreMask = device->kmod.dev->props.shader_present,
+      .shaderCoreCount = util_bitcount(device->kmod.dev->props.shader_present),
+      .shaderWarpsPerCore = device->kmod.dev->props.max_threads_per_core /
                            (pan_subgroup_size(PAN_ARCH) * 2),
    };

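shaderWarpsPerCore divides the per-core thread capacity by twice the subgroup width, matching the expression in the hunk above. A sketch of the arithmetic with invented values:

#include <stdio.h>

int main(void)
{
   unsigned max_threads_per_core = 2048; /* invented example value */
   unsigned subgroup_size = 16;          /* assumed subgroup width */

   unsigned warps_per_core = max_threads_per_core / (subgroup_size * 2);
   printf("%u warps per core\n", warps_per_core); /* prints 64 */
   return 0;
}
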
@@ -52,7 +52,7 @@ panvk_per_arch(CreateQueryPool)(VkDevice _device,
    const struct panvk_physical_device *phys_dev =
       to_panvk_physical_device(device->vk.physical);
 
-   pan_query_core_count(&phys_dev->kmod.props, &reports_per_query);
+   pan_query_core_count(&phys_dev->kmod.dev->props, &reports_per_query);
 #else
    reports_per_query = 1;
 #endif

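pan_query_core_count sizes per-query report storage from the shader_present mask in the cached props. A sketch under the assumption that every possible core ID up to the highest set bit needs its own report slot, even when the mask is sparse; the helper below is hypothetical:

#include <stdint.h>

static unsigned
reports_per_query_from_mask(uint64_t shader_present)
{
   unsigned max_id = 0;

   /* Walk up to the highest set bit. */
   while (shader_present >> (max_id + 1))
      max_id++;

   return max_id + 1; /* e.g. mask 0b1011 -> 4 report slots */
}
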
@@ -370,7 +370,8 @@ panvk_get_nir_options(UNUSED struct vk_physical_device *vk_pdev,
                       UNUSED const struct vk_pipeline_robustness_state *rs)
 {
    struct panvk_physical_device *phys_dev = to_panvk_physical_device(vk_pdev);
-   return pan_get_nir_shader_compiler_options(pan_arch(phys_dev->kmod.props.gpu_id));
+   return pan_get_nir_shader_compiler_options(
+      pan_arch(phys_dev->kmod.dev->props.gpu_id));
 }
 
 static struct spirv_to_nir_options

@@ -439,7 +440,7 @@ panvk_preprocess_nir(struct vk_physical_device *vk_pdev,
     *
     * This would give us a better place to do panvk-specific lowering.
     */
-   pan_nir_lower_texture_early(nir, pdev->kmod.props.gpu_id);
+   pan_nir_lower_texture_early(nir, pdev->kmod.dev->props.gpu_id);
    NIR_PASS(_, nir, nir_lower_system_values);
 
    nir_lower_compute_system_values_options options = {

@@ -451,17 +452,18 @@ panvk_preprocess_nir(struct vk_physical_device *vk_pdev,
    if (nir->info.stage == MESA_SHADER_FRAGMENT)
       NIR_PASS(_, nir, nir_lower_wpos_center);
 
-   pan_optimize_nir(nir, pdev->kmod.props.gpu_id);
+   pan_optimize_nir(nir, pdev->kmod.dev->props.gpu_id);
 
    NIR_PASS(_, nir, nir_split_var_copies);
    NIR_PASS(_, nir, nir_lower_var_copies);
 
-   assert(pdev->kmod.props.shader_present != 0);
-   uint64_t core_max_id = util_last_bit(pdev->kmod.props.shader_present) - 1;
+   assert(pdev->kmod.dev->props.shader_present != 0);
+   uint64_t core_max_id =
+      util_last_bit(pdev->kmod.dev->props.shader_present) - 1;
    NIR_PASS(_, nir, nir_inline_sysval, nir_intrinsic_load_core_max_id_arm,
             core_max_id);
 
-   pan_preprocess_nir(nir, pdev->kmod.props.gpu_id);
+   pan_preprocess_nir(nir, pdev->kmod.dev->props.gpu_id);
 }
 
 static void

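The core_max_id computation relies on util_last_bit returning the index one past the most-significant set bit (0 for an empty mask), which is why the assert on a non-zero shader_present comes first. A small self-contained check with a portable stand-in for the Mesa helper:

#include <assert.h>
#include <stdint.h>

/* Portable stand-in for util_last_bit(): highest set bit index + 1. */
static unsigned
last_bit(uint64_t v)
{
   unsigned n = 0;

   while (v) {
      v >>= 1;
      n++;
   }
   return n;
}

int main(void)
{
   uint64_t shader_present = 0xb; /* sparse mask: cores 0, 1 and 3 */

   assert(shader_present != 0);
   unsigned core_max_id = last_bit(shader_present) - 1;
   assert(core_max_id == 3);
   return 0;
}
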
@@ -1289,8 +1291,8 @@ panvk_compile_shader(struct panvk_device *dev,
       robust2_modes |= nir_var_mem_ssbo;
 
    struct pan_compile_inputs inputs = {
-      .gpu_id = phys_dev->kmod.props.gpu_id,
-      .gpu_variant = phys_dev->kmod.props.gpu_variant,
+      .gpu_id = phys_dev->kmod.dev->props.gpu_id,
+      .gpu_variant = phys_dev->kmod.dev->props.gpu_variant,
       .view_mask = (state && state->rp) ? state->rp->view_mask : 0,
       .robust2_modes = robust2_modes,
       .robust_descriptors = dev->vk.enabled_features.nullDescriptor,