pan/kmod: Cache the device props at the pan_kmod_dev level

The frontend is going to query the device props anyway, so let's just
query them at device creation time and store them in pan_kmod_dev::props.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36385>
Boris Brezillon 2025-08-27 16:20:11 +02:00
parent f43cff3728
commit ee172bb769
34 changed files with 302 additions and 307 deletions
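
The effect on the frontends is visible throughout the diff below: the per-driver props copy (and the pan_kmod_dev_query_props() call that filled it) goes away, and accessors read kmod.dev->props instead. The following is a minimal, self-contained sketch of the new layout, using stand-in types and an arbitrary GPU ID rather than the real Mesa definitions:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the real structs in pan_kmod.h / pan_device.h; only the
 * fields this sketch needs. */
struct pan_kmod_dev_props {
   uint32_t gpu_id;
};

struct pan_kmod_dev {
   /* Cached once, at device creation time, by the kmod backend. */
   struct pan_kmod_dev_props props;
};

struct panfrost_device {
   struct {
      struct pan_kmod_dev *dev; /* no private props copy anymore */
   } kmod;
};

static uint32_t
panfrost_device_gpu_id(const struct panfrost_device *dev)
{
   /* Was: dev->kmod.props.gpu_id (frontend-local copy). */
   return dev->kmod.dev->props.gpu_id;
}

int
main(void)
{
   struct pan_kmod_dev kdev = {.props = {.gpu_id = 0x7212}}; /* arbitrary value */
   struct panfrost_device dev = {.kmod = {.dev = &kdev}};

   printf("gpu_id = 0x%" PRIx32 "\n", panfrost_device_gpu_id(&dev));
   return 0;
}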


@ -1634,8 +1634,8 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
struct pan_tls_info info = {
.tls.size = ss->info.tls_size,
.wls.size = ss->info.wls_size + grid->variable_shared_mem,
.wls.instances = pan_calc_wls_instances(&local_size, &dev->kmod.props,
grid->indirect ? NULL : &dim),
.wls.instances = pan_calc_wls_instances(
&local_size, &dev->kmod.dev->props, grid->indirect ? NULL : &dim),
};
if (ss->info.tls_size) {
@ -4493,12 +4493,12 @@ GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen)
screen->vtbl.get_conv_desc = get_conv_desc;
pan_blend_shader_cache_init(&dev->blend_shaders, panfrost_device_gpu_id(dev),
dev->kmod.props.gpu_variant,
dev->kmod.dev->props.gpu_variant,
&screen->mempools.bin.base);
GENX(pan_fb_preload_cache_init)
(&dev->fb_preload_cache, panfrost_device_gpu_id(dev),
dev->kmod.props.gpu_variant, &dev->blend_shaders,
dev->kmod.dev->props.gpu_variant, &dev->blend_shaders,
&screen->mempools.bin.base, &screen->mempools.desc.base);
dev->precomp_cache = GENX(panfrost_precomp_cache_init)(screen);


@ -783,7 +783,7 @@ panfrost_get_query_result(struct pipe_context *pipe, struct pipe_query *q,
case PIPE_QUERY_TIMESTAMP_DISJOINT: {
vresult->timestamp_disjoint.frequency =
dev->kmod.props.timestamp_frequency;
dev->kmod.dev->props.timestamp_frequency;
vresult->timestamp_disjoint.disjoint = false;
break;
}


@ -966,8 +966,8 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch,
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Z), 0);
unsigned threads_per_wg = info->block[0] * info->block[1] * info->block[2];
unsigned max_thread_cnt =
pan_compute_max_thread_count(&dev->kmod.props, cs->info.work_reg_count);
unsigned max_thread_cnt = pan_compute_max_thread_count(
&dev->kmod.dev->props, cs->info.work_reg_count);
if (info->indirect) {
/* Load size in workgroups per dimension from memory */


@ -60,11 +60,9 @@ panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev)
return -1;
}
pan_kmod_dev_query_props(dev->kmod.dev, &dev->kmod.props);
dev->arch = pan_arch(dev->kmod.props.gpu_id);
dev->model = pan_get_model(dev->kmod.props.gpu_id,
dev->kmod.props.gpu_variant);
dev->arch = pan_arch(dev->kmod.dev->props.gpu_id);
dev->model = pan_get_model(dev->kmod.dev->props.gpu_id,
dev->kmod.dev->props.gpu_variant);
/* If we don't recognize the model, bail early */
if (!dev->model)
@ -85,14 +83,16 @@ panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev)
goto err_free_kmod_dev;
dev->core_count =
pan_query_core_count(&dev->kmod.props, &dev->core_id_range);
dev->thread_tls_alloc = pan_query_thread_tls_alloc(&dev->kmod.props);
pan_query_core_count(&dev->kmod.dev->props, &dev->core_id_range);
dev->thread_tls_alloc = pan_query_thread_tls_alloc(&dev->kmod.dev->props);
dev->optimal_tib_size = pan_query_optimal_tib_size(dev->arch, dev->model);
dev->optimal_z_tib_size = pan_query_optimal_z_tib_size(dev->arch, dev->model);
dev->compressed_formats = pan_query_compressed_formats(&dev->kmod.props);
dev->tiler_features = pan_query_tiler_features(&dev->kmod.props);
dev->has_afbc = pan_query_afbc(&dev->kmod.props);
dev->has_afrc = pan_query_afrc(&dev->kmod.props);
dev->optimal_z_tib_size =
pan_query_optimal_z_tib_size(dev->arch, dev->model);
dev->compressed_formats =
pan_query_compressed_formats(&dev->kmod.dev->props);
dev->tiler_features = pan_query_tiler_features(&dev->kmod.dev->props);
dev->has_afbc = pan_query_afbc(&dev->kmod.dev->props);
dev->has_afrc = pan_query_afrc(&dev->kmod.dev->props);
dev->formats = pan_format_table(dev->arch);
dev->blendable_formats = pan_blendable_format_table(dev->arch);


@ -90,9 +90,6 @@ struct panfrost_device {
/* The pan_kmod_dev object backing this device. */
struct pan_kmod_dev *dev;
/* Cached pan_kmod_dev_props properties queried at device create time. */
struct pan_kmod_dev_props props;
/* VM attached to this device. */
struct pan_kmod_vm *vm;
} kmod;
@ -194,19 +191,19 @@ panfrost_device_fd(const struct panfrost_device *dev)
static inline uint32_t
panfrost_device_gpu_id(const struct panfrost_device *dev)
{
return dev->kmod.props.gpu_id;
return dev->kmod.dev->props.gpu_id;
}
static inline uint32_t
panfrost_device_gpu_prod_id(const struct panfrost_device *dev)
{
return dev->kmod.props.gpu_id >> 16;
return dev->kmod.dev->props.gpu_id >> 16;
}
static inline uint32_t
panfrost_device_gpu_rev(const struct panfrost_device *dev)
{
return dev->kmod.props.gpu_id & BITFIELD_MASK(16);
return dev->kmod.dev->props.gpu_id & BITFIELD_MASK(16);
}
static inline int
@ -243,8 +240,8 @@ pan_is_bifrost(const struct panfrost_device *dev)
static inline uint64_t
pan_gpu_time_to_ns(struct panfrost_device *dev, uint64_t gpu_time)
{
assert(dev->kmod.props.timestamp_frequency > 0);
return (gpu_time * NSEC_PER_SEC) / dev->kmod.props.timestamp_frequency;
assert(dev->kmod.dev->props.timestamp_frequency > 0);
return (gpu_time * NSEC_PER_SEC) / dev->kmod.dev->props.timestamp_frequency;
}
static inline uint32_t


@ -197,8 +197,8 @@ emit_tls(struct panfrost_batch *batch,
struct pan_tls_info info = {
.tls.size = shader->info.tls_size,
.wls.size = shader->info.wls_size,
.wls.instances =
pan_calc_wls_instances(&shader->local_size, &dev->kmod.props, dim),
.wls.instances = pan_calc_wls_instances(&shader->local_size,
&dev->kmod.dev->props, dim),
};
if (info.tls.size) {
@ -354,7 +354,7 @@ GENX(panfrost_launch_precomp)(struct panfrost_batch *batch,
unsigned threads_per_wg =
shader->local_size.x * shader->local_size.y * shader->local_size.z;
unsigned max_thread_cnt = pan_compute_max_thread_count(
&dev->kmod.props, shader->info.work_reg_count);
&dev->kmod.dev->props, shader->info.work_reg_count);
/* Pick the task_axis and task_increment to maximize thread utilization. */
unsigned task_axis = MALI_TASK_AXIS_X;


@ -439,12 +439,12 @@ panfrost_walk_dmabuf_modifiers(struct pipe_screen *screen,
for (unsigned r = 0; r < yuv_lowering.nres; r++) {
enum pipe_format res_format = yuv_lowering.res_formats[r];
supported &= pan_image_test_modifier_with_format(&dev->kmod.props,
mod, res_format);
supported &= pan_image_test_modifier_with_format(
&dev->kmod.dev->props, mod, res_format);
}
} else {
supported =
pan_image_test_modifier_with_format(&dev->kmod.props, mod, format);
supported = pan_image_test_modifier_with_format(&dev->kmod.dev->props,
mod, format);
}
if (!supported)
@ -727,10 +727,10 @@ panfrost_init_screen_caps(struct panfrost_screen *screen)
/* Compile side is TODO for Midgard. */
caps->shader_clock = dev->arch >= 6 &&
dev->kmod.props.gpu_can_query_timestamp;
dev->kmod.dev->props.gpu_can_query_timestamp;
caps->shader_realtime_clock = dev->arch >= 6 &&
dev->kmod.props.gpu_can_query_timestamp &&
dev->kmod.props.timestamp_device_coherent;
dev->kmod.dev->props.gpu_can_query_timestamp &&
dev->kmod.dev->props.timestamp_device_coherent;
/* pixel_local_storage is initially for valhall and bifrost only */
caps->shader_pixel_local_storage_fast_size =
@ -796,10 +796,9 @@ panfrost_init_screen_caps(struct panfrost_screen *screen)
/* Must be at least 64 for correct behaviour */
caps->texture_buffer_offset_alignment = 64;
caps->query_time_elapsed =
caps->query_timestamp =
dev->kmod.props.gpu_can_query_timestamp &&
dev->kmod.props.timestamp_frequency != 0;
caps->query_time_elapsed = caps->query_timestamp =
dev->kmod.dev->props.gpu_can_query_timestamp &&
dev->kmod.dev->props.timestamp_frequency != 0;
if (caps->query_timestamp)
caps->timer_resolution = pan_gpu_time_to_ns(dev, 1);
@ -914,9 +913,8 @@ panfrost_init_screen_caps(struct panfrost_screen *screen)
caps->native_fence_fd = true;
caps->context_priority_mask =
from_kmod_group_allow_priority_flags(
dev->kmod.props.allowed_group_priorities_mask);
caps->context_priority_mask = from_kmod_group_allow_priority_flags(
dev->kmod.dev->props.allowed_group_priorities_mask);
caps->astc_decode_mode = dev->arch >= 9 && (dev->compressed_formats & (1 << 30));
@ -1006,7 +1004,7 @@ get_core_mask(const struct panfrost_device *dev,
const struct pipe_screen_config *config,
const char *option_name, uint64_t *mask)
{
uint64_t present = dev->kmod.props.shader_present;
uint64_t present = dev->kmod.dev->props.shader_present;
*mask = driQueryOptionu64(config->options, option_name) & present;
if (!*mask) {


@ -146,7 +146,7 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
struct pan_compile_inputs inputs = {
.gpu_id = panfrost_device_gpu_id(dev),
.gpu_variant = dev->kmod.props.gpu_variant,
.gpu_variant = dev->kmod.dev->props.gpu_variant,
.get_conv_desc = screen->vtbl.get_conv_desc,
};
@ -688,8 +688,8 @@ panfrost_get_compute_state_info(struct pipe_context *pipe, void *cso,
struct panfrost_compiled_shader *cs =
util_dynarray_begin(&uncompiled->variants);
info->max_threads =
pan_compute_max_thread_count(&dev->kmod.props, cs->info.work_reg_count);
info->max_threads = pan_compute_max_thread_count(&dev->kmod.dev->props,
cs->info.work_reg_count);
info->private_memory = cs->info.tls_size;
info->simd_sizes = pan_subgroup_size(dev->arch);
info->preferred_simd_size = info->simd_sizes;


@ -369,10 +369,6 @@ struct pan_kmod_ops {
/* Destroy a pan_kmod_dev object. */
void (*dev_destroy)(struct pan_kmod_dev *dev);
/* Query device properties. */
void (*dev_query_props)(const struct pan_kmod_dev *dev,
struct pan_kmod_dev_props *props);
/* Query the maxium user VA range.
* Users are free to use a subset of this range if they need less VA space.
* This method is optional, when not specified, kmod assumes the whole VA
@ -466,6 +462,9 @@ struct pan_kmod_dev {
/* KMD backing this device. */
struct pan_kmod_driver driver;
/* KMD-agnostic device properties. */
struct pan_kmod_dev_props props;
/* kmod backend ops assigned at device creation. */
const struct pan_kmod_ops *ops;
@ -499,25 +498,15 @@ pan_kmod_dev_create(int fd, uint32_t flags,
void pan_kmod_dev_destroy(struct pan_kmod_dev *dev);
static inline void
pan_kmod_dev_query_props(const struct pan_kmod_dev *dev,
struct pan_kmod_dev_props *props)
{
dev->ops->dev_query_props(dev, props);
}
static inline struct pan_kmod_va_range
pan_kmod_dev_query_user_va_range(const struct pan_kmod_dev *dev)
{
if (dev->ops->dev_query_user_va_range)
return dev->ops->dev_query_user_va_range(dev);
struct pan_kmod_dev_props props;
pan_kmod_dev_query_props(dev, &props);
return (struct pan_kmod_va_range){
.start = 0,
.size = 1ull << MMU_FEATURES_VA_BITS(props.mmu_features),
.size = 1ull << MMU_FEATURES_VA_BITS(dev->props.mmu_features),
};
}
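
With the dev_query_props hook removed, generic helpers no longer need a scratch pan_kmod_dev_props on the stack; the fallback path of pan_kmod_dev_query_user_va_range() above simply derives the range from the cached copy. Below is a compile-only sketch of that pattern, again with stand-in types, and with a hypothetical VA_BITS() macro standing in for MMU_FEATURES_VA_BITS():

#include <stdint.h>

struct pan_kmod_dev_props { uint32_t mmu_features; };
struct pan_kmod_dev { struct pan_kmod_dev_props props; };
struct pan_kmod_va_range { uint64_t start; uint64_t size; };

/* Assumption for this sketch: the VA width lives in the low bits of
 * mmu_features, which is what the real MMU_FEATURES_VA_BITS() extracts. */
#define VA_BITS(mmu_features) ((mmu_features) & 0xff)

static inline struct pan_kmod_va_range
default_user_va_range(const struct pan_kmod_dev *dev)
{
   /* No dev_query_props() round-trip: read the props cached at
    * device creation time. */
   return (struct pan_kmod_va_range){
      .start = 0,
      .size = 1ull << VA_BITS(dev->props.mmu_features),
   };
}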


@ -42,39 +42,6 @@ struct panfrost_kmod_bo {
uint64_t offset;
};
static struct pan_kmod_dev *
panfrost_kmod_dev_create(int fd, uint32_t flags, drmVersionPtr version,
const struct pan_kmod_allocator *allocator)
{
if (version->version_major < 1 ||
(version->version_major == 1 && version->version_minor < 1)) {
mesa_loge("kernel driver is too old (requires at least 1.1, found %d.%d)",
version->version_major, version->version_minor);
return NULL;
}
struct panfrost_kmod_dev *panfrost_dev =
pan_kmod_alloc(allocator, sizeof(*panfrost_dev));
if (!panfrost_dev) {
mesa_loge("failed to allocate a panfrost_kmod_dev object");
return NULL;
}
pan_kmod_dev_init(&panfrost_dev->base, fd, flags, version,
&panfrost_kmod_ops, allocator);
return &panfrost_dev->base;
}
static void
panfrost_kmod_dev_destroy(struct pan_kmod_dev *dev)
{
struct panfrost_kmod_dev *panfrost_dev =
container_of(dev, struct panfrost_kmod_dev, base);
pan_kmod_dev_cleanup(dev);
pan_kmod_free(dev->allocator, panfrost_dev);
}
/* Abstraction over the raw drm_panfrost_get_param ioctl for fetching
* information about devices.
*/
@ -97,9 +64,10 @@ panfrost_query_raw(int fd, enum drm_panfrost_param param, bool required,
}
static void
panfrost_dev_query_thread_props(const struct pan_kmod_dev *dev,
struct pan_kmod_dev_props *props)
panfrost_dev_query_thread_props(struct panfrost_kmod_dev *panfrost_dev)
{
struct pan_kmod_dev_props *props = &panfrost_dev->base.props;
const struct pan_kmod_dev *dev = &panfrost_dev->base;
int fd = dev->fd;
props->max_threads_per_core =
@ -177,9 +145,10 @@ panfrost_dev_query_thread_props(const struct pan_kmod_dev *dev,
}
static void
panfrost_dev_query_props(const struct pan_kmod_dev *dev,
struct pan_kmod_dev_props *props)
panfrost_dev_query_props(struct panfrost_kmod_dev *panfrost_dev)
{
struct pan_kmod_dev_props *props = &panfrost_dev->base.props;
const struct pan_kmod_dev *dev = &panfrost_dev->base;
int fd = dev->fd;
memset(props, 0, sizeof(*props));
@ -203,7 +172,7 @@ panfrost_dev_query_props(const struct pan_kmod_dev *dev,
props->afbc_features =
panfrost_query_raw(fd, DRM_PANFROST_PARAM_AFBC_FEATURES, true, 0);
panfrost_dev_query_thread_props(dev, props);
panfrost_dev_query_thread_props(panfrost_dev);
if (dev->driver.version.major > 1 || dev->driver.version.minor >= 3) {
props->gpu_can_query_timestamp = true;
@ -230,6 +199,41 @@ panfrost_dev_query_props(const struct pan_kmod_dev *dev,
PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH;
}
static struct pan_kmod_dev *
panfrost_kmod_dev_create(int fd, uint32_t flags, drmVersionPtr version,
const struct pan_kmod_allocator *allocator)
{
if (version->version_major < 1 ||
(version->version_major == 1 && version->version_minor < 1)) {
mesa_loge("kernel driver is too old (requires at least 1.1, found %d.%d)",
version->version_major, version->version_minor);
return NULL;
}
struct panfrost_kmod_dev *panfrost_dev =
pan_kmod_alloc(allocator, sizeof(*panfrost_dev));
if (!panfrost_dev) {
mesa_loge("failed to allocate a panfrost_kmod_dev object");
return NULL;
}
pan_kmod_dev_init(&panfrost_dev->base, fd, flags, version,
&panfrost_kmod_ops, allocator);
panfrost_dev_query_props(panfrost_dev);
return &panfrost_dev->base;
}
static void
panfrost_kmod_dev_destroy(struct pan_kmod_dev *dev)
{
struct panfrost_kmod_dev *panfrost_dev =
container_of(dev, struct panfrost_kmod_dev, base);
pan_kmod_dev_cleanup(dev);
pan_kmod_free(dev->allocator, panfrost_dev);
}
static uint32_t
to_panfrost_bo_flags(struct pan_kmod_dev *dev, uint32_t flags)
{
@ -530,7 +534,6 @@ panfrost_kmod_bo_label(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const c
const struct pan_kmod_ops panfrost_kmod_ops = {
.dev_create = panfrost_kmod_dev_create,
.dev_destroy = panfrost_kmod_dev_destroy,
.dev_query_props = panfrost_dev_query_props,
.dev_query_user_va_range = panfrost_kmod_dev_query_user_va_range,
.bo_alloc = panfrost_kmod_bo_alloc,
.bo_free = panfrost_kmod_bo_free,
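
Both backends end up with the same shape after this change: allocate the backend-private device, run pan_kmod_dev_init(), then fill base.props once before returning it. A compile-only sketch of that pattern follows, with stand-in types and a made-up backend name (the real panfrost/panthor versions issue GET_PARAM/DEV_QUERY ioctls at this point):

#include <stdint.h>
#include <stdlib.h>

struct pan_kmod_dev_props { uint32_t gpu_id; };
struct pan_kmod_dev { struct pan_kmod_dev_props props; };

/* Backend-private wrapper, like panfrost_kmod_dev / panthor_kmod_dev. */
struct example_kmod_dev { struct pan_kmod_dev base; };

static void
example_dev_query_props(struct example_kmod_dev *edev)
{
   /* The real backends fill every pan_kmod_dev_props field from the KMD
    * here; a fixed value keeps the sketch self-contained. */
   edev->base.props.gpu_id = 0x7212;
}

static struct pan_kmod_dev *
example_kmod_dev_create(void)
{
   struct example_kmod_dev *edev = calloc(1, sizeof(*edev));
   if (!edev)
      return NULL;

   /* pan_kmod_dev_init(&edev->base, ...) would run here. */
   example_dev_query_props(edev); /* cache the props exactly once */
   return &edev->base;
}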


@ -103,6 +103,90 @@ struct panthor_kmod_bo {
} sync;
};
static uint32_t
to_kmod_group_allow_priority_flags(uint32_t panthor_flags)
{
uint32_t kmod_flags = 0;
if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_REALTIME))
kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_REALTIME;
if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_HIGH))
kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH;
if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_MEDIUM))
kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_MEDIUM;
if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_LOW))
kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_LOW;
return kmod_flags;
}
static void
panthor_dev_query_thread_props(struct panthor_kmod_dev *panthor_dev)
{
struct pan_kmod_dev_props *props = &panthor_dev->base.props;
props->max_threads_per_wg = panthor_dev->props.gpu.thread_max_workgroup_size;
props->max_threads_per_core = panthor_dev->props.gpu.max_threads;
props->max_tasks_per_core = panthor_dev->props.gpu.thread_features >> 24;
props->num_registers_per_core =
panthor_dev->props.gpu.thread_features & 0x3fffff;
/* We assume that all thread properties are populated. If we ever have a GPU
* that have one of the THREAD_xxx register that's zero, we can always add a
* quirk here.
*/
assert(props->max_threads_per_wg && props->max_threads_per_core &&
props->max_tasks_per_core && props->num_registers_per_core);
/* There is no THREAD_TLS_ALLOC register on v10+, and the maximum number
* of TLS instance per core is assumed to be the maximum number of threads
* per core.
*/
props->max_tls_instance_per_core = props->max_threads_per_core;
}
static void
panthor_dev_query_props(struct panthor_kmod_dev *panthor_dev)
{
struct pan_kmod_dev_props *props = &panthor_dev->base.props;
*props = (struct pan_kmod_dev_props){
.gpu_id = panthor_dev->props.gpu.gpu_id,
.gpu_variant = panthor_dev->props.gpu.core_features & 0xff,
.shader_present = panthor_dev->props.gpu.shader_present,
.tiler_features = panthor_dev->props.gpu.tiler_features,
.mem_features = panthor_dev->props.gpu.mem_features,
.mmu_features = panthor_dev->props.gpu.mmu_features,
/* This register does not exist because AFBC is no longer optional. */
.afbc_features = 0,
/* Access to timstamp from the GPU is always supported on Panthor. */
.gpu_can_query_timestamp = true,
.timestamp_frequency = panthor_dev->props.timestamp.timestamp_frequency,
.allowed_group_priorities_mask = to_kmod_group_allow_priority_flags(
panthor_dev->props.group_priorities.allowed_mask),
};
if (panthor_dev->base.driver.version.major > 1 ||
panthor_dev->base.driver.version.minor >= 6)
props->timestamp_device_coherent = true;
static_assert(sizeof(props->texture_features) ==
sizeof(panthor_dev->props.gpu.texture_features),
"Mismatch in texture_features array size");
memcpy(props->texture_features, panthor_dev->props.gpu.texture_features,
sizeof(props->texture_features));
panthor_dev_query_thread_props(panthor_dev);
}
static struct pan_kmod_dev *
panthor_kmod_dev_create(int fd, uint32_t flags, drmVersionPtr version,
const struct pan_kmod_allocator *allocator)
@ -195,8 +279,11 @@ panthor_kmod_dev_create(int fd, uint32_t flags, drmVersionPtr version,
}
assert(!ret);
pan_kmod_dev_init(&panthor_dev->base, fd, flags, version, &panthor_kmod_ops,
allocator);
pan_kmod_dev_init(&panthor_dev->base, fd, flags, version,
&panthor_kmod_ops, allocator);
panthor_dev_query_props(panthor_dev);
return &panthor_dev->base;
err_free_dev:
@ -215,91 +302,6 @@ panthor_kmod_dev_destroy(struct pan_kmod_dev *dev)
pan_kmod_free(dev->allocator, panthor_dev);
}
static uint32_t
to_kmod_group_allow_priority_flags(uint32_t panthor_flags)
{
uint32_t kmod_flags = 0;
if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_REALTIME))
kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_REALTIME;
if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_HIGH))
kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH;
if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_MEDIUM))
kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_MEDIUM;
if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_LOW))
kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_LOW;
return kmod_flags;
}
static void
panthor_dev_query_thread_props(const struct panthor_kmod_dev *panthor_dev,
struct pan_kmod_dev_props *props)
{
props->max_threads_per_wg = panthor_dev->props.gpu.thread_max_workgroup_size;
props->max_threads_per_core = panthor_dev->props.gpu.max_threads;
props->max_tasks_per_core = panthor_dev->props.gpu.thread_features >> 24;
props->num_registers_per_core =
panthor_dev->props.gpu.thread_features & 0x3fffff;
/* We assume that all thread properties are populated. If we ever have a GPU
* that have one of the THREAD_xxx register that's zero, we can always add a
* quirk here.
*/
assert(props->max_threads_per_wg && props->max_threads_per_core &&
props->max_tasks_per_core && props->num_registers_per_core);
/* There is no THREAD_TLS_ALLOC register on v10+, and the maximum number
* of TLS instance per core is assumed to be the maximum number of threads
* per core.
*/
props->max_tls_instance_per_core = props->max_threads_per_core;
}
static void
panthor_dev_query_props(const struct pan_kmod_dev *dev,
struct pan_kmod_dev_props *props)
{
struct panthor_kmod_dev *panthor_dev =
container_of(dev, struct panthor_kmod_dev, base);
*props = (struct pan_kmod_dev_props){
.gpu_id = panthor_dev->props.gpu.gpu_id,
.gpu_variant = panthor_dev->props.gpu.core_features & 0xff,
.shader_present = panthor_dev->props.gpu.shader_present,
.tiler_features = panthor_dev->props.gpu.tiler_features,
.mem_features = panthor_dev->props.gpu.mem_features,
.mmu_features = panthor_dev->props.gpu.mmu_features,
/* This register does not exist because AFBC is no longer optional. */
.afbc_features = 0,
/* Access to timstamp from the GPU is always supported on Panthor. */
.gpu_can_query_timestamp = true,
.timestamp_frequency = panthor_dev->props.timestamp.timestamp_frequency,
.allowed_group_priorities_mask = to_kmod_group_allow_priority_flags(
panthor_dev->props.group_priorities.allowed_mask),
};
if (dev->driver.version.major > 1 || dev->driver.version.minor >= 6) {
props->timestamp_device_coherent = true;
}
static_assert(sizeof(props->texture_features) ==
sizeof(panthor_dev->props.gpu.texture_features),
"Mismatch in texture_features array size");
memcpy(props->texture_features, panthor_dev->props.gpu.texture_features,
sizeof(props->texture_features));
panthor_dev_query_thread_props(panthor_dev, props);
}
static struct pan_kmod_va_range
panthor_kmod_dev_query_user_va_range(const struct pan_kmod_dev *dev)
{
@ -706,10 +708,6 @@ static struct pan_kmod_vm *
panthor_kmod_vm_create(struct pan_kmod_dev *dev, uint32_t flags,
uint64_t user_va_start, uint64_t user_va_range)
{
struct pan_kmod_dev_props props;
panthor_dev_query_props(dev, &props);
struct panthor_kmod_vm *panthor_vm =
pan_kmod_dev_alloc(dev, sizeof(*panthor_vm));
if (!panthor_vm) {
@ -1236,7 +1234,6 @@ panthor_kmod_bo_label(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const ch
const struct pan_kmod_ops panthor_kmod_ops = {
.dev_create = panthor_kmod_dev_create,
.dev_destroy = panthor_kmod_dev_destroy,
.dev_query_props = panthor_dev_query_props,
.dev_query_user_va_range = panthor_kmod_dev_query_user_va_range,
.bo_alloc = panthor_kmod_bo_alloc,
.bo_free = panthor_kmod_bo_free,


@ -82,8 +82,7 @@ pan_perf_init(struct pan_perf *perf, int fd)
perf->dev = pan_kmod_dev_create(fd, 0, NULL);
assert(perf->dev);
struct pan_kmod_dev_props props = {};
pan_kmod_dev_query_props(perf->dev, &props);
struct pan_kmod_dev_props props = perf->dev->props;
const struct pan_model *model =
pan_get_model(props.gpu_id, props.gpu_variant);


@ -71,10 +71,7 @@ main(void)
struct pan_kmod_dev *dev =
pan_kmod_dev_create(fd, PAN_KMOD_DEV_FLAG_OWNS_FD, NULL);
struct pan_kmod_dev_props props;
pan_kmod_dev_query_props(dev, &props);
struct pan_kmod_dev_props props = dev->props;
uint32_t supported = pan_query_compressed_formats(&props);
bool all_ok = true;


@ -288,7 +288,7 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev)
nir_builder b = nir_builder_init_simple_shader(
MESA_SHADER_COMPUTE,
pan_get_nir_shader_compiler_options(
pan_arch(phys_dev->kmod.props.gpu_id)),
pan_arch(phys_dev->kmod.dev->props.gpu_id)),
"%s", "desc_copy");
/* We actually customize that at execution time to issue the
@ -302,8 +302,8 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev)
single_desc_copy(&b, desc_copy_id);
struct pan_compile_inputs inputs = {
.gpu_id = phys_dev->kmod.props.gpu_id,
.gpu_variant = phys_dev->kmod.props.gpu_variant,
.gpu_id = phys_dev->kmod.dev->props.gpu_id,
.gpu_variant = phys_dev->kmod.dev->props.gpu_variant,
};
pan_preprocess_nir(b.shader, inputs.gpu_id);


@ -677,7 +677,7 @@ panvk_per_arch(calculate_task_axis_and_increment)(
{
/* Pick the task_axis and task_increment to maximize thread
* utilization. */
const struct pan_kmod_dev_props *props = &phys_dev->kmod.props;
const struct pan_kmod_dev_props *props = &phys_dev->kmod.dev->props;
const unsigned max_thread_cnt =
pan_compute_max_thread_count(props, shader->info.work_reg_count);
const unsigned threads_per_wg = shader->cs.local_size.x *


@ -64,11 +64,11 @@ emit_tls(struct panvk_cmd_buffer *cmdbuf)
struct panvk_physical_device *phys_dev =
to_panvk_physical_device(dev->vk.physical);
unsigned core_id_range;
pan_query_core_count(&phys_dev->kmod.props, &core_id_range);
pan_query_core_count(&phys_dev->kmod.dev->props, &core_id_range);
if (cmdbuf->state.tls.info.tls.size) {
unsigned thread_tls_alloc =
pan_query_thread_tls_alloc(&phys_dev->kmod.props);
pan_query_thread_tls_alloc(&phys_dev->kmod.dev->props);
unsigned size = pan_get_total_stack_size(cmdbuf->state.tls.info.tls.size,
thread_tls_alloc, core_id_range);
@ -823,7 +823,7 @@ init_cs_builders(struct panvk_cmd_buffer *cmdbuf)
.nr_registers = csif_info->cs_reg_count,
.nr_kernel_registers = MAX2(csif_info->unpreserved_cs_reg_count, 4),
.compute_ep_limit =
PAN_ARCH >= 12 ? phys_dev->kmod.props.max_tasks_per_core : 0,
PAN_ARCH >= 12 ? phys_dev->kmod.dev->props.max_tasks_per_core : 0,
.alloc_buffer = alloc_cs_buffer,
.cookie = cmdbuf,
.ls_sb_slot = SB_ID(LS),


@ -87,10 +87,10 @@ panvk_per_arch(cmd_dispatch_prepare_tls)(
if (tlsinfo.wls.size) {
unsigned core_id_range;
pan_query_core_count(&phys_dev->kmod.props, &core_id_range);
pan_query_core_count(&phys_dev->kmod.dev->props, &core_id_range);
tlsinfo.wls.instances = pan_calc_wls_instances(
&cs->cs.local_size, &phys_dev->kmod.props, indirect ? NULL : dim);
&cs->cs.local_size, &phys_dev->kmod.dev->props, indirect ? NULL : dim);
unsigned wls_total_size = pan_calc_total_wls_size(
tlsinfo.wls.size, tlsinfo.wls.instances, core_id_range);
@ -156,7 +156,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
unsigned wg_per_task = 0;
if (indirect)
wg_per_task = pan_calc_workgroups_per_task(&cs->cs.local_size,
&phys_dev->kmod.props);
&phys_dev->kmod.dev->props);
if (compute_state_dirty(cmdbuf, DESC_STATE) ||
compute_state_dirty(cmdbuf, CS)) {


@ -1000,7 +1000,7 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
to_panvk_physical_device(cmdbuf->vk.base.device->physical);
const bool tracing_enabled = PANVK_DEBUG(TRACE);
struct pan_tiler_features tiler_features =
pan_query_tiler_features(&phys_dev->kmod.props);
pan_query_tiler_features(&phys_dev->kmod.dev->props);
bool simul_use =
cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
struct pan_ptr tiler_desc = {0};


@ -531,7 +531,7 @@ init_subqueue(struct panvk_gpu_queue *queue, enum panvk_subqueue_id subqueue)
subqueue);
pandecode_cs_binary(dev->debug.decode_ctx, qsubmit.stream_addr,
qsubmit.stream_size,
phys_dev->kmod.props.gpu_id);
phys_dev->kmod.dev->props.gpu_id);
}
return VK_SUCCESS;
@ -778,7 +778,7 @@ panvk_queue_submit_init(struct panvk_queue_submit *submit,
submit->process_utrace =
u_trace_should_process(&submit->dev->utrace.utctx) &&
submit->phys_dev->kmod.props.timestamp_frequency;
submit->phys_dev->kmod.dev->props.timestamp_frequency;
submit->force_sync = PANVK_DEBUG(TRACE) || PANVK_DEBUG(SYNC);
}
@ -1201,7 +1201,8 @@ panvk_queue_submit_process_debug(const struct panvk_queue_submit *submit)
struct pandecode_context *decode_ctx = submit->dev->debug.decode_ctx;
if (PANVK_DEBUG(TRACE)) {
const struct pan_kmod_dev_props *props = &submit->phys_dev->kmod.props;
const struct pan_kmod_dev_props *props =
&submit->phys_dev->kmod.dev->props;
for (uint32_t i = 0; i < submit->qsubmit_count; i++) {
const struct drm_panthor_queue_submit *qsubmit = &submit->qsubmits[i];


@ -117,10 +117,10 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
if (batch->tlsinfo.tls.size) {
unsigned thread_tls_alloc =
pan_query_thread_tls_alloc(&phys_dev->kmod.props);
pan_query_thread_tls_alloc(&phys_dev->kmod.dev->props);
unsigned core_id_range;
pan_query_core_count(&phys_dev->kmod.props, &core_id_range);
pan_query_core_count(&phys_dev->kmod.dev->props, &core_id_range);
unsigned size = pan_get_total_stack_size(batch->tlsinfo.tls.size,
thread_tls_alloc, core_id_range);


@ -50,9 +50,9 @@ panvk_per_arch(cmd_dispatch_prepare_tls)(
if (batch->tlsinfo.wls.size) {
unsigned core_id_range;
pan_query_core_count(&phys_dev->kmod.props, &core_id_range);
pan_query_core_count(&phys_dev->kmod.dev->props, &core_id_range);
batch->tlsinfo.wls.instances = pan_calc_wls_instances(
&cs->cs.local_size, &phys_dev->kmod.props, indirect ? NULL : dim);
&cs->cs.local_size, &phys_dev->kmod.dev->props, indirect ? NULL : dim);
batch->wls_total_size = pan_calc_total_wls_size(
batch->tlsinfo.wls.size, batch->tlsinfo.wls.instances, core_id_range);
}


@ -77,7 +77,7 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc
if (PANVK_DEBUG(TRACE)) {
pandecode_jc(dev->debug.decode_ctx, batch->vtc_jc.first_job,
phys_dev->kmod.props.gpu_id);
phys_dev->kmod.dev->props.gpu_id);
}
if (PANVK_DEBUG(DUMP))
@ -85,7 +85,7 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc
if (PANVK_DEBUG(SYNC))
pandecode_abort_on_fault(dev->debug.decode_ctx, submit.jc,
phys_dev->kmod.props.gpu_id);
phys_dev->kmod.dev->props.gpu_id);
}
if (batch->frag_jc.first_job) {
@ -115,14 +115,14 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc
if (PANVK_DEBUG(TRACE))
pandecode_jc(dev->debug.decode_ctx, batch->frag_jc.first_job,
phys_dev->kmod.props.gpu_id);
phys_dev->kmod.dev->props.gpu_id);
if (PANVK_DEBUG(DUMP))
pandecode_dump_mappings(dev->debug.decode_ctx);
if (PANVK_DEBUG(SYNC))
pandecode_abort_on_fault(dev->debug.decode_ctx, submit.jc,
phys_dev->kmod.props.gpu_id);
phys_dev->kmod.dev->props.gpu_id);
}
if (PANVK_DEBUG(TRACE))


@ -247,7 +247,7 @@ panvk_select_tiler_hierarchy_mask(const struct panvk_physical_device *phys_dev,
unsigned bin_ptr_mem_budget)
{
struct pan_tiler_features tiler_features =
pan_query_tiler_features(&phys_dev->kmod.props);
pan_query_tiler_features(&phys_dev->kmod.dev->props);
uint32_t hierarchy_mask = GENX(pan_select_tiler_hierarchy_mask)(
state->render.fb.info.width, state->render.fb.info.height,


@ -54,7 +54,7 @@ panvk_image_can_use_afbc(
VkImageUsageFlags usage, VkImageType type, VkImageTiling tiling,
VkImageCreateFlags flags)
{
unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id);
enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
/* Disallow AFBC if either of these is true
@ -79,7 +79,7 @@ panvk_image_can_use_afbc(
return !PANVK_DEBUG(NO_AFBC) &&
!(usage &
(VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_HOST_TRANSFER_BIT)) &&
pan_query_afbc(&phys_dev->kmod.props) &&
pan_query_afbc(&phys_dev->kmod.dev->props) &&
pan_afbc_supports_format(arch, pfmt) &&
tiling != VK_IMAGE_TILING_LINEAR && type != VK_IMAGE_TYPE_1D &&
(type != VK_IMAGE_TYPE_3D || arch >= 7) &&
@ -143,7 +143,7 @@ get_plane_count(struct panvk_image *image)
struct panvk_physical_device *phys_dev =
to_panvk_physical_device(image->vk.base.device->physical);
unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id);
/* Z32_S8X24 is not supported on v9+, and we don't want to use it
* on v7- anyway, because it's less efficient than the multiplanar
@ -208,7 +208,7 @@ panvk_image_can_use_mod(struct panvk_image *image,
{
struct panvk_physical_device *phys_dev =
to_panvk_physical_device(image->vk.base.device->physical);
unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id);
const bool forced_linear = PANVK_DEBUG(LINEAR) ||
image->vk.tiling == VK_IMAGE_TILING_LINEAR ||
image->vk.image_type == VK_IMAGE_TYPE_1D;
@ -298,7 +298,7 @@ panvk_image_can_use_mod(struct panvk_image *image,
};
enum pan_mod_support supported =
pan_image_test_props(&phys_dev->kmod.props, &iprops, iusage);
pan_image_test_props(&phys_dev->kmod.dev->props, &iprops, iusage);
if (supported == PAN_MOD_NOT_SUPPORTED ||
(supported == PAN_MOD_NOT_OPTIMAL && optimal_only))
return false;
@ -413,7 +413,7 @@ panvk_image_init_layouts(struct panvk_image *image,
struct panvk_device *dev = to_panvk_device(image->vk.base.device);
struct panvk_physical_device *phys_dev =
to_panvk_physical_device(dev->vk.physical);
unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id);
const VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_info =
vk_find_struct_const(
pCreateInfo->pNext,


@ -145,8 +145,8 @@ init_shader_caches(struct panvk_physical_device *device,
_mesa_sha1_update(&sha_ctx, instance->driver_build_sha,
sizeof(instance->driver_build_sha));
_mesa_sha1_update(&sha_ctx, &device->kmod.props.gpu_id,
sizeof(device->kmod.props.gpu_id));
_mesa_sha1_update(&sha_ctx, &device->kmod.dev->props.gpu_id,
sizeof(device->kmod.dev->props.gpu_id));
unsigned char sha[SHA1_DIGEST_LENGTH];
_mesa_sha1_final(&sha_ctx, sha);
@ -157,7 +157,7 @@ init_shader_caches(struct panvk_physical_device *device,
#ifdef ENABLE_SHADER_CACHE
char renderer[17];
ASSERTED int len = snprintf(renderer, sizeof(renderer), "panvk_0x%08x",
device->kmod.props.gpu_id);
device->kmod.dev->props.gpu_id);
assert(len == sizeof(renderer) - 1);
char timestamp[SHA1_DIGEST_STRING_LENGTH];
@ -186,7 +186,7 @@ get_core_mask(struct panvk_physical_device *device,
const struct panvk_instance *instance, const char *option_name,
uint64_t *mask)
{
uint64_t present = device->kmod.props.shader_present;
uint64_t present = device->kmod.dev->props.shader_present;
*mask = driQueryOptionu64(&instance->dri_options, option_name) & present;
if (!*mask)
@ -218,7 +218,7 @@ static VkResult
get_device_sync_types(struct panvk_physical_device *device,
const struct panvk_instance *instance)
{
const unsigned arch = pan_arch(device->kmod.props.gpu_id);
const unsigned arch = pan_arch(device->kmod.dev->props.gpu_id);
uint32_t sync_type_count = 0;
device->drm_syncobj_type = vk_drm_syncobj_get_type(device->kmod.dev->fd);
@ -252,12 +252,12 @@ get_device_sync_types(struct panvk_physical_device *device,
float
panvk_get_gpu_system_timestamp_period(const struct panvk_physical_device *device)
{
if (!device->kmod.props.gpu_can_query_timestamp ||
!device->kmod.props.timestamp_frequency)
if (!device->kmod.dev->props.gpu_can_query_timestamp ||
!device->kmod.dev->props.timestamp_frequency)
return 0;
const float ns_per_s = 1000000000.0;
return ns_per_s / (float)device->kmod.props.timestamp_frequency;
return ns_per_s / (float)device->kmod.dev->props.timestamp_frequency;
}
void
@ -283,18 +283,16 @@ panvk_physical_device_init(struct panvk_physical_device *device,
if (result != VK_SUCCESS)
return result;
pan_kmod_dev_query_props(device->kmod.dev, &device->kmod.props);
device->model = pan_get_model(device->kmod.dev->props.gpu_id,
device->kmod.dev->props.gpu_variant);
device->model = pan_get_model(device->kmod.props.gpu_id,
device->kmod.props.gpu_variant);
unsigned arch = pan_arch(device->kmod.props.gpu_id);
unsigned arch = pan_arch(device->kmod.dev->props.gpu_id);
if (!device->model) {
result = panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
"Unknown gpu_id (%#x) or variant (%#x)",
device->kmod.props.gpu_id,
device->kmod.props.gpu_variant);
device->kmod.dev->props.gpu_id,
device->kmod.dev->props.gpu_variant);
goto fail;
}
@ -400,13 +398,13 @@ panvk_fill_global_priority(const struct panvk_physical_device *physical_device,
uint32_t family_idx,
VkQueueFamilyGlobalPriorityPropertiesKHR *prio)
{
const unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
const unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id);
uint32_t prio_idx = 0;
switch (family_idx) {
case PANVK_QUEUE_FAMILY_GPU: {
enum pan_kmod_group_allow_priority_flags prio_mask =
physical_device->kmod.props.allowed_group_priorities_mask;
physical_device->kmod.dev->props.allowed_group_priorities_mask;
/* Non-medium priority context is not hooked-up in the JM backend, even
* though the panfrost kmod advertize it. Manually filter non-medium
@ -445,7 +443,7 @@ panvk_GetPhysicalDeviceQueueFamilyProperties2(
VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
pQueueFamilyPropertyCount);
unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id);
const VkQueueFamilyProperties qfamily_props[PANVK_QUEUE_FAMILY_COUNT] = {
[PANVK_QUEUE_FAMILY_GPU] = {
@ -455,7 +453,8 @@ panvk_GetPhysicalDeviceQueueFamilyProperties2(
some CTS tests */
.queueCount = arch >= 10 ? 2 : 1,
.timestampValidBits =
arch >= 10 && physical_device->kmod.props.gpu_can_query_timestamp
arch >= 10 &&
physical_device->kmod.dev->props.gpu_can_query_timestamp
? 64
: 0,
.minImageTransferGranularity = {1, 1, 1},
@ -525,7 +524,7 @@ panvk_CreateDevice(VkPhysicalDevice physicalDevice,
const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
{
VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id);
VkResult result = VK_ERROR_INITIALIZATION_FAILED;
panvk_arch_dispatch_ret(arch, create_device, result, physical_device,
@ -540,7 +539,7 @@ panvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
VK_FROM_HANDLE(panvk_device, device, _device);
struct panvk_physical_device *physical_device =
to_panvk_physical_device(device->vk.physical);
unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id);
panvk_arch_dispatch(arch, destroy_device, device, pAllocator);
}
@ -580,7 +579,7 @@ format_is_supported(struct panvk_physical_device *physical_device,
* the supported formats reported by the GPU. */
if (util_format_is_compressed(pfmt)) {
uint32_t supported_compr_fmts =
pan_query_compressed_formats(&physical_device->kmod.props);
pan_query_compressed_formats(&physical_device->kmod.dev->props);
if (!(BITFIELD_BIT(fmt.texfeat_bit) & supported_compr_fmts))
return false;
@ -596,7 +595,7 @@ get_image_plane_format_features(struct panvk_physical_device *physical_device,
VkFormatFeatureFlags2 features = 0;
enum pipe_format pfmt = vk_format_to_pipe_format(format);
const struct pan_format fmt = physical_device->formats.all[pfmt];
unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id);
if (!format_is_supported(physical_device, fmt, pfmt))
return 0;
@ -736,7 +735,7 @@ static VkFormatFeatureFlags2
get_image_format_sample_counts(struct panvk_physical_device *physical_device,
VkFormat format)
{
unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id);
unsigned max_tib_size = pan_query_tib_size(physical_device->model);
unsigned max_cbuf_atts = pan_get_max_cbufs(arch, max_tib_size);
@ -782,7 +781,7 @@ panvk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,
VkFormatProperties2 *pFormatProperties)
{
VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
const unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
const unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id);
VkFormatFeatureFlags2 tex =
get_image_format_features(physical_device, format);
@ -846,7 +845,7 @@ panvk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,
static VkExtent3D
get_max_2d_image_size(struct panvk_physical_device *phys_dev, VkFormat format)
{
const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
const unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id);
const uint64_t max_img_size_B =
arch <= 10 ? u_uintN_max(32) : u_uintN_max(48);
const enum pipe_format pfmt = vk_format_to_pipe_format(format);
@ -869,7 +868,7 @@ get_max_2d_image_size(struct panvk_physical_device *phys_dev, VkFormat format)
static VkExtent3D
get_max_3d_image_size(struct panvk_physical_device *phys_dev, VkFormat format)
{
const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
const unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id);
const uint64_t max_img_size_B =
arch <= 10 ? u_uintN_max(32) : u_uintN_max(48);
enum pipe_format pfmt = vk_format_to_pipe_format(format);


@ -30,7 +30,6 @@ struct panvk_physical_device {
struct {
struct pan_kmod_dev *dev;
struct pan_kmod_dev_props props;
} kmod;
const struct pan_model *model;


@ -76,7 +76,7 @@ panvk_utrace_read_ts(struct u_trace_context *utctx, void *timestamps,
struct panvk_device *dev = to_dev(utctx);
const struct panvk_physical_device *pdev =
to_panvk_physical_device(dev->vk.physical);
const struct pan_kmod_dev_props *props = &pdev->kmod.props;
const struct pan_kmod_dev_props *props = &pdev->kmod.dev->props;
const struct panvk_utrace_buf *buf = timestamps;
struct panvk_utrace_flush_data *data = flush_data;


@ -93,7 +93,7 @@ get_gpu_time_ns(struct panvk_device *dev)
{
const struct panvk_physical_device *pdev =
to_panvk_physical_device(dev->vk.physical);
const struct pan_kmod_dev_props *props = &pdev->kmod.props;
const struct pan_kmod_dev_props *props = &pdev->kmod.dev->props;
const uint64_t ts = pan_kmod_query_timestamp(dev->kmod.dev);
return ts * NSEC_PER_SEC / props->timestamp_frequency;
@ -332,7 +332,7 @@ panvk_utrace_perfetto_init(struct panvk_device *dev, uint32_t queue_count)
{
const struct panvk_physical_device *pdev =
to_panvk_physical_device(dev->vk.physical);
const struct pan_kmod_dev_props *props = &pdev->kmod.props;
const struct pan_kmod_dev_props *props = &pdev->kmod.dev->props;
struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
if (queue_count > PANVK_UTRACE_PERFETTO_QUEUE_COUNT) {


@ -87,8 +87,8 @@ get_blend_shader(struct panvk_device *dev,
/* Compile the NIR shader */
struct pan_compile_inputs inputs = {
.gpu_id = pdev->kmod.props.gpu_id,
.gpu_variant = pdev->kmod.props.gpu_variant,
.gpu_id = pdev->kmod.dev->props.gpu_id,
.gpu_variant = pdev->kmod.dev->props.gpu_variant,
.is_blend = true,
.blend = {
.nr_samples = key.info.nr_samples,


@ -149,8 +149,8 @@ get_preload_shader(struct panvk_device *dev,
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
struct pan_compile_inputs inputs = {
.gpu_id = phys_dev->kmod.props.gpu_id,
.gpu_variant = phys_dev->kmod.props.gpu_variant,
.gpu_id = phys_dev->kmod.dev->props.gpu_id,
.gpu_variant = phys_dev->kmod.dev->props.gpu_variant,
.is_blit = true,
};
@ -512,7 +512,7 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
*/
struct panvk_physical_device *pdev =
to_panvk_physical_device(dev->vk.physical);
unsigned gpu_prod_id = pdev->kmod.props.gpu_id >> 16;
unsigned gpu_prod_id = pdev->kmod.dev->props.gpu_id >> 16;
/* the PAN_ARCH check is redundant but allows compiler optimization
when PAN_ARCH <= 6 */


@ -204,7 +204,7 @@ static VkResult
check_global_priority(const struct panvk_physical_device *phys_dev,
const VkDeviceQueueCreateInfo *create_info)
{
const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
const unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id);
const VkDeviceQueueGlobalPriorityCreateInfoKHR *priority_info =
vk_find_struct_const(create_info->pNext,
DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
@ -217,7 +217,7 @@ check_global_priority(const struct panvk_physical_device *phys_dev,
enum pan_kmod_group_allow_priority_flags requested_prio =
global_priority_to_group_allow_priority_flag(priority);
enum pan_kmod_group_allow_priority_flags allowed_prio_mask =
phys_dev->kmod.props.allowed_group_priorities_mask;
phys_dev->kmod.dev->props.allowed_group_priorities_mask;
/* Non-medium priority context is not hooked-up in the JM backend, even
* though the panfrost kmod advertize it. Manually filter non-medium


@ -48,7 +48,8 @@ panvk_per_arch(get_physical_device_extensions)(
.KHR_shader_atomic_int64 = PAN_ARCH >= 9,
.KHR_bind_memory2 = true,
.KHR_buffer_device_address = true,
.KHR_calibrated_timestamps = device->kmod.props.gpu_can_query_timestamp,
.KHR_calibrated_timestamps =
device->kmod.dev->props.gpu_can_query_timestamp,
.KHR_copy_commands2 = true,
.KHR_create_renderpass2 = true,
.KHR_dedicated_allocation = true,
@ -126,7 +127,8 @@ panvk_per_arch(get_physical_device_extensions)(
.EXT_4444_formats = true,
.EXT_border_color_swizzle = true,
.EXT_buffer_device_address = true,
.EXT_calibrated_timestamps = device->kmod.props.gpu_can_query_timestamp,
.EXT_calibrated_timestamps =
device->kmod.dev->props.gpu_can_query_timestamp,
.EXT_custom_border_color = true,
.EXT_depth_bias_control = true,
.EXT_depth_clamp_zero_one = true,
@ -198,7 +200,7 @@ has_compressed_formats(const struct panvk_physical_device *physical_device,
const uint32_t required_formats)
{
uint32_t supported_compr_fmts =
pan_query_compressed_formats(&physical_device->kmod.props);
pan_query_compressed_formats(&physical_device->kmod.dev->props);
return (supported_compr_fmts & required_formats) == required_formats;
}
@ -275,8 +277,8 @@ panvk_per_arch(get_physical_device_features)(
.vertexPipelineStoresAndAtomics =
(PAN_ARCH >= 13 && instance->enable_vertex_pipeline_stores_atomics) ||
instance->force_enable_shader_atomics,
.fragmentStoresAndAtomics = (PAN_ARCH >= 10) ||
instance->force_enable_shader_atomics,
.fragmentStoresAndAtomics =
(PAN_ARCH >= 10) || instance->force_enable_shader_atomics,
.shaderTessellationAndGeometryPointSize = false,
.shaderImageGatherExtended = true,
.shaderStorageImageExtendedFormats = true,
@ -492,8 +494,8 @@ panvk_per_arch(get_physical_device_features)(
.nullDescriptor = PAN_ARCH >= 10,
/* VK_KHR_shader_clock */
.shaderSubgroupClock = device->kmod.props.gpu_can_query_timestamp,
.shaderDeviceClock = device->kmod.props.timestamp_device_coherent,
.shaderSubgroupClock = device->kmod.dev->props.gpu_can_query_timestamp,
.shaderDeviceClock = device->kmod.dev->props.timestamp_device_coherent,
/* VK_KHR_shader_quad_control */
.shaderQuadControl = true,
@ -590,7 +592,7 @@ panvk_per_arch(get_physical_device_properties)(
const bool has_disk_cache = device->vk.disk_cache != NULL;
/* Ensure that the max threads count per workgroup is valid for Bifrost */
assert(PAN_ARCH > 8 || device->kmod.props.max_threads_per_wg <= 1024);
assert(PAN_ARCH > 8 || device->kmod.dev->props.max_threads_per_wg <= 1024);
float pointSizeRangeMin;
float pointSizeRangeMax;
@ -607,14 +609,14 @@ panvk_per_arch(get_physical_device_properties)(
*properties = (struct vk_properties){
.apiVersion = get_api_version(),
.driverVersion = vk_get_driver_version(),
.vendorID = instance->force_vk_vendor ? instance->force_vk_vendor :
ARM_VENDOR_ID,
.vendorID =
instance->force_vk_vendor ? instance->force_vk_vendor : ARM_VENDOR_ID,
/* Collect arch_major, arch_minor, arch_rev and product_major,
* as done by the Arm driver.
*/
.deviceID =
device->kmod.props.gpu_id & (ARCH_MAJOR | ARCH_MINOR | ARCH_REV | PRODUCT_MAJOR),
.deviceID = device->kmod.dev->props.gpu_id &
(ARCH_MAJOR | ARCH_MINOR | ARCH_REV | PRODUCT_MAJOR),
.deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
/* Vulkan 1.0 limits */
@ -740,10 +742,11 @@ panvk_per_arch(get_physical_device_properties)(
/* We could also split into serveral jobs but this has many limitations.
* As such we limit to the max threads per workgroup supported by the GPU.
*/
.maxComputeWorkGroupInvocations = device->kmod.props.max_threads_per_wg,
.maxComputeWorkGroupSize = {device->kmod.props.max_threads_per_wg,
device->kmod.props.max_threads_per_wg,
device->kmod.props.max_threads_per_wg},
.maxComputeWorkGroupInvocations =
device->kmod.dev->props.max_threads_per_wg,
.maxComputeWorkGroupSize = {device->kmod.dev->props.max_threads_per_wg,
device->kmod.dev->props.max_threads_per_wg,
device->kmod.dev->props.max_threads_per_wg},
/* 8-bit subpixel precision. */
.subPixelPrecisionBits = 8,
.subTexelPrecisionBits = 8,
@ -789,8 +792,10 @@ panvk_per_arch(get_physical_device_properties)(
.sampledImageStencilSampleCounts = sample_counts,
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.maxSampleMaskWords = 1,
.timestampComputeAndGraphics = PAN_ARCH >= 10 && device->kmod.props.gpu_can_query_timestamp,
.timestampPeriod = PAN_ARCH >= 10 ? panvk_get_gpu_system_timestamp_period(device) : 0,
.timestampComputeAndGraphics =
PAN_ARCH >= 10 && device->kmod.dev->props.gpu_can_query_timestamp,
.timestampPeriod =
PAN_ARCH >= 10 ? panvk_get_gpu_system_timestamp_period(device) : 0,
.maxClipDistances = 0,
.maxCullDistances = 0,
.maxCombinedClipAndCullDistances = 0,
@ -824,14 +829,11 @@ panvk_per_arch(get_physical_device_properties)(
.subgroupSupportedStages =
VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT,
.subgroupSupportedOperations =
VK_SUBGROUP_FEATURE_BASIC_BIT |
VK_SUBGROUP_FEATURE_VOTE_BIT |
VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
VK_SUBGROUP_FEATURE_BALLOT_BIT |
VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
VK_SUBGROUP_FEATURE_QUAD_BIT |
VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
VK_SUBGROUP_FEATURE_ROTATE_BIT |
VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT,
.subgroupQuadOperationsInAllStages = false,
@ -844,11 +846,20 @@ panvk_per_arch(get_physical_device_properties)(
.maxMemoryAllocationSize = UINT32_MAX,
/* Vulkan 1.2 properties */
.supportedDepthResolveModes =
VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT |
VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT,
.supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
VK_RESOLVE_MODE_MIN_BIT |
VK_RESOLVE_MODE_MAX_BIT,
.independentResolveNone = true,
.independentResolve = true,
/* VK_KHR_driver_properties */
.driverID = VK_DRIVER_ID_MESA_PANVK,
.conformanceVersion = get_conformance_version(),
.denormBehaviorIndependence = PAN_ARCH >= 9 ?
VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE :
VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.denormBehaviorIndependence =
PAN_ARCH >= 9 ? VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE
: VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.shaderSignedZeroInfNanPreserveFloat16 = true,
.shaderSignedZeroInfNanPreserveFloat32 = true,
@ -867,6 +878,8 @@ panvk_per_arch(get_physical_device_properties)(
.shaderRoundingModeRTZFloat64 = false,
.maxUpdateAfterBindDescriptorsInAllPools =
PAN_ARCH >= 9 ? UINT32_MAX : 0,
/* VK_EXT_descriptor_indexing */
.maxUpdateAfterBindDescriptorsInAllPools = PAN_ARCH >= 9 ? UINT32_MAX : 0,
.shaderUniformBufferArrayNonUniformIndexingNative = false,
.shaderSampledImageArrayNonUniformIndexingNative = false,
.shaderStorageBufferArrayNonUniformIndexingNative = false,
@ -922,7 +935,8 @@ panvk_per_arch(get_physical_device_properties)(
.minSubgroupSize = pan_subgroup_size(PAN_ARCH),
.maxSubgroupSize = pan_subgroup_size(PAN_ARCH),
.maxComputeWorkgroupSubgroups =
device->kmod.props.max_threads_per_wg / pan_subgroup_size(PAN_ARCH),
device->kmod.dev->props.max_threads_per_wg /
pan_subgroup_size(PAN_ARCH),
.requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT,
.maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE,
.maxPerStageDescriptorInlineUniformBlocks =
@ -1054,9 +1068,9 @@ panvk_per_arch(get_physical_device_properties)(
.fmaRate = device->model->rates.fma,
/* VK_ARM_shader_core_builtins */
.shaderCoreMask = device->kmod.props.shader_present,
.shaderCoreCount = util_bitcount(device->kmod.props.shader_present),
.shaderWarpsPerCore = device->kmod.props.max_threads_per_core /
.shaderCoreMask = device->kmod.dev->props.shader_present,
.shaderCoreCount = util_bitcount(device->kmod.dev->props.shader_present),
.shaderWarpsPerCore = device->kmod.dev->props.max_threads_per_core /
(pan_subgroup_size(PAN_ARCH) * 2),
};


@ -52,7 +52,7 @@ panvk_per_arch(CreateQueryPool)(VkDevice _device,
const struct panvk_physical_device *phys_dev =
to_panvk_physical_device(device->vk.physical);
pan_query_core_count(&phys_dev->kmod.props, &reports_per_query);
pan_query_core_count(&phys_dev->kmod.dev->props, &reports_per_query);
#else
reports_per_query = 1;
#endif


@ -370,7 +370,8 @@ panvk_get_nir_options(UNUSED struct vk_physical_device *vk_pdev,
UNUSED const struct vk_pipeline_robustness_state *rs)
{
struct panvk_physical_device *phys_dev = to_panvk_physical_device(vk_pdev);
return pan_get_nir_shader_compiler_options(pan_arch(phys_dev->kmod.props.gpu_id));
return pan_get_nir_shader_compiler_options(
pan_arch(phys_dev->kmod.dev->props.gpu_id));
}
static struct spirv_to_nir_options
@ -439,7 +440,7 @@ panvk_preprocess_nir(struct vk_physical_device *vk_pdev,
*
* This would give us a better place to do panvk-specific lowering.
*/
pan_nir_lower_texture_early(nir, pdev->kmod.props.gpu_id);
pan_nir_lower_texture_early(nir, pdev->kmod.dev->props.gpu_id);
NIR_PASS(_, nir, nir_lower_system_values);
nir_lower_compute_system_values_options options = {
@ -451,17 +452,18 @@ panvk_preprocess_nir(struct vk_physical_device *vk_pdev,
if (nir->info.stage == MESA_SHADER_FRAGMENT)
NIR_PASS(_, nir, nir_lower_wpos_center);
pan_optimize_nir(nir, pdev->kmod.props.gpu_id);
pan_optimize_nir(nir, pdev->kmod.dev->props.gpu_id);
NIR_PASS(_, nir, nir_split_var_copies);
NIR_PASS(_, nir, nir_lower_var_copies);
assert(pdev->kmod.props.shader_present != 0);
uint64_t core_max_id = util_last_bit(pdev->kmod.props.shader_present) - 1;
assert(pdev->kmod.dev->props.shader_present != 0);
uint64_t core_max_id =
util_last_bit(pdev->kmod.dev->props.shader_present) - 1;
NIR_PASS(_, nir, nir_inline_sysval, nir_intrinsic_load_core_max_id_arm,
core_max_id);
pan_preprocess_nir(nir, pdev->kmod.props.gpu_id);
pan_preprocess_nir(nir, pdev->kmod.dev->props.gpu_id);
}
static void
@ -1289,8 +1291,8 @@ panvk_compile_shader(struct panvk_device *dev,
robust2_modes |= nir_var_mem_ssbo;
struct pan_compile_inputs inputs = {
.gpu_id = phys_dev->kmod.props.gpu_id,
.gpu_variant = phys_dev->kmod.props.gpu_variant,
.gpu_id = phys_dev->kmod.dev->props.gpu_id,
.gpu_variant = phys_dev->kmod.dev->props.gpu_variant,
.view_mask = (state && state->rp) ? state->rp->view_mask : 0,
.robust2_modes = robust2_modes,
.robust_descriptors = dev->vk.enabled_features.nullDescriptor,