diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 8792bb2ab10..3f1d9083ea3 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1634,8 +1634,8 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch, struct pan_tls_info info = { .tls.size = ss->info.tls_size, .wls.size = ss->info.wls_size + grid->variable_shared_mem, - .wls.instances = pan_calc_wls_instances(&local_size, &dev->kmod.props, - grid->indirect ? NULL : &dim), + .wls.instances = pan_calc_wls_instances( + &local_size, &dev->kmod.dev->props, grid->indirect ? NULL : &dim), }; if (ss->info.tls_size) { @@ -4493,12 +4493,12 @@ GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen) screen->vtbl.get_conv_desc = get_conv_desc; pan_blend_shader_cache_init(&dev->blend_shaders, panfrost_device_gpu_id(dev), - dev->kmod.props.gpu_variant, + dev->kmod.dev->props.gpu_variant, &screen->mempools.bin.base); GENX(pan_fb_preload_cache_init) (&dev->fb_preload_cache, panfrost_device_gpu_id(dev), - dev->kmod.props.gpu_variant, &dev->blend_shaders, + dev->kmod.dev->props.gpu_variant, &dev->blend_shaders, &screen->mempools.bin.base, &screen->mempools.desc.base); dev->precomp_cache = GENX(panfrost_precomp_cache_init)(screen); diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 6c7fd4c6d04..46b62c01ba1 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -783,7 +783,7 @@ panfrost_get_query_result(struct pipe_context *pipe, struct pipe_query *q, case PIPE_QUERY_TIMESTAMP_DISJOINT: { vresult->timestamp_disjoint.frequency = - dev->kmod.props.timestamp_frequency; + dev->kmod.dev->props.timestamp_frequency; vresult->timestamp_disjoint.disjoint = false; break; } diff --git a/src/gallium/drivers/panfrost/pan_csf.c b/src/gallium/drivers/panfrost/pan_csf.c index 08402f0692e..16b7acc855d 100644 
--- a/src/gallium/drivers/panfrost/pan_csf.c +++ b/src/gallium/drivers/panfrost/pan_csf.c @@ -966,8 +966,8 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch, cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Z), 0); unsigned threads_per_wg = info->block[0] * info->block[1] * info->block[2]; - unsigned max_thread_cnt = - pan_compute_max_thread_count(&dev->kmod.props, cs->info.work_reg_count); + unsigned max_thread_cnt = pan_compute_max_thread_count( + &dev->kmod.dev->props, cs->info.work_reg_count); if (info->indirect) { /* Load size in workgroups per dimension from memory */ diff --git a/src/gallium/drivers/panfrost/pan_device.c b/src/gallium/drivers/panfrost/pan_device.c index a0e062fb37d..018a83adf26 100644 --- a/src/gallium/drivers/panfrost/pan_device.c +++ b/src/gallium/drivers/panfrost/pan_device.c @@ -60,11 +60,9 @@ panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev) return -1; } - pan_kmod_dev_query_props(dev->kmod.dev, &dev->kmod.props); - - dev->arch = pan_arch(dev->kmod.props.gpu_id); - dev->model = pan_get_model(dev->kmod.props.gpu_id, - dev->kmod.props.gpu_variant); + dev->arch = pan_arch(dev->kmod.dev->props.gpu_id); + dev->model = pan_get_model(dev->kmod.dev->props.gpu_id, + dev->kmod.dev->props.gpu_variant); /* If we don't recognize the model, bail early */ if (!dev->model) @@ -85,14 +83,16 @@ panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev) goto err_free_kmod_dev; dev->core_count = - pan_query_core_count(&dev->kmod.props, &dev->core_id_range); - dev->thread_tls_alloc = pan_query_thread_tls_alloc(&dev->kmod.props); + pan_query_core_count(&dev->kmod.dev->props, &dev->core_id_range); + dev->thread_tls_alloc = pan_query_thread_tls_alloc(&dev->kmod.dev->props); dev->optimal_tib_size = pan_query_optimal_tib_size(dev->arch, dev->model); - dev->optimal_z_tib_size = pan_query_optimal_z_tib_size(dev->arch, dev->model); - dev->compressed_formats = pan_query_compressed_formats(&dev->kmod.props); - 
dev->tiler_features = pan_query_tiler_features(&dev->kmod.props); - dev->has_afbc = pan_query_afbc(&dev->kmod.props); - dev->has_afrc = pan_query_afrc(&dev->kmod.props); + dev->optimal_z_tib_size = + pan_query_optimal_z_tib_size(dev->arch, dev->model); + dev->compressed_formats = + pan_query_compressed_formats(&dev->kmod.dev->props); + dev->tiler_features = pan_query_tiler_features(&dev->kmod.dev->props); + dev->has_afbc = pan_query_afbc(&dev->kmod.dev->props); + dev->has_afrc = pan_query_afrc(&dev->kmod.dev->props); dev->formats = pan_format_table(dev->arch); dev->blendable_formats = pan_blendable_format_table(dev->arch); diff --git a/src/gallium/drivers/panfrost/pan_device.h b/src/gallium/drivers/panfrost/pan_device.h index be7af5739b9..9c0a86b0915 100644 --- a/src/gallium/drivers/panfrost/pan_device.h +++ b/src/gallium/drivers/panfrost/pan_device.h @@ -90,9 +90,6 @@ struct panfrost_device { /* The pan_kmod_dev object backing this device. */ struct pan_kmod_dev *dev; - /* Cached pan_kmod_dev_props properties queried at device create time. */ - struct pan_kmod_dev_props props; - /* VM attached to this device. 
*/ struct pan_kmod_vm *vm; } kmod; @@ -194,19 +191,19 @@ panfrost_device_fd(const struct panfrost_device *dev) static inline uint32_t panfrost_device_gpu_id(const struct panfrost_device *dev) { - return dev->kmod.props.gpu_id; + return dev->kmod.dev->props.gpu_id; } static inline uint32_t panfrost_device_gpu_prod_id(const struct panfrost_device *dev) { - return dev->kmod.props.gpu_id >> 16; + return dev->kmod.dev->props.gpu_id >> 16; } static inline uint32_t panfrost_device_gpu_rev(const struct panfrost_device *dev) { - return dev->kmod.props.gpu_id & BITFIELD_MASK(16); + return dev->kmod.dev->props.gpu_id & BITFIELD_MASK(16); } static inline int @@ -243,8 +240,8 @@ pan_is_bifrost(const struct panfrost_device *dev) static inline uint64_t pan_gpu_time_to_ns(struct panfrost_device *dev, uint64_t gpu_time) { - assert(dev->kmod.props.timestamp_frequency > 0); - return (gpu_time * NSEC_PER_SEC) / dev->kmod.props.timestamp_frequency; + assert(dev->kmod.dev->props.timestamp_frequency > 0); + return (gpu_time * NSEC_PER_SEC) / dev->kmod.dev->props.timestamp_frequency; } static inline uint32_t diff --git a/src/gallium/drivers/panfrost/pan_precomp.c b/src/gallium/drivers/panfrost/pan_precomp.c index 4a4a30f728d..c9b0c9b62fe 100644 --- a/src/gallium/drivers/panfrost/pan_precomp.c +++ b/src/gallium/drivers/panfrost/pan_precomp.c @@ -197,8 +197,8 @@ emit_tls(struct panfrost_batch *batch, struct pan_tls_info info = { .tls.size = shader->info.tls_size, .wls.size = shader->info.wls_size, - .wls.instances = - pan_calc_wls_instances(&shader->local_size, &dev->kmod.props, dim), + .wls.instances = pan_calc_wls_instances(&shader->local_size, + &dev->kmod.dev->props, dim), }; if (info.tls.size) { @@ -354,7 +354,7 @@ GENX(panfrost_launch_precomp)(struct panfrost_batch *batch, unsigned threads_per_wg = shader->local_size.x * shader->local_size.y * shader->local_size.z; unsigned max_thread_cnt = pan_compute_max_thread_count( - &dev->kmod.props, shader->info.work_reg_count); + 
&dev->kmod.dev->props, shader->info.work_reg_count); /* Pick the task_axis and task_increment to maximize thread utilization. */ unsigned task_axis = MALI_TASK_AXIS_X; diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c index fc3f5be2900..39c67fc8617 100644 --- a/src/gallium/drivers/panfrost/pan_screen.c +++ b/src/gallium/drivers/panfrost/pan_screen.c @@ -439,12 +439,12 @@ panfrost_walk_dmabuf_modifiers(struct pipe_screen *screen, for (unsigned r = 0; r < yuv_lowering.nres; r++) { enum pipe_format res_format = yuv_lowering.res_formats[r]; - supported &= pan_image_test_modifier_with_format(&dev->kmod.props, - mod, res_format); + supported &= pan_image_test_modifier_with_format( + &dev->kmod.dev->props, mod, res_format); } } else { - supported = - pan_image_test_modifier_with_format(&dev->kmod.props, mod, format); + supported = pan_image_test_modifier_with_format(&dev->kmod.dev->props, + mod, format); } if (!supported) @@ -727,10 +727,10 @@ panfrost_init_screen_caps(struct panfrost_screen *screen) /* Compile side is TODO for Midgard. 
*/ caps->shader_clock = dev->arch >= 6 && - dev->kmod.props.gpu_can_query_timestamp; + dev->kmod.dev->props.gpu_can_query_timestamp; caps->shader_realtime_clock = dev->arch >= 6 && - dev->kmod.props.gpu_can_query_timestamp && - dev->kmod.props.timestamp_device_coherent; + dev->kmod.dev->props.gpu_can_query_timestamp && + dev->kmod.dev->props.timestamp_device_coherent; /* pixel_local_storage is initially for valhall and bifrost only */ caps->shader_pixel_local_storage_fast_size = @@ -796,10 +796,9 @@ panfrost_init_screen_caps(struct panfrost_screen *screen) /* Must be at least 64 for correct behaviour */ caps->texture_buffer_offset_alignment = 64; - caps->query_time_elapsed = - caps->query_timestamp = - dev->kmod.props.gpu_can_query_timestamp && - dev->kmod.props.timestamp_frequency != 0; + caps->query_time_elapsed = caps->query_timestamp = + dev->kmod.dev->props.gpu_can_query_timestamp && + dev->kmod.dev->props.timestamp_frequency != 0; if (caps->query_timestamp) caps->timer_resolution = pan_gpu_time_to_ns(dev, 1); @@ -914,9 +913,8 @@ panfrost_init_screen_caps(struct panfrost_screen *screen) caps->native_fence_fd = true; - caps->context_priority_mask = - from_kmod_group_allow_priority_flags( - dev->kmod.props.allowed_group_priorities_mask); + caps->context_priority_mask = from_kmod_group_allow_priority_flags( + dev->kmod.dev->props.allowed_group_priorities_mask); caps->astc_decode_mode = dev->arch >= 9 && (dev->compressed_formats & (1 << 30)); @@ -1006,7 +1004,7 @@ get_core_mask(const struct panfrost_device *dev, const struct pipe_screen_config *config, const char *option_name, uint64_t *mask) { - uint64_t present = dev->kmod.props.shader_present; + uint64_t present = dev->kmod.dev->props.shader_present; *mask = driQueryOptionu64(config->options, option_name) & present; if (!*mask) { diff --git a/src/gallium/drivers/panfrost/pan_shader.c b/src/gallium/drivers/panfrost/pan_shader.c index 8e829c3e099..61b2812ae0a 100644 --- a/src/gallium/drivers/panfrost/pan_shader.c 
+++ b/src/gallium/drivers/panfrost/pan_shader.c @@ -146,7 +146,7 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir, struct pan_compile_inputs inputs = { .gpu_id = panfrost_device_gpu_id(dev), - .gpu_variant = dev->kmod.props.gpu_variant, + .gpu_variant = dev->kmod.dev->props.gpu_variant, .get_conv_desc = screen->vtbl.get_conv_desc, }; @@ -688,8 +688,8 @@ panfrost_get_compute_state_info(struct pipe_context *pipe, void *cso, struct panfrost_compiled_shader *cs = util_dynarray_begin(&uncompiled->variants); - info->max_threads = - pan_compute_max_thread_count(&dev->kmod.props, cs->info.work_reg_count); + info->max_threads = pan_compute_max_thread_count(&dev->kmod.dev->props, + cs->info.work_reg_count); info->private_memory = cs->info.tls_size; info->simd_sizes = pan_subgroup_size(dev->arch); info->preferred_simd_size = info->simd_sizes; diff --git a/src/panfrost/lib/kmod/pan_kmod.h b/src/panfrost/lib/kmod/pan_kmod.h index d763376ece9..4d1ede9a308 100644 --- a/src/panfrost/lib/kmod/pan_kmod.h +++ b/src/panfrost/lib/kmod/pan_kmod.h @@ -369,10 +369,6 @@ struct pan_kmod_ops { /* Destroy a pan_kmod_dev object. */ void (*dev_destroy)(struct pan_kmod_dev *dev); - /* Query device properties. */ - void (*dev_query_props)(const struct pan_kmod_dev *dev, - struct pan_kmod_dev_props *props); - /* Query the maxium user VA range. * Users are free to use a subset of this range if they need less VA space. * This method is optional, when not specified, kmod assumes the whole VA @@ -466,6 +462,9 @@ struct pan_kmod_dev { /* KMD backing this device. */ struct pan_kmod_driver driver; + /* KMD-agnostic device properties. */ + struct pan_kmod_dev_props props; + /* kmod backend ops assigned at device creation. 
*/ const struct pan_kmod_ops *ops; @@ -499,25 +498,15 @@ pan_kmod_dev_create(int fd, uint32_t flags, void pan_kmod_dev_destroy(struct pan_kmod_dev *dev); -static inline void -pan_kmod_dev_query_props(const struct pan_kmod_dev *dev, - struct pan_kmod_dev_props *props) -{ - dev->ops->dev_query_props(dev, props); -} - static inline struct pan_kmod_va_range pan_kmod_dev_query_user_va_range(const struct pan_kmod_dev *dev) { if (dev->ops->dev_query_user_va_range) return dev->ops->dev_query_user_va_range(dev); - struct pan_kmod_dev_props props; - - pan_kmod_dev_query_props(dev, &props); return (struct pan_kmod_va_range){ .start = 0, - .size = 1ull << MMU_FEATURES_VA_BITS(props.mmu_features), + .size = 1ull << MMU_FEATURES_VA_BITS(dev->props.mmu_features), }; } diff --git a/src/panfrost/lib/kmod/panfrost_kmod.c b/src/panfrost/lib/kmod/panfrost_kmod.c index df5382f5793..5c13e6f3549 100644 --- a/src/panfrost/lib/kmod/panfrost_kmod.c +++ b/src/panfrost/lib/kmod/panfrost_kmod.c @@ -42,39 +42,6 @@ struct panfrost_kmod_bo { uint64_t offset; }; -static struct pan_kmod_dev * -panfrost_kmod_dev_create(int fd, uint32_t flags, drmVersionPtr version, - const struct pan_kmod_allocator *allocator) -{ - if (version->version_major < 1 || - (version->version_major == 1 && version->version_minor < 1)) { - mesa_loge("kernel driver is too old (requires at least 1.1, found %d.%d)", - version->version_major, version->version_minor); - return NULL; - } - - struct panfrost_kmod_dev *panfrost_dev = - pan_kmod_alloc(allocator, sizeof(*panfrost_dev)); - if (!panfrost_dev) { - mesa_loge("failed to allocate a panfrost_kmod_dev object"); - return NULL; - } - - pan_kmod_dev_init(&panfrost_dev->base, fd, flags, version, - &panfrost_kmod_ops, allocator); - return &panfrost_dev->base; -} - -static void -panfrost_kmod_dev_destroy(struct pan_kmod_dev *dev) -{ - struct panfrost_kmod_dev *panfrost_dev = - container_of(dev, struct panfrost_kmod_dev, base); - - pan_kmod_dev_cleanup(dev); - 
pan_kmod_free(dev->allocator, panfrost_dev); -} - /* Abstraction over the raw drm_panfrost_get_param ioctl for fetching * information about devices. */ @@ -97,9 +64,10 @@ panfrost_query_raw(int fd, enum drm_panfrost_param param, bool required, } static void -panfrost_dev_query_thread_props(const struct pan_kmod_dev *dev, - struct pan_kmod_dev_props *props) +panfrost_dev_query_thread_props(struct panfrost_kmod_dev *panfrost_dev) { + struct pan_kmod_dev_props *props = &panfrost_dev->base.props; + const struct pan_kmod_dev *dev = &panfrost_dev->base; int fd = dev->fd; props->max_threads_per_core = @@ -177,9 +145,10 @@ panfrost_dev_query_thread_props(const struct pan_kmod_dev *dev, } static void -panfrost_dev_query_props(const struct pan_kmod_dev *dev, - struct pan_kmod_dev_props *props) +panfrost_dev_query_props(struct panfrost_kmod_dev *panfrost_dev) { + struct pan_kmod_dev_props *props = &panfrost_dev->base.props; + const struct pan_kmod_dev *dev = &panfrost_dev->base; int fd = dev->fd; memset(props, 0, sizeof(*props)); @@ -203,7 +172,7 @@ panfrost_dev_query_props(const struct pan_kmod_dev *dev, props->afbc_features = panfrost_query_raw(fd, DRM_PANFROST_PARAM_AFBC_FEATURES, true, 0); - panfrost_dev_query_thread_props(dev, props); + panfrost_dev_query_thread_props(panfrost_dev); if (dev->driver.version.major > 1 || dev->driver.version.minor >= 3) { props->gpu_can_query_timestamp = true; @@ -230,6 +199,41 @@ panfrost_dev_query_props(const struct pan_kmod_dev *dev, PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH; } +static struct pan_kmod_dev * +panfrost_kmod_dev_create(int fd, uint32_t flags, drmVersionPtr version, + const struct pan_kmod_allocator *allocator) +{ + if (version->version_major < 1 || + (version->version_major == 1 && version->version_minor < 1)) { + mesa_loge("kernel driver is too old (requires at least 1.1, found %d.%d)", + version->version_major, version->version_minor); + return NULL; + } + + struct panfrost_kmod_dev *panfrost_dev = + pan_kmod_alloc(allocator, 
sizeof(*panfrost_dev)); + if (!panfrost_dev) { + mesa_loge("failed to allocate a panfrost_kmod_dev object"); + return NULL; + } + + pan_kmod_dev_init(&panfrost_dev->base, fd, flags, version, + &panfrost_kmod_ops, allocator); + panfrost_dev_query_props(panfrost_dev); + + return &panfrost_dev->base; +} + +static void +panfrost_kmod_dev_destroy(struct pan_kmod_dev *dev) +{ + struct panfrost_kmod_dev *panfrost_dev = + container_of(dev, struct panfrost_kmod_dev, base); + + pan_kmod_dev_cleanup(dev); + pan_kmod_free(dev->allocator, panfrost_dev); +} + static uint32_t to_panfrost_bo_flags(struct pan_kmod_dev *dev, uint32_t flags) { @@ -530,7 +534,6 @@ panfrost_kmod_bo_label(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const c const struct pan_kmod_ops panfrost_kmod_ops = { .dev_create = panfrost_kmod_dev_create, .dev_destroy = panfrost_kmod_dev_destroy, - .dev_query_props = panfrost_dev_query_props, .dev_query_user_va_range = panfrost_kmod_dev_query_user_va_range, .bo_alloc = panfrost_kmod_bo_alloc, .bo_free = panfrost_kmod_bo_free, diff --git a/src/panfrost/lib/kmod/panthor_kmod.c b/src/panfrost/lib/kmod/panthor_kmod.c index ba35915a7d7..d7caa79b91e 100644 --- a/src/panfrost/lib/kmod/panthor_kmod.c +++ b/src/panfrost/lib/kmod/panthor_kmod.c @@ -103,6 +103,90 @@ struct panthor_kmod_bo { } sync; }; +static uint32_t +to_kmod_group_allow_priority_flags(uint32_t panthor_flags) +{ + uint32_t kmod_flags = 0; + + if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_REALTIME)) + kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_REALTIME; + + if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_HIGH)) + kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH; + + if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_MEDIUM)) + kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_MEDIUM; + + if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_LOW)) + kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_LOW; + + return kmod_flags; +} + +static void +panthor_dev_query_thread_props(struct 
panthor_kmod_dev *panthor_dev) +{ + struct pan_kmod_dev_props *props = &panthor_dev->base.props; + + props->max_threads_per_wg = panthor_dev->props.gpu.thread_max_workgroup_size; + props->max_threads_per_core = panthor_dev->props.gpu.max_threads; + props->max_tasks_per_core = panthor_dev->props.gpu.thread_features >> 24; + props->num_registers_per_core = + panthor_dev->props.gpu.thread_features & 0x3fffff; + + /* We assume that all thread properties are populated. If we ever have a GPU + * that have one of the THREAD_xxx register that's zero, we can always add a + * quirk here. + */ + assert(props->max_threads_per_wg && props->max_threads_per_core && + props->max_tasks_per_core && props->num_registers_per_core); + + /* There is no THREAD_TLS_ALLOC register on v10+, and the maximum number + * of TLS instance per core is assumed to be the maximum number of threads + * per core. + */ + props->max_tls_instance_per_core = props->max_threads_per_core; +} + +static void +panthor_dev_query_props(struct panthor_kmod_dev *panthor_dev) +{ + struct pan_kmod_dev_props *props = &panthor_dev->base.props; + + *props = (struct pan_kmod_dev_props){ + .gpu_id = panthor_dev->props.gpu.gpu_id, + .gpu_variant = panthor_dev->props.gpu.core_features & 0xff, + .shader_present = panthor_dev->props.gpu.shader_present, + .tiler_features = panthor_dev->props.gpu.tiler_features, + .mem_features = panthor_dev->props.gpu.mem_features, + .mmu_features = panthor_dev->props.gpu.mmu_features, + + /* This register does not exist because AFBC is no longer optional. */ + .afbc_features = 0, + + /* Access to timestamp from the GPU is always supported on Panthor. 
*/ + .gpu_can_query_timestamp = true, + + .timestamp_frequency = panthor_dev->props.timestamp.timestamp_frequency, + + .allowed_group_priorities_mask = to_kmod_group_allow_priority_flags( + panthor_dev->props.group_priorities.allowed_mask), + }; + + if (panthor_dev->base.driver.version.major > 1 || + panthor_dev->base.driver.version.minor >= 6) + props->timestamp_device_coherent = true; + + static_assert(sizeof(props->texture_features) == + sizeof(panthor_dev->props.gpu.texture_features), + "Mismatch in texture_features array size"); + + memcpy(props->texture_features, panthor_dev->props.gpu.texture_features, + sizeof(props->texture_features)); + + panthor_dev_query_thread_props(panthor_dev); +} + static struct pan_kmod_dev * panthor_kmod_dev_create(int fd, uint32_t flags, drmVersionPtr version, const struct pan_kmod_allocator *allocator) @@ -195,8 +279,11 @@ panthor_kmod_dev_create(int fd, uint32_t flags, drmVersionPtr version, } assert(!ret); - pan_kmod_dev_init(&panthor_dev->base, fd, flags, version, &panthor_kmod_ops, - allocator); + + pan_kmod_dev_init(&panthor_dev->base, fd, flags, version, + &panthor_kmod_ops, allocator); + panthor_dev_query_props(panthor_dev); + return &panthor_dev->base; err_free_dev: @@ -215,91 +302,6 @@ panthor_kmod_dev_destroy(struct pan_kmod_dev *dev) pan_kmod_free(dev->allocator, panthor_dev); } -static uint32_t -to_kmod_group_allow_priority_flags(uint32_t panthor_flags) -{ - uint32_t kmod_flags = 0; - - if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_REALTIME)) - kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_REALTIME; - - if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_HIGH)) - kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH; - - if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_MEDIUM)) - kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_MEDIUM; - - if (panthor_flags & BITFIELD_BIT(PANTHOR_GROUP_PRIORITY_LOW)) - kmod_flags |= PAN_KMOD_GROUP_ALLOW_PRIORITY_LOW; - - return kmod_flags; -} - -static void 
-panthor_dev_query_thread_props(const struct panthor_kmod_dev *panthor_dev, - struct pan_kmod_dev_props *props) -{ - props->max_threads_per_wg = panthor_dev->props.gpu.thread_max_workgroup_size; - props->max_threads_per_core = panthor_dev->props.gpu.max_threads; - props->max_tasks_per_core = panthor_dev->props.gpu.thread_features >> 24; - props->num_registers_per_core = - panthor_dev->props.gpu.thread_features & 0x3fffff; - - /* We assume that all thread properties are populated. If we ever have a GPU - * that have one of the THREAD_xxx register that's zero, we can always add a - * quirk here. - */ - assert(props->max_threads_per_wg && props->max_threads_per_core && - props->max_tasks_per_core && props->num_registers_per_core); - - /* There is no THREAD_TLS_ALLOC register on v10+, and the maximum number - * of TLS instance per core is assumed to be the maximum number of threads - * per core. - */ - props->max_tls_instance_per_core = props->max_threads_per_core; -} - -static void -panthor_dev_query_props(const struct pan_kmod_dev *dev, - struct pan_kmod_dev_props *props) -{ - struct panthor_kmod_dev *panthor_dev = - container_of(dev, struct panthor_kmod_dev, base); - - *props = (struct pan_kmod_dev_props){ - .gpu_id = panthor_dev->props.gpu.gpu_id, - .gpu_variant = panthor_dev->props.gpu.core_features & 0xff, - .shader_present = panthor_dev->props.gpu.shader_present, - .tiler_features = panthor_dev->props.gpu.tiler_features, - .mem_features = panthor_dev->props.gpu.mem_features, - .mmu_features = panthor_dev->props.gpu.mmu_features, - - /* This register does not exist because AFBC is no longer optional. */ - .afbc_features = 0, - - /* Access to timstamp from the GPU is always supported on Panthor. 
*/ - .gpu_can_query_timestamp = true, - - .timestamp_frequency = panthor_dev->props.timestamp.timestamp_frequency, - - .allowed_group_priorities_mask = to_kmod_group_allow_priority_flags( - panthor_dev->props.group_priorities.allowed_mask), - }; - - if (dev->driver.version.major > 1 || dev->driver.version.minor >= 6) { - props->timestamp_device_coherent = true; - } - - static_assert(sizeof(props->texture_features) == - sizeof(panthor_dev->props.gpu.texture_features), - "Mismatch in texture_features array size"); - - memcpy(props->texture_features, panthor_dev->props.gpu.texture_features, - sizeof(props->texture_features)); - - panthor_dev_query_thread_props(panthor_dev, props); -} - static struct pan_kmod_va_range panthor_kmod_dev_query_user_va_range(const struct pan_kmod_dev *dev) { @@ -706,10 +708,6 @@ static struct pan_kmod_vm * panthor_kmod_vm_create(struct pan_kmod_dev *dev, uint32_t flags, uint64_t user_va_start, uint64_t user_va_range) { - struct pan_kmod_dev_props props; - - panthor_dev_query_props(dev, &props); - struct panthor_kmod_vm *panthor_vm = pan_kmod_dev_alloc(dev, sizeof(*panthor_vm)); if (!panthor_vm) { @@ -1236,7 +1234,6 @@ panthor_kmod_bo_label(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const ch const struct pan_kmod_ops panthor_kmod_ops = { .dev_create = panthor_kmod_dev_create, .dev_destroy = panthor_kmod_dev_destroy, - .dev_query_props = panthor_dev_query_props, .dev_query_user_va_range = panthor_kmod_dev_query_user_va_range, .bo_alloc = panthor_kmod_bo_alloc, .bo_free = panthor_kmod_bo_free, diff --git a/src/panfrost/perf/pan_perf.c b/src/panfrost/perf/pan_perf.c index 0f2363b658d..53506570310 100644 --- a/src/panfrost/perf/pan_perf.c +++ b/src/panfrost/perf/pan_perf.c @@ -82,8 +82,7 @@ pan_perf_init(struct pan_perf *perf, int fd) perf->dev = pan_kmod_dev_create(fd, 0, NULL); assert(perf->dev); - struct pan_kmod_dev_props props = {}; - pan_kmod_dev_query_props(perf->dev, &props); + struct pan_kmod_dev_props props = perf->dev->props; 
const struct pan_model *model = pan_get_model(props.gpu_id, props.gpu_variant); diff --git a/src/panfrost/tools/panfrost_texfeatures.c b/src/panfrost/tools/panfrost_texfeatures.c index a57baf1631b..ac9b01b988e 100644 --- a/src/panfrost/tools/panfrost_texfeatures.c +++ b/src/panfrost/tools/panfrost_texfeatures.c @@ -71,10 +71,7 @@ main(void) struct pan_kmod_dev *dev = pan_kmod_dev_create(fd, PAN_KMOD_DEV_FLAG_OWNS_FD, NULL); - struct pan_kmod_dev_props props; - - pan_kmod_dev_query_props(dev, &props); - + struct pan_kmod_dev_props props = dev->props; uint32_t supported = pan_query_compressed_formats(&props); bool all_ok = true; diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c index 2c1f51e528a..a155e4e1f93 100644 --- a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c +++ b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c @@ -288,7 +288,7 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev) nir_builder b = nir_builder_init_simple_shader( MESA_SHADER_COMPUTE, pan_get_nir_shader_compiler_options( - pan_arch(phys_dev->kmod.props.gpu_id)), + pan_arch(phys_dev->kmod.dev->props.gpu_id)), "%s", "desc_copy"); /* We actually customize that at execution time to issue the @@ -302,8 +302,8 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev) single_desc_copy(&b, desc_copy_id); struct pan_compile_inputs inputs = { - .gpu_id = phys_dev->kmod.props.gpu_id, - .gpu_variant = phys_dev->kmod.props.gpu_variant, + .gpu_id = phys_dev->kmod.dev->props.gpu_id, + .gpu_variant = phys_dev->kmod.dev->props.gpu_variant, }; pan_preprocess_nir(b.shader, inputs.gpu_id); diff --git a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h index 525130531ec..d74b02ed8d3 100644 --- a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h +++ b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h @@ -677,7 +677,7 @@ panvk_per_arch(calculate_task_axis_and_increment)( { /* Pick the task_axis and task_increment 
to maximize thread * utilization. */ - const struct pan_kmod_dev_props *props = &phys_dev->kmod.props; + const struct pan_kmod_dev_props *props = &phys_dev->kmod.dev->props; const unsigned max_thread_cnt = pan_compute_max_thread_count(props, shader->info.work_reg_count); const unsigned threads_per_wg = shader->cs.local_size.x * diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c index 25895eecb15..12ba5e22fe4 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c @@ -64,11 +64,11 @@ emit_tls(struct panvk_cmd_buffer *cmdbuf) struct panvk_physical_device *phys_dev = to_panvk_physical_device(dev->vk.physical); unsigned core_id_range; - pan_query_core_count(&phys_dev->kmod.props, &core_id_range); + pan_query_core_count(&phys_dev->kmod.dev->props, &core_id_range); if (cmdbuf->state.tls.info.tls.size) { unsigned thread_tls_alloc = - pan_query_thread_tls_alloc(&phys_dev->kmod.props); + pan_query_thread_tls_alloc(&phys_dev->kmod.dev->props); unsigned size = pan_get_total_stack_size(cmdbuf->state.tls.info.tls.size, thread_tls_alloc, core_id_range); @@ -823,7 +823,7 @@ init_cs_builders(struct panvk_cmd_buffer *cmdbuf) .nr_registers = csif_info->cs_reg_count, .nr_kernel_registers = MAX2(csif_info->unpreserved_cs_reg_count, 4), .compute_ep_limit = - PAN_ARCH >= 12 ? phys_dev->kmod.props.max_tasks_per_core : 0, + PAN_ARCH >= 12 ? 
phys_dev->kmod.dev->props.max_tasks_per_core : 0, .alloc_buffer = alloc_cs_buffer, .cookie = cmdbuf, .ls_sb_slot = SB_ID(LS), diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c index 14bb1ff0854..e4fa70384d1 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c @@ -87,10 +87,10 @@ panvk_per_arch(cmd_dispatch_prepare_tls)( if (tlsinfo.wls.size) { unsigned core_id_range; - pan_query_core_count(&phys_dev->kmod.props, &core_id_range); + pan_query_core_count(&phys_dev->kmod.dev->props, &core_id_range); tlsinfo.wls.instances = pan_calc_wls_instances( - &cs->cs.local_size, &phys_dev->kmod.props, indirect ? NULL : dim); + &cs->cs.local_size, &phys_dev->kmod.dev->props, indirect ? NULL : dim); unsigned wls_total_size = pan_calc_total_wls_size( tlsinfo.wls.size, tlsinfo.wls.instances, core_id_range); @@ -156,7 +156,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) unsigned wg_per_task = 0; if (indirect) wg_per_task = pan_calc_workgroups_per_task(&cs->cs.local_size, - &phys_dev->kmod.props); + &phys_dev->kmod.dev->props); if (compute_state_dirty(cmdbuf, DESC_STATE) || compute_state_dirty(cmdbuf, CS)) { diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index 237736a5dc7..f5179693538 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -1000,7 +1000,7 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf) to_panvk_physical_device(cmdbuf->vk.base.device->physical); const bool tracing_enabled = PANVK_DEBUG(TRACE); struct pan_tiler_features tiler_features = - pan_query_tiler_features(&phys_dev->kmod.props); + pan_query_tiler_features(&phys_dev->kmod.dev->props); bool simul_use = cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT; struct pan_ptr tiler_desc = {0}; diff --git 
a/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c b/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c index b7d9387b5e3..bd2bdb82cbc 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c +++ b/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c @@ -531,7 +531,7 @@ init_subqueue(struct panvk_gpu_queue *queue, enum panvk_subqueue_id subqueue) subqueue); pandecode_cs_binary(dev->debug.decode_ctx, qsubmit.stream_addr, qsubmit.stream_size, - phys_dev->kmod.props.gpu_id); + phys_dev->kmod.dev->props.gpu_id); } return VK_SUCCESS; @@ -778,7 +778,7 @@ panvk_queue_submit_init(struct panvk_queue_submit *submit, submit->process_utrace = u_trace_should_process(&submit->dev->utrace.utctx) && - submit->phys_dev->kmod.props.timestamp_frequency; + submit->phys_dev->kmod.dev->props.timestamp_frequency; submit->force_sync = PANVK_DEBUG(TRACE) || PANVK_DEBUG(SYNC); } @@ -1201,7 +1201,8 @@ panvk_queue_submit_process_debug(const struct panvk_queue_submit *submit) struct pandecode_context *decode_ctx = submit->dev->debug.decode_ctx; if (PANVK_DEBUG(TRACE)) { - const struct pan_kmod_dev_props *props = &submit->phys_dev->kmod.props; + const struct pan_kmod_dev_props *props = + &submit->phys_dev->kmod.dev->props; for (uint32_t i = 0; i < submit->qsubmit_count; i++) { const struct drm_panthor_queue_submit *qsubmit = &submit->qsubmits[i]; diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c index 7c8d66de2e4..9f41a13077e 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c @@ -117,10 +117,10 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf) if (batch->tlsinfo.tls.size) { unsigned thread_tls_alloc = - pan_query_thread_tls_alloc(&phys_dev->kmod.props); + pan_query_thread_tls_alloc(&phys_dev->kmod.dev->props); unsigned core_id_range; - pan_query_core_count(&phys_dev->kmod.props, &core_id_range); + pan_query_core_count(&phys_dev->kmod.dev->props, &core_id_range); unsigned size = 
pan_get_total_stack_size(batch->tlsinfo.tls.size, thread_tls_alloc, core_id_range); diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c index 162b5d25657..f8aa9614cbf 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c @@ -50,9 +50,9 @@ panvk_per_arch(cmd_dispatch_prepare_tls)( if (batch->tlsinfo.wls.size) { unsigned core_id_range; - pan_query_core_count(&phys_dev->kmod.props, &core_id_range); + pan_query_core_count(&phys_dev->kmod.dev->props, &core_id_range); batch->tlsinfo.wls.instances = pan_calc_wls_instances( - &cs->cs.local_size, &phys_dev->kmod.props, indirect ? NULL : dim); + &cs->cs.local_size, &phys_dev->kmod.dev->props, indirect ? NULL : dim); batch->wls_total_size = pan_calc_total_wls_size( batch->tlsinfo.wls.size, batch->tlsinfo.wls.instances, core_id_range); } diff --git a/src/panfrost/vulkan/jm/panvk_vX_gpu_queue.c b/src/panfrost/vulkan/jm/panvk_vX_gpu_queue.c index 22fedbe0752..5fd4c64962e 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_gpu_queue.c +++ b/src/panfrost/vulkan/jm/panvk_vX_gpu_queue.c @@ -77,7 +77,7 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc if (PANVK_DEBUG(TRACE)) { pandecode_jc(dev->debug.decode_ctx, batch->vtc_jc.first_job, - phys_dev->kmod.props.gpu_id); + phys_dev->kmod.dev->props.gpu_id); } if (PANVK_DEBUG(DUMP)) @@ -85,7 +85,7 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc if (PANVK_DEBUG(SYNC)) pandecode_abort_on_fault(dev->debug.decode_ctx, submit.jc, - phys_dev->kmod.props.gpu_id); + phys_dev->kmod.dev->props.gpu_id); } if (batch->frag_jc.first_job) { @@ -115,14 +115,14 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc if (PANVK_DEBUG(TRACE)) pandecode_jc(dev->debug.decode_ctx, batch->frag_jc.first_job, - phys_dev->kmod.props.gpu_id); + phys_dev->kmod.dev->props.gpu_id); if (PANVK_DEBUG(DUMP)) 
pandecode_dump_mappings(dev->debug.decode_ctx); if (PANVK_DEBUG(SYNC)) pandecode_abort_on_fault(dev->debug.decode_ctx, submit.jc, - phys_dev->kmod.props.gpu_id); + phys_dev->kmod.dev->props.gpu_id); } if (PANVK_DEBUG(TRACE)) diff --git a/src/panfrost/vulkan/panvk_cmd_draw.h b/src/panfrost/vulkan/panvk_cmd_draw.h index 59c76081127..cd6f71d271c 100644 --- a/src/panfrost/vulkan/panvk_cmd_draw.h +++ b/src/panfrost/vulkan/panvk_cmd_draw.h @@ -247,7 +247,7 @@ panvk_select_tiler_hierarchy_mask(const struct panvk_physical_device *phys_dev, unsigned bin_ptr_mem_budget) { struct pan_tiler_features tiler_features = - pan_query_tiler_features(&phys_dev->kmod.props); + pan_query_tiler_features(&phys_dev->kmod.dev->props); uint32_t hierarchy_mask = GENX(pan_select_tiler_hierarchy_mask)( state->render.fb.info.width, state->render.fb.info.height, diff --git a/src/panfrost/vulkan/panvk_image.c b/src/panfrost/vulkan/panvk_image.c index 92e795a7992..583c1bfcbfc 100644 --- a/src/panfrost/vulkan/panvk_image.c +++ b/src/panfrost/vulkan/panvk_image.c @@ -54,7 +54,7 @@ panvk_image_can_use_afbc( VkImageUsageFlags usage, VkImageType type, VkImageTiling tiling, VkImageCreateFlags flags) { - unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id); + unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id); enum pipe_format pfmt = vk_format_to_pipe_format(fmt); /* Disallow AFBC if either of these is true @@ -79,7 +79,7 @@ panvk_image_can_use_afbc( return !PANVK_DEBUG(NO_AFBC) && !(usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_HOST_TRANSFER_BIT)) && - pan_query_afbc(&phys_dev->kmod.props) && + pan_query_afbc(&phys_dev->kmod.dev->props) && pan_afbc_supports_format(arch, pfmt) && tiling != VK_IMAGE_TILING_LINEAR && type != VK_IMAGE_TYPE_1D && (type != VK_IMAGE_TYPE_3D || arch >= 7) && @@ -143,7 +143,7 @@ get_plane_count(struct panvk_image *image) struct panvk_physical_device *phys_dev = to_panvk_physical_device(image->vk.base.device->physical); - unsigned arch = 
pan_arch(phys_dev->kmod.props.gpu_id); + unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id); /* Z32_S8X24 is not supported on v9+, and we don't want to use it * on v7- anyway, because it's less efficient than the multiplanar @@ -208,7 +208,7 @@ panvk_image_can_use_mod(struct panvk_image *image, { struct panvk_physical_device *phys_dev = to_panvk_physical_device(image->vk.base.device->physical); - unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id); + unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id); const bool forced_linear = PANVK_DEBUG(LINEAR) || image->vk.tiling == VK_IMAGE_TILING_LINEAR || image->vk.image_type == VK_IMAGE_TYPE_1D; @@ -298,7 +298,7 @@ panvk_image_can_use_mod(struct panvk_image *image, }; enum pan_mod_support supported = - pan_image_test_props(&phys_dev->kmod.props, &iprops, iusage); + pan_image_test_props(&phys_dev->kmod.dev->props, &iprops, iusage); if (supported == PAN_MOD_NOT_SUPPORTED || (supported == PAN_MOD_NOT_OPTIMAL && optimal_only)) return false; @@ -413,7 +413,7 @@ panvk_image_init_layouts(struct panvk_image *image, struct panvk_device *dev = to_panvk_device(image->vk.base.device); struct panvk_physical_device *phys_dev = to_panvk_physical_device(dev->vk.physical); - unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id); + unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id); const VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_info = vk_find_struct_const( pCreateInfo->pNext, diff --git a/src/panfrost/vulkan/panvk_physical_device.c b/src/panfrost/vulkan/panvk_physical_device.c index 74d74702575..3289c05303f 100644 --- a/src/panfrost/vulkan/panvk_physical_device.c +++ b/src/panfrost/vulkan/panvk_physical_device.c @@ -145,8 +145,8 @@ init_shader_caches(struct panvk_physical_device *device, _mesa_sha1_update(&sha_ctx, instance->driver_build_sha, sizeof(instance->driver_build_sha)); - _mesa_sha1_update(&sha_ctx, &device->kmod.props.gpu_id, - sizeof(device->kmod.props.gpu_id)); + 
_mesa_sha1_update(&sha_ctx, &device->kmod.dev->props.gpu_id, + sizeof(device->kmod.dev->props.gpu_id)); unsigned char sha[SHA1_DIGEST_LENGTH]; _mesa_sha1_final(&sha_ctx, sha); @@ -157,7 +157,7 @@ init_shader_caches(struct panvk_physical_device *device, #ifdef ENABLE_SHADER_CACHE char renderer[17]; ASSERTED int len = snprintf(renderer, sizeof(renderer), "panvk_0x%08x", - device->kmod.props.gpu_id); + device->kmod.dev->props.gpu_id); assert(len == sizeof(renderer) - 1); char timestamp[SHA1_DIGEST_STRING_LENGTH]; @@ -186,7 +186,7 @@ get_core_mask(struct panvk_physical_device *device, const struct panvk_instance *instance, const char *option_name, uint64_t *mask) { - uint64_t present = device->kmod.props.shader_present; + uint64_t present = device->kmod.dev->props.shader_present; *mask = driQueryOptionu64(&instance->dri_options, option_name) & present; if (!*mask) @@ -218,7 +218,7 @@ static VkResult get_device_sync_types(struct panvk_physical_device *device, const struct panvk_instance *instance) { - const unsigned arch = pan_arch(device->kmod.props.gpu_id); + const unsigned arch = pan_arch(device->kmod.dev->props.gpu_id); uint32_t sync_type_count = 0; device->drm_syncobj_type = vk_drm_syncobj_get_type(device->kmod.dev->fd); @@ -252,12 +252,12 @@ get_device_sync_types(struct panvk_physical_device *device, float panvk_get_gpu_system_timestamp_period(const struct panvk_physical_device *device) { - if (!device->kmod.props.gpu_can_query_timestamp || - !device->kmod.props.timestamp_frequency) + if (!device->kmod.dev->props.gpu_can_query_timestamp || + !device->kmod.dev->props.timestamp_frequency) return 0; const float ns_per_s = 1000000000.0; - return ns_per_s / (float)device->kmod.props.timestamp_frequency; + return ns_per_s / (float)device->kmod.dev->props.timestamp_frequency; } void @@ -283,18 +283,16 @@ panvk_physical_device_init(struct panvk_physical_device *device, if (result != VK_SUCCESS) return result; - pan_kmod_dev_query_props(device->kmod.dev, 
&device->kmod.props); + device->model = pan_get_model(device->kmod.dev->props.gpu_id, + device->kmod.dev->props.gpu_variant); - device->model = pan_get_model(device->kmod.props.gpu_id, - device->kmod.props.gpu_variant); - - unsigned arch = pan_arch(device->kmod.props.gpu_id); + unsigned arch = pan_arch(device->kmod.dev->props.gpu_id); if (!device->model) { result = panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "Unknown gpu_id (%#x) or variant (%#x)", - device->kmod.props.gpu_id, - device->kmod.props.gpu_variant); + device->kmod.dev->props.gpu_id, + device->kmod.dev->props.gpu_variant); goto fail; } @@ -400,13 +398,13 @@ panvk_fill_global_priority(const struct panvk_physical_device *physical_device, uint32_t family_idx, VkQueueFamilyGlobalPriorityPropertiesKHR *prio) { - const unsigned arch = pan_arch(physical_device->kmod.props.gpu_id); + const unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id); uint32_t prio_idx = 0; switch (family_idx) { case PANVK_QUEUE_FAMILY_GPU: { enum pan_kmod_group_allow_priority_flags prio_mask = - physical_device->kmod.props.allowed_group_priorities_mask; + physical_device->kmod.dev->props.allowed_group_priorities_mask; /* Non-medium priority context is not hooked-up in the JM backend, even * though the panfrost kmod advertize it. Manually filter non-medium @@ -445,7 +443,7 @@ panvk_GetPhysicalDeviceQueueFamilyProperties2( VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice); VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties, pQueueFamilyPropertyCount); - unsigned arch = pan_arch(physical_device->kmod.props.gpu_id); + unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id); const VkQueueFamilyProperties qfamily_props[PANVK_QUEUE_FAMILY_COUNT] = { [PANVK_QUEUE_FAMILY_GPU] = { @@ -455,7 +453,8 @@ panvk_GetPhysicalDeviceQueueFamilyProperties2( some CTS tests */ .queueCount = arch >= 10 ? 
2 : 1, .timestampValidBits = - arch >= 10 && physical_device->kmod.props.gpu_can_query_timestamp + arch >= 10 && + physical_device->kmod.dev->props.gpu_can_query_timestamp ? 64 : 0, .minImageTransferGranularity = {1, 1, 1}, @@ -525,7 +524,7 @@ panvk_CreateDevice(VkPhysicalDevice physicalDevice, const VkAllocationCallbacks *pAllocator, VkDevice *pDevice) { VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice); - unsigned arch = pan_arch(physical_device->kmod.props.gpu_id); + unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id); VkResult result = VK_ERROR_INITIALIZATION_FAILED; panvk_arch_dispatch_ret(arch, create_device, result, physical_device, @@ -540,7 +539,7 @@ panvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) VK_FROM_HANDLE(panvk_device, device, _device); struct panvk_physical_device *physical_device = to_panvk_physical_device(device->vk.physical); - unsigned arch = pan_arch(physical_device->kmod.props.gpu_id); + unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id); panvk_arch_dispatch(arch, destroy_device, device, pAllocator); } @@ -580,7 +579,7 @@ format_is_supported(struct panvk_physical_device *physical_device, * the supported formats reported by the GPU. 
*/ if (util_format_is_compressed(pfmt)) { uint32_t supported_compr_fmts = - pan_query_compressed_formats(&physical_device->kmod.props); + pan_query_compressed_formats(&physical_device->kmod.dev->props); if (!(BITFIELD_BIT(fmt.texfeat_bit) & supported_compr_fmts)) return false; @@ -596,7 +595,7 @@ get_image_plane_format_features(struct panvk_physical_device *physical_device, VkFormatFeatureFlags2 features = 0; enum pipe_format pfmt = vk_format_to_pipe_format(format); const struct pan_format fmt = physical_device->formats.all[pfmt]; - unsigned arch = pan_arch(physical_device->kmod.props.gpu_id); + unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id); if (!format_is_supported(physical_device, fmt, pfmt)) return 0; @@ -736,7 +735,7 @@ static VkFormatFeatureFlags2 get_image_format_sample_counts(struct panvk_physical_device *physical_device, VkFormat format) { - unsigned arch = pan_arch(physical_device->kmod.props.gpu_id); + unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id); unsigned max_tib_size = pan_query_tib_size(physical_device->model); unsigned max_cbuf_atts = pan_get_max_cbufs(arch, max_tib_size); @@ -782,7 +781,7 @@ panvk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice, VkFormatProperties2 *pFormatProperties) { VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice); - const unsigned arch = pan_arch(physical_device->kmod.props.gpu_id); + const unsigned arch = pan_arch(physical_device->kmod.dev->props.gpu_id); VkFormatFeatureFlags2 tex = get_image_format_features(physical_device, format); @@ -846,7 +845,7 @@ panvk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice, static VkExtent3D get_max_2d_image_size(struct panvk_physical_device *phys_dev, VkFormat format) { - const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id); + const unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id); const uint64_t max_img_size_B = arch <= 10 ? 
u_uintN_max(32) : u_uintN_max(48); const enum pipe_format pfmt = vk_format_to_pipe_format(format); @@ -869,7 +868,7 @@ get_max_2d_image_size(struct panvk_physical_device *phys_dev, VkFormat format) static VkExtent3D get_max_3d_image_size(struct panvk_physical_device *phys_dev, VkFormat format) { - const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id); + const unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id); const uint64_t max_img_size_B = arch <= 10 ? u_uintN_max(32) : u_uintN_max(48); enum pipe_format pfmt = vk_format_to_pipe_format(format); diff --git a/src/panfrost/vulkan/panvk_physical_device.h b/src/panfrost/vulkan/panvk_physical_device.h index 747a81e6e32..a93f872bbcb 100644 --- a/src/panfrost/vulkan/panvk_physical_device.h +++ b/src/panfrost/vulkan/panvk_physical_device.h @@ -30,7 +30,6 @@ struct panvk_physical_device { struct { struct pan_kmod_dev *dev; - struct pan_kmod_dev_props props; } kmod; const struct pan_model *model; diff --git a/src/panfrost/vulkan/panvk_utrace.c b/src/panfrost/vulkan/panvk_utrace.c index 943b79e5a7e..127f5ecc4ab 100644 --- a/src/panfrost/vulkan/panvk_utrace.c +++ b/src/panfrost/vulkan/panvk_utrace.c @@ -76,7 +76,7 @@ panvk_utrace_read_ts(struct u_trace_context *utctx, void *timestamps, struct panvk_device *dev = to_dev(utctx); const struct panvk_physical_device *pdev = to_panvk_physical_device(dev->vk.physical); - const struct pan_kmod_dev_props *props = &pdev->kmod.props; + const struct pan_kmod_dev_props *props = &pdev->kmod.dev->props; const struct panvk_utrace_buf *buf = timestamps; struct panvk_utrace_flush_data *data = flush_data; diff --git a/src/panfrost/vulkan/panvk_utrace_perfetto.cc b/src/panfrost/vulkan/panvk_utrace_perfetto.cc index ed5bd37e98b..d957c35d3a6 100644 --- a/src/panfrost/vulkan/panvk_utrace_perfetto.cc +++ b/src/panfrost/vulkan/panvk_utrace_perfetto.cc @@ -93,7 +93,7 @@ get_gpu_time_ns(struct panvk_device *dev) { const struct panvk_physical_device *pdev = 
to_panvk_physical_device(dev->vk.physical); - const struct pan_kmod_dev_props *props = &pdev->kmod.props; + const struct pan_kmod_dev_props *props = &pdev->kmod.dev->props; const uint64_t ts = pan_kmod_query_timestamp(dev->kmod.dev); return ts * NSEC_PER_SEC / props->timestamp_frequency; @@ -332,7 +332,7 @@ panvk_utrace_perfetto_init(struct panvk_device *dev, uint32_t queue_count) { const struct panvk_physical_device *pdev = to_panvk_physical_device(dev->vk.physical); - const struct pan_kmod_dev_props *props = &pdev->kmod.props; + const struct pan_kmod_dev_props *props = &pdev->kmod.dev->props; struct panvk_utrace_perfetto *utp = &dev->utrace.utp; if (queue_count > PANVK_UTRACE_PERFETTO_QUEUE_COUNT) { diff --git a/src/panfrost/vulkan/panvk_vX_blend.c b/src/panfrost/vulkan/panvk_vX_blend.c index 859421eafc8..1a2853a6354 100644 --- a/src/panfrost/vulkan/panvk_vX_blend.c +++ b/src/panfrost/vulkan/panvk_vX_blend.c @@ -87,8 +87,8 @@ get_blend_shader(struct panvk_device *dev, /* Compile the NIR shader */ struct pan_compile_inputs inputs = { - .gpu_id = pdev->kmod.props.gpu_id, - .gpu_variant = pdev->kmod.props.gpu_variant, + .gpu_id = pdev->kmod.dev->props.gpu_id, + .gpu_variant = pdev->kmod.dev->props.gpu_variant, .is_blend = true, .blend = { .nr_samples = key.info.nr_samples, diff --git a/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c b/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c index 7bc0c625499..a2d55265ef6 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c @@ -149,8 +149,8 @@ get_preload_shader(struct panvk_device *dev, nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); struct pan_compile_inputs inputs = { - .gpu_id = phys_dev->kmod.props.gpu_id, - .gpu_variant = phys_dev->kmod.props.gpu_variant, + .gpu_id = phys_dev->kmod.dev->props.gpu_id, + .gpu_variant = phys_dev->kmod.dev->props.gpu_variant, .is_blit = true, }; @@ -512,7 +512,7 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct 
pan_fb_info *fbinfo, */ struct panvk_physical_device *pdev = to_panvk_physical_device(dev->vk.physical); - unsigned gpu_prod_id = pdev->kmod.props.gpu_id >> 16; + unsigned gpu_prod_id = pdev->kmod.dev->props.gpu_id >> 16; /* the PAN_ARCH check is redundant but allows compiler optimization when PAN_ARCH <= 6 */ diff --git a/src/panfrost/vulkan/panvk_vX_device.c b/src/panfrost/vulkan/panvk_vX_device.c index a268312eeb4..bdde03958fb 100644 --- a/src/panfrost/vulkan/panvk_vX_device.c +++ b/src/panfrost/vulkan/panvk_vX_device.c @@ -204,7 +204,7 @@ static VkResult check_global_priority(const struct panvk_physical_device *phys_dev, const VkDeviceQueueCreateInfo *create_info) { - const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id); + const unsigned arch = pan_arch(phys_dev->kmod.dev->props.gpu_id); const VkDeviceQueueGlobalPriorityCreateInfoKHR *priority_info = vk_find_struct_const(create_info->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR); @@ -217,7 +217,7 @@ check_global_priority(const struct panvk_physical_device *phys_dev, enum pan_kmod_group_allow_priority_flags requested_prio = global_priority_to_group_allow_priority_flag(priority); enum pan_kmod_group_allow_priority_flags allowed_prio_mask = - phys_dev->kmod.props.allowed_group_priorities_mask; + phys_dev->kmod.dev->props.allowed_group_priorities_mask; /* Non-medium priority context is not hooked-up in the JM backend, even * though the panfrost kmod advertize it. 
Manually filter non-medium diff --git a/src/panfrost/vulkan/panvk_vX_physical_device.c b/src/panfrost/vulkan/panvk_vX_physical_device.c index 07e4994d79c..885e4fcdcb3 100644 --- a/src/panfrost/vulkan/panvk_vX_physical_device.c +++ b/src/panfrost/vulkan/panvk_vX_physical_device.c @@ -48,7 +48,8 @@ panvk_per_arch(get_physical_device_extensions)( .KHR_shader_atomic_int64 = PAN_ARCH >= 9, .KHR_bind_memory2 = true, .KHR_buffer_device_address = true, - .KHR_calibrated_timestamps = device->kmod.props.gpu_can_query_timestamp, + .KHR_calibrated_timestamps = + device->kmod.dev->props.gpu_can_query_timestamp, .KHR_copy_commands2 = true, .KHR_create_renderpass2 = true, .KHR_dedicated_allocation = true, @@ -126,7 +127,8 @@ panvk_per_arch(get_physical_device_extensions)( .EXT_4444_formats = true, .EXT_border_color_swizzle = true, .EXT_buffer_device_address = true, - .EXT_calibrated_timestamps = device->kmod.props.gpu_can_query_timestamp, + .EXT_calibrated_timestamps = + device->kmod.dev->props.gpu_can_query_timestamp, .EXT_custom_border_color = true, .EXT_depth_bias_control = true, .EXT_depth_clamp_zero_one = true, @@ -198,7 +200,7 @@ has_compressed_formats(const struct panvk_physical_device *physical_device, const uint32_t required_formats) { uint32_t supported_compr_fmts = - pan_query_compressed_formats(&physical_device->kmod.props); + pan_query_compressed_formats(&physical_device->kmod.dev->props); return (supported_compr_fmts & required_formats) == required_formats; } @@ -275,8 +277,8 @@ panvk_per_arch(get_physical_device_features)( .vertexPipelineStoresAndAtomics = (PAN_ARCH >= 13 && instance->enable_vertex_pipeline_stores_atomics) || instance->force_enable_shader_atomics, - .fragmentStoresAndAtomics = (PAN_ARCH >= 10) || - instance->force_enable_shader_atomics, + .fragmentStoresAndAtomics = + (PAN_ARCH >= 10) || instance->force_enable_shader_atomics, .shaderTessellationAndGeometryPointSize = false, .shaderImageGatherExtended = true, .shaderStorageImageExtendedFormats = 
true, @@ -492,8 +494,8 @@ panvk_per_arch(get_physical_device_features)( .nullDescriptor = PAN_ARCH >= 10, /* VK_KHR_shader_clock */ - .shaderSubgroupClock = device->kmod.props.gpu_can_query_timestamp, - .shaderDeviceClock = device->kmod.props.timestamp_device_coherent, + .shaderSubgroupClock = device->kmod.dev->props.gpu_can_query_timestamp, + .shaderDeviceClock = device->kmod.dev->props.timestamp_device_coherent, /* VK_KHR_shader_quad_control */ .shaderQuadControl = true, @@ -590,7 +592,7 @@ panvk_per_arch(get_physical_device_properties)( const bool has_disk_cache = device->vk.disk_cache != NULL; /* Ensure that the max threads count per workgroup is valid for Bifrost */ - assert(PAN_ARCH > 8 || device->kmod.props.max_threads_per_wg <= 1024); + assert(PAN_ARCH > 8 || device->kmod.dev->props.max_threads_per_wg <= 1024); float pointSizeRangeMin; float pointSizeRangeMax; @@ -607,14 +609,14 @@ panvk_per_arch(get_physical_device_properties)( *properties = (struct vk_properties){ .apiVersion = get_api_version(), .driverVersion = vk_get_driver_version(), - .vendorID = instance->force_vk_vendor ? instance->force_vk_vendor : - ARM_VENDOR_ID, + .vendorID = + instance->force_vk_vendor ? instance->force_vk_vendor : ARM_VENDOR_ID, /* Collect arch_major, arch_minor, arch_rev and product_major, * as done by the Arm driver. */ - .deviceID = - device->kmod.props.gpu_id & (ARCH_MAJOR | ARCH_MINOR | ARCH_REV | PRODUCT_MAJOR), + .deviceID = device->kmod.dev->props.gpu_id & + (ARCH_MAJOR | ARCH_MINOR | ARCH_REV | PRODUCT_MAJOR), .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, /* Vulkan 1.0 limits */ @@ -740,10 +742,11 @@ panvk_per_arch(get_physical_device_properties)( /* We could also split into serveral jobs but this has many limitations. * As such we limit to the max threads per workgroup supported by the GPU. 
*/ - .maxComputeWorkGroupInvocations = device->kmod.props.max_threads_per_wg, - .maxComputeWorkGroupSize = {device->kmod.props.max_threads_per_wg, - device->kmod.props.max_threads_per_wg, - device->kmod.props.max_threads_per_wg}, + .maxComputeWorkGroupInvocations = + device->kmod.dev->props.max_threads_per_wg, + .maxComputeWorkGroupSize = {device->kmod.dev->props.max_threads_per_wg, + device->kmod.dev->props.max_threads_per_wg, + device->kmod.dev->props.max_threads_per_wg}, /* 8-bit subpixel precision. */ .subPixelPrecisionBits = 8, .subTexelPrecisionBits = 8, @@ -789,8 +792,10 @@ panvk_per_arch(get_physical_device_properties)( .sampledImageStencilSampleCounts = sample_counts, .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, .maxSampleMaskWords = 1, - .timestampComputeAndGraphics = PAN_ARCH >= 10 && device->kmod.props.gpu_can_query_timestamp, - .timestampPeriod = PAN_ARCH >= 10 ? panvk_get_gpu_system_timestamp_period(device) : 0, + .timestampComputeAndGraphics = + PAN_ARCH >= 10 && device->kmod.dev->props.gpu_can_query_timestamp, + .timestampPeriod = + PAN_ARCH >= 10 ? 
panvk_get_gpu_system_timestamp_period(device) : 0, .maxClipDistances = 0, .maxCullDistances = 0, .maxCombinedClipAndCullDistances = 0, @@ -824,14 +829,11 @@ panvk_per_arch(get_physical_device_properties)( .subgroupSupportedStages = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT, .subgroupSupportedOperations = - VK_SUBGROUP_FEATURE_BASIC_BIT | - VK_SUBGROUP_FEATURE_VOTE_BIT | - VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | - VK_SUBGROUP_FEATURE_BALLOT_BIT | + VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT | + VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT | - VK_SUBGROUP_FEATURE_CLUSTERED_BIT | - VK_SUBGROUP_FEATURE_QUAD_BIT | + VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT | VK_SUBGROUP_FEATURE_ROTATE_BIT | VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT, .subgroupQuadOperationsInAllStages = false, @@ -844,11 +846,20 @@ panvk_per_arch(get_physical_device_properties)( .maxMemoryAllocationSize = UINT32_MAX, /* Vulkan 1.2 properties */ + .supportedDepthResolveModes = + VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT | + VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT, + .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | + VK_RESOLVE_MODE_MIN_BIT | + VK_RESOLVE_MODE_MAX_BIT, + .independentResolveNone = true, + .independentResolve = true, + /* VK_KHR_driver_properties */ .driverID = VK_DRIVER_ID_MESA_PANVK, .conformanceVersion = get_conformance_version(), - .denormBehaviorIndependence = PAN_ARCH >= 9 ? - VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE : - VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, + .denormBehaviorIndependence = + PAN_ARCH >= 9 ? 
VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE + : VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, .shaderSignedZeroInfNanPreserveFloat16 = true, .shaderSignedZeroInfNanPreserveFloat32 = true, @@ -867,6 +878,8 @@ panvk_per_arch(get_physical_device_properties)( .shaderRoundingModeRTZFloat64 = false, .maxUpdateAfterBindDescriptorsInAllPools = PAN_ARCH >= 9 ? UINT32_MAX : 0, + /* VK_EXT_descriptor_indexing */ + .maxUpdateAfterBindDescriptorsInAllPools = PAN_ARCH >= 9 ? UINT32_MAX : 0, .shaderUniformBufferArrayNonUniformIndexingNative = false, .shaderSampledImageArrayNonUniformIndexingNative = false, .shaderStorageBufferArrayNonUniformIndexingNative = false, @@ -922,7 +935,8 @@ panvk_per_arch(get_physical_device_properties)( .minSubgroupSize = pan_subgroup_size(PAN_ARCH), .maxSubgroupSize = pan_subgroup_size(PAN_ARCH), .maxComputeWorkgroupSubgroups = - device->kmod.props.max_threads_per_wg / pan_subgroup_size(PAN_ARCH), + device->kmod.dev->props.max_threads_per_wg / + pan_subgroup_size(PAN_ARCH), .requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT, .maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE, .maxPerStageDescriptorInlineUniformBlocks = @@ -1054,9 +1068,9 @@ panvk_per_arch(get_physical_device_properties)( .fmaRate = device->model->rates.fma, /* VK_ARM_shader_core_builtins */ - .shaderCoreMask = device->kmod.props.shader_present, - .shaderCoreCount = util_bitcount(device->kmod.props.shader_present), - .shaderWarpsPerCore = device->kmod.props.max_threads_per_core / + .shaderCoreMask = device->kmod.dev->props.shader_present, + .shaderCoreCount = util_bitcount(device->kmod.dev->props.shader_present), + .shaderWarpsPerCore = device->kmod.dev->props.max_threads_per_core / (pan_subgroup_size(PAN_ARCH) * 2), }; diff --git a/src/panfrost/vulkan/panvk_vX_query_pool.c b/src/panfrost/vulkan/panvk_vX_query_pool.c index 75b12203308..ccfe11db3e9 100644 --- a/src/panfrost/vulkan/panvk_vX_query_pool.c +++ 
b/src/panfrost/vulkan/panvk_vX_query_pool.c @@ -52,7 +52,7 @@ panvk_per_arch(CreateQueryPool)(VkDevice _device, const struct panvk_physical_device *phys_dev = to_panvk_physical_device(device->vk.physical); - pan_query_core_count(&phys_dev->kmod.props, &reports_per_query); + pan_query_core_count(&phys_dev->kmod.dev->props, &reports_per_query); #else reports_per_query = 1; #endif diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 421748ad482..c583ec00aa0 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -370,7 +370,8 @@ panvk_get_nir_options(UNUSED struct vk_physical_device *vk_pdev, UNUSED const struct vk_pipeline_robustness_state *rs) { struct panvk_physical_device *phys_dev = to_panvk_physical_device(vk_pdev); - return pan_get_nir_shader_compiler_options(pan_arch(phys_dev->kmod.props.gpu_id)); + return pan_get_nir_shader_compiler_options( + pan_arch(phys_dev->kmod.dev->props.gpu_id)); } static struct spirv_to_nir_options @@ -439,7 +440,7 @@ panvk_preprocess_nir(struct vk_physical_device *vk_pdev, * * This would give us a better place to do panvk-specific lowering. 
*/ - pan_nir_lower_texture_early(nir, pdev->kmod.props.gpu_id); + pan_nir_lower_texture_early(nir, pdev->kmod.dev->props.gpu_id); NIR_PASS(_, nir, nir_lower_system_values); nir_lower_compute_system_values_options options = { @@ -451,17 +452,18 @@ panvk_preprocess_nir(struct vk_physical_device *vk_pdev, if (nir->info.stage == MESA_SHADER_FRAGMENT) NIR_PASS(_, nir, nir_lower_wpos_center); - pan_optimize_nir(nir, pdev->kmod.props.gpu_id); + pan_optimize_nir(nir, pdev->kmod.dev->props.gpu_id); NIR_PASS(_, nir, nir_split_var_copies); NIR_PASS(_, nir, nir_lower_var_copies); - assert(pdev->kmod.props.shader_present != 0); - uint64_t core_max_id = util_last_bit(pdev->kmod.props.shader_present) - 1; + assert(pdev->kmod.dev->props.shader_present != 0); + uint64_t core_max_id = + util_last_bit(pdev->kmod.dev->props.shader_present) - 1; NIR_PASS(_, nir, nir_inline_sysval, nir_intrinsic_load_core_max_id_arm, core_max_id); - pan_preprocess_nir(nir, pdev->kmod.props.gpu_id); + pan_preprocess_nir(nir, pdev->kmod.dev->props.gpu_id); } static void @@ -1289,8 +1291,8 @@ panvk_compile_shader(struct panvk_device *dev, robust2_modes |= nir_var_mem_ssbo; struct pan_compile_inputs inputs = { - .gpu_id = phys_dev->kmod.props.gpu_id, - .gpu_variant = phys_dev->kmod.props.gpu_variant, + .gpu_id = phys_dev->kmod.dev->props.gpu_id, + .gpu_variant = phys_dev->kmod.dev->props.gpu_variant, .view_mask = (state && state->rp) ? state->rp->view_mask : 0, .robust2_modes = robust2_modes, .robust_descriptors = dev->vk.enabled_features.nullDescriptor,