panvk: Add v12 support

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34032>
This commit is contained in:
Mary Guillemard 2025-01-30 11:56:20 +00:00
parent 9b4886d6f4
commit c7b94b098c
10 changed files with 225 additions and 14 deletions

View file

View file

@ -632,10 +632,10 @@ panvk_per_arch(cs_pick_iter_sb)(struct panvk_cmd_buffer *cmdbuf,
cs_match(b, iter_sb, cmp_scratch) { cs_match(b, iter_sb, cmp_scratch) {
#define CASE(x) \ #define CASE(x) \
cs_case(b, x) { \ cs_case(b, x) { \
cs_wait_slot(b, SB_ITER(x), false); \ cs_wait_slot(b, SB_ITER(x), false); \
cs_set_scoreboard_entry(b, SB_ITER(x), SB_ID(LS)); \ cs_select_sb_entries_for_async_ops(b, SB_ITER(x)); \
} }
CASE(0) CASE(0)
CASE(1) CASE(1)

View file

@ -205,7 +205,8 @@ emit_varying_descs(const struct panvk_cmd_buffer *cmdbuf,
cfg.table = 61; cfg.table = 61;
cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_VERTEX; cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_VERTEX;
cfg.offset = 1024 + (loc * 16); cfg.offset = 1024 + (loc * 16);
cfg.buffer_index = 0; /* On v12+, the hardware-controlled buffer is at index 1 for varyings */
cfg.buffer_index = PAN_ARCH >= 12 ? 1 : 0;
cfg.attribute_stride = varying_size; cfg.attribute_stride = varying_size;
cfg.packet_stride = varying_size + 16; cfg.packet_stride = varying_size + 16;
} }
@ -402,8 +403,16 @@ update_tls(struct panvk_cmd_buffer *cmdbuf)
cmdbuf->state.gfx.tsd = state->desc.gpu; cmdbuf->state.gfx.tsd = state->desc.gpu;
cs_update_vt_ctx(b) cs_update_vt_ctx(b) {
#if PAN_ARCH >= 12
cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_TSD),
state->desc.gpu);
cs_move64_to(b, cs_sr_reg64(b, IDVS, FRAGMENT_TSD),
state->desc.gpu);
#else
cs_move64_to(b, cs_sr_reg64(b, IDVS, TSD_0), state->desc.gpu); cs_move64_to(b, cs_sr_reg64(b, IDVS, TSD_0), state->desc.gpu);
#endif
}
} }
state->info.tls.size = state->info.tls.size =
@ -465,6 +474,86 @@ prepare_blend(struct panvk_cmd_buffer *cmdbuf)
return VK_SUCCESS; return VK_SUCCESS;
} }
#if PAN_ARCH >= 12
static void
prepare_vp(struct panvk_cmd_buffer *cmdbuf)
{
struct cs_builder *b =
panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
const VkViewport *viewport =
&cmdbuf->vk.dynamic_graphics_state.vp.viewports[0];
const VkRect2D *scissor = &cmdbuf->vk.dynamic_graphics_state.vp.scissors[0];
/* XXX: Switch scissor_array_enable to true and use array based variant
* for future proofness */
if (dyn_gfx_state_dirty(cmdbuf, VP_SCISSORS)) {
struct mali_scissor_packed scissor_box;
pan_pack(&scissor_box, SCISSOR, cfg) {
assert(scissor->offset.x >= 0 && scissor->offset.y >= 0);
int minx = scissor->offset.x;
int miny = scissor->offset.y;
int maxx = scissor->offset.x + scissor->extent.width;
int maxy = scissor->offset.y + scissor->extent.height;
/* Make sure we don't end up with a max < min when width/height is 0 */
maxx = maxx > minx ? maxx - 1 : maxx;
maxy = maxy > miny ? maxy - 1 : maxy;
/* Clamp scissor to valid range */
cfg.scissor_minimum_x = CLAMP(minx, 0, UINT16_MAX);
cfg.scissor_minimum_y = CLAMP(miny, 0, UINT16_MAX);
cfg.scissor_maximum_x = CLAMP(maxx, 0, UINT16_MAX);
cfg.scissor_maximum_y = CLAMP(maxy, 0, UINT16_MAX);
}
struct mali_scissor_packed *scissor_box_ptr = &scissor_box;
cs_move64_to(b, cs_sr_reg64(b, IDVS, SCISSOR_BOX),
*((uint64_t *)scissor_box_ptr));
}
if (dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) ||
dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) ||
dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE)) {
struct mali_viewport_packed mali_viewport;
pan_pack(&mali_viewport, VIEWPORT, cfg) {
/* The spec says "width must be greater than 0.0" */
assert(viewport->width >= 0);
int minx = (int)viewport->x;
int maxx = (int)(viewport->x + viewport->width);
/* Viewport height can be negative */
int miny =
MIN2((int)viewport->y, (int)(viewport->y + viewport->height));
int maxy =
MAX2((int)viewport->y, (int)(viewport->y + viewport->height));
/* Make sure we don't end up with a max < min when width/height is 0 */
maxx = maxx > minx ? maxx - 1 : maxx;
maxy = maxy > miny ? maxy - 1 : maxy;
/* Clamp viewport to valid range */
cfg.min_x = CLAMP(minx, 0, UINT16_MAX);
cfg.min_y = CLAMP(miny, 0, UINT16_MAX);
cfg.max_x = CLAMP(maxx, 0, UINT16_MAX);
cfg.max_y = CLAMP(maxy, 0, UINT16_MAX);
struct panvk_graphics_sysvals *sysvals = &cmdbuf->state.gfx.sysvals;
float z_min = sysvals->viewport.offset.z;
float z_max = z_min + sysvals->viewport.scale.z;
cfg.min_depth = CLAMP(z_min, 0.0f, 1.0f);
cfg.max_depth = CLAMP(z_max, 0.0f, 1.0f);
}
uint64_t *mali_viewport_ptr = (uint64_t *)&mali_viewport;
cs_move64_to(b, cs_sr_reg64(b, IDVS, VIEWPORT_HIGH),
mali_viewport_ptr[0]);
cs_move64_to(b, cs_sr_reg64(b, IDVS, VIEWPORT_LOW),
mali_viewport_ptr[1]);
}
}
#else
static void static void
prepare_vp(struct panvk_cmd_buffer *cmdbuf) prepare_vp(struct panvk_cmd_buffer *cmdbuf)
{ {
@ -525,9 +614,23 @@ prepare_vp(struct panvk_cmd_buffer *cmdbuf)
fui(MAX2(z_min, z_max))); fui(MAX2(z_min, z_max)));
} }
} }
#endif
#if PAN_ARCH >= 12
static inline uint64_t static inline uint64_t
get_pos_spd(const struct panvk_cmd_buffer *cmdbuf) get_vs_all_spd(const struct panvk_cmd_buffer *cmdbuf)
{
const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
assert(vs);
const struct vk_input_assembly_state *ia =
&cmdbuf->vk.dynamic_graphics_state.ia;
return ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST
? panvk_priv_mem_dev_addr(vs->spds.all_points)
: panvk_priv_mem_dev_addr(vs->spds.all_triangles);
}
#else
static inline uint64_t
get_vs_pos_spd(const struct panvk_cmd_buffer *cmdbuf)
{ {
const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
assert(vs); assert(vs);
@ -537,6 +640,7 @@ get_pos_spd(const struct panvk_cmd_buffer *cmdbuf)
? panvk_priv_mem_dev_addr(vs->spds.pos_points) ? panvk_priv_mem_dev_addr(vs->spds.pos_points)
: panvk_priv_mem_dev_addr(vs->spds.pos_triangles); : panvk_priv_mem_dev_addr(vs->spds.pos_triangles);
} }
#endif
static void static void
prepare_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf) prepare_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf)
@ -714,6 +818,10 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
cfg.fb_width = fbinfo->width; cfg.fb_width = fbinfo->width;
cfg.fb_height = fbinfo->height; cfg.fb_height = fbinfo->height;
#if PAN_ARCH >= 12
cfg.effective_tile_size = fbinfo->tile_size;
#endif
cfg.sample_pattern = pan_sample_pattern(fbinfo->nr_samples); cfg.sample_pattern = pan_sample_pattern(fbinfo->nr_samples);
cfg.first_provoking_vertex = cfg.first_provoking_vertex =
@ -1206,14 +1314,19 @@ prepare_vs(struct panvk_cmd_buffer *cmdbuf)
cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_SRT), cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_SRT),
vs_desc_state->res_table); vs_desc_state->res_table);
#if PAN_ARCH >= 12
if (gfx_state_dirty(cmdbuf, VS))
cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_SPD), get_vs_all_spd(cmdbuf));
#else
if (gfx_state_dirty(cmdbuf, VS) || if (gfx_state_dirty(cmdbuf, VS) ||
dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_TOPOLOGY)) dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_TOPOLOGY))
cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_POS_SPD), cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_POS_SPD),
get_pos_spd(cmdbuf)); get_vs_pos_spd(cmdbuf));
if (gfx_state_dirty(cmdbuf, VS)) if (gfx_state_dirty(cmdbuf, VS))
cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_VARY_SPD), cs_move64_to(b, cs_sr_reg64(b, IDVS, VERTEX_VARY_SPD),
panvk_priv_mem_dev_addr(vs->spds.var)); panvk_priv_mem_dev_addr(vs->spds.var));
#endif
} }
return VK_SUCCESS; return VK_SUCCESS;
@ -1706,8 +1819,14 @@ set_tiler_idvs_flags(struct cs_builder *b, struct panvk_cmd_buffer *cmdbuf,
cfg.view_mask = cmdbuf->state.gfx.render.view_mask; cfg.view_mask = cmdbuf->state.gfx.render.view_mask;
} }
cs_move32_to(b, cs_sr_reg32(b, IDVS, TILER_FLAGS), cs_move32_to(b, cs_sr_reg32(b, IDVS, TILER_FLAGS), tiler_idvs_flags.opaque[0]);
tiler_idvs_flags.opaque[0]); #if PAN_ARCH >= 11
struct mali_primitive_flags_2_packed tiler_flags_2;
pan_pack(&tiler_flags_2, PRIMITIVE_FLAGS_2, cfg) {
}
cs_move32_to(b, cs_sr_reg32(b, IDVS, TILER_FLAGS2),
tiler_flags_2.opaque[0]);
#endif
} }
} }
@ -1880,10 +1999,16 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
cs_move32_to(b, counter_reg, idvs_count); cs_move32_to(b, counter_reg, idvs_count);
cs_while(b, MALI_CS_CONDITION_GREATER, counter_reg) { cs_while(b, MALI_CS_CONDITION_GREATER, counter_reg) {
#if PAN_ARCH >= 12
cs_trace_run_idvs2(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
flags_override.opaque[0], false, true, cs_undef(),
MALI_IDVS_SHADING_MODE_EARLY);
#else
cs_trace_run_idvs(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4), cs_trace_run_idvs(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
flags_override.opaque[0], false, true, flags_override.opaque[0], false, true,
cs_shader_res_sel(0, 0, 1, 0), cs_shader_res_sel(0, 0, 1, 0),
cs_shader_res_sel(2, 2, 2, 0), cs_undef()); cs_shader_res_sel(2, 2, 2, 0), cs_undef());
#endif
cs_add32(b, counter_reg, counter_reg, -1); cs_add32(b, counter_reg, counter_reg, -1);
cs_update_vt_ctx(b) { cs_update_vt_ctx(b) {
@ -1897,10 +2022,16 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
-(idvs_count * pan_size(TILER_CONTEXT))); -(idvs_count * pan_size(TILER_CONTEXT)));
} }
} else { } else {
#if PAN_ARCH >= 12
cs_trace_run_idvs2(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
flags_override.opaque[0], false, true, cs_undef(),
MALI_IDVS_SHADING_MODE_EARLY);
#else
cs_trace_run_idvs(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4), cs_trace_run_idvs(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
flags_override.opaque[0], false, true, flags_override.opaque[0], false, true,
cs_shader_res_sel(0, 0, 1, 0), cs_shader_res_sel(0, 0, 1, 0),
cs_shader_res_sel(2, 2, 2, 0), cs_undef()); cs_shader_res_sel(2, 2, 2, 0), cs_undef());
#endif
} }
cs_req_res(b, 0); cs_req_res(b, 0);
} }
@ -2067,10 +2198,16 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
get_tiler_flags_override(draw); get_tiler_flags_override(draw);
cs_req_res(b, CS_IDVS_RES); cs_req_res(b, CS_IDVS_RES);
#if PAN_ARCH >= 12
cs_trace_run_idvs2(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
flags_override.opaque[0], false, true, cs_undef(),
MALI_IDVS_SHADING_MODE_EARLY);
#else
cs_trace_run_idvs(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4), cs_trace_run_idvs(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
flags_override.opaque[0], false, true, flags_override.opaque[0], false, true,
cs_shader_res_sel(0, 0, 1, 0), cs_shader_res_sel(0, 0, 1, 0),
cs_shader_res_sel(2, 2, 2, 0), cs_undef()); cs_shader_res_sel(2, 2, 2, 0), cs_undef());
#endif
cs_req_res(b, 0); cs_req_res(b, 0);
} }

View file

@ -411,7 +411,12 @@ init_subqueue(struct panvk_queue *queue, enum panvk_subqueue_id subqueue)
panvk_priv_mem_dev_addr(subq->context)); panvk_priv_mem_dev_addr(subq->context));
/* Initialize scoreboard slots used for asynchronous operations. */ /* Initialize scoreboard slots used for asynchronous operations. */
#if PAN_ARCH >= 11
cs_set_state_imm32(&b, MALI_CS_SET_STATE_TYPE_SB_SEL_ENDPOINT, SB_ITER(0));
cs_set_state_imm32(&b, MALI_CS_SET_STATE_TYPE_SB_SEL_OTHER, SB_ID(LS));
#else
cs_set_scoreboard_entry(&b, SB_ITER(0), SB_ID(LS)); cs_set_scoreboard_entry(&b, SB_ITER(0), SB_ID(LS));
#endif
/* We do greater than test on sync objects, and given the reference seqno /* We do greater than test on sync objects, and given the reference seqno
* registers are all zero at init time, we need to initialize all syncobjs * registers are all zero at init time, we need to initialize all syncobjs

View file

@ -13,7 +13,7 @@ panvk_entrypoints = custom_target(
'--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'panvk', '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'panvk',
'--device-prefix', 'panvk_v6', '--device-prefix', 'panvk_v7', '--device-prefix', 'panvk_v6', '--device-prefix', 'panvk_v7',
'--device-prefix', 'panvk_v9', '--device-prefix', 'panvk_v10', '--device-prefix', 'panvk_v9', '--device-prefix', 'panvk_v10',
'--beta', with_vulkan_beta.to_string() '--device-prefix', 'panvk_v12', '--beta', with_vulkan_beta.to_string()
], ],
depend_files : vk_entrypoints_gen_depend_files, depend_files : vk_entrypoints_gen_depend_files,
) )
@ -62,6 +62,10 @@ valhall_archs = [9, 10]
valhall_inc_dir = ['valhall'] valhall_inc_dir = ['valhall']
valhall_files = [] valhall_files = []
avalon_archs = [12]
avalon_inc_dir = ['avalon']
avalon_files = []
jm_archs = [6, 7] jm_archs = [6, 7]
jm_inc_dir = ['jm'] jm_inc_dir = ['jm']
jm_files = [ jm_files = [
@ -76,7 +80,7 @@ jm_files = [
'jm/panvk_vX_queue.c', 'jm/panvk_vX_queue.c',
] ]
csf_archs = [10] csf_archs = [10, 12]
csf_inc_dir = ['csf'] csf_inc_dir = ['csf']
csf_files = [ csf_files = [
'csf/panvk_vX_cmd_buffer.c', 'csf/panvk_vX_cmd_buffer.c',
@ -114,7 +118,7 @@ common_per_arch_files = [
'panvk_vX_shader.c', 'panvk_vX_shader.c',
] ]
foreach arch : [6, 7, 10] foreach arch : [6, 7, 10, 12]
per_arch_files = common_per_arch_files per_arch_files = common_per_arch_files
inc_panvk_per_arch = [] inc_panvk_per_arch = []
@ -124,6 +128,9 @@ foreach arch : [6, 7, 10]
elif arch in valhall_archs elif arch in valhall_archs
inc_panvk_per_arch += valhall_inc_dir inc_panvk_per_arch += valhall_inc_dir
per_arch_files += valhall_files per_arch_files += valhall_files
elif arch in avalon_archs
inc_panvk_per_arch += avalon_inc_dir
per_arch_files += avalon_files
endif endif
if arch in jm_archs if arch in jm_archs

View file

@ -55,6 +55,9 @@ panvk_catch_indirect_alloc_failure(VkResult error)
case 10: \ case 10: \
panvk_arch_name(name, v10)(__VA_ARGS__); \ panvk_arch_name(name, v10)(__VA_ARGS__); \
break; \ break; \
case 12: \
panvk_arch_name(name, v12)(__VA_ARGS__); \
break; \
default: \ default: \
unreachable("Unsupported architecture"); \ unreachable("Unsupported architecture"); \
} \ } \
@ -72,6 +75,9 @@ panvk_catch_indirect_alloc_failure(VkResult error)
case 10: \ case 10: \
ret = panvk_arch_name(name, v10)(__VA_ARGS__); \ ret = panvk_arch_name(name, v10)(__VA_ARGS__); \
break; \ break; \
case 12: \
ret = panvk_arch_name(name, v12)(__VA_ARGS__); \
break; \
default: \ default: \
unreachable("Unsupported architecture"); \ unreachable("Unsupported architecture"); \
} \ } \
@ -86,6 +92,8 @@ panvk_catch_indirect_alloc_failure(VkResult error)
#define panvk_per_arch(name) panvk_arch_name(name, v9) #define panvk_per_arch(name) panvk_arch_name(name, v9)
#elif PAN_ARCH == 10 #elif PAN_ARCH == 10
#define panvk_per_arch(name) panvk_arch_name(name, v10) #define panvk_per_arch(name) panvk_arch_name(name, v10)
#elif PAN_ARCH == 12
#define panvk_per_arch(name) panvk_arch_name(name, v12)
#else #else
#error "Unsupported arch" #error "Unsupported arch"
#endif #endif

View file

@ -1091,6 +1091,7 @@ panvk_physical_device_init(struct panvk_physical_device *device,
break; break;
case 10: case 10:
case 12:
break; break;
default: default:
@ -1265,6 +1266,7 @@ panvk_GetPhysicalDeviceMemoryProperties2(
DEVICE_PER_ARCH_FUNCS(6); DEVICE_PER_ARCH_FUNCS(6);
DEVICE_PER_ARCH_FUNCS(7); DEVICE_PER_ARCH_FUNCS(7);
DEVICE_PER_ARCH_FUNCS(10); DEVICE_PER_ARCH_FUNCS(10);
DEVICE_PER_ARCH_FUNCS(12);
VKAPI_ATTR VkResult VKAPI_CALL VKAPI_ATTR VkResult VKAPI_CALL
panvk_CreateDevice(VkPhysicalDevice physicalDevice, panvk_CreateDevice(VkPhysicalDevice physicalDevice,

View file

@ -322,9 +322,14 @@ struct panvk_shader {
union { union {
struct panvk_priv_mem spd; struct panvk_priv_mem spd;
struct { struct {
#if PAN_ARCH < 12
struct panvk_priv_mem pos_points; struct panvk_priv_mem pos_points;
struct panvk_priv_mem pos_triangles; struct panvk_priv_mem pos_triangles;
struct panvk_priv_mem var; struct panvk_priv_mem var;
#else
struct panvk_priv_mem all_points;
struct panvk_priv_mem all_triangles;
#endif
} spds; } spds;
}; };
#endif #endif

View file

@ -649,11 +649,18 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
cfg.flags_1.sample_mask = 0xFFFF; cfg.flags_1.sample_mask = 0xFFFF;
cfg.flags_0.multisample_enable = key->samples > 1; cfg.flags_0.multisample_enable = key->samples > 1;
cfg.flags_0.evaluate_per_sample = key->samples > 1; cfg.flags_0.evaluate_per_sample = key->samples > 1;
cfg.maximum_z = 1.0;
cfg.flags_0.clean_fragment_write = true; cfg.flags_0.clean_fragment_write = true;
#if PAN_ARCH >= 12
cfg.fragment_resources = res_table.gpu | 1;
cfg.fragment_shader = panvk_priv_mem_dev_addr(shader->spd);
cfg.thread_storage = cmdbuf->state.gfx.tsd;
#else
cfg.maximum_z = 1.0;
cfg.shader.resources = res_table.gpu | 1; cfg.shader.resources = res_table.gpu | 1;
cfg.shader.shader = panvk_priv_mem_dev_addr(shader->spd); cfg.shader.shader = panvk_priv_mem_dev_addr(shader->spd);
cfg.shader.thread_storage = cmdbuf->state.gfx.tsd; cfg.shader.thread_storage = cmdbuf->state.gfx.tsd;
#endif
cfg.flags_2.write_mask = rt_written; cfg.flags_2.write_mask = rt_written;
} }

View file

@ -1049,8 +1049,10 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader,
if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT) if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT)
cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL; cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
#if PAN_ARCH < 12
else if (cfg.stage == MALI_SHADER_STAGE_VERTEX) else if (cfg.stage == MALI_SHADER_STAGE_VERTEX)
cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF; cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF;
#endif
cfg.register_allocation = cfg.register_allocation =
pan_register_allocation(shader->info.work_reg_count); pan_register_allocation(shader->info.work_reg_count);
@ -1062,6 +1064,38 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader,
cfg.requires_helper_threads = shader->info.contains_barrier; cfg.requires_helper_threads = shader->info.contains_barrier;
} }
} else { } else {
#if PAN_ARCH >= 12
shader->spds.all_points =
panvk_pool_alloc_desc(&dev->mempools.rw, SHADER_PROGRAM);
if (!panvk_priv_mem_dev_addr(shader->spds.all_points))
return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.all_points),
SHADER_PROGRAM, cfg) {
cfg.stage = pan_shader_stage(&shader->info);
cfg.register_allocation =
pan_register_allocation(shader->info.work_reg_count);
cfg.binary = panvk_shader_get_dev_addr(shader);
cfg.preload.r48_r63 = (shader->info.preload >> 48);
cfg.flush_to_zero_mode = shader_ftz_mode(shader);
}
shader->spds.all_triangles =
panvk_pool_alloc_desc(&dev->mempools.rw, SHADER_PROGRAM);
if (!panvk_priv_mem_dev_addr(shader->spds.all_triangles))
return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.all_triangles),
SHADER_PROGRAM, cfg) {
cfg.stage = pan_shader_stage(&shader->info);
cfg.register_allocation =
pan_register_allocation(shader->info.work_reg_count);
cfg.binary =
panvk_shader_get_dev_addr(shader) + shader->info.vs.no_psiz_offset;
cfg.preload.r48_r63 = (shader->info.preload >> 48);
cfg.flush_to_zero_mode = shader_ftz_mode(shader);
}
#else
shader->spds.pos_points = shader->spds.pos_points =
panvk_pool_alloc_desc(&dev->mempools.rw, SHADER_PROGRAM); panvk_pool_alloc_desc(&dev->mempools.rw, SHADER_PROGRAM);
if (!panvk_priv_mem_dev_addr(shader->spds.pos_points)) if (!panvk_priv_mem_dev_addr(shader->spds.pos_points))
@ -1114,6 +1148,7 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader,
cfg.flush_to_zero_mode = shader_ftz_mode(shader); cfg.flush_to_zero_mode = shader_ftz_mode(shader);
} }
} }
#endif
} }
#endif #endif
@ -1140,9 +1175,14 @@ panvk_shader_destroy(struct vk_device *vk_dev, struct vk_shader *vk_shader,
if (shader->info.stage != MESA_SHADER_VERTEX) { if (shader->info.stage != MESA_SHADER_VERTEX) {
panvk_pool_free_mem(&shader->spd); panvk_pool_free_mem(&shader->spd);
} else { } else {
#if PAN_ARCH >= 12
panvk_pool_free_mem(&shader->spds.all_points);
panvk_pool_free_mem(&shader->spds.all_triangles);
#else
panvk_pool_free_mem(&shader->spds.var); panvk_pool_free_mem(&shader->spds.var);
panvk_pool_free_mem(&shader->spds.pos_points); panvk_pool_free_mem(&shader->spds.pos_points);
panvk_pool_free_mem(&shader->spds.pos_triangles); panvk_pool_free_mem(&shader->spds.pos_triangles);
#endif
} }
#endif #endif