pan/genxml: Enforce explicit packed types on pan_[un]pack

Provide pan_cast_and_[un]pack() helpers to ease the transition. These
helpers should only be used when the caller is sure the destination is
large enough to hold the emitted descriptor.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32962>
Mary Guillemard, 2025-01-08 14:45:06 +01:00, committed by Marge Bot
parent bd80037441
commit 3b69edf825
32 changed files with 349 additions and 278 deletions
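
For context, a minimal sketch of the distinction this commit enforces, using
the VIEWPORT descriptor that appears later in this diff (field values and the
pool variable are illustrative, not taken from the commit):

    /* pan_pack()/pan_unpack() now require a pointer to the generated
     * mali_*_packed struct, so the compiler checks the destination type. */
    struct mali_viewport_packed vp;
    pan_pack(&vp, VIEWPORT, cfg) {
       cfg.scissor_minimum_x = 0;
       cfg.scissor_maximum_x = 1023;
    }

    /* pan_cast_and_pack() still accepts an untyped pointer (e.g. the CPU
     * address of a pool allocation) and inserts the cast itself, so it is
     * only safe when the destination is known to be big enough. */
    struct panfrost_ptr T = pan_pool_alloc_desc(&pool.base, VIEWPORT); /* assumed pool */
    pan_cast_and_pack(T.cpu, VIEWPORT, cfg) {
       cfg.scissor_minimum_x = 0;
       cfg.scissor_maximum_x = 1023;
    }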

View file

@ -316,7 +316,7 @@ panfrost_emit_blend(struct panfrost_batch *batch, void *rts,
/* Disable blending for unbacked render targets */
if (rt_count == 0 || !batch->key.cbufs[i] || !so->info[i].enabled) {
pan_pack(rts + i * pan_size(BLEND), BLEND, cfg) {
pan_pack(packed, BLEND, cfg) {
cfg.enable = false;
#if PAN_ARCH >= 6
cfg.internal.mode = MALI_BLEND_MODE_OFF;
@ -358,6 +358,8 @@ panfrost_emit_blend(struct panfrost_batch *batch, void *rts,
#if PAN_ARCH >= 6
struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT];
struct mali_internal_blend_packed *internal_blend_packed =
(struct mali_internal_blend_packed *)&packed->opaque[2];
/* Words 2 and 3: Internal blend */
if (blend_shaders[i]) {
@ -368,7 +370,7 @@ panfrost_emit_blend(struct panfrost_batch *batch, void *rts,
assert(!fs->bin.bo || (blend_shaders[i] & (0xffffffffull << 32)) ==
(fs->bin.gpu & (0xffffffffull << 32)));
pan_pack(&packed->opaque[2], INTERNAL_BLEND, cfg) {
pan_pack(internal_blend_packed, INTERNAL_BLEND, cfg) {
cfg.mode = MALI_BLEND_MODE_SHADER;
cfg.shader.pc = (uint32_t)blend_shaders[i];
@ -380,7 +382,7 @@ panfrost_emit_blend(struct panfrost_batch *batch, void *rts,
#endif
}
} else {
pan_pack(&packed->opaque[2], INTERNAL_BLEND, cfg) {
pan_pack(internal_blend_packed, INTERNAL_BLEND, cfg) {
cfg.mode = info.opaque ? MALI_BLEND_MODE_OPAQUE
: MALI_BLEND_MODE_FIXED_FUNCTION;
@ -757,7 +759,7 @@ panfrost_emit_viewport(struct panfrost_batch *batch)
if (!T.cpu)
return 0;
pan_pack(T.cpu, VIEWPORT, cfg) {
pan_cast_and_pack(T.cpu, VIEWPORT, cfg) {
cfg.scissor_minimum_x = minx;
cfg.scissor_minimum_y = miny;
cfg.scissor_maximum_x = maxx;
@ -769,7 +771,7 @@ panfrost_emit_viewport(struct panfrost_batch *batch)
return T.gpu;
#else
pan_pack(&batch->scissor, SCISSOR, cfg) {
pan_cast_and_pack(&batch->scissor, SCISSOR, cfg) {
cfg.scissor_minimum_x = minx;
cfg.scissor_minimum_y = miny;
cfg.scissor_maximum_x = maxx;
@ -1197,7 +1199,7 @@ panfrost_upload_rt_conversion_sysval(struct panfrost_batch *batch,
uniform->u[0] =
GENX(pan_blend_get_internal_desc)(format, rt, size, false) >> 32;
} else {
pan_pack(&uniform->u[0], INTERNAL_CONVERSION, cfg)
pan_cast_and_pack(&uniform->u[0], INTERNAL_CONVERSION, cfg)
cfg.memory_format =
GENX(panfrost_format_from_pipe_format)(PIPE_FORMAT_NONE)->hw;
}
@ -1827,7 +1829,7 @@ static uint64_t
panfrost_upload_wa_sampler(struct panfrost_batch *batch)
{
struct panfrost_ptr T = pan_pool_alloc_desc(&batch->pool.base, SAMPLER);
pan_pack(T.cpu, SAMPLER, cfg)
pan_cast_and_pack(T.cpu, SAMPLER, cfg)
;
return T.gpu;
}
@ -1939,7 +1941,8 @@ emit_image_bufs(struct panfrost_batch *batch, enum pipe_shader_type shader,
}
if (is_buffer) {
pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) {
pan_cast_and_pack(&bufs[(i * 2) + 1], ATTRIBUTE_BUFFER_CONTINUATION_3D,
cfg) {
cfg.s_dimension =
rsrc->base.width0 / util_format_get_blocksize(image->format);
cfg.t_dimension = cfg.r_dimension = 1;
@ -1948,7 +1951,8 @@ emit_image_bufs(struct panfrost_batch *batch, enum pipe_shader_type shader,
continue;
}
pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) {
pan_cast_and_pack(&bufs[(i * 2) + 1], ATTRIBUTE_BUFFER_CONTINUATION_3D,
cfg) {
unsigned level = image->u.tex.level;
unsigned samples = rsrc->image.layout.nr_samples;
@ -2009,8 +2013,9 @@ panfrost_emit_image_attribs(struct panfrost_batch *batch, uint64_t *buffers,
/* We need an empty attrib buf to stop the prefetching on Bifrost */
#if PAN_ARCH >= 6
pan_pack(bufs.cpu + ((buf_count - 1) * pan_size(ATTRIBUTE_BUFFER)),
ATTRIBUTE_BUFFER, cfg)
struct mali_attribute_buffer_packed *attrib_bufs = bufs.cpu;
pan_pack(&attrib_bufs[buf_count - 1], ATTRIBUTE_BUFFER, cfg)
;
#endif
@ -2148,7 +2153,8 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, uint64_t *buffers)
cfg.divisor_e = extra_flags;
}
pan_pack(bufs + k + 1, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
pan_cast_and_pack(&bufs[k + 1], ATTRIBUTE_BUFFER_CONTINUATION_NPOT,
cfg) {
cfg.divisor_numerator = magic_divisor;
cfg.divisor = divisor;
}
@ -2162,14 +2168,17 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, uint64_t *buffers)
#if PAN_ARCH <= 5
/* Add special gl_VertexID/gl_InstanceID buffers */
if (special_vbufs) {
panfrost_vertex_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1);
panfrost_vertex_id(ctx->padded_count,
(struct mali_attribute_vertex_id_packed *)&bufs[k],
ctx->instance_count > 1);
pan_pack(out + PAN_VERTEX_ID, ATTRIBUTE, cfg) {
cfg.buffer_index = k++;
cfg.format = so->formats[PAN_VERTEX_ID];
}
panfrost_instance_id(ctx->padded_count, &bufs[k],
panfrost_instance_id(ctx->padded_count,
(struct mali_attribute_instance_id_packed *)&bufs[k],
ctx->instance_count > 1);
pan_pack(out + PAN_INSTANCE_ID, ATTRIBUTE, cfg) {
@ -3640,7 +3649,7 @@ panfrost_create_depth_stencil_state(
#else
/* Pack with nodefaults so only explicitly set fields affect pan_merge() when
* emitting depth stencil descriptor */
pan_pack_nodefaults(&so->desc, DEPTH_STENCIL, cfg) {
pan_cast_and_pack_nodefaults(&so->desc, DEPTH_STENCIL, cfg) {
cfg.front_compare_function = (enum mali_func)front.func;
cfg.front_stencil_fail = pan_pipe_to_stencil_op(front.fail_op);
cfg.front_depth_fail = pan_pipe_to_stencil_op(front.zfail_op);
@ -3824,7 +3833,8 @@ prepare_shader(struct panfrost_compiled_shader *state,
struct panfrost_pool *pool, bool upload)
{
#if PAN_ARCH <= 7
void *out = &state->partial_rsd;
struct mali_renderer_state_packed *out =
(struct mali_renderer_state_packed *)&state->partial_rsd;
if (upload) {
struct panfrost_ptr ptr =
@ -3857,8 +3867,10 @@ prepare_shader(struct panfrost_compiled_shader *state,
state->state = panfrost_pool_take_ref(pool, ptr.gpu);
struct mali_shader_program_packed *programs = ptr.cpu;
/* Generic, or IDVS/points */
pan_pack(ptr.cpu, SHADER_PROGRAM, cfg) {
pan_cast_and_pack(&programs[0], SHADER_PROGRAM, cfg) {
cfg.stage = pan_shader_stage(&state->info);
if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT)
@ -3880,7 +3892,7 @@ prepare_shader(struct panfrost_compiled_shader *state,
return;
/* IDVS/triangles */
pan_pack(ptr.cpu + pan_size(SHADER_PROGRAM), SHADER_PROGRAM, cfg) {
pan_pack(&programs[1], SHADER_PROGRAM, cfg) {
cfg.stage = pan_shader_stage(&state->info);
cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF;
cfg.register_allocation =
@ -3893,7 +3905,7 @@ prepare_shader(struct panfrost_compiled_shader *state,
if (!secondary_enable)
return;
pan_pack(ptr.cpu + (pan_size(SHADER_PROGRAM) * 2), SHADER_PROGRAM, cfg) {
pan_pack(&programs[2], SHADER_PROGRAM, cfg) {
unsigned work_count = state->info.vs.secondary_work_reg_count;
cfg.stage = pan_shader_stage(&state->info);

View file

@ -153,7 +153,8 @@ panfrost_overdraw_alpha(const struct panfrost_context *ctx, bool zero)
static inline void
panfrost_emit_primitive_size(struct panfrost_context *ctx, bool points,
uint64_t size_array, void *prim_size)
uint64_t size_array,
struct mali_primitive_size_packed *prim_size)
{
struct panfrost_rasterizer *rast = ctx->rasterizer;

View file

@ -28,6 +28,7 @@
#include "genxml/cs_builder.h"
#include "panfrost/lib/genxml/cs_builder.h"
#include "gen_macros.h"
#include "pan_cmdstream.h"
#include "pan_context.h"
#include "pan_csf.h"
@ -702,7 +703,7 @@ csf_emit_tiler_desc(struct panfrost_batch *batch, const struct pan_fb_info *fb)
tiler.geometry_buffer_size = ctx->csf.tmp_geom_bo->kmod_bo->size;
}
batch->csf.pending_tiler_desc = 0;
batch->csf.pending_tiler_desc = NULL;
}
void
@ -895,8 +896,8 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch,
cs_move32_to(b, cs_reg32(b, 32), 0);
/* Compute workgroup size */
uint32_t wg_size[4];
pan_pack(wg_size, COMPUTE_SIZE_WORKGROUP, cfg) {
struct mali_compute_size_workgroup_packed wg_size;
pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) {
cfg.workgroup_size_x = info->block[0];
cfg.workgroup_size_y = info->block[1];
cfg.workgroup_size_z = info->block[2];
@ -911,7 +912,7 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch,
(info->variable_shared_mem == 0);
}
cs_move32_to(b, cs_reg32(b, 33), wg_size[0]);
cs_move32_to(b, cs_reg32(b, 33), wg_size.opaque[0]);
/* Offset */
for (unsigned i = 0; i < 3; ++i)
@ -998,8 +999,8 @@ GENX(csf_launch_xfb)(struct panfrost_batch *batch,
cs_move32_to(b, cs_reg32(b, 32), batch->ctx->offset_start);
/* Compute workgroup size */
uint32_t wg_size[4];
pan_pack(wg_size, COMPUTE_SIZE_WORKGROUP, cfg) {
struct mali_compute_size_workgroup_packed wg_size;
pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) {
cfg.workgroup_size_x = 1;
cfg.workgroup_size_y = 1;
cfg.workgroup_size_z = 1;
@ -1009,7 +1010,7 @@ GENX(csf_launch_xfb)(struct panfrost_batch *batch,
*/
cfg.allow_merging_workgroups = true;
}
cs_move32_to(b, cs_reg32(b, 33), wg_size[0]);
cs_move32_to(b, cs_reg32(b, 33), wg_size.opaque[0]);
/* Offset */
for (unsigned i = 0; i < 3; ++i)
@ -1119,7 +1120,7 @@ csf_emit_draw_state(struct panfrost_batch *batch,
struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
uint32_t primitive_flags = 0;
struct mali_primitive_flags_packed primitive_flags;
pan_pack(&primitive_flags, PRIMITIVE_FLAGS, cfg) {
if (panfrost_writes_point_size(ctx))
cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;
@ -1138,9 +1139,11 @@ csf_emit_draw_state(struct panfrost_batch *batch,
: MALI_FIFO_FORMAT_BASIC;
}
cs_move32_to(b, cs_reg32(b, 56), primitive_flags);
cs_move32_to(b, cs_reg32(b, 56), primitive_flags.opaque[0]);
struct mali_dcd_flags_0_packed dcd_flags0;
struct mali_dcd_flags_1_packed dcd_flags1;
uint32_t dcd_flags0 = 0, dcd_flags1 = 0;
pan_pack(&dcd_flags0, DCD_FLAGS_0, cfg) {
enum mesa_prim reduced_mode = u_reduced_prim(info->mode);
bool polygon = reduced_mode == MESA_PRIM_TRIANGLES;
@ -1245,15 +1248,16 @@ csf_emit_draw_state(struct panfrost_batch *batch,
}
}
cs_move32_to(b, cs_reg32(b, 57), dcd_flags0);
cs_move32_to(b, cs_reg32(b, 58), dcd_flags1);
cs_move32_to(b, cs_reg32(b, 57), dcd_flags0.opaque[0]);
cs_move32_to(b, cs_reg32(b, 58), dcd_flags1.opaque[0]);
uint64_t primsize = 0;
struct mali_primitive_size_packed primsize;
panfrost_emit_primitive_size(ctx, info->mode == MESA_PRIM_POINTS, 0,
&primsize);
cs_move64_to(b, cs_reg64(b, 60), primsize);
struct mali_primitive_size_packed *primsize_ptr = &primsize;
cs_move64_to(b, cs_reg64(b, 60), *((uint64_t*)primsize_ptr));
uint32_t flags_override;
struct mali_primitive_flags_packed flags_override;
/* Pack with nodefaults so only explicitly set override fields affect the
* previously set register values */
pan_pack_nodefaults(&flags_override, PRIMITIVE_FLAGS, cfg) {
@ -1262,7 +1266,7 @@ csf_emit_draw_state(struct panfrost_batch *batch,
cfg.secondary_shader = secondary_shader;
};
return flags_override;
return flags_override.opaque[0];
}
static struct cs_index
@ -1430,7 +1434,7 @@ GENX(csf_init_context)(struct panfrost_context *ctx)
if (ctx->csf.heap.desc_bo == NULL)
goto err_tiler_heap_desc_bo;
pan_pack(ctx->csf.heap.desc_bo->ptr.cpu, TILER_HEAP, heap) {
pan_cast_and_pack(ctx->csf.heap.desc_bo->ptr.cpu, TILER_HEAP, heap) {
heap.size = pan_screen(ctx->base.screen)->csf_tiler_heap.chunk_size;
heap.base = thc.first_heap_chunk_gpu_va;
heap.bottom = heap.base + 64;

View file

@ -78,7 +78,7 @@ struct panfrost_csf_batch {
struct panfrost_ptr tiler_oom_ctx;
void *pending_tiler_desc;
struct mali_tiler_context_packed *pending_tiler_desc;
};
struct panfrost_csf_context {

View file

@ -132,7 +132,7 @@ static void
pan_preload_emit_blend(unsigned rt,
const struct pan_image_view *iview,
const struct pan_preload_shader_data *preload_shader,
uint64_t blend_shader, void *out)
uint64_t blend_shader, struct mali_blend_packed *out)
{
assert(blend_shader == 0 || PAN_ARCH <= 5);
@ -212,14 +212,13 @@ pan_preload_is_ms(struct pan_preload_views *views)
static void
pan_preload_emit_blends(const struct pan_preload_shader_data *preload_shader,
struct pan_preload_views *views,
uint64_t *blend_shaders, void *out)
uint64_t *blend_shaders, struct mali_blend_packed *out)
{
for (unsigned i = 0; i < MAX2(views->rt_count, 1); ++i) {
void *dest = out + pan_size(BLEND) * i;
const struct pan_image_view *rt_view = views->rts[i];
uint64_t blend_shader = blend_shaders ? blend_shaders[i] : 0;
pan_preload_emit_blend(i, rt_view, preload_shader, blend_shader, dest);
pan_preload_emit_blend(i, rt_view, preload_shader, blend_shader, &out[i]);
}
}
#endif
@ -228,7 +227,7 @@ pan_preload_emit_blends(const struct pan_preload_shader_data *preload_shader,
static void
pan_preload_emit_rsd(const struct pan_preload_shader_data *preload_shader,
struct pan_preload_views *views, uint64_t *blend_shaders,
void *out)
struct mali_renderer_state_packed *out)
{
UNUSED bool zs = (views->z || views->s);
bool ms = pan_preload_is_ms(views);
@ -312,7 +311,7 @@ pan_preload_emit_rsd(const struct pan_preload_shader_data *preload_shader,
#if PAN_ARCH >= 5
pan_preload_emit_blends(preload_shader, views, blend_shaders,
out + pan_size(RENDERER_STATE));
(void*)((uint8_t*)out + pan_size(RENDERER_STATE)));
#endif
}
#endif
@ -827,7 +826,7 @@ pan_preload_emit_varying(struct pan_pool *pool)
if (!varying.cpu)
return 0;
pan_pack(varying.cpu, ATTRIBUTE, cfg) {
pan_cast_and_pack(varying.cpu, ATTRIBUTE, cfg) {
cfg.buffer_index = 0;
cfg.offset_enable = PAN_ARCH <= 5;
cfg.format =
@ -853,7 +852,7 @@ pan_preload_emit_varying_buffer(struct pan_pool *pool, uint64_t coordinates)
if (!varying_buffer.cpu)
return 0;
pan_pack(varying_buffer.cpu, BUFFER, cfg) {
pan_cast_and_pack(varying_buffer.cpu, BUFFER, cfg) {
cfg.address = coordinates;
cfg.size = 4 * sizeof(float) * 4;
}
@ -867,15 +866,15 @@ pan_preload_emit_varying_buffer(struct pan_pool *pool, uint64_t coordinates)
if (!varying_buffer.cpu)
return 0;
pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
pan_cast_and_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
cfg.pointer = coordinates;
cfg.stride = 4 * sizeof(float);
cfg.size = cfg.stride * 4;
}
if (padding_buffer) {
pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
ATTRIBUTE_BUFFER, cfg)
pan_cast_and_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
ATTRIBUTE_BUFFER, cfg)
;
}
#endif
@ -891,7 +890,7 @@ pan_preload_emit_sampler(struct pan_pool *pool, bool nearest_filter)
if (!sampler.cpu)
return 0;
pan_pack(sampler.cpu, SAMPLER, cfg) {
pan_cast_and_pack(sampler.cpu, SAMPLER, cfg) {
cfg.seamless_cube_map = false;
cfg.normalized_coordinates = false;
cfg.minify_nearest = nearest_filter;
@ -1027,7 +1026,7 @@ pan_preload_emit_zs(struct pan_pool *pool, bool z, bool s)
if (!zsd.cpu)
return 0;
pan_pack(zsd.cpu, DEPTH_STENCIL, cfg) {
pan_cast_and_pack(zsd.cpu, DEPTH_STENCIL, cfg) {
cfg.depth_function = MALI_FUNC_ALWAYS;
cfg.depth_write_enable = z;
@ -1066,7 +1065,7 @@ pan_preload_emit_viewport(struct pan_pool *pool, uint16_t minx, uint16_t miny,
if (!vp.cpu)
return 0;
pan_pack(vp.cpu, VIEWPORT, cfg) {
pan_cast_and_pack(vp.cpu, VIEWPORT, cfg) {
cfg.scissor_minimum_x = minx;
cfg.scissor_minimum_y = miny;
cfg.scissor_maximum_x = maxx;
@ -1078,9 +1077,9 @@ pan_preload_emit_viewport(struct pan_pool *pool, uint16_t minx, uint16_t miny,
#endif
static void
pan_preload_emit_dcd(struct pan_fb_preload_cache *cache,
struct pan_pool *pool, struct pan_fb_info *fb, bool zs,
uint64_t coordinates, uint64_t tsd, void *out,
pan_preload_emit_dcd(struct pan_fb_preload_cache *cache, struct pan_pool *pool,
struct pan_fb_info *fb, bool zs, uint64_t coordinates,
uint64_t tsd, struct mali_draw_packed *out,
bool always_write)
{
unsigned tex_count = 0;
@ -1163,7 +1162,7 @@ pan_preload_emit_dcd(struct pan_fb_preload_cache *cache,
return;
}
pan_pack(spd.cpu, SHADER_PROGRAM, cfg) {
pan_cast_and_pack(spd.cpu, SHADER_PROGRAM, cfg) {
cfg.stage = MALI_SHADER_STAGE_FRAGMENT;
cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD;

View file

@ -65,11 +65,12 @@ GENX(jm_init_batch)(struct panfrost_batch *batch)
#if PAN_ARCH == 5
struct mali_framebuffer_pointer_packed ptr;
pan_pack(ptr.opaque, FRAMEBUFFER_POINTER, cfg) {
pan_pack(&ptr, FRAMEBUFFER_POINTER, cfg) {
cfg.pointer = batch->framebuffer.gpu;
cfg.render_target_count = 1; /* a necessary lie */
}
/* XXX: THIS IS A BUG, FIXME */
batch->tls.gpu = ptr.opaque[0];
#endif
#endif
@ -411,7 +412,7 @@ jm_emit_tiler_desc(struct panfrost_batch *batch)
struct panfrost_ptr t = pan_pool_alloc_desc(&batch->pool.base, TILER_HEAP);
pan_pack(t.cpu, TILER_HEAP, heap) {
pan_cast_and_pack(t.cpu, TILER_HEAP, heap) {
heap.size = panfrost_bo_size(dev->tiler_heap);
heap.base = dev->tiler_heap->ptr.gpu;
heap.bottom = dev->tiler_heap->ptr.gpu;
@ -423,7 +424,7 @@ jm_emit_tiler_desc(struct panfrost_batch *batch)
assert(max_levels >= 2);
t = pan_pool_alloc_desc(&batch->pool.base, TILER_CONTEXT);
pan_pack(t.cpu, TILER_CONTEXT, tiler) {
pan_cast_and_pack(t.cpu, TILER_CONTEXT, tiler) {
/* TODO: Select hierarchy mask more effectively */
tiler.hierarchy_mask = (max_levels >= 8) ? 0xFF : 0x28;
@ -471,7 +472,8 @@ jm_emit_draw_descs(struct panfrost_batch *batch, struct MALI_DRAW *d,
}
static void
jm_emit_vertex_draw(struct panfrost_batch *batch, void *section)
jm_emit_vertex_draw(struct panfrost_batch *batch,
struct mali_draw_packed *section)
{
pan_pack(section, DRAW, cfg) {
cfg.state = batch->rsd[PIPE_SHADER_VERTEX];
@ -507,8 +509,8 @@ jm_emit_vertex_job(struct panfrost_batch *batch,
#endif /* PAN_ARCH <= 7 */
static void
jm_emit_tiler_draw(void *out, struct panfrost_batch *batch, bool fs_required,
enum mesa_prim prim)
jm_emit_tiler_draw(struct mali_draw_packed *out, struct panfrost_batch *batch,
bool fs_required, enum mesa_prim prim)
{
struct panfrost_context *ctx = batch->ctx;
struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
@ -672,7 +674,7 @@ static void
jm_emit_primitive(struct panfrost_batch *batch,
const struct pipe_draw_info *info,
const struct pipe_draw_start_count_bias *draw,
bool secondary_shader, void *out)
bool secondary_shader, struct mali_primitive_packed *out)
{
struct panfrost_context *ctx = batch->ctx;
UNUSED struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;

View file

@ -73,6 +73,8 @@ ForEachMacros: [
'nodearray_dense_foreach',
'nodearray_dense_foreach_64',
'nodearray_sparse_foreach',
'pan_cast_and_pack',
'pan_cast_and_pack_nodefaults',
'pan_foreach_instr_in_block_rev',
'pan_foreach_predecessor',
'pan_foreach_successor',

View file

@ -495,14 +495,14 @@ cs_reserve_instrs(struct cs_builder *b, uint32_t num_instrs)
uint64_t *ptr = b->cur_chunk.buffer.cpu + (b->cur_chunk.pos++);
pan_pack(ptr, CS_MOVE, I) {
pan_cast_and_pack(ptr, CS_MOVE, I) {
I.destination = cs_overflow_address_reg(b);
I.immediate = newbuf.gpu;
}
ptr = b->cur_chunk.buffer.cpu + (b->cur_chunk.pos++);
pan_pack(ptr, CS_MOVE32, I) {
pan_cast_and_pack(ptr, CS_MOVE32, I) {
I.destination = cs_overflow_length_reg(b);
}
@ -511,7 +511,7 @@ cs_reserve_instrs(struct cs_builder *b, uint32_t num_instrs)
ptr = b->cur_chunk.buffer.cpu + (b->cur_chunk.pos++);
pan_pack(ptr, CS_JUMP, I) {
pan_cast_and_pack(ptr, CS_JUMP, I) {
I.length = cs_overflow_length_reg(b);
I.address = cs_overflow_address_reg(b);
}
@ -680,7 +680,7 @@ cs_finish(struct cs_builder *b)
* to be separated out because pan_pack can evaluate its argument multiple times,
* yet cs_alloc has side effects.
*/
#define cs_emit(b, T, cfg) pan_pack(cs_alloc_ins(b), CS_##T, cfg)
#define cs_emit(b, T, cfg) pan_cast_and_pack(cs_alloc_ins(b), CS_##T, cfg)
/* Asynchronous operations take a mask of scoreboard slots to wait on
* before executing the instruction, and signal a scoreboard slot when

View file

@ -127,8 +127,8 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
#endif
if (params.pre_frame_0 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
const void *PANDECODE_PTR_VAR(ctx, dcd,
params.frame_shader_dcds + (0 * dcd_size));
const struct mali_draw_packed *PANDECODE_PTR_VAR(
ctx, dcd, params.frame_shader_dcds + (0 * dcd_size));
pan_unpack(dcd, DRAW, draw);
pandecode_log(ctx, "Pre frame 0 @%" PRIx64 " (mode=%d):\n",
params.frame_shader_dcds, params.pre_frame_0);
@ -136,8 +136,8 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
}
if (params.pre_frame_1 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
const void *PANDECODE_PTR_VAR(ctx, dcd,
params.frame_shader_dcds + (1 * dcd_size));
const struct mali_draw_packed *PANDECODE_PTR_VAR(
ctx, dcd, params.frame_shader_dcds + (1 * dcd_size));
pan_unpack(dcd, DRAW, draw);
pandecode_log(ctx, "Pre frame 1 @%" PRIx64 ":\n",
params.frame_shader_dcds + (1 * dcd_size));
@ -145,8 +145,8 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
}
if (params.post_frame != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
const void *PANDECODE_PTR_VAR(ctx, dcd,
params.frame_shader_dcds + (2 * dcd_size));
const struct mali_draw_packed *PANDECODE_PTR_VAR(
ctx, dcd, params.frame_shader_dcds + (2 * dcd_size));
pan_unpack(dcd, DRAW, draw);
pandecode_log(ctx, "Post frame:\n");
GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
@ -205,10 +205,11 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
#if PAN_ARCH >= 5
uint64_t
GENX(pandecode_blend)(struct pandecode_context *ctx, void *descs, int rt_no,
GENX(pandecode_blend)(struct pandecode_context *ctx,
struct mali_blend_packed *descs, int rt_no,
uint64_t frag_shader)
{
pan_unpack(descs + (rt_no * pan_size(BLEND)), BLEND, b);
pan_unpack(&descs[rt_no], BLEND, b);
DUMP_UNPACKED(ctx, BLEND, b, "Blend RT %d:\n", rt_no);
#if PAN_ARCH >= 6
if (b.internal.mode != MALI_BLEND_MODE_SHADER)
@ -272,7 +273,7 @@ pandecode_texture_payload(struct pandecode_context *ctx, uint64_t payload,
#define PANDECODE_EMIT_TEX_PAYLOAD_DESC(T, msg) \
for (int i = 0; i < bitmap_count; ++i) { \
uint64_t addr = payload + pan_size(T) * i; \
pan_unpack(PANDECODE_PTR(ctx, addr, void), T, s); \
pan_unpack(PANDECODE_PTR(ctx, addr, MALI_##T##_PACKED_T), T, s); \
DUMP_UNPACKED(ctx, T, s, msg " @%" PRIx64 ":\n", addr) \
}
@ -316,7 +317,8 @@ pandecode_texture_payload(struct pandecode_context *ctx, uint64_t payload,
void
GENX(pandecode_texture)(struct pandecode_context *ctx, uint64_t u, unsigned tex)
{
const uint8_t *cl = pandecode_fetch_gpu_mem(ctx, u, pan_size(TEXTURE));
const struct mali_texture_packed *cl =
pandecode_fetch_gpu_mem(ctx, u, pan_size(TEXTURE));
pan_unpack(cl, TEXTURE, temp);
DUMP_UNPACKED(ctx, TEXTURE, temp, "Texture:\n")
@ -327,8 +329,8 @@ GENX(pandecode_texture)(struct pandecode_context *ctx, uint64_t u, unsigned tex)
}
#else
void
GENX(pandecode_texture)(struct pandecode_context *ctx, const void *cl,
unsigned tex)
GENX(pandecode_texture)(struct pandecode_context *ctx,
const struct mali_texture_packed *cl, unsigned tex)
{
pan_unpack(cl, TEXTURE, temp);
DUMP_UNPACKED(ctx, TEXTURE, temp, "Texture:\n")
@ -357,10 +359,12 @@ void
GENX(pandecode_tiler)(struct pandecode_context *ctx, uint64_t gpu_va,
unsigned gpu_id)
{
pan_unpack(PANDECODE_PTR(ctx, gpu_va, void), TILER_CONTEXT, t);
pan_unpack(PANDECODE_PTR(ctx, gpu_va, struct mali_tiler_context_packed),
TILER_CONTEXT, t);
if (t.heap) {
pan_unpack(PANDECODE_PTR(ctx, t.heap, void), TILER_HEAP, h);
pan_unpack(PANDECODE_PTR(ctx, t.heap, struct mali_tiler_heap_packed),
TILER_HEAP, h);
DUMP_UNPACKED(ctx, TILER_HEAP, h, "Tiler Heap:\n");
}
@ -418,7 +422,7 @@ pandecode_resources(struct pandecode_context *ctx, uint64_t addr, unsigned size)
break;
case MALI_DESCRIPTOR_TYPE_TEXTURE:
pandecode_log(ctx, "Texture @%" PRIx64 "\n", addr + i);
GENX(pandecode_texture)(ctx, cl + i, i);
GENX(pandecode_texture)(ctx, (struct mali_texture_packed *)&cl[i], i);
break;
case MALI_DESCRIPTOR_TYPE_ATTRIBUTE:
DUMP_CL(ctx, ATTRIBUTE, cl + i, "Attribute @%" PRIx64 ":\n", addr + i);
@ -440,13 +444,13 @@ GENX(pandecode_resource_tables)(struct pandecode_context *ctx, uint64_t addr,
unsigned count = addr & 0x3F;
addr = addr & ~0x3F;
const uint8_t *cl =
const struct mali_resource_packed *cl =
pandecode_fetch_gpu_mem(ctx, addr, MALI_RESOURCE_LENGTH * count);
pandecode_log(ctx, "%s resource table @%" PRIx64 "\n", label, addr);
ctx->indent += 2;
for (unsigned i = 0; i < count; ++i) {
pan_unpack(cl + i * MALI_RESOURCE_LENGTH, RESOURCE, entry);
pan_unpack(&cl[i], RESOURCE, entry);
DUMP_UNPACKED(ctx, RESOURCE, entry, "Entry %u @%" PRIx64 ":\n", i,
addr + i * MALI_RESOURCE_LENGTH);

View file

@ -189,7 +189,7 @@ pandecode_log_cont(struct pandecode_context *ctx, const char *format, ...)
#define DUMP_CL(ctx, T, cl, ...) \
{ \
pan_unpack(cl, T, temp); \
pan_unpack((MALI_##T##_PACKED_T *)cl, T, temp); \
DUMP_UNPACKED(ctx, T, temp, __VA_ARGS__); \
}
@ -201,7 +201,8 @@ pandecode_log_cont(struct pandecode_context *ctx, const char *format, ...)
}
#define MAP_ADDR(ctx, T, addr, cl) \
const uint8_t *cl = pandecode_fetch_gpu_mem(ctx, addr, pan_size(T));
const MALI_##T##_PACKED_T *cl = \
pandecode_fetch_gpu_mem(ctx, addr, pan_size(T));
#define DUMP_ADDR(ctx, T, addr, ...) \
{ \
@ -238,13 +239,15 @@ void GENX(pandecode_dcd)(struct pandecode_context *ctx,
void GENX(pandecode_texture)(struct pandecode_context *ctx, uint64_t u,
unsigned tex);
#else
void GENX(pandecode_texture)(struct pandecode_context *ctx, const void *cl,
void GENX(pandecode_texture)(struct pandecode_context *ctx,
const struct mali_texture_packed *cl,
unsigned tex);
#endif
#if PAN_ARCH >= 5
uint64_t GENX(pandecode_blend)(struct pandecode_context *ctx, void *descs,
int rt_no, uint64_t frag_shader);
uint64_t GENX(pandecode_blend)(struct pandecode_context *ctx,
struct mali_blend_packed *descs, int rt_no,
uint64_t frag_shader);
#endif
#if PAN_ARCH >= 6

View file

@ -36,6 +36,8 @@
/* Limit for Mali-G610. -1 because we're not including the active frame */
#define MAX_CALL_STACK_DEPTH (8 - 1)
#define cs_unpack(packed, T, unpacked) pan_cast_and_unpack(packed, T, unpacked)
struct queue_ctx {
/* Size of CSHWIF register file in 32-bit registers */
unsigned nr_regs;
@ -93,12 +95,12 @@ static const char *conditions_str[] = {
};
static void
print_cs_instr(FILE *fp, uint64_t instr)
print_cs_instr(FILE *fp, const uint64_t *instr)
{
pan_unpack(&instr, CS_BASE, base);
cs_unpack(instr, CS_BASE, base);
switch (base.opcode) {
case MALI_CS_OPCODE_NOP: {
pan_unpack(&instr, CS_NOP, I);
cs_unpack(instr, CS_NOP, I);
if (I.ignored)
fprintf(fp, "NOP // 0x%" PRIX64, I.ignored);
else
@ -107,19 +109,19 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_MOVE: {
pan_unpack(&instr, CS_MOVE, I);
cs_unpack(instr, CS_MOVE, I);
fprintf(fp, "MOVE d%u, #0x%" PRIX64, I.destination, I.immediate);
break;
}
case MALI_CS_OPCODE_MOVE32: {
pan_unpack(&instr, CS_MOVE32, I);
cs_unpack(instr, CS_MOVE32, I);
fprintf(fp, "MOVE32 r%u, #0x%X", I.destination, I.immediate);
break;
}
case MALI_CS_OPCODE_WAIT: {
pan_unpack(&instr, CS_WAIT, I);
cs_unpack(instr, CS_WAIT, I);
fprintf(fp, "WAIT%s #%x", I.progress_increment ? ".progress_inc" : "",
I.wait_mask);
break;
@ -127,7 +129,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
case MALI_CS_OPCODE_RUN_COMPUTE: {
const char *axes[4] = {"x_axis", "y_axis", "z_axis"};
pan_unpack(&instr, CS_RUN_COMPUTE, I);
cs_unpack(instr, CS_RUN_COMPUTE, I);
/* Print the instruction. Ignore the selects and the flags override
* since we'll print them implicitly later.
@ -140,7 +142,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_RUN_TILING: {
pan_unpack(&instr, CS_RUN_TILING, I);
cs_unpack(instr, CS_RUN_TILING, I);
fprintf(fp, "RUN_TILING%s.srt%d.spd%d.tsd%d.fau%d",
I.progress_increment ? ".progress_inc" : "", I.srt_select,
I.spd_select, I.tsd_select, I.fau_select);
@ -148,7 +150,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_RUN_IDVS: {
pan_unpack(&instr, CS_RUN_IDVS, I);
cs_unpack(instr, CS_RUN_IDVS, I);
fprintf(
fp,
"RUN_IDVS%s%s%s.varying_srt%d.varying_fau%d.varying_tsd%d.frag_srt%d.frag_tsd%d r%u, #%x",
@ -168,7 +170,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
"unknown", "unknown", "unknown", "unknown",
"unknown", "unknown", "unknown", "unknown",
};
pan_unpack(&instr, CS_RUN_FRAGMENT, I);
cs_unpack(instr, CS_RUN_FRAGMENT, I);
fprintf(fp, "RUN_FRAGMENT%s%s.tile_order=%s",
I.progress_increment ? ".progress_inc" : "",
@ -178,21 +180,22 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_RUN_FULLSCREEN: {
pan_unpack(&instr, CS_RUN_FULLSCREEN, I);
cs_unpack(instr, CS_RUN_FULLSCREEN, I);
fprintf(fp, "RUN_FULLSCREEN%s r%u, #%x",
I.progress_increment ? ".progress_inc" : "", I.dcd, I.flags_override);
I.progress_increment ? ".progress_inc" : "", I.dcd,
I.flags_override);
break;
}
case MALI_CS_OPCODE_FINISH_TILING: {
pan_unpack(&instr, CS_FINISH_TILING, I);
cs_unpack(instr, CS_FINISH_TILING, I);
fprintf(fp, "FINISH_TILING%s",
I.progress_increment ? ".progress_inc" : "");
break;
}
case MALI_CS_OPCODE_FINISH_FRAGMENT: {
pan_unpack(&instr, CS_FINISH_FRAGMENT, I);
cs_unpack(instr, CS_FINISH_FRAGMENT, I);
fprintf(fp, "FINISH_FRAGMENT%s d%u, d%u, #%x, #%u",
I.increment_fragment_completed ? ".frag_end" : "",
I.last_heap_chunk, I.first_heap_chunk, I.wait_mask,
@ -201,7 +204,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_ADD_IMMEDIATE32: {
pan_unpack(&instr, CS_ADD_IMMEDIATE32, I);
cs_unpack(instr, CS_ADD_IMMEDIATE32, I);
fprintf(fp, "ADD_IMMEDIATE32 r%u, r%u, #%d", I.destination, I.source,
I.immediate);
@ -209,7 +212,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_ADD_IMMEDIATE64: {
pan_unpack(&instr, CS_ADD_IMMEDIATE64, I);
cs_unpack(instr, CS_ADD_IMMEDIATE64, I);
fprintf(fp, "ADD_IMMEDIATE64 d%u, d%u, #%d", I.destination, I.source,
I.immediate);
@ -217,7 +220,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_UMIN32: {
pan_unpack(&instr, CS_UMIN32, I);
cs_unpack(instr, CS_UMIN32, I);
fprintf(fp, "UMIN32 r%u, r%u, r%u", I.destination, I.source_1,
I.source_2);
@ -225,7 +228,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_LOAD_MULTIPLE: {
pan_unpack(&instr, CS_LOAD_MULTIPLE, I);
cs_unpack(instr, CS_LOAD_MULTIPLE, I);
fprintf(fp, "LOAD_MULTIPLE ");
print_reg_tuple(I.base_register, I.mask, fp);
@ -235,7 +238,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_STORE_MULTIPLE: {
pan_unpack(&instr, CS_STORE_MULTIPLE, I);
cs_unpack(instr, CS_STORE_MULTIPLE, I);
fprintf(fp, "STORE_MULTIPLE ");
print_indirect(I.address, I.offset, fp);
@ -245,44 +248,44 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_BRANCH: {
pan_unpack(&instr, CS_BRANCH, I);
cs_unpack(instr, CS_BRANCH, I);
fprintf(fp, "BRANCH.%s r%u, #%d", conditions_str[I.condition], I.value,
I.offset);
break;
}
case MALI_CS_OPCODE_SET_SB_ENTRY: {
pan_unpack(&instr, CS_SET_SB_ENTRY, I);
cs_unpack(instr, CS_SET_SB_ENTRY, I);
fprintf(fp, "SET_SB_ENTRY #%u, #%u", I.endpoint_entry, I.other_entry);
break;
}
case MALI_CS_OPCODE_PROGRESS_WAIT: {
pan_unpack(&instr, CS_PROGRESS_WAIT, I);
cs_unpack(instr, CS_PROGRESS_WAIT, I);
fprintf(fp, "PROGRESS_WAIT d%u, #%u", I.source, I.queue);
break;
}
case MALI_CS_OPCODE_SET_EXCEPTION_HANDLER: {
pan_unpack(&instr, CS_SET_EXCEPTION_HANDLER, I);
cs_unpack(instr, CS_SET_EXCEPTION_HANDLER, I);
fprintf(fp, "SET_EXCEPTION_HANDLER d%u, r%u", I.address, I.length);
break;
}
case MALI_CS_OPCODE_CALL: {
pan_unpack(&instr, CS_CALL, I);
cs_unpack(instr, CS_CALL, I);
fprintf(fp, "CALL d%u, r%u", I.address, I.length);
break;
}
case MALI_CS_OPCODE_JUMP: {
pan_unpack(&instr, CS_JUMP, I);
cs_unpack(instr, CS_JUMP, I);
fprintf(fp, "JUMP d%u, r%u", I.address, I.length);
break;
}
case MALI_CS_OPCODE_REQ_RESOURCE: {
pan_unpack(&instr, CS_REQ_RESOURCE, I);
cs_unpack(instr, CS_REQ_RESOURCE, I);
fprintf(fp, "REQ_RESOURCE%s%s%s%s", I.compute ? ".compute" : "",
I.fragment ? ".fragment" : "", I.tiler ? ".tiler" : "",
I.idvs ? ".idvs" : "");
@ -290,7 +293,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_FLUSH_CACHE2: {
pan_unpack(&instr, CS_FLUSH_CACHE2, I);
cs_unpack(instr, CS_FLUSH_CACHE2, I);
static const char *mode[] = {
"nop",
"clean",
@ -306,7 +309,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_SYNC_ADD32: {
pan_unpack(&instr, CS_SYNC_ADD32, I);
cs_unpack(instr, CS_SYNC_ADD32, I);
fprintf(fp, "SYNC_ADD32%s%s [d%u], r%u, #%x, #%u",
I.error_propagate ? ".error_propagate" : "",
I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system", I.address,
@ -315,7 +318,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_SYNC_SET32: {
pan_unpack(&instr, CS_SYNC_SET32, I);
cs_unpack(instr, CS_SYNC_SET32, I);
fprintf(fp, "SYNC_SET32.%s%s [d%u], r%u, #%x, #%u",
I.error_propagate ? ".error_propagate" : "",
I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system", I.address,
@ -324,7 +327,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_SYNC_WAIT32: {
pan_unpack(&instr, CS_SYNC_WAIT32, I);
cs_unpack(instr, CS_SYNC_WAIT32, I);
fprintf(fp, "SYNC_WAIT32%s%s d%u, r%u", conditions_str[I.condition],
I.error_reject ? ".reject" : ".inherit", I.address, I.data);
break;
@ -338,7 +341,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
"ERROR_STATE",
};
pan_unpack(&instr, CS_STORE_STATE, I);
cs_unpack(instr, CS_STORE_STATE, I);
fprintf(fp, "STORE_STATE.%s d%u, #%i, #%x, #%u",
I.state >= ARRAY_SIZE(states_str) ? "UNKNOWN_STATE"
: states_str[I.state],
@ -347,25 +350,25 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_PROT_REGION: {
pan_unpack(&instr, CS_PROT_REGION, I);
cs_unpack(instr, CS_PROT_REGION, I);
fprintf(fp, "PROT_REGION #%u", I.size);
break;
}
case MALI_CS_OPCODE_PROGRESS_STORE: {
pan_unpack(&instr, CS_PROGRESS_STORE, I);
cs_unpack(instr, CS_PROGRESS_STORE, I);
fprintf(fp, "PROGRESS_STORE d%u", I.source);
break;
}
case MALI_CS_OPCODE_PROGRESS_LOAD: {
pan_unpack(&instr, CS_PROGRESS_LOAD, I);
cs_unpack(instr, CS_PROGRESS_LOAD, I);
fprintf(fp, "PROGRESS_LOAD d%u", I.destination);
break;
}
case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT: {
pan_unpack(&instr, CS_RUN_COMPUTE_INDIRECT, I);
cs_unpack(instr, CS_RUN_COMPUTE_INDIRECT, I);
fprintf(fp, "RUN_COMPUTE_INDIRECT%s.srt%d.spd%d.tsd%d.fau%d #%u",
I.progress_increment ? ".progress_inc" : "", I.srt_select,
I.spd_select, I.tsd_select, I.fau_select, I.workgroups_per_task);
@ -374,19 +377,19 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_ERROR_BARRIER: {
pan_unpack(&instr, CS_ERROR_BARRIER, I);
cs_unpack(instr, CS_ERROR_BARRIER, I);
fprintf(fp, "ERROR_BARRIER");
break;
}
case MALI_CS_OPCODE_HEAP_SET: {
pan_unpack(&instr, CS_HEAP_SET, I);
cs_unpack(instr, CS_HEAP_SET, I);
fprintf(fp, "HEAP_SET d%u", I.address);
break;
}
case MALI_CS_OPCODE_HEAP_OPERATION: {
pan_unpack(&instr, CS_HEAP_OPERATION, I);
cs_unpack(instr, CS_HEAP_OPERATION, I);
const char *counter_names[] = {"vt_start", "vt_end", NULL, "frag_end"};
fprintf(fp, "HEAP_OPERATION.%s #%x, #%d", counter_names[I.operation],
I.wait_mask, I.signal_slot);
@ -394,7 +397,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_TRACE_POINT: {
pan_unpack(&instr, CS_TRACE_POINT, I);
cs_unpack(instr, CS_TRACE_POINT, I);
fprintf(fp, "TRACE_POINT r%d:r%d, #%x, #%u", I.base_register,
I.base_register + I.register_count - 1, I.wait_mask,
I.signal_slot);
@ -402,7 +405,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_SYNC_ADD64: {
pan_unpack(&instr, CS_SYNC_ADD64, I);
cs_unpack(instr, CS_SYNC_ADD64, I);
fprintf(fp, "SYNC_ADD64%s%s [d%u], d%u, #%x, #%u",
I.error_propagate ? ".error_propagate" : "",
I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system", I.address,
@ -411,7 +414,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_SYNC_SET64: {
pan_unpack(&instr, CS_SYNC_SET64, I);
cs_unpack(instr, CS_SYNC_SET64, I);
fprintf(fp, "SYNC_SET64.%s%s [d%u], d%u, #%x, #%u",
I.error_propagate ? ".error_propagate" : "",
I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system", I.address,
@ -420,7 +423,7 @@ print_cs_instr(FILE *fp, uint64_t instr)
}
case MALI_CS_OPCODE_SYNC_WAIT64: {
pan_unpack(&instr, CS_SYNC_WAIT64, I);
cs_unpack(instr, CS_SYNC_WAIT64, I);
fprintf(fp, "SYNC_WAIT64%s%s d%u, d%u", conditions_str[I.condition],
I.error_reject ? ".reject" : ".inherit", I.address, I.data);
@ -536,9 +539,10 @@ pandecode_run_tiling(struct pandecode_context *ctx, FILE *fp,
ctx->indent++;
/* Merge flag overrides with the register flags */
uint32_t tiler_flags_raw = cs_get_u64(qctx, 56);
tiler_flags_raw |= I->flags_override;
pan_unpack(&tiler_flags_raw, PRIMITIVE_FLAGS, tiler_flags);
struct mali_primitive_flags_packed tiler_flags_packed = {
.opaque[0] = cs_get_u32(qctx, 56) | I->flags_override,
};
pan_unpack(&tiler_flags_packed, PRIMITIVE_FLAGS, tiler_flags);
unsigned reg_srt = I->srt_select * 2;
unsigned reg_fau = 8 + I->fau_select * 2;
@ -616,9 +620,10 @@ pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp,
ctx->indent++;
/* Merge flag overrides with the register flags */
uint32_t tiler_flags_raw = cs_get_u64(qctx, 56);
tiler_flags_raw |= I->flags_override;
pan_unpack(&tiler_flags_raw, PRIMITIVE_FLAGS, tiler_flags);
struct mali_primitive_flags_packed tiler_flags_packed = {
.opaque[0] = cs_get_u32(qctx, 56) | I->flags_override,
};
pan_unpack(&tiler_flags_packed, PRIMITIVE_FLAGS, tiler_flags);
unsigned reg_position_srt = 0;
unsigned reg_position_fau = 8;
@ -765,16 +770,19 @@ pandecode_run_fullscreen(struct pandecode_context *ctx, FILE *fp,
ctx->indent++;
/* Merge flag overrides with the register flags */
uint32_t tiler_flags_raw = cs_get_u64(qctx, 56);
tiler_flags_raw |= I->flags_override;
pan_unpack(&tiler_flags_raw, PRIMITIVE_FLAGS, tiler_flags);
struct mali_primitive_flags_packed tiler_flags_packed = {
.opaque[0] = cs_get_u32(qctx, 56) | I->flags_override,
};
pan_unpack(&tiler_flags_packed, PRIMITIVE_FLAGS, tiler_flags);
DUMP_UNPACKED(ctx, PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n");
GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40), qctx->gpu_id);
DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n");
pan_unpack(PANDECODE_PTR(ctx, cs_get_u64(qctx, I->dcd), void), DRAW, dcd);
pan_unpack(
PANDECODE_PTR(ctx, cs_get_u64(qctx, I->dcd), struct mali_draw_packed),
DRAW, dcd);
GENX(pandecode_dcd)(ctx, &dcd, 0, qctx->gpu_id);
ctx->indent--;
@ -857,7 +865,7 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
FILE *fp = ctx->dump_stream;
/* Unpack the base so we get the opcode */
uint8_t *bytes = (uint8_t *)qctx->ip;
pan_unpack(bytes, CS_BASE, base);
cs_unpack(bytes, CS_BASE, base);
assert(qctx->ip < qctx->end);
@ -869,43 +877,43 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
switch (base.opcode) {
case MALI_CS_OPCODE_RUN_COMPUTE: {
pan_unpack(bytes, CS_RUN_COMPUTE, I);
cs_unpack(bytes, CS_RUN_COMPUTE, I);
pandecode_run_compute(ctx, fp, qctx, &I);
break;
}
case MALI_CS_OPCODE_RUN_TILING: {
pan_unpack(bytes, CS_RUN_TILING, I);
cs_unpack(bytes, CS_RUN_TILING, I);
pandecode_run_tiling(ctx, fp, qctx, &I);
break;
}
case MALI_CS_OPCODE_RUN_IDVS: {
pan_unpack(bytes, CS_RUN_IDVS, I);
cs_unpack(bytes, CS_RUN_IDVS, I);
pandecode_run_idvs(ctx, fp, qctx, &I);
break;
}
case MALI_CS_OPCODE_RUN_FRAGMENT: {
pan_unpack(bytes, CS_RUN_FRAGMENT, I);
cs_unpack(bytes, CS_RUN_FRAGMENT, I);
pandecode_run_fragment(ctx, fp, qctx, &I);
break;
}
case MALI_CS_OPCODE_RUN_FULLSCREEN: {
pan_unpack(bytes, CS_RUN_FULLSCREEN, I);
cs_unpack(bytes, CS_RUN_FULLSCREEN, I);
pandecode_run_fullscreen(ctx, fp, qctx, &I);
break;
}
case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT: {
pan_unpack(bytes, CS_RUN_COMPUTE_INDIRECT, I);
cs_unpack(bytes, CS_RUN_COMPUTE_INDIRECT, I);
pandecode_run_compute_indirect(ctx, fp, qctx, &I);
break;
}
case MALI_CS_OPCODE_MOVE: {
pan_unpack(bytes, CS_MOVE, I);
cs_unpack(bytes, CS_MOVE, I);
qctx->regs[I.destination + 0] = (uint32_t)I.immediate;
qctx->regs[I.destination + 1] = (uint32_t)(I.immediate >> 32);
@ -913,14 +921,14 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
}
case MALI_CS_OPCODE_MOVE32: {
pan_unpack(bytes, CS_MOVE32, I);
cs_unpack(bytes, CS_MOVE32, I);
qctx->regs[I.destination] = I.immediate;
break;
}
case MALI_CS_OPCODE_LOAD_MULTIPLE: {
pan_unpack(bytes, CS_LOAD_MULTIPLE, I);
cs_unpack(bytes, CS_LOAD_MULTIPLE, I);
uint64_t addr =
((uint64_t)qctx->regs[I.address + 1] << 32) | qctx->regs[I.address];
addr += I.offset;
@ -936,14 +944,14 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
}
case MALI_CS_OPCODE_ADD_IMMEDIATE32: {
pan_unpack(bytes, CS_ADD_IMMEDIATE32, I);
cs_unpack(bytes, CS_ADD_IMMEDIATE32, I);
qctx->regs[I.destination] = qctx->regs[I.source] + I.immediate;
break;
}
case MALI_CS_OPCODE_ADD_IMMEDIATE64: {
pan_unpack(bytes, CS_ADD_IMMEDIATE64, I);
cs_unpack(bytes, CS_ADD_IMMEDIATE64, I);
int64_t value =
(qctx->regs[I.source] | ((int64_t)qctx->regs[I.source + 1] << 32)) +
@ -955,7 +963,7 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
}
case MALI_CS_OPCODE_CALL: {
pan_unpack(bytes, CS_CALL, I);
cs_unpack(bytes, CS_CALL, I);
if (qctx->call_stack_depth == MAX_CALL_STACK_DEPTH) {
fprintf(stderr, "CS call stack overflow\n");
@ -978,7 +986,7 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
}
case MALI_CS_OPCODE_SET_EXCEPTION_HANDLER: {
pan_unpack(bytes, CS_SET_EXCEPTION_HANDLER, I);
cs_unpack(bytes, CS_SET_EXCEPTION_HANDLER, I);
assert(qctx->call_stack_depth < MAX_CALL_STACK_DEPTH);
@ -1001,7 +1009,7 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
}
case MALI_CS_OPCODE_JUMP: {
pan_unpack(bytes, CS_JUMP, I);
cs_unpack(bytes, CS_JUMP, I);
if (qctx->call_stack_depth == 0) {
fprintf(stderr, "Cannot jump from the entrypoint\n");
@ -1012,7 +1020,7 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
}
case MALI_CS_OPCODE_BRANCH: {
pan_unpack(bytes, CS_BRANCH, I);
cs_unpack(bytes, CS_BRANCH, I);
interpret_cs_branch(ctx, qctx, I.offset, I.condition, I.value);
break;
@ -1081,7 +1089,7 @@ GENX(pandecode_interpret_cs)(struct pandecode_context *ctx, uint64_t queue,
for (int i = 0; i < 1 + qctx.call_stack_depth; ++i)
fprintf(fp, " ");
print_cs_instr(fp, *(qctx.ip));
print_cs_instr(fp, qctx.ip);
fprintf(fp, "\n");
} while (interpret_cs_instr(ctx, &qctx));
}
@ -1145,35 +1153,35 @@ record_indirect_branch_target(struct cs_code_cfg *cfg,
for (; blk_offs < blk->size &&
blk->start + blk_offs != ibranch->instr_idx;
blk_offs++) {
uint64_t instr = cfg->instrs[blk->start + blk_offs];
pan_unpack(&instr, CS_BASE, base);
const uint64_t *instr = &cfg->instrs[blk->start + blk_offs];
cs_unpack(instr, CS_BASE, base);
switch (base.opcode) {
case MALI_CS_OPCODE_MOVE: {
pan_unpack(&instr, CS_MOVE, I);
cs_unpack(instr, CS_MOVE, I);
reg_file.u64[I.destination] = I.immediate;
break;
}
case MALI_CS_OPCODE_MOVE32: {
pan_unpack(&instr, CS_MOVE32, I);
cs_unpack(instr, CS_MOVE32, I);
reg_file.u32[I.destination] = I.immediate;
break;
}
case MALI_CS_OPCODE_ADD_IMMEDIATE32: {
pan_unpack(&instr, CS_ADD_IMMEDIATE32, I);
cs_unpack(instr, CS_ADD_IMMEDIATE32, I);
reg_file.u32[I.destination] = reg_file.u32[I.source] + I.immediate;
break;
}
case MALI_CS_OPCODE_ADD_IMMEDIATE64: {
pan_unpack(&instr, CS_ADD_IMMEDIATE64, I);
cs_unpack(instr, CS_ADD_IMMEDIATE64, I);
reg_file.u64[I.destination] = reg_file.u64[I.source] + I.immediate;
break;
}
case MALI_CS_OPCODE_UMIN32: {
pan_unpack(&instr, CS_UMIN32, I);
cs_unpack(instr, CS_UMIN32, I);
reg_file.u32[I.destination] =
MIN2(reg_file.u32[I.source_1], reg_file.u32[I.source_2]);
break;
@ -1187,8 +1195,8 @@ record_indirect_branch_target(struct cs_code_cfg *cfg,
}
list_delinit(&cur_blk->node);
uint64_t instr = cfg->instrs[ibranch->instr_idx];
pan_unpack(&instr, CS_JUMP, I);
uint64_t *instr = &cfg->instrs[ibranch->instr_idx];
cs_unpack(instr, CS_JUMP, I);
struct cs_indirect_branch_target target = {
.address = reg_file.u64[I.address],
@ -1209,24 +1217,24 @@ collect_indirect_branch_targets_recurse(struct cs_code_cfg *cfg,
{
for (; instr_ptr >= (int)cur_blk->start; instr_ptr--) {
assert(instr_ptr >= 0);
uint64_t instr = cfg->instrs[instr_ptr];
pan_unpack(&instr, CS_BASE, base);
const uint64_t *instr = &cfg->instrs[instr_ptr];
cs_unpack(instr, CS_BASE, base);
switch (base.opcode) {
case MALI_CS_OPCODE_MOVE: {
pan_unpack(&instr, CS_MOVE, I);
cs_unpack(instr, CS_MOVE, I);
BITSET_CLEAR(track_map, I.destination);
BITSET_CLEAR(track_map, I.destination + 1);
break;
}
case MALI_CS_OPCODE_MOVE32: {
pan_unpack(&instr, CS_MOVE32, I);
cs_unpack(instr, CS_MOVE32, I);
BITSET_CLEAR(track_map, I.destination);
break;
}
case MALI_CS_OPCODE_ADD_IMMEDIATE32: {
pan_unpack(&instr, CS_ADD_IMMEDIATE32, I);
cs_unpack(instr, CS_ADD_IMMEDIATE32, I);
if (BITSET_TEST(track_map, I.destination)) {
BITSET_SET(track_map, I.source);
BITSET_CLEAR(track_map, I.destination);
@ -1235,7 +1243,7 @@ collect_indirect_branch_targets_recurse(struct cs_code_cfg *cfg,
}
case MALI_CS_OPCODE_ADD_IMMEDIATE64: {
pan_unpack(&instr, CS_ADD_IMMEDIATE64, I);
cs_unpack(instr, CS_ADD_IMMEDIATE64, I);
if (BITSET_TEST(track_map, I.destination)) {
BITSET_SET(track_map, I.source);
BITSET_CLEAR(track_map, I.destination);
@ -1248,7 +1256,7 @@ collect_indirect_branch_targets_recurse(struct cs_code_cfg *cfg,
}
case MALI_CS_OPCODE_UMIN32: {
pan_unpack(&instr, CS_UMIN32, I);
cs_unpack(instr, CS_UMIN32, I);
if (BITSET_TEST(track_map, I.destination)) {
BITSET_SET(track_map, I.source_1);
BITSET_SET(track_map, I.source_2);
@ -1258,7 +1266,7 @@ collect_indirect_branch_targets_recurse(struct cs_code_cfg *cfg,
}
case MALI_CS_OPCODE_LOAD_MULTIPLE: {
pan_unpack(&instr, CS_LOAD_MULTIPLE, I);
cs_unpack(instr, CS_LOAD_MULTIPLE, I);
for (unsigned i = 0; i < 16; i++) {
if ((I.mask & BITFIELD_BIT(i)) &&
BITSET_TEST(track_map, I.base_register + i)) {
@ -1270,7 +1278,7 @@ collect_indirect_branch_targets_recurse(struct cs_code_cfg *cfg,
}
case MALI_CS_OPCODE_PROGRESS_LOAD: {
pan_unpack(&instr, CS_PROGRESS_LOAD, I);
cs_unpack(instr, CS_PROGRESS_LOAD, I);
for (unsigned i = 0; i < 16; i++) {
if (BITSET_TEST(track_map, I.destination) ||
BITSET_TEST(track_map, I.destination + 1)) {
@ -1323,14 +1331,14 @@ static void
collect_indirect_branch_targets(struct cs_code_cfg *cfg,
struct cs_indirect_branch *ibranch)
{
uint64_t instr = cfg->instrs[ibranch->instr_idx];
uint64_t *instr = &cfg->instrs[ibranch->instr_idx];
struct cs_code_block *cur_blk = cfg->blk_map[ibranch->instr_idx];
struct list_head blk_stack;
BITSET_DECLARE(track_map, 256) = {0};
list_inithead(&blk_stack);
pan_unpack(&instr, CS_JUMP, I);
cs_unpack(instr, CS_JUMP, I);
BITSET_SET(track_map, I.address);
BITSET_SET(track_map, I.address + 1);
BITSET_SET(track_map, I.length);
@ -1358,15 +1366,14 @@ get_cs_cfg(struct pandecode_context *ctx, struct hash_table_u64 *symbols,
util_dynarray_init(&cfg->indirect_branches, cfg);
cfg->blk_map =
rzalloc_array(cfg, struct cs_code_block *, instr_count);
cfg->blk_map = rzalloc_array(cfg, struct cs_code_block *, instr_count);
cfg->instrs = instrs;
cfg->instr_count = instr_count;
struct cs_code_block *block = cs_code_block_alloc(cfg, 0, 0);
for (unsigned i = 0; i < instr_count; i++) {
uint64_t instr = instrs[i];
const uint64_t *instr = &instrs[i];
if (!cfg->blk_map[i]) {
cfg->blk_map[i] = block;
@ -1379,7 +1386,7 @@ get_cs_cfg(struct pandecode_context *ctx, struct hash_table_u64 *symbols,
util_dynarray_append(&block->predecessors, unsigned, i - 1);
}
pan_unpack(&instr, CS_BASE, base);
cs_unpack(instr, CS_BASE, base);
if (base.opcode == MALI_CS_OPCODE_JUMP ||
base.opcode == MALI_CS_OPCODE_CALL) {
@ -1394,7 +1401,7 @@ get_cs_cfg(struct pandecode_context *ctx, struct hash_table_u64 *symbols,
if (base.opcode != MALI_CS_OPCODE_BRANCH)
continue;
pan_unpack(&instr, CS_BRANCH, I);
cs_unpack(instr, CS_BRANCH, I);
unsigned target = MIN2(i + 1 + I.offset, instr_count);
@ -1437,10 +1444,12 @@ get_cs_cfg(struct pandecode_context *ctx, struct hash_table_u64 *symbols,
}
util_dynarray_foreach(&cfg->indirect_branches, struct cs_indirect_branch,
ibranch) {
ibranch)
{
collect_indirect_branch_targets(cfg, ibranch);
util_dynarray_foreach(&ibranch->targets,
struct cs_indirect_branch_target, target) {
util_dynarray_foreach(&ibranch->targets, struct cs_indirect_branch_target,
target)
{
get_cs_cfg(ctx, symbols, target->address, target->length);
}
}
@ -1464,8 +1473,8 @@ print_cs_binary(struct pandecode_context *ctx, uint64_t bin,
}
pandecode_make_indent(ctx);
print_cs_instr(ctx->dump_stream, cfg->instrs[i]);
pan_unpack(&cfg->instrs[i], CS_BASE, base);
print_cs_instr(ctx->dump_stream, &cfg->instrs[i]);
cs_unpack(&cfg->instrs[i], CS_BASE, base);
switch (base.opcode) {
case MALI_CS_OPCODE_JUMP:
case MALI_CS_OPCODE_CALL: {
@ -1475,20 +1484,20 @@ print_cs_binary(struct pandecode_context *ctx, uint64_t bin,
assert(ibranch->instr_idx == i);
fprintf(ctx->dump_stream, " // ");
util_dynarray_foreach(&ibranch->targets,
struct cs_indirect_branch_target, target) {
struct cs_indirect_branch_target, target)
{
fprintf(ctx->dump_stream, "%scs@%" PRIx64,
target == ibranch->targets.data ? "" : ",",
target->address);
}
if (ibranch->has_unknown_targets)
fprintf(ctx->dump_stream, "%s??",
ibranch->targets.size ? "," : "");
fprintf(ctx->dump_stream, "%s??", ibranch->targets.size ? "," : "");
ibranch_idx++;
break;
}
case MALI_CS_OPCODE_BRANCH: {
pan_unpack(&cfg->instrs[i], CS_BRANCH, I);
cs_unpack(&cfg->instrs[i], CS_BRANCH, I);
fprintf(ctx->dump_stream, " // ");
unsigned target = i + 1 + I.offset;
@ -1532,7 +1541,8 @@ GENX(pandecode_cs_binary)(struct pandecode_context *ctx, uint64_t bin,
struct cs_code_cfg *main_cfg = get_cs_cfg(ctx, symbols, bin, bin_size);
print_cs_binary(ctx, bin, main_cfg, "main_cs");
hash_table_u64_foreach(symbols, he) {
hash_table_u64_foreach(symbols, he)
{
struct cs_code_cfg *other_cfg = he.data;
if (other_cfg == main_cfg)
continue;
@ -1571,17 +1581,17 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace,
};
pandecode_make_indent(ctx);
print_cs_instr(ctx->dump_stream, *instr);
print_cs_instr(ctx->dump_stream, instr);
fprintf(ctx->dump_stream, " // from tracepoint_%" PRIx64 "\n", *ip);
pan_unpack(instr, CS_BASE, base);
cs_unpack(instr, CS_BASE, base);
switch (base.opcode) {
case MALI_CS_OPCODE_RUN_IDVS: {
struct cs_run_idvs_trace *idvs_trace = trace_data;
assert(trace_size >= sizeof(idvs_trace));
pan_unpack(instr, CS_RUN_IDVS, I);
cs_unpack(instr, CS_RUN_IDVS, I);
memcpy(regs, idvs_trace->sr, sizeof(idvs_trace->sr));
if (I.draw_id_register_enable)
@ -1597,7 +1607,7 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace,
struct cs_run_fragment_trace *frag_trace = trace_data;
assert(trace_size >= sizeof(frag_trace));
pan_unpack(instr, CS_RUN_FRAGMENT, I);
cs_unpack(instr, CS_RUN_FRAGMENT, I);
memcpy(&regs[40], frag_trace->sr, sizeof(frag_trace->sr));
pandecode_run_fragment(ctx, ctx->dump_stream, &qctx, &I);
trace_data = frag_trace + 1;
@ -1609,7 +1619,7 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace,
struct cs_run_compute_trace *comp_trace = trace_data;
assert(trace_size >= sizeof(comp_trace));
pan_unpack(instr, CS_RUN_COMPUTE, I);
cs_unpack(instr, CS_RUN_COMPUTE, I);
memcpy(regs, comp_trace->sr, sizeof(comp_trace->sr));
pandecode_run_compute(ctx, ctx->dump_stream, &qctx, &I);
trace_data = comp_trace + 1;
@ -1621,7 +1631,7 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace,
struct cs_run_compute_trace *comp_trace = trace_data;
assert(trace_size >= sizeof(comp_trace));
pan_unpack(instr, CS_RUN_COMPUTE_INDIRECT, I);
cs_unpack(instr, CS_RUN_COMPUTE_INDIRECT, I);
memcpy(regs, comp_trace->sr, sizeof(comp_trace->sr));
pandecode_run_compute_indirect(ctx, ctx->dump_stream, &qctx, &I);
trace_data = comp_trace + 1;

View file

@ -30,7 +30,8 @@
#if PAN_ARCH <= 9
static void
pandecode_primitive(struct pandecode_context *ctx, const void *p)
pandecode_primitive(struct pandecode_context *ctx,
const struct mali_primitive_packed *p)
{
pan_unpack(p, PRIMITIVE, primitive);
DUMP_UNPACKED(ctx, PRIMITIVE, primitive, "Primitive:\n");
@ -75,14 +76,14 @@ pandecode_attributes(struct pandecode_context *ctx, uint64_t addr, int count,
MAP_ADDR(ctx, ATTRIBUTE_BUFFER, addr, cl);
for (int i = 0; i < count; ++i) {
pan_unpack(cl + i * pan_size(ATTRIBUTE_BUFFER), ATTRIBUTE_BUFFER, temp);
pan_unpack(&cl[i], ATTRIBUTE_BUFFER, temp);
DUMP_UNPACKED(ctx, ATTRIBUTE_BUFFER, temp, "%s:\n", prefix);
switch (temp.type) {
case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR_WRITE_REDUCTION:
case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR: {
pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER),
ATTRIBUTE_BUFFER_CONTINUATION_NPOT, temp2);
pan_cast_and_unpack(&cl[i + 1], ATTRIBUTE_BUFFER_CONTINUATION_NPOT,
temp2);
pan_print(ctx->dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, temp2,
(ctx->indent + 1) * 2);
i++;
@ -90,8 +91,8 @@ pandecode_attributes(struct pandecode_context *ctx, uint64_t addr, int count,
}
case MALI_ATTRIBUTE_TYPE_3D_LINEAR:
case MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED: {
pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER_CONTINUATION_3D),
ATTRIBUTE_BUFFER_CONTINUATION_3D, temp2);
pan_cast_and_unpack(&cl[i + 1], ATTRIBUTE_BUFFER_CONTINUATION_3D,
temp2);
pan_print(ctx->dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_3D, temp2,
(ctx->indent + 1) * 2);
i++;
@ -136,7 +137,8 @@ bits(uint32_t word, uint32_t lo, uint32_t hi)
}
static void
pandecode_invocation(struct pandecode_context *ctx, const void *i)
pandecode_invocation(struct pandecode_context *ctx,
const struct mali_invocation_packed *i)
{
/* Decode invocation_count. See the comment before the definition of
* invocation_count for an explanation.
@ -269,7 +271,8 @@ GENX(pandecode_dcd)(struct pandecode_context *ctx, const struct MALI_DRAW *p,
/* On v5 only, the actual framebuffer pointer is tagged with extra
* metadata that we validate but do not print.
*/
pan_unpack(&p->fbd, FRAMEBUFFER_POINTER, ptr);
const uint64_t *fbd = &p->fbd;
pan_cast_and_unpack(fbd, FRAMEBUFFER_POINTER, ptr);
if (!ptr.type || ptr.zs_crc_extension_present ||
ptr.render_target_count != 1) {
@ -288,7 +291,7 @@ GENX(pandecode_dcd)(struct pandecode_context *ctx, const struct MALI_DRAW *p,
int texture_count = 0, sampler_count = 0;
if (p->state) {
uint32_t *cl =
struct mali_renderer_state_packed *cl =
pandecode_fetch_gpu_mem(ctx, p->state, pan_size(RENDERER_STATE));
pan_unpack(cl, RENDERER_STATE, state);
@ -485,7 +488,8 @@ pandecode_fragment_job(struct pandecode_context *ctx, uint64_t job,
/* On v5 and newer, the actual framebuffer pointer is tagged with extra
* metadata that we need to disregard.
*/
pan_unpack(&s.framebuffer, FRAMEBUFFER_POINTER, ptr);
const uint64_t *framebuffer_packed_raw = &s.framebuffer;
pan_cast_and_unpack(framebuffer_packed_raw, FRAMEBUFFER_POINTER, ptr);
fbd_pointer = ptr.pointer;
#else
/* On v4, the framebuffer pointer is untagged. */

View file

@ -87,20 +87,29 @@ __gen_unpack_padded(const uint32_t *restrict cl, uint32_t start, uint32_t end)
for (struct PREFIX1(T) name = {PREFIX2(T, header)}, \
*_loop_terminate = &name; \
__builtin_expect(_loop_terminate != NULL, 1); ({ \
PREFIX2(T, pack)((PREFIX2(T, PACKED_T) *)(dst), &name); \
PREFIX2(T, pack)((dst), &name); \
_loop_terminate = NULL; \
}))
#define pan_pack_nodefaults(dst, T, name) \
for (struct PREFIX1(T) name = {0}, *_loop_terminate = &name; \
__builtin_expect(_loop_terminate != NULL, 1); ({ \
PREFIX2(T, pack)((PREFIX2(T, PACKED_T) *)(dst), &name); \
PREFIX2(T, pack)((dst), &name); \
_loop_terminate = NULL; \
}))
#define pan_cast_and_pack(dst, T, name) \
pan_pack((PREFIX2(T, PACKED_T) *)dst, T, name)
#define pan_cast_and_pack_nodefaults(dst, T, name) \
pan_pack_nodefaults((PREFIX2(T, PACKED_T) *)dst, T, name)
#define pan_unpack(src, T, name) \
struct PREFIX1(T) name; \
PREFIX2(T, unpack)((const PREFIX2(T, PACKED_T) *)(src), &name)
PREFIX2(T, unpack)((src), &name)
#define pan_cast_and_unpack(src, T, name) \
pan_unpack((const PREFIX2(T, PACKED_T) *)(src), T, name)
#define pan_print(fp, T, var, indent) PREFIX2(T, print)(fp, &(var), indent)
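
To make the unpack contract concrete, a hedged sketch using the DRAW
descriptor seen elsewhere in this diff (the fetch helper is assumed):

    /* pan_unpack(): src must already carry the generated packed type. */
    const struct mali_draw_packed *dcd = fetch_dcd(); /* assumed helper */
    pan_unpack(dcd, DRAW, draw);

    /* pan_cast_and_unpack(): src may be untyped; the macro performs the
     * const cast before calling the same generated unpack function. */
    const void *raw = dcd;
    pan_cast_and_unpack(raw, DRAW, draw2);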

View file

@ -458,15 +458,13 @@ pan_blend_to_fixed_function_equation(const struct pan_blend_equation equation,
uint32_t
pan_pack_blend(const struct pan_blend_equation equation)
{
STATIC_ASSERT(sizeof(uint32_t) == MALI_BLEND_EQUATION_LENGTH);
uint32_t out = 0;
struct mali_blend_equation_packed out;
pan_pack(&out, BLEND_EQUATION, cfg) {
pan_blend_to_fixed_function_equation(equation, &cfg);
}
return out;
return out.opaque[0];
}
DERIVE_HASH_TABLE(pan_blend_shader_key);
@ -724,7 +722,7 @@ GENX(pan_blend_get_internal_desc)(enum pipe_format fmt, unsigned rt,
unsigned force_size, bool dithered)
{
const struct util_format_description *desc = util_format_description(fmt);
uint64_t res;
struct mali_internal_blend_packed res;
pan_pack(&res, INTERNAL_BLEND, cfg) {
cfg.mode = MALI_BLEND_MODE_OPAQUE;
@ -771,7 +769,7 @@ GENX(pan_blend_get_internal_desc)(enum pipe_format fmt, unsigned rt,
GENX(panfrost_dithered_format_from_pipe_format)(fmt, dithered);
}
return res;
return res.opaque[0] | ((uint64_t)res.opaque[1] << 32);
}
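Both return statements lean on the layout of the *_packed wrappers: each is an array of 32-bit opaque words, so the one-word BLEND_EQUATION reads back as opaque[0] and the two-word INTERNAL_BLEND is reassembled with word 0 in the low half. A sketch of that convention (illustrative type, not a generated header):

#include <stdint.h>

struct packed64_model { uint32_t opaque[2]; }; /* illustrative */

/* Word 0 holds the low 32 bits by construction of the descriptor,
 * independent of host byte order. */
static inline uint64_t
packed64_to_u64(const struct packed64_model *p)
{
   return p->opaque[0] | ((uint64_t)p->opaque[1] << 32);
}

static inline struct packed64_model
u64_to_packed64(uint64_t v)
{
   return (struct packed64_model){
      .opaque = {(uint32_t)v, (uint32_t)(v >> 32)},
   };
}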
static bool

View file

@ -321,7 +321,7 @@ pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc,
static void
pan_emit_zs_crc_ext(const struct pan_fb_info *fb, unsigned layer_idx,
int rt_crc, void *zs_crc_ext)
int rt_crc, struct mali_zs_crc_extension_packed *zs_crc_ext)
{
pan_pack(zs_crc_ext, ZS_CRC_EXTENSION, cfg) {
pan_prepare_crc(fb, rt_crc, &cfg);
@ -605,7 +605,8 @@ pan_prepare_rt(const struct pan_fb_info *fb, unsigned layer_idx,
#endif
void
GENX(pan_emit_tls)(const struct pan_tls_info *info, void *out)
GENX(pan_emit_tls)(const struct pan_tls_info *info,
struct mali_local_storage_packed *out)
{
pan_pack(out, LOCAL_STORAGE, cfg) {
if (info->tls.size) {
@ -644,7 +645,8 @@ GENX(pan_emit_tls)(const struct pan_tls_info *info, void *out)
#if PAN_ARCH <= 5
static void
pan_emit_midgard_tiler(const struct pan_fb_info *fb,
const struct pan_tiler_context *tiler_ctx, void *out)
const struct pan_tiler_context *tiler_ctx,
struct mali_tiler_context_packed *out)
{
bool hierarchy = !tiler_ctx->midgard.no_hierarchical_tiling;
@ -679,8 +681,8 @@ pan_emit_midgard_tiler(const struct pan_fb_info *fb,
#if PAN_ARCH >= 5
static void
pan_emit_rt(const struct pan_fb_info *fb, unsigned layer_idx,
unsigned idx, unsigned cbuf_offset, void *out)
pan_emit_rt(const struct pan_fb_info *fb, unsigned layer_idx, unsigned idx,
unsigned cbuf_offset, struct mali_render_target_packed *out)
{
pan_pack(out, RENDER_TARGET, cfg) {
pan_prepare_rt(fb, layer_idx, idx, cbuf_offset, &cfg);
@ -900,7 +902,10 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
#endif
if (has_zs_crc_ext) {
pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, out + pan_size(FRAMEBUFFER));
struct mali_zs_crc_extension_packed *zs_crc_ext =
out + pan_size(FRAMEBUFFER);
pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, zs_crc_ext);
rtd += pan_size(ZS_CRC_EXTENSION);
}
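Note that out + pan_size(FRAMEBUFFER) is arithmetic on a void pointer, a GNU C extension Mesa can rely on; a strictly portable spelling of the same address computation casts to char * first. A self-contained sketch with illustrative sizes:

#include <stdint.h>

struct fb_packed_model { uint32_t opaque[16]; };     /* illustrative */
struct zs_crc_ext_packed_model { uint32_t opaque[8]; };

static inline struct zs_crc_ext_packed_model *
zs_crc_ext_after_fb(void *out)
{
   /* (char *) arithmetic advances in bytes, matching the GNU-C
    * void-pointer arithmetic used above. */
   return (struct zs_crc_ext_packed_model *)((char *)out +
                                             sizeof(struct fb_packed_model));
}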
@ -920,7 +925,7 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
}
struct mali_framebuffer_pointer_packed tag;
pan_pack(tag.opaque, FRAMEBUFFER_POINTER, cfg) {
pan_pack(&tag, FRAMEBUFFER_POINTER, cfg) {
cfg.zs_crc_extension_present = has_zs_crc_ext;
cfg.render_target_count = MAX2(fb->rt_count, 1);
}

View file

@ -187,7 +187,8 @@ pan_sample_pattern(unsigned samples)
void GENX(pan_select_tile_size)(struct pan_fb_info *fb);
void GENX(pan_emit_tls)(const struct pan_tls_info *info, void *out);
void GENX(pan_emit_tls)(const struct pan_tls_info *info,
struct mali_local_storage_packed *out);
int GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size);

View file

@ -85,7 +85,7 @@ unsigned panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift,
#if PAN_ARCH <= 5
static inline void
panfrost_vertex_id(unsigned padded_count,
struct mali_attribute_buffer_packed *attr, bool instanced)
struct mali_attribute_vertex_id_packed *attr, bool instanced)
{
pan_pack(attr, ATTRIBUTE_VERTEX_ID, cfg) {
if (instanced) {
@ -101,7 +101,8 @@ panfrost_vertex_id(unsigned padded_count,
static inline void
panfrost_instance_id(unsigned padded_count,
struct mali_attribute_buffer_packed *attr, bool instanced)
struct mali_attribute_instance_id_packed *attr,
bool instanced)
{
pan_pack(attr, ATTRIBUTE_INSTANCE_ID, cfg) {
if (!instanced || padded_count <= 1) {
@ -234,7 +235,8 @@ panfrost_make_resource_table(struct panfrost_ptr base, unsigned index,
if (resource_count == 0)
return;
pan_pack(base.cpu + index * pan_size(RESOURCE), RESOURCE, cfg) {
struct mali_resource_packed *res = base.cpu;
pan_pack(&res[index], RESOURCE, cfg) {
cfg.address = address;
cfg.size = resource_count * pan_size(BUFFER);
}
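The rewrite swaps byte-offset arithmetic for indexing on a typed pointer; the addresses agree whenever pan_size(RESOURCE) matches sizeof(struct mali_resource_packed), the invariant the generated headers are built around. A standalone check of that equivalence (illustrative struct):

#include <assert.h>
#include <stdint.h>

struct resource_packed_model { uint32_t opaque[4]; }; /* illustrative */

int
main(void)
{
   struct resource_packed_model table[8];
   void *base = table;
   unsigned index = 3;

   /* &typed[index] advances in element units, matching the old
    * base + index * pan_size(RESOURCE) byte arithmetic. */
   assert((char *)base + index * sizeof(struct resource_packed_model) ==
          (char *)&((struct resource_packed_model *)base)[index]);
   return 0;
}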

View file

@ -136,11 +136,11 @@ pan_indirect_dispatch_init(struct pan_indirect_dispatch_meta *meta)
struct panfrost_ptr tsd =
pan_pool_alloc_desc(meta->desc_pool, LOCAL_STORAGE);
pan_pack(rsd.cpu, RENDERER_STATE, cfg) {
pan_cast_and_pack(rsd.cpu, RENDERER_STATE, cfg) {
pan_shader_prepare_rsd(&shader_info, bin.gpu, &cfg);
}
pan_pack(tsd.cpu, LOCAL_STORAGE, ls) {
pan_cast_and_pack(tsd.cpu, LOCAL_STORAGE, ls) {
ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
};
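Here both destinations come straight out of pan_pool_alloc_desc() as untyped CPU pointers. Since C converts void * implicitly, the cast inside pan_cast_and_pack does not change what the compiler accepts; what it adds is a greppable marker for every call whose only size guarantee is the allocation itself. A sketch of that contract (model allocator, not the real pool API):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

struct local_storage_packed_model { uint32_t opaque[4]; }; /* illustrative */

struct ptr_model {
   void *cpu;     /* CPU-visible mapping */
   uint64_t gpu;  /* GPU address, unused in this sketch */
};

static struct ptr_model
pool_alloc_desc_model(size_t size)
{
   return (struct ptr_model){.cpu = calloc(1, size)};
}

int
main(void)
{
   /* The caller, not the type system, vouches that the allocation is
    * descriptor-sized -- exactly the situation the cast helper flags. */
   struct ptr_model tsd =
      pool_alloc_desc_model(sizeof(struct local_storage_packed_model));
   assert(tsd.cpu);
   struct local_storage_packed_model *ls = tsd.cpu; /* implicit in C */
   ls->opaque[0] = 0;
   free(tsd.cpu);
   return 0;
}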

View file

@ -176,7 +176,7 @@ pan_jc_add_job(struct pan_jc *jc, enum mali_job_type type, bool barrier,
/* Assign the index */
unsigned index = ++jc->job_index;
pan_pack(job->cpu, JOB_HEADER, header) {
pan_cast_and_pack(job->cpu, JOB_HEADER, header) {
header.type = type;
header.barrier = barrier;
header.suppress_prefetch = suppress_prefetch;

View file

@ -254,7 +254,7 @@ static void
panfrost_emit_surface_with_stride(const struct pan_image_section_info *section,
void **payload)
{
pan_pack(*payload, SURFACE_WITH_STRIDE, cfg) {
pan_cast_and_pack(*payload, SURFACE_WITH_STRIDE, cfg) {
cfg.pointer = section->pointer;
cfg.row_stride = section->row_stride;
cfg.surface_stride = section->surface_stride;
@ -271,7 +271,7 @@ panfrost_emit_multiplanar_surface(const struct pan_image_section_info *sections,
assert(sections[2].row_stride == 0 ||
sections[1].row_stride == sections[2].row_stride);
pan_pack(*payload, MULTIPLANAR_SURFACE, cfg) {
pan_cast_and_pack(*payload, MULTIPLANAR_SURFACE, cfg) {
cfg.plane_0_pointer = sections[0].pointer;
cfg.plane_0_row_stride = sections[0].row_stride;
cfg.plane_1_2_row_stride = sections[1].row_stride;
@ -425,7 +425,7 @@ panfrost_emit_plane(const struct pan_image_view *iview,
// TODO: this isn't technically guaranteed to be YUV, but it is in practice.
bool is_3_planar_yuv = desc->layout == UTIL_FORMAT_LAYOUT_PLANAR3;
pan_pack(*payload, PLANE, cfg) {
pan_cast_and_pack(*payload, PLANE, cfg) {
cfg.pointer = pointer;
cfg.row_stride = row_stride;
cfg.size = layout->data_size - layout->slices[level].offset;
@ -696,7 +696,8 @@ GENX(panfrost_texture_afbc_reswizzle)(struct pan_image_view *iview)
* consists of a 32-byte header followed by pointers.
*/
void
GENX(panfrost_new_texture)(const struct pan_image_view *iview, void *out,
GENX(panfrost_new_texture)(const struct pan_image_view *iview,
struct mali_texture_packed *out,
const struct panfrost_ptr *payload)
{
const struct util_format_description *desc =

View file

@ -415,7 +415,8 @@ void GENX(panfrost_texture_swizzle_replicate_x)(struct pan_image_view *iview);
void GENX(panfrost_texture_afbc_reswizzle)(struct pan_image_view *iview);
#endif
void GENX(panfrost_new_texture)(const struct pan_image_view *iview, void *out,
void GENX(panfrost_new_texture)(const struct pan_image_view *iview,
struct mali_texture_packed *out,
const struct panfrost_ptr *payload);
#endif

View file

@ -316,7 +316,8 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev)
return 0;
}
pan_pack(panvk_priv_mem_host_addr(shader->rsd), RENDERER_STATE, cfg) {
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->rsd), RENDERER_STATE,
cfg) {
pan_shader_prepare_rsd(&shader->info,
panvk_priv_mem_dev_addr(shader->code_mem), &cfg);
}

View file

@ -51,7 +51,7 @@ prepare_driver_set(struct panvk_cmd_buffer *cmdbuf)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
/* Dummy sampler always comes first. */
pan_pack(&descs[0], SAMPLER, cfg) {
pan_cast_and_pack(&descs[0], SAMPLER, cfg) {
cfg.clamp_integer_array_indices = false;
}

View file

@ -10,6 +10,7 @@
* SPDX-License-Identifier: MIT
*/
#include <stdint.h>
#include "genxml/gen_macros.h"
#include "panvk_buffer.h"
@ -136,7 +137,7 @@ prepare_vs_driver_set(struct panvk_cmd_buffer *cmdbuf)
}
/* Dummy sampler always comes right after the vertex attribs. */
pan_pack(&descs[MAX_VS_ATTRIBS], SAMPLER, cfg) {
pan_cast_and_pack(&descs[MAX_VS_ATTRIBS], SAMPLER, cfg) {
cfg.clamp_integer_array_indices = false;
}
@ -147,7 +148,7 @@ prepare_vs_driver_set(struct panvk_cmd_buffer *cmdbuf)
for (uint32_t i = 0; i < vb_count; i++) {
const struct panvk_attrib_buf *vb = &cmdbuf->state.gfx.vb.bufs[i];
pan_pack(&descs[vb_offset + i], BUFFER, cfg) {
pan_cast_and_pack(&descs[vb_offset + i], BUFFER, cfg) {
if (vi->bindings_valid & BITFIELD_BIT(i)) {
cfg.address = vb->address;
cfg.size = vb->size;
@ -180,7 +181,7 @@ prepare_fs_driver_set(struct panvk_cmd_buffer *cmdbuf)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
/* Dummy sampler always comes first. */
pan_pack(&descs[0], SAMPLER, cfg) {
pan_cast_and_pack(&descs[0], SAMPLER, cfg) {
cfg.clamp_integer_array_indices = false;
}
@ -421,7 +422,7 @@ prepare_vp(struct panvk_cmd_buffer *cmdbuf)
if (dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) ||
dyn_gfx_state_dirty(cmdbuf, VP_SCISSORS)) {
uint64_t scissor_box;
struct mali_scissor_packed scissor_box;
pan_pack(&scissor_box, SCISSOR, cfg) {
/* The spec says "width must be greater than 0.0" */
@ -452,7 +453,8 @@ prepare_vp(struct panvk_cmd_buffer *cmdbuf)
cfg.scissor_maximum_y = CLAMP(maxy, 0, UINT16_MAX);
}
cs_move64_to(b, cs_sr_reg64(b, 42), scissor_box);
struct mali_scissor_packed *scissor_box_ptr = &scissor_box;
cs_move64_to(b, cs_sr_reg64(b, 42), *((uint64_t *)scissor_box_ptr));
}
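The packed SCISSOR descriptor is exactly 64 bits, so reading it back as a uint64_t lets it ride through cs_move64_to as an immediate. The pointer cast above yields the in-memory bits; a memcpy of the same bytes is an equivalent, strict-aliasing-safe spelling (illustrative type below):

#include <stdint.h>
#include <string.h>

struct scissor_packed_model { uint32_t opaque[2]; }; /* illustrative */

static inline uint64_t
packed_bits_as_u64(const struct scissor_packed_model *p)
{
   uint64_t v;
   memcpy(&v, p, sizeof(v)); /* same bytes as *(uint64_t *)p */
   return v;
}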
if (dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) ||
@ -1271,7 +1273,7 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf)
if (!zsd.gpu)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
pan_pack(zsd.cpu, DEPTH_STENCIL, cfg) {
pan_cast_and_pack(zsd.cpu, DEPTH_STENCIL, cfg) {
cfg.stencil_test_enable = test_s;
if (test_s) {
cfg.front_compare_function =

View file

@ -687,7 +687,8 @@ init_tiler(struct panvk_queue *queue)
tiler_heap->context.handle = thc.handle;
tiler_heap->context.dev_addr = thc.tiler_heap_ctx_gpu_va;
pan_pack(panvk_priv_mem_host_addr(tiler_heap->desc), TILER_HEAP, cfg) {
pan_cast_and_pack(panvk_priv_mem_host_addr(tiler_heap->desc), TILER_HEAP,
cfg) {
cfg.size = tiler_heap->chunk_size;
cfg.base = thc.first_heap_chunk_gpu_va;
cfg.bottom = cfg.base + 64;

View file

@ -460,13 +460,14 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
static void
panvk_draw_emit_attrib_buf(const struct panvk_draw_data *draw,
const struct vk_vertex_binding_state *buf_info,
const struct panvk_attrib_buf *buf, void *desc)
const struct panvk_attrib_buf *buf,
struct mali_attribute_buffer_packed *desc)
{
uint64_t addr = buf->address & ~63ULL;
unsigned size = buf->size + (buf->address & 63);
unsigned divisor = draw->padded_vertex_count * buf_info->divisor;
bool per_instance = buf_info->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE;
void *buf_ext = desc + pan_size(ATTRIBUTE_BUFFER);
struct mali_attribute_buffer_packed *buf_ext = &desc[1];
/* TODO: support instanced arrays */
if (draw->info.instance.count <= 1) {
@ -515,7 +516,7 @@ panvk_draw_emit_attrib_buf(const struct panvk_draw_data *draw,
cfg.divisor_e = divisor_e;
}
pan_pack(buf_ext, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
pan_cast_and_pack(buf_ext, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
cfg.divisor_numerator = divisor_num;
cfg.divisor = buf_info->divisor;
}
@ -532,7 +533,8 @@ static void
panvk_draw_emit_attrib(const struct panvk_draw_data *draw,
const struct vk_vertex_attribute_state *attrib_info,
const struct vk_vertex_binding_state *buf_info,
const struct panvk_attrib_buf *buf, void *desc)
const struct panvk_attrib_buf *buf,
struct mali_attribute_packed *desc)
{
bool per_instance = buf_info->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE;
enum pipe_format f = vk_format_to_pipe_format(attrib_info->format);
@ -631,7 +633,8 @@ panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf,
}
static void
panvk_emit_viewport(struct panvk_cmd_buffer *cmdbuf, void *vpd)
panvk_emit_viewport(struct panvk_cmd_buffer *cmdbuf,
struct mali_viewport_packed *vpd)
{
const struct vk_viewport_state *vp = &cmdbuf->vk.dynamic_graphics_state.vp;
@ -705,7 +708,8 @@ panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf,
static void
panvk_emit_vertex_dcd(struct panvk_cmd_buffer *cmdbuf,
const struct panvk_draw_data *draw, void *dcd)
const struct panvk_draw_data *draw,
struct mali_draw_packed *dcd)
{
const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
const struct panvk_shader_desc_state *vs_desc_state =
@ -786,7 +790,8 @@ translate_prim_topology(VkPrimitiveTopology in)
static void
panvk_emit_tiler_primitive(struct panvk_cmd_buffer *cmdbuf,
const struct panvk_draw_data *draw, void *prim)
const struct panvk_draw_data *draw,
struct mali_primitive_packed *prim)
{
const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
const struct panvk_shader *fs = get_fs(cmdbuf);
@ -846,7 +851,7 @@ panvk_emit_tiler_primitive(struct panvk_cmd_buffer *cmdbuf,
static void
panvk_emit_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf,
const struct panvk_draw_data *draw,
void *primsz)
struct mali_primitive_size_packed *primsz)
{
const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
const struct vk_input_assembly_state *ia =
@ -866,7 +871,8 @@ panvk_emit_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf,
static void
panvk_emit_tiler_dcd(struct panvk_cmd_buffer *cmdbuf,
const struct panvk_draw_data *draw, void *dcd)
const struct panvk_draw_data *draw,
struct mali_draw_packed *dcd)
{
struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
const struct vk_rasterization_state *rs =

View file

@ -112,14 +112,14 @@ panvk_per_arch(CreateBufferView)(VkDevice _device,
.cpu = panvk_priv_mem_host_addr(view->mem),
};
GENX(panfrost_new_texture)(&pview, view->descs.tex.opaque, &ptr);
GENX(panfrost_new_texture)(&pview, &view->descs.tex, &ptr);
}
#if PAN_ARCH <= 7
if (buffer->vk.usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) {
unsigned blksz = vk_format_get_blocksize(pCreateInfo->format);
pan_pack(view->descs.img_attrib_buf[0].opaque, ATTRIBUTE_BUFFER, cfg) {
pan_pack(&view->descs.img_attrib_buf[0], ATTRIBUTE_BUFFER, cfg) {
/* The format is the only thing we lack to emit attribute descriptors
* when copying from the set to the attribute tables. Instead of
* making the descriptor bigger to store an extra format, we pack
@ -137,8 +137,8 @@ panvk_per_arch(CreateBufferView)(VkDevice _device,
cfg.size = view->vk.elements * blksz;
}
pan_pack(view->descs.img_attrib_buf[1].opaque,
ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) {
struct mali_attribute_buffer_packed *buf = &view->descs.img_attrib_buf[1];
pan_cast_and_pack(buf, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) {
cfg.s_dimension = view->vk.elements;
cfg.t_dimension = 1;
cfg.r_dimension = 1;
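This is the slot-reuse case the cast helper is for: img_attrib_buf[1] is declared as a generic attribute-buffer descriptor but actually holds the 3D continuation, a different type with the same footprint. A model of the idiom with the size equality spelled out (illustrative structs):

#include <stdint.h>

struct attr_buf_packed_model { uint32_t opaque[4]; };         /* illustrative */
struct attr_buf_cont_3d_packed_model { uint32_t opaque[4]; }; /* illustrative */

_Static_assert(sizeof(struct attr_buf_packed_model) ==
                  sizeof(struct attr_buf_cont_3d_packed_model),
               "continuation must fit the slot it reuses");

static inline struct attr_buf_cont_3d_packed_model *
as_continuation(struct attr_buf_packed_model *slot)
{
   return (struct attr_buf_cont_3d_packed_model *)slot;
}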

View file

@ -233,7 +233,7 @@ panvk_per_arch(cmd_prepare_shader_desc_tables)(
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
/* Emit a dummy sampler if we have to. */
pan_pack(sampler.cpu, SAMPLER, cfg) {
pan_cast_and_pack(sampler.cpu, SAMPLER, cfg) {
cfg.clamp_integer_array_indices = false;
}

View file

@ -191,7 +191,8 @@ get_preload_shader(struct panvk_device *dev,
return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
pan_pack(panvk_priv_mem_host_addr(shader->spd), SHADER_PROGRAM, cfg) {
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spd), SHADER_PROGRAM,
cfg) {
cfg.stage = MALI_SHADER_STAGE_FRAGMENT;
cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD;
@ -346,7 +347,7 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
if (!rsd.cpu)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
pan_pack(rsd.cpu, RENDERER_STATE, cfg) {
pan_cast_and_pack(rsd.cpu, RENDERER_STATE, cfg) {
pan_shader_prepare_rsd(&shader->info,
panvk_priv_mem_dev_addr(shader->code_mem), &cfg);
@ -409,7 +410,7 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
if (!vpd.cpu)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
pan_pack(vpd.cpu, VIEWPORT, cfg) {
pan_cast_and_pack(vpd.cpu, VIEWPORT, cfg) {
cfg.scissor_minimum_x = minx;
cfg.scissor_minimum_y = miny;
cfg.scissor_maximum_x = maxx;
@ -420,7 +421,7 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
if (!sampler.cpu)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
pan_pack(sampler.cpu, SAMPLER, cfg) {
pan_cast_and_pack(sampler.cpu, SAMPLER, cfg) {
cfg.seamless_cube_map = false;
cfg.normalized_coordinates = false;
cfg.clamp_integer_array_indices = false;
@ -568,7 +569,7 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
if (!res_table.cpu)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
pan_pack(res_table.cpu, RESOURCE, cfg) {
pan_cast_and_pack(res_table.cpu, RESOURCE, cfg) {
cfg.address = descs.gpu;
cfg.size = desc_count * PANVK_DESCRIPTOR_SIZE;
}
@ -582,7 +583,7 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
bool preload_s =
key->aspects != VK_IMAGE_ASPECT_COLOR_BIT && fbinfo->zs.preload.s;
pan_pack(zsd.cpu, DEPTH_STENCIL, cfg) {
pan_cast_and_pack(zsd.cpu, DEPTH_STENCIL, cfg) {
cfg.depth_function = MALI_FUNC_ALWAYS;
cfg.depth_write_enable = preload_z;

View file

@ -191,7 +191,7 @@ prepare_attr_buf_descs(struct panvk_image_view *view)
&image->planes[plane_idx].layout, view->pview.first_level,
is_3d ? 0 : view->pview.first_layer, is_3d ? view->pview.first_layer : 0);
pan_pack(view->descs.img_attrib_buf[0].opaque, ATTRIBUTE_BUFFER, cfg) {
pan_pack(&view->descs.img_attrib_buf[0], ATTRIBUTE_BUFFER, cfg) {
/* The format is the only thing we lack to emit attribute descriptors
* when copying from the set to the attribute tables. Instead of
* making the descriptor bigger to store an extra format, we pack
@ -213,8 +213,8 @@ prepare_attr_buf_descs(struct panvk_image_view *view)
cfg.size = pan_kmod_bo_size(image->bo) - offset;
}
pan_pack(view->descs.img_attrib_buf[1].opaque,
ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) {
struct mali_attribute_buffer_packed *buf = &view->descs.img_attrib_buf[1];
pan_cast_and_pack(buf, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) {
unsigned level = view->pview.first_level;
VkExtent3D extent = view->vk.extent;

View file

@ -105,7 +105,7 @@ panvk_per_arch(CreateSampler)(VkDevice _device,
panvk_afbc_reswizzle_border_color(&border_color, fmt);
#endif
pan_pack(sampler->desc.opaque, SAMPLER, cfg) {
pan_pack(&sampler->desc, SAMPLER, cfg) {
cfg.magnify_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST;
cfg.minify_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST;
cfg.mipmap_mode =

View file

@ -895,7 +895,8 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader,
if (!panvk_priv_mem_dev_addr(shader->rsd))
return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
pan_pack(panvk_priv_mem_host_addr(shader->rsd), RENDERER_STATE, cfg) {
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->rsd), RENDERER_STATE,
cfg) {
pan_shader_prepare_rsd(&shader->info, panvk_shader_get_dev_addr(shader),
&cfg);
}
@ -905,7 +906,8 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader,
if (!panvk_priv_mem_dev_addr(shader->spd))
return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
pan_pack(panvk_priv_mem_host_addr(shader->spd), SHADER_PROGRAM, cfg) {
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spd), SHADER_PROGRAM,
cfg) {
cfg.stage = pan_shader_stage(&shader->info);
if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT)
@ -928,8 +930,8 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader,
if (!panvk_priv_mem_dev_addr(shader->spds.pos_points))
return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
pan_pack(panvk_priv_mem_host_addr(shader->spds.pos_points),
SHADER_PROGRAM, cfg) {
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.pos_points),
SHADER_PROGRAM, cfg) {
cfg.stage = pan_shader_stage(&shader->info);
cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF;
cfg.register_allocation =
@ -944,8 +946,8 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader,
if (!panvk_priv_mem_dev_addr(shader->spds.pos_triangles))
return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
pan_pack(panvk_priv_mem_host_addr(shader->spds.pos_triangles),
SHADER_PROGRAM, cfg) {
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.pos_triangles),
SHADER_PROGRAM, cfg) {
cfg.stage = pan_shader_stage(&shader->info);
cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF;
cfg.register_allocation =
@ -962,8 +964,8 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader,
if (!panvk_priv_mem_dev_addr(shader->spds.var))
return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
pan_pack(panvk_priv_mem_host_addr(shader->spds.var), SHADER_PROGRAM,
cfg) {
pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.var),
SHADER_PROGRAM, cfg) {
unsigned work_count = shader->info.vs.secondary_work_reg_count;
cfg.stage = pan_shader_stage(&shader->info);