diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 7a268cecebb..43d713b9c1f 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -316,7 +316,7 @@ panfrost_emit_blend(struct panfrost_batch *batch, void *rts, /* Disable blending for unbacked render targets */ if (rt_count == 0 || !batch->key.cbufs[i] || !so->info[i].enabled) { - pan_pack(rts + i * pan_size(BLEND), BLEND, cfg) { + pan_pack(packed, BLEND, cfg) { cfg.enable = false; #if PAN_ARCH >= 6 cfg.internal.mode = MALI_BLEND_MODE_OFF; @@ -358,6 +358,8 @@ panfrost_emit_blend(struct panfrost_batch *batch, void *rts, #if PAN_ARCH >= 6 struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT]; + struct mali_internal_blend_packed *internal_blend_packed = + (struct mali_internal_blend_packed *)&packed->opaque[2]; /* Words 2 and 3: Internal blend */ if (blend_shaders[i]) { @@ -368,7 +370,7 @@ panfrost_emit_blend(struct panfrost_batch *batch, void *rts, assert(!fs->bin.bo || (blend_shaders[i] & (0xffffffffull << 32)) == (fs->bin.gpu & (0xffffffffull << 32))); - pan_pack(&packed->opaque[2], INTERNAL_BLEND, cfg) { + pan_pack(internal_blend_packed, INTERNAL_BLEND, cfg) { cfg.mode = MALI_BLEND_MODE_SHADER; cfg.shader.pc = (uint32_t)blend_shaders[i]; @@ -380,7 +382,7 @@ panfrost_emit_blend(struct panfrost_batch *batch, void *rts, #endif } } else { - pan_pack(&packed->opaque[2], INTERNAL_BLEND, cfg) { + pan_pack(internal_blend_packed, INTERNAL_BLEND, cfg) { cfg.mode = info.opaque ? MALI_BLEND_MODE_OPAQUE : MALI_BLEND_MODE_FIXED_FUNCTION; @@ -757,7 +759,7 @@ panfrost_emit_viewport(struct panfrost_batch *batch) if (!T.cpu) return 0; - pan_pack(T.cpu, VIEWPORT, cfg) { + pan_cast_and_pack(T.cpu, VIEWPORT, cfg) { cfg.scissor_minimum_x = minx; cfg.scissor_minimum_y = miny; cfg.scissor_maximum_x = maxx; @@ -769,7 +771,7 @@ panfrost_emit_viewport(struct panfrost_batch *batch) return T.gpu; #else - pan_pack(&batch->scissor, SCISSOR, cfg) { + pan_cast_and_pack(&batch->scissor, SCISSOR, cfg) { cfg.scissor_minimum_x = minx; cfg.scissor_minimum_y = miny; cfg.scissor_maximum_x = maxx; @@ -1197,7 +1199,7 @@ panfrost_upload_rt_conversion_sysval(struct panfrost_batch *batch, uniform->u[0] = GENX(pan_blend_get_internal_desc)(format, rt, size, false) >> 32; } else { - pan_pack(&uniform->u[0], INTERNAL_CONVERSION, cfg) + pan_cast_and_pack(&uniform->u[0], INTERNAL_CONVERSION, cfg) cfg.memory_format = GENX(panfrost_format_from_pipe_format)(PIPE_FORMAT_NONE)->hw; } @@ -1827,7 +1829,7 @@ static uint64_t panfrost_upload_wa_sampler(struct panfrost_batch *batch) { struct panfrost_ptr T = pan_pool_alloc_desc(&batch->pool.base, SAMPLER); - pan_pack(T.cpu, SAMPLER, cfg) + pan_cast_and_pack(T.cpu, SAMPLER, cfg) ; return T.gpu; } @@ -1939,7 +1941,8 @@ emit_image_bufs(struct panfrost_batch *batch, enum pipe_shader_type shader, } if (is_buffer) { - pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { + pan_cast_and_pack(&bufs[(i * 2) + 1], ATTRIBUTE_BUFFER_CONTINUATION_3D, + cfg) { cfg.s_dimension = rsrc->base.width0 / util_format_get_blocksize(image->format); cfg.t_dimension = cfg.r_dimension = 1; @@ -1948,7 +1951,8 @@ emit_image_bufs(struct panfrost_batch *batch, enum pipe_shader_type shader, continue; } - pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { + pan_cast_and_pack(&bufs[(i * 2) + 1], ATTRIBUTE_BUFFER_CONTINUATION_3D, + cfg) { unsigned level = image->u.tex.level; unsigned samples = rsrc->image.layout.nr_samples; @@ -2009,8 +2013,9 @@ panfrost_emit_image_attribs(struct panfrost_batch *batch, uint64_t *buffers, /* We need an empty attrib buf to stop the prefetching on Bifrost */ #if PAN_ARCH >= 6 - pan_pack(bufs.cpu + ((buf_count - 1) * pan_size(ATTRIBUTE_BUFFER)), - ATTRIBUTE_BUFFER, cfg) + struct mali_attribute_buffer_packed *attrib_bufs = bufs.cpu; + + pan_pack(&attrib_bufs[buf_count - 1], ATTRIBUTE_BUFFER, cfg) ; #endif @@ -2148,7 +2153,8 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, uint64_t *buffers) cfg.divisor_e = extra_flags; } - pan_pack(bufs + k + 1, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) { + pan_cast_and_pack(&bufs[k + 1], ATTRIBUTE_BUFFER_CONTINUATION_NPOT, + cfg) { cfg.divisor_numerator = magic_divisor; cfg.divisor = divisor; } @@ -2162,14 +2168,17 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, uint64_t *buffers) #if PAN_ARCH <= 5 /* Add special gl_VertexID/gl_InstanceID buffers */ if (special_vbufs) { - panfrost_vertex_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1); + panfrost_vertex_id(ctx->padded_count, + (struct mali_attribute_vertex_id_packed *)&bufs[k], + ctx->instance_count > 1); pan_pack(out + PAN_VERTEX_ID, ATTRIBUTE, cfg) { cfg.buffer_index = k++; cfg.format = so->formats[PAN_VERTEX_ID]; } - panfrost_instance_id(ctx->padded_count, &bufs[k], + panfrost_instance_id(ctx->padded_count, + (struct mali_attribute_instance_id_packed *)&bufs[k], ctx->instance_count > 1); pan_pack(out + PAN_INSTANCE_ID, ATTRIBUTE, cfg) { @@ -3640,7 +3649,7 @@ panfrost_create_depth_stencil_state( #else /* Pack with nodefaults so only explicitly set fields affect pan_merge() when * emitting depth stencil descriptor */ - pan_pack_nodefaults(&so->desc, DEPTH_STENCIL, cfg) { + pan_cast_and_pack_nodefaults(&so->desc, DEPTH_STENCIL, cfg) { cfg.front_compare_function = (enum mali_func)front.func; cfg.front_stencil_fail = pan_pipe_to_stencil_op(front.fail_op); cfg.front_depth_fail = pan_pipe_to_stencil_op(front.zfail_op); @@ -3824,7 +3833,8 @@ prepare_shader(struct panfrost_compiled_shader *state, struct panfrost_pool *pool, bool upload) { #if PAN_ARCH <= 7 - void *out = &state->partial_rsd; + struct mali_renderer_state_packed *out = + (struct mali_renderer_state_packed *)&state->partial_rsd; if (upload) { struct panfrost_ptr ptr = @@ -3857,8 +3867,10 @@ prepare_shader(struct panfrost_compiled_shader *state, state->state = panfrost_pool_take_ref(pool, ptr.gpu); + struct mali_shader_program_packed *programs = ptr.cpu; + /* Generic, or IDVS/points */ - pan_pack(ptr.cpu, SHADER_PROGRAM, cfg) { + pan_cast_and_pack(&programs[0], SHADER_PROGRAM, cfg) { cfg.stage = pan_shader_stage(&state->info); if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT) @@ -3880,7 +3892,7 @@ prepare_shader(struct panfrost_compiled_shader *state, return; /* IDVS/triangles */ - pan_pack(ptr.cpu + pan_size(SHADER_PROGRAM), SHADER_PROGRAM, cfg) { + pan_pack(&programs[1], SHADER_PROGRAM, cfg) { cfg.stage = pan_shader_stage(&state->info); cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF; cfg.register_allocation = @@ -3893,7 +3905,7 @@ prepare_shader(struct panfrost_compiled_shader *state, if (!secondary_enable) return; - pan_pack(ptr.cpu + (pan_size(SHADER_PROGRAM) * 2), SHADER_PROGRAM, cfg) { + pan_pack(&programs[2], SHADER_PROGRAM, cfg) { unsigned work_count = state->info.vs.secondary_work_reg_count; cfg.stage = pan_shader_stage(&state->info); diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.h b/src/gallium/drivers/panfrost/pan_cmdstream.h index 2f18b2e8b20..cd8f674db94 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.h +++ b/src/gallium/drivers/panfrost/pan_cmdstream.h @@ -153,7 +153,8 @@ panfrost_overdraw_alpha(const struct panfrost_context *ctx, bool zero) static inline void panfrost_emit_primitive_size(struct panfrost_context *ctx, bool points, - uint64_t size_array, void *prim_size) + uint64_t size_array, + struct mali_primitive_size_packed *prim_size) { struct panfrost_rasterizer *rast = ctx->rasterizer; diff --git a/src/gallium/drivers/panfrost/pan_csf.c b/src/gallium/drivers/panfrost/pan_csf.c index e876fbe6bd6..e6f69374379 100644 --- a/src/gallium/drivers/panfrost/pan_csf.c +++ b/src/gallium/drivers/panfrost/pan_csf.c @@ -28,6 +28,7 @@ #include "genxml/cs_builder.h" #include "panfrost/lib/genxml/cs_builder.h" +#include "gen_macros.h" #include "pan_cmdstream.h" #include "pan_context.h" #include "pan_csf.h" @@ -702,7 +703,7 @@ csf_emit_tiler_desc(struct panfrost_batch *batch, const struct pan_fb_info *fb) tiler.geometry_buffer_size = ctx->csf.tmp_geom_bo->kmod_bo->size; } - batch->csf.pending_tiler_desc = 0; + batch->csf.pending_tiler_desc = NULL; } void @@ -895,8 +896,8 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch, cs_move32_to(b, cs_reg32(b, 32), 0); /* Compute workgroup size */ - uint32_t wg_size[4]; - pan_pack(wg_size, COMPUTE_SIZE_WORKGROUP, cfg) { + struct mali_compute_size_workgroup_packed wg_size; + pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) { cfg.workgroup_size_x = info->block[0]; cfg.workgroup_size_y = info->block[1]; cfg.workgroup_size_z = info->block[2]; @@ -911,7 +912,7 @@ GENX(csf_launch_grid)(struct panfrost_batch *batch, (info->variable_shared_mem == 0); } - cs_move32_to(b, cs_reg32(b, 33), wg_size[0]); + cs_move32_to(b, cs_reg32(b, 33), wg_size.opaque[0]); /* Offset */ for (unsigned i = 0; i < 3; ++i) @@ -998,8 +999,8 @@ GENX(csf_launch_xfb)(struct panfrost_batch *batch, cs_move32_to(b, cs_reg32(b, 32), batch->ctx->offset_start); /* Compute workgroup size */ - uint32_t wg_size[4]; - pan_pack(wg_size, COMPUTE_SIZE_WORKGROUP, cfg) { + struct mali_compute_size_workgroup_packed wg_size; + pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) { cfg.workgroup_size_x = 1; cfg.workgroup_size_y = 1; cfg.workgroup_size_z = 1; @@ -1009,7 +1010,7 @@ GENX(csf_launch_xfb)(struct panfrost_batch *batch, */ cfg.allow_merging_workgroups = true; } - cs_move32_to(b, cs_reg32(b, 33), wg_size[0]); + cs_move32_to(b, cs_reg32(b, 33), wg_size.opaque[0]); /* Offset */ for (unsigned i = 0; i < 3; ++i) @@ -1119,7 +1120,7 @@ csf_emit_draw_state(struct panfrost_batch *batch, struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; - uint32_t primitive_flags = 0; + struct mali_primitive_flags_packed primitive_flags; pan_pack(&primitive_flags, PRIMITIVE_FLAGS, cfg) { if (panfrost_writes_point_size(ctx)) cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16; @@ -1138,9 +1139,11 @@ csf_emit_draw_state(struct panfrost_batch *batch, : MALI_FIFO_FORMAT_BASIC; } - cs_move32_to(b, cs_reg32(b, 56), primitive_flags); + cs_move32_to(b, cs_reg32(b, 56), primitive_flags.opaque[0]); + + struct mali_dcd_flags_0_packed dcd_flags0; + struct mali_dcd_flags_1_packed dcd_flags1; - uint32_t dcd_flags0 = 0, dcd_flags1 = 0; pan_pack(&dcd_flags0, DCD_FLAGS_0, cfg) { enum mesa_prim reduced_mode = u_reduced_prim(info->mode); bool polygon = reduced_mode == MESA_PRIM_TRIANGLES; @@ -1245,15 +1248,16 @@ csf_emit_draw_state(struct panfrost_batch *batch, } } - cs_move32_to(b, cs_reg32(b, 57), dcd_flags0); - cs_move32_to(b, cs_reg32(b, 58), dcd_flags1); + cs_move32_to(b, cs_reg32(b, 57), dcd_flags0.opaque[0]); + cs_move32_to(b, cs_reg32(b, 58), dcd_flags1.opaque[0]); - uint64_t primsize = 0; + struct mali_primitive_size_packed primsize; panfrost_emit_primitive_size(ctx, info->mode == MESA_PRIM_POINTS, 0, &primsize); - cs_move64_to(b, cs_reg64(b, 60), primsize); + struct mali_primitive_size_packed *primsize_ptr = &primsize; + cs_move64_to(b, cs_reg64(b, 60), *((uint64_t*)primsize_ptr)); - uint32_t flags_override; + struct mali_primitive_flags_packed flags_override; /* Pack with nodefaults so only explicitly set override fields affect the * previously set register values */ pan_pack_nodefaults(&flags_override, PRIMITIVE_FLAGS, cfg) { @@ -1262,7 +1266,7 @@ csf_emit_draw_state(struct panfrost_batch *batch, cfg.secondary_shader = secondary_shader; }; - return flags_override; + return flags_override.opaque[0]; } static struct cs_index @@ -1430,7 +1434,7 @@ GENX(csf_init_context)(struct panfrost_context *ctx) if (ctx->csf.heap.desc_bo == NULL) goto err_tiler_heap_desc_bo; - pan_pack(ctx->csf.heap.desc_bo->ptr.cpu, TILER_HEAP, heap) { + pan_cast_and_pack(ctx->csf.heap.desc_bo->ptr.cpu, TILER_HEAP, heap) { heap.size = pan_screen(ctx->base.screen)->csf_tiler_heap.chunk_size; heap.base = thc.first_heap_chunk_gpu_va; heap.bottom = heap.base + 64; diff --git a/src/gallium/drivers/panfrost/pan_csf.h b/src/gallium/drivers/panfrost/pan_csf.h index e3dd7c23983..16d343bae84 100644 --- a/src/gallium/drivers/panfrost/pan_csf.h +++ b/src/gallium/drivers/panfrost/pan_csf.h @@ -78,7 +78,7 @@ struct panfrost_csf_batch { struct panfrost_ptr tiler_oom_ctx; - void *pending_tiler_desc; + struct mali_tiler_context_packed *pending_tiler_desc; }; struct panfrost_csf_context { diff --git a/src/gallium/drivers/panfrost/pan_fb_preload.c b/src/gallium/drivers/panfrost/pan_fb_preload.c index 3ab1e3a35a8..7766aefc614 100644 --- a/src/gallium/drivers/panfrost/pan_fb_preload.c +++ b/src/gallium/drivers/panfrost/pan_fb_preload.c @@ -132,7 +132,7 @@ static void pan_preload_emit_blend(unsigned rt, const struct pan_image_view *iview, const struct pan_preload_shader_data *preload_shader, - uint64_t blend_shader, void *out) + uint64_t blend_shader, struct mali_blend_packed *out) { assert(blend_shader == 0 || PAN_ARCH <= 5); @@ -212,14 +212,13 @@ pan_preload_is_ms(struct pan_preload_views *views) static void pan_preload_emit_blends(const struct pan_preload_shader_data *preload_shader, struct pan_preload_views *views, - uint64_t *blend_shaders, void *out) + uint64_t *blend_shaders, struct mali_blend_packed *out) { for (unsigned i = 0; i < MAX2(views->rt_count, 1); ++i) { - void *dest = out + pan_size(BLEND) * i; const struct pan_image_view *rt_view = views->rts[i]; uint64_t blend_shader = blend_shaders ? blend_shaders[i] : 0; - pan_preload_emit_blend(i, rt_view, preload_shader, blend_shader, dest); + pan_preload_emit_blend(i, rt_view, preload_shader, blend_shader, &out[i]); } } #endif @@ -228,7 +227,7 @@ pan_preload_emit_blends(const struct pan_preload_shader_data *preload_shader, static void pan_preload_emit_rsd(const struct pan_preload_shader_data *preload_shader, struct pan_preload_views *views, uint64_t *blend_shaders, - void *out) + struct mali_renderer_state_packed *out) { UNUSED bool zs = (views->z || views->s); bool ms = pan_preload_is_ms(views); @@ -312,7 +311,7 @@ pan_preload_emit_rsd(const struct pan_preload_shader_data *preload_shader, #if PAN_ARCH >= 5 pan_preload_emit_blends(preload_shader, views, blend_shaders, - out + pan_size(RENDERER_STATE)); + (void*)((uint8_t*)out + pan_size(RENDERER_STATE))); #endif } #endif @@ -827,7 +826,7 @@ pan_preload_emit_varying(struct pan_pool *pool) if (!varying.cpu) return 0; - pan_pack(varying.cpu, ATTRIBUTE, cfg) { + pan_cast_and_pack(varying.cpu, ATTRIBUTE, cfg) { cfg.buffer_index = 0; cfg.offset_enable = PAN_ARCH <= 5; cfg.format = @@ -853,7 +852,7 @@ pan_preload_emit_varying_buffer(struct pan_pool *pool, uint64_t coordinates) if (!varying_buffer.cpu) return 0; - pan_pack(varying_buffer.cpu, BUFFER, cfg) { + pan_cast_and_pack(varying_buffer.cpu, BUFFER, cfg) { cfg.address = coordinates; cfg.size = 4 * sizeof(float) * 4; } @@ -867,15 +866,15 @@ pan_preload_emit_varying_buffer(struct pan_pool *pool, uint64_t coordinates) if (!varying_buffer.cpu) return 0; - pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) { + pan_cast_and_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) { cfg.pointer = coordinates; cfg.stride = 4 * sizeof(float); cfg.size = cfg.stride * 4; } if (padding_buffer) { - pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER), - ATTRIBUTE_BUFFER, cfg) + pan_cast_and_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER), + ATTRIBUTE_BUFFER, cfg) ; } #endif @@ -891,7 +890,7 @@ pan_preload_emit_sampler(struct pan_pool *pool, bool nearest_filter) if (!sampler.cpu) return 0; - pan_pack(sampler.cpu, SAMPLER, cfg) { + pan_cast_and_pack(sampler.cpu, SAMPLER, cfg) { cfg.seamless_cube_map = false; cfg.normalized_coordinates = false; cfg.minify_nearest = nearest_filter; @@ -1027,7 +1026,7 @@ pan_preload_emit_zs(struct pan_pool *pool, bool z, bool s) if (!zsd.cpu) return 0; - pan_pack(zsd.cpu, DEPTH_STENCIL, cfg) { + pan_cast_and_pack(zsd.cpu, DEPTH_STENCIL, cfg) { cfg.depth_function = MALI_FUNC_ALWAYS; cfg.depth_write_enable = z; @@ -1066,7 +1065,7 @@ pan_preload_emit_viewport(struct pan_pool *pool, uint16_t minx, uint16_t miny, if (!vp.cpu) return 0; - pan_pack(vp.cpu, VIEWPORT, cfg) { + pan_cast_and_pack(vp.cpu, VIEWPORT, cfg) { cfg.scissor_minimum_x = minx; cfg.scissor_minimum_y = miny; cfg.scissor_maximum_x = maxx; @@ -1078,9 +1077,9 @@ pan_preload_emit_viewport(struct pan_pool *pool, uint16_t minx, uint16_t miny, #endif static void -pan_preload_emit_dcd(struct pan_fb_preload_cache *cache, - struct pan_pool *pool, struct pan_fb_info *fb, bool zs, - uint64_t coordinates, uint64_t tsd, void *out, +pan_preload_emit_dcd(struct pan_fb_preload_cache *cache, struct pan_pool *pool, + struct pan_fb_info *fb, bool zs, uint64_t coordinates, + uint64_t tsd, struct mali_draw_packed *out, bool always_write) { unsigned tex_count = 0; @@ -1163,7 +1162,7 @@ pan_preload_emit_dcd(struct pan_fb_preload_cache *cache, return; } - pan_pack(spd.cpu, SHADER_PROGRAM, cfg) { + pan_cast_and_pack(spd.cpu, SHADER_PROGRAM, cfg) { cfg.stage = MALI_SHADER_STAGE_FRAGMENT; cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL; cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD; diff --git a/src/gallium/drivers/panfrost/pan_jm.c b/src/gallium/drivers/panfrost/pan_jm.c index e68849ebee4..6df2150866f 100644 --- a/src/gallium/drivers/panfrost/pan_jm.c +++ b/src/gallium/drivers/panfrost/pan_jm.c @@ -65,11 +65,12 @@ GENX(jm_init_batch)(struct panfrost_batch *batch) #if PAN_ARCH == 5 struct mali_framebuffer_pointer_packed ptr; - pan_pack(ptr.opaque, FRAMEBUFFER_POINTER, cfg) { + pan_pack(&ptr, FRAMEBUFFER_POINTER, cfg) { cfg.pointer = batch->framebuffer.gpu; cfg.render_target_count = 1; /* a necessary lie */ } + /* XXX: THIS IS A BUG, FIXME */ batch->tls.gpu = ptr.opaque[0]; #endif #endif @@ -411,7 +412,7 @@ jm_emit_tiler_desc(struct panfrost_batch *batch) struct panfrost_ptr t = pan_pool_alloc_desc(&batch->pool.base, TILER_HEAP); - pan_pack(t.cpu, TILER_HEAP, heap) { + pan_cast_and_pack(t.cpu, TILER_HEAP, heap) { heap.size = panfrost_bo_size(dev->tiler_heap); heap.base = dev->tiler_heap->ptr.gpu; heap.bottom = dev->tiler_heap->ptr.gpu; @@ -423,7 +424,7 @@ jm_emit_tiler_desc(struct panfrost_batch *batch) assert(max_levels >= 2); t = pan_pool_alloc_desc(&batch->pool.base, TILER_CONTEXT); - pan_pack(t.cpu, TILER_CONTEXT, tiler) { + pan_cast_and_pack(t.cpu, TILER_CONTEXT, tiler) { /* TODO: Select hierarchy mask more effectively */ tiler.hierarchy_mask = (max_levels >= 8) ? 0xFF : 0x28; @@ -471,7 +472,8 @@ jm_emit_draw_descs(struct panfrost_batch *batch, struct MALI_DRAW *d, } static void -jm_emit_vertex_draw(struct panfrost_batch *batch, void *section) +jm_emit_vertex_draw(struct panfrost_batch *batch, + struct mali_draw_packed *section) { pan_pack(section, DRAW, cfg) { cfg.state = batch->rsd[PIPE_SHADER_VERTEX]; @@ -507,8 +509,8 @@ jm_emit_vertex_job(struct panfrost_batch *batch, #endif /* PAN_ARCH <= 7 */ static void -jm_emit_tiler_draw(void *out, struct panfrost_batch *batch, bool fs_required, - enum mesa_prim prim) +jm_emit_tiler_draw(struct mali_draw_packed *out, struct panfrost_batch *batch, + bool fs_required, enum mesa_prim prim) { struct panfrost_context *ctx = batch->ctx; struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; @@ -672,7 +674,7 @@ static void jm_emit_primitive(struct panfrost_batch *batch, const struct pipe_draw_info *info, const struct pipe_draw_start_count_bias *draw, - bool secondary_shader, void *out) + bool secondary_shader, struct mali_primitive_packed *out) { struct panfrost_context *ctx = batch->ctx; UNUSED struct pipe_rasterizer_state *rast = &ctx->rasterizer->base; diff --git a/src/panfrost/.clang-format b/src/panfrost/.clang-format index e56f4354240..bfe79abec0a 100644 --- a/src/panfrost/.clang-format +++ b/src/panfrost/.clang-format @@ -73,6 +73,8 @@ ForEachMacros: [ 'nodearray_dense_foreach', 'nodearray_dense_foreach_64', 'nodearray_sparse_foreach', + 'pan_cast_and_pack', + 'pan_cast_and_pack_nodefaults', 'pan_foreach_instr_in_block_rev', 'pan_foreach_predecessor', 'pan_foreach_successor', diff --git a/src/panfrost/lib/genxml/cs_builder.h b/src/panfrost/lib/genxml/cs_builder.h index c96afc31474..324b1b88f2b 100644 --- a/src/panfrost/lib/genxml/cs_builder.h +++ b/src/panfrost/lib/genxml/cs_builder.h @@ -495,14 +495,14 @@ cs_reserve_instrs(struct cs_builder *b, uint32_t num_instrs) uint64_t *ptr = b->cur_chunk.buffer.cpu + (b->cur_chunk.pos++); - pan_pack(ptr, CS_MOVE, I) { + pan_cast_and_pack(ptr, CS_MOVE, I) { I.destination = cs_overflow_address_reg(b); I.immediate = newbuf.gpu; } ptr = b->cur_chunk.buffer.cpu + (b->cur_chunk.pos++); - pan_pack(ptr, CS_MOVE32, I) { + pan_cast_and_pack(ptr, CS_MOVE32, I) { I.destination = cs_overflow_length_reg(b); } @@ -511,7 +511,7 @@ cs_reserve_instrs(struct cs_builder *b, uint32_t num_instrs) ptr = b->cur_chunk.buffer.cpu + (b->cur_chunk.pos++); - pan_pack(ptr, CS_JUMP, I) { + pan_cast_and_pack(ptr, CS_JUMP, I) { I.length = cs_overflow_length_reg(b); I.address = cs_overflow_address_reg(b); } @@ -680,7 +680,7 @@ cs_finish(struct cs_builder *b) * to be separated out being pan_pack can evaluate its argument multiple times, * yet cs_alloc has side effects. */ -#define cs_emit(b, T, cfg) pan_pack(cs_alloc_ins(b), CS_##T, cfg) +#define cs_emit(b, T, cfg) pan_cast_and_pack(cs_alloc_ins(b), CS_##T, cfg) /* Asynchronous operations take a mask of scoreboard slots to wait on * before executing the instruction, and signal a scoreboard slot when diff --git a/src/panfrost/lib/genxml/decode.c b/src/panfrost/lib/genxml/decode.c index 321f3051186..e570cebaafc 100644 --- a/src/panfrost/lib/genxml/decode.c +++ b/src/panfrost/lib/genxml/decode.c @@ -127,8 +127,8 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va, #endif if (params.pre_frame_0 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) { - const void *PANDECODE_PTR_VAR(ctx, dcd, - params.frame_shader_dcds + (0 * dcd_size)); + const struct mali_draw_packed *PANDECODE_PTR_VAR( + ctx, dcd, params.frame_shader_dcds + (0 * dcd_size)); pan_unpack(dcd, DRAW, draw); pandecode_log(ctx, "Pre frame 0 @%" PRIx64 " (mode=%d):\n", params.frame_shader_dcds, params.pre_frame_0); @@ -136,8 +136,8 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va, } if (params.pre_frame_1 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) { - const void *PANDECODE_PTR_VAR(ctx, dcd, - params.frame_shader_dcds + (1 * dcd_size)); + const struct mali_draw_packed *PANDECODE_PTR_VAR( + ctx, dcd, params.frame_shader_dcds + (1 * dcd_size)); pan_unpack(dcd, DRAW, draw); pandecode_log(ctx, "Pre frame 1 @%" PRIx64 ":\n", params.frame_shader_dcds + (1 * dcd_size)); @@ -145,8 +145,8 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va, } if (params.post_frame != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) { - const void *PANDECODE_PTR_VAR(ctx, dcd, - params.frame_shader_dcds + (2 * dcd_size)); + const struct mali_draw_packed *PANDECODE_PTR_VAR( + ctx, dcd, params.frame_shader_dcds + (2 * dcd_size)); pan_unpack(dcd, DRAW, draw); pandecode_log(ctx, "Post frame:\n"); GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id); @@ -205,10 +205,11 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va, #if PAN_ARCH >= 5 uint64_t -GENX(pandecode_blend)(struct pandecode_context *ctx, void *descs, int rt_no, +GENX(pandecode_blend)(struct pandecode_context *ctx, + struct mali_blend_packed *descs, int rt_no, uint64_t frag_shader) { - pan_unpack(descs + (rt_no * pan_size(BLEND)), BLEND, b); + pan_unpack(&descs[rt_no], BLEND, b); DUMP_UNPACKED(ctx, BLEND, b, "Blend RT %d:\n", rt_no); #if PAN_ARCH >= 6 if (b.internal.mode != MALI_BLEND_MODE_SHADER) @@ -272,7 +273,7 @@ pandecode_texture_payload(struct pandecode_context *ctx, uint64_t payload, #define PANDECODE_EMIT_TEX_PAYLOAD_DESC(T, msg) \ for (int i = 0; i < bitmap_count; ++i) { \ uint64_t addr = payload + pan_size(T) * i; \ - pan_unpack(PANDECODE_PTR(ctx, addr, void), T, s); \ + pan_unpack(PANDECODE_PTR(ctx, addr, MALI_##T##_PACKED_T), T, s); \ DUMP_UNPACKED(ctx, T, s, msg " @%" PRIx64 ":\n", addr) \ } @@ -316,7 +317,8 @@ pandecode_texture_payload(struct pandecode_context *ctx, uint64_t payload, void GENX(pandecode_texture)(struct pandecode_context *ctx, uint64_t u, unsigned tex) { - const uint8_t *cl = pandecode_fetch_gpu_mem(ctx, u, pan_size(TEXTURE)); + const struct mali_texture_packed *cl = + pandecode_fetch_gpu_mem(ctx, u, pan_size(TEXTURE)); pan_unpack(cl, TEXTURE, temp); DUMP_UNPACKED(ctx, TEXTURE, temp, "Texture:\n") @@ -327,8 +329,8 @@ GENX(pandecode_texture)(struct pandecode_context *ctx, uint64_t u, unsigned tex) } #else void -GENX(pandecode_texture)(struct pandecode_context *ctx, const void *cl, - unsigned tex) +GENX(pandecode_texture)(struct pandecode_context *ctx, + const struct mali_texture_packed *cl, unsigned tex) { pan_unpack(cl, TEXTURE, temp); DUMP_UNPACKED(ctx, TEXTURE, temp, "Texture:\n") @@ -357,10 +359,12 @@ void GENX(pandecode_tiler)(struct pandecode_context *ctx, uint64_t gpu_va, unsigned gpu_id) { - pan_unpack(PANDECODE_PTR(ctx, gpu_va, void), TILER_CONTEXT, t); + pan_unpack(PANDECODE_PTR(ctx, gpu_va, struct mali_tiler_context_packed), + TILER_CONTEXT, t); if (t.heap) { - pan_unpack(PANDECODE_PTR(ctx, t.heap, void), TILER_HEAP, h); + pan_unpack(PANDECODE_PTR(ctx, t.heap, struct mali_tiler_heap_packed), + TILER_HEAP, h); DUMP_UNPACKED(ctx, TILER_HEAP, h, "Tiler Heap:\n"); } @@ -418,7 +422,7 @@ pandecode_resources(struct pandecode_context *ctx, uint64_t addr, unsigned size) break; case MALI_DESCRIPTOR_TYPE_TEXTURE: pandecode_log(ctx, "Texture @%" PRIx64 "\n", addr + i); - GENX(pandecode_texture)(ctx, cl + i, i); + GENX(pandecode_texture)(ctx, (struct mali_texture_packed *)&cl[i], i); break; case MALI_DESCRIPTOR_TYPE_ATTRIBUTE: DUMP_CL(ctx, ATTRIBUTE, cl + i, "Attribute @%" PRIx64 ":\n", addr + i); @@ -440,13 +444,13 @@ GENX(pandecode_resource_tables)(struct pandecode_context *ctx, uint64_t addr, unsigned count = addr & 0x3F; addr = addr & ~0x3F; - const uint8_t *cl = + const struct mali_resource_packed *cl = pandecode_fetch_gpu_mem(ctx, addr, MALI_RESOURCE_LENGTH * count); pandecode_log(ctx, "%s resource table @%" PRIx64 "\n", label, addr); ctx->indent += 2; for (unsigned i = 0; i < count; ++i) { - pan_unpack(cl + i * MALI_RESOURCE_LENGTH, RESOURCE, entry); + pan_unpack(&cl[i], RESOURCE, entry); DUMP_UNPACKED(ctx, RESOURCE, entry, "Entry %u @%" PRIx64 ":\n", i, addr + i * MALI_RESOURCE_LENGTH); diff --git a/src/panfrost/lib/genxml/decode.h b/src/panfrost/lib/genxml/decode.h index 25904e7e611..f94794467d9 100644 --- a/src/panfrost/lib/genxml/decode.h +++ b/src/panfrost/lib/genxml/decode.h @@ -189,7 +189,7 @@ pandecode_log_cont(struct pandecode_context *ctx, const char *format, ...) #define DUMP_CL(ctx, T, cl, ...) \ { \ - pan_unpack(cl, T, temp); \ + pan_unpack((MALI_##T##_PACKED_T *)cl, T, temp); \ DUMP_UNPACKED(ctx, T, temp, __VA_ARGS__); \ } @@ -201,7 +201,8 @@ pandecode_log_cont(struct pandecode_context *ctx, const char *format, ...) } #define MAP_ADDR(ctx, T, addr, cl) \ - const uint8_t *cl = pandecode_fetch_gpu_mem(ctx, addr, pan_size(T)); + const MALI_##T##_PACKED_T *cl = \ + pandecode_fetch_gpu_mem(ctx, addr, pan_size(T)); #define DUMP_ADDR(ctx, T, addr, ...) \ { \ @@ -238,13 +239,15 @@ void GENX(pandecode_dcd)(struct pandecode_context *ctx, void GENX(pandecode_texture)(struct pandecode_context *ctx, uint64_t u, unsigned tex); #else -void GENX(pandecode_texture)(struct pandecode_context *ctx, const void *cl, +void GENX(pandecode_texture)(struct pandecode_context *ctx, + const struct mali_texture_packed *cl, unsigned tex); #endif #if PAN_ARCH >= 5 -uint64_t GENX(pandecode_blend)(struct pandecode_context *ctx, void *descs, - int rt_no, uint64_t frag_shader); +uint64_t GENX(pandecode_blend)(struct pandecode_context *ctx, + struct mali_blend_packed *descs, int rt_no, + uint64_t frag_shader); #endif #if PAN_ARCH >= 6 diff --git a/src/panfrost/lib/genxml/decode_csf.c b/src/panfrost/lib/genxml/decode_csf.c index fb28debe0af..1250e49ca89 100644 --- a/src/panfrost/lib/genxml/decode_csf.c +++ b/src/panfrost/lib/genxml/decode_csf.c @@ -36,6 +36,8 @@ /* Limit for Mali-G610. -1 because we're not including the active frame */ #define MAX_CALL_STACK_DEPTH (8 - 1) +#define cs_unpack(packed, T, unpacked) pan_cast_and_unpack(packed, T, unpacked) + struct queue_ctx { /* Size of CSHWIF register file in 32-bit registers */ unsigned nr_regs; @@ -93,12 +95,12 @@ static const char *conditions_str[] = { }; static void -print_cs_instr(FILE *fp, uint64_t instr) +print_cs_instr(FILE *fp, const uint64_t *instr) { - pan_unpack(&instr, CS_BASE, base); + cs_unpack(instr, CS_BASE, base); switch (base.opcode) { case MALI_CS_OPCODE_NOP: { - pan_unpack(&instr, CS_NOP, I); + cs_unpack(instr, CS_NOP, I); if (I.ignored) fprintf(fp, "NOP // 0x%" PRIX64, I.ignored); else @@ -107,19 +109,19 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_MOVE: { - pan_unpack(&instr, CS_MOVE, I); + cs_unpack(instr, CS_MOVE, I); fprintf(fp, "MOVE d%u, #0x%" PRIX64, I.destination, I.immediate); break; } case MALI_CS_OPCODE_MOVE32: { - pan_unpack(&instr, CS_MOVE32, I); + cs_unpack(instr, CS_MOVE32, I); fprintf(fp, "MOVE32 r%u, #0x%X", I.destination, I.immediate); break; } case MALI_CS_OPCODE_WAIT: { - pan_unpack(&instr, CS_WAIT, I); + cs_unpack(instr, CS_WAIT, I); fprintf(fp, "WAIT%s #%x", I.progress_increment ? ".progress_inc" : "", I.wait_mask); break; @@ -127,7 +129,7 @@ print_cs_instr(FILE *fp, uint64_t instr) case MALI_CS_OPCODE_RUN_COMPUTE: { const char *axes[4] = {"x_axis", "y_axis", "z_axis"}; - pan_unpack(&instr, CS_RUN_COMPUTE, I); + cs_unpack(instr, CS_RUN_COMPUTE, I); /* Print the instruction. Ignore the selects and the flags override * since we'll print them implicitly later. @@ -140,7 +142,7 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_RUN_TILING: { - pan_unpack(&instr, CS_RUN_TILING, I); + cs_unpack(instr, CS_RUN_TILING, I); fprintf(fp, "RUN_TILING%s.srt%d.spd%d.tsd%d.fau%d", I.progress_increment ? ".progress_inc" : "", I.srt_select, I.spd_select, I.tsd_select, I.fau_select); @@ -148,7 +150,7 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_RUN_IDVS: { - pan_unpack(&instr, CS_RUN_IDVS, I); + cs_unpack(instr, CS_RUN_IDVS, I); fprintf( fp, "RUN_IDVS%s%s%s.varying_srt%d.varying_fau%d.varying_tsd%d.frag_srt%d.frag_tsd%d r%u, #%x", @@ -168,7 +170,7 @@ print_cs_instr(FILE *fp, uint64_t instr) "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", }; - pan_unpack(&instr, CS_RUN_FRAGMENT, I); + cs_unpack(instr, CS_RUN_FRAGMENT, I); fprintf(fp, "RUN_FRAGMENT%s%s.tile_order=%s", I.progress_increment ? ".progress_inc" : "", @@ -178,21 +180,22 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_RUN_FULLSCREEN: { - pan_unpack(&instr, CS_RUN_FULLSCREEN, I); + cs_unpack(instr, CS_RUN_FULLSCREEN, I); fprintf(fp, "RUN_FULLSCREEN%s r%u, #%x", - I.progress_increment ? ".progress_inc" : "", I.dcd, I.flags_override); + I.progress_increment ? ".progress_inc" : "", I.dcd, + I.flags_override); break; } case MALI_CS_OPCODE_FINISH_TILING: { - pan_unpack(&instr, CS_FINISH_TILING, I); + cs_unpack(instr, CS_FINISH_TILING, I); fprintf(fp, "FINISH_TILING%s", I.progress_increment ? ".progress_inc" : ""); break; } case MALI_CS_OPCODE_FINISH_FRAGMENT: { - pan_unpack(&instr, CS_FINISH_FRAGMENT, I); + cs_unpack(instr, CS_FINISH_FRAGMENT, I); fprintf(fp, "FINISH_FRAGMENT%s d%u, d%u, #%x, #%u", I.increment_fragment_completed ? ".frag_end" : "", I.last_heap_chunk, I.first_heap_chunk, I.wait_mask, @@ -201,7 +204,7 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_ADD_IMMEDIATE32: { - pan_unpack(&instr, CS_ADD_IMMEDIATE32, I); + cs_unpack(instr, CS_ADD_IMMEDIATE32, I); fprintf(fp, "ADD_IMMEDIATE32 r%u, r%u, #%d", I.destination, I.source, I.immediate); @@ -209,7 +212,7 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_ADD_IMMEDIATE64: { - pan_unpack(&instr, CS_ADD_IMMEDIATE64, I); + cs_unpack(instr, CS_ADD_IMMEDIATE64, I); fprintf(fp, "ADD_IMMEDIATE64 d%u, d%u, #%d", I.destination, I.source, I.immediate); @@ -217,7 +220,7 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_UMIN32: { - pan_unpack(&instr, CS_UMIN32, I); + cs_unpack(instr, CS_UMIN32, I); fprintf(fp, "UMIN32 r%u, r%u, r%u", I.destination, I.source_1, I.source_2); @@ -225,7 +228,7 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_LOAD_MULTIPLE: { - pan_unpack(&instr, CS_LOAD_MULTIPLE, I); + cs_unpack(instr, CS_LOAD_MULTIPLE, I); fprintf(fp, "LOAD_MULTIPLE "); print_reg_tuple(I.base_register, I.mask, fp); @@ -235,7 +238,7 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_STORE_MULTIPLE: { - pan_unpack(&instr, CS_STORE_MULTIPLE, I); + cs_unpack(instr, CS_STORE_MULTIPLE, I); fprintf(fp, "STORE_MULTIPLE "); print_indirect(I.address, I.offset, fp); @@ -245,44 +248,44 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_BRANCH: { - pan_unpack(&instr, CS_BRANCH, I); + cs_unpack(instr, CS_BRANCH, I); fprintf(fp, "BRANCH.%s r%u, #%d", conditions_str[I.condition], I.value, I.offset); break; } case MALI_CS_OPCODE_SET_SB_ENTRY: { - pan_unpack(&instr, CS_SET_SB_ENTRY, I); + cs_unpack(instr, CS_SET_SB_ENTRY, I); fprintf(fp, "SET_SB_ENTRY #%u, #%u", I.endpoint_entry, I.other_entry); break; } case MALI_CS_OPCODE_PROGRESS_WAIT: { - pan_unpack(&instr, CS_PROGRESS_WAIT, I); + cs_unpack(instr, CS_PROGRESS_WAIT, I); fprintf(fp, "PROGRESS_WAIT d%u, #%u", I.source, I.queue); break; } case MALI_CS_OPCODE_SET_EXCEPTION_HANDLER: { - pan_unpack(&instr, CS_SET_EXCEPTION_HANDLER, I); + cs_unpack(instr, CS_SET_EXCEPTION_HANDLER, I); fprintf(fp, "SET_EXCEPTION_HANDLER d%u, r%u", I.address, I.length); break; } case MALI_CS_OPCODE_CALL: { - pan_unpack(&instr, CS_CALL, I); + cs_unpack(instr, CS_CALL, I); fprintf(fp, "CALL d%u, r%u", I.address, I.length); break; } case MALI_CS_OPCODE_JUMP: { - pan_unpack(&instr, CS_JUMP, I); + cs_unpack(instr, CS_JUMP, I); fprintf(fp, "JUMP d%u, r%u", I.address, I.length); break; } case MALI_CS_OPCODE_REQ_RESOURCE: { - pan_unpack(&instr, CS_REQ_RESOURCE, I); + cs_unpack(instr, CS_REQ_RESOURCE, I); fprintf(fp, "REQ_RESOURCE%s%s%s%s", I.compute ? ".compute" : "", I.fragment ? ".fragment" : "", I.tiler ? ".tiler" : "", I.idvs ? ".idvs" : ""); @@ -290,7 +293,7 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_FLUSH_CACHE2: { - pan_unpack(&instr, CS_FLUSH_CACHE2, I); + cs_unpack(instr, CS_FLUSH_CACHE2, I); static const char *mode[] = { "nop", "clean", @@ -306,7 +309,7 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_SYNC_ADD32: { - pan_unpack(&instr, CS_SYNC_ADD32, I); + cs_unpack(instr, CS_SYNC_ADD32, I); fprintf(fp, "SYNC_ADD32%s%s [d%u], r%u, #%x, #%u", I.error_propagate ? ".error_propagate" : "", I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system", I.address, @@ -315,7 +318,7 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_SYNC_SET32: { - pan_unpack(&instr, CS_SYNC_SET32, I); + cs_unpack(instr, CS_SYNC_SET32, I); fprintf(fp, "SYNC_SET32.%s%s [d%u], r%u, #%x, #%u", I.error_propagate ? ".error_propagate" : "", I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system", I.address, @@ -324,7 +327,7 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_SYNC_WAIT32: { - pan_unpack(&instr, CS_SYNC_WAIT32, I); + cs_unpack(instr, CS_SYNC_WAIT32, I); fprintf(fp, "SYNC_WAIT32%s%s d%u, r%u", conditions_str[I.condition], I.error_reject ? ".reject" : ".inherit", I.address, I.data); break; @@ -338,7 +341,7 @@ print_cs_instr(FILE *fp, uint64_t instr) "ERROR_STATE", }; - pan_unpack(&instr, CS_STORE_STATE, I); + cs_unpack(instr, CS_STORE_STATE, I); fprintf(fp, "STORE_STATE.%s d%u, #%i, #%x, #%u", I.state >= ARRAY_SIZE(states_str) ? "UNKNOWN_STATE" : states_str[I.state], @@ -347,25 +350,25 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_PROT_REGION: { - pan_unpack(&instr, CS_PROT_REGION, I); + cs_unpack(instr, CS_PROT_REGION, I); fprintf(fp, "PROT_REGION #%u", I.size); break; } case MALI_CS_OPCODE_PROGRESS_STORE: { - pan_unpack(&instr, CS_PROGRESS_STORE, I); + cs_unpack(instr, CS_PROGRESS_STORE, I); fprintf(fp, "PROGRESS_STORE d%u", I.source); break; } case MALI_CS_OPCODE_PROGRESS_LOAD: { - pan_unpack(&instr, CS_PROGRESS_LOAD, I); + cs_unpack(instr, CS_PROGRESS_LOAD, I); fprintf(fp, "PROGRESS_LOAD d%u", I.destination); break; } case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT: { - pan_unpack(&instr, CS_RUN_COMPUTE_INDIRECT, I); + cs_unpack(instr, CS_RUN_COMPUTE_INDIRECT, I); fprintf(fp, "RUN_COMPUTE_INDIRECT%s.srt%d.spd%d.tsd%d.fau%d #%u", I.progress_increment ? ".progress_inc" : "", I.srt_select, I.spd_select, I.tsd_select, I.fau_select, I.workgroups_per_task); @@ -374,19 +377,19 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_ERROR_BARRIER: { - pan_unpack(&instr, CS_ERROR_BARRIER, I); + cs_unpack(instr, CS_ERROR_BARRIER, I); fprintf(fp, "ERROR_BARRIER"); break; } case MALI_CS_OPCODE_HEAP_SET: { - pan_unpack(&instr, CS_HEAP_SET, I); + cs_unpack(instr, CS_HEAP_SET, I); fprintf(fp, "HEAP_SET d%u", I.address); break; } case MALI_CS_OPCODE_HEAP_OPERATION: { - pan_unpack(&instr, CS_HEAP_OPERATION, I); + cs_unpack(instr, CS_HEAP_OPERATION, I); const char *counter_names[] = {"vt_start", "vt_end", NULL, "frag_end"}; fprintf(fp, "HEAP_OPERATION.%s #%x, #%d", counter_names[I.operation], I.wait_mask, I.signal_slot); @@ -394,7 +397,7 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_TRACE_POINT: { - pan_unpack(&instr, CS_TRACE_POINT, I); + cs_unpack(instr, CS_TRACE_POINT, I); fprintf(fp, "TRACE_POINT r%d:r%d, #%x, #%u", I.base_register, I.base_register + I.register_count - 1, I.wait_mask, I.signal_slot); @@ -402,7 +405,7 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_SYNC_ADD64: { - pan_unpack(&instr, CS_SYNC_ADD64, I); + cs_unpack(instr, CS_SYNC_ADD64, I); fprintf(fp, "SYNC_ADD64%s%s [d%u], d%u, #%x, #%u", I.error_propagate ? ".error_propagate" : "", I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system", I.address, @@ -411,7 +414,7 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_SYNC_SET64: { - pan_unpack(&instr, CS_SYNC_SET64, I); + cs_unpack(instr, CS_SYNC_SET64, I); fprintf(fp, "SYNC_SET64.%s%s [d%u], d%u, #%x, #%u", I.error_propagate ? ".error_propagate" : "", I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system", I.address, @@ -420,7 +423,7 @@ print_cs_instr(FILE *fp, uint64_t instr) } case MALI_CS_OPCODE_SYNC_WAIT64: { - pan_unpack(&instr, CS_SYNC_WAIT64, I); + cs_unpack(instr, CS_SYNC_WAIT64, I); fprintf(fp, "SYNC_WAIT64%s%s d%u, d%u", conditions_str[I.condition], I.error_reject ? ".reject" : ".inherit", I.address, I.data); @@ -536,9 +539,10 @@ pandecode_run_tiling(struct pandecode_context *ctx, FILE *fp, ctx->indent++; /* Merge flag overrides with the register flags */ - uint32_t tiler_flags_raw = cs_get_u64(qctx, 56); - tiler_flags_raw |= I->flags_override; - pan_unpack(&tiler_flags_raw, PRIMITIVE_FLAGS, tiler_flags); + struct mali_primitive_flags_packed tiler_flags_packed = { + .opaque[0] = cs_get_u32(qctx, 56) | I->flags_override, + }; + pan_unpack(&tiler_flags_packed, PRIMITIVE_FLAGS, tiler_flags); unsigned reg_srt = I->srt_select * 2; unsigned reg_fau = 8 + I->fau_select * 2; @@ -616,9 +620,10 @@ pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp, ctx->indent++; /* Merge flag overrides with the register flags */ - uint32_t tiler_flags_raw = cs_get_u64(qctx, 56); - tiler_flags_raw |= I->flags_override; - pan_unpack(&tiler_flags_raw, PRIMITIVE_FLAGS, tiler_flags); + struct mali_primitive_flags_packed tiler_flags_packed = { + .opaque[0] = cs_get_u32(qctx, 56) | I->flags_override, + }; + pan_unpack(&tiler_flags_packed, PRIMITIVE_FLAGS, tiler_flags); unsigned reg_position_srt = 0; unsigned reg_position_fau = 8; @@ -765,16 +770,19 @@ pandecode_run_fullscreen(struct pandecode_context *ctx, FILE *fp, ctx->indent++; /* Merge flag overrides with the register flags */ - uint32_t tiler_flags_raw = cs_get_u64(qctx, 56); - tiler_flags_raw |= I->flags_override; - pan_unpack(&tiler_flags_raw, PRIMITIVE_FLAGS, tiler_flags); + struct mali_primitive_flags_packed tiler_flags_packed = { + .opaque[0] = cs_get_u32(qctx, 56) | I->flags_override, + }; + pan_unpack(&tiler_flags_packed, PRIMITIVE_FLAGS, tiler_flags); DUMP_UNPACKED(ctx, PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n"); GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40), qctx->gpu_id); DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n"); - pan_unpack(PANDECODE_PTR(ctx, cs_get_u64(qctx, I->dcd), void), DRAW, dcd); + pan_unpack( + PANDECODE_PTR(ctx, cs_get_u64(qctx, I->dcd), struct mali_draw_packed), + DRAW, dcd); GENX(pandecode_dcd)(ctx, &dcd, 0, qctx->gpu_id); ctx->indent--; @@ -857,7 +865,7 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx) FILE *fp = ctx->dump_stream; /* Unpack the base so we get the opcode */ uint8_t *bytes = (uint8_t *)qctx->ip; - pan_unpack(bytes, CS_BASE, base); + cs_unpack(bytes, CS_BASE, base); assert(qctx->ip < qctx->end); @@ -869,43 +877,43 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx) switch (base.opcode) { case MALI_CS_OPCODE_RUN_COMPUTE: { - pan_unpack(bytes, CS_RUN_COMPUTE, I); + cs_unpack(bytes, CS_RUN_COMPUTE, I); pandecode_run_compute(ctx, fp, qctx, &I); break; } case MALI_CS_OPCODE_RUN_TILING: { - pan_unpack(bytes, CS_RUN_TILING, I); + cs_unpack(bytes, CS_RUN_TILING, I); pandecode_run_tiling(ctx, fp, qctx, &I); break; } case MALI_CS_OPCODE_RUN_IDVS: { - pan_unpack(bytes, CS_RUN_IDVS, I); + cs_unpack(bytes, CS_RUN_IDVS, I); pandecode_run_idvs(ctx, fp, qctx, &I); break; } case MALI_CS_OPCODE_RUN_FRAGMENT: { - pan_unpack(bytes, CS_RUN_FRAGMENT, I); + cs_unpack(bytes, CS_RUN_FRAGMENT, I); pandecode_run_fragment(ctx, fp, qctx, &I); break; } case MALI_CS_OPCODE_RUN_FULLSCREEN: { - pan_unpack(bytes, CS_RUN_FULLSCREEN, I); + cs_unpack(bytes, CS_RUN_FULLSCREEN, I); pandecode_run_fullscreen(ctx, fp, qctx, &I); break; } case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT: { - pan_unpack(bytes, CS_RUN_COMPUTE_INDIRECT, I); + cs_unpack(bytes, CS_RUN_COMPUTE_INDIRECT, I); pandecode_run_compute_indirect(ctx, fp, qctx, &I); break; } case MALI_CS_OPCODE_MOVE: { - pan_unpack(bytes, CS_MOVE, I); + cs_unpack(bytes, CS_MOVE, I); qctx->regs[I.destination + 0] = (uint32_t)I.immediate; qctx->regs[I.destination + 1] = (uint32_t)(I.immediate >> 32); @@ -913,14 +921,14 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx) } case MALI_CS_OPCODE_MOVE32: { - pan_unpack(bytes, CS_MOVE32, I); + cs_unpack(bytes, CS_MOVE32, I); qctx->regs[I.destination] = I.immediate; break; } case MALI_CS_OPCODE_LOAD_MULTIPLE: { - pan_unpack(bytes, CS_LOAD_MULTIPLE, I); + cs_unpack(bytes, CS_LOAD_MULTIPLE, I); uint64_t addr = ((uint64_t)qctx->regs[I.address + 1] << 32) | qctx->regs[I.address]; addr += I.offset; @@ -936,14 +944,14 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx) } case MALI_CS_OPCODE_ADD_IMMEDIATE32: { - pan_unpack(bytes, CS_ADD_IMMEDIATE32, I); + cs_unpack(bytes, CS_ADD_IMMEDIATE32, I); qctx->regs[I.destination] = qctx->regs[I.source] + I.immediate; break; } case MALI_CS_OPCODE_ADD_IMMEDIATE64: { - pan_unpack(bytes, CS_ADD_IMMEDIATE64, I); + cs_unpack(bytes, CS_ADD_IMMEDIATE64, I); int64_t value = (qctx->regs[I.source] | ((int64_t)qctx->regs[I.source + 1] << 32)) + @@ -955,7 +963,7 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx) } case MALI_CS_OPCODE_CALL: { - pan_unpack(bytes, CS_CALL, I); + cs_unpack(bytes, CS_CALL, I); if (qctx->call_stack_depth == MAX_CALL_STACK_DEPTH) { fprintf(stderr, "CS call stack overflow\n"); @@ -978,7 +986,7 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx) } case MALI_CS_OPCODE_SET_EXCEPTION_HANDLER: { - pan_unpack(bytes, CS_SET_EXCEPTION_HANDLER, I); + cs_unpack(bytes, CS_SET_EXCEPTION_HANDLER, I); assert(qctx->call_stack_depth < MAX_CALL_STACK_DEPTH); @@ -1001,7 +1009,7 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx) } case MALI_CS_OPCODE_JUMP: { - pan_unpack(bytes, CS_JUMP, I); + cs_unpack(bytes, CS_JUMP, I); if (qctx->call_stack_depth == 0) { fprintf(stderr, "Cannot jump from the entrypoint\n"); @@ -1012,7 +1020,7 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx) } case MALI_CS_OPCODE_BRANCH: { - pan_unpack(bytes, CS_BRANCH, I); + cs_unpack(bytes, CS_BRANCH, I); interpret_cs_branch(ctx, qctx, I.offset, I.condition, I.value); break; @@ -1081,7 +1089,7 @@ GENX(pandecode_interpret_cs)(struct pandecode_context *ctx, uint64_t queue, for (int i = 0; i < 1 + qctx.call_stack_depth; ++i) fprintf(fp, " "); - print_cs_instr(fp, *(qctx.ip)); + print_cs_instr(fp, qctx.ip); fprintf(fp, "\n"); } while (interpret_cs_instr(ctx, &qctx)); } @@ -1145,35 +1153,35 @@ record_indirect_branch_target(struct cs_code_cfg *cfg, for (; blk_offs < blk->size && blk->start + blk_offs != ibranch->instr_idx; blk_offs++) { - uint64_t instr = cfg->instrs[blk->start + blk_offs]; - pan_unpack(&instr, CS_BASE, base); + const uint64_t *instr = &cfg->instrs[blk->start + blk_offs]; + cs_unpack(instr, CS_BASE, base); switch (base.opcode) { case MALI_CS_OPCODE_MOVE: { - pan_unpack(&instr, CS_MOVE, I); + cs_unpack(instr, CS_MOVE, I); reg_file.u64[I.destination] = I.immediate; break; } case MALI_CS_OPCODE_MOVE32: { - pan_unpack(&instr, CS_MOVE32, I); + cs_unpack(instr, CS_MOVE32, I); reg_file.u32[I.destination] = I.immediate; break; } case MALI_CS_OPCODE_ADD_IMMEDIATE32: { - pan_unpack(&instr, CS_ADD_IMMEDIATE32, I); + cs_unpack(instr, CS_ADD_IMMEDIATE32, I); reg_file.u32[I.destination] = reg_file.u32[I.source] + I.immediate; break; } case MALI_CS_OPCODE_ADD_IMMEDIATE64: { - pan_unpack(&instr, CS_ADD_IMMEDIATE64, I); + cs_unpack(instr, CS_ADD_IMMEDIATE64, I); reg_file.u64[I.destination] = reg_file.u64[I.source] + I.immediate; break; } case MALI_CS_OPCODE_UMIN32: { - pan_unpack(&instr, CS_UMIN32, I); + cs_unpack(instr, CS_UMIN32, I); reg_file.u32[I.destination] = MIN2(reg_file.u32[I.source_1], reg_file.u32[I.source_2]); break; @@ -1187,8 +1195,8 @@ record_indirect_branch_target(struct cs_code_cfg *cfg, } list_delinit(&cur_blk->node); - uint64_t instr = cfg->instrs[ibranch->instr_idx]; - pan_unpack(&instr, CS_JUMP, I); + uint64_t *instr = &cfg->instrs[ibranch->instr_idx]; + cs_unpack(instr, CS_JUMP, I); struct cs_indirect_branch_target target = { .address = reg_file.u64[I.address], @@ -1209,24 +1217,24 @@ collect_indirect_branch_targets_recurse(struct cs_code_cfg *cfg, { for (; instr_ptr >= (int)cur_blk->start; instr_ptr--) { assert(instr_ptr >= 0); - uint64_t instr = cfg->instrs[instr_ptr]; - pan_unpack(&instr, CS_BASE, base); + const uint64_t *instr = &cfg->instrs[instr_ptr]; + cs_unpack(instr, CS_BASE, base); switch (base.opcode) { case MALI_CS_OPCODE_MOVE: { - pan_unpack(&instr, CS_MOVE, I); + cs_unpack(instr, CS_MOVE, I); BITSET_CLEAR(track_map, I.destination); BITSET_CLEAR(track_map, I.destination + 1); break; } case MALI_CS_OPCODE_MOVE32: { - pan_unpack(&instr, CS_MOVE32, I); + cs_unpack(instr, CS_MOVE32, I); BITSET_CLEAR(track_map, I.destination); break; } case MALI_CS_OPCODE_ADD_IMMEDIATE32: { - pan_unpack(&instr, CS_ADD_IMMEDIATE32, I); + cs_unpack(instr, CS_ADD_IMMEDIATE32, I); if (BITSET_TEST(track_map, I.destination)) { BITSET_SET(track_map, I.source); BITSET_CLEAR(track_map, I.destination); @@ -1235,7 +1243,7 @@ collect_indirect_branch_targets_recurse(struct cs_code_cfg *cfg, } case MALI_CS_OPCODE_ADD_IMMEDIATE64: { - pan_unpack(&instr, CS_ADD_IMMEDIATE64, I); + cs_unpack(instr, CS_ADD_IMMEDIATE64, I); if (BITSET_TEST(track_map, I.destination)) { BITSET_SET(track_map, I.source); BITSET_CLEAR(track_map, I.destination); @@ -1248,7 +1256,7 @@ collect_indirect_branch_targets_recurse(struct cs_code_cfg *cfg, } case MALI_CS_OPCODE_UMIN32: { - pan_unpack(&instr, CS_UMIN32, I); + cs_unpack(instr, CS_UMIN32, I); if (BITSET_TEST(track_map, I.destination)) { BITSET_SET(track_map, I.source_1); BITSET_SET(track_map, I.source_2); @@ -1258,7 +1266,7 @@ collect_indirect_branch_targets_recurse(struct cs_code_cfg *cfg, } case MALI_CS_OPCODE_LOAD_MULTIPLE: { - pan_unpack(&instr, CS_LOAD_MULTIPLE, I); + cs_unpack(instr, CS_LOAD_MULTIPLE, I); for (unsigned i = 0; i < 16; i++) { if ((I.mask & BITFIELD_BIT(i)) && BITSET_TEST(track_map, I.base_register + i)) { @@ -1270,7 +1278,7 @@ collect_indirect_branch_targets_recurse(struct cs_code_cfg *cfg, } case MALI_CS_OPCODE_PROGRESS_LOAD: { - pan_unpack(&instr, CS_PROGRESS_LOAD, I); + cs_unpack(instr, CS_PROGRESS_LOAD, I); for (unsigned i = 0; i < 16; i++) { if (BITSET_TEST(track_map, I.destination) || BITSET_TEST(track_map, I.destination + 1)) { @@ -1323,14 +1331,14 @@ static void collect_indirect_branch_targets(struct cs_code_cfg *cfg, struct cs_indirect_branch *ibranch) { - uint64_t instr = cfg->instrs[ibranch->instr_idx]; + uint64_t *instr = &cfg->instrs[ibranch->instr_idx]; struct cs_code_block *cur_blk = cfg->blk_map[ibranch->instr_idx]; struct list_head blk_stack; BITSET_DECLARE(track_map, 256) = {0}; list_inithead(&blk_stack); - pan_unpack(&instr, CS_JUMP, I); + cs_unpack(instr, CS_JUMP, I); BITSET_SET(track_map, I.address); BITSET_SET(track_map, I.address + 1); BITSET_SET(track_map, I.length); @@ -1358,15 +1366,14 @@ get_cs_cfg(struct pandecode_context *ctx, struct hash_table_u64 *symbols, util_dynarray_init(&cfg->indirect_branches, cfg); - cfg->blk_map = - rzalloc_array(cfg, struct cs_code_block *, instr_count); + cfg->blk_map = rzalloc_array(cfg, struct cs_code_block *, instr_count); cfg->instrs = instrs; cfg->instr_count = instr_count; struct cs_code_block *block = cs_code_block_alloc(cfg, 0, 0); for (unsigned i = 0; i < instr_count; i++) { - uint64_t instr = instrs[i]; + const uint64_t *instr = &instrs[i]; if (!cfg->blk_map[i]) { cfg->blk_map[i] = block; @@ -1379,7 +1386,7 @@ get_cs_cfg(struct pandecode_context *ctx, struct hash_table_u64 *symbols, util_dynarray_append(&block->predecessors, unsigned, i - 1); } - pan_unpack(&instr, CS_BASE, base); + cs_unpack(instr, CS_BASE, base); if (base.opcode == MALI_CS_OPCODE_JUMP || base.opcode == MALI_CS_OPCODE_CALL) { @@ -1394,7 +1401,7 @@ get_cs_cfg(struct pandecode_context *ctx, struct hash_table_u64 *symbols, if (base.opcode != MALI_CS_OPCODE_BRANCH) continue; - pan_unpack(&instr, CS_BRANCH, I); + cs_unpack(instr, CS_BRANCH, I); unsigned target = MIN2(i + 1 + I.offset, instr_count); @@ -1437,10 +1444,12 @@ get_cs_cfg(struct pandecode_context *ctx, struct hash_table_u64 *symbols, } util_dynarray_foreach(&cfg->indirect_branches, struct cs_indirect_branch, - ibranch) { + ibranch) + { collect_indirect_branch_targets(cfg, ibranch); - util_dynarray_foreach(&ibranch->targets, - struct cs_indirect_branch_target, target) { + util_dynarray_foreach(&ibranch->targets, struct cs_indirect_branch_target, + target) + { get_cs_cfg(ctx, symbols, target->address, target->length); } } @@ -1464,8 +1473,8 @@ print_cs_binary(struct pandecode_context *ctx, uint64_t bin, } pandecode_make_indent(ctx); - print_cs_instr(ctx->dump_stream, cfg->instrs[i]); - pan_unpack(&cfg->instrs[i], CS_BASE, base); + print_cs_instr(ctx->dump_stream, &cfg->instrs[i]); + cs_unpack(&cfg->instrs[i], CS_BASE, base); switch (base.opcode) { case MALI_CS_OPCODE_JUMP: case MALI_CS_OPCODE_CALL: { @@ -1475,20 +1484,20 @@ print_cs_binary(struct pandecode_context *ctx, uint64_t bin, assert(ibranch->instr_idx == i); fprintf(ctx->dump_stream, " // "); util_dynarray_foreach(&ibranch->targets, - struct cs_indirect_branch_target, target) { + struct cs_indirect_branch_target, target) + { fprintf(ctx->dump_stream, "%scs@%" PRIx64, target == ibranch->targets.data ? "" : ",", target->address); } if (ibranch->has_unknown_targets) - fprintf(ctx->dump_stream, "%s??", - ibranch->targets.size ? "," : ""); + fprintf(ctx->dump_stream, "%s??", ibranch->targets.size ? "," : ""); ibranch_idx++; break; } case MALI_CS_OPCODE_BRANCH: { - pan_unpack(&cfg->instrs[i], CS_BRANCH, I); + cs_unpack(&cfg->instrs[i], CS_BRANCH, I); fprintf(ctx->dump_stream, " // "); unsigned target = i + 1 + I.offset; @@ -1532,7 +1541,8 @@ GENX(pandecode_cs_binary)(struct pandecode_context *ctx, uint64_t bin, struct cs_code_cfg *main_cfg = get_cs_cfg(ctx, symbols, bin, bin_size); print_cs_binary(ctx, bin, main_cfg, "main_cs"); - hash_table_u64_foreach(symbols, he) { + hash_table_u64_foreach(symbols, he) + { struct cs_code_cfg *other_cfg = he.data; if (other_cfg == main_cfg) continue; @@ -1571,17 +1581,17 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace, }; pandecode_make_indent(ctx); - print_cs_instr(ctx->dump_stream, *instr); + print_cs_instr(ctx->dump_stream, instr); fprintf(ctx->dump_stream, " // from tracepoint_%" PRIx64 "\n", *ip); - pan_unpack(instr, CS_BASE, base); + cs_unpack(instr, CS_BASE, base); switch (base.opcode) { case MALI_CS_OPCODE_RUN_IDVS: { struct cs_run_idvs_trace *idvs_trace = trace_data; assert(trace_size >= sizeof(idvs_trace)); - pan_unpack(instr, CS_RUN_IDVS, I); + cs_unpack(instr, CS_RUN_IDVS, I); memcpy(regs, idvs_trace->sr, sizeof(idvs_trace->sr)); if (I.draw_id_register_enable) @@ -1597,7 +1607,7 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace, struct cs_run_fragment_trace *frag_trace = trace_data; assert(trace_size >= sizeof(frag_trace)); - pan_unpack(instr, CS_RUN_FRAGMENT, I); + cs_unpack(instr, CS_RUN_FRAGMENT, I); memcpy(®s[40], frag_trace->sr, sizeof(frag_trace->sr)); pandecode_run_fragment(ctx, ctx->dump_stream, &qctx, &I); trace_data = frag_trace + 1; @@ -1609,7 +1619,7 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace, struct cs_run_compute_trace *comp_trace = trace_data; assert(trace_size >= sizeof(comp_trace)); - pan_unpack(instr, CS_RUN_COMPUTE, I); + cs_unpack(instr, CS_RUN_COMPUTE, I); memcpy(regs, comp_trace->sr, sizeof(comp_trace->sr)); pandecode_run_compute(ctx, ctx->dump_stream, &qctx, &I); trace_data = comp_trace + 1; @@ -1621,7 +1631,7 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace, struct cs_run_compute_trace *comp_trace = trace_data; assert(trace_size >= sizeof(comp_trace)); - pan_unpack(instr, CS_RUN_COMPUTE_INDIRECT, I); + cs_unpack(instr, CS_RUN_COMPUTE_INDIRECT, I); memcpy(regs, comp_trace->sr, sizeof(comp_trace->sr)); pandecode_run_compute_indirect(ctx, ctx->dump_stream, &qctx, &I); trace_data = comp_trace + 1; diff --git a/src/panfrost/lib/genxml/decode_jm.c b/src/panfrost/lib/genxml/decode_jm.c index 6589da8c567..6730ce47dfb 100644 --- a/src/panfrost/lib/genxml/decode_jm.c +++ b/src/panfrost/lib/genxml/decode_jm.c @@ -30,7 +30,8 @@ #if PAN_ARCH <= 9 static void -pandecode_primitive(struct pandecode_context *ctx, const void *p) +pandecode_primitive(struct pandecode_context *ctx, + const struct mali_primitive_packed *p) { pan_unpack(p, PRIMITIVE, primitive); DUMP_UNPACKED(ctx, PRIMITIVE, primitive, "Primitive:\n"); @@ -75,14 +76,14 @@ pandecode_attributes(struct pandecode_context *ctx, uint64_t addr, int count, MAP_ADDR(ctx, ATTRIBUTE_BUFFER, addr, cl); for (int i = 0; i < count; ++i) { - pan_unpack(cl + i * pan_size(ATTRIBUTE_BUFFER), ATTRIBUTE_BUFFER, temp); + pan_unpack(&cl[i], ATTRIBUTE_BUFFER, temp); DUMP_UNPACKED(ctx, ATTRIBUTE_BUFFER, temp, "%s:\n", prefix); switch (temp.type) { case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR_WRITE_REDUCTION: case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR: { - pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER), - ATTRIBUTE_BUFFER_CONTINUATION_NPOT, temp2); + pan_cast_and_unpack(&cl[i + 1], ATTRIBUTE_BUFFER_CONTINUATION_NPOT, + temp2); pan_print(ctx->dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, temp2, (ctx->indent + 1) * 2); i++; @@ -90,8 +91,8 @@ pandecode_attributes(struct pandecode_context *ctx, uint64_t addr, int count, } case MALI_ATTRIBUTE_TYPE_3D_LINEAR: case MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED: { - pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER_CONTINUATION_3D), - ATTRIBUTE_BUFFER_CONTINUATION_3D, temp2); + pan_cast_and_unpack(&cl[i + 1], ATTRIBUTE_BUFFER_CONTINUATION_3D, + temp2); pan_print(ctx->dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_3D, temp2, (ctx->indent + 1) * 2); i++; @@ -136,7 +137,8 @@ bits(uint32_t word, uint32_t lo, uint32_t hi) } static void -pandecode_invocation(struct pandecode_context *ctx, const void *i) +pandecode_invocation(struct pandecode_context *ctx, + const struct mali_invocation_packed *i) { /* Decode invocation_count. See the comment before the definition of * invocation_count for an explanation. @@ -269,7 +271,8 @@ GENX(pandecode_dcd)(struct pandecode_context *ctx, const struct MALI_DRAW *p, /* On v5 only, the actual framebuffer pointer is tagged with extra * metadata that we validate but do not print. */ - pan_unpack(&p->fbd, FRAMEBUFFER_POINTER, ptr); + const uint64_t *fbd = &p->fbd; + pan_cast_and_unpack(fbd, FRAMEBUFFER_POINTER, ptr); if (!ptr.type || ptr.zs_crc_extension_present || ptr.render_target_count != 1) { @@ -288,7 +291,7 @@ GENX(pandecode_dcd)(struct pandecode_context *ctx, const struct MALI_DRAW *p, int texture_count = 0, sampler_count = 0; if (p->state) { - uint32_t *cl = + struct mali_renderer_state_packed *cl = pandecode_fetch_gpu_mem(ctx, p->state, pan_size(RENDERER_STATE)); pan_unpack(cl, RENDERER_STATE, state); @@ -485,7 +488,8 @@ pandecode_fragment_job(struct pandecode_context *ctx, uint64_t job, /* On v5 and newer, the actual framebuffer pointer is tagged with extra * metadata that we need to disregard. */ - pan_unpack(&s.framebuffer, FRAMEBUFFER_POINTER, ptr); + const uint64_t *framebuffer_packed_raw = &s.framebuffer; + pan_cast_and_unpack(framebuffer_packed_raw, FRAMEBUFFER_POINTER, ptr); fbd_pointer = ptr.pointer; #else /* On v4, the framebuffer pointer is untagged. */ diff --git a/src/panfrost/lib/genxml/pan_pack_helpers.h b/src/panfrost/lib/genxml/pan_pack_helpers.h index 5a573ad229c..1f42701bf7a 100644 --- a/src/panfrost/lib/genxml/pan_pack_helpers.h +++ b/src/panfrost/lib/genxml/pan_pack_helpers.h @@ -87,20 +87,29 @@ __gen_unpack_padded(const uint32_t *restrict cl, uint32_t start, uint32_t end) for (struct PREFIX1(T) name = {PREFIX2(T, header)}, \ *_loop_terminate = &name; \ __builtin_expect(_loop_terminate != NULL, 1); ({ \ - PREFIX2(T, pack)((PREFIX2(T, PACKED_T) *)(dst), &name); \ + PREFIX2(T, pack)((dst), &name); \ _loop_terminate = NULL; \ })) #define pan_pack_nodefaults(dst, T, name) \ for (struct PREFIX1(T) name = {0}, *_loop_terminate = &name; \ __builtin_expect(_loop_terminate != NULL, 1); ({ \ - PREFIX2(T, pack)((PREFIX2(T, PACKED_T) *)(dst), &name); \ + PREFIX2(T, pack)((dst), &name); \ _loop_terminate = NULL; \ })) +#define pan_cast_and_pack(dst, T, name) \ + pan_pack((PREFIX2(T, PACKED_T) *)dst, T, name) + +#define pan_cast_and_pack_nodefaults(dst, T, name) \ + pan_pack_nodefaults((PREFIX2(T, PACKED_T) *)dst, T, name) + #define pan_unpack(src, T, name) \ struct PREFIX1(T) name; \ - PREFIX2(T, unpack)((const PREFIX2(T, PACKED_T) *)(src), &name) + PREFIX2(T, unpack)((src), &name) + +#define pan_cast_and_unpack(src, T, name) \ + pan_unpack((const PREFIX2(T, PACKED_T) *)(src), T, name) #define pan_print(fp, T, var, indent) PREFIX2(T, print)(fp, &(var), indent) diff --git a/src/panfrost/lib/pan_blend.c b/src/panfrost/lib/pan_blend.c index b2da626622e..e3f92f4a96c 100644 --- a/src/panfrost/lib/pan_blend.c +++ b/src/panfrost/lib/pan_blend.c @@ -458,15 +458,13 @@ pan_blend_to_fixed_function_equation(const struct pan_blend_equation equation, uint32_t pan_pack_blend(const struct pan_blend_equation equation) { - STATIC_ASSERT(sizeof(uint32_t) == MALI_BLEND_EQUATION_LENGTH); - - uint32_t out = 0; + struct mali_blend_equation_packed out; pan_pack(&out, BLEND_EQUATION, cfg) { pan_blend_to_fixed_function_equation(equation, &cfg); } - return out; + return out.opaque[0]; } DERIVE_HASH_TABLE(pan_blend_shader_key); @@ -724,7 +722,7 @@ GENX(pan_blend_get_internal_desc)(enum pipe_format fmt, unsigned rt, unsigned force_size, bool dithered) { const struct util_format_description *desc = util_format_description(fmt); - uint64_t res; + struct mali_internal_blend_packed res; pan_pack(&res, INTERNAL_BLEND, cfg) { cfg.mode = MALI_BLEND_MODE_OPAQUE; @@ -771,7 +769,7 @@ GENX(pan_blend_get_internal_desc)(enum pipe_format fmt, unsigned rt, GENX(panfrost_dithered_format_from_pipe_format)(fmt, dithered); } - return res; + return res.opaque[0] | ((uint64_t)res.opaque[1] << 32); } static bool diff --git a/src/panfrost/lib/pan_desc.c b/src/panfrost/lib/pan_desc.c index 8fd9ca9e55a..c4ccd0320e7 100644 --- a/src/panfrost/lib/pan_desc.c +++ b/src/panfrost/lib/pan_desc.c @@ -321,7 +321,7 @@ pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc, static void pan_emit_zs_crc_ext(const struct pan_fb_info *fb, unsigned layer_idx, - int rt_crc, void *zs_crc_ext) + int rt_crc, struct mali_zs_crc_extension_packed *zs_crc_ext) { pan_pack(zs_crc_ext, ZS_CRC_EXTENSION, cfg) { pan_prepare_crc(fb, rt_crc, &cfg); @@ -605,7 +605,8 @@ pan_prepare_rt(const struct pan_fb_info *fb, unsigned layer_idx, #endif void -GENX(pan_emit_tls)(const struct pan_tls_info *info, void *out) +GENX(pan_emit_tls)(const struct pan_tls_info *info, + struct mali_local_storage_packed *out) { pan_pack(out, LOCAL_STORAGE, cfg) { if (info->tls.size) { @@ -644,7 +645,8 @@ GENX(pan_emit_tls)(const struct pan_tls_info *info, void *out) #if PAN_ARCH <= 5 static void pan_emit_midgard_tiler(const struct pan_fb_info *fb, - const struct pan_tiler_context *tiler_ctx, void *out) + const struct pan_tiler_context *tiler_ctx, + struct mali_tiler_context_packed *out) { bool hierarchy = !tiler_ctx->midgard.no_hierarchical_tiling; @@ -679,8 +681,8 @@ pan_emit_midgard_tiler(const struct pan_fb_info *fb, #if PAN_ARCH >= 5 static void -pan_emit_rt(const struct pan_fb_info *fb, unsigned layer_idx, - unsigned idx, unsigned cbuf_offset, void *out) +pan_emit_rt(const struct pan_fb_info *fb, unsigned layer_idx, unsigned idx, + unsigned cbuf_offset, struct mali_render_target_packed *out) { pan_pack(out, RENDER_TARGET, cfg) { pan_prepare_rt(fb, layer_idx, idx, cbuf_offset, &cfg); @@ -900,7 +902,10 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx, #endif if (has_zs_crc_ext) { - pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, out + pan_size(FRAMEBUFFER)); + struct mali_zs_crc_extension_packed *zs_crc_ext = + out + pan_size(FRAMEBUFFER); + + pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, zs_crc_ext); rtd += pan_size(ZS_CRC_EXTENSION); } @@ -920,7 +925,7 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx, } struct mali_framebuffer_pointer_packed tag; - pan_pack(tag.opaque, FRAMEBUFFER_POINTER, cfg) { + pan_pack(&tag, FRAMEBUFFER_POINTER, cfg) { cfg.zs_crc_extension_present = has_zs_crc_ext; cfg.render_target_count = MAX2(fb->rt_count, 1); } diff --git a/src/panfrost/lib/pan_desc.h b/src/panfrost/lib/pan_desc.h index 85248a84e35..e2186703f4c 100644 --- a/src/panfrost/lib/pan_desc.h +++ b/src/panfrost/lib/pan_desc.h @@ -187,7 +187,8 @@ pan_sample_pattern(unsigned samples) void GENX(pan_select_tile_size)(struct pan_fb_info *fb); -void GENX(pan_emit_tls)(const struct pan_tls_info *info, void *out); +void GENX(pan_emit_tls)(const struct pan_tls_info *info, + struct mali_local_storage_packed *out); int GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size); diff --git a/src/panfrost/lib/pan_encoder.h b/src/panfrost/lib/pan_encoder.h index 5259c3d2a88..649fc0d1ef3 100644 --- a/src/panfrost/lib/pan_encoder.h +++ b/src/panfrost/lib/pan_encoder.h @@ -85,7 +85,7 @@ unsigned panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, #if PAN_ARCH <= 5 static inline void panfrost_vertex_id(unsigned padded_count, - struct mali_attribute_buffer_packed *attr, bool instanced) + struct mali_attribute_vertex_id_packed *attr, bool instanced) { pan_pack(attr, ATTRIBUTE_VERTEX_ID, cfg) { if (instanced) { @@ -101,7 +101,8 @@ panfrost_vertex_id(unsigned padded_count, static inline void panfrost_instance_id(unsigned padded_count, - struct mali_attribute_buffer_packed *attr, bool instanced) + struct mali_attribute_instance_id_packed *attr, + bool instanced) { pan_pack(attr, ATTRIBUTE_INSTANCE_ID, cfg) { if (!instanced || padded_count <= 1) { @@ -234,7 +235,8 @@ panfrost_make_resource_table(struct panfrost_ptr base, unsigned index, if (resource_count == 0) return; - pan_pack(base.cpu + index * pan_size(RESOURCE), RESOURCE, cfg) { + struct mali_resource_packed *res = base.cpu; + pan_pack(&res[index], RESOURCE, cfg) { cfg.address = address; cfg.size = resource_count * pan_size(BUFFER); } diff --git a/src/panfrost/lib/pan_indirect_dispatch.c b/src/panfrost/lib/pan_indirect_dispatch.c index d28edbbe584..4e269a9b02a 100644 --- a/src/panfrost/lib/pan_indirect_dispatch.c +++ b/src/panfrost/lib/pan_indirect_dispatch.c @@ -136,11 +136,11 @@ pan_indirect_dispatch_init(struct pan_indirect_dispatch_meta *meta) struct panfrost_ptr tsd = pan_pool_alloc_desc(meta->desc_pool, LOCAL_STORAGE); - pan_pack(rsd.cpu, RENDERER_STATE, cfg) { + pan_cast_and_pack(rsd.cpu, RENDERER_STATE, cfg) { pan_shader_prepare_rsd(&shader_info, bin.gpu, &cfg); } - pan_pack(tsd.cpu, LOCAL_STORAGE, ls) { + pan_cast_and_pack(tsd.cpu, LOCAL_STORAGE, ls) { ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM; }; diff --git a/src/panfrost/lib/pan_jc.h b/src/panfrost/lib/pan_jc.h index 67ae80182be..0c78c8e8d16 100644 --- a/src/panfrost/lib/pan_jc.h +++ b/src/panfrost/lib/pan_jc.h @@ -176,7 +176,7 @@ pan_jc_add_job(struct pan_jc *jc, enum mali_job_type type, bool barrier, /* Assign the index */ unsigned index = ++jc->job_index; - pan_pack(job->cpu, JOB_HEADER, header) { + pan_cast_and_pack(job->cpu, JOB_HEADER, header) { header.type = type; header.barrier = barrier; header.suppress_prefetch = suppress_prefetch; diff --git a/src/panfrost/lib/pan_texture.c b/src/panfrost/lib/pan_texture.c index 0e1f30da4df..08125b0fbc3 100644 --- a/src/panfrost/lib/pan_texture.c +++ b/src/panfrost/lib/pan_texture.c @@ -254,7 +254,7 @@ static void panfrost_emit_surface_with_stride(const struct pan_image_section_info *section, void **payload) { - pan_pack(*payload, SURFACE_WITH_STRIDE, cfg) { + pan_cast_and_pack(*payload, SURFACE_WITH_STRIDE, cfg) { cfg.pointer = section->pointer; cfg.row_stride = section->row_stride; cfg.surface_stride = section->surface_stride; @@ -271,7 +271,7 @@ panfrost_emit_multiplanar_surface(const struct pan_image_section_info *sections, assert(sections[2].row_stride == 0 || sections[1].row_stride == sections[2].row_stride); - pan_pack(*payload, MULTIPLANAR_SURFACE, cfg) { + pan_cast_and_pack(*payload, MULTIPLANAR_SURFACE, cfg) { cfg.plane_0_pointer = sections[0].pointer; cfg.plane_0_row_stride = sections[0].row_stride; cfg.plane_1_2_row_stride = sections[1].row_stride; @@ -425,7 +425,7 @@ panfrost_emit_plane(const struct pan_image_view *iview, // TODO: this isn't technically guaranteed to be YUV, but it is in practice. bool is_3_planar_yuv = desc->layout == UTIL_FORMAT_LAYOUT_PLANAR3; - pan_pack(*payload, PLANE, cfg) { + pan_cast_and_pack(*payload, PLANE, cfg) { cfg.pointer = pointer; cfg.row_stride = row_stride; cfg.size = layout->data_size - layout->slices[level].offset; @@ -696,7 +696,8 @@ GENX(panfrost_texture_afbc_reswizzle)(struct pan_image_view *iview) * consists of a 32-byte header followed by pointers. */ void -GENX(panfrost_new_texture)(const struct pan_image_view *iview, void *out, +GENX(panfrost_new_texture)(const struct pan_image_view *iview, + struct mali_texture_packed *out, const struct panfrost_ptr *payload) { const struct util_format_description *desc = diff --git a/src/panfrost/lib/pan_texture.h b/src/panfrost/lib/pan_texture.h index 9715fa7197d..be6104d4d71 100644 --- a/src/panfrost/lib/pan_texture.h +++ b/src/panfrost/lib/pan_texture.h @@ -415,7 +415,8 @@ void GENX(panfrost_texture_swizzle_replicate_x)(struct pan_image_view *iview); void GENX(panfrost_texture_afbc_reswizzle)(struct pan_image_view *iview); #endif -void GENX(panfrost_new_texture)(const struct pan_image_view *iview, void *out, +void GENX(panfrost_new_texture)(const struct pan_image_view *iview, + struct mali_texture_packed *out, const struct panfrost_ptr *payload); #endif diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c index 29f6cab9127..f843b00bfff 100644 --- a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c +++ b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c @@ -316,7 +316,8 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev) return 0; } - pan_pack(panvk_priv_mem_host_addr(shader->rsd), RENDERER_STATE, cfg) { + pan_cast_and_pack(panvk_priv_mem_host_addr(shader->rsd), RENDERER_STATE, + cfg) { pan_shader_prepare_rsd(&shader->info, panvk_priv_mem_dev_addr(shader->code_mem), &cfg); } diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c index 2e3ce3ca4b1..13f923b09ba 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c @@ -51,7 +51,7 @@ prepare_driver_set(struct panvk_cmd_buffer *cmdbuf) return VK_ERROR_OUT_OF_DEVICE_MEMORY; /* Dummy sampler always comes first. */ - pan_pack(&descs[0], SAMPLER, cfg) { + pan_cast_and_pack(&descs[0], SAMPLER, cfg) { cfg.clamp_integer_array_indices = false; } diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index dd9acc28782..9ec88223dd6 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -10,6 +10,7 @@ * SPDX-License-Identifier: MIT */ +#include #include "genxml/gen_macros.h" #include "panvk_buffer.h" @@ -136,7 +137,7 @@ prepare_vs_driver_set(struct panvk_cmd_buffer *cmdbuf) } /* Dummy sampler always comes right after the vertex attribs. */ - pan_pack(&descs[MAX_VS_ATTRIBS], SAMPLER, cfg) { + pan_cast_and_pack(&descs[MAX_VS_ATTRIBS], SAMPLER, cfg) { cfg.clamp_integer_array_indices = false; } @@ -147,7 +148,7 @@ prepare_vs_driver_set(struct panvk_cmd_buffer *cmdbuf) for (uint32_t i = 0; i < vb_count; i++) { const struct panvk_attrib_buf *vb = &cmdbuf->state.gfx.vb.bufs[i]; - pan_pack(&descs[vb_offset + i], BUFFER, cfg) { + pan_cast_and_pack(&descs[vb_offset + i], BUFFER, cfg) { if (vi->bindings_valid & BITFIELD_BIT(i)) { cfg.address = vb->address; cfg.size = vb->size; @@ -180,7 +181,7 @@ prepare_fs_driver_set(struct panvk_cmd_buffer *cmdbuf) return VK_ERROR_OUT_OF_DEVICE_MEMORY; /* Dummy sampler always comes first. */ - pan_pack(&descs[0], SAMPLER, cfg) { + pan_cast_and_pack(&descs[0], SAMPLER, cfg) { cfg.clamp_integer_array_indices = false; } @@ -421,7 +422,7 @@ prepare_vp(struct panvk_cmd_buffer *cmdbuf) if (dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) || dyn_gfx_state_dirty(cmdbuf, VP_SCISSORS)) { - uint64_t scissor_box; + struct mali_scissor_packed scissor_box; pan_pack(&scissor_box, SCISSOR, cfg) { /* The spec says "width must be greater than 0.0" */ @@ -452,7 +453,8 @@ prepare_vp(struct panvk_cmd_buffer *cmdbuf) cfg.scissor_maximum_y = CLAMP(maxy, 0, UINT16_MAX); } - cs_move64_to(b, cs_sr_reg64(b, 42), scissor_box); + struct mali_scissor_packed *scissor_box_ptr = &scissor_box; + cs_move64_to(b, cs_sr_reg64(b, 42), *((uint64_t*)scissor_box_ptr)); } if (dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) || @@ -1271,7 +1273,7 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf) if (!zsd.gpu) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - pan_pack(zsd.cpu, DEPTH_STENCIL, cfg) { + pan_cast_and_pack(zsd.cpu, DEPTH_STENCIL, cfg) { cfg.stencil_test_enable = test_s; if (test_s) { cfg.front_compare_function = diff --git a/src/panfrost/vulkan/csf/panvk_vX_queue.c b/src/panfrost/vulkan/csf/panvk_vX_queue.c index 46ee6d62528..e44d311d4a7 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_queue.c +++ b/src/panfrost/vulkan/csf/panvk_vX_queue.c @@ -687,7 +687,8 @@ init_tiler(struct panvk_queue *queue) tiler_heap->context.handle = thc.handle; tiler_heap->context.dev_addr = thc.tiler_heap_ctx_gpu_va; - pan_pack(panvk_priv_mem_host_addr(tiler_heap->desc), TILER_HEAP, cfg) { + pan_cast_and_pack(panvk_priv_mem_host_addr(tiler_heap->desc), TILER_HEAP, + cfg) { cfg.size = tiler_heap->chunk_size; cfg.base = thc.first_heap_chunk_gpu_va; cfg.bottom = cfg.base + 64; diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c index fe468eb63de..e1aab7bc0f6 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c @@ -460,13 +460,14 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf, static void panvk_draw_emit_attrib_buf(const struct panvk_draw_data *draw, const struct vk_vertex_binding_state *buf_info, - const struct panvk_attrib_buf *buf, void *desc) + const struct panvk_attrib_buf *buf, + struct mali_attribute_buffer_packed *desc) { uint64_t addr = buf->address & ~63ULL; unsigned size = buf->size + (buf->address & 63); unsigned divisor = draw->padded_vertex_count * buf_info->divisor; bool per_instance = buf_info->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE; - void *buf_ext = desc + pan_size(ATTRIBUTE_BUFFER); + struct mali_attribute_buffer_packed *buf_ext = &desc[1]; /* TODO: support instanced arrays */ if (draw->info.instance.count <= 1) { @@ -515,7 +516,7 @@ panvk_draw_emit_attrib_buf(const struct panvk_draw_data *draw, cfg.divisor_e = divisor_e; } - pan_pack(buf_ext, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) { + pan_cast_and_pack(buf_ext, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) { cfg.divisor_numerator = divisor_num; cfg.divisor = buf_info->divisor; } @@ -532,7 +533,8 @@ static void panvk_draw_emit_attrib(const struct panvk_draw_data *draw, const struct vk_vertex_attribute_state *attrib_info, const struct vk_vertex_binding_state *buf_info, - const struct panvk_attrib_buf *buf, void *desc) + const struct panvk_attrib_buf *buf, + struct mali_attribute_packed *desc) { bool per_instance = buf_info->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE; enum pipe_format f = vk_format_to_pipe_format(attrib_info->format); @@ -631,7 +633,8 @@ panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf, } static void -panvk_emit_viewport(struct panvk_cmd_buffer *cmdbuf, void *vpd) +panvk_emit_viewport(struct panvk_cmd_buffer *cmdbuf, + struct mali_viewport_packed *vpd) { const struct vk_viewport_state *vp = &cmdbuf->vk.dynamic_graphics_state.vp; @@ -705,7 +708,8 @@ panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf, static void panvk_emit_vertex_dcd(struct panvk_cmd_buffer *cmdbuf, - const struct panvk_draw_data *draw, void *dcd) + const struct panvk_draw_data *draw, + struct mali_draw_packed *dcd) { const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; const struct panvk_shader_desc_state *vs_desc_state = @@ -786,7 +790,8 @@ translate_prim_topology(VkPrimitiveTopology in) static void panvk_emit_tiler_primitive(struct panvk_cmd_buffer *cmdbuf, - const struct panvk_draw_data *draw, void *prim) + const struct panvk_draw_data *draw, + struct mali_primitive_packed *prim) { const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; const struct panvk_shader *fs = get_fs(cmdbuf); @@ -846,7 +851,7 @@ panvk_emit_tiler_primitive(struct panvk_cmd_buffer *cmdbuf, static void panvk_emit_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf, const struct panvk_draw_data *draw, - void *primsz) + struct mali_primitive_size_packed *primsz) { const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; const struct vk_input_assembly_state *ia = @@ -866,7 +871,8 @@ panvk_emit_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf, static void panvk_emit_tiler_dcd(struct panvk_cmd_buffer *cmdbuf, - const struct panvk_draw_data *draw, void *dcd) + const struct panvk_draw_data *draw, + struct mali_draw_packed *dcd) { struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc; const struct vk_rasterization_state *rs = diff --git a/src/panfrost/vulkan/panvk_vX_buffer_view.c b/src/panfrost/vulkan/panvk_vX_buffer_view.c index f596b9b6e36..3dccc2f6f6b 100644 --- a/src/panfrost/vulkan/panvk_vX_buffer_view.c +++ b/src/panfrost/vulkan/panvk_vX_buffer_view.c @@ -112,14 +112,14 @@ panvk_per_arch(CreateBufferView)(VkDevice _device, .cpu = panvk_priv_mem_host_addr(view->mem), }; - GENX(panfrost_new_texture)(&pview, view->descs.tex.opaque, &ptr); + GENX(panfrost_new_texture)(&pview, &view->descs.tex, &ptr); } #if PAN_ARCH <= 7 if (buffer->vk.usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) { unsigned blksz = vk_format_get_blocksize(pCreateInfo->format); - pan_pack(view->descs.img_attrib_buf[0].opaque, ATTRIBUTE_BUFFER, cfg) { + pan_pack(&view->descs.img_attrib_buf[0], ATTRIBUTE_BUFFER, cfg) { /* The format is the only thing we lack to emit attribute descriptors * when copying from the set to the attribute tables. Instead of * making the descriptor size to store an extra format, we pack @@ -137,8 +137,8 @@ panvk_per_arch(CreateBufferView)(VkDevice _device, cfg.size = view->vk.elements * blksz; } - pan_pack(view->descs.img_attrib_buf[1].opaque, - ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { + struct mali_attribute_buffer_packed *buf = &view->descs.img_attrib_buf[1]; + pan_cast_and_pack(buf, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { cfg.s_dimension = view->vk.elements; cfg.t_dimension = 1; cfg.r_dimension = 1; diff --git a/src/panfrost/vulkan/panvk_vX_cmd_desc_state.c b/src/panfrost/vulkan/panvk_vX_cmd_desc_state.c index 3805429016f..7c8c232b7c4 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_desc_state.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_desc_state.c @@ -233,7 +233,7 @@ panvk_per_arch(cmd_prepare_shader_desc_tables)( return VK_ERROR_OUT_OF_DEVICE_MEMORY; /* Emit a dummy sampler if we have to. */ - pan_pack(sampler.cpu, SAMPLER, cfg) { + pan_cast_and_pack(sampler.cpu, SAMPLER, cfg) { cfg.clamp_integer_array_indices = false; } diff --git a/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c b/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c index 3e949cd68b1..b6315e3c8eb 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c @@ -191,7 +191,8 @@ get_preload_shader(struct panvk_device *dev, return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); } - pan_pack(panvk_priv_mem_host_addr(shader->spd), SHADER_PROGRAM, cfg) { + pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spd), SHADER_PROGRAM, + cfg) { cfg.stage = MALI_SHADER_STAGE_FRAGMENT; cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL; cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD; @@ -346,7 +347,7 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo, if (!rsd.cpu) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - pan_pack(rsd.cpu, RENDERER_STATE, cfg) { + pan_cast_and_pack(rsd.cpu, RENDERER_STATE, cfg) { pan_shader_prepare_rsd(&shader->info, panvk_priv_mem_dev_addr(shader->code_mem), &cfg); @@ -409,7 +410,7 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo, if (!vpd.cpu) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - pan_pack(vpd.cpu, VIEWPORT, cfg) { + pan_cast_and_pack(vpd.cpu, VIEWPORT, cfg) { cfg.scissor_minimum_x = minx; cfg.scissor_minimum_y = miny; cfg.scissor_maximum_x = maxx; @@ -420,7 +421,7 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo, if (!sampler.cpu) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - pan_pack(sampler.cpu, SAMPLER, cfg) { + pan_cast_and_pack(sampler.cpu, SAMPLER, cfg) { cfg.seamless_cube_map = false; cfg.normalized_coordinates = false; cfg.clamp_integer_array_indices = false; @@ -568,7 +569,7 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo, if (!res_table.cpu) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - pan_pack(res_table.cpu, RESOURCE, cfg) { + pan_cast_and_pack(res_table.cpu, RESOURCE, cfg) { cfg.address = descs.gpu; cfg.size = desc_count * PANVK_DESCRIPTOR_SIZE; } @@ -582,7 +583,7 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo, bool preload_s = key->aspects != VK_IMAGE_ASPECT_COLOR_BIT && fbinfo->zs.preload.s; - pan_pack(zsd.cpu, DEPTH_STENCIL, cfg) { + pan_cast_and_pack(zsd.cpu, DEPTH_STENCIL, cfg) { cfg.depth_function = MALI_FUNC_ALWAYS; cfg.depth_write_enable = preload_z; diff --git a/src/panfrost/vulkan/panvk_vX_image_view.c b/src/panfrost/vulkan/panvk_vX_image_view.c index dd925081ec2..cb9838f7c47 100644 --- a/src/panfrost/vulkan/panvk_vX_image_view.c +++ b/src/panfrost/vulkan/panvk_vX_image_view.c @@ -191,7 +191,7 @@ prepare_attr_buf_descs(struct panvk_image_view *view) &image->planes[plane_idx].layout, view->pview.first_level, is_3d ? 0 : view->pview.first_layer, is_3d ? view->pview.first_layer : 0); - pan_pack(view->descs.img_attrib_buf[0].opaque, ATTRIBUTE_BUFFER, cfg) { + pan_pack(&view->descs.img_attrib_buf[0], ATTRIBUTE_BUFFER, cfg) { /* The format is the only thing we lack to emit attribute descriptors * when copying from the set to the attribute tables. Instead of * making the descriptor size to store an extra format, we pack @@ -213,8 +213,8 @@ prepare_attr_buf_descs(struct panvk_image_view *view) cfg.size = pan_kmod_bo_size(image->bo) - offset; } - pan_pack(view->descs.img_attrib_buf[1].opaque, - ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { + struct mali_attribute_buffer_packed *buf = &view->descs.img_attrib_buf[1]; + pan_cast_and_pack(buf, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { unsigned level = view->pview.first_level; VkExtent3D extent = view->vk.extent; diff --git a/src/panfrost/vulkan/panvk_vX_sampler.c b/src/panfrost/vulkan/panvk_vX_sampler.c index b2124c64963..4773963690a 100644 --- a/src/panfrost/vulkan/panvk_vX_sampler.c +++ b/src/panfrost/vulkan/panvk_vX_sampler.c @@ -105,7 +105,7 @@ panvk_per_arch(CreateSampler)(VkDevice _device, panvk_afbc_reswizzle_border_color(&border_color, fmt); #endif - pan_pack(sampler->desc.opaque, SAMPLER, cfg) { + pan_pack(&sampler->desc, SAMPLER, cfg) { cfg.magnify_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST; cfg.minify_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST; cfg.mipmap_mode = diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index d746b39eb9b..05e5e1d825a 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -895,7 +895,8 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader, if (!panvk_priv_mem_dev_addr(shader->rsd)) return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); - pan_pack(panvk_priv_mem_host_addr(shader->rsd), RENDERER_STATE, cfg) { + pan_cast_and_pack(panvk_priv_mem_host_addr(shader->rsd), RENDERER_STATE, + cfg) { pan_shader_prepare_rsd(&shader->info, panvk_shader_get_dev_addr(shader), &cfg); } @@ -905,7 +906,8 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader, if (!panvk_priv_mem_dev_addr(shader->spd)) return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); - pan_pack(panvk_priv_mem_host_addr(shader->spd), SHADER_PROGRAM, cfg) { + pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spd), SHADER_PROGRAM, + cfg) { cfg.stage = pan_shader_stage(&shader->info); if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT) @@ -928,8 +930,8 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader, if (!panvk_priv_mem_dev_addr(shader->spds.pos_points)) return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); - pan_pack(panvk_priv_mem_host_addr(shader->spds.pos_points), - SHADER_PROGRAM, cfg) { + pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.pos_points), + SHADER_PROGRAM, cfg) { cfg.stage = pan_shader_stage(&shader->info); cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF; cfg.register_allocation = @@ -944,8 +946,8 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader, if (!panvk_priv_mem_dev_addr(shader->spds.pos_triangles)) return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); - pan_pack(panvk_priv_mem_host_addr(shader->spds.pos_triangles), - SHADER_PROGRAM, cfg) { + pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.pos_triangles), + SHADER_PROGRAM, cfg) { cfg.stage = pan_shader_stage(&shader->info); cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF; cfg.register_allocation = @@ -962,8 +964,8 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader, if (!panvk_priv_mem_dev_addr(shader->spds.var)) return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); - pan_pack(panvk_priv_mem_host_addr(shader->spds.var), SHADER_PROGRAM, - cfg) { + pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.var), + SHADER_PROGRAM, cfg) { unsigned work_count = shader->info.vs.secondary_work_reg_count; cfg.stage = pan_shader_stage(&shader->info);