panfrost: clang-format the tree

This switches us over to Mesa's code style [1], normalizing us within the tree.
The results aren't perfect, but they bring us a hell of a lot closer to the rest
of the tree. With this, Panfrost no longer feels so foreign relative to the rest
of Mesa, which (in retrospect, after a bunch of years of being "different") I
think is the right call.

I skipped PanVK because that's paused right now.

  find panfrost/ -type f -name '*.h' | grep -v vulkan | xargs clang-format -i;
  find panfrost/ -type f -name '*.c' | grep -v vulkan | xargs clang-format -i;
  clang-format -i gallium/drivers/panfrost/*.c gallium/drivers/panfrost/*.h;
  find panfrost/ -type f -name '*.cpp' | grep -v vulkan | xargs clang-format -i

[1] https://docs.mesa3d.org/codingstyle.html
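
For reference, the same formatting can be spot-checked without rewriting any
files: clang-format has a check-only mode that reports differences from the
nearest .clang-format instead of applying them. The paths in the commands above
appear to be relative to src/; the flags and the file picked below are only an
illustration, not part of this commit.

  # report formatting differences without modifying the file (clang-format >= 10)
  clang-format --dry-run --Werror gallium/drivers/panfrost/pan_blit.c

  # re-apply formatting in place, as the tree-wide commands above do
  clang-format -i gallium/drivers/panfrost/pan_blit.c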

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20425>
Alyssa Rosenzweig, 2022-12-23 16:58:38 -05:00, committed by Marge Bot
parent a4705afe63
commit 0afd691f29
182 changed files with 36570 additions and 36355 deletions


@@ -36,26 +36,26 @@
struct panfrost_bo;
struct pan_blend_info {
unsigned constant_mask : 4;
bool fixed_function : 1;
bool enabled : 1;
bool load_dest : 1;
bool opaque : 1;
bool alpha_zero_nop : 1;
bool alpha_one_store : 1;
unsigned constant_mask : 4;
bool fixed_function : 1;
bool enabled : 1;
bool load_dest : 1;
bool opaque : 1;
bool alpha_zero_nop : 1;
bool alpha_one_store : 1;
};
struct panfrost_blend_state {
struct pipe_blend_state base;
struct pan_blend_state pan;
struct pan_blend_info info[PIPE_MAX_COLOR_BUFS];
uint32_t equation[PIPE_MAX_COLOR_BUFS];
struct pipe_blend_state base;
struct pan_blend_state pan;
struct pan_blend_info info[PIPE_MAX_COLOR_BUFS];
uint32_t equation[PIPE_MAX_COLOR_BUFS];
/* info.load presented as a bitfield for draw call hot paths */
unsigned load_dest_mask : PIPE_MAX_COLOR_BUFS;
/* info.load presented as a bitfield for draw call hot paths */
unsigned load_dest_mask : PIPE_MAX_COLOR_BUFS;
};
mali_ptr
panfrost_get_blend(struct panfrost_batch *batch, unsigned rt, struct panfrost_bo **bo, unsigned *shader_offset);
mali_ptr panfrost_get_blend(struct panfrost_batch *batch, unsigned rt,
struct panfrost_bo **bo, unsigned *shader_offset);
#endif


@@ -27,59 +27,58 @@
*
*/
#include "util/format/u_format.h"
#include "pan_context.h"
#include "pan_util.h"
#include "util/format/u_format.h"
void
panfrost_blitter_save(struct panfrost_context *ctx, bool render_cond)
{
struct blitter_context *blitter = ctx->blitter;
struct blitter_context *blitter = ctx->blitter;
util_blitter_save_vertex_buffer_slot(blitter, ctx->vertex_buffers);
util_blitter_save_vertex_elements(blitter, ctx->vertex);
util_blitter_save_vertex_shader(blitter, ctx->uncompiled[PIPE_SHADER_VERTEX]);
util_blitter_save_rasterizer(blitter, ctx->rasterizer);
util_blitter_save_viewport(blitter, &ctx->pipe_viewport);
util_blitter_save_scissor(blitter, &ctx->scissor);
util_blitter_save_fragment_shader(blitter, ctx->uncompiled[PIPE_SHADER_FRAGMENT]);
util_blitter_save_blend(blitter, ctx->blend);
util_blitter_save_depth_stencil_alpha(blitter, ctx->depth_stencil);
util_blitter_save_stencil_ref(blitter, &ctx->stencil_ref);
util_blitter_save_so_targets(blitter, 0, NULL);
util_blitter_save_sample_mask(blitter, ctx->sample_mask, ctx->min_samples);
util_blitter_save_vertex_buffer_slot(blitter, ctx->vertex_buffers);
util_blitter_save_vertex_elements(blitter, ctx->vertex);
util_blitter_save_vertex_shader(blitter,
ctx->uncompiled[PIPE_SHADER_VERTEX]);
util_blitter_save_rasterizer(blitter, ctx->rasterizer);
util_blitter_save_viewport(blitter, &ctx->pipe_viewport);
util_blitter_save_scissor(blitter, &ctx->scissor);
util_blitter_save_fragment_shader(blitter,
ctx->uncompiled[PIPE_SHADER_FRAGMENT]);
util_blitter_save_blend(blitter, ctx->blend);
util_blitter_save_depth_stencil_alpha(blitter, ctx->depth_stencil);
util_blitter_save_stencil_ref(blitter, &ctx->stencil_ref);
util_blitter_save_so_targets(blitter, 0, NULL);
util_blitter_save_sample_mask(blitter, ctx->sample_mask, ctx->min_samples);
util_blitter_save_framebuffer(blitter, &ctx->pipe_framebuffer);
util_blitter_save_fragment_sampler_states(blitter,
ctx->sampler_count[PIPE_SHADER_FRAGMENT],
(void **)(&ctx->samplers[PIPE_SHADER_FRAGMENT]));
util_blitter_save_fragment_sampler_views(blitter,
ctx->sampler_view_count[PIPE_SHADER_FRAGMENT],
(struct pipe_sampler_view **)&ctx->sampler_views[PIPE_SHADER_FRAGMENT]);
util_blitter_save_fragment_constant_buffer_slot(blitter,
ctx->constant_buffer[PIPE_SHADER_FRAGMENT].cb);
if (!render_cond) {
util_blitter_save_render_condition(blitter,
(struct pipe_query *) ctx->cond_query,
ctx->cond_cond, ctx->cond_mode);
}
util_blitter_save_framebuffer(blitter, &ctx->pipe_framebuffer);
util_blitter_save_fragment_sampler_states(
blitter, ctx->sampler_count[PIPE_SHADER_FRAGMENT],
(void **)(&ctx->samplers[PIPE_SHADER_FRAGMENT]));
util_blitter_save_fragment_sampler_views(
blitter, ctx->sampler_view_count[PIPE_SHADER_FRAGMENT],
(struct pipe_sampler_view **)&ctx->sampler_views[PIPE_SHADER_FRAGMENT]);
util_blitter_save_fragment_constant_buffer_slot(
blitter, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].cb);
if (!render_cond) {
util_blitter_save_render_condition(blitter,
(struct pipe_query *)ctx->cond_query,
ctx->cond_cond, ctx->cond_mode);
}
}
void
panfrost_blit(struct pipe_context *pipe,
const struct pipe_blit_info *info)
panfrost_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
{
struct panfrost_context *ctx = pan_context(pipe);
struct panfrost_context *ctx = pan_context(pipe);
if (info->render_condition_enable &&
!panfrost_render_condition_check(ctx))
return;
if (info->render_condition_enable && !panfrost_render_condition_check(ctx))
return;
if (!util_blitter_is_blit_supported(ctx->blitter, info))
unreachable("Unsupported blit\n");
if (!util_blitter_is_blit_supported(ctx->blitter, info))
unreachable("Unsupported blit\n");
panfrost_blitter_save(ctx, info->render_condition_enable);
util_blitter_blit(ctx->blitter, info);
panfrost_blitter_save(ctx, info->render_condition_enable);
util_blitter_blit(ctx->blitter, info);
}

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -26,206 +26,207 @@
#define __BUILDER_H__
#define _LARGEFILE64_SOURCE 1
#include <sys/mman.h>
#include <assert.h>
#include "pan_resource.h"
#include "pan_job.h"
#include <sys/mman.h>
#include "pan_blend_cso.h"
#include "pan_encoder.h"
#include "pan_texture.h"
#include "pan_earlyzs.h"
#include "pan_encoder.h"
#include "pan_job.h"
#include "pan_resource.h"
#include "pan_texture.h"
#include "pipe/p_compiler.h"
#include "util/detect.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "util/format/u_formats.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "util/u_blitter.h"
#include "util/detect.h"
#include "util/format/u_formats.h"
#include "util/hash_table.h"
#include "util/simple_mtx.h"
#include "util/u_blitter.h"
#include "midgard/midgard_compile.h"
#include "compiler/shader_enums.h"
#include "midgard/midgard_compile.h"
#define SET_BIT(lval, bit, cond) \
if (cond) \
lval |= (bit); \
else \
lval &= ~(bit);
#define SET_BIT(lval, bit, cond) \
if (cond) \
lval |= (bit); \
else \
lval &= ~(bit);
/* Dirty tracking flags. 3D is for general 3D state. Shader flags are
* per-stage. Renderer refers to Renderer State Descriptors. Vertex refers to
* vertex attributes/elements. */
enum pan_dirty_3d {
PAN_DIRTY_VIEWPORT = BITFIELD_BIT(0),
PAN_DIRTY_SCISSOR = BITFIELD_BIT(1),
PAN_DIRTY_VERTEX = BITFIELD_BIT(2),
PAN_DIRTY_PARAMS = BITFIELD_BIT(3),
PAN_DIRTY_DRAWID = BITFIELD_BIT(4),
PAN_DIRTY_TLS_SIZE = BITFIELD_BIT(5),
PAN_DIRTY_ZS = BITFIELD_BIT(6),
PAN_DIRTY_BLEND = BITFIELD_BIT(7),
PAN_DIRTY_MSAA = BITFIELD_BIT(8),
PAN_DIRTY_OQ = BITFIELD_BIT(9),
PAN_DIRTY_RASTERIZER = BITFIELD_BIT(10),
PAN_DIRTY_POINTS = BITFIELD_BIT(11),
PAN_DIRTY_SO = BITFIELD_BIT(12),
PAN_DIRTY_VIEWPORT = BITFIELD_BIT(0),
PAN_DIRTY_SCISSOR = BITFIELD_BIT(1),
PAN_DIRTY_VERTEX = BITFIELD_BIT(2),
PAN_DIRTY_PARAMS = BITFIELD_BIT(3),
PAN_DIRTY_DRAWID = BITFIELD_BIT(4),
PAN_DIRTY_TLS_SIZE = BITFIELD_BIT(5),
PAN_DIRTY_ZS = BITFIELD_BIT(6),
PAN_DIRTY_BLEND = BITFIELD_BIT(7),
PAN_DIRTY_MSAA = BITFIELD_BIT(8),
PAN_DIRTY_OQ = BITFIELD_BIT(9),
PAN_DIRTY_RASTERIZER = BITFIELD_BIT(10),
PAN_DIRTY_POINTS = BITFIELD_BIT(11),
PAN_DIRTY_SO = BITFIELD_BIT(12),
};
enum pan_dirty_shader {
PAN_DIRTY_STAGE_SHADER = BITFIELD_BIT(0),
PAN_DIRTY_STAGE_TEXTURE = BITFIELD_BIT(1),
PAN_DIRTY_STAGE_SAMPLER = BITFIELD_BIT(2),
PAN_DIRTY_STAGE_IMAGE = BITFIELD_BIT(3),
PAN_DIRTY_STAGE_CONST = BITFIELD_BIT(4),
PAN_DIRTY_STAGE_SSBO = BITFIELD_BIT(5),
PAN_DIRTY_STAGE_SHADER = BITFIELD_BIT(0),
PAN_DIRTY_STAGE_TEXTURE = BITFIELD_BIT(1),
PAN_DIRTY_STAGE_SAMPLER = BITFIELD_BIT(2),
PAN_DIRTY_STAGE_IMAGE = BITFIELD_BIT(3),
PAN_DIRTY_STAGE_CONST = BITFIELD_BIT(4),
PAN_DIRTY_STAGE_SSBO = BITFIELD_BIT(5),
};
struct panfrost_constant_buffer {
struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
uint32_t enabled_mask;
struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
uint32_t enabled_mask;
};
struct panfrost_query {
/* Passthrough from Gallium */
unsigned type;
unsigned index;
/* Passthrough from Gallium */
unsigned type;
unsigned index;
/* For computed queries. 64-bit to prevent overflow */
struct {
uint64_t start;
uint64_t end;
};
/* For computed queries. 64-bit to prevent overflow */
struct {
uint64_t start;
uint64_t end;
};
/* Memory for the GPU to writeback the value of the query */
struct pipe_resource *rsrc;
/* Memory for the GPU to writeback the value of the query */
struct pipe_resource *rsrc;
/* Whether an occlusion query is for a MSAA framebuffer */
bool msaa;
/* Whether an occlusion query is for a MSAA framebuffer */
bool msaa;
};
struct panfrost_streamout_target {
struct pipe_stream_output_target base;
uint32_t offset;
struct pipe_stream_output_target base;
uint32_t offset;
};
struct panfrost_streamout {
struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
unsigned num_targets;
struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
unsigned num_targets;
};
struct panfrost_context {
/* Gallium context */
struct pipe_context base;
/* Gallium context */
struct pipe_context base;
/* Dirty global state */
enum pan_dirty_3d dirty;
/* Dirty global state */
enum pan_dirty_3d dirty;
/* Per shader stage dirty state */
enum pan_dirty_shader dirty_shader[PIPE_SHADER_TYPES];
/* Per shader stage dirty state */
enum pan_dirty_shader dirty_shader[PIPE_SHADER_TYPES];
/* Unowned pools, so manage yourself. */
struct panfrost_pool descs, shaders;
/* Unowned pools, so manage yourself. */
struct panfrost_pool descs, shaders;
/* Sync obj used to keep track of in-flight jobs. */
uint32_t syncobj;
/* Sync obj used to keep track of in-flight jobs. */
uint32_t syncobj;
/* Set of 32 batches. When the set is full, the LRU entry (the batch
* with the smallest seqnum) is flushed to free a slot.
*/
struct {
uint64_t seqnum;
struct panfrost_batch slots[PAN_MAX_BATCHES];
/* Set of 32 batches. When the set is full, the LRU entry (the batch
* with the smallest seqnum) is flushed to free a slot.
*/
struct {
uint64_t seqnum;
struct panfrost_batch slots[PAN_MAX_BATCHES];
/** Set of active batches for faster traversal */
BITSET_DECLARE(active, PAN_MAX_BATCHES);
} batches;
/** Set of active batches for faster traversal */
BITSET_DECLARE(active, PAN_MAX_BATCHES);
} batches;
/* Map from resources to panfrost_batches */
struct hash_table *writers;
/* Map from resources to panfrost_batches */
struct hash_table *writers;
/* Bound job batch */
struct panfrost_batch *batch;
/* Bound job batch */
struct panfrost_batch *batch;
/* Within a launch_grid call.. */
const struct pipe_grid_info *compute_grid;
/* Within a launch_grid call.. */
const struct pipe_grid_info *compute_grid;
struct pipe_framebuffer_state pipe_framebuffer;
struct panfrost_streamout streamout;
struct pipe_framebuffer_state pipe_framebuffer;
struct panfrost_streamout streamout;
bool active_queries;
uint64_t prims_generated;
uint64_t tf_prims_generated;
uint64_t draw_calls;
struct panfrost_query *occlusion_query;
bool active_queries;
uint64_t prims_generated;
uint64_t tf_prims_generated;
uint64_t draw_calls;
struct panfrost_query *occlusion_query;
unsigned drawid;
unsigned vertex_count;
unsigned instance_count;
unsigned offset_start;
unsigned base_vertex;
unsigned base_instance;
enum pipe_prim_type active_prim;
unsigned drawid;
unsigned vertex_count;
unsigned instance_count;
unsigned offset_start;
unsigned base_vertex;
unsigned base_instance;
enum pipe_prim_type active_prim;
/* If instancing is enabled, vertex count padded for instance; if
* it is disabled, just equal to plain vertex count */
unsigned padded_count;
/* If instancing is enabled, vertex count padded for instance; if
* it is disabled, just equal to plain vertex count */
unsigned padded_count;
struct panfrost_constant_buffer constant_buffer[PIPE_SHADER_TYPES];
struct panfrost_rasterizer *rasterizer;
struct panfrost_vertex_state *vertex;
struct panfrost_constant_buffer constant_buffer[PIPE_SHADER_TYPES];
struct panfrost_rasterizer *rasterizer;
struct panfrost_vertex_state *vertex;
struct panfrost_uncompiled_shader *uncompiled[PIPE_SHADER_TYPES];
struct panfrost_compiled_shader *prog[PIPE_SHADER_TYPES];
struct panfrost_uncompiled_shader *uncompiled[PIPE_SHADER_TYPES];
struct panfrost_compiled_shader *prog[PIPE_SHADER_TYPES];
struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
uint32_t vb_mask;
struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
uint32_t vb_mask;
struct pipe_shader_buffer ssbo[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
uint32_t ssbo_mask[PIPE_SHADER_TYPES];
struct pipe_shader_buffer ssbo[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
uint32_t ssbo_mask[PIPE_SHADER_TYPES];
struct pipe_image_view images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
uint32_t image_mask[PIPE_SHADER_TYPES];
struct pipe_image_view images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
uint32_t image_mask[PIPE_SHADER_TYPES];
struct panfrost_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
unsigned sampler_count[PIPE_SHADER_TYPES];
uint32_t valid_samplers[PIPE_SHADER_TYPES];
struct panfrost_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
unsigned sampler_count[PIPE_SHADER_TYPES];
uint32_t valid_samplers[PIPE_SHADER_TYPES];
struct panfrost_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
unsigned sampler_view_count[PIPE_SHADER_TYPES];
struct panfrost_sampler_view
*sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
unsigned sampler_view_count[PIPE_SHADER_TYPES];
struct blitter_context *blitter;
struct blitter_context *blitter;
struct panfrost_blend_state *blend;
struct panfrost_blend_state *blend;
/* On Valhall, does the current blend state use a blend shader for any
* output? We need this information in a hot path to decide if
* per-sample shading should be enabled.
*/
bool valhall_has_blend_shader;
/* On Valhall, does the current blend state use a blend shader for any
* output? We need this information in a hot path to decide if
* per-sample shading should be enabled.
*/
bool valhall_has_blend_shader;
struct pipe_viewport_state pipe_viewport;
struct pipe_scissor_state scissor;
struct pipe_blend_color blend_color;
struct panfrost_zsa_state *depth_stencil;
struct pipe_stencil_ref stencil_ref;
uint16_t sample_mask;
unsigned min_samples;
struct pipe_viewport_state pipe_viewport;
struct pipe_scissor_state scissor;
struct pipe_blend_color blend_color;
struct panfrost_zsa_state *depth_stencil;
struct pipe_stencil_ref stencil_ref;
uint16_t sample_mask;
unsigned min_samples;
struct panfrost_query *cond_query;
bool cond_cond;
enum pipe_render_cond_flag cond_mode;
struct panfrost_query *cond_query;
bool cond_cond;
enum pipe_render_cond_flag cond_mode;
bool is_noop;
bool is_noop;
/* Mask of active render targets */
uint8_t fb_rt_mask;
/* Mask of active render targets */
uint8_t fb_rt_mask;
int in_sync_fd;
uint32_t in_sync_obj;
int in_sync_fd;
uint32_t in_sync_obj;
};
/* Corresponds to the CSO */
@@ -234,19 +235,19 @@ struct panfrost_rasterizer;
/* Linked varyings */
struct pan_linkage {
/* If the upload is owned by the CSO instead
* of the pool, the referenced BO. Else,
* NULL. */
struct panfrost_bo *bo;
/* If the upload is owned by the CSO instead
* of the pool, the referenced BO. Else,
* NULL. */
struct panfrost_bo *bo;
/* Uploaded attribute descriptors */
mali_ptr producer, consumer;
/* Uploaded attribute descriptors */
mali_ptr producer, consumer;
/* Varyings buffers required */
uint32_t present;
/* Varyings buffers required */
uint32_t present;
/* Per-vertex stride for general varying buffer */
uint32_t stride;
/* Per-vertex stride for general varying buffer */
uint32_t stride;
};
#define RSD_WORDS 16
@@ -255,89 +256,89 @@ struct pan_linkage {
* shaders with varying emulated features baked in
*/
struct panfrost_fs_key {
/* Number of colour buffers if gl_FragColor is written */
unsigned nr_cbufs_for_fragcolor;
/* Number of colour buffers if gl_FragColor is written */
unsigned nr_cbufs_for_fragcolor;
/* On Valhall, fixed_varying_mask of the linked vertex shader */
uint32_t fixed_varying_mask;
/* On Valhall, fixed_varying_mask of the linked vertex shader */
uint32_t fixed_varying_mask;
/* Midgard shaders that read the tilebuffer must be keyed for
* non-blendable formats
*/
enum pipe_format rt_formats[8];
/* Midgard shaders that read the tilebuffer must be keyed for
* non-blendable formats
*/
enum pipe_format rt_formats[8];
/* From rasterize state, to lower point sprites */
uint16_t sprite_coord_enable;
/* From rasterize state, to lower point sprites */
uint16_t sprite_coord_enable;
/* User clip plane lowering */
uint8_t clip_plane_enable;
/* User clip plane lowering */
uint8_t clip_plane_enable;
};
struct panfrost_shader_key {
union {
/* Vertex shaders do not use shader keys. However, we have a
* special "transform feedback" vertex program derived from a
* vertex shader. If vs_is_xfb is set on a vertex shader, this
* is a transform feedback shader, else it is a regular
* (unkeyed) vertex shader.
*/
bool vs_is_xfb;
union {
/* Vertex shaders do not use shader keys. However, we have a
* special "transform feedback" vertex program derived from a
* vertex shader. If vs_is_xfb is set on a vertex shader, this
* is a transform feedback shader, else it is a regular
* (unkeyed) vertex shader.
*/
bool vs_is_xfb;
/* Fragment shaders use regular shader keys */
struct panfrost_fs_key fs;
};
/* Fragment shaders use regular shader keys */
struct panfrost_fs_key fs;
};
};
struct panfrost_compiled_shader {
/* Respectively, shader binary and Renderer State Descriptor */
struct panfrost_pool_ref bin, state;
/* Respectively, shader binary and Renderer State Descriptor */
struct panfrost_pool_ref bin, state;
/* For fragment shaders, a prepared (but not uploaded RSD) */
uint32_t partial_rsd[RSD_WORDS];
/* For fragment shaders, a prepared (but not uploaded RSD) */
uint32_t partial_rsd[RSD_WORDS];
struct pan_shader_info info;
struct pan_shader_info info;
struct pan_earlyzs_lut earlyzs;
struct pan_earlyzs_lut earlyzs;
/* Linked varyings, for non-separable programs */
struct pan_linkage linkage;
/* Linked varyings, for non-separable programs */
struct pan_linkage linkage;
struct pipe_stream_output_info stream_output;
struct pipe_stream_output_info stream_output;
struct panfrost_shader_key key;
struct panfrost_shader_key key;
/* Mask of state that dirties the sysvals */
unsigned dirty_3d, dirty_shader;
/* Mask of state that dirties the sysvals */
unsigned dirty_3d, dirty_shader;
};
/* Shader CSO */
struct panfrost_uncompiled_shader {
/* NIR for the shader. For graphics, this will be non-NULL even for
* TGSI. For compute, this will be NULL after the shader is compiled,
* as we don't need any compute variants.
*/
const nir_shader *nir;
/* NIR for the shader. For graphics, this will be non-NULL even for
* TGSI. For compute, this will be NULL after the shader is compiled,
* as we don't need any compute variants.
*/
const nir_shader *nir;
/* A SHA1 of the serialized NIR for the disk cache. */
unsigned char nir_sha1[20];
/* A SHA1 of the serialized NIR for the disk cache. */
unsigned char nir_sha1[20];
/* Stream output information */
struct pipe_stream_output_info stream_output;
/* Stream output information */
struct pipe_stream_output_info stream_output;
/** Lock for the variants array */
simple_mtx_t lock;
/** Lock for the variants array */
simple_mtx_t lock;
/* Array of panfrost_compiled_shader */
struct util_dynarray variants;
/* Array of panfrost_compiled_shader */
struct util_dynarray variants;
/* Compiled transform feedback program, if one is required */
struct panfrost_compiled_shader *xfb;
/* Compiled transform feedback program, if one is required */
struct panfrost_compiled_shader *xfb;
/* On vertex shaders, bit mask of special desktop-only varyings to link
* with the fragment shader. Used on Valhall to implement separable
* shaders for desktop GL.
*/
uint32_t fixed_varying_mask;
/* On vertex shaders, bit mask of special desktop-only varyings to link
* with the fragment shader. Used on Valhall to implement separable
* shaders for desktop GL.
*/
uint32_t fixed_varying_mask;
};
/* The binary artefacts of compiling a shader. This differs from
@@ -347,11 +348,11 @@ struct panfrost_uncompiled_shader {
* This structure is serialized for the shader disk cache.
*/
struct panfrost_shader_binary {
/* Collected information about the compiled shader */
struct pan_shader_info info;
/* Collected information about the compiled shader */
struct pan_shader_info info;
/* The binary itself */
struct util_dynarray binary;
/* The binary itself */
struct util_dynarray binary;
};
void
@@ -360,28 +361,25 @@ panfrost_disk_cache_store(struct disk_cache *cache,
const struct panfrost_shader_key *key,
const struct panfrost_shader_binary *binary);
bool
panfrost_disk_cache_retrieve(struct disk_cache *cache,
const struct panfrost_uncompiled_shader *uncompiled,
const struct panfrost_shader_key *key,
struct panfrost_shader_binary *binary);
bool panfrost_disk_cache_retrieve(
struct disk_cache *cache,
const struct panfrost_uncompiled_shader *uncompiled,
const struct panfrost_shader_key *key,
struct panfrost_shader_binary *binary);
void
panfrost_disk_cache_init(struct panfrost_screen *screen);
void panfrost_disk_cache_init(struct panfrost_screen *screen);
/** (Vertex buffer index, divisor) tuple that will become an Attribute Buffer
* Descriptor at draw-time on Midgard
*/
struct pan_vertex_buffer {
unsigned vbi;
unsigned divisor;
unsigned vbi;
unsigned divisor;
};
unsigned
pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers,
unsigned *nr_bufs,
unsigned vbi,
unsigned divisor);
unsigned pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers,
unsigned *nr_bufs, unsigned vbi,
unsigned divisor);
struct panfrost_zsa_state;
struct panfrost_sampler_state;
@@ -391,39 +389,32 @@ struct panfrost_vertex_state;
static inline struct panfrost_context *
pan_context(struct pipe_context *pcontext)
{
return (struct panfrost_context *) pcontext;
return (struct panfrost_context *)pcontext;
}
static inline struct panfrost_streamout_target *
pan_so_target(struct pipe_stream_output_target *target)
{
return (struct panfrost_streamout_target *)target;
return (struct panfrost_streamout_target *)target;
}
struct pipe_context *
panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags);
struct pipe_context *panfrost_create_context(struct pipe_screen *screen,
void *priv, unsigned flags);
bool
panfrost_writes_point_size(struct panfrost_context *ctx);
bool panfrost_writes_point_size(struct panfrost_context *ctx);
struct panfrost_ptr
panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler);
struct panfrost_ptr panfrost_vertex_tiler_job(struct panfrost_context *ctx,
bool is_tiler);
void
panfrost_flush(
struct pipe_context *pipe,
struct pipe_fence_handle **fence,
unsigned flags);
void panfrost_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence,
unsigned flags);
bool
panfrost_render_condition_check(struct panfrost_context *ctx);
bool panfrost_render_condition_check(struct panfrost_context *ctx);
void
panfrost_update_shader_variant(struct panfrost_context *ctx,
enum pipe_shader_type type);
void panfrost_update_shader_variant(struct panfrost_context *ctx,
enum pipe_shader_type type);
void
panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss);
void panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss);
mali_ptr
panfrost_get_index_buffer(struct panfrost_batch *batch,
@@ -438,41 +429,37 @@ panfrost_get_index_buffer_bounded(struct panfrost_batch *batch,
/* Instancing */
mali_ptr
panfrost_vertex_buffer_address(struct panfrost_context *ctx, unsigned i);
mali_ptr panfrost_vertex_buffer_address(struct panfrost_context *ctx,
unsigned i);
void
panfrost_shader_context_init(struct pipe_context *pctx);
void panfrost_shader_context_init(struct pipe_context *pctx);
static inline void
panfrost_dirty_state_all(struct panfrost_context *ctx)
{
ctx->dirty = ~0;
ctx->dirty = ~0;
for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
ctx->dirty_shader[i] = ~0;
for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
ctx->dirty_shader[i] = ~0;
}
static inline void
panfrost_clean_state_3d(struct panfrost_context *ctx)
{
ctx->dirty = 0;
ctx->dirty = 0;
for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) {
if (i != PIPE_SHADER_COMPUTE)
ctx->dirty_shader[i] = 0;
}
for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) {
if (i != PIPE_SHADER_COMPUTE)
ctx->dirty_shader[i] = 0;
}
}
void
panfrost_set_batch_masks_blend(struct panfrost_batch *batch);
void panfrost_set_batch_masks_blend(struct panfrost_batch *batch);
void
panfrost_set_batch_masks_zs(struct panfrost_batch *batch);
void panfrost_set_batch_masks_zs(struct panfrost_batch *batch);
void
panfrost_track_image_access(struct panfrost_batch *batch,
enum pipe_shader_type stage,
struct pipe_image_view *image);
void panfrost_track_image_access(struct panfrost_batch *batch,
enum pipe_shader_type stage,
struct pipe_image_view *image);
#endif


@@ -21,9 +21,9 @@
* DEALINGS IN THE SOFTWARE.
*/
#include <stdio.h>
#include <stdint.h>
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "compiler/nir/nir.h"
@@ -43,17 +43,17 @@ extern int bifrost_debug;
* Compute a disk cache key for the given uncompiled shader and shader key.
*/
static void
panfrost_disk_cache_compute_key(struct disk_cache *cache,
const struct panfrost_uncompiled_shader *uncompiled,
const struct panfrost_shader_key *shader_key,
cache_key cache_key)
panfrost_disk_cache_compute_key(
struct disk_cache *cache,
const struct panfrost_uncompiled_shader *uncompiled,
const struct panfrost_shader_key *shader_key, cache_key cache_key)
{
uint8_t data[sizeof(uncompiled->nir_sha1) + sizeof(*shader_key)];
uint8_t data[sizeof(uncompiled->nir_sha1) + sizeof(*shader_key)];
memcpy(data, uncompiled->nir_sha1, sizeof(uncompiled->nir_sha1));
memcpy(data + sizeof(uncompiled->nir_sha1), shader_key, sizeof(*shader_key));
memcpy(data, uncompiled->nir_sha1, sizeof(uncompiled->nir_sha1));
memcpy(data + sizeof(uncompiled->nir_sha1), shader_key, sizeof(*shader_key));
disk_cache_compute_key(cache, data, sizeof(data), cache_key);
disk_cache_compute_key(cache, data, sizeof(data), cache_key);
}
/**
@@ -69,33 +69,33 @@ panfrost_disk_cache_store(struct disk_cache *cache,
const struct panfrost_shader_binary *binary)
{
#ifdef ENABLE_SHADER_CACHE
if (!cache)
return;
if (!cache)
return;
cache_key cache_key;
panfrost_disk_cache_compute_key(cache, uncompiled, key, cache_key);
cache_key cache_key;
panfrost_disk_cache_compute_key(cache, uncompiled, key, cache_key);
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] storing %s\n", sha1);
}
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] storing %s\n", sha1);
}
struct blob blob;
blob_init(&blob);
struct blob blob;
blob_init(&blob);
/* We write the following data to the cache blob:
*
* 1. Size of program binary
* 2. Program binary
* 3. Shader info
*/
blob_write_uint32(&blob, binary->binary.size);
blob_write_bytes(&blob, binary->binary.data, binary->binary.size);
blob_write_bytes(&blob, &binary->info, sizeof(binary->info));
/* We write the following data to the cache blob:
*
* 1. Size of program binary
* 2. Program binary
* 3. Shader info
*/
blob_write_uint32(&blob, binary->binary.size);
blob_write_bytes(&blob, binary->binary.data, binary->binary.size);
blob_write_bytes(&blob, &binary->info, sizeof(binary->info));
disk_cache_put(cache, cache_key, blob.data, blob.size, NULL);
blob_finish(&blob);
disk_cache_put(cache, cache_key, blob.data, blob.size, NULL);
blob_finish(&blob);
#endif
}
@@ -109,43 +109,43 @@ panfrost_disk_cache_retrieve(struct disk_cache *cache,
struct panfrost_shader_binary *binary)
{
#ifdef ENABLE_SHADER_CACHE
if (!cache)
return false;
if (!cache)
return false;
cache_key cache_key;
panfrost_disk_cache_compute_key(cache, uncompiled, key, cache_key);
cache_key cache_key;
panfrost_disk_cache_compute_key(cache, uncompiled, key, cache_key);
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] retrieving %s: ", sha1);
}
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] retrieving %s: ", sha1);
}
size_t size;
void *buffer = disk_cache_get(cache, cache_key, &size);
size_t size;
void *buffer = disk_cache_get(cache, cache_key, &size);
if (debug)
fprintf(stderr, "%s\n", buffer ? "found" : "missing");
if (debug)
fprintf(stderr, "%s\n", buffer ? "found" : "missing");
if (!buffer)
return false;
if (!buffer)
return false;
struct blob_reader blob;
blob_reader_init(&blob, buffer, size);
struct blob_reader blob;
blob_reader_init(&blob, buffer, size);
util_dynarray_init(&binary->binary, NULL);
util_dynarray_init(&binary->binary, NULL);
uint32_t binary_size = blob_read_uint32(&blob);
void *ptr = util_dynarray_resize_bytes(&binary->binary, binary_size, 1);
uint32_t binary_size = blob_read_uint32(&blob);
void *ptr = util_dynarray_resize_bytes(&binary->binary, binary_size, 1);
blob_copy_bytes(&blob, ptr, binary_size);
blob_copy_bytes(&blob, &binary->info, sizeof(binary->info));
blob_copy_bytes(&blob, ptr, binary_size);
blob_copy_bytes(&blob, &binary->info, sizeof(binary->info));
free(buffer);
free(buffer);
return true;
return true;
#else
return false;
return false;
#endif
}
@@ -156,22 +156,22 @@ void
panfrost_disk_cache_init(struct panfrost_screen *screen)
{
#ifdef ENABLE_SHADER_CACHE
const char *renderer = screen->base.get_name(&screen->base);
const char *renderer = screen->base.get_name(&screen->base);
const struct build_id_note *note =
build_id_find_nhdr_for_addr(panfrost_disk_cache_init);
assert(note && build_id_length(note) == 20); /* sha1 */
const struct build_id_note *note =
build_id_find_nhdr_for_addr(panfrost_disk_cache_init);
assert(note && build_id_length(note) == 20); /* sha1 */
const uint8_t *id_sha1 = build_id_data(note);
assert(id_sha1);
const uint8_t *id_sha1 = build_id_data(note);
assert(id_sha1);
char timestamp[41];
_mesa_sha1_format(timestamp, id_sha1);
char timestamp[41];
_mesa_sha1_format(timestamp, id_sha1);
/* Consider any flags affecting the compile when caching */
uint64_t driver_flags = screen->dev.debug;
driver_flags |= ((uint64_t) (midgard_debug | bifrost_debug) << 32);
/* Consider any flags affecting the compile when caching */
uint64_t driver_flags = screen->dev.debug;
driver_flags |= ((uint64_t)(midgard_debug | bifrost_debug) << 32);
screen->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
screen->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
#endif
}


@@ -26,8 +26,8 @@
* SOFTWARE.
*/
#include "pan_context.h"
#include "pan_fence.h"
#include "pan_context.h"
#include "pan_screen.h"
#include "util/os_time.h"
@@ -38,117 +38,112 @@ panfrost_fence_reference(struct pipe_screen *pscreen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence)
{
struct panfrost_device *dev = pan_device(pscreen);
struct pipe_fence_handle *old = *ptr;
struct panfrost_device *dev = pan_device(pscreen);
struct pipe_fence_handle *old = *ptr;
if (pipe_reference(&old->reference, &fence->reference)) {
drmSyncobjDestroy(dev->fd, old->syncobj);
free(old);
}
if (pipe_reference(&old->reference, &fence->reference)) {
drmSyncobjDestroy(dev->fd, old->syncobj);
free(old);
}
*ptr = fence;
*ptr = fence;
}
bool
panfrost_fence_finish(struct pipe_screen *pscreen,
struct pipe_context *ctx,
struct pipe_fence_handle *fence,
uint64_t timeout)
panfrost_fence_finish(struct pipe_screen *pscreen, struct pipe_context *ctx,
struct pipe_fence_handle *fence, uint64_t timeout)
{
struct panfrost_device *dev = pan_device(pscreen);
int ret;
struct panfrost_device *dev = pan_device(pscreen);
int ret;
if (fence->signaled)
return true;
if (fence->signaled)
return true;
uint64_t abs_timeout = os_time_get_absolute_timeout(timeout);
if (abs_timeout == OS_TIMEOUT_INFINITE)
abs_timeout = INT64_MAX;
uint64_t abs_timeout = os_time_get_absolute_timeout(timeout);
if (abs_timeout == OS_TIMEOUT_INFINITE)
abs_timeout = INT64_MAX;
ret = drmSyncobjWait(dev->fd, &fence->syncobj,
1,
abs_timeout, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
NULL);
ret = drmSyncobjWait(dev->fd, &fence->syncobj, 1, abs_timeout,
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL);
fence->signaled = (ret >= 0);
return fence->signaled;
fence->signaled = (ret >= 0);
return fence->signaled;
}
int
panfrost_fence_get_fd(struct pipe_screen *screen,
struct pipe_fence_handle *f)
panfrost_fence_get_fd(struct pipe_screen *screen, struct pipe_fence_handle *f)
{
struct panfrost_device *dev = pan_device(screen);
int fd = -1;
struct panfrost_device *dev = pan_device(screen);
int fd = -1;
drmSyncobjExportSyncFile(dev->fd, f->syncobj, &fd);
return fd;
drmSyncobjExportSyncFile(dev->fd, f->syncobj, &fd);
return fd;
}
struct pipe_fence_handle *
panfrost_fence_from_fd(struct panfrost_context *ctx, int fd,
enum pipe_fd_type type)
{
struct panfrost_device *dev = pan_device(ctx->base.screen);
int ret;
struct panfrost_device *dev = pan_device(ctx->base.screen);
int ret;
struct pipe_fence_handle *f = calloc(1, sizeof(*f));
if (!f)
return NULL;
struct pipe_fence_handle *f = calloc(1, sizeof(*f));
if (!f)
return NULL;
if (type == PIPE_FD_TYPE_NATIVE_SYNC) {
ret = drmSyncobjCreate(dev->fd, 0, &f->syncobj);
if (ret) {
fprintf(stderr, "create syncobj failed\n");
goto err_free_fence;
}
if (type == PIPE_FD_TYPE_NATIVE_SYNC) {
ret = drmSyncobjCreate(dev->fd, 0, &f->syncobj);
if (ret) {
fprintf(stderr, "create syncobj failed\n");
goto err_free_fence;
}
ret = drmSyncobjImportSyncFile(dev->fd, f->syncobj, fd);
if (ret) {
fprintf(stderr, "import syncfile failed\n");
goto err_destroy_syncobj;
}
} else {
assert(type == PIPE_FD_TYPE_SYNCOBJ);
ret = drmSyncobjFDToHandle(dev->fd, fd, &f->syncobj);
if (ret) {
fprintf(stderr, "import syncobj FD failed\n");
goto err_free_fence;
}
}
ret = drmSyncobjImportSyncFile(dev->fd, f->syncobj, fd);
if (ret) {
fprintf(stderr, "import syncfile failed\n");
goto err_destroy_syncobj;
}
} else {
assert(type == PIPE_FD_TYPE_SYNCOBJ);
ret = drmSyncobjFDToHandle(dev->fd, fd, &f->syncobj);
if (ret) {
fprintf(stderr, "import syncobj FD failed\n");
goto err_free_fence;
}
}
pipe_reference_init(&f->reference, 1);
pipe_reference_init(&f->reference, 1);
return f;
return f;
err_destroy_syncobj:
drmSyncobjDestroy(dev->fd, f->syncobj);
drmSyncobjDestroy(dev->fd, f->syncobj);
err_free_fence:
free(f);
return NULL;
free(f);
return NULL;
}
struct pipe_fence_handle *
panfrost_fence_create(struct panfrost_context *ctx)
{
struct panfrost_device *dev = pan_device(ctx->base.screen);
int fd = -1, ret;
struct panfrost_device *dev = pan_device(ctx->base.screen);
int fd = -1, ret;
/* Snapshot the last rendering out fence. We'd rather have another
* syncobj instead of a sync file, but this is all we get.
* (HandleToFD/FDToHandle just gives you another syncobj ID for the
* same syncobj).
*/
ret = drmSyncobjExportSyncFile(dev->fd, ctx->syncobj, &fd);
if (ret || fd == -1) {
fprintf(stderr, "export failed\n");
return NULL;
}
/* Snapshot the last rendering out fence. We'd rather have another
* syncobj instead of a sync file, but this is all we get.
* (HandleToFD/FDToHandle just gives you another syncobj ID for the
* same syncobj).
*/
ret = drmSyncobjExportSyncFile(dev->fd, ctx->syncobj, &fd);
if (ret || fd == -1) {
fprintf(stderr, "export failed\n");
return NULL;
}
struct pipe_fence_handle *f =
panfrost_fence_from_fd(ctx, fd, PIPE_FD_TYPE_NATIVE_SYNC);
struct pipe_fence_handle *f =
panfrost_fence_from_fd(ctx, fd, PIPE_FD_TYPE_NATIVE_SYNC);
close(fd);
close(fd);
return f;
return f;
}


@@ -30,29 +30,24 @@
struct panfrost_context;
struct pipe_fence_handle {
struct pipe_reference reference;
uint32_t syncobj;
bool signaled;
struct pipe_reference reference;
uint32_t syncobj;
bool signaled;
};
void
panfrost_fence_reference(struct pipe_screen *pscreen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence);
void panfrost_fence_reference(struct pipe_screen *pscreen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence);
bool
panfrost_fence_finish(struct pipe_screen *pscreen,
struct pipe_context *ctx,
struct pipe_fence_handle *fence,
uint64_t timeout);
bool panfrost_fence_finish(struct pipe_screen *pscreen,
struct pipe_context *ctx,
struct pipe_fence_handle *fence, uint64_t timeout);
int
panfrost_fence_get_fd(struct pipe_screen *screen,
struct pipe_fence_handle *f);
int panfrost_fence_get_fd(struct pipe_screen *screen,
struct pipe_fence_handle *f);
struct pipe_fence_handle *
panfrost_fence_from_fd(struct panfrost_context *ctx, int fd,
enum pipe_fd_type type);
struct pipe_fence_handle *panfrost_fence_from_fd(struct panfrost_context *ctx,
int fd,
enum pipe_fd_type type);
struct pipe_fence_handle *
panfrost_fence_create(struct panfrost_context *ctx);
struct pipe_fence_handle *panfrost_fence_create(struct panfrost_context *ctx);


@@ -21,66 +21,66 @@
* SOFTWARE.
*/
#include "pan_context.h"
#include "util/u_vbuf.h"
#include "pan_context.h"
void
panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss)
{
unsigned dirty = 0;
unsigned dirty_shader = PAN_DIRTY_STAGE_SHADER | PAN_DIRTY_STAGE_CONST;
unsigned dirty = 0;
unsigned dirty_shader = PAN_DIRTY_STAGE_SHADER | PAN_DIRTY_STAGE_CONST;
for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
switch (PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[i])) {
case PAN_SYSVAL_VIEWPORT_SCALE:
case PAN_SYSVAL_VIEWPORT_OFFSET:
dirty |= PAN_DIRTY_VIEWPORT;
break;
for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
switch (PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[i])) {
case PAN_SYSVAL_VIEWPORT_SCALE:
case PAN_SYSVAL_VIEWPORT_OFFSET:
dirty |= PAN_DIRTY_VIEWPORT;
break;
case PAN_SYSVAL_TEXTURE_SIZE:
dirty_shader |= PAN_DIRTY_STAGE_TEXTURE;
break;
case PAN_SYSVAL_TEXTURE_SIZE:
dirty_shader |= PAN_DIRTY_STAGE_TEXTURE;
break;
case PAN_SYSVAL_SSBO:
dirty_shader |= PAN_DIRTY_STAGE_SSBO;
break;
case PAN_SYSVAL_SSBO:
dirty_shader |= PAN_DIRTY_STAGE_SSBO;
break;
case PAN_SYSVAL_XFB:
dirty |= PAN_DIRTY_SO;
break;
case PAN_SYSVAL_XFB:
dirty |= PAN_DIRTY_SO;
break;
case PAN_SYSVAL_SAMPLER:
dirty_shader |= PAN_DIRTY_STAGE_SAMPLER;
break;
case PAN_SYSVAL_SAMPLER:
dirty_shader |= PAN_DIRTY_STAGE_SAMPLER;
break;
case PAN_SYSVAL_IMAGE_SIZE:
dirty_shader |= PAN_DIRTY_STAGE_IMAGE;
break;
case PAN_SYSVAL_IMAGE_SIZE:
dirty_shader |= PAN_DIRTY_STAGE_IMAGE;
break;
case PAN_SYSVAL_NUM_WORK_GROUPS:
case PAN_SYSVAL_LOCAL_GROUP_SIZE:
case PAN_SYSVAL_WORK_DIM:
case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
case PAN_SYSVAL_NUM_VERTICES:
dirty |= PAN_DIRTY_PARAMS;
break;
case PAN_SYSVAL_NUM_WORK_GROUPS:
case PAN_SYSVAL_LOCAL_GROUP_SIZE:
case PAN_SYSVAL_WORK_DIM:
case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
case PAN_SYSVAL_NUM_VERTICES:
dirty |= PAN_DIRTY_PARAMS;
break;
case PAN_SYSVAL_DRAWID:
dirty |= PAN_DIRTY_DRAWID;
break;
case PAN_SYSVAL_DRAWID:
dirty |= PAN_DIRTY_DRAWID;
break;
case PAN_SYSVAL_SAMPLE_POSITIONS:
case PAN_SYSVAL_MULTISAMPLED:
case PAN_SYSVAL_RT_CONVERSION:
/* Nothing beyond the batch itself */
break;
default:
unreachable("Invalid sysval");
}
}
case PAN_SYSVAL_SAMPLE_POSITIONS:
case PAN_SYSVAL_MULTISAMPLED:
case PAN_SYSVAL_RT_CONVERSION:
/* Nothing beyond the batch itself */
break;
default:
unreachable("Invalid sysval");
}
}
ss->dirty_3d = dirty;
ss->dirty_shader = dirty_shader;
ss->dirty_3d = dirty;
ss->dirty_shader = dirty_shader;
}
/*
@@ -93,25 +93,22 @@ panfrost_get_index_buffer(struct panfrost_batch *batch,
const struct pipe_draw_info *info,
const struct pipe_draw_start_count_bias *draw)
{
struct panfrost_resource *rsrc = pan_resource(info->index.resource);
off_t offset = draw->start * info->index_size;
struct panfrost_resource *rsrc = pan_resource(info->index.resource);
off_t offset = draw->start * info->index_size;
if (!info->has_user_indices) {
/* Only resources can be directly mapped */
panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
return rsrc->image.data.bo->ptr.gpu + offset;
} else {
/* Otherwise, we need to upload to transient memory */
const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
struct panfrost_ptr T =
pan_pool_alloc_aligned(&batch->pool.base,
draw->count *
info->index_size,
info->index_size);
if (!info->has_user_indices) {
/* Only resources can be directly mapped */
panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
return rsrc->image.data.bo->ptr.gpu + offset;
} else {
/* Otherwise, we need to upload to transient memory */
const uint8_t *ibuf8 = (const uint8_t *)info->index.user;
struct panfrost_ptr T = pan_pool_alloc_aligned(
&batch->pool.base, draw->count * info->index_size, info->index_size);
memcpy(T.cpu, ibuf8 + offset, draw->count * info->index_size);
return T.gpu;
}
memcpy(T.cpu, ibuf8 + offset, draw->count * info->index_size);
return T.gpu;
}
}
/* Gets a GPU address for the associated index buffer. Only gauranteed to be
@@ -126,34 +123,30 @@ panfrost_get_index_buffer_bounded(struct panfrost_batch *batch,
const struct pipe_draw_start_count_bias *draw,
unsigned *min_index, unsigned *max_index)
{
struct panfrost_resource *rsrc = pan_resource(info->index.resource);
struct panfrost_context *ctx = batch->ctx;
bool needs_indices = true;
struct panfrost_resource *rsrc = pan_resource(info->index.resource);
struct panfrost_context *ctx = batch->ctx;
bool needs_indices = true;
if (info->index_bounds_valid) {
*min_index = info->min_index;
*max_index = info->max_index;
needs_indices = false;
} else if (!info->has_user_indices) {
/* Check the cache */
needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
draw->start,
draw->count,
min_index,
max_index);
}
if (info->index_bounds_valid) {
*min_index = info->min_index;
*max_index = info->max_index;
needs_indices = false;
} else if (!info->has_user_indices) {
/* Check the cache */
needs_indices = !panfrost_minmax_cache_get(
rsrc->index_cache, draw->start, draw->count, min_index, max_index);
}
if (needs_indices) {
/* Fallback */
u_vbuf_get_minmax_index(&ctx->base, info, draw, min_index, max_index);
if (needs_indices) {
/* Fallback */
u_vbuf_get_minmax_index(&ctx->base, info, draw, min_index, max_index);
if (!info->has_user_indices)
panfrost_minmax_cache_add(rsrc->index_cache,
draw->start, draw->count,
*min_index, *max_index);
}
if (!info->has_user_indices)
panfrost_minmax_cache_add(rsrc->index_cache, draw->start, draw->count,
*min_index, *max_index);
}
return panfrost_get_index_buffer(batch, info, draw);
return panfrost_get_index_buffer(batch, info, draw);
}
/**
@@ -163,26 +156,24 @@ panfrost_get_index_buffer_bounded(struct panfrost_batch *batch,
* elements CSO create time, not at draw time.
*/
unsigned
pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers,
unsigned *nr_bufs,
unsigned vbi,
unsigned divisor)
pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers, unsigned *nr_bufs,
unsigned vbi, unsigned divisor)
{
/* Look up the buffer */
for (unsigned i = 0; i < (*nr_bufs); ++i) {
if (buffers[i].vbi == vbi && buffers[i].divisor == divisor)
return i;
}
/* Look up the buffer */
for (unsigned i = 0; i < (*nr_bufs); ++i) {
if (buffers[i].vbi == vbi && buffers[i].divisor == divisor)
return i;
}
/* Else, create a new buffer */
unsigned idx = (*nr_bufs)++;
/* Else, create a new buffer */
unsigned idx = (*nr_bufs)++;
buffers[idx] = (struct pan_vertex_buffer) {
.vbi = vbi,
.divisor = divisor,
};
buffers[idx] = (struct pan_vertex_buffer){
.vbi = vbi,
.divisor = divisor,
};
return idx;
return idx;
}
/*
@@ -194,8 +185,8 @@ pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers,
static void
panfrost_draw_target(struct panfrost_batch *batch, unsigned target)
{
batch->draws |= target;
batch->resolve |= target;
batch->draws |= target;
batch->resolve |= target;
}
/*
@@ -206,34 +197,34 @@ panfrost_draw_target(struct panfrost_batch *batch, unsigned target)
void
panfrost_set_batch_masks_blend(struct panfrost_batch *batch)
{
struct panfrost_context *ctx = batch->ctx;
struct panfrost_blend_state *blend = ctx->blend;
struct panfrost_context *ctx = batch->ctx;
struct panfrost_blend_state *blend = ctx->blend;
for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
if (blend->info[i].enabled && batch->key.cbufs[i])
panfrost_draw_target(batch, PIPE_CLEAR_COLOR0 << i);
}
for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
if (blend->info[i].enabled && batch->key.cbufs[i])
panfrost_draw_target(batch, PIPE_CLEAR_COLOR0 << i);
}
}
void
panfrost_set_batch_masks_zs(struct panfrost_batch *batch)
{
struct panfrost_context *ctx = batch->ctx;
struct pipe_depth_stencil_alpha_state *zsa = (void *) ctx->depth_stencil;
struct panfrost_context *ctx = batch->ctx;
struct pipe_depth_stencil_alpha_state *zsa = (void *)ctx->depth_stencil;
/* Assume depth is read (TODO: perf) */
if (zsa->depth_enabled)
batch->read |= PIPE_CLEAR_DEPTH;
/* Assume depth is read (TODO: perf) */
if (zsa->depth_enabled)
batch->read |= PIPE_CLEAR_DEPTH;
if (zsa->depth_writemask)
panfrost_draw_target(batch, PIPE_CLEAR_DEPTH);
if (zsa->depth_writemask)
panfrost_draw_target(batch, PIPE_CLEAR_DEPTH);
if (zsa->stencil[0].enabled) {
panfrost_draw_target(batch, PIPE_CLEAR_STENCIL);
if (zsa->stencil[0].enabled) {
panfrost_draw_target(batch, PIPE_CLEAR_STENCIL);
/* Assume stencil is read (TODO: perf) */
batch->read |= PIPE_CLEAR_STENCIL;
}
/* Assume stencil is read (TODO: perf) */
batch->read |= PIPE_CLEAR_STENCIL;
}
}
void
@@ -241,21 +232,20 @@ panfrost_track_image_access(struct panfrost_batch *batch,
enum pipe_shader_type stage,
struct pipe_image_view *image)
{
struct panfrost_resource *rsrc = pan_resource(image->resource);
struct panfrost_resource *rsrc = pan_resource(image->resource);
if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) {
panfrost_batch_write_rsrc(batch, rsrc, stage);
if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) {
panfrost_batch_write_rsrc(batch, rsrc, stage);
bool is_buffer = rsrc->base.target == PIPE_BUFFER;
unsigned level = is_buffer ? 0 : image->u.tex.level;
BITSET_SET(rsrc->valid.data, level);
bool is_buffer = rsrc->base.target == PIPE_BUFFER;
unsigned level = is_buffer ? 0 : image->u.tex.level;
BITSET_SET(rsrc->valid.data, level);
if (is_buffer) {
util_range_add(&rsrc->base, &rsrc->valid_buffer_range,
0, rsrc->base.width0);
}
} else {
panfrost_batch_read_rsrc(batch, rsrc, stage);
}
if (is_buffer) {
util_range_add(&rsrc->base, &rsrc->valid_buffer_range, 0,
rsrc->base.width0);
}
} else {
panfrost_batch_read_rsrc(batch, rsrc, stage);
}
}

File diff suppressed because it is too large.


@@ -26,8 +26,8 @@
#ifndef __PAN_JOB_H__
#define __PAN_JOB_H__
#include "util/u_dynarray.h"
#include "pipe/p_state.h"
#include "util/u_dynarray.h"
#include "pan_cs.h"
#include "pan_mempool.h"
#include "pan_resource.h"
@@ -39,11 +39,11 @@
* error. The getter needs to be used instead.
*/
struct pan_tristate {
enum {
PAN_TRISTATE_DONTCARE,
PAN_TRISTATE_FALSE,
PAN_TRISTATE_TRUE,
} v;
enum {
PAN_TRISTATE_DONTCARE,
PAN_TRISTATE_FALSE,
PAN_TRISTATE_TRUE,
} v;
};
/*
@@ -53,20 +53,20 @@ struct pan_tristate {
static bool
pan_tristate_set(struct pan_tristate *state, bool value)
{
switch (state->v) {
case PAN_TRISTATE_DONTCARE:
state->v = value ? PAN_TRISTATE_TRUE : PAN_TRISTATE_FALSE;
return true;
switch (state->v) {
case PAN_TRISTATE_DONTCARE:
state->v = value ? PAN_TRISTATE_TRUE : PAN_TRISTATE_FALSE;
return true;
case PAN_TRISTATE_FALSE:
return (value == false);
case PAN_TRISTATE_FALSE:
return (value == false);
case PAN_TRISTATE_TRUE:
return (value == true);
case PAN_TRISTATE_TRUE:
return (value == true);
default:
unreachable("Invalid tristate value");
}
default:
unreachable("Invalid tristate value");
}
}
/*
@@ -76,189 +76,179 @@ pan_tristate_set(struct pan_tristate *state, bool value)
static bool
pan_tristate_get(struct pan_tristate state)
{
return (state.v == PAN_TRISTATE_TRUE);
return (state.v == PAN_TRISTATE_TRUE);
}
/* A panfrost_batch corresponds to a bound FBO we're rendering to,
* collecting over multiple draws. */
struct panfrost_batch {
struct panfrost_context *ctx;
struct pipe_framebuffer_state key;
struct panfrost_context *ctx;
struct pipe_framebuffer_state key;
/* Sequence number used to implement LRU eviction when all batch slots are used */
uint64_t seqnum;
/* Sequence number used to implement LRU eviction when all batch slots are
* used */
uint64_t seqnum;
/* Buffers cleared (PIPE_CLEAR_* bitmask) */
unsigned clear;
/* Buffers cleared (PIPE_CLEAR_* bitmask) */
unsigned clear;
/* Buffers drawn */
unsigned draws;
/* Buffers drawn */
unsigned draws;
/* Buffers read */
unsigned read;
/* Buffers read */
unsigned read;
/* Buffers needing resolve to memory */
unsigned resolve;
/* Buffers needing resolve to memory */
unsigned resolve;
/* Packed clear values, indexed by both render target as well as word.
* Essentially, a single pixel is packed, with some padding to bring it
* up to a 32-bit interval; that pixel is then duplicated over to fill
* all 16-bytes */
/* Packed clear values, indexed by both render target as well as word.
* Essentially, a single pixel is packed, with some padding to bring it
* up to a 32-bit interval; that pixel is then duplicated over to fill
* all 16-bytes */
uint32_t clear_color[PIPE_MAX_COLOR_BUFS][4];
float clear_depth;
unsigned clear_stencil;
uint32_t clear_color[PIPE_MAX_COLOR_BUFS][4];
float clear_depth;
unsigned clear_stencil;
/* Amount of thread local storage required per thread */
unsigned stack_size;
/* Amount of thread local storage required per thread */
unsigned stack_size;
/* Amount of shared memory needed per workgroup (for compute) */
unsigned shared_size;
/* Amount of shared memory needed per workgroup (for compute) */
unsigned shared_size;
/* The bounding box covered by this job, taking scissors into account.
* Basically, the bounding box we have to run fragment shaders for */
/* The bounding box covered by this job, taking scissors into account.
* Basically, the bounding box we have to run fragment shaders for */
unsigned minx, miny;
unsigned maxx, maxy;
unsigned minx, miny;
unsigned maxx, maxy;
/* Acts as a rasterizer discard */
bool scissor_culls_everything;
/* Acts as a rasterizer discard */
bool scissor_culls_everything;
/* BOs referenced not in the pool */
unsigned num_bos;
struct util_dynarray bos;
/* BOs referenced not in the pool */
unsigned num_bos;
struct util_dynarray bos;
/* Pool owned by this batch (released when the batch is released) used for temporary descriptors */
struct panfrost_pool pool;
/* Pool owned by this batch (released when the batch is released) used for
* temporary descriptors */
struct panfrost_pool pool;
/* Pool also owned by this batch that is not CPU mapped (created as
* INVISIBLE) used for private GPU-internal structures, particularly
* varyings */
struct panfrost_pool invisible_pool;
/* Pool also owned by this batch that is not CPU mapped (created as
* INVISIBLE) used for private GPU-internal structures, particularly
* varyings */
struct panfrost_pool invisible_pool;
/* Job scoreboarding state */
struct pan_scoreboard scoreboard;
/* Job scoreboarding state */
struct pan_scoreboard scoreboard;
/* Polygon list bound to the batch, or NULL if none bound yet */
struct panfrost_bo *polygon_list;
/* Polygon list bound to the batch, or NULL if none bound yet */
struct panfrost_bo *polygon_list;
/* Scratchpad BO bound to the batch, or NULL if none bound yet */
struct panfrost_bo *scratchpad;
/* Scratchpad BO bound to the batch, or NULL if none bound yet */
struct panfrost_bo *scratchpad;
/* Shared memory BO bound to the batch, or NULL if none bound yet */
struct panfrost_bo *shared_memory;
/* Shared memory BO bound to the batch, or NULL if none bound yet */
struct panfrost_bo *shared_memory;
/* Framebuffer descriptor. */
struct panfrost_ptr framebuffer;
/* Framebuffer descriptor. */
struct panfrost_ptr framebuffer;
/* Thread local storage descriptor. */
struct panfrost_ptr tls;
/* Thread local storage descriptor. */
struct panfrost_ptr tls;
/* Tiler context */
struct pan_tiler_context tiler_ctx;
/* Tiler context */
struct pan_tiler_context tiler_ctx;
/* Keep the num_work_groups sysval around for indirect dispatch */
mali_ptr num_wg_sysval[3];
/* Keep the num_work_groups sysval around for indirect dispatch */
mali_ptr num_wg_sysval[3];
/* Cached descriptors */
mali_ptr viewport;
mali_ptr rsd[PIPE_SHADER_TYPES];
mali_ptr textures[PIPE_SHADER_TYPES];
mali_ptr samplers[PIPE_SHADER_TYPES];
mali_ptr attribs[PIPE_SHADER_TYPES];
mali_ptr attrib_bufs[PIPE_SHADER_TYPES];
mali_ptr uniform_buffers[PIPE_SHADER_TYPES];
mali_ptr push_uniforms[PIPE_SHADER_TYPES];
mali_ptr depth_stencil;
mali_ptr blend;
/* Cached descriptors */
mali_ptr viewport;
mali_ptr rsd[PIPE_SHADER_TYPES];
mali_ptr textures[PIPE_SHADER_TYPES];
mali_ptr samplers[PIPE_SHADER_TYPES];
mali_ptr attribs[PIPE_SHADER_TYPES];
mali_ptr attrib_bufs[PIPE_SHADER_TYPES];
mali_ptr uniform_buffers[PIPE_SHADER_TYPES];
mali_ptr push_uniforms[PIPE_SHADER_TYPES];
mali_ptr depth_stencil;
mali_ptr blend;
/* Valhall: struct mali_scissor_packed */
unsigned scissor[2];
float minimum_z, maximum_z;
/* Valhall: struct mali_scissor_packed */
unsigned scissor[2];
float minimum_z, maximum_z;
/* Used on Valhall only. Midgard includes attributes in-band with
* attributes, wildly enough.
*/
mali_ptr images[PIPE_SHADER_TYPES];
/* Used on Valhall only. Midgard includes attributes in-band with
* attributes, wildly enough.
*/
mali_ptr images[PIPE_SHADER_TYPES];
/* On Valhall, these are properties of the batch. On Bifrost, they are
* per draw.
*/
struct pan_tristate sprite_coord_origin;
struct pan_tristate first_provoking_vertex;
/* On Valhall, these are properties of the batch. On Bifrost, they are
* per draw.
*/
struct pan_tristate sprite_coord_origin;
struct pan_tristate first_provoking_vertex;
};
/* Functions for managing the above */
struct panfrost_batch *
panfrost_get_batch_for_fbo(struct panfrost_context *ctx);
struct panfrost_batch *panfrost_get_batch_for_fbo(struct panfrost_context *ctx);
struct panfrost_batch *
panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx, const char *reason);
panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx,
const char *reason);
void
panfrost_batch_add_bo(struct panfrost_batch *batch,
struct panfrost_bo *bo,
enum pipe_shader_type stage);
void panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo,
enum pipe_shader_type stage);
void
panfrost_batch_read_rsrc(struct panfrost_batch *batch,
struct panfrost_resource *rsrc,
enum pipe_shader_type stage);
void panfrost_batch_read_rsrc(struct panfrost_batch *batch,
struct panfrost_resource *rsrc,
enum pipe_shader_type stage);
void
panfrost_batch_write_rsrc(struct panfrost_batch *batch,
struct panfrost_resource *rsrc,
enum pipe_shader_type stage);
void panfrost_batch_write_rsrc(struct panfrost_batch *batch,
struct panfrost_resource *rsrc,
enum pipe_shader_type stage);
bool
panfrost_any_batch_reads_rsrc(struct panfrost_context *ctx,
struct panfrost_resource *rsrc);
bool panfrost_any_batch_reads_rsrc(struct panfrost_context *ctx,
struct panfrost_resource *rsrc);
bool
panfrost_any_batch_writes_rsrc(struct panfrost_context *ctx,
struct panfrost_resource *rsrc);
bool panfrost_any_batch_writes_rsrc(struct panfrost_context *ctx,
struct panfrost_resource *rsrc);
struct panfrost_bo *panfrost_batch_create_bo(struct panfrost_batch *batch,
size_t size, uint32_t create_flags,
enum pipe_shader_type stage,
const char *label);
void panfrost_flush_all_batches(struct panfrost_context *ctx,
const char *reason);
void panfrost_flush_batches_accessing_rsrc(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
const char *reason);
void panfrost_flush_writer(struct panfrost_context *ctx,
struct panfrost_resource *rsrc, const char *reason);
void panfrost_batch_adjust_stack_size(struct panfrost_batch *batch);
struct panfrost_bo *panfrost_batch_get_scratchpad(struct panfrost_batch *batch,
unsigned size,
unsigned thread_tls_alloc,
unsigned core_id_range);
struct panfrost_bo *
panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
uint32_t create_flags, enum pipe_shader_type stage,
const char *label);
panfrost_batch_get_shared_memory(struct panfrost_batch *batch, unsigned size,
unsigned workgroup_count);
void
panfrost_flush_all_batches(struct panfrost_context *ctx, const char *reason);
void panfrost_batch_clear(struct panfrost_batch *batch, unsigned buffers,
const union pipe_color_union *color, double depth,
unsigned stencil);
void
panfrost_flush_batches_accessing_rsrc(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
const char *reason);
void panfrost_batch_union_scissor(struct panfrost_batch *batch, unsigned minx,
unsigned miny, unsigned maxx, unsigned maxy);
void
panfrost_flush_writer(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
const char *reason);
void
panfrost_batch_adjust_stack_size(struct panfrost_batch *batch);
struct panfrost_bo *
panfrost_batch_get_scratchpad(struct panfrost_batch *batch, unsigned size, unsigned thread_tls_alloc, unsigned core_id_range);
struct panfrost_bo *
panfrost_batch_get_shared_memory(struct panfrost_batch *batch, unsigned size, unsigned workgroup_count);
void
panfrost_batch_clear(struct panfrost_batch *batch,
unsigned buffers,
const union pipe_color_union *color,
double depth, unsigned stencil);
void
panfrost_batch_union_scissor(struct panfrost_batch *batch,
unsigned minx, unsigned miny,
unsigned maxx, unsigned maxy);
bool
panfrost_batch_skip_rasterization(struct panfrost_batch *batch);
bool panfrost_batch_skip_rasterization(struct panfrost_batch *batch);
#endif


@ -46,124 +46,124 @@
static struct panfrost_bo *
panfrost_pool_alloc_backing(struct panfrost_pool *pool, size_t bo_sz)
{
/* We don't know what the BO will be used for, so let's flag it
* RW and attach it to both the fragment and vertex/tiler jobs.
* TODO: if we want fine grained BO assignment we should pass
* flags to this function and keep the read/write,
* fragment/vertex+tiler pools separate.
*/
struct panfrost_bo *bo = panfrost_bo_create(pool->base.dev, bo_sz,
pool->base.create_flags, pool->base.label);
/* We don't know what the BO will be used for, so let's flag it
* RW and attach it to both the fragment and vertex/tiler jobs.
* TODO: if we want fine grained BO assignment we should pass
* flags to this function and keep the read/write,
* fragment/vertex+tiler pools separate.
*/
struct panfrost_bo *bo = panfrost_bo_create(
pool->base.dev, bo_sz, pool->base.create_flags, pool->base.label);
if (pool->owned)
util_dynarray_append(&pool->bos, struct panfrost_bo *, bo);
else
panfrost_bo_unreference(pool->transient_bo);
if (pool->owned)
util_dynarray_append(&pool->bos, struct panfrost_bo *, bo);
else
panfrost_bo_unreference(pool->transient_bo);
pool->transient_bo = bo;
pool->transient_offset = 0;
pool->transient_bo = bo;
pool->transient_offset = 0;
return bo;
return bo;
}
void
panfrost_pool_init(struct panfrost_pool *pool, void *memctx,
struct panfrost_device *dev,
unsigned create_flags, size_t slab_size, const char *label,
bool prealloc, bool owned)
struct panfrost_device *dev, unsigned create_flags,
size_t slab_size, const char *label, bool prealloc,
bool owned)
{
memset(pool, 0, sizeof(*pool));
pan_pool_init(&pool->base, dev, create_flags, slab_size, label);
pool->owned = owned;
memset(pool, 0, sizeof(*pool));
pan_pool_init(&pool->base, dev, create_flags, slab_size, label);
pool->owned = owned;
if (owned)
util_dynarray_init(&pool->bos, memctx);
if (owned)
util_dynarray_init(&pool->bos, memctx);
if (prealloc)
panfrost_pool_alloc_backing(pool, pool->base.slab_size);
if (prealloc)
panfrost_pool_alloc_backing(pool, pool->base.slab_size);
}
void
panfrost_pool_cleanup(struct panfrost_pool *pool)
{
if (!pool->owned) {
panfrost_bo_unreference(pool->transient_bo);
return;
}
if (!pool->owned) {
panfrost_bo_unreference(pool->transient_bo);
return;
}
util_dynarray_foreach(&pool->bos, struct panfrost_bo *, bo)
panfrost_bo_unreference(*bo);
util_dynarray_foreach(&pool->bos, struct panfrost_bo *, bo)
panfrost_bo_unreference(*bo);
util_dynarray_fini(&pool->bos);
util_dynarray_fini(&pool->bos);
}
void
panfrost_pool_get_bo_handles(struct panfrost_pool *pool, uint32_t *handles)
{
assert(pool->owned && "pool does not track BOs in unowned mode");
assert(pool->owned && "pool does not track BOs in unowned mode");
unsigned idx = 0;
util_dynarray_foreach(&pool->bos, struct panfrost_bo *, bo) {
assert((*bo)->gem_handle > 0);
handles[idx++] = (*bo)->gem_handle;
unsigned idx = 0;
util_dynarray_foreach(&pool->bos, struct panfrost_bo *, bo) {
assert((*bo)->gem_handle > 0);
handles[idx++] = (*bo)->gem_handle;
/* Update the BO access flags so that panfrost_bo_wait() knows
* about all pending accesses.
* We only keep the READ/WRITE info since this is all the BO
* wait logic cares about.
* We also preserve existing flags as this batch might not
* be the first one to access the BO.
*/
(*bo)->gpu_access |= PAN_BO_ACCESS_RW;
}
/* Update the BO access flags so that panfrost_bo_wait() knows
* about all pending accesses.
* We only keep the READ/WRITE info since this is all the BO
* wait logic cares about.
* We also preserve existing flags as this batch might not
* be the first one to access the BO.
*/
(*bo)->gpu_access |= PAN_BO_ACCESS_RW;
}
}
#define PAN_GUARD_SIZE 4096
static struct panfrost_ptr
panfrost_pool_alloc_aligned(struct panfrost_pool *pool, size_t sz, unsigned alignment)
panfrost_pool_alloc_aligned(struct panfrost_pool *pool, size_t sz,
unsigned alignment)
{
assert(alignment == util_next_power_of_two(alignment));
assert(alignment == util_next_power_of_two(alignment));
/* Find or create a suitable BO */
struct panfrost_bo *bo = pool->transient_bo;
unsigned offset = ALIGN_POT(pool->transient_offset, alignment);
/* Find or create a suitable BO */
struct panfrost_bo *bo = pool->transient_bo;
unsigned offset = ALIGN_POT(pool->transient_offset, alignment);
#ifdef PAN_DBG_OVERFLOW
if (unlikely(pool->base.dev->debug & PAN_DBG_OVERFLOW) &&
!(pool->base.create_flags & PAN_BO_INVISIBLE)) {
unsigned aligned = ALIGN_POT(sz, sysconf(_SC_PAGESIZE));
unsigned bo_size = aligned + PAN_GUARD_SIZE;
if (unlikely(pool->base.dev->debug & PAN_DBG_OVERFLOW) &&
!(pool->base.create_flags & PAN_BO_INVISIBLE)) {
unsigned aligned = ALIGN_POT(sz, sysconf(_SC_PAGESIZE));
unsigned bo_size = aligned + PAN_GUARD_SIZE;
bo = panfrost_pool_alloc_backing(pool, bo_size);
memset(bo->ptr.cpu, 0xbb, bo_size);
bo = panfrost_pool_alloc_backing(pool, bo_size);
memset(bo->ptr.cpu, 0xbb, bo_size);
/* Place the object as close as possible to the protected
* region at the end of the buffer while keeping alignment. */
offset = ROUND_DOWN_TO(aligned - sz, alignment);
/* Place the object as close as possible to the protected
* region at the end of the buffer while keeping alignment. */
offset = ROUND_DOWN_TO(aligned - sz, alignment);
if (mprotect(bo->ptr.cpu + aligned,
PAN_GUARD_SIZE, PROT_NONE) == -1)
perror("mprotect");
if (mprotect(bo->ptr.cpu + aligned, PAN_GUARD_SIZE, PROT_NONE) == -1)
perror("mprotect");
pool->transient_bo = NULL;
}
pool->transient_bo = NULL;
}
#endif
/* If we don't fit, allocate a new backing */
if (unlikely(bo == NULL || (offset + sz) >= pool->base.slab_size)) {
bo = panfrost_pool_alloc_backing(pool,
ALIGN_POT(MAX2(pool->base.slab_size, sz), 4096));
offset = 0;
}
/* If we don't fit, allocate a new backing */
if (unlikely(bo == NULL || (offset + sz) >= pool->base.slab_size)) {
bo = panfrost_pool_alloc_backing(
pool, ALIGN_POT(MAX2(pool->base.slab_size, sz), 4096));
offset = 0;
}
pool->transient_offset = offset + sz;
pool->transient_offset = offset + sz;
struct panfrost_ptr ret = {
.cpu = bo->ptr.cpu + offset,
.gpu = bo->ptr.gpu + offset,
};
struct panfrost_ptr ret = {
.cpu = bo->ptr.cpu + offset,
.gpu = bo->ptr.gpu + offset,
};
return ret;
return ret;
}
PAN_POOL_ALLOCATOR(struct panfrost_pool, panfrost_pool_alloc_aligned)


@ -31,37 +31,37 @@
be unowned for persistent uploads. */
struct panfrost_pool {
/* Inherit from pan_pool */
struct pan_pool base;
/* Inherit from pan_pool */
struct pan_pool base;
/* BOs allocated by this pool */
struct util_dynarray bos;
/* BOs allocated by this pool */
struct util_dynarray bos;
/* Current transient BO */
struct panfrost_bo *transient_bo;
/* Current transient BO */
struct panfrost_bo *transient_bo;
/* Within the topmost transient BO, how much has been used? */
unsigned transient_offset;
/* Within the topmost transient BO, how much has been used? */
unsigned transient_offset;
/* Mode of the pool. BO management is in the pool for owned mode, but
* the consumed for unowned mode. */
bool owned;
/* Mode of the pool. BO management is in the pool for owned mode, but
* the consumed for unowned mode. */
bool owned;
};
static inline struct panfrost_pool *
to_panfrost_pool(struct pan_pool *pool)
{
return container_of(pool, struct panfrost_pool, base);
return container_of(pool, struct panfrost_pool, base);
}
/* Reference to pool allocated memory for an unowned pool */
struct panfrost_pool_ref {
/* Owning BO */
struct panfrost_bo *bo;
/* Owning BO */
struct panfrost_bo *bo;
/* Mapped GPU VA */
mali_ptr gpu;
/* Mapped GPU VA */
mali_ptr gpu;
};
/* Take a reference to an allocation pool. Call directly after allocating from
@ -70,32 +70,30 @@ struct panfrost_pool_ref {
static inline struct panfrost_pool_ref
panfrost_pool_take_ref(struct panfrost_pool *pool, mali_ptr ptr)
{
if (!pool->owned)
panfrost_bo_reference(pool->transient_bo);
if (!pool->owned)
panfrost_bo_reference(pool->transient_bo);
return (struct panfrost_pool_ref) {
.bo = pool->transient_bo,
.gpu = ptr,
};
return (struct panfrost_pool_ref){
.bo = pool->transient_bo,
.gpu = ptr,
};
}
void
panfrost_pool_init(struct panfrost_pool *pool, void *memctx,
struct panfrost_device *dev, unsigned create_flags,
size_t slab_size, const char *label, bool prealloc, bool
owned);
void panfrost_pool_init(struct panfrost_pool *pool, void *memctx,
struct panfrost_device *dev, unsigned create_flags,
size_t slab_size, const char *label, bool prealloc,
bool owned);
void
panfrost_pool_cleanup(struct panfrost_pool *pool);
void panfrost_pool_cleanup(struct panfrost_pool *pool);
static inline unsigned
panfrost_pool_num_bos(struct panfrost_pool *pool)
{
assert(pool->owned && "pool does not track BOs in unowned mode");
return util_dynarray_num_elements(&pool->bos, struct panfrost_bo *);
assert(pool->owned && "pool does not track BOs in unowned mode");
return util_dynarray_num_elements(&pool->bos, struct panfrost_bo *);
}
void
panfrost_pool_get_bo_handles(struct panfrost_pool *pool, uint32_t *handles);
void panfrost_pool_get_bo_handles(struct panfrost_pool *pool,
uint32_t *handles);
#endif


@ -31,8 +31,7 @@ extern "C" {
struct pipe_screen;
struct renderonly;
struct pipe_screen *
panfrost_create_screen(int fd, struct renderonly *ro);
struct pipe_screen *panfrost_create_screen(int fd, struct renderonly *ro);
#ifdef __cplusplus
}

File diff suppressed because it is too large


@ -22,87 +22,86 @@
*
*/
#ifndef PAN_RESOURCE_H
#define PAN_RESOURCE_H
#include "pan_screen.h"
#include "pan_minmax_cache.h"
#include "pan_texture.h"
#include "drm-uapi/drm.h"
#include "util/u_range.h"
#include "pan_minmax_cache.h"
#include "pan_screen.h"
#include "pan_texture.h"
#define LAYOUT_CONVERT_THRESHOLD 8
#define PAN_MAX_BATCHES 32
#define PAN_MAX_BATCHES 32
#define PAN_BIND_SHARED_MASK (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | \
PIPE_BIND_SHARED)
#define PAN_BIND_SHARED_MASK \
(PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)
struct panfrost_resource {
struct pipe_resource base;
struct {
struct pipe_scissor_state extent;
struct {
bool enable;
unsigned stride;
unsigned size;
BITSET_WORD *data;
} tile_map;
} damage;
struct pipe_resource base;
struct {
struct pipe_scissor_state extent;
struct {
bool enable;
unsigned stride;
unsigned size;
BITSET_WORD *data;
} tile_map;
} damage;
struct renderonly_scanout *scanout;
struct renderonly_scanout *scanout;
struct panfrost_resource *separate_stencil;
struct panfrost_resource *separate_stencil;
struct util_range valid_buffer_range;
struct util_range valid_buffer_range;
/* Description of the resource layout */
struct pan_image image;
/* Description of the resource layout */
struct pan_image image;
struct {
/* Is the checksum for this image valid? Implicitly refers to
* the first slice; we only checksum non-mipmapped 2D images */
bool crc;
struct {
/* Is the checksum for this image valid? Implicitly refers to
* the first slice; we only checksum non-mipmapped 2D images */
bool crc;
/* Has anything been written to this slice? */
BITSET_DECLARE(data, MAX_MIP_LEVELS);
} valid;
/* Has anything been written to this slice? */
BITSET_DECLARE(data, MAX_MIP_LEVELS);
} valid;
/* Whether the modifier can be changed */
bool modifier_constant;
/* Whether the modifier can be changed */
bool modifier_constant;
/* Used to decide when to convert to another modifier */
uint16_t modifier_updates;
/* Used to decide when to convert to another modifier */
uint16_t modifier_updates;
/* Do all pixels have the same stencil value? */
bool constant_stencil;
/* Do all pixels have the same stencil value? */
bool constant_stencil;
/* The stencil value if constant_stencil is set */
uint8_t stencil_value;
/* The stencil value if constant_stencil is set */
uint8_t stencil_value;
/* Cached min/max values for index buffers */
struct panfrost_minmax_cache *index_cache;
/* Cached min/max values for index buffers */
struct panfrost_minmax_cache *index_cache;
};
static inline struct panfrost_resource *
pan_resource(struct pipe_resource *p)
{
return (struct panfrost_resource *)p;
return (struct panfrost_resource *)p;
}
struct panfrost_transfer {
struct pipe_transfer base;
void *map;
struct {
struct pipe_resource *rsrc;
struct pipe_box box;
} staging;
struct pipe_transfer base;
void *map;
struct {
struct pipe_resource *rsrc;
struct pipe_box box;
} staging;
};
static inline struct panfrost_transfer *
pan_transfer(struct pipe_transfer *p)
{
return (struct panfrost_transfer *)p;
return (struct panfrost_transfer *)p;
}
void panfrost_resource_screen_init(struct pipe_screen *screen);
@ -113,53 +112,48 @@ void panfrost_resource_context_init(struct pipe_context *pctx);
/* Blitting */
void
panfrost_blitter_save(struct panfrost_context *ctx, bool render_cond);
void panfrost_blitter_save(struct panfrost_context *ctx, bool render_cond);
void
panfrost_blit(struct pipe_context *pipe,
const struct pipe_blit_info *info);
void panfrost_blit(struct pipe_context *pipe,
const struct pipe_blit_info *info);
void
panfrost_resource_set_damage_region(struct pipe_screen *screen,
struct pipe_resource *res,
unsigned int nrects,
const struct pipe_box *rects);
void panfrost_resource_set_damage_region(struct pipe_screen *screen,
struct pipe_resource *res,
unsigned int nrects,
const struct pipe_box *rects);
static inline enum mali_texture_dimension
panfrost_translate_texture_dimension(enum pipe_texture_target t) {
switch (t)
{
case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY:
return MALI_TEXTURE_DIMENSION_1D;
panfrost_translate_texture_dimension(enum pipe_texture_target t)
{
switch (t) {
case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY:
return MALI_TEXTURE_DIMENSION_1D;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_RECT:
return MALI_TEXTURE_DIMENSION_2D;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_RECT:
return MALI_TEXTURE_DIMENSION_2D;
case PIPE_TEXTURE_3D:
return MALI_TEXTURE_DIMENSION_3D;
case PIPE_TEXTURE_3D:
return MALI_TEXTURE_DIMENSION_3D;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
return MALI_TEXTURE_DIMENSION_CUBE;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
return MALI_TEXTURE_DIMENSION_CUBE;
default:
unreachable("Unknown target");
}
default:
unreachable("Unknown target");
}
}
void
pan_resource_modifier_convert(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
uint64_t modifier, const char *reason);
void pan_resource_modifier_convert(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
uint64_t modifier, const char *reason);
void
pan_legalize_afbc_format(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
enum pipe_format format);
void pan_legalize_afbc_format(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
enum pipe_format format);
#endif /* PAN_RESOURCE_H */

File diff suppressed because it is too large


@ -30,14 +30,14 @@
#define PAN_SCREEN_H
#include <xf86drm.h>
#include "pipe/p_screen.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "renderonly/renderonly.h"
#include "util/u_dynarray.h"
#include "util/bitset.h"
#include "util/set.h"
#include "util/log.h"
#include "util/disk_cache.h"
#include "util/log.h"
#include "util/set.h"
#include "util/u_dynarray.h"
#include "pan_device.h"
#include "pan_mempool.h"
@ -45,7 +45,7 @@
#define PAN_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
static const struct pipe_driver_query_info panfrost_driver_query_list[] = {
{"draw-calls", PAN_QUERY_DRAW_CALLS, { 0 }},
{"draw-calls", PAN_QUERY_DRAW_CALLS, {0}},
};
struct panfrost_batch;
@ -58,77 +58,74 @@ struct pan_blend_state;
/* Virtual table of per-generation (GenXML) functions */
struct panfrost_vtable {
/* Prepares the renderer state descriptor or shader program descriptor
* for a given compiled shader, and if desired uploads it as well */
void (*prepare_shader)(struct panfrost_compiled_shader *,
struct panfrost_pool *, bool);
/* Prepares the renderer state descriptor or shader program descriptor
* for a given compiled shader, and if desired uploads it as well */
void (*prepare_shader)(struct panfrost_compiled_shader *,
struct panfrost_pool *, bool);
/* Emits a thread local storage descriptor */
void (*emit_tls)(struct panfrost_batch *);
/* Emits a thread local storage descriptor */
void (*emit_tls)(struct panfrost_batch *);
/* Emits a framebuffer descriptor */
void (*emit_fbd)(struct panfrost_batch *, const struct pan_fb_info *);
/* Emits a framebuffer descriptor */
void (*emit_fbd)(struct panfrost_batch *, const struct pan_fb_info *);
/* Emits a fragment job */
mali_ptr (*emit_fragment_job)(struct panfrost_batch *, const struct pan_fb_info *);
/* Emits a fragment job */
mali_ptr (*emit_fragment_job)(struct panfrost_batch *,
const struct pan_fb_info *);
/* General destructor */
void (*screen_destroy)(struct pipe_screen *);
/* General destructor */
void (*screen_destroy)(struct pipe_screen *);
/* Preload framebuffer */
void (*preload)(struct panfrost_batch *, struct pan_fb_info *);
/* Preload framebuffer */
void (*preload)(struct panfrost_batch *, struct pan_fb_info *);
/* Initialize a Gallium context */
void (*context_init)(struct pipe_context *pipe);
/* Initialize a Gallium context */
void (*context_init)(struct pipe_context *pipe);
/* Device-dependent initialization of a panfrost_batch */
void (*init_batch)(struct panfrost_batch *batch);
/* Device-dependent initialization of a panfrost_batch */
void (*init_batch)(struct panfrost_batch *batch);
/* Get blend shader */
struct pan_blend_shader_variant *
(*get_blend_shader)(const struct panfrost_device *,
const struct pan_blend_state *,
nir_alu_type, nir_alu_type,
unsigned rt);
/* Get blend shader */
struct pan_blend_shader_variant *(*get_blend_shader)(
const struct panfrost_device *, const struct pan_blend_state *,
nir_alu_type, nir_alu_type, unsigned rt);
/* Initialize the polygon list */
void (*init_polygon_list)(struct panfrost_batch *);
/* Initialize the polygon list */
void (*init_polygon_list)(struct panfrost_batch *);
/* Shader compilation methods */
const nir_shader_compiler_options *(*get_compiler_options)(void);
void (*compile_shader)(nir_shader *s,
struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info);
/* Shader compilation methods */
const nir_shader_compiler_options *(*get_compiler_options)(void);
void (*compile_shader)(nir_shader *s, struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info);
};
struct panfrost_screen {
struct pipe_screen base;
struct panfrost_device dev;
struct {
struct panfrost_pool bin_pool;
struct panfrost_pool desc_pool;
} blitter;
struct pipe_screen base;
struct panfrost_device dev;
struct {
struct panfrost_pool bin_pool;
struct panfrost_pool desc_pool;
} blitter;
struct panfrost_vtable vtbl;
struct disk_cache *disk_cache;
struct panfrost_vtable vtbl;
struct disk_cache *disk_cache;
};
static inline struct panfrost_screen *
pan_screen(struct pipe_screen *p)
{
return (struct panfrost_screen *)p;
return (struct panfrost_screen *)p;
}
static inline struct panfrost_device *
pan_device(struct pipe_screen *p)
{
return &(pan_screen(p)->dev);
return &(pan_screen(p)->dev);
}
int
panfrost_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
struct pipe_driver_query_info *info);
int panfrost_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
struct pipe_driver_query_info *info);
void panfrost_cmdstream_screen_init_v4(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v5(struct panfrost_screen *screen);
@ -136,13 +133,13 @@ void panfrost_cmdstream_screen_init_v6(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v7(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v9(struct panfrost_screen *screen);
#define perf_debug(dev, ...) \
do { \
if (unlikely((dev)->debug & PAN_DBG_PERF)) \
mesa_logw(__VA_ARGS__); \
} while(0)
#define perf_debug(dev, ...) \
do { \
if (unlikely((dev)->debug & PAN_DBG_PERF)) \
mesa_logw(__VA_ARGS__); \
} while (0)
#define perf_debug_ctx(ctx, ...) \
perf_debug(pan_device((ctx)->base.screen), __VA_ARGS__);
#define perf_debug_ctx(ctx, ...) \
perf_debug(pan_device((ctx)->base.screen), __VA_ARGS__);
#endif /* PAN_SCREEN_H */


@ -28,103 +28,96 @@
*
*/
#include "pan_context.h"
#include "pan_bo.h"
#include "pan_shader.h"
#include "util/u_memory.h"
#include "nir/tgsi_to_nir.h"
#include "util/u_memory.h"
#include "nir_serialize.h"
#include "pan_bo.h"
#include "pan_context.h"
static struct panfrost_uncompiled_shader *
panfrost_alloc_shader(const nir_shader *nir)
{
struct panfrost_uncompiled_shader *so =
rzalloc(NULL, struct panfrost_uncompiled_shader);
struct panfrost_uncompiled_shader *so =
rzalloc(NULL, struct panfrost_uncompiled_shader);
simple_mtx_init(&so->lock, mtx_plain);
util_dynarray_init(&so->variants, so);
simple_mtx_init(&so->lock, mtx_plain);
util_dynarray_init(&so->variants, so);
so->nir = nir;
so->nir = nir;
/* Serialize the NIR to a binary blob that we can hash for the disk
* cache. Drop unnecessary information (like variable names) so the
* serialized NIR is smaller, and also to let us detect more isomorphic
* shaders when hashing, increasing cache hits.
*/
struct blob blob;
blob_init(&blob);
nir_serialize(&blob, nir, true);
_mesa_sha1_compute(blob.data, blob.size, so->nir_sha1);
blob_finish(&blob);
/* Serialize the NIR to a binary blob that we can hash for the disk
* cache. Drop unnecessary information (like variable names) so the
* serialized NIR is smaller, and also to let us detect more isomorphic
* shaders when hashing, increasing cache hits.
*/
struct blob blob;
blob_init(&blob);
nir_serialize(&blob, nir, true);
_mesa_sha1_compute(blob.data, blob.size, so->nir_sha1);
blob_finish(&blob);
return so;
return so;
}
static struct panfrost_compiled_shader *
panfrost_alloc_variant(struct panfrost_uncompiled_shader *so)
{
return util_dynarray_grow(&so->variants, struct panfrost_compiled_shader, 1);
return util_dynarray_grow(&so->variants, struct panfrost_compiled_shader, 1);
}
static void
panfrost_shader_compile(struct panfrost_screen *screen,
const nir_shader *ir,
panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
struct util_debug_callback *dbg,
struct panfrost_shader_key *key,
unsigned req_local_mem,
struct panfrost_shader_key *key, unsigned req_local_mem,
unsigned fixed_varying_mask,
struct panfrost_shader_binary *out)
{
struct panfrost_device *dev = pan_device(&screen->base);
struct panfrost_device *dev = pan_device(&screen->base);
nir_shader *s = nir_shader_clone(NULL, ir);
nir_shader *s = nir_shader_clone(NULL, ir);
struct panfrost_compile_inputs inputs = {
.debug = dbg,
.gpu_id = dev->gpu_id,
.fixed_sysval_ubo = -1,
};
struct panfrost_compile_inputs inputs = {
.debug = dbg,
.gpu_id = dev->gpu_id,
.fixed_sysval_ubo = -1,
};
/* Lower this early so the backends don't have to worry about it */
if (s->info.stage == MESA_SHADER_FRAGMENT) {
inputs.fixed_varying_mask = key->fs.fixed_varying_mask;
/* Lower this early so the backends don't have to worry about it */
if (s->info.stage == MESA_SHADER_FRAGMENT) {
inputs.fixed_varying_mask = key->fs.fixed_varying_mask;
if (s->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
NIR_PASS_V(s, nir_lower_fragcolor,
key->fs.nr_cbufs_for_fragcolor);
}
if (s->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
NIR_PASS_V(s, nir_lower_fragcolor, key->fs.nr_cbufs_for_fragcolor);
}
if (key->fs.sprite_coord_enable) {
NIR_PASS_V(s, nir_lower_texcoord_replace,
key->fs.sprite_coord_enable,
true /* point coord is sysval */,
false /* Y-invert */);
}
if (key->fs.sprite_coord_enable) {
NIR_PASS_V(s, nir_lower_texcoord_replace, key->fs.sprite_coord_enable,
true /* point coord is sysval */, false /* Y-invert */);
}
if (key->fs.clip_plane_enable) {
NIR_PASS_V(s, nir_lower_clip_fs,
key->fs.clip_plane_enable,
false);
}
if (key->fs.clip_plane_enable) {
NIR_PASS_V(s, nir_lower_clip_fs, key->fs.clip_plane_enable, false);
}
memcpy(inputs.rt_formats, key->fs.rt_formats, sizeof(inputs.rt_formats));
} else if (s->info.stage == MESA_SHADER_VERTEX) {
inputs.fixed_varying_mask = fixed_varying_mask;
memcpy(inputs.rt_formats, key->fs.rt_formats, sizeof(inputs.rt_formats));
} else if (s->info.stage == MESA_SHADER_VERTEX) {
inputs.fixed_varying_mask = fixed_varying_mask;
/* No IDVS for internal XFB shaders */
inputs.no_idvs = s->info.has_transform_feedback_varyings;
}
/* No IDVS for internal XFB shaders */
inputs.no_idvs = s->info.has_transform_feedback_varyings;
}
util_dynarray_init(&out->binary, NULL);
screen->vtbl.compile_shader(s, &inputs, &out->binary, &out->info);
util_dynarray_init(&out->binary, NULL);
screen->vtbl.compile_shader(s, &inputs, &out->binary, &out->info);
assert(req_local_mem >= out->info.wls_size);
out->info.wls_size = req_local_mem;
assert(req_local_mem >= out->info.wls_size);
out->info.wls_size = req_local_mem;
/* In both clone and tgsi_to_nir paths, the shader is ralloc'd against
* a NULL context
*/
ralloc_free(s);
/* In both clone and tgsi_to_nir paths, the shader is ralloc'd against
* a NULL context
*/
ralloc_free(s);
}
static void
@ -136,287 +129,288 @@ panfrost_shader_get(struct pipe_screen *pscreen,
struct panfrost_compiled_shader *state,
unsigned req_local_mem)
{
struct panfrost_screen *screen = pan_screen(pscreen);
struct panfrost_device *dev = pan_device(pscreen);
struct panfrost_screen *screen = pan_screen(pscreen);
struct panfrost_device *dev = pan_device(pscreen);
struct panfrost_shader_binary res = { 0 };
struct panfrost_shader_binary res = {0};
/* Try to retrieve the variant from the disk cache. If that fails,
* compile a new variant and store in the disk cache for later reuse.
*/
if (!panfrost_disk_cache_retrieve(screen->disk_cache, uncompiled, &state->key, &res)) {
panfrost_shader_compile(screen, uncompiled->nir, dbg, &state->key,
req_local_mem,
uncompiled->fixed_varying_mask, &res);
/* Try to retrieve the variant from the disk cache. If that fails,
* compile a new variant and store in the disk cache for later reuse.
*/
if (!panfrost_disk_cache_retrieve(screen->disk_cache, uncompiled,
&state->key, &res)) {
panfrost_shader_compile(screen, uncompiled->nir, dbg, &state->key,
req_local_mem, uncompiled->fixed_varying_mask,
&res);
panfrost_disk_cache_store(screen->disk_cache, uncompiled, &state->key, &res);
}
panfrost_disk_cache_store(screen->disk_cache, uncompiled, &state->key,
&res);
}
state->info = res.info;
state->info = res.info;
if (res.binary.size) {
state->bin = panfrost_pool_take_ref(shader_pool,
pan_pool_upload_aligned(&shader_pool->base,
res.binary.data, res.binary.size, 128));
}
if (res.binary.size) {
state->bin = panfrost_pool_take_ref(
shader_pool,
pan_pool_upload_aligned(&shader_pool->base, res.binary.data,
res.binary.size, 128));
}
util_dynarray_fini(&res.binary);
util_dynarray_fini(&res.binary);
/* Don't upload RSD for fragment shaders since they need draw-time
* merging for e.g. depth/stencil/alpha. RSDs are replaced by simpler
* shader program descriptors on Valhall, which can be preuploaded even
* for fragment shaders. */
bool upload = !(uncompiled->nir->info.stage == MESA_SHADER_FRAGMENT && dev->arch <= 7);
screen->vtbl.prepare_shader(state, desc_pool, upload);
/* Don't upload RSD for fragment shaders since they need draw-time
* merging for e.g. depth/stencil/alpha. RSDs are replaced by simpler
* shader program descriptors on Valhall, which can be preuploaded even
* for fragment shaders. */
bool upload =
!(uncompiled->nir->info.stage == MESA_SHADER_FRAGMENT && dev->arch <= 7);
screen->vtbl.prepare_shader(state, desc_pool, upload);
panfrost_analyze_sysvals(state);
panfrost_analyze_sysvals(state);
}
static void
panfrost_build_key(struct panfrost_context *ctx,
struct panfrost_shader_key *key,
const nir_shader *nir)
struct panfrost_shader_key *key, const nir_shader *nir)
{
/* We don't currently have vertex shader variants */
if (nir->info.stage != MESA_SHADER_FRAGMENT)
return;
/* We don't currently have vertex shader variants */
if (nir->info.stage != MESA_SHADER_FRAGMENT)
return;
struct panfrost_device *dev = pan_device(ctx->base.screen);
struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
struct pipe_rasterizer_state *rast = (void *) ctx->rasterizer;
struct panfrost_uncompiled_shader *vs = ctx->uncompiled[MESA_SHADER_VERTEX];
struct panfrost_device *dev = pan_device(ctx->base.screen);
struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
struct pipe_rasterizer_state *rast = (void *)ctx->rasterizer;
struct panfrost_uncompiled_shader *vs = ctx->uncompiled[MESA_SHADER_VERTEX];
/* gl_FragColor lowering needs the number of colour buffers */
if (nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
key->fs.nr_cbufs_for_fragcolor = fb->nr_cbufs;
}
/* gl_FragColor lowering needs the number of colour buffers */
if (nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
key->fs.nr_cbufs_for_fragcolor = fb->nr_cbufs;
}
/* Point sprite lowering needed on Bifrost and newer */
if (dev->arch >= 6 && rast && ctx->active_prim == PIPE_PRIM_POINTS) {
key->fs.sprite_coord_enable = rast->sprite_coord_enable;
}
/* Point sprite lowering needed on Bifrost and newer */
if (dev->arch >= 6 && rast && ctx->active_prim == PIPE_PRIM_POINTS) {
key->fs.sprite_coord_enable = rast->sprite_coord_enable;
}
/* User clip plane lowering needed everywhere */
if (rast) {
key->fs.clip_plane_enable = rast->clip_plane_enable;
}
/* User clip plane lowering needed everywhere */
if (rast) {
key->fs.clip_plane_enable = rast->clip_plane_enable;
}
if (dev->arch <= 5) {
u_foreach_bit(i, (nir->info.outputs_read >> FRAG_RESULT_DATA0)) {
enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM;
if (dev->arch <= 5) {
u_foreach_bit(i, (nir->info.outputs_read >> FRAG_RESULT_DATA0)) {
enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM;
if ((fb->nr_cbufs > i) && fb->cbufs[i])
fmt = fb->cbufs[i]->format;
if ((fb->nr_cbufs > i) && fb->cbufs[i])
fmt = fb->cbufs[i]->format;
if (panfrost_blendable_formats_v6[fmt].internal)
fmt = PIPE_FORMAT_NONE;
if (panfrost_blendable_formats_v6[fmt].internal)
fmt = PIPE_FORMAT_NONE;
key->fs.rt_formats[i] = fmt;
}
}
key->fs.rt_formats[i] = fmt;
}
}
/* Funny desktop GL varying lowering on Valhall */
if (dev->arch >= 9) {
assert(vs != NULL && "too early");
key->fs.fixed_varying_mask = vs->fixed_varying_mask;
}
/* Funny desktop GL varying lowering on Valhall */
if (dev->arch >= 9) {
assert(vs != NULL && "too early");
key->fs.fixed_varying_mask = vs->fixed_varying_mask;
}
}
static struct panfrost_compiled_shader *
panfrost_new_variant_locked(
struct panfrost_context *ctx,
struct panfrost_uncompiled_shader *uncompiled,
struct panfrost_shader_key *key)
panfrost_new_variant_locked(struct panfrost_context *ctx,
struct panfrost_uncompiled_shader *uncompiled,
struct panfrost_shader_key *key)
{
struct panfrost_compiled_shader *prog = panfrost_alloc_variant(uncompiled);
struct panfrost_compiled_shader *prog = panfrost_alloc_variant(uncompiled);
*prog = (struct panfrost_compiled_shader) {
.key = *key,
.stream_output = uncompiled->stream_output,
};
*prog = (struct panfrost_compiled_shader){
.key = *key,
.stream_output = uncompiled->stream_output,
};
panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs,
uncompiled, &ctx->base.debug, prog, 0);
panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs, uncompiled,
&ctx->base.debug, prog, 0);
prog->earlyzs = pan_earlyzs_analyze(&prog->info);
prog->earlyzs = pan_earlyzs_analyze(&prog->info);
return prog;
return prog;
}
static void
panfrost_bind_shader_state(
struct pipe_context *pctx,
void *hwcso,
enum pipe_shader_type type)
panfrost_bind_shader_state(struct pipe_context *pctx, void *hwcso,
enum pipe_shader_type type)
{
struct panfrost_context *ctx = pan_context(pctx);
ctx->uncompiled[type] = hwcso;
ctx->prog[type] = NULL;
struct panfrost_context *ctx = pan_context(pctx);
ctx->uncompiled[type] = hwcso;
ctx->prog[type] = NULL;
ctx->dirty |= PAN_DIRTY_TLS_SIZE;
ctx->dirty_shader[type] |= PAN_DIRTY_STAGE_SHADER;
ctx->dirty |= PAN_DIRTY_TLS_SIZE;
ctx->dirty_shader[type] |= PAN_DIRTY_STAGE_SHADER;
if (hwcso)
panfrost_update_shader_variant(ctx, type);
if (hwcso)
panfrost_update_shader_variant(ctx, type);
}
void
panfrost_update_shader_variant(struct panfrost_context *ctx,
enum pipe_shader_type type)
{
/* No shader variants for compute */
if (type == PIPE_SHADER_COMPUTE)
return;
/* No shader variants for compute */
if (type == PIPE_SHADER_COMPUTE)
return;
/* We need linking information, defer this */
if (type == PIPE_SHADER_FRAGMENT && !ctx->uncompiled[PIPE_SHADER_VERTEX])
return;
/* We need linking information, defer this */
if (type == PIPE_SHADER_FRAGMENT && !ctx->uncompiled[PIPE_SHADER_VERTEX])
return;
/* Also defer, happens with GALLIUM_HUD */
if (!ctx->uncompiled[type])
return;
/* Also defer, happens with GALLIUM_HUD */
if (!ctx->uncompiled[type])
return;
/* Match the appropriate variant */
struct panfrost_uncompiled_shader *uncompiled = ctx->uncompiled[type];
struct panfrost_compiled_shader *compiled = NULL;
/* Match the appropriate variant */
struct panfrost_uncompiled_shader *uncompiled = ctx->uncompiled[type];
struct panfrost_compiled_shader *compiled = NULL;
simple_mtx_lock(&uncompiled->lock);
simple_mtx_lock(&uncompiled->lock);
struct panfrost_shader_key key = { 0 };
panfrost_build_key(ctx, &key, uncompiled->nir);
struct panfrost_shader_key key = {0};
panfrost_build_key(ctx, &key, uncompiled->nir);
util_dynarray_foreach(&uncompiled->variants, struct panfrost_compiled_shader, so) {
if (memcmp(&key, &so->key, sizeof(key)) == 0) {
compiled = so;
break;
}
}
util_dynarray_foreach(&uncompiled->variants, struct panfrost_compiled_shader,
so) {
if (memcmp(&key, &so->key, sizeof(key)) == 0) {
compiled = so;
break;
}
}
if (compiled == NULL)
compiled = panfrost_new_variant_locked(ctx, uncompiled, &key);
if (compiled == NULL)
compiled = panfrost_new_variant_locked(ctx, uncompiled, &key);
ctx->prog[type] = compiled;
ctx->prog[type] = compiled;
/* TODO: it would be more efficient to release the lock before
* compiling instead of after, but that can race if thread A compiles a
* variant while thread B searches for that same variant */
simple_mtx_unlock(&uncompiled->lock);
/* TODO: it would be more efficient to release the lock before
* compiling instead of after, but that can race if thread A compiles a
* variant while thread B searches for that same variant */
simple_mtx_unlock(&uncompiled->lock);
}
static void
panfrost_bind_vs_state(struct pipe_context *pctx, void *hwcso)
{
panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_VERTEX);
panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_VERTEX);
/* Fragment shaders are linked with vertex shaders */
struct panfrost_context *ctx = pan_context(pctx);
panfrost_update_shader_variant(ctx, PIPE_SHADER_FRAGMENT);
/* Fragment shaders are linked with vertex shaders */
struct panfrost_context *ctx = pan_context(pctx);
panfrost_update_shader_variant(ctx, PIPE_SHADER_FRAGMENT);
}
static void
panfrost_bind_fs_state(struct pipe_context *pctx, void *hwcso)
{
panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_FRAGMENT);
panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_FRAGMENT);
}
static void *
panfrost_create_shader_state(
struct pipe_context *pctx,
const struct pipe_shader_state *cso)
panfrost_create_shader_state(struct pipe_context *pctx,
const struct pipe_shader_state *cso)
{
nir_shader *nir = (cso->type == PIPE_SHADER_IR_TGSI) ?
tgsi_to_nir(cso->tokens, pctx->screen, false) :
cso->ir.nir;
nir_shader *nir = (cso->type == PIPE_SHADER_IR_TGSI)
? tgsi_to_nir(cso->tokens, pctx->screen, false)
: cso->ir.nir;
struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(nir);
struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(nir);
/* The driver gets ownership of the nir_shader for graphics. The NIR is
* ralloc'd. Free the NIR when we free the uncompiled shader.
*/
ralloc_steal(so, nir);
/* The driver gets ownership of the nir_shader for graphics. The NIR is
* ralloc'd. Free the NIR when we free the uncompiled shader.
*/
ralloc_steal(so, nir);
so->stream_output = cso->stream_output;
so->nir = nir;
so->stream_output = cso->stream_output;
so->nir = nir;
/* Fix linkage early */
if (so->nir->info.stage == MESA_SHADER_VERTEX) {
so->fixed_varying_mask =
(so->nir->info.outputs_written & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
}
/* Fix linkage early */
if (so->nir->info.stage == MESA_SHADER_VERTEX) {
so->fixed_varying_mask =
(so->nir->info.outputs_written & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
}
/* If this shader uses transform feedback, compile the transform
* feedback program. This is a special shader variant.
*/
struct panfrost_context *ctx = pan_context(pctx);
/* If this shader uses transform feedback, compile the transform
* feedback program. This is a special shader variant.
*/
struct panfrost_context *ctx = pan_context(pctx);
if (so->nir->xfb_info) {
nir_shader *xfb = nir_shader_clone(NULL, so->nir);
xfb->info.name = ralloc_asprintf(xfb, "%s@xfb", xfb->info.name);
xfb->info.internal = true;
if (so->nir->xfb_info) {
nir_shader *xfb = nir_shader_clone(NULL, so->nir);
xfb->info.name = ralloc_asprintf(xfb, "%s@xfb", xfb->info.name);
xfb->info.internal = true;
so->xfb = calloc(1, sizeof(struct panfrost_compiled_shader));
so->xfb->key.vs_is_xfb = true;
so->xfb = calloc(1, sizeof(struct panfrost_compiled_shader));
so->xfb->key.vs_is_xfb = true;
panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs,
so, &ctx->base.debug, so->xfb, 0);
panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs, so,
&ctx->base.debug, so->xfb, 0);
/* Since transform feedback is handled via the transform
* feedback program, the original program no longer uses XFB
*/
nir->info.has_transform_feedback_varyings = false;
}
/* Since transform feedback is handled via the transform
* feedback program, the original program no longer uses XFB
*/
nir->info.has_transform_feedback_varyings = false;
}
/* Compile the program. We don't use vertex shader keys, so there will
* be no further vertex shader variants. We do have fragment shader
* keys, but we can still compile with a default key that will work most
* of the time.
*/
struct panfrost_shader_key key = { 0 };
/* Compile the program. We don't use vertex shader keys, so there will
* be no further vertex shader variants. We do have fragment shader
* keys, but we can still compile with a default key that will work most
* of the time.
*/
struct panfrost_shader_key key = {0};
/* gl_FragColor lowering needs the number of colour buffers on desktop
* GL, where it acts as an implicit broadcast to all colour buffers.
*
* However, gl_FragColor is a legacy feature, so assume that if
* gl_FragColor is used, there is only a single render target. The
* implicit broadcast is neither especially useful nor required by GLES.
*/
if (so->nir->info.stage == MESA_SHADER_FRAGMENT &&
so->nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
/* gl_FragColor lowering needs the number of colour buffers on desktop
* GL, where it acts as an implicit broadcast to all colour buffers.
*
* However, gl_FragColor is a legacy feature, so assume that if
* gl_FragColor is used, there is only a single render target. The
* implicit broadcast is neither especially useful nor required by GLES.
*/
if (so->nir->info.stage == MESA_SHADER_FRAGMENT &&
so->nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
key.fs.nr_cbufs_for_fragcolor = 1;
}
key.fs.nr_cbufs_for_fragcolor = 1;
}
/* Creating a CSO is single-threaded, so it's ok to use the
* locked function without explicitly taking the lock. Creating a
* default variant acts as a precompile.
*/
panfrost_new_variant_locked(ctx, so, &key);
/* Creating a CSO is single-threaded, so it's ok to use the
* locked function without explicitly taking the lock. Creating a
* default variant acts as a precompile.
*/
panfrost_new_variant_locked(ctx, so, &key);
return so;
return so;
}
static void
panfrost_delete_shader_state(struct pipe_context *pctx, void *so)
{
struct panfrost_uncompiled_shader *cso = (struct panfrost_uncompiled_shader *) so;
struct panfrost_uncompiled_shader *cso =
(struct panfrost_uncompiled_shader *)so;
util_dynarray_foreach(&cso->variants, struct panfrost_compiled_shader, so) {
panfrost_bo_unreference(so->bin.bo);
panfrost_bo_unreference(so->state.bo);
panfrost_bo_unreference(so->linkage.bo);
}
util_dynarray_foreach(&cso->variants, struct panfrost_compiled_shader, so) {
panfrost_bo_unreference(so->bin.bo);
panfrost_bo_unreference(so->state.bo);
panfrost_bo_unreference(so->linkage.bo);
}
if (cso->xfb) {
panfrost_bo_unreference(cso->xfb->bin.bo);
panfrost_bo_unreference(cso->xfb->state.bo);
panfrost_bo_unreference(cso->xfb->linkage.bo);
free(cso->xfb);
}
if (cso->xfb) {
panfrost_bo_unreference(cso->xfb->bin.bo);
panfrost_bo_unreference(cso->xfb->state.bo);
panfrost_bo_unreference(cso->xfb->linkage.bo);
free(cso->xfb);
}
simple_mtx_destroy(&cso->lock);
simple_mtx_destroy(&cso->lock);
ralloc_free(so);
ralloc_free(so);
}
/*
@ -424,52 +418,51 @@ panfrost_delete_shader_state(struct pipe_context *pctx, void *so)
* precompiled, creating both the uncompiled and compiled shaders now.
*/
static void *
panfrost_create_compute_state(
struct pipe_context *pctx,
const struct pipe_compute_state *cso)
panfrost_create_compute_state(struct pipe_context *pctx,
const struct pipe_compute_state *cso)
{
struct panfrost_context *ctx = pan_context(pctx);
struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(cso->prog);
struct panfrost_compiled_shader *v = panfrost_alloc_variant(so);
memset(v, 0, sizeof *v);
struct panfrost_context *ctx = pan_context(pctx);
struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(cso->prog);
struct panfrost_compiled_shader *v = panfrost_alloc_variant(so);
memset(v, 0, sizeof *v);
assert(cso->ir_type == PIPE_SHADER_IR_NIR && "TGSI kernels unsupported");
assert(cso->ir_type == PIPE_SHADER_IR_NIR && "TGSI kernels unsupported");
panfrost_shader_get(pctx->screen, &ctx->shaders, &ctx->descs,
so, &ctx->base.debug, v, cso->static_shared_mem);
panfrost_shader_get(pctx->screen, &ctx->shaders, &ctx->descs, so,
&ctx->base.debug, v, cso->static_shared_mem);
/* The NIR becomes invalid after this. For compute kernels, we never
* need to access it again. Don't keep a dangling pointer around.
*/
so->nir = NULL;
/* The NIR becomes invalid after this. For compute kernels, we never
* need to access it again. Don't keep a dangling pointer around.
*/
so->nir = NULL;
return so;
return so;
}
static void
panfrost_bind_compute_state(struct pipe_context *pipe, void *cso)
{
struct panfrost_context *ctx = pan_context(pipe);
struct panfrost_uncompiled_shader *uncompiled = cso;
struct panfrost_context *ctx = pan_context(pipe);
struct panfrost_uncompiled_shader *uncompiled = cso;
ctx->uncompiled[PIPE_SHADER_COMPUTE] = uncompiled;
ctx->uncompiled[PIPE_SHADER_COMPUTE] = uncompiled;
ctx->prog[PIPE_SHADER_COMPUTE] =
uncompiled ? util_dynarray_begin(&uncompiled->variants) : NULL;
ctx->prog[PIPE_SHADER_COMPUTE] =
uncompiled ? util_dynarray_begin(&uncompiled->variants) : NULL;
}
void
panfrost_shader_context_init(struct pipe_context *pctx)
{
pctx->create_vs_state = panfrost_create_shader_state;
pctx->delete_vs_state = panfrost_delete_shader_state;
pctx->bind_vs_state = panfrost_bind_vs_state;
pctx->create_vs_state = panfrost_create_shader_state;
pctx->delete_vs_state = panfrost_delete_shader_state;
pctx->bind_vs_state = panfrost_bind_vs_state;
pctx->create_fs_state = panfrost_create_shader_state;
pctx->delete_fs_state = panfrost_delete_shader_state;
pctx->bind_fs_state = panfrost_bind_fs_state;
pctx->create_fs_state = panfrost_create_shader_state;
pctx->delete_fs_state = panfrost_delete_shader_state;
pctx->bind_fs_state = panfrost_bind_fs_state;
pctx->create_compute_state = panfrost_create_compute_state;
pctx->bind_compute_state = panfrost_bind_compute_state;
pctx->delete_compute_state = panfrost_delete_shader_state;
pctx->create_compute_state = panfrost_create_compute_state;
pctx->bind_compute_state = panfrost_bind_compute_state;
pctx->delete_compute_state = panfrost_delete_shader_state;
}


@ -64,20 +64,20 @@
static bool
bi_has_skip_bit(enum bi_opcode op)
{
switch (op) {
case BI_OPCODE_TEX_SINGLE:
case BI_OPCODE_TEXC:
case BI_OPCODE_TEXC_DUAL:
case BI_OPCODE_TEXS_2D_F16:
case BI_OPCODE_TEXS_2D_F32:
case BI_OPCODE_TEXS_CUBE_F16:
case BI_OPCODE_TEXS_CUBE_F32:
case BI_OPCODE_VAR_TEX_F16:
case BI_OPCODE_VAR_TEX_F32:
return true;
default:
return false;
}
switch (op) {
case BI_OPCODE_TEX_SINGLE:
case BI_OPCODE_TEXC:
case BI_OPCODE_TEXC_DUAL:
case BI_OPCODE_TEXS_2D_F16:
case BI_OPCODE_TEXS_2D_F32:
case BI_OPCODE_TEXS_CUBE_F16:
case BI_OPCODE_TEXS_CUBE_F32:
case BI_OPCODE_VAR_TEX_F16:
case BI_OPCODE_VAR_TEX_F32:
return true;
default:
return false;
}
}
/* Does a given instruction require helper threads to be active (because it
@ -87,52 +87,52 @@ bi_has_skip_bit(enum bi_opcode op)
bool
bi_instr_uses_helpers(bi_instr *I)
{
switch (I->op) {
case BI_OPCODE_TEXC:
case BI_OPCODE_TEXC_DUAL:
case BI_OPCODE_TEXS_2D_F16:
case BI_OPCODE_TEXS_2D_F32:
case BI_OPCODE_TEXS_CUBE_F16:
case BI_OPCODE_TEXS_CUBE_F32:
case BI_OPCODE_VAR_TEX_F16:
case BI_OPCODE_VAR_TEX_F32:
return !I->lod_mode; /* set for zero, clear for computed */
case BI_OPCODE_TEX_SINGLE:
return (I->va_lod_mode == BI_VA_LOD_MODE_COMPUTED_LOD) ||
(I->va_lod_mode == BI_VA_LOD_MODE_COMPUTED_BIAS);
case BI_OPCODE_CLPER_I32:
case BI_OPCODE_CLPER_OLD_I32:
/* Fragment shaders require helpers to implement derivatives.
* Other shader stages don't have helpers at all */
return true;
default:
return false;
}
switch (I->op) {
case BI_OPCODE_TEXC:
case BI_OPCODE_TEXC_DUAL:
case BI_OPCODE_TEXS_2D_F16:
case BI_OPCODE_TEXS_2D_F32:
case BI_OPCODE_TEXS_CUBE_F16:
case BI_OPCODE_TEXS_CUBE_F32:
case BI_OPCODE_VAR_TEX_F16:
case BI_OPCODE_VAR_TEX_F32:
return !I->lod_mode; /* set for zero, clear for computed */
case BI_OPCODE_TEX_SINGLE:
return (I->va_lod_mode == BI_VA_LOD_MODE_COMPUTED_LOD) ||
(I->va_lod_mode == BI_VA_LOD_MODE_COMPUTED_BIAS);
case BI_OPCODE_CLPER_I32:
case BI_OPCODE_CLPER_OLD_I32:
/* Fragment shaders require helpers to implement derivatives.
* Other shader stages don't have helpers at all */
return true;
default:
return false;
}
}
/* Does a block use helpers directly */
static bool
bi_block_uses_helpers(bi_block *block)
{
bi_foreach_instr_in_block(block, I) {
if (bi_instr_uses_helpers(I))
return true;
}
bi_foreach_instr_in_block(block, I) {
if (bi_instr_uses_helpers(I))
return true;
}
return false;
return false;
}
bool
bi_block_terminates_helpers(bi_block *block)
{
/* Can't terminate if a successor needs helpers */
bi_foreach_successor(block, succ) {
if (succ->pass_flags & 1)
return false;
}
/* Can't terminate if a successor needs helpers */
bi_foreach_successor(block, succ) {
if (succ->pass_flags & 1)
return false;
}
/* Otherwise we terminate */
return true;
/* Otherwise we terminate */
return true;
}
/*
@ -142,128 +142,130 @@ bi_block_terminates_helpers(bi_block *block)
static void
bi_propagate_pass_flag(bi_block *block)
{
block->pass_flags = 1;
block->pass_flags = 1;
bi_foreach_predecessor(block, pred) {
if ((*pred)->pass_flags == 0)
bi_propagate_pass_flag(*pred);
}
bi_foreach_predecessor(block, pred) {
if ((*pred)->pass_flags == 0)
bi_propagate_pass_flag(*pred);
}
}
void
bi_analyze_helper_terminate(bi_context *ctx)
{
/* Other shader stages do not have a notion of helper threads, so we
* can skip the analysis. Don't run for blend shaders, either, since
* they run in the context of another shader that we don't see. */
if (ctx->stage != MESA_SHADER_FRAGMENT || ctx->inputs->is_blend)
return;
/* Other shader stages do not have a notion of helper threads, so we
* can skip the analysis. Don't run for blend shaders, either, since
* they run in the context of another shader that we don't see. */
if (ctx->stage != MESA_SHADER_FRAGMENT || ctx->inputs->is_blend)
return;
/* Clear flags */
bi_foreach_block(ctx, block)
block->pass_flags = 0;
/* Clear flags */
bi_foreach_block(ctx, block)
block->pass_flags = 0;
/* For each block, check if it uses helpers and propagate that fact if
* so. We walk in reverse order to minimize the number of blocks tested:
* if the (unique) last block uses helpers, only that block is tested.
*/
bi_foreach_block_rev(ctx, block) {
if (block->pass_flags == 0 && bi_block_uses_helpers(block))
bi_propagate_pass_flag(block);
}
/* For each block, check if it uses helpers and propagate that fact if
* so. We walk in reverse order to minimize the number of blocks tested:
* if the (unique) last block uses helpers, only that block is tested.
*/
bi_foreach_block_rev(ctx, block) {
if (block->pass_flags == 0 && bi_block_uses_helpers(block))
bi_propagate_pass_flag(block);
}
}
void
bi_mark_clauses_td(bi_context *ctx)
{
if (ctx->stage != MESA_SHADER_FRAGMENT || ctx->inputs->is_blend)
return;
if (ctx->stage != MESA_SHADER_FRAGMENT || ctx->inputs->is_blend)
return;
/* Finally, mark clauses requiring helpers */
bi_foreach_block(ctx, block) {
/* At the end, there are helpers iff we don't terminate */
bool helpers = !bi_block_terminates_helpers(block);
/* Finally, mark clauses requiring helpers */
bi_foreach_block(ctx, block) {
/* At the end, there are helpers iff we don't terminate */
bool helpers = !bi_block_terminates_helpers(block);
bi_foreach_clause_in_block_rev(block, clause) {
bi_foreach_instr_in_clause_rev(block, clause, I) {
helpers |= bi_instr_uses_helpers(I);
}
bi_foreach_clause_in_block_rev(block, clause) {
bi_foreach_instr_in_clause_rev(block, clause, I) {
helpers |= bi_instr_uses_helpers(I);
}
clause->td = !helpers;
}
}
clause->td = !helpers;
}
}
}
static bool
bi_helper_block_update(BITSET_WORD *deps, bi_block *block)
{
bool progress = false;
bool progress = false;
bi_foreach_instr_in_block_rev(block, I) {
/* If a destination is required by helper invocation... */
bi_foreach_dest(I, d) {
if (!BITSET_TEST(deps, I->dest[d].value))
continue;
bi_foreach_instr_in_block_rev(block, I) {
/* If a destination is required by helper invocation... */
bi_foreach_dest(I, d) {
if (!BITSET_TEST(deps, I->dest[d].value))
continue;
/* ...so are the sources */
bi_foreach_ssa_src(I, s) {
progress |= !BITSET_TEST(deps, I->src[s].value);
BITSET_SET(deps, I->src[s].value);
}
/* ...so are the sources */
bi_foreach_ssa_src(I, s) {
progress |= !BITSET_TEST(deps, I->src[s].value);
BITSET_SET(deps, I->src[s].value);
}
break;
}
}
break;
}
}
return progress;
return progress;
}
void
bi_analyze_helper_requirements(bi_context *ctx)
{
BITSET_WORD *deps = calloc(sizeof(BITSET_WORD), ctx->ssa_alloc);
BITSET_WORD *deps = calloc(sizeof(BITSET_WORD), ctx->ssa_alloc);
/* Initialize with the sources of instructions consuming
* derivatives */
/* Initialize with the sources of instructions consuming
* derivatives */
bi_foreach_instr_global(ctx, I) {
if (!bi_instr_uses_helpers(I)) continue;
bi_foreach_instr_global(ctx, I) {
if (!bi_instr_uses_helpers(I))
continue;
bi_foreach_ssa_src(I, s)
BITSET_SET(deps, I->src[s].value);
}
bi_foreach_ssa_src(I, s)
BITSET_SET(deps, I->src[s].value);
}
/* Propagate that up */
u_worklist worklist;
bi_worklist_init(ctx, &worklist);
/* Propagate that up */
u_worklist worklist;
bi_worklist_init(ctx, &worklist);
bi_foreach_block(ctx, block) {
bi_worklist_push_tail(&worklist, block);
}
bi_foreach_block(ctx, block) {
bi_worklist_push_tail(&worklist, block);
}
while (!u_worklist_is_empty(&worklist)) {
bi_block *blk = bi_worklist_pop_tail(&worklist);
while (!u_worklist_is_empty(&worklist)) {
bi_block *blk = bi_worklist_pop_tail(&worklist);
if (bi_helper_block_update(deps, blk)) {
bi_foreach_predecessor(blk, pred)
bi_worklist_push_head(&worklist, *pred);
}
}
if (bi_helper_block_update(deps, blk)) {
bi_foreach_predecessor(blk, pred)
bi_worklist_push_head(&worklist, *pred);
}
}
u_worklist_fini(&worklist);
u_worklist_fini(&worklist);
/* Set the execute bits */
/* Set the execute bits */
bi_foreach_instr_global(ctx, I) {
if (!bi_has_skip_bit(I->op)) continue;
bi_foreach_instr_global(ctx, I) {
if (!bi_has_skip_bit(I->op))
continue;
bool exec = false;
bool exec = false;
bi_foreach_dest(I, d)
exec |= BITSET_TEST(deps, I->dest[d].value);
bi_foreach_dest(I, d)
exec |= BITSET_TEST(deps, I->dest[d].value);
I->skip = !exec;
}
I->skip = !exec;
}
free(deps);
free(deps);
}


@ -37,10 +37,8 @@
bool
bi_ec0_packed(unsigned tuple_count)
{
return (tuple_count == 3) ||
(tuple_count == 5) ||
(tuple_count == 6) ||
(tuple_count == 8);
return (tuple_count == 3) || (tuple_count == 5) || (tuple_count == 6) ||
(tuple_count == 8);
}
/* Helper to calculate the number of quadwords in a clause. This is a function
@ -60,7 +58,7 @@ bi_ec0_packed(unsigned tuple_count)
* 6 | 5*
* 7 | 5
* 8 | 6*
*
*
* Y = { X if X <= 3
* { X - 1 if 4 <= X <= 6
* { X - 2 if 7 <= X <= 8
@ -72,15 +70,15 @@ bi_ec0_packed(unsigned tuple_count)
static unsigned
bi_clause_quadwords(bi_clause *clause)
{
unsigned X = clause->tuple_count;
unsigned Y = X - ((X >= 7) ? 2 : (X >= 4) ? 1 : 0);
unsigned X = clause->tuple_count;
unsigned Y = X - ((X >= 7) ? 2 : (X >= 4) ? 1 : 0);
unsigned constants = clause->constant_count;
unsigned constants = clause->constant_count;
if ((X != 4) && (X != 7) && (X >= 3) && constants)
constants--;
if ((X != 4) && (X != 7) && (X >= 3) && constants)
constants--;
return Y + DIV_ROUND_UP(constants, 2);
return Y + DIV_ROUND_UP(constants, 2);
}
/* Measures the number of quadwords a branch jumps. Bifrost relative offsets
@ -90,62 +88,62 @@ bi_clause_quadwords(bi_clause *clause)
signed
bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target)
{
/* Signed since we might jump backwards */
signed ret = 0;
/* Signed since we might jump backwards */
signed ret = 0;
/* Determine if the block we're branching to is strictly greater in
* source order */
bool forwards = target->index > start->block->index;
/* Determine if the block we're branching to is strictly greater in
* source order */
bool forwards = target->index > start->block->index;
if (forwards) {
/* We have to jump through this block from the start of this
* clause to the end */
bi_foreach_clause_in_block_from(start->block, clause, start) {
ret += bi_clause_quadwords(clause);
}
if (forwards) {
/* We have to jump through this block from the start of this
* clause to the end */
bi_foreach_clause_in_block_from(start->block, clause, start) {
ret += bi_clause_quadwords(clause);
}
/* We then need to jump through every clause of every following
* block until the target */
bi_foreach_block_from(ctx, start->block, blk) {
/* Don't double-count the first block */
if (blk == start->block)
continue;
/* We then need to jump through every clause of every following
* block until the target */
bi_foreach_block_from(ctx, start->block, blk) {
/* Don't double-count the first block */
if (blk == start->block)
continue;
/* End just before the target */
if (blk == target)
break;
/* End just before the target */
if (blk == target)
break;
/* Count every clause in the block */
bi_foreach_clause_in_block(blk, clause) {
ret += bi_clause_quadwords(clause);
}
}
} else {
/* We start at the beginning of the clause but have to jump
* through the clauses before us in the block */
bi_foreach_clause_in_block_from_rev(start->block, clause, start) {
if (clause == start)
continue;
/* Count every clause in the block */
bi_foreach_clause_in_block(blk, clause) {
ret += bi_clause_quadwords(clause);
}
}
} else {
/* We start at the beginning of the clause but have to jump
* through the clauses before us in the block */
bi_foreach_clause_in_block_from_rev(start->block, clause, start) {
if (clause == start)
continue;
ret -= bi_clause_quadwords(clause);
}
ret -= bi_clause_quadwords(clause);
}
/* And jump back every clause of preceding blocks up through
* and including the target to get to the beginning of the
* target */
bi_foreach_block_from_rev(ctx, start->block, blk) {
if (blk == start->block)
continue;
/* And jump back every clause of preceding blocks up through
* and including the target to get to the beginning of the
* target */
bi_foreach_block_from_rev(ctx, start->block, blk) {
if (blk == start->block)
continue;
bi_foreach_clause_in_block(blk, clause) {
ret -= bi_clause_quadwords(clause);
}
bi_foreach_clause_in_block(blk, clause) {
ret -= bi_clause_quadwords(clause);
}
/* End just after the target */
if (blk == target)
break;
}
}
/* End just after the target */
if (blk == target)
break;
}
}
return ret;
return ret;
}


@ -23,98 +23,100 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "util/u_memory.h"
#include "compiler.h"
void
bi_liveness_ins_update_ssa(BITSET_WORD *live, const bi_instr *I)
{
bi_foreach_dest(I, d)
BITSET_CLEAR(live, I->dest[d].value);
bi_foreach_dest(I, d)
BITSET_CLEAR(live, I->dest[d].value);
bi_foreach_ssa_src(I, s)
BITSET_SET(live, I->src[s].value);
bi_foreach_ssa_src(I, s)
BITSET_SET(live, I->src[s].value);
}
void
bi_compute_liveness_ssa(bi_context *ctx)
{
u_worklist worklist;
u_worklist_init(&worklist, ctx->num_blocks, NULL);
u_worklist worklist;
u_worklist_init(&worklist, ctx->num_blocks, NULL);
/* Free any previous liveness, and allocate */
unsigned words = BITSET_WORDS(ctx->ssa_alloc);
/* Free any previous liveness, and allocate */
unsigned words = BITSET_WORDS(ctx->ssa_alloc);
bi_foreach_block(ctx, block) {
if (block->ssa_live_in)
ralloc_free(block->ssa_live_in);
bi_foreach_block(ctx, block) {
if (block->ssa_live_in)
ralloc_free(block->ssa_live_in);
if (block->ssa_live_out)
ralloc_free(block->ssa_live_out);
if (block->ssa_live_out)
ralloc_free(block->ssa_live_out);
block->ssa_live_in = rzalloc_array(block, BITSET_WORD, words);
block->ssa_live_out = rzalloc_array(block, BITSET_WORD, words);
block->ssa_live_in = rzalloc_array(block, BITSET_WORD, words);
block->ssa_live_out = rzalloc_array(block, BITSET_WORD, words);
bi_worklist_push_head(&worklist, block);
}
bi_worklist_push_head(&worklist, block);
}
/* Iterate the work list */
while(!u_worklist_is_empty(&worklist)) {
/* Pop in reverse order since liveness is a backwards pass */
bi_block *blk = bi_worklist_pop_head(&worklist);
/* Iterate the work list */
while (!u_worklist_is_empty(&worklist)) {
/* Pop in reverse order since liveness is a backwards pass */
bi_block *blk = bi_worklist_pop_head(&worklist);
/* Update its liveness information */
memcpy(blk->ssa_live_in, blk->ssa_live_out, words * sizeof(BITSET_WORD));
/* Update its liveness information */
memcpy(blk->ssa_live_in, blk->ssa_live_out, words * sizeof(BITSET_WORD));
bi_foreach_instr_in_block_rev(blk, I) {
/* Phi nodes are handled separately, so we skip them. As phi nodes are
* at the beginning and we're iterating backwards, we stop as soon as
* we hit a phi node.
*/
if (I->op == BI_OPCODE_PHI)
break;
bi_foreach_instr_in_block_rev(blk, I) {
/* Phi nodes are handled separately, so we skip them. As phi nodes are
* at the beginning and we're iterating backwards, we stop as soon as
* we hit a phi node.
*/
if (I->op == BI_OPCODE_PHI)
break;
bi_liveness_ins_update_ssa(blk->ssa_live_in, I);
}
bi_liveness_ins_update_ssa(blk->ssa_live_in, I);
}
/* Propagate the live in of the successor (blk) to the live out of
* predecessors.
*
* Phi nodes are logically on the control flow edge and act in parallel.
* To handle when propagating, we kill writes from phis and make live the
* corresponding sources.
*/
bi_foreach_predecessor(blk, pred) {
BITSET_WORD *live = ralloc_array(blk, BITSET_WORD, words);
memcpy(live, blk->ssa_live_in, words * sizeof(BITSET_WORD));
/* Propagate the live in of the successor (blk) to the live out of
* predecessors.
*
* Phi nodes are logically on the control flow edge and act in parallel.
* To handle when propagating, we kill writes from phis and make live the
* corresponding sources.
*/
bi_foreach_predecessor(blk, pred) {
BITSET_WORD *live = ralloc_array(blk, BITSET_WORD, words);
memcpy(live, blk->ssa_live_in, words * sizeof(BITSET_WORD));
/* Kill write */
bi_foreach_instr_in_block(blk, I) {
if (I->op != BI_OPCODE_PHI) break;
/* Kill write */
bi_foreach_instr_in_block(blk, I) {
if (I->op != BI_OPCODE_PHI)
break;
BITSET_CLEAR(live, I->dest[0].value);
}
BITSET_CLEAR(live, I->dest[0].value);
}
/* Make live the corresponding source */
bi_foreach_instr_in_block(blk, I) {
if (I->op != BI_OPCODE_PHI) break;
/* Make live the corresponding source */
bi_foreach_instr_in_block(blk, I) {
if (I->op != BI_OPCODE_PHI)
break;
bi_index operand = I->src[bi_predecessor_index(blk, *pred)];
if (bi_is_ssa(operand))
BITSET_SET(live, operand.value);
}
bi_index operand = I->src[bi_predecessor_index(blk, *pred)];
if (bi_is_ssa(operand))
BITSET_SET(live, operand.value);
}
BITSET_WORD progress = 0;
BITSET_WORD progress = 0;
for (unsigned i = 0; i < words; ++i) {
progress |= live[i] & ~((*pred)->ssa_live_out[i]);
(*pred)->ssa_live_out[i] |= live[i];
}
for (unsigned i = 0; i < words; ++i) {
progress |= live[i] & ~((*pred)->ssa_live_out[i]);
(*pred)->ssa_live_out[i] |= live[i];
}
if (progress != 0)
bi_worklist_push_tail(&worklist, *pred);
}
}
if (progress != 0)
bi_worklist_push_tail(&worklist, *pred);
}
}
u_worklist_fini(&worklist);
u_worklist_fini(&worklist);
}
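
A miniature model of the backwards worklist scheme above (hypothetical, not from the tree): one bit per value, a fixed three-block chain A -> B -> C, no phis, and a plain stack standing in for u_worklist, so duplicate pushes are tolerated rather than deduplicated.

  #include <stdint.h>
  #include <stdio.h>

  struct blk {
     uint32_t def, use, live_in, live_out;
     int preds[2];
     int npreds;
  };

  int
  main(void)
  {
     /* A defines v0; B uses v0 and defines v1; C uses v1 */
     struct blk b[3] = {
        {.def = 1u << 0, .use = 0, .npreds = 0},
        {.def = 1u << 1, .use = 1u << 0, .preds = {0}, .npreds = 1},
        {.def = 0, .use = 1u << 1, .preds = {1}, .npreds = 1},
     };
     int work[16], top = 0;

     for (int i = 0; i < 3; ++i)
        work[top++] = i;

     while (top) {
        int i = work[--top];

        /* live-in = (live-out minus defs) union uses */
        b[i].live_in = (b[i].live_out & ~b[i].def) | b[i].use;

        /* Re-queue a predecessor whenever its live-out grows */
        for (int p = 0; p < b[i].npreds; ++p) {
           int pred = b[i].preds[p];
           uint32_t grown = b[i].live_in & ~b[pred].live_out;

           b[pred].live_out |= b[i].live_in;
           if (grown)
              work[top++] = pred;
        }
     }

     for (int i = 0; i < 3; ++i)
        printf("block %d: live-in %x, live-out %x\n", i,
               (unsigned)b[i].live_in, (unsigned)b[i].live_out);

     return 0;
  }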


@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "compiler/nir/nir_builder.h"
#include "compiler.h"
/* Divergent attribute access is undefined behaviour. To avoid divergence,
* lower to an if-chain like:
@ -40,89 +40,88 @@
static bool
bi_lower_divergent_indirects_impl(nir_builder *b, nir_instr *instr, void *data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
gl_shader_stage stage = b->shader->info.stage;
nir_src *offset;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
gl_shader_stage stage = b->shader->info.stage;
nir_src *offset;
/* Not all indirect access needs this workaround */
switch (intr->intrinsic) {
case nir_intrinsic_load_input:
case nir_intrinsic_load_interpolated_input:
/* Attributes and varyings */
offset = nir_get_io_offset_src(intr);
break;
/* Not all indirect access needs this workaround */
switch (intr->intrinsic) {
case nir_intrinsic_load_input:
case nir_intrinsic_load_interpolated_input:
/* Attributes and varyings */
offset = nir_get_io_offset_src(intr);
break;
case nir_intrinsic_store_output:
/* Varyings only */
if (stage == MESA_SHADER_FRAGMENT)
return false;
case nir_intrinsic_store_output:
/* Varyings only */
if (stage == MESA_SHADER_FRAGMENT)
return false;
offset = nir_get_io_offset_src(intr);
break;
offset = nir_get_io_offset_src(intr);
break;
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_image_atomic_umin:
case nir_intrinsic_image_atomic_imax:
case nir_intrinsic_image_atomic_umax:
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_load:
case nir_intrinsic_image_store:
/* Any image access */
offset = &intr->src[0];
break;
default:
return false;
}
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_image_atomic_umin:
case nir_intrinsic_image_atomic_imax:
case nir_intrinsic_image_atomic_umax:
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_load:
case nir_intrinsic_image_store:
/* Any image access */
offset = &intr->src[0];
break;
default:
return false;
}
if (!nir_src_is_divergent(*offset))
return false;
if (!nir_src_is_divergent(*offset))
return false;
/* This indirect does need it */
/* This indirect does need it */
b->cursor = nir_before_instr(instr);
nir_ssa_def *lane = nir_load_subgroup_invocation(b);
unsigned *lanes = data;
b->cursor = nir_before_instr(instr);
nir_ssa_def *lane = nir_load_subgroup_invocation(b);
unsigned *lanes = data;
/* Write zero in a funny way to bypass lower_load_const_to_scalar */
bool has_dest = nir_intrinsic_infos[intr->intrinsic].has_dest;
unsigned size = has_dest ? nir_dest_bit_size(intr->dest) : 32;
nir_ssa_def *zero = has_dest ? nir_imm_zero(b, 1, size) : NULL;
nir_ssa_def *zeroes[4] = { zero, zero, zero, zero };
nir_ssa_def *res = has_dest ?
nir_vec(b, zeroes, nir_dest_num_components(intr->dest)) : NULL;
/* Write zero in a funny way to bypass lower_load_const_to_scalar */
bool has_dest = nir_intrinsic_infos[intr->intrinsic].has_dest;
unsigned size = has_dest ? nir_dest_bit_size(intr->dest) : 32;
nir_ssa_def *zero = has_dest ? nir_imm_zero(b, 1, size) : NULL;
nir_ssa_def *zeroes[4] = {zero, zero, zero, zero};
nir_ssa_def *res =
has_dest ? nir_vec(b, zeroes, nir_dest_num_components(intr->dest)) : NULL;
for (unsigned i = 0; i < (*lanes); ++i) {
nir_push_if(b, nir_ieq_imm(b, lane, i));
for (unsigned i = 0; i < (*lanes); ++i) {
nir_push_if(b, nir_ieq_imm(b, lane, i));
nir_instr *c = nir_instr_clone(b->shader, instr);
nir_intrinsic_instr *c_intr = nir_instr_as_intrinsic(c);
nir_builder_instr_insert(b, c);
nir_pop_if(b, NULL);
nir_instr *c = nir_instr_clone(b->shader, instr);
nir_intrinsic_instr *c_intr = nir_instr_as_intrinsic(c);
nir_builder_instr_insert(b, c);
nir_pop_if(b, NULL);
if (has_dest) {
assert(c_intr->dest.is_ssa);
nir_ssa_def *c_ssa = &c_intr->dest.ssa;
res = nir_if_phi(b, c_ssa, res);
}
}
if (has_dest) {
assert(c_intr->dest.is_ssa);
nir_ssa_def *c_ssa = &c_intr->dest.ssa;
res = nir_if_phi(b, c_ssa, res);
}
}
if (has_dest)
nir_ssa_def_rewrite_uses(&intr->dest.ssa, res);
if (has_dest)
nir_ssa_def_rewrite_uses(&intr->dest.ssa, res);
nir_instr_remove(instr);
return true;
nir_instr_remove(instr);
return true;
}
bool
bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes)
{
return nir_shader_instructions_pass(shader,
bi_lower_divergent_indirects_impl,
nir_metadata_none, &lanes);
return nir_shader_instructions_pass(
shader, bi_lower_divergent_indirects_impl, nir_metadata_none, &lanes);
}
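
A scalar model of the if-chain this pass builds, as a hypothetical sketch with four lanes and made-up table/index data: each arm lets exactly one lane perform the access, so the index used inside any arm is uniform.

  #include <stdio.h>

  #define LANES 4

  int
  main(void)
  {
     unsigned table[8] = {10, 11, 12, 13, 14, 15, 16, 17};
     unsigned index[LANES] = {3, 0, 2, 1}; /* divergent per-lane index */
     unsigned result[LANES] = {0};         /* the zero fallback value */

     /* One "if (lane == i)" arm per lane; inside an arm the index is uniform */
     for (unsigned i = 0; i < LANES; ++i) {
        for (unsigned lane = 0; lane < LANES; ++lane) {
           if (lane == i)
              result[lane] = table[index[lane]]; /* the cloned access */
        }
     }

     for (unsigned lane = 0; lane < LANES; ++lane)
        printf("lane %u -> %u\n", lane, result[lane]);

     return 0;
  }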


@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
/* Not all 8-bit and 16-bit instructions support all swizzles on all sources.
* These passes, intended to run after NIR->BIR but before scheduling/RA, lower
@ -33,270 +33,269 @@
static bool
bi_swizzle_replicates_8(enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_B0000:
case BI_SWIZZLE_B1111:
case BI_SWIZZLE_B2222:
case BI_SWIZZLE_B3333:
return true;
default:
return false;
}
switch (swz) {
case BI_SWIZZLE_B0000:
case BI_SWIZZLE_B1111:
case BI_SWIZZLE_B2222:
case BI_SWIZZLE_B3333:
return true;
default:
return false;
}
}
static void
lower_swizzle(bi_context *ctx, bi_instr *ins, unsigned src)
{
/* TODO: Use the opcode table and be a lot more methodical about this... */
switch (ins->op) {
/* Some instructions used with 16-bit data never have swizzles */
case BI_OPCODE_CSEL_V2F16:
case BI_OPCODE_CSEL_V2I16:
case BI_OPCODE_CSEL_V2S16:
case BI_OPCODE_CSEL_V2U16:
/* TODO: Use the opcode table and be a lot more methodical about this... */
switch (ins->op) {
/* Some instructions used with 16-bit data never have swizzles */
case BI_OPCODE_CSEL_V2F16:
case BI_OPCODE_CSEL_V2I16:
case BI_OPCODE_CSEL_V2S16:
case BI_OPCODE_CSEL_V2U16:
/* Despite ostensibly being 32-bit instructions, CLPER does not
* inherently interpret the data, so it can be used for v2f16
* derivatives, which might require swizzle lowering */
case BI_OPCODE_CLPER_I32:
case BI_OPCODE_CLPER_OLD_I32:
/* Despite ostensibly being 32-bit instructions, CLPER does not
* inherently interpret the data, so it can be used for v2f16
* derivatives, which might require swizzle lowering */
case BI_OPCODE_CLPER_I32:
case BI_OPCODE_CLPER_OLD_I32:
/* Similarly, CSEL.i32 consumes a boolean as a 32-bit argument. If the
* boolean is implemented as a 16-bit integer, the swizzle is needed
* for correct operation if the instruction producing the 16-bit
* boolean does not replicate to both halves of the containing 32-bit
* register. As such, we may need to lower a swizzle.
*
* This is a silly hack. Ideally, code gen would be smart enough to
* avoid this case (by replicating). In practice, silly hardware design
* decisions force our hand here.
*/
case BI_OPCODE_MUX_I32:
case BI_OPCODE_CSEL_I32:
break;
/* Similarly, CSEL.i32 consumes a boolean as a 32-bit argument. If the
* boolean is implemented as a 16-bit integer, the swizzle is needed
* for correct operation if the instruction producing the 16-bit
* boolean does not replicate to both halves of the containing 32-bit
* register. As such, we may need to lower a swizzle.
*
* This is a silly hack. Ideally, code gen would be smart enough to
* avoid this case (by replicating). In practice, silly hardware design
* decisions force our hand here.
*/
case BI_OPCODE_MUX_I32:
case BI_OPCODE_CSEL_I32:
break;
case BI_OPCODE_IADD_V2S16:
case BI_OPCODE_IADD_V2U16:
case BI_OPCODE_ISUB_V2S16:
case BI_OPCODE_ISUB_V2U16:
if (src == 0 && ins->src[src].swizzle != BI_SWIZZLE_H10)
break;
else
return;
case BI_OPCODE_LSHIFT_AND_V2I16:
case BI_OPCODE_LSHIFT_OR_V2I16:
case BI_OPCODE_LSHIFT_XOR_V2I16:
case BI_OPCODE_RSHIFT_AND_V2I16:
case BI_OPCODE_RSHIFT_OR_V2I16:
case BI_OPCODE_RSHIFT_XOR_V2I16:
if (src == 2)
return;
else
break;
case BI_OPCODE_IADD_V2S16:
case BI_OPCODE_IADD_V2U16:
case BI_OPCODE_ISUB_V2S16:
case BI_OPCODE_ISUB_V2U16:
if (src == 0 && ins->src[src].swizzle != BI_SWIZZLE_H10)
break;
else
return;
case BI_OPCODE_LSHIFT_AND_V2I16:
case BI_OPCODE_LSHIFT_OR_V2I16:
case BI_OPCODE_LSHIFT_XOR_V2I16:
case BI_OPCODE_RSHIFT_AND_V2I16:
case BI_OPCODE_RSHIFT_OR_V2I16:
case BI_OPCODE_RSHIFT_XOR_V2I16:
if (src == 2)
return;
else
break;
/* For some reason MUX.v2i16 allows swaps but not replication */
case BI_OPCODE_MUX_V2I16:
if (ins->src[src].swizzle == BI_SWIZZLE_H10)
return;
else
break;
/* For some reason MUX.v2i16 allows swaps but not replication */
case BI_OPCODE_MUX_V2I16:
if (ins->src[src].swizzle == BI_SWIZZLE_H10)
return;
else
break;
/* No swizzles supported */
case BI_OPCODE_HADD_V4U8:
case BI_OPCODE_HADD_V4S8:
case BI_OPCODE_CLZ_V4U8:
case BI_OPCODE_IDP_V4I8:
case BI_OPCODE_IABS_V4S8:
case BI_OPCODE_ICMP_V4I8:
case BI_OPCODE_ICMP_V4U8:
case BI_OPCODE_MUX_V4I8:
case BI_OPCODE_IADD_IMM_V4I8:
break;
/* No swizzles supported */
case BI_OPCODE_HADD_V4U8:
case BI_OPCODE_HADD_V4S8:
case BI_OPCODE_CLZ_V4U8:
case BI_OPCODE_IDP_V4I8:
case BI_OPCODE_IABS_V4S8:
case BI_OPCODE_ICMP_V4I8:
case BI_OPCODE_ICMP_V4U8:
case BI_OPCODE_MUX_V4I8:
case BI_OPCODE_IADD_IMM_V4I8:
break;
case BI_OPCODE_LSHIFT_AND_V4I8:
case BI_OPCODE_LSHIFT_OR_V4I8:
case BI_OPCODE_LSHIFT_XOR_V4I8:
case BI_OPCODE_RSHIFT_AND_V4I8:
case BI_OPCODE_RSHIFT_OR_V4I8:
case BI_OPCODE_RSHIFT_XOR_V4I8:
/* Last source allows identity or replication */
if (src == 2 && bi_swizzle_replicates_8(ins->src[src].swizzle))
return;
case BI_OPCODE_LSHIFT_AND_V4I8:
case BI_OPCODE_LSHIFT_OR_V4I8:
case BI_OPCODE_LSHIFT_XOR_V4I8:
case BI_OPCODE_RSHIFT_AND_V4I8:
case BI_OPCODE_RSHIFT_OR_V4I8:
case BI_OPCODE_RSHIFT_XOR_V4I8:
/* Last source allows identity or replication */
if (src == 2 && bi_swizzle_replicates_8(ins->src[src].swizzle))
return;
/* Others do not allow swizzles */
break;
/* Others do not allow swizzles */
break;
/* We don't want to deal with reswizzling logic in modifier prop. Move
* the swizzle outside, it's easier for clamp propagation. */
case BI_OPCODE_FCLAMP_V2F16:
{
bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
bi_index dest = ins->dest[0];
bi_index tmp = bi_temp(ctx);
/* We don't want to deal with reswizzling logic in modifier prop. Move
* the swizzle outside, it's easier for clamp propagation. */
case BI_OPCODE_FCLAMP_V2F16: {
bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
bi_index dest = ins->dest[0];
bi_index tmp = bi_temp(ctx);
ins->dest[0] = tmp;
bi_swz_v2i16_to(&b, dest, bi_replace_index(ins->src[0], tmp));
return;
}
ins->dest[0] = tmp;
bi_swz_v2i16_to(&b, dest, bi_replace_index(ins->src[0], tmp));
return;
}
default:
return;
}
default:
return;
}
/* First, try to apply a given swizzle to a constant to clear the
* runtime swizzle. This is less heavy-handed than ignoring the
* swizzle for scalar destinations, since it maintains
* replication of the destination.
*/
if (ins->src[src].type == BI_INDEX_CONSTANT) {
ins->src[src].value = bi_apply_swizzle(ins->src[src].value,
ins->src[src].swizzle);
ins->src[src].swizzle = BI_SWIZZLE_H01;
return;
}
/* First, try to apply a given swizzle to a constant to clear the
* runtime swizzle. This is less heavy-handed than ignoring the
* swizzle for scalar destinations, since it maintains
* replication of the destination.
*/
if (ins->src[src].type == BI_INDEX_CONSTANT) {
ins->src[src].value =
bi_apply_swizzle(ins->src[src].value, ins->src[src].swizzle);
ins->src[src].swizzle = BI_SWIZZLE_H01;
return;
}
/* Even if the source does not replicate, if the consuming instruction
* produces a 16-bit scalar, we can ignore the other component.
*/
if (ins->dest[0].swizzle == BI_SWIZZLE_H00 &&
ins->src[src].swizzle == BI_SWIZZLE_H00)
{
ins->src[src].swizzle = BI_SWIZZLE_H01;
return;
}
/* Even if the source does not replicate, if the consuming instruction
* produces a 16-bit scalar, we can ignore the other component.
*/
if (ins->dest[0].swizzle == BI_SWIZZLE_H00 &&
ins->src[src].swizzle == BI_SWIZZLE_H00) {
ins->src[src].swizzle = BI_SWIZZLE_H01;
return;
}
/* Lower it away */
bi_builder b = bi_init_builder(ctx, bi_before_instr(ins));
/* Lower it away */
bi_builder b = bi_init_builder(ctx, bi_before_instr(ins));
bool is_8 = (bi_opcode_props[ins->op].size == BI_SIZE_8);
bi_index orig = ins->src[src];
bi_index stripped = bi_replace_index(bi_null(), orig);
stripped.swizzle = ins->src[src].swizzle;
bool is_8 = (bi_opcode_props[ins->op].size == BI_SIZE_8);
bi_index orig = ins->src[src];
bi_index stripped = bi_replace_index(bi_null(), orig);
stripped.swizzle = ins->src[src].swizzle;
bi_index swz = is_8 ? bi_swz_v4i8(&b, stripped) : bi_swz_v2i16(&b, stripped);
bi_index swz = is_8 ? bi_swz_v4i8(&b, stripped) : bi_swz_v2i16(&b, stripped);
bi_replace_src(ins, src, swz);
ins->src[src].swizzle = BI_SWIZZLE_H01;
bi_replace_src(ins, src, swz);
ins->src[src].swizzle = BI_SWIZZLE_H01;
}
static bool
bi_swizzle_replicates_16(enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_H00:
case BI_SWIZZLE_H11:
return true;
default:
/* If a swizzle replicates every 8-bits, it also replicates
* every 16-bits, so allow 8-bit replicating swizzles.
*/
return bi_swizzle_replicates_8(swz);
}
switch (swz) {
case BI_SWIZZLE_H00:
case BI_SWIZZLE_H11:
return true;
default:
/* If a swizzle replicates every 8-bits, it also replicates
* every 16-bits, so allow 8-bit replicating swizzles.
*/
return bi_swizzle_replicates_8(swz);
}
}
static bool
bi_instr_replicates(bi_instr *I, BITSET_WORD *replicates_16)
{
switch (I->op) {
switch (I->op) {
/* Instructions that construct vectors have replicated output if their
* sources are identical. Check this case first.
*/
case BI_OPCODE_MKVEC_V2I16:
case BI_OPCODE_V2F16_TO_V2S16:
case BI_OPCODE_V2F16_TO_V2U16:
case BI_OPCODE_V2F32_TO_V2F16:
case BI_OPCODE_V2S16_TO_V2F16:
case BI_OPCODE_V2S8_TO_V2F16:
case BI_OPCODE_V2S8_TO_V2S16:
case BI_OPCODE_V2U16_TO_V2F16:
case BI_OPCODE_V2U8_TO_V2F16:
case BI_OPCODE_V2U8_TO_V2U16:
return bi_is_value_equiv(I->src[0], I->src[1]);
/* Instructions that construct vectors have replicated output if their
* sources are identical. Check this case first.
*/
case BI_OPCODE_MKVEC_V2I16:
case BI_OPCODE_V2F16_TO_V2S16:
case BI_OPCODE_V2F16_TO_V2U16:
case BI_OPCODE_V2F32_TO_V2F16:
case BI_OPCODE_V2S16_TO_V2F16:
case BI_OPCODE_V2S8_TO_V2F16:
case BI_OPCODE_V2S8_TO_V2S16:
case BI_OPCODE_V2U16_TO_V2F16:
case BI_OPCODE_V2U8_TO_V2F16:
case BI_OPCODE_V2U8_TO_V2U16:
return bi_is_value_equiv(I->src[0], I->src[1]);
/* 16-bit transcendentals are defined to output zero in their
* upper half, so they do not replicate
*/
case BI_OPCODE_FRCP_F16:
case BI_OPCODE_FRSQ_F16:
return false;
/* 16-bit transcendentals are defined to output zero in their
* upper half, so they do not replicate
*/
case BI_OPCODE_FRCP_F16:
case BI_OPCODE_FRSQ_F16:
return false;
/* Not sure, be conservative, we don't use these. */
case BI_OPCODE_VN_ASST1_F16:
case BI_OPCODE_FPCLASS_F16:
case BI_OPCODE_FPOW_SC_DET_F16:
return false;
/* Not sure, be conservative, we don't use these. */
case BI_OPCODE_VN_ASST1_F16:
case BI_OPCODE_FPCLASS_F16:
case BI_OPCODE_FPOW_SC_DET_F16:
return false;
default:
break;
}
default:
break;
}
/* Replication analysis only makes sense for ALU instructions */
if (bi_opcode_props[I->op].message != BIFROST_MESSAGE_NONE)
return false;
/* Replication analysis only makes sense for ALU instructions */
if (bi_opcode_props[I->op].message != BIFROST_MESSAGE_NONE)
return false;
/* We only analyze 16-bit instructions for 16-bit replication. We could
* maybe do better.
*/
if (bi_opcode_props[I->op].size != BI_SIZE_16)
return false;
/* We only analyze 16-bit instructions for 16-bit replication. We could
* maybe do better.
*/
if (bi_opcode_props[I->op].size != BI_SIZE_16)
return false;
bi_foreach_src(I, s) {
if (bi_is_null(I->src[s]))
continue;
bi_foreach_src(I, s) {
if (bi_is_null(I->src[s]))
continue;
/* Replicated swizzles */
if (bi_swizzle_replicates_16(I->src[s].swizzle))
continue;
/* Replicated swizzles */
if (bi_swizzle_replicates_16(I->src[s].swizzle))
continue;
/* Replicated values */
if (bi_is_ssa(I->src[s]) &&
BITSET_TEST(replicates_16, I->src[s].value))
continue;
/* Replicated values */
if (bi_is_ssa(I->src[s]) && BITSET_TEST(replicates_16, I->src[s].value))
continue;
/* Replicated constants */
if (I->src[s].type == BI_INDEX_CONSTANT &&
(I->src[s].value & 0xFFFF) == (I->src[s].value >> 16))
continue;
/* Replicated constants */
if (I->src[s].type == BI_INDEX_CONSTANT &&
(I->src[s].value & 0xFFFF) == (I->src[s].value >> 16))
continue;
return false;
}
return false;
}
return true;
return true;
}
void
bi_lower_swizzle(bi_context *ctx)
{
bi_foreach_instr_global_safe(ctx, ins) {
bi_foreach_src(ins, s) {
if (bi_is_null(ins->src[s])) continue;
if (ins->src[s].swizzle == BI_SWIZZLE_H01) continue;
bi_foreach_instr_global_safe(ctx, ins) {
bi_foreach_src(ins, s) {
if (bi_is_null(ins->src[s]))
continue;
if (ins->src[s].swizzle == BI_SWIZZLE_H01)
continue;
lower_swizzle(ctx, ins, s);
}
}
lower_swizzle(ctx, ins, s);
}
}
/* Now that we've lowered swizzles, clean up the mess */
BITSET_WORD *replicates_16 = calloc(sizeof(bi_index), ctx->ssa_alloc);
/* Now that we've lowered swizzles, clean up the mess */
BITSET_WORD *replicates_16 = calloc(sizeof(bi_index), ctx->ssa_alloc);
bi_foreach_instr_global(ctx, ins) {
if (ins->nr_dests && bi_instr_replicates(ins, replicates_16))
BITSET_SET(replicates_16, ins->dest[0].value);
bi_foreach_instr_global(ctx, ins) {
if (ins->nr_dests && bi_instr_replicates(ins, replicates_16))
BITSET_SET(replicates_16, ins->dest[0].value);
if (ins->op == BI_OPCODE_SWZ_V2I16 && bi_is_ssa(ins->src[0]) &&
BITSET_TEST(replicates_16, ins->src[0].value)) {
ins->op = BI_OPCODE_MOV_I32;
ins->src[0].swizzle = BI_SWIZZLE_H01;
}
if (ins->op == BI_OPCODE_SWZ_V2I16 && bi_is_ssa(ins->src[0]) &&
BITSET_TEST(replicates_16, ins->src[0].value)) {
ins->op = BI_OPCODE_MOV_I32;
ins->src[0].swizzle = BI_SWIZZLE_H01;
}
/* The above passes rely on replicating destinations. For
* Valhall, we will want to optimize this. For now, default
* to Bifrost compatible behaviour.
*/
if (ins->nr_dests)
ins->dest[0].swizzle = BI_SWIZZLE_H01;
}
/* The above passes rely on replicating destinations. For
* Valhall, we will want to optimize this. For now, default
* to Bifrost compatible behaviour.
*/
if (ins->nr_dests)
ins->dest[0].swizzle = BI_SWIZZLE_H01;
}
free(replicates_16);
free(replicates_16);
}


@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
/* Dead simple constant folding to clean up compiler frontend patterns. Before
* adding a new pattern here, check why you need it and whether we can avoid
@ -31,83 +31,84 @@
static inline uint32_t
bi_source_value(const bi_instr *I, unsigned s)
{
if (s < I->nr_srcs)
return bi_apply_swizzle(I->src[s].value, I->src[s].swizzle);
else
return 0;
if (s < I->nr_srcs)
return bi_apply_swizzle(I->src[s].value, I->src[s].swizzle);
else
return 0;
}
uint32_t
bi_fold_constant(bi_instr *I, bool *unsupported)
{
/* We can only fold instructions where all sources are constant */
bi_foreach_src(I, s) {
if (I->src[s].type != BI_INDEX_CONSTANT) {
*unsupported = true;
return 0;
}
}
/* We can only fold instructions where all sources are constant */
bi_foreach_src(I, s) {
if (I->src[s].type != BI_INDEX_CONSTANT) {
*unsupported = true;
return 0;
}
}
/* Grab the sources */
uint32_t a = bi_source_value(I, 0);
uint32_t b = bi_source_value(I, 1);
uint32_t c = bi_source_value(I, 2);
uint32_t d = bi_source_value(I, 3);
/* Grab the sources */
uint32_t a = bi_source_value(I, 0);
uint32_t b = bi_source_value(I, 1);
uint32_t c = bi_source_value(I, 2);
uint32_t d = bi_source_value(I, 3);
/* Evaluate the instruction */
switch (I->op) {
case BI_OPCODE_SWZ_V2I16:
return a;
/* Evaluate the instruction */
switch (I->op) {
case BI_OPCODE_SWZ_V2I16:
return a;
case BI_OPCODE_MKVEC_V2I16:
return (b << 16) | (a & 0xFFFF);
case BI_OPCODE_MKVEC_V2I16:
return (b << 16) | (a & 0xFFFF);
case BI_OPCODE_MKVEC_V4I8:
return (d << 24) | ((c & 0xFF) << 16) | ((b & 0xFF) << 8) | (a & 0xFF);
case BI_OPCODE_MKVEC_V4I8:
return (d << 24) | ((c & 0xFF) << 16) | ((b & 0xFF) << 8) | (a & 0xFF);
case BI_OPCODE_MKVEC_V2I8:
return (c << 16) | ((b & 0xFF) << 8) | (a & 0xFF);
case BI_OPCODE_MKVEC_V2I8:
return (c << 16) | ((b & 0xFF) << 8) | (a & 0xFF);
case BI_OPCODE_LSHIFT_OR_I32:
if (I->not_result || I->src[0].neg || I->src[1].neg)
break;
case BI_OPCODE_LSHIFT_OR_I32:
if (I->not_result || I->src[0].neg || I->src[1].neg)
break;
return (a << c) | b;
return (a << c) | b;
case BI_OPCODE_F32_TO_U32:
if (I->round == BI_ROUND_NONE) {
/* Explicitly clamp to prevent undefined behaviour and
* match hardware rules */
float f = uif(a);
return (f >= 0.0) ? (uint32_t) f : 0;
} else
break;
case BI_OPCODE_F32_TO_U32:
if (I->round == BI_ROUND_NONE) {
/* Explicitly clamp to prevent undefined behaviour and
* match hardware rules */
float f = uif(a);
return (f >= 0.0) ? (uint32_t)f : 0;
} else
break;
default:
break;
}
default:
break;
}
*unsupported = true;
return 0;
*unsupported = true;
return 0;
}
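
A quick standalone check of the MKVEC folds above on concrete constants (values invented for the example):

  #include <stdint.h>
  #include <stdio.h>

  int
  main(void)
  {
     uint32_t a = 0x11, b = 0x22, c = 0x33, d = 0x44;

     /* Folded exactly as the MKVEC cases above */
     uint32_t v2i16 = (b << 16) | (a & 0xFFFF);
     uint32_t v4i8 = (d << 24) | ((c & 0xFF) << 16) | ((b & 0xFF) << 8) | (a & 0xFF);

     printf("MKVEC.v2i16 -> 0x%08x\n", (unsigned)v2i16); /* 0x00220011 */
     printf("MKVEC.v4i8  -> 0x%08x\n", (unsigned)v4i8);  /* 0x44332211 */
     return 0;
  }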
bool
bi_opt_constant_fold(bi_context *ctx)
{
bool progress = false;
bool progress = false;
bi_foreach_instr_global_safe(ctx, ins) {
bool unsupported = false;
uint32_t replace = bi_fold_constant(ins, &unsupported);
if (unsupported) continue;
bi_foreach_instr_global_safe(ctx, ins) {
bool unsupported = false;
uint32_t replace = bi_fold_constant(ins, &unsupported);
if (unsupported)
continue;
/* Replace with constant move, to be copypropped */
assert(ins->nr_dests == 1);
bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
bi_mov_i32_to(&b, ins->dest[0], bi_imm_u32(replace));
bi_remove_instruction(ins);
progress = true;
}
/* Replace with constant move, to be copypropped */
assert(ins->nr_dests == 1);
bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
bi_mov_i32_to(&b, ins->dest[0], bi_imm_u32(replace));
bi_remove_instruction(ins);
progress = true;
}
return progress;
return progress;
}


@ -22,92 +22,95 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
/* SSA copy propagation */
static bool
bi_reads_fau(bi_instr *ins)
{
bi_foreach_src(ins, s) {
if (ins->src[s].type == BI_INDEX_FAU)
return true;
}
bi_foreach_src(ins, s) {
if (ins->src[s].type == BI_INDEX_FAU)
return true;
}
return false;
return false;
}
void
bi_opt_copy_prop(bi_context *ctx)
{
/* Chase SPLIT of COLLECT. Instruction selection usually avoids this
* pattern (due to the split cache), but it is inevitably generated by
* the UBO pushing pass.
*/
bi_instr **collects = calloc(sizeof(bi_instr *), ctx->ssa_alloc);
bi_foreach_instr_global_safe(ctx, I) {
if (I->op == BI_OPCODE_COLLECT_I32) {
/* Rewrite trivial collects while we're at it */
if (I->nr_srcs == 1)
I->op = BI_OPCODE_MOV_I32;
/* Chase SPLIT of COLLECT. Instruction selection usually avoids this
* pattern (due to the split cache), but it is inevitably generated by
* the UBO pushing pass.
*/
bi_instr **collects = calloc(sizeof(bi_instr *), ctx->ssa_alloc);
bi_foreach_instr_global_safe(ctx, I) {
if (I->op == BI_OPCODE_COLLECT_I32) {
/* Rewrite trivial collects while we're at it */
if (I->nr_srcs == 1)
I->op = BI_OPCODE_MOV_I32;
collects[I->dest[0].value] = I;
} else if (I->op == BI_OPCODE_SPLIT_I32) {
/* Rewrite trivial splits while we're at it */
if (I->nr_dests == 1)
I->op = BI_OPCODE_MOV_I32;
collects[I->dest[0].value] = I;
} else if (I->op == BI_OPCODE_SPLIT_I32) {
/* Rewrite trivial splits while we're at it */
if (I->nr_dests == 1)
I->op = BI_OPCODE_MOV_I32;
bi_instr *collect = collects[I->src[0].value];
if (!collect)
continue;
bi_instr *collect = collects[I->src[0].value];
if (!collect)
continue;
/* Lower the split to moves, copyprop cleans up */
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
/* Lower the split to moves, copyprop cleans up */
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
bi_foreach_dest(I, d)
bi_mov_i32_to(&b, I->dest[d], collect->src[d]);
bi_foreach_dest(I, d)
bi_mov_i32_to(&b, I->dest[d], collect->src[d]);
bi_remove_instruction(I);
}
}
bi_remove_instruction(I);
}
}
free(collects);
free(collects);
bi_index *replacement = calloc(sizeof(bi_index), ctx->ssa_alloc);
bi_index *replacement = calloc(sizeof(bi_index), ctx->ssa_alloc);
bi_foreach_instr_global_safe(ctx, ins) {
if (ins->op == BI_OPCODE_MOV_I32 && ins->src[0].type != BI_INDEX_REGISTER) {
bi_index replace = ins->src[0];
bi_foreach_instr_global_safe(ctx, ins) {
if (ins->op == BI_OPCODE_MOV_I32 &&
ins->src[0].type != BI_INDEX_REGISTER) {
bi_index replace = ins->src[0];
/* Peek through one layer so copyprop converges in one
* iteration for chained moves */
if (bi_is_ssa(replace)) {
bi_index chained = replacement[replace.value];
/* Peek through one layer so copyprop converges in one
* iteration for chained moves */
if (bi_is_ssa(replace)) {
bi_index chained = replacement[replace.value];
if (!bi_is_null(chained))
replace = chained;
}
if (!bi_is_null(chained))
replace = chained;
}
assert(ins->nr_dests == 1);
replacement[ins->dest[0].value] = replace;
}
assert(ins->nr_dests == 1);
replacement[ins->dest[0].value] = replace;
}
bi_foreach_src(ins, s) {
bi_index use = ins->src[s];
bi_foreach_src(ins, s) {
bi_index use = ins->src[s];
if (use.type != BI_INDEX_NORMAL) continue;
if (bi_is_staging_src(ins, s)) continue;
if (use.type != BI_INDEX_NORMAL)
continue;
if (bi_is_staging_src(ins, s))
continue;
bi_index repl = replacement[use.value];
bi_index repl = replacement[use.value];
if (repl.type == BI_INDEX_CONSTANT && bi_reads_fau(ins))
continue;
if (repl.type == BI_INDEX_CONSTANT && bi_reads_fau(ins))
continue;
if (!bi_is_null(repl))
bi_replace_src(ins, s, repl);
}
}
if (!bi_is_null(repl))
bi_replace_src(ins, s, repl);
}
}
free(replacement);
free(replacement);
}
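
A miniature of the replacement-table walk above, as a hypothetical sketch (SSA values modelled as small integers; the FAU and staging restrictions are ignored): moves record their source, and later uses peek through one chained move so a single forward pass converges.

  #include <stdio.h>

  #define N 8

  int
  main(void)
  {
     int replacement[N];

     for (int i = 0; i < N; ++i)
        replacement[i] = -1; /* -1: no replacement recorded */

     /* Program: v1 = mov v0; v2 = mov v1; v3 = add v2, v2 */
     int movs[2][2] = {{1, 0}, {2, 1}}; /* {dest, src} pairs */

     for (int i = 0; i < 2; ++i) {
        int dest = movs[i][0], src = movs[i][1];

        /* Peek through one layer so chained moves converge immediately */
        if (replacement[src] >= 0)
           src = replacement[src];

        replacement[dest] = src;
     }

     int use = 2; /* the add reads v2 */
     if (replacement[use] >= 0)
        use = replacement[use];

     printf("the add ends up reading v%d\n", use); /* v0 */
     return 0;
  }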


@ -22,8 +22,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"
@ -36,85 +36,88 @@
static inline uint32_t
HASH(uint32_t hash, unsigned data)
{
return XXH32(&data, sizeof(data), hash);
return XXH32(&data, sizeof(data), hash);
}
static uint32_t
hash_index(uint32_t hash, bi_index index)
{
hash = HASH(hash, index.value);
hash = HASH(hash, index.abs);
hash = HASH(hash, index.neg);
hash = HASH(hash, index.swizzle);
hash = HASH(hash, index.offset);
hash = HASH(hash, index.type);
return hash;
hash = HASH(hash, index.value);
hash = HASH(hash, index.abs);
hash = HASH(hash, index.neg);
hash = HASH(hash, index.swizzle);
hash = HASH(hash, index.offset);
hash = HASH(hash, index.type);
return hash;
}
/* Hash an ALU instruction. */
static uint32_t
hash_instr(const void *data)
{
const bi_instr *I = data;
uint32_t hash = 0;
const bi_instr *I = data;
uint32_t hash = 0;
hash = HASH(hash, I->op);
hash = HASH(hash, I->nr_dests);
hash = HASH(hash, I->nr_srcs);
hash = HASH(hash, I->op);
hash = HASH(hash, I->nr_dests);
hash = HASH(hash, I->nr_srcs);
assert(!I->flow && !I->slot && "CSE must be early");
assert(!I->flow && !I->slot && "CSE must be early");
/* Explicitly skip destinations, except for size details */
bi_foreach_dest(I, d) {
hash = HASH(hash, I->dest[d].swizzle);
}
/* Explicitly skip destinations, except for size details */
bi_foreach_dest(I, d) {
hash = HASH(hash, I->dest[d].swizzle);
}
bi_foreach_src(I, s) {
hash = hash_index(hash, I->src[s]);
}
bi_foreach_src(I, s) {
hash = hash_index(hash, I->src[s]);
}
/* Explicitly skip branch, regfmt, vecsize, no_spill, tdd, table */
hash = HASH(hash, I->dest_mod);
/* Explicitly skip branch, regfmt, vecsize, no_spill, tdd, table */
hash = HASH(hash, I->dest_mod);
/* Explicitly skip other immediates */
hash = HASH(hash, I->shift);
/* Explicitly skip other immediates */
hash = HASH(hash, I->shift);
for (unsigned i = 0; i < ARRAY_SIZE(I->flags); ++i)
hash = HASH(hash, I->flags[i]);
for (unsigned i = 0; i < ARRAY_SIZE(I->flags); ++i)
hash = HASH(hash, I->flags[i]);
return hash;
return hash;
}
static bool
instrs_equal(const void *_i1, const void *_i2)
{
const bi_instr *i1 = _i1, *i2 = _i2;
const bi_instr *i1 = _i1, *i2 = _i2;
if (i1->op != i2->op) return false;
if (i1->nr_srcs != i2->nr_srcs) return false;
if (i1->nr_dests != i2->nr_dests) return false;
if (i1->op != i2->op)
return false;
if (i1->nr_srcs != i2->nr_srcs)
return false;
if (i1->nr_dests != i2->nr_dests)
return false;
/* Explicitly skip destinations */
/* Explicitly skip destinations */
bi_foreach_src(i1, s) {
bi_index s1 = i1->src[s], s2 = i2->src[s];
bi_foreach_src(i1, s) {
bi_index s1 = i1->src[s], s2 = i2->src[s];
if (memcmp(&s1, &s2, sizeof(s1)) != 0)
return false;
}
if (memcmp(&s1, &s2, sizeof(s1)) != 0)
return false;
}
if (i1->dest_mod != i2->dest_mod)
return false;
if (i1->dest_mod != i2->dest_mod)
return false;
if (i1->shift != i2->shift)
return false;
if (i1->shift != i2->shift)
return false;
for (unsigned i = 0; i < ARRAY_SIZE(i1->flags); ++i) {
if (i1->flags[i] != i2->flags[i])
return false;
}
for (unsigned i = 0; i < ARRAY_SIZE(i1->flags); ++i) {
if (i1->flags[i] != i2->flags[i])
return false;
}
return true;
return true;
}
/* Determines what instructions the above routines have to handle */
@ -122,64 +125,64 @@ instrs_equal(const void *_i1, const void *_i2)
static bool
instr_can_cse(const bi_instr *I)
{
switch (I->op) {
case BI_OPCODE_DTSEL_IMM:
case BI_OPCODE_DISCARD_F32:
return false;
default:
break;
}
switch (I->op) {
case BI_OPCODE_DTSEL_IMM:
case BI_OPCODE_DISCARD_F32:
return false;
default:
break;
}
/* Be conservative about which message-passing instructions we CSE,
* since most are not pure even within a thread.
*/
if (bi_opcode_props[I->op].message && I->op != BI_OPCODE_LEA_BUF_IMM)
return false;
/* Be conservative about which message-passing instructions we CSE,
* since most are not pure even within a thread.
*/
if (bi_opcode_props[I->op].message && I->op != BI_OPCODE_LEA_BUF_IMM)
return false;
if (I->branch_target)
return false;
if (I->branch_target)
return false;
return true;
return true;
}
void
bi_opt_cse(bi_context *ctx)
{
struct set *instr_set = _mesa_set_create(NULL, hash_instr, instrs_equal);
struct set *instr_set = _mesa_set_create(NULL, hash_instr, instrs_equal);
bi_foreach_block(ctx, block) {
bi_index *replacement = calloc(sizeof(bi_index), ctx->ssa_alloc);
_mesa_set_clear(instr_set, NULL);
bi_foreach_block(ctx, block) {
bi_index *replacement = calloc(sizeof(bi_index), ctx->ssa_alloc);
_mesa_set_clear(instr_set, NULL);
bi_foreach_instr_in_block(block, instr) {
/* Rewrite before trying to CSE anything so we converge
* locally in one iteration */
bi_foreach_ssa_src(instr, s) {
if (bi_is_staging_src(instr, s))
continue;
bi_foreach_instr_in_block(block, instr) {
/* Rewrite before trying to CSE anything so we converge
* locally in one iteration */
bi_foreach_ssa_src(instr, s) {
if (bi_is_staging_src(instr, s))
continue;
bi_index repl = replacement[instr->src[s].value];
if (!bi_is_null(repl))
bi_replace_src(instr, s, repl);
}
bi_index repl = replacement[instr->src[s].value];
if (!bi_is_null(repl))
bi_replace_src(instr, s, repl);
}
if (!instr_can_cse(instr))
continue;
if (!instr_can_cse(instr))
continue;
bool found;
struct set_entry *entry =
_mesa_set_search_or_add(instr_set, instr, &found);
if (found) {
const bi_instr *match = entry->key;
bool found;
struct set_entry *entry =
_mesa_set_search_or_add(instr_set, instr, &found);
if (found) {
const bi_instr *match = entry->key;
bi_foreach_dest(instr, d) {
replacement[instr->dest[d].value] = match->dest[d];
}
}
}
bi_foreach_dest(instr, d) {
replacement[instr->dest[d].value] = match->dest[d];
}
}
}
free(replacement);
}
free(replacement);
}
_mesa_set_destroy(instr_set, NULL);
_mesa_set_destroy(instr_set, NULL);
}
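
A miniature of the local CSE above, as a hypothetical sketch: a linear scan stands in for the hash set, instructions are reduced to (op, src0, src1, dest) tuples, and sources are rewritten before matching, mirroring the rewrite-then-search order in the pass.

  #include <stdio.h>

  struct ins {
     int op, src0, src1, dest;
  };

  int
  main(void)
  {
     struct ins prog[] = {
        {'+', 0, 1, 2},
        {'+', 0, 1, 3}, /* duplicate of the first add */
        {'*', 2, 3, 4},
     };
     int n = sizeof(prog) / sizeof(prog[0]);
     int replacement[16];

     for (int i = 0; i < 16; ++i)
        replacement[i] = i;

     for (int i = 0; i < n; ++i) {
        /* Rewrite sources first so replacements converge in one pass */
        prog[i].src0 = replacement[prog[i].src0];
        prog[i].src1 = replacement[prog[i].src1];

        for (int j = 0; j < i; ++j) {
           if (prog[j].op == prog[i].op && prog[j].src0 == prog[i].src0 &&
               prog[j].src1 == prog[i].src1) {
              replacement[prog[i].dest] = prog[j].dest;
              break;
           }
        }
     }

     printf("v3 is replaced by v%d\n", replacement[3]); /* v2 */
     return 0;
  }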


@ -22,66 +22,67 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "util/u_memory.h"
#include "compiler.h"
/* A simple SSA-based mark-and-sweep dead code elimination pass. */
void
bi_opt_dead_code_eliminate(bi_context *ctx)
{
/* Mark live values */
BITSET_WORD *mark = calloc(sizeof(BITSET_WORD), BITSET_WORDS(ctx->ssa_alloc));
/* Mark live values */
BITSET_WORD *mark =
calloc(sizeof(BITSET_WORD), BITSET_WORDS(ctx->ssa_alloc));
u_worklist worklist;
u_worklist_init(&worklist, ctx->num_blocks, NULL);
u_worklist worklist;
u_worklist_init(&worklist, ctx->num_blocks, NULL);
bi_foreach_block(ctx, block) {
bi_worklist_push_head(&worklist, block);
}
bi_foreach_block(ctx, block) {
bi_worklist_push_head(&worklist, block);
}
while(!u_worklist_is_empty(&worklist)) {
/* Pop in reverse order for backwards pass */
bi_block *blk = bi_worklist_pop_head(&worklist);
while (!u_worklist_is_empty(&worklist)) {
/* Pop in reverse order for backwards pass */
bi_block *blk = bi_worklist_pop_head(&worklist);
bool progress = false;
bool progress = false;
bi_foreach_instr_in_block_rev(blk, I) {
bool needed = bi_side_effects(I);
bi_foreach_instr_in_block_rev(blk, I) {
bool needed = bi_side_effects(I);
bi_foreach_dest(I, d)
needed |= BITSET_TEST(mark, I->dest[d].value);
bi_foreach_dest(I, d)
needed |= BITSET_TEST(mark, I->dest[d].value);
if (!needed)
continue;
if (!needed)
continue;
bi_foreach_ssa_src(I, s) {
progress |= !BITSET_TEST(mark, I->src[s].value);
BITSET_SET(mark, I->src[s].value);
}
}
bi_foreach_ssa_src(I, s) {
progress |= !BITSET_TEST(mark, I->src[s].value);
BITSET_SET(mark, I->src[s].value);
}
}
/* XXX: slow */
if (progress) {
bi_foreach_block(ctx, block)
bi_worklist_push_head(&worklist, block);
}
}
/* XXX: slow */
if (progress) {
bi_foreach_block(ctx, block)
bi_worklist_push_head(&worklist, block);
}
}
u_worklist_fini(&worklist);
u_worklist_fini(&worklist);
/* Sweep */
bi_foreach_instr_global_safe(ctx, I) {
bool needed = bi_side_effects(I);
/* Sweep */
bi_foreach_instr_global_safe(ctx, I) {
bool needed = bi_side_effects(I);
bi_foreach_dest(I, d)
needed |= BITSET_TEST(mark, I->dest[d].value);
bi_foreach_dest(I, d)
needed |= BITSET_TEST(mark, I->dest[d].value);
if (!needed)
bi_remove_instruction(I);
}
if (!needed)
bi_remove_instruction(I);
}
free(mark);
free(mark);
}
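
A miniature of the mark-and-sweep idea on a straight-line program (hypothetical encoding, -1 meaning no operand): walking backwards, an instruction is needed if it has side effects or its destination is already marked, and its sources are then marked in turn.

  #include <stdio.h>

  struct ins {
     int dest, src0, src1, has_side_effects;
  };

  int
  main(void)
  {
     struct ins prog[] = {
        {0, -1, -1, 0},  /* v0 = const             */
        {1, -1, -1, 0},  /* v1 = const (never used) */
        {2, 0, 0, 0},    /* v2 = v0 + v0            */
        {-1, 2, -1, 1},  /* store v2 (side effects) */
     };
     int n = sizeof(prog) / sizeof(prog[0]);
     int marked[8] = {0};

     /* Mark: walk backwards, keeping side effects and already-marked dests */
     for (int i = n - 1; i >= 0; --i) {
        int needed = prog[i].has_side_effects ||
                     (prog[i].dest >= 0 && marked[prog[i].dest]);

        if (!needed)
           continue;

        if (prog[i].src0 >= 0)
           marked[prog[i].src0] = 1;
        if (prog[i].src1 >= 0)
           marked[prog[i].src1] = 1;
     }

     /* Sweep */
     for (int i = 0; i < n; ++i) {
        int needed = prog[i].has_side_effects ||
                     (prog[i].dest >= 0 && marked[prog[i].dest]);

        printf("instruction %d: %s\n", i, needed ? "kept" : "removed");
     }

     return 0;
  }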
/* Post-RA liveness-based dead code analysis to clean up results of bundling */
@ -89,39 +90,39 @@ bi_opt_dead_code_eliminate(bi_context *ctx)
uint64_t MUST_CHECK
bi_postra_liveness_ins(uint64_t live, bi_instr *ins)
{
bi_foreach_dest(ins, d) {
if (ins->dest[d].type == BI_INDEX_REGISTER) {
unsigned nr = bi_count_write_registers(ins, d);
unsigned reg = ins->dest[d].value;
live &= ~(BITFIELD64_MASK(nr) << reg);
}
}
bi_foreach_dest(ins, d) {
if (ins->dest[d].type == BI_INDEX_REGISTER) {
unsigned nr = bi_count_write_registers(ins, d);
unsigned reg = ins->dest[d].value;
live &= ~(BITFIELD64_MASK(nr) << reg);
}
}
bi_foreach_src(ins, s) {
if (ins->src[s].type == BI_INDEX_REGISTER) {
unsigned nr = bi_count_read_registers(ins, s);
unsigned reg = ins->src[s].value;
live |= (BITFIELD64_MASK(nr) << reg);
}
}
bi_foreach_src(ins, s) {
if (ins->src[s].type == BI_INDEX_REGISTER) {
unsigned nr = bi_count_read_registers(ins, s);
unsigned reg = ins->src[s].value;
live |= (BITFIELD64_MASK(nr) << reg);
}
}
return live;
return live;
}
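
A worked example of the 64-bit register-mask bookkeeping above (hypothetical registers and instructions), processed bottom-up as the analysis does: a write clears its register bits, a read sets them.

  #include <stdint.h>
  #include <stdio.h>

  int
  main(void)
  {
     uint64_t live = 1ull << 4; /* r4 is live out of the block */

     /* Last instruction, processed first: r4 = add r0, r1 */
     live &= ~(1ull << 4);              /* the write kills r4 */
     live |= (1ull << 0) | (1ull << 1); /* the reads make r0, r1 live */

     /* Earlier instruction: r1 = mov r2 */
     live &= ~(1ull << 1);
     live |= (1ull << 2);

     printf("live-in mask: 0x%llx\n", (unsigned long long)live); /* r0 and r2 */
     return 0;
  }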
static bool
bi_postra_liveness_block(bi_block *blk)
{
bi_foreach_successor(blk, succ)
blk->reg_live_out |= succ->reg_live_in;
bi_foreach_successor(blk, succ)
blk->reg_live_out |= succ->reg_live_in;
uint64_t live = blk->reg_live_out;
uint64_t live = blk->reg_live_out;
bi_foreach_instr_in_block_rev(blk, ins)
live = bi_postra_liveness_ins(live, ins);
bi_foreach_instr_in_block_rev(blk, ins)
live = bi_postra_liveness_ins(live, ins);
bool progress = blk->reg_live_in != live;
blk->reg_live_in = live;
return progress;
bool progress = blk->reg_live_in != live;
blk->reg_live_in = live;
return progress;
}
/* Globally, liveness analysis uses a fixed-point algorithm based on a
@ -133,58 +134,58 @@ bi_postra_liveness_block(bi_block *blk)
void
bi_postra_liveness(bi_context *ctx)
{
u_worklist worklist;
bi_worklist_init(ctx, &worklist);
u_worklist worklist;
bi_worklist_init(ctx, &worklist);
bi_foreach_block(ctx, block) {
block->reg_live_out = block->reg_live_in = 0;
bi_foreach_block(ctx, block) {
block->reg_live_out = block->reg_live_in = 0;
bi_worklist_push_tail(&worklist, block);
}
bi_worklist_push_tail(&worklist, block);
}
while (!u_worklist_is_empty(&worklist)) {
/* Pop off in reverse order since liveness is backwards */
bi_block *blk = bi_worklist_pop_tail(&worklist);
while (!u_worklist_is_empty(&worklist)) {
/* Pop off in reverse order since liveness is backwards */
bi_block *blk = bi_worklist_pop_tail(&worklist);
/* Update liveness information. If we made progress, we need to
* reprocess the predecessors
*/
if (bi_postra_liveness_block(blk)) {
bi_foreach_predecessor(blk, pred)
bi_worklist_push_head(&worklist, *pred);
}
}
/* Update liveness information. If we made progress, we need to
* reprocess the predecessors
*/
if (bi_postra_liveness_block(blk)) {
bi_foreach_predecessor(blk, pred)
bi_worklist_push_head(&worklist, *pred);
}
}
u_worklist_fini(&worklist);
u_worklist_fini(&worklist);
}
void
bi_opt_dce_post_ra(bi_context *ctx)
{
bi_postra_liveness(ctx);
bi_postra_liveness(ctx);
bi_foreach_block_rev(ctx, block) {
uint64_t live = block->reg_live_out;
bi_foreach_block_rev(ctx, block) {
uint64_t live = block->reg_live_out;
bi_foreach_instr_in_block_rev(block, ins) {
if (ins->op == BI_OPCODE_DTSEL_IMM)
ins->dest[0] = bi_null();
bi_foreach_instr_in_block_rev(block, ins) {
if (ins->op == BI_OPCODE_DTSEL_IMM)
ins->dest[0] = bi_null();
bi_foreach_dest(ins, d) {
if (ins->dest[d].type != BI_INDEX_REGISTER)
continue;
bi_foreach_dest(ins, d) {
if (ins->dest[d].type != BI_INDEX_REGISTER)
continue;
unsigned nr = bi_count_write_registers(ins, d);
unsigned reg = ins->dest[d].value;
uint64_t mask = (BITFIELD64_MASK(nr) << reg);
bool cullable = (ins->op != BI_OPCODE_BLEND);
cullable &= !bi_opcode_props[ins->op].sr_write;
unsigned nr = bi_count_write_registers(ins, d);
unsigned reg = ins->dest[d].value;
uint64_t mask = (BITFIELD64_MASK(nr) << reg);
bool cullable = (ins->op != BI_OPCODE_BLEND);
cullable &= !bi_opcode_props[ins->op].sr_write;
if (!(live & mask) && cullable)
ins->dest[d] = bi_null();
}
if (!(live & mask) && cullable)
ins->dest[d] = bi_null();
}
live = bi_postra_liveness_ins(live, ins);
}
}
live = bi_postra_liveness_ins(live, ins);
}
}
}


@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"
@ -51,58 +51,60 @@
static inline bool
bi_can_fuse_dual_tex(bi_instr *I, bool fuse_zero_lod)
{
return (I->op == BI_OPCODE_TEXS_2D_F32 || I->op == BI_OPCODE_TEXS_2D_F16) &&
(I->texture_index < 4 && I->sampler_index < 4) &&
(I->lod_mode == fuse_zero_lod);
return (I->op == BI_OPCODE_TEXS_2D_F32 || I->op == BI_OPCODE_TEXS_2D_F16) &&
(I->texture_index < 4 && I->sampler_index < 4) &&
(I->lod_mode == fuse_zero_lod);
}
static enum bifrost_texture_format
bi_format_for_texs_2d(enum bi_opcode op)
{
switch (op) {
case BI_OPCODE_TEXS_2D_F32: return BIFROST_TEXTURE_FORMAT_F32;
case BI_OPCODE_TEXS_2D_F16: return BIFROST_TEXTURE_FORMAT_F16;
default: unreachable("Invalid TEXS_2D instruction");
}
switch (op) {
case BI_OPCODE_TEXS_2D_F32:
return BIFROST_TEXTURE_FORMAT_F32;
case BI_OPCODE_TEXS_2D_F16:
return BIFROST_TEXTURE_FORMAT_F16;
default:
unreachable("Invalid TEXS_2D instruction");
}
}
static void
bi_fuse_dual(bi_context *ctx, bi_instr *I1, bi_instr *I2)
{
/* Construct a texture operation descriptor for the dual texture */
struct bifrost_dual_texture_operation desc = {
.mode = BIFROST_TEXTURE_OPERATION_DUAL,
/* Construct a texture operation descriptor for the dual texture */
struct bifrost_dual_texture_operation desc = {
.mode = BIFROST_TEXTURE_OPERATION_DUAL,
.primary_texture_index = I1->texture_index,
.primary_sampler_index = I1->sampler_index,
.primary_format = bi_format_for_texs_2d(I1->op),
.primary_mask = 0xF,
.primary_texture_index = I1->texture_index,
.primary_sampler_index = I1->sampler_index,
.primary_format = bi_format_for_texs_2d(I1->op),
.primary_mask = 0xF,
.secondary_texture_index = I2->texture_index,
.secondary_sampler_index = I2->sampler_index,
.secondary_format = bi_format_for_texs_2d(I2->op),
.secondary_mask = 0xF,
};
.secondary_texture_index = I2->texture_index,
.secondary_sampler_index = I2->sampler_index,
.secondary_format = bi_format_for_texs_2d(I2->op),
.secondary_mask = 0xF,
};
/* LOD mode is implied in a shader stage */
assert(I1->lod_mode == I2->lod_mode);
/* LOD mode is implied in a shader stage */
assert(I1->lod_mode == I2->lod_mode);
/* Insert before the earlier instruction in case its result is consumed
* before the later instruction
*/
bi_builder b = bi_init_builder(ctx, bi_before_instr(I1));
/* Insert before the earlier instruction in case its result is consumed
* before the later instruction
*/
bi_builder b = bi_init_builder(ctx, bi_before_instr(I1));
bi_instr *I = bi_texc_dual_to(&b,
I1->dest[0], I2->dest[0], bi_null(), /* staging */
I1->src[0], I1->src[1], /* coordinates */
bi_imm_u32(bi_dual_tex_as_u32(desc)), I1->lod_mode,
bi_count_write_registers(I1, 0),
bi_count_write_registers(I2, 0));
bi_instr *I = bi_texc_dual_to(
&b, I1->dest[0], I2->dest[0], bi_null(), /* staging */
I1->src[0], I1->src[1], /* coordinates */
bi_imm_u32(bi_dual_tex_as_u32(desc)), I1->lod_mode,
bi_count_write_registers(I1, 0), bi_count_write_registers(I2, 0));
I->skip = I1->skip && I2->skip;
I->skip = I1->skip && I2->skip;
bi_remove_instruction(I1);
bi_remove_instruction(I2);
bi_remove_instruction(I1);
bi_remove_instruction(I2);
}
#define HASH(hash, data) XXH32(&(data), sizeof(data), hash)
@ -110,45 +112,45 @@ bi_fuse_dual(bi_context *ctx, bi_instr *I1, bi_instr *I2)
static uint32_t
coord_hash(const void *key)
{
const bi_instr *I = key;
const bi_instr *I = key;
return XXH32(&I->src[0], sizeof(I->src[0]) + sizeof(I->src[1]), 0);
return XXH32(&I->src[0], sizeof(I->src[0]) + sizeof(I->src[1]), 0);
}
static bool
coord_equal(const void *key1, const void *key2)
{
const bi_instr *I = key1;
const bi_instr *J = key2;
const bi_instr *I = key1;
const bi_instr *J = key2;
return memcmp(&I->src[0], &J->src[0],
sizeof(I->src[0]) + sizeof(I->src[1])) == 0;
return memcmp(&I->src[0], &J->src[0],
sizeof(I->src[0]) + sizeof(I->src[1])) == 0;
}
static void
bi_opt_fuse_dual_texture_block(bi_context *ctx, bi_block *block)
{
struct set *set = _mesa_set_create(ctx, coord_hash, coord_equal);
bool fuse_zero_lod = (ctx->stage != MESA_SHADER_FRAGMENT);
bool found = false;
struct set *set = _mesa_set_create(ctx, coord_hash, coord_equal);
bool fuse_zero_lod = (ctx->stage != MESA_SHADER_FRAGMENT);
bool found = false;
bi_foreach_instr_in_block_safe(block, I) {
if (!bi_can_fuse_dual_tex(I, fuse_zero_lod)) continue;
bi_foreach_instr_in_block_safe(block, I) {
if (!bi_can_fuse_dual_tex(I, fuse_zero_lod))
continue;
struct set_entry *ent = _mesa_set_search_or_add(set, I, &found);
struct set_entry *ent = _mesa_set_search_or_add(set, I, &found);
if (found) {
bi_fuse_dual(ctx, (bi_instr *) ent->key, I);
_mesa_set_remove(set, ent);
}
}
if (found) {
bi_fuse_dual(ctx, (bi_instr *)ent->key, I);
_mesa_set_remove(set, ent);
}
}
}
void
bi_opt_fuse_dual_texture(bi_context *ctx)
{
bi_foreach_block(ctx, block) {
bi_opt_fuse_dual_texture_block(ctx, block);
}
bi_foreach_block(ctx, block) {
bi_opt_fuse_dual_texture_block(ctx, block);
}
}


@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
/* Bifrost v7 can preload up to two messages of the form:
*
@ -35,8 +35,8 @@
static bool
bi_is_regfmt_float(enum bi_register_format regfmt)
{
return (regfmt == BI_REGISTER_FORMAT_F32) ||
(regfmt == BI_REGISTER_FORMAT_F16);
return (regfmt == BI_REGISTER_FORMAT_F32) ||
(regfmt == BI_REGISTER_FORMAT_F16);
}
/*
@ -46,107 +46,107 @@ bi_is_regfmt_float(enum bi_register_format regfmt)
static bool
bi_can_interp_at_sample(bi_instr *I)
{
/* .sample mode with r61 corresponds to per-sample interpolation */
if (I->sample == BI_SAMPLE_SAMPLE)
return bi_is_value_equiv(I->src[0], bi_register(61));
/* .sample mode with r61 corresponds to per-sample interpolation */
if (I->sample == BI_SAMPLE_SAMPLE)
return bi_is_value_equiv(I->src[0], bi_register(61));
/* If the shader runs with pixel-frequency shading, .sample is
* equivalent to .center, so allow .center
*
* If the shader runs with sample-frequency shading, .sample and .center
* are not equivalent. However, the ESSL 3.20 specification
* stipulates in section 4.5 ("Interpolation Qualifiers"):
*
* for fragment shader input variables qualified with neither
* centroid nor sample, the value of the assigned variable may be
* interpolated anywhere within the pixel and a single value may be
* assigned to each sample within the pixel, to the extent permitted
* by the OpenGL ES Specification.
*
* We only produce .center for variables qualified with neither centroid
* nor sample, so if .center is specified this section applies. This
* suggests that, although per-pixel interpolation is allowed, it is not
* mandated ("may" rather than "must" or "should"). Therefore it appears
* safe to substitute sample.
*/
return (I->sample == BI_SAMPLE_CENTER);
/* If the shader runs with pixel-frequency shading, .sample is
* equivalent to .center, so allow .center
*
* If the shader runs with sample-frequency shading, .sample and .center
* are not equivalent. However, the ESSL 3.20 specification
* stipulates in section 4.5 ("Interpolation Qualifiers"):
*
* for fragment shader input variables qualified with neither
* centroid nor sample, the value of the assigned variable may be
* interpolated anywhere within the pixel and a single value may be
* assigned to each sample within the pixel, to the extent permitted
* by the OpenGL ES Specification.
*
* We only produce .center for variables qualified with neither centroid
* nor sample, so if .center is specified this section applies. This
* suggests that, although per-pixel interpolation is allowed, it is not
* mandated ("may" rather than "must" or "should"). Therefore it appears
* safe to substitute sample.
*/
return (I->sample == BI_SAMPLE_CENTER);
}
static bool
bi_can_preload_ld_var(bi_instr *I)
{
return (I->op == BI_OPCODE_LD_VAR_IMM) &&
bi_can_interp_at_sample(I) &&
bi_is_regfmt_float(I->register_format);
return (I->op == BI_OPCODE_LD_VAR_IMM) && bi_can_interp_at_sample(I) &&
bi_is_regfmt_float(I->register_format);
}
static bool
bi_is_var_tex(enum bi_opcode op)
{
return (op == BI_OPCODE_VAR_TEX_F32) || (op == BI_OPCODE_VAR_TEX_F16);
return (op == BI_OPCODE_VAR_TEX_F32) || (op == BI_OPCODE_VAR_TEX_F16);
}
void
bi_opt_message_preload(bi_context *ctx)
{
unsigned nr_preload = 0;
unsigned nr_preload = 0;
/* We only preload from the first block */
bi_block *block = bi_start_block(&ctx->blocks);
bi_builder b = bi_init_builder(ctx, bi_before_nonempty_block(block));
/* We only preload from the first block */
bi_block *block = bi_start_block(&ctx->blocks);
bi_builder b = bi_init_builder(ctx, bi_before_nonempty_block(block));
bi_foreach_instr_in_block_safe(block, I) {
if (I->nr_dests != 1) continue;
bi_foreach_instr_in_block_safe(block, I) {
if (I->nr_dests != 1)
continue;
struct bifrost_message_preload msg;
struct bifrost_message_preload msg;
if (bi_can_preload_ld_var(I)) {
msg = (struct bifrost_message_preload) {
.enabled = true,
.varying_index = I->varying_index,
.fp16 = (I->register_format == BI_REGISTER_FORMAT_F16),
.num_components = I->vecsize + 1,
};
} else if (bi_is_var_tex(I->op)) {
msg = (struct bifrost_message_preload) {
.enabled = true,
.texture = true,
.varying_index = I->varying_index,
.texture_index = I->texture_index,
.fp16 = (I->op == BI_OPCODE_VAR_TEX_F16),
.skip = I->skip,
.zero_lod = I->lod_mode,
};
} else {
continue;
}
if (bi_can_preload_ld_var(I)) {
msg = (struct bifrost_message_preload){
.enabled = true,
.varying_index = I->varying_index,
.fp16 = (I->register_format == BI_REGISTER_FORMAT_F16),
.num_components = I->vecsize + 1,
};
} else if (bi_is_var_tex(I->op)) {
msg = (struct bifrost_message_preload){
.enabled = true,
.texture = true,
.varying_index = I->varying_index,
.texture_index = I->texture_index,
.fp16 = (I->op == BI_OPCODE_VAR_TEX_F16),
.skip = I->skip,
.zero_lod = I->lod_mode,
};
} else {
continue;
}
/* Report the preloading */
ctx->info.bifrost->messages[nr_preload] = msg;
/* Report the preloading */
ctx->info.bifrost->messages[nr_preload] = msg;
/* Replace with a collect of preloaded registers. The collect
* kills the moves, so the collect is free (it is coalesced).
*/
b.cursor = bi_before_instr(I);
/* Replace with a collect of preloaded registers. The collect
* kills the moves, so the collect is free (it is coalesced).
*/
b.cursor = bi_before_instr(I);
unsigned nr = bi_count_write_registers(I, 0);
bi_instr *collect = bi_collect_i32_to(&b, I->dest[0], nr);
unsigned nr = bi_count_write_registers(I, 0);
bi_instr *collect = bi_collect_i32_to(&b, I->dest[0], nr);
/* The registers themselves must be preloaded at the start of
* the program. Preloaded registers are coalesced, so these
* moves are free.
*/
b.cursor = bi_before_block(block);
bi_foreach_src(collect, i) {
unsigned reg = (nr_preload * 4) + i;
/* The registers themselves must be preloaded at the start of
* the program. Preloaded registers are coalesced, so these
* moves are free.
*/
b.cursor = bi_before_block(block);
bi_foreach_src(collect, i) {
unsigned reg = (nr_preload * 4) + i;
collect->src[i] = bi_mov_i32(&b, bi_register(reg));
}
collect->src[i] = bi_mov_i32(&b, bi_register(reg));
}
bi_remove_instruction(I);
bi_remove_instruction(I);
/* Maximum number of preloaded messages */
if ((++nr_preload) == 2)
break;
}
/* Maximum number of preloaded messages */
if ((++nr_preload) == 2)
break;
}
}


@ -22,8 +22,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
/*
* Due to a Bifrost encoding restriction, some instructions cannot have an abs
@ -33,76 +33,76 @@
static bool
bi_would_impact_abs(unsigned arch, bi_instr *I, bi_index repl, unsigned s)
{
return (arch <= 8) && I->src[1 - s].abs &&
bi_is_word_equiv(I->src[1 - s], repl);
return (arch <= 8) && I->src[1 - s].abs &&
bi_is_word_equiv(I->src[1 - s], repl);
}
static bool
bi_takes_fabs(unsigned arch, bi_instr *I, bi_index repl, unsigned s)
{
switch (I->op) {
case BI_OPCODE_FCMP_V2F16:
case BI_OPCODE_FMAX_V2F16:
case BI_OPCODE_FMIN_V2F16:
return !bi_would_impact_abs(arch, I, repl, s);
case BI_OPCODE_FADD_V2F16:
/*
* For FADD.v2f16, the FMA pipe has the abs encoding hazard,
* while the FADD pipe cannot encode a clamp. Either case in
* isolation can be worked around in the scheduler, but both
* together are impossible to encode. Avoid the hazard.
*/
return !(I->clamp && bi_would_impact_abs(arch, I, repl, s));
case BI_OPCODE_V2F32_TO_V2F16:
/* TODO: Needs both match or lower */
return false;
case BI_OPCODE_FLOG_TABLE_F32:
/* TODO: Need to check mode */
return false;
default:
return bi_opcode_props[I->op].abs & BITFIELD_BIT(s);
}
switch (I->op) {
case BI_OPCODE_FCMP_V2F16:
case BI_OPCODE_FMAX_V2F16:
case BI_OPCODE_FMIN_V2F16:
return !bi_would_impact_abs(arch, I, repl, s);
case BI_OPCODE_FADD_V2F16:
/*
* For FADD.v2f16, the FMA pipe has the abs encoding hazard,
* while the FADD pipe cannot encode a clamp. Either case in
* isolation can be worked around in the scheduler, but both
* together is impossible to encode. Avoid the hazard.
*/
return !(I->clamp && bi_would_impact_abs(arch, I, repl, s));
case BI_OPCODE_V2F32_TO_V2F16:
/* TODO: Needs both match or lower */
return false;
case BI_OPCODE_FLOG_TABLE_F32:
/* TODO: Need to check mode */
return false;
default:
return bi_opcode_props[I->op].abs & BITFIELD_BIT(s);
}
}
static bool
bi_takes_fneg(unsigned arch, bi_instr *I, unsigned s)
{
switch (I->op) {
case BI_OPCODE_CUBE_SSEL:
case BI_OPCODE_CUBE_TSEL:
case BI_OPCODE_CUBEFACE:
/* TODO: Bifrost encoding restriction: need to match or lower */
return arch >= 9;
case BI_OPCODE_FREXPE_F32:
case BI_OPCODE_FREXPE_V2F16:
case BI_OPCODE_FLOG_TABLE_F32:
/* TODO: Need to check mode */
return false;
default:
return bi_opcode_props[I->op].neg & BITFIELD_BIT(s);
}
switch (I->op) {
case BI_OPCODE_CUBE_SSEL:
case BI_OPCODE_CUBE_TSEL:
case BI_OPCODE_CUBEFACE:
/* TODO: Bifrost encoding restriction: need to match or lower */
return arch >= 9;
case BI_OPCODE_FREXPE_F32:
case BI_OPCODE_FREXPE_V2F16:
case BI_OPCODE_FLOG_TABLE_F32:
/* TODO: Need to check mode */
return false;
default:
return bi_opcode_props[I->op].neg & BITFIELD_BIT(s);
}
}
static bool
bi_is_fabsneg(enum bi_opcode op, enum bi_size size)
{
return (size == BI_SIZE_32 && op == BI_OPCODE_FABSNEG_F32) ||
(size == BI_SIZE_16 && op == BI_OPCODE_FABSNEG_V2F16);
return (size == BI_SIZE_32 && op == BI_OPCODE_FABSNEG_F32) ||
(size == BI_SIZE_16 && op == BI_OPCODE_FABSNEG_V2F16);
}
static enum bi_swizzle
bi_compose_swizzle_16(enum bi_swizzle a, enum bi_swizzle b)
{
assert(a <= BI_SWIZZLE_H11);
assert(b <= BI_SWIZZLE_H11);
assert(a <= BI_SWIZZLE_H11);
assert(b <= BI_SWIZZLE_H11);
bool al = (a & BI_SWIZZLE_H10);
bool ar = (a & BI_SWIZZLE_H01);
bool bl = (b & BI_SWIZZLE_H10);
bool br = (b & BI_SWIZZLE_H01);
bool al = (a & BI_SWIZZLE_H10);
bool ar = (a & BI_SWIZZLE_H01);
bool bl = (b & BI_SWIZZLE_H10);
bool br = (b & BI_SWIZZLE_H01);
return ((al ? br : bl) ? BI_SWIZZLE_H10 : 0) |
((ar ? br : bl) ? BI_SWIZZLE_H01 : 0);
return ((al ? br : bl) ? BI_SWIZZLE_H10 : 0) |
((ar ? br : bl) ? BI_SWIZZLE_H01 : 0);
}
/* Like bi_replace_index, but composes instead of overwrites */
@ -110,17 +110,17 @@ bi_compose_swizzle_16(enum bi_swizzle a, enum bi_swizzle b)
static inline bi_index
bi_compose_float_index(bi_index old, bi_index repl)
{
/* abs(-x) = abs(+x) so ignore repl.neg if old.abs is set, otherwise
* -(-x) = x but -(+x) = +(-x) so need to exclusive-or the negates */
repl.neg = old.neg ^ (repl.neg && !old.abs);
/* abs(-x) = abs(+x) so ignore repl.neg if old.abs is set, otherwise
* -(-x) = x but -(+x) = +(-x) so need to exclusive-or the negates */
repl.neg = old.neg ^ (repl.neg && !old.abs);
/* +/- abs(+/- abs(x)) = +/- abs(x), etc so just or the two */
repl.abs |= old.abs;
/* +/- abs(+/- abs(x)) = +/- abs(x), etc so just or the two */
repl.abs |= old.abs;
/* Use the old swizzle to select from the replacement swizzle */
repl.swizzle = bi_compose_swizzle_16(old.swizzle, repl.swizzle);
/* Use the old swizzle to select from the replacement swizzle */
repl.swizzle = bi_compose_swizzle_16(old.swizzle, repl.swizzle);
return repl;
return repl;
}
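The two composition rules above can be sanity-checked in isolation. A minimal standalone sketch, assuming a hypothetical float_mods struct that stands in for the abs/neg bits on an index (not driver code):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the abs/neg modifier bits on an index. */
struct float_mods {
   bool abs;
   bool neg;
};

/* Compose as bi_compose_float_index does: the inner (repl) negation is
 * xor'd in unless the outer (old) abs already discards it; abs is sticky.
 */
static struct float_mods
compose_mods(struct float_mods old, struct float_mods repl)
{
   struct float_mods out = repl;
   out.neg = old.neg ^ (repl.neg && !old.abs);
   out.abs = repl.abs || old.abs;
   return out;
}

int
main(void)
{
   /* Enumerate all 16 combinations and print the composed modifiers. */
   for (int o = 0; o < 4; ++o) {
      for (int r = 0; r < 4; ++r) {
         struct float_mods old = {.abs = o & 1, .neg = o >> 1};
         struct float_mods repl = {.abs = r & 1, .neg = r >> 1};
         struct float_mods c = compose_mods(old, repl);

         printf("old(abs=%d neg=%d) repl(abs=%d neg=%d) -> abs=%d neg=%d\n",
                old.abs, old.neg, repl.abs, repl.neg, c.abs, c.neg);
      }
   }
   return 0;
}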
/* DISCARD.b32(FCMP.f(x, y)) --> DISCARD.f(x, y) */
@ -128,30 +128,35 @@ bi_compose_float_index(bi_index old, bi_index repl)
static inline bool
bi_fuse_discard_fcmp(bi_context *ctx, bi_instr *I, bi_instr *mod)
{
if (!mod) return false;
if (I->op != BI_OPCODE_DISCARD_B32) return false;
if (mod->op != BI_OPCODE_FCMP_F32 && mod->op != BI_OPCODE_FCMP_V2F16) return false;
if (mod->cmpf >= BI_CMPF_GTLT) return false;
if (!mod)
return false;
if (I->op != BI_OPCODE_DISCARD_B32)
return false;
if (mod->op != BI_OPCODE_FCMP_F32 && mod->op != BI_OPCODE_FCMP_V2F16)
return false;
if (mod->cmpf >= BI_CMPF_GTLT)
return false;
/* result_type doesn't matter */
/* result_type doesn't matter */
/* .abs and .neg modifiers allowed on Valhall DISCARD but not Bifrost */
bool absneg = mod->src[0].neg || mod->src[0].abs;
absneg |= mod->src[1].neg || mod->src[1].abs;
/* .abs and .neg modifiers allowed on Valhall DISCARD but not Bifrost */
bool absneg = mod->src[0].neg || mod->src[0].abs;
absneg |= mod->src[1].neg || mod->src[1].abs;
if (ctx->arch <= 8 && absneg) return false;
if (ctx->arch <= 8 && absneg)
return false;
enum bi_swizzle r = I->src[0].swizzle;
enum bi_swizzle r = I->src[0].swizzle;
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
I = bi_discard_f32(&b, mod->src[0], mod->src[1], mod->cmpf);
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
I = bi_discard_f32(&b, mod->src[0], mod->src[1], mod->cmpf);
if (mod->op == BI_OPCODE_FCMP_V2F16) {
I->src[0].swizzle = bi_compose_swizzle_16(r, I->src[0].swizzle);
I->src[1].swizzle = bi_compose_swizzle_16(r, I->src[1].swizzle);
}
if (mod->op == BI_OPCODE_FCMP_V2F16) {
I->src[0].swizzle = bi_compose_swizzle_16(r, I->src[0].swizzle);
I->src[1].swizzle = bi_compose_swizzle_16(r, I->src[1].swizzle);
}
return true;
return true;
}
/*
@ -159,80 +164,80 @@ bi_fuse_discard_fcmp(bi_context *ctx, bi_instr *I, bi_instr *mod)
* because all 8-bit and 16-bit integers may be represented exactly as fp32.
*/
struct {
enum bi_opcode inner;
enum bi_opcode outer;
enum bi_opcode replacement;
enum bi_opcode inner;
enum bi_opcode outer;
enum bi_opcode replacement;
} bi_small_int_patterns[] = {
{ BI_OPCODE_S8_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S8_TO_F32 },
{ BI_OPCODE_U8_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U8_TO_F32 },
{ BI_OPCODE_U8_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U8_TO_F32 },
{ BI_OPCODE_S16_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S16_TO_F32 },
{ BI_OPCODE_U16_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U16_TO_F32 },
{ BI_OPCODE_U16_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U16_TO_F32 },
{BI_OPCODE_S8_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S8_TO_F32},
{BI_OPCODE_U8_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U8_TO_F32},
{BI_OPCODE_U8_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U8_TO_F32},
{BI_OPCODE_S16_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S16_TO_F32},
{BI_OPCODE_U16_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U16_TO_F32},
{BI_OPCODE_U16_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U16_TO_F32},
};
static inline void
bi_fuse_small_int_to_f32(bi_instr *I, bi_instr *mod)
{
for (unsigned i = 0; i < ARRAY_SIZE(bi_small_int_patterns); ++i) {
if (I->op != bi_small_int_patterns[i].outer)
continue;
if (mod->op != bi_small_int_patterns[i].inner)
continue;
for (unsigned i = 0; i < ARRAY_SIZE(bi_small_int_patterns); ++i) {
if (I->op != bi_small_int_patterns[i].outer)
continue;
if (mod->op != bi_small_int_patterns[i].inner)
continue;
assert(I->src[0].swizzle == BI_SWIZZLE_H01);
I->src[0] = mod->src[0];
I->round = BI_ROUND_NONE;
I->op = bi_small_int_patterns[i].replacement;
}
assert(I->src[0].swizzle == BI_SWIZZLE_H01);
I->src[0] = mod->src[0];
I->round = BI_ROUND_NONE;
I->op = bi_small_int_patterns[i].replacement;
}
}
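A minimal standalone check of the premise stated above (8-bit and 16-bit integers are exact in fp32), which is what makes folding the widening integer conversion into the float conversion safe; not driver code:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   /* fp32 has a 24-bit significand, so every 16-bit (and 8-bit) integer
    * survives the round trip; S16_TO_S32 followed by S32_TO_F32 therefore
    * gives the same result as a direct S16_TO_F32.
    */
   for (int32_t v = INT16_MIN; v <= INT16_MAX; ++v)
      assert((int32_t)(float)v == v);

   printf("all 16-bit integers are exact in fp32\n");
   return 0;
}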
void
bi_opt_mod_prop_forward(bi_context *ctx)
{
bi_instr **lut = calloc(sizeof(bi_instr *), ctx->ssa_alloc);
bi_instr **lut = calloc(sizeof(bi_instr *), ctx->ssa_alloc);
bi_foreach_instr_global_safe(ctx, I) {
/* Try fusing FCMP into DISCARD.b32, building a new DISCARD.f32
* instruction. As this is the only optimization DISCARD is
* involved in, this short-circuits other processing.
*/
if (I->op == BI_OPCODE_DISCARD_B32) {
if (bi_is_ssa(I->src[0]) &&
bi_fuse_discard_fcmp(ctx, I, lut[I->src[0].value])) {
bi_remove_instruction(I);
}
bi_foreach_instr_global_safe(ctx, I) {
/* Try fusing FCMP into DISCARD.b32, building a new DISCARD.f32
* instruction. As this is the only optimization DISCARD is
* involved in, this short-circuits other processing.
*/
if (I->op == BI_OPCODE_DISCARD_B32) {
if (bi_is_ssa(I->src[0]) &&
bi_fuse_discard_fcmp(ctx, I, lut[I->src[0].value])) {
bi_remove_instruction(I);
}
continue;
}
continue;
}
bi_foreach_dest(I, d) {
lut[I->dest[d].value] = I;
}
bi_foreach_dest(I, d) {
lut[I->dest[d].value] = I;
}
bi_foreach_ssa_src(I, s) {
bi_instr *mod = lut[I->src[s].value];
bi_foreach_ssa_src(I, s) {
bi_instr *mod = lut[I->src[s].value];
if (!mod)
continue;
if (!mod)
continue;
unsigned size = bi_opcode_props[I->op].size;
unsigned size = bi_opcode_props[I->op].size;
bi_fuse_small_int_to_f32(I, mod);
bi_fuse_small_int_to_f32(I, mod);
if (bi_is_fabsneg(mod->op, size)) {
if (mod->src[0].abs && !bi_takes_fabs(ctx->arch, I, mod->src[0], s))
continue;
if (bi_is_fabsneg(mod->op, size)) {
if (mod->src[0].abs && !bi_takes_fabs(ctx->arch, I, mod->src[0], s))
continue;
if (mod->src[0].neg && !bi_takes_fneg(ctx->arch, I, s))
continue;
if (mod->src[0].neg && !bi_takes_fneg(ctx->arch, I, s))
continue;
I->src[s] = bi_compose_float_index(I->src[s], mod->src[0]);
}
}
}
I->src[s] = bi_compose_float_index(I->src[s], mod->src[0]);
}
}
}
free(lut);
free(lut);
}
/* RSCALE has restrictions on how the clamp may be used, only used for
@ -241,199 +246,207 @@ bi_opt_mod_prop_forward(bi_context *ctx)
static bool
bi_takes_clamp(bi_instr *I)
{
switch (I->op) {
case BI_OPCODE_FMA_RSCALE_F32:
case BI_OPCODE_FMA_RSCALE_V2F16:
case BI_OPCODE_FADD_RSCALE_F32:
return false;
case BI_OPCODE_FADD_V2F16:
/* Encoding restriction */
return !(I->src[0].abs && I->src[1].abs &&
bi_is_word_equiv(I->src[0], I->src[1]));
default:
return bi_opcode_props[I->op].clamp;
}
switch (I->op) {
case BI_OPCODE_FMA_RSCALE_F32:
case BI_OPCODE_FMA_RSCALE_V2F16:
case BI_OPCODE_FADD_RSCALE_F32:
return false;
case BI_OPCODE_FADD_V2F16:
/* Encoding restriction */
return !(I->src[0].abs && I->src[1].abs &&
bi_is_word_equiv(I->src[0], I->src[1]));
default:
return bi_opcode_props[I->op].clamp;
}
}
static bool
bi_is_fclamp(enum bi_opcode op, enum bi_size size)
{
return (size == BI_SIZE_32 && op == BI_OPCODE_FCLAMP_F32) ||
(size == BI_SIZE_16 && op == BI_OPCODE_FCLAMP_V2F16);
return (size == BI_SIZE_32 && op == BI_OPCODE_FCLAMP_F32) ||
(size == BI_SIZE_16 && op == BI_OPCODE_FCLAMP_V2F16);
}
static bool
bi_optimizer_clamp(bi_instr *I, bi_instr *use)
{
if (!bi_is_fclamp(use->op, bi_opcode_props[I->op].size)) return false;
if (!bi_takes_clamp(I)) return false;
if (!bi_is_fclamp(use->op, bi_opcode_props[I->op].size))
return false;
if (!bi_takes_clamp(I))
return false;
/* Clamps are bitfields (clamp_m1_1/clamp_0_inf) so composition is OR */
I->clamp |= use->clamp;
I->dest[0] = use->dest[0];
return true;
/* Clamps are bitfields (clamp_m1_1/clamp_0_inf) so composition is OR */
I->clamp |= use->clamp;
I->dest[0] = use->dest[0];
return true;
}
static enum bi_opcode
bi_sized_mux_op(unsigned size)
{
switch (size) {
case 8: return BI_OPCODE_MUX_V4I8;
case 16: return BI_OPCODE_MUX_V2I16;
case 32: return BI_OPCODE_MUX_I32;
default: unreachable("invalid size");
}
switch (size) {
case 8:
return BI_OPCODE_MUX_V4I8;
case 16:
return BI_OPCODE_MUX_V2I16;
case 32:
return BI_OPCODE_MUX_I32;
default:
unreachable("invalid size");
}
}
static bool
bi_is_fixed_mux(bi_instr *I, unsigned size, bi_index v1)
{
return I->op == bi_sized_mux_op(size) &&
bi_is_value_equiv(I->src[0], bi_zero()) &&
bi_is_value_equiv(I->src[1], v1);
return I->op == bi_sized_mux_op(size) &&
bi_is_value_equiv(I->src[0], bi_zero()) &&
bi_is_value_equiv(I->src[1], v1);
}
static bool
bi_takes_int_result_type(enum bi_opcode op)
{
switch (op) {
case BI_OPCODE_ICMP_I32:
case BI_OPCODE_ICMP_S32:
case BI_OPCODE_ICMP_U32:
case BI_OPCODE_ICMP_V2I16:
case BI_OPCODE_ICMP_V2S16:
case BI_OPCODE_ICMP_V2U16:
case BI_OPCODE_ICMP_V4I8:
case BI_OPCODE_ICMP_V4S8:
case BI_OPCODE_ICMP_V4U8:
case BI_OPCODE_FCMP_F32:
case BI_OPCODE_FCMP_V2F16:
return true;
default:
return false;
}
switch (op) {
case BI_OPCODE_ICMP_I32:
case BI_OPCODE_ICMP_S32:
case BI_OPCODE_ICMP_U32:
case BI_OPCODE_ICMP_V2I16:
case BI_OPCODE_ICMP_V2S16:
case BI_OPCODE_ICMP_V2U16:
case BI_OPCODE_ICMP_V4I8:
case BI_OPCODE_ICMP_V4S8:
case BI_OPCODE_ICMP_V4U8:
case BI_OPCODE_FCMP_F32:
case BI_OPCODE_FCMP_V2F16:
return true;
default:
return false;
}
}
static bool
bi_takes_float_result_type(enum bi_opcode op)
{
return (op == BI_OPCODE_FCMP_F32) ||
(op == BI_OPCODE_FCMP_V2F16);
return (op == BI_OPCODE_FCMP_F32) || (op == BI_OPCODE_FCMP_V2F16);
}
/* CMP+MUX -> CMP with result type */
static bool
bi_optimizer_result_type(bi_instr *I, bi_instr *mux)
{
if (bi_opcode_props[I->op].size != bi_opcode_props[mux->op].size)
return false;
if (bi_opcode_props[I->op].size != bi_opcode_props[mux->op].size)
return false;
if (bi_is_fixed_mux(mux, 32, bi_imm_f32(1.0)) ||
bi_is_fixed_mux(mux, 16, bi_imm_f16(1.0))) {
if (bi_is_fixed_mux(mux, 32, bi_imm_f32(1.0)) ||
bi_is_fixed_mux(mux, 16, bi_imm_f16(1.0))) {
if (!bi_takes_float_result_type(I->op))
return false;
if (!bi_takes_float_result_type(I->op))
return false;
I->result_type = BI_RESULT_TYPE_F1;
} else if (bi_is_fixed_mux(mux, 32, bi_imm_u32(1)) ||
bi_is_fixed_mux(mux, 16, bi_imm_u16(1)) ||
bi_is_fixed_mux(mux, 8, bi_imm_u8(1))) {
I->result_type = BI_RESULT_TYPE_F1;
} else if (bi_is_fixed_mux(mux, 32, bi_imm_u32(1)) ||
bi_is_fixed_mux(mux, 16, bi_imm_u16(1)) ||
bi_is_fixed_mux(mux, 8, bi_imm_u8(1))) {
if (!bi_takes_int_result_type(I->op))
return false;
if (!bi_takes_int_result_type(I->op))
return false;
I->result_type = BI_RESULT_TYPE_I1;
} else {
return false;
}
I->result_type = BI_RESULT_TYPE_I1;
} else {
return false;
}
I->dest[0] = mux->dest[0];
return true;
I->dest[0] = mux->dest[0];
return true;
}
static bool
bi_is_var_tex(bi_instr *var, bi_instr *tex)
{
return (var->op == BI_OPCODE_LD_VAR_IMM) &&
(tex->op == BI_OPCODE_TEXS_2D_F16 || tex->op == BI_OPCODE_TEXS_2D_F32) &&
(var->register_format == BI_REGISTER_FORMAT_F32) &&
((var->sample == BI_SAMPLE_CENTER && var->update == BI_UPDATE_STORE) ||
(var->sample == BI_SAMPLE_NONE && var->update == BI_UPDATE_RETRIEVE)) &&
(tex->texture_index == tex->sampler_index) &&
(tex->texture_index < 4) &&
(var->index < 8);
return (var->op == BI_OPCODE_LD_VAR_IMM) &&
(tex->op == BI_OPCODE_TEXS_2D_F16 ||
tex->op == BI_OPCODE_TEXS_2D_F32) &&
(var->register_format == BI_REGISTER_FORMAT_F32) &&
((var->sample == BI_SAMPLE_CENTER &&
var->update == BI_UPDATE_STORE) ||
(var->sample == BI_SAMPLE_NONE &&
var->update == BI_UPDATE_RETRIEVE)) &&
(tex->texture_index == tex->sampler_index) &&
(tex->texture_index < 4) && (var->index < 8);
}
static bool
bi_optimizer_var_tex(bi_context *ctx, bi_instr *var, bi_instr *tex)
{
if (!bi_is_var_tex(var, tex)) return false;
if (!bi_is_var_tex(var, tex))
return false;
/* Construct the corresponding VAR_TEX instruction */
bi_builder b = bi_init_builder(ctx, bi_after_instr(var));
/* Construct the corresponding VAR_TEX instruction */
bi_builder b = bi_init_builder(ctx, bi_after_instr(var));
bi_instr *I = bi_var_tex_f32_to(&b, tex->dest[0], tex->lod_mode,
var->sample, var->update, tex->texture_index, var->index);
I->skip = tex->skip;
bi_instr *I = bi_var_tex_f32_to(&b, tex->dest[0], tex->lod_mode, var->sample,
var->update, tex->texture_index, var->index);
I->skip = tex->skip;
if (tex->op == BI_OPCODE_TEXS_2D_F16)
I->op = BI_OPCODE_VAR_TEX_F16;
if (tex->op == BI_OPCODE_TEXS_2D_F16)
I->op = BI_OPCODE_VAR_TEX_F16;
/* Dead code elimination will clean up for us */
return true;
/* Dead code elimination will clean up for us */
return true;
}
void
bi_opt_mod_prop_backward(bi_context *ctx)
{
unsigned count = ctx->ssa_alloc;
bi_instr **uses = calloc(count, sizeof(*uses));
BITSET_WORD *multiple = calloc(BITSET_WORDS(count), sizeof(*multiple));
unsigned count = ctx->ssa_alloc;
bi_instr **uses = calloc(count, sizeof(*uses));
BITSET_WORD *multiple = calloc(BITSET_WORDS(count), sizeof(*multiple));
bi_foreach_instr_global_rev(ctx, I) {
bi_foreach_ssa_src(I, s) {
unsigned v = I->src[s].value;
bi_foreach_instr_global_rev(ctx, I) {
bi_foreach_ssa_src(I, s) {
unsigned v = I->src[s].value;
if (uses[v] && uses[v] != I)
BITSET_SET(multiple, v);
else
uses[v] = I;
}
if (uses[v] && uses[v] != I)
BITSET_SET(multiple, v);
else
uses[v] = I;
}
if (!I->nr_dests)
continue;
if (!I->nr_dests)
continue;
bi_instr *use = uses[I->dest[0].value];
bi_instr *use = uses[I->dest[0].value];
if (!use || BITSET_TEST(multiple, I->dest[0].value))
continue;
if (!use || BITSET_TEST(multiple, I->dest[0].value))
continue;
/* Destination has a single use, try to propagate */
bool propagated =
bi_optimizer_clamp(I, use) ||
bi_optimizer_result_type(I, use);
/* Destination has a single use, try to propagate */
bool propagated =
bi_optimizer_clamp(I, use) || bi_optimizer_result_type(I, use);
if (!propagated && I->op == BI_OPCODE_LD_VAR_IMM && use->op == BI_OPCODE_SPLIT_I32) {
/* Need to see through the split in a
* ld_var_imm/split/var_tex sequence
*/
bi_instr *tex = uses[use->dest[0].value];
if (!propagated && I->op == BI_OPCODE_LD_VAR_IMM &&
use->op == BI_OPCODE_SPLIT_I32) {
/* Need to see through the split in a
* ld_var_imm/split/var_tex sequence
*/
bi_instr *tex = uses[use->dest[0].value];
if (!tex || BITSET_TEST(multiple, use->dest[0].value))
continue;
if (!tex || BITSET_TEST(multiple, use->dest[0].value))
continue;
use = tex;
propagated = bi_optimizer_var_tex(ctx, I, use);
}
use = tex;
propagated = bi_optimizer_var_tex(ctx, I, use);
}
if (propagated) {
bi_remove_instruction(use);
continue;
}
}
if (propagated) {
bi_remove_instruction(use);
continue;
}
}
free(uses);
free(multiple);
free(uses);
free(multiple);
}
/*
@ -443,37 +456,37 @@ bi_opt_mod_prop_backward(bi_context *ctx)
static bool
bi_lower_opt_instruction_helper(bi_builder *b, bi_instr *I)
{
bi_instr *repl;
bi_instr *repl;
switch (I->op) {
case BI_OPCODE_FABSNEG_F32:
case BI_OPCODE_FCLAMP_F32:
repl = bi_fadd_f32_to(b, I->dest[0], I->src[0], bi_negzero());
repl->clamp = I->clamp;
return true;
switch (I->op) {
case BI_OPCODE_FABSNEG_F32:
case BI_OPCODE_FCLAMP_F32:
repl = bi_fadd_f32_to(b, I->dest[0], I->src[0], bi_negzero());
repl->clamp = I->clamp;
return true;
case BI_OPCODE_FABSNEG_V2F16:
case BI_OPCODE_FCLAMP_V2F16:
repl = bi_fadd_v2f16_to(b, I->dest[0], I->src[0], bi_negzero());
repl->clamp = I->clamp;
return true;
case BI_OPCODE_FABSNEG_V2F16:
case BI_OPCODE_FCLAMP_V2F16:
repl = bi_fadd_v2f16_to(b, I->dest[0], I->src[0], bi_negzero());
repl->clamp = I->clamp;
return true;
case BI_OPCODE_DISCARD_B32:
bi_discard_f32(b, I->src[0], bi_zero(), BI_CMPF_NE);
return true;
case BI_OPCODE_DISCARD_B32:
bi_discard_f32(b, I->src[0], bi_zero(), BI_CMPF_NE);
return true;
default:
return false;
}
default:
return false;
}
}
void
bi_lower_opt_instructions(bi_context *ctx)
{
bi_foreach_instr_global_safe(ctx, I) {
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
bi_foreach_instr_global_safe(ctx, I) {
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
if (bi_lower_opt_instruction_helper(&b, I))
bi_remove_instruction(I);
}
if (bi_lower_opt_instruction_helper(&b, I))
bi_remove_instruction(I);
}
}
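A minimal standalone check, under the assumption that the lowering above relies on x + (-0.0) being an identity in round-to-nearest (so the replacement FADD only exists to carry the clamp and source modifiers); not driver code:

#include <math.h>
#include <stdio.h>

int
main(void)
{
   /* Adding -0.0 returns the other operand unchanged, including +0.0
    * (whose sign is preserved), so lowering FABSNEG/FCLAMP to an FADD
    * against negative zero does not perturb the value.
    */
   float samples[] = {0.0f, -0.0f, 1.5f, -2.25f, 1e-30f};

   for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); ++i) {
      float x = samples[i];
      float y = x + -0.0f;

      printf("%g + -0.0 = %g (signbit %d -> %d)\n", x, y, !!signbit(x),
             !!signbit(y));
   }
   return 0;
}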


@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
/* This optimization pass, intended to run once after code emission but before
* copy propagation, analyzes direct word-aligned UBO reads and promotes a
@ -32,17 +32,16 @@
static bool
bi_is_ubo(bi_instr *ins)
{
return (bi_opcode_props[ins->op].message == BIFROST_MESSAGE_LOAD) &&
(ins->seg == BI_SEG_UBO);
return (bi_opcode_props[ins->op].message == BIFROST_MESSAGE_LOAD) &&
(ins->seg == BI_SEG_UBO);
}
static bool
bi_is_direct_aligned_ubo(bi_instr *ins)
{
return bi_is_ubo(ins) &&
(ins->src[0].type == BI_INDEX_CONSTANT) &&
(ins->src[1].type == BI_INDEX_CONSTANT) &&
((ins->src[0].value & 0x3) == 0);
return bi_is_ubo(ins) && (ins->src[0].type == BI_INDEX_CONSTANT) &&
(ins->src[1].type == BI_INDEX_CONSTANT) &&
((ins->src[0].value & 0x3) == 0);
}
/* Represents use data for a single UBO */
@ -50,44 +49,46 @@ bi_is_direct_aligned_ubo(bi_instr *ins)
#define MAX_UBO_WORDS (65536 / 16)
struct bi_ubo_block {
BITSET_DECLARE(pushed, MAX_UBO_WORDS);
uint8_t range[MAX_UBO_WORDS];
BITSET_DECLARE(pushed, MAX_UBO_WORDS);
uint8_t range[MAX_UBO_WORDS];
};
struct bi_ubo_analysis {
/* Per block analysis */
unsigned nr_blocks;
struct bi_ubo_block *blocks;
/* Per block analysis */
unsigned nr_blocks;
struct bi_ubo_block *blocks;
};
static struct bi_ubo_analysis
bi_analyze_ranges(bi_context *ctx)
{
struct bi_ubo_analysis res = {
.nr_blocks = ctx->nir->info.num_ubos + 1,
};
struct bi_ubo_analysis res = {
.nr_blocks = ctx->nir->info.num_ubos + 1,
};
res.blocks = calloc(res.nr_blocks, sizeof(struct bi_ubo_block));
res.blocks = calloc(res.nr_blocks, sizeof(struct bi_ubo_block));
bi_foreach_instr_global(ctx, ins) {
if (!bi_is_direct_aligned_ubo(ins)) continue;
bi_foreach_instr_global(ctx, ins) {
if (!bi_is_direct_aligned_ubo(ins))
continue;
unsigned ubo = ins->src[1].value;
unsigned word = ins->src[0].value / 4;
unsigned channels = bi_opcode_props[ins->op].sr_count;
unsigned ubo = ins->src[1].value;
unsigned word = ins->src[0].value / 4;
unsigned channels = bi_opcode_props[ins->op].sr_count;
assert(ubo < res.nr_blocks);
assert(channels > 0 && channels <= 4);
assert(ubo < res.nr_blocks);
assert(channels > 0 && channels <= 4);
if (word >= MAX_UBO_WORDS) continue;
if (word >= MAX_UBO_WORDS)
continue;
/* Must use max if the same base is read with different channel
* counts, which is possible with nir_opt_shrink_vectors */
uint8_t *range = res.blocks[ubo].range;
range[word] = MAX2(range[word], channels);
}
/* Must use max if the same base is read with different channel
* counts, which is possible with nir_opt_shrink_vectors */
uint8_t *range = res.blocks[ubo].range;
range[word] = MAX2(range[word], channels);
}
return res;
return res;
}
/* Select UBO words to push. A sophisticated implementation would consider the
@ -97,92 +98,93 @@ bi_analyze_ranges(bi_context *ctx)
static void
bi_pick_ubo(struct panfrost_ubo_push *push, struct bi_ubo_analysis *analysis)
{
for (signed ubo = analysis->nr_blocks - 1; ubo >= 0; --ubo) {
struct bi_ubo_block *block = &analysis->blocks[ubo];
for (signed ubo = analysis->nr_blocks - 1; ubo >= 0; --ubo) {
struct bi_ubo_block *block = &analysis->blocks[ubo];
for (unsigned r = 0; r < MAX_UBO_WORDS; ++r) {
unsigned range = block->range[r];
for (unsigned r = 0; r < MAX_UBO_WORDS; ++r) {
unsigned range = block->range[r];
/* Don't push something we don't access */
if (range == 0) continue;
/* Don't push something we don't access */
if (range == 0)
continue;
/* Don't push more than possible */
if (push->count > PAN_MAX_PUSH - range)
return;
/* Don't push more than possible */
if (push->count > PAN_MAX_PUSH - range)
return;
for (unsigned offs = 0; offs < range; ++offs) {
struct panfrost_ubo_word word = {
.ubo = ubo,
.offset = (r + offs) * 4,
};
for (unsigned offs = 0; offs < range; ++offs) {
struct panfrost_ubo_word word = {
.ubo = ubo,
.offset = (r + offs) * 4,
};
push->words[push->count++] = word;
}
push->words[push->count++] = word;
}
/* Mark it as pushed so we can rewrite */
BITSET_SET(block->pushed, r);
}
}
/* Mark it as pushed so we can rewrite */
BITSET_SET(block->pushed, r);
}
}
}
void
bi_opt_push_ubo(bi_context *ctx)
{
struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx);
bi_pick_ubo(ctx->info.push, &analysis);
struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx);
bi_pick_ubo(ctx->info.push, &analysis);
ctx->ubo_mask = 0;
ctx->ubo_mask = 0;
bi_foreach_instr_global_safe(ctx, ins) {
if (!bi_is_ubo(ins)) continue;
bi_foreach_instr_global_safe(ctx, ins) {
if (!bi_is_ubo(ins))
continue;
unsigned ubo = ins->src[1].value;
unsigned offset = ins->src[0].value;
unsigned ubo = ins->src[1].value;
unsigned offset = ins->src[0].value;
if (!bi_is_direct_aligned_ubo(ins)) {
/* The load can't be pushed, so this UBO needs to be
* uploaded conventionally */
if (ins->src[1].type == BI_INDEX_CONSTANT)
ctx->ubo_mask |= BITSET_BIT(ubo);
else
ctx->ubo_mask = ~0;
if (!bi_is_direct_aligned_ubo(ins)) {
/* The load can't be pushed, so this UBO needs to be
* uploaded conventionally */
if (ins->src[1].type == BI_INDEX_CONSTANT)
ctx->ubo_mask |= BITSET_BIT(ubo);
else
ctx->ubo_mask = ~0;
continue;
}
continue;
}
/* Check if we decided to push this */
assert(ubo < analysis.nr_blocks);
if (!BITSET_TEST(analysis.blocks[ubo].pushed, offset / 4)) {
ctx->ubo_mask |= BITSET_BIT(ubo);
continue;
}
/* Check if we decided to push this */
assert(ubo < analysis.nr_blocks);
if (!BITSET_TEST(analysis.blocks[ubo].pushed, offset / 4)) {
ctx->ubo_mask |= BITSET_BIT(ubo);
continue;
}
/* Replace the UBO load with moves from FAU */
bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
/* Replace the UBO load with moves from FAU */
bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
unsigned nr = bi_opcode_props[ins->op].sr_count;
bi_instr *vec = bi_collect_i32_to(&b, ins->dest[0], nr);
unsigned nr = bi_opcode_props[ins->op].sr_count;
bi_instr *vec = bi_collect_i32_to(&b, ins->dest[0], nr);
bi_foreach_src(vec, w) {
/* FAU is grouped in pairs (2 x 4-byte) */
unsigned base =
pan_lookup_pushed_ubo(ctx->info.push, ubo,
(offset + 4 * w));
bi_foreach_src(vec, w) {
/* FAU is grouped in pairs (2 x 4-byte) */
unsigned base =
pan_lookup_pushed_ubo(ctx->info.push, ubo, (offset + 4 * w));
unsigned fau_idx = (base >> 1);
unsigned fau_hi = (base & 1);
unsigned fau_idx = (base >> 1);
unsigned fau_hi = (base & 1);
vec->src[w] = bi_fau(BIR_FAU_UNIFORM | fau_idx, fau_hi);
}
vec->src[w] = bi_fau(BIR_FAU_UNIFORM | fau_idx, fau_hi);
}
bi_remove_instruction(ins);
}
bi_remove_instruction(ins);
}
free(analysis.blocks);
free(analysis.blocks);
}
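The FAU addressing used in the rewrite above is simple: a push-constant word index maps to a 64-bit FAU slot plus a high/low half. A minimal standalone sketch with hypothetical names, not driver code:

#include <stdio.h>

/* Split a 32-bit push-constant word index into its FAU slot (a pair of
 * 32-bit words) and the half within that slot, mirroring the shift/mask
 * applied when a pushed UBO load is rewritten into moves from FAU.
 */
static void
fau_address(unsigned word, unsigned *slot, unsigned *half)
{
   *slot = word >> 1; /* which 64-bit FAU entry */
   *half = word & 1;  /* low (0) or high (1) 32-bit word */
}

int
main(void)
{
   for (unsigned word = 0; word < 6; ++word) {
      unsigned slot, half;
      fau_address(word, &slot, &half);
      printf("word %u -> fau slot %u, %s half\n", word, slot,
             half ? "high" : "low");
   }
   return 0;
}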
typedef struct {
BITSET_DECLARE(row, PAN_MAX_PUSH);
BITSET_DECLARE(row, PAN_MAX_PUSH);
} adjacency_row;
/* Find the connected component containing `node` with depth-first search */
@ -190,33 +192,32 @@ static void
bi_find_component(adjacency_row *adjacency, BITSET_WORD *visited,
unsigned *component, unsigned *size, unsigned node)
{
unsigned neighbour;
unsigned neighbour;
BITSET_SET(visited, node);
component[(*size)++] = node;
BITSET_SET(visited, node);
component[(*size)++] = node;
BITSET_FOREACH_SET(neighbour, adjacency[node].row, PAN_MAX_PUSH) {
if (!BITSET_TEST(visited, neighbour)) {
bi_find_component(adjacency, visited, component, size,
neighbour);
}
}
BITSET_FOREACH_SET(neighbour, adjacency[node].row, PAN_MAX_PUSH) {
if (!BITSET_TEST(visited, neighbour)) {
bi_find_component(adjacency, visited, component, size, neighbour);
}
}
}
static bool
bi_is_uniform(bi_index idx)
{
return (idx.type == BI_INDEX_FAU) && (idx.value & BIR_FAU_UNIFORM);
return (idx.type == BI_INDEX_FAU) && (idx.value & BIR_FAU_UNIFORM);
}
/* Get the index of a uniform in 32-bit words from the start of FAU-RAM */
static unsigned
bi_uniform_word(bi_index idx)
{
assert(bi_is_uniform(idx));
assert(idx.offset <= 1);
assert(bi_is_uniform(idx));
assert(idx.offset <= 1);
return ((idx.value & ~BIR_FAU_UNIFORM) << 1) | idx.offset;
return ((idx.value & ~BIR_FAU_UNIFORM) << 1) | idx.offset;
}
/*
@ -228,35 +229,35 @@ bi_uniform_word(bi_index idx)
static void
bi_create_fau_interference_graph(bi_context *ctx, adjacency_row *adjacency)
{
bi_foreach_instr_global(ctx, I) {
unsigned nodes[BI_MAX_SRCS] = {};
unsigned node_count = 0;
bi_foreach_instr_global(ctx, I) {
unsigned nodes[BI_MAX_SRCS] = {};
unsigned node_count = 0;
/* Set nodes[] to 32-bit uniforms accessed */
bi_foreach_src(I, s) {
if (bi_is_uniform(I->src[s])) {
unsigned word = bi_uniform_word(I->src[s]);
/* Set nodes[] to 32-bit uniforms accessed */
bi_foreach_src(I, s) {
if (bi_is_uniform(I->src[s])) {
unsigned word = bi_uniform_word(I->src[s]);
if (word >= ctx->info.push_offset)
nodes[node_count++] = word;
}
}
if (word >= ctx->info.push_offset)
nodes[node_count++] = word;
}
}
/* Create clique connecting nodes[] */
for (unsigned i = 0; i < node_count; ++i) {
for (unsigned j = 0; j < node_count; ++j) {
if (i == j)
continue;
/* Create clique connecting nodes[] */
for (unsigned i = 0; i < node_count; ++i) {
for (unsigned j = 0; j < node_count; ++j) {
if (i == j)
continue;
unsigned x = nodes[i], y = nodes[j];
assert(MAX2(x, y) < ctx->info.push->count);
unsigned x = nodes[i], y = nodes[j];
assert(MAX2(x, y) < ctx->info.push->count);
/* Add undirected edge between the nodes */
BITSET_SET(adjacency[x].row, y);
BITSET_SET(adjacency[y].row, x);
}
}
}
/* Add undirected edge between the nodes */
BITSET_SET(adjacency[x].row, y);
BITSET_SET(adjacency[y].row, x);
}
}
}
}
/*
@ -278,71 +279,72 @@ bi_create_fau_interference_graph(bi_context *ctx, adjacency_row *adjacency)
void
bi_opt_reorder_push(bi_context *ctx)
{
adjacency_row adjacency[PAN_MAX_PUSH] = { 0 };
BITSET_DECLARE(visited, PAN_MAX_PUSH) = { 0 };
adjacency_row adjacency[PAN_MAX_PUSH] = {0};
BITSET_DECLARE(visited, PAN_MAX_PUSH) = {0};
unsigned ordering[PAN_MAX_PUSH] = { 0 };
unsigned unpaired[PAN_MAX_PUSH] = { 0 };
unsigned pushed = 0, unpaired_count = 0;
unsigned ordering[PAN_MAX_PUSH] = {0};
unsigned unpaired[PAN_MAX_PUSH] = {0};
unsigned pushed = 0, unpaired_count = 0;
struct panfrost_ubo_push *push = ctx->info.push;
unsigned push_offset = ctx->info.push_offset;
struct panfrost_ubo_push *push = ctx->info.push;
unsigned push_offset = ctx->info.push_offset;
bi_create_fau_interference_graph(ctx, adjacency);
bi_create_fau_interference_graph(ctx, adjacency);
for (unsigned i = push_offset; i < push->count; ++i) {
if (BITSET_TEST(visited, i)) continue;
for (unsigned i = push_offset; i < push->count; ++i) {
if (BITSET_TEST(visited, i))
continue;
unsigned component[PAN_MAX_PUSH] = { 0 };
unsigned size = 0;
bi_find_component(adjacency, visited, component, &size, i);
unsigned component[PAN_MAX_PUSH] = {0};
unsigned size = 0;
bi_find_component(adjacency, visited, component, &size, i);
/* If there is an odd number of uses, at least one use must be
* unpaired. Arbitrarily take the last one.
*/
if (size % 2)
unpaired[unpaired_count++] = component[--size];
/* If there is an odd number of uses, at least one use must be
* unpaired. Arbitrarily take the last one.
*/
if (size % 2)
unpaired[unpaired_count++] = component[--size];
/* The rest of uses are paired */
assert((size % 2) == 0);
/* The rest of uses are paired */
assert((size % 2) == 0);
/* Push the paired uses */
memcpy(ordering + pushed, component, sizeof(unsigned) * size);
pushed += size;
}
/* Push the paired uses */
memcpy(ordering + pushed, component, sizeof(unsigned) * size);
pushed += size;
}
/* Push unpaired nodes at the end */
memcpy(ordering + pushed, unpaired, sizeof(unsigned) * unpaired_count);
pushed += unpaired_count;
/* Push unpaired nodes at the end */
memcpy(ordering + pushed, unpaired, sizeof(unsigned) * unpaired_count);
pushed += unpaired_count;
/* Ordering is a permutation. Invert it for O(1) lookup. */
unsigned old_to_new[PAN_MAX_PUSH] = { 0 };
/* Ordering is a permutation. Invert it for O(1) lookup. */
unsigned old_to_new[PAN_MAX_PUSH] = {0};
for (unsigned i = 0; i < push_offset; ++i) {
old_to_new[i] = i;
}
for (unsigned i = 0; i < push_offset; ++i) {
old_to_new[i] = i;
}
for (unsigned i = 0; i < pushed; ++i) {
assert(ordering[i] >= push_offset);
old_to_new[ordering[i]] = push_offset + i;
}
for (unsigned i = 0; i < pushed; ++i) {
assert(ordering[i] >= push_offset);
old_to_new[ordering[i]] = push_offset + i;
}
/* Use new ordering throughout the program */
bi_foreach_instr_global(ctx, I) {
bi_foreach_src(I, s) {
if (bi_is_uniform(I->src[s])) {
unsigned node = bi_uniform_word(I->src[s]);
unsigned new_node = old_to_new[node];
I->src[s].value = BIR_FAU_UNIFORM | (new_node >> 1);
I->src[s].offset = new_node & 1;
}
}
}
/* Use new ordering throughout the program */
bi_foreach_instr_global(ctx, I) {
bi_foreach_src(I, s) {
if (bi_is_uniform(I->src[s])) {
unsigned node = bi_uniform_word(I->src[s]);
unsigned new_node = old_to_new[node];
I->src[s].value = BIR_FAU_UNIFORM | (new_node >> 1);
I->src[s].offset = new_node & 1;
}
}
}
/* Use new ordering for push */
struct panfrost_ubo_push old = *push;
for (unsigned i = 0; i < pushed; ++i)
push->words[push_offset + i] = old.words[ordering[i]];
/* Use new ordering for push */
struct panfrost_ubo_push old = *push;
for (unsigned i = 0; i < pushed; ++i)
push->words[push_offset + i] = old.words[ordering[i]];
push->count = push_offset + pushed;
push->count = push_offset + pushed;
}
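A minimal standalone version of the component walk used above, over a plain boolean adjacency matrix instead of the bitset rows; names and sizes are hypothetical, not driver code:

#include <stdbool.h>
#include <stdio.h>

#define N 6

/* Depth-first search collecting one connected component, analogous to
 * bi_find_component but over a simple adjacency matrix.
 */
static void
find_component(bool adj[N][N], bool *visited, unsigned *component,
               unsigned *size, unsigned node)
{
   visited[node] = true;
   component[(*size)++] = node;

   for (unsigned n = 0; n < N; ++n) {
      if (adj[node][n] && !visited[n])
         find_component(adj, visited, component, size, n);
   }
}

int
main(void)
{
   /* Two components: {0, 1, 2} and {3, 4}; node 5 is isolated. */
   bool adj[N][N] = {{false}};
   adj[0][1] = adj[1][0] = true;
   adj[1][2] = adj[2][1] = true;
   adj[3][4] = adj[4][3] = true;

   bool visited[N] = {false};

   for (unsigned i = 0; i < N; ++i) {
      if (visited[i])
         continue;

      unsigned component[N], size = 0;
      find_component(adj, visited, component, &size, i);

      printf("component:");
      for (unsigned j = 0; j < size; ++j)
         printf(" %u", component[j]);
      printf("\n");
   }
   return 0;
}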

File diff suppressed because it is too large


@ -26,149 +26,148 @@
/* Bottom-up local scheduler to reduce register pressure */
#include "compiler.h"
#include "util/dag.h"
#include "compiler.h"
struct sched_ctx {
/* Dependency graph */
struct dag *dag;
/* Dependency graph */
struct dag *dag;
/* Live set */
BITSET_WORD *live;
/* Live set */
BITSET_WORD *live;
};
struct sched_node {
struct dag_node dag;
struct dag_node dag;
/* Instruction this node represents */
bi_instr *instr;
/* Instruction this node represents */
bi_instr *instr;
};
static void
add_dep(struct sched_node *a, struct sched_node *b)
{
if (a && b)
dag_add_edge(&a->dag, &b->dag, 0);
if (a && b)
dag_add_edge(&a->dag, &b->dag, 0);
}
static struct dag *
create_dag(bi_context *ctx, bi_block *block, void *memctx)
{
struct dag *dag = dag_create(ctx);
struct dag *dag = dag_create(ctx);
struct sched_node **last_write =
calloc(ctx->ssa_alloc, sizeof(struct sched_node *));
struct sched_node *coverage = NULL;
struct sched_node *preload = NULL;
struct sched_node **last_write =
calloc(ctx->ssa_alloc, sizeof(struct sched_node *));
struct sched_node *coverage = NULL;
struct sched_node *preload = NULL;
/* Last memory load, to serialize stores against */
struct sched_node *memory_load = NULL;
/* Last memory load, to serialize stores against */
struct sched_node *memory_load = NULL;
/* Last memory store, to serialize loads and stores against */
struct sched_node *memory_store = NULL;
/* Last memory store, to serialize loads and stores against */
struct sched_node *memory_store = NULL;
bi_foreach_instr_in_block(block, I) {
/* Leave branches at the end */
if (I->op == BI_OPCODE_JUMP || bi_opcode_props[I->op].branch)
break;
bi_foreach_instr_in_block(block, I) {
/* Leave branches at the end */
if (I->op == BI_OPCODE_JUMP || bi_opcode_props[I->op].branch)
break;
assert(I->branch_target == NULL);
assert(I->branch_target == NULL);
struct sched_node *node = rzalloc(memctx, struct sched_node);
node->instr = I;
dag_init_node(dag, &node->dag);
struct sched_node *node = rzalloc(memctx, struct sched_node);
node->instr = I;
dag_init_node(dag, &node->dag);
/* Reads depend on writes, no other hazards in SSA */
bi_foreach_ssa_src(I, s)
add_dep(node, last_write[I->src[s].value]);
/* Reads depend on writes, no other hazards in SSA */
bi_foreach_ssa_src(I, s)
add_dep(node, last_write[I->src[s].value]);
bi_foreach_dest(I, d)
last_write[I->dest[d].value] = node;
bi_foreach_dest(I, d)
last_write[I->dest[d].value] = node;
switch (bi_opcode_props[I->op].message) {
case BIFROST_MESSAGE_LOAD:
/* Regular memory loads need to be serialized against
* other memory access. However, UBO memory is read-only
* so it can be moved around freely.
*/
if (I->seg != BI_SEG_UBO) {
add_dep(node, memory_store);
memory_load = node;
}
switch (bi_opcode_props[I->op].message) {
case BIFROST_MESSAGE_LOAD:
/* Regular memory loads need to be serialized against
* other memory access. However, UBO memory is read-only
* so it can be moved around freely.
*/
if (I->seg != BI_SEG_UBO) {
add_dep(node, memory_store);
memory_load = node;
}
break;
break;
case BIFROST_MESSAGE_ATTRIBUTE:
/* Regular attribute loads can be reordered, but
* writeable attributes can't be. Our one use of
* writeable attributes is images.
*/
if ((I->op == BI_OPCODE_LD_TEX) ||
(I->op == BI_OPCODE_LD_TEX_IMM) ||
(I->op == BI_OPCODE_LD_ATTR_TEX)) {
add_dep(node, memory_store);
memory_load = node;
}
case BIFROST_MESSAGE_ATTRIBUTE:
/* Regular attribute loads can be reordered, but
* writeable attributes can't be. Our one use of
* writeable attributes is images.
*/
if ((I->op == BI_OPCODE_LD_TEX) || (I->op == BI_OPCODE_LD_TEX_IMM) ||
(I->op == BI_OPCODE_LD_ATTR_TEX)) {
add_dep(node, memory_store);
memory_load = node;
}
break;
break;
case BIFROST_MESSAGE_STORE:
assert(I->seg != BI_SEG_UBO);
add_dep(node, memory_load);
add_dep(node, memory_store);
memory_store = node;
break;
case BIFROST_MESSAGE_STORE:
assert(I->seg != BI_SEG_UBO);
add_dep(node, memory_load);
add_dep(node, memory_store);
memory_store = node;
break;
case BIFROST_MESSAGE_ATOMIC:
case BIFROST_MESSAGE_BARRIER:
add_dep(node, memory_load);
add_dep(node, memory_store);
memory_load = node;
memory_store = node;
break;
case BIFROST_MESSAGE_ATOMIC:
case BIFROST_MESSAGE_BARRIER:
add_dep(node, memory_load);
add_dep(node, memory_store);
memory_load = node;
memory_store = node;
break;
case BIFROST_MESSAGE_BLEND:
case BIFROST_MESSAGE_Z_STENCIL:
case BIFROST_MESSAGE_TILE:
add_dep(node, coverage);
coverage = node;
break;
case BIFROST_MESSAGE_BLEND:
case BIFROST_MESSAGE_Z_STENCIL:
case BIFROST_MESSAGE_TILE:
add_dep(node, coverage);
coverage = node;
break;
case BIFROST_MESSAGE_ATEST:
/* ATEST signals the end of shader side effects */
add_dep(node, memory_store);
memory_store = node;
case BIFROST_MESSAGE_ATEST:
/* ATEST signals the end of shader side effects */
add_dep(node, memory_store);
memory_store = node;
/* ATEST also updates coverage */
add_dep(node, coverage);
coverage = node;
break;
default:
break;
}
/* ATEST also updates coverage */
add_dep(node, coverage);
coverage = node;
break;
default:
break;
}
add_dep(node, preload);
add_dep(node, preload);
if (I->op == BI_OPCODE_DISCARD_F32) {
/* Serialize against ATEST */
add_dep(node, coverage);
coverage = node;
if (I->op == BI_OPCODE_DISCARD_F32) {
/* Serialize against ATEST */
add_dep(node, coverage);
coverage = node;
/* Also serialize against memory and barriers */
add_dep(node, memory_load);
add_dep(node, memory_store);
memory_load = node;
memory_store = node;
} else if ((I->op == BI_OPCODE_PHI) ||
(I->op == BI_OPCODE_MOV_I32 &&
I->src[0].type == BI_INDEX_REGISTER)) {
preload = node;
}
}
/* Also serialize against memory and barriers */
add_dep(node, memory_load);
add_dep(node, memory_store);
memory_load = node;
memory_store = node;
} else if ((I->op == BI_OPCODE_PHI) ||
(I->op == BI_OPCODE_MOV_I32 &&
I->src[0].type == BI_INDEX_REGISTER)) {
preload = node;
}
}
free(last_write);
free(last_write);
return dag;
return dag;
}
/*
@ -183,30 +182,30 @@ create_dag(bi_context *ctx, bi_block *block, void *memctx)
static signed
calculate_pressure_delta(bi_instr *I, BITSET_WORD *live)
{
signed delta = 0;
signed delta = 0;
/* Destinations must be unique */
bi_foreach_dest(I, d) {
if (BITSET_TEST(live, I->dest[d].value))
delta -= bi_count_write_registers(I, d);
}
/* Destinations must be unique */
bi_foreach_dest(I, d) {
if (BITSET_TEST(live, I->dest[d].value))
delta -= bi_count_write_registers(I, d);
}
bi_foreach_ssa_src(I, src) {
/* Filter duplicates */
bool dupe = false;
bi_foreach_ssa_src(I, src) {
/* Filter duplicates */
bool dupe = false;
for (unsigned i = 0; i < src; ++i) {
if (bi_is_equiv(I->src[i], I->src[src])) {
dupe = true;
break;
}
}
for (unsigned i = 0; i < src; ++i) {
if (bi_is_equiv(I->src[i], I->src[src])) {
dupe = true;
break;
}
}
if (!dupe && !BITSET_TEST(live, I->src[src].value))
delta += bi_count_read_registers(I, src);
}
if (!dupe && !BITSET_TEST(live, I->src[src].value))
delta += bi_count_read_registers(I, src);
}
return delta;
return delta;
}
/*
@ -216,87 +215,88 @@ calculate_pressure_delta(bi_instr *I, BITSET_WORD *live)
static struct sched_node *
choose_instr(struct sched_ctx *s)
{
int32_t min_delta = INT32_MAX;
struct sched_node *best = NULL;
int32_t min_delta = INT32_MAX;
struct sched_node *best = NULL;
list_for_each_entry(struct sched_node, n, &s->dag->heads, dag.link) {
int32_t delta = calculate_pressure_delta(n->instr, s->live);
list_for_each_entry(struct sched_node, n, &s->dag->heads, dag.link) {
int32_t delta = calculate_pressure_delta(n->instr, s->live);
if (delta < min_delta) {
best = n;
min_delta = delta;
}
}
if (delta < min_delta) {
best = n;
min_delta = delta;
}
}
return best;
return best;
}
static void
pressure_schedule_block(bi_context *ctx, bi_block *block, struct sched_ctx *s)
{
/* off by a constant, that's ok */
signed pressure = 0;
signed orig_max_pressure = 0;
unsigned nr_ins = 0;
/* off by a constant, that's ok */
signed pressure = 0;
signed orig_max_pressure = 0;
unsigned nr_ins = 0;
memcpy(s->live, block->ssa_live_out, BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD));
memcpy(s->live, block->ssa_live_out,
BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD));
bi_foreach_instr_in_block_rev(block, I) {
pressure += calculate_pressure_delta(I, s->live);
orig_max_pressure = MAX2(pressure, orig_max_pressure);
bi_liveness_ins_update_ssa(s->live, I);
nr_ins++;
}
bi_foreach_instr_in_block_rev(block, I) {
pressure += calculate_pressure_delta(I, s->live);
orig_max_pressure = MAX2(pressure, orig_max_pressure);
bi_liveness_ins_update_ssa(s->live, I);
nr_ins++;
}
memcpy(s->live, block->ssa_live_out, BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD));
memcpy(s->live, block->ssa_live_out,
BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD));
/* off by a constant, that's ok */
signed max_pressure = 0;
pressure = 0;
/* off by a constant, that's ok */
signed max_pressure = 0;
pressure = 0;
struct sched_node **schedule = calloc(nr_ins, sizeof(struct sched_node *));
nr_ins = 0;
struct sched_node **schedule = calloc(nr_ins, sizeof(struct sched_node *));
nr_ins = 0;
while (!list_is_empty(&s->dag->heads)) {
struct sched_node *node = choose_instr(s);
pressure += calculate_pressure_delta(node->instr, s->live);
max_pressure = MAX2(pressure, max_pressure);
dag_prune_head(s->dag, &node->dag);
while (!list_is_empty(&s->dag->heads)) {
struct sched_node *node = choose_instr(s);
pressure += calculate_pressure_delta(node->instr, s->live);
max_pressure = MAX2(pressure, max_pressure);
dag_prune_head(s->dag, &node->dag);
schedule[nr_ins++] = node;
bi_liveness_ins_update_ssa(s->live, node->instr);
}
schedule[nr_ins++] = node;
bi_liveness_ins_update_ssa(s->live, node->instr);
}
/* Bail if it looks like it's worse */
if (max_pressure >= orig_max_pressure) {
free(schedule);
return;
}
/* Bail if it looks like it's worse */
if (max_pressure >= orig_max_pressure) {
free(schedule);
return;
}
/* Apply the schedule */
for (unsigned i = 0; i < nr_ins; ++i) {
bi_remove_instruction(schedule[i]->instr);
list_add(&schedule[i]->instr->link, &block->instructions);
}
/* Apply the schedule */
for (unsigned i = 0; i < nr_ins; ++i) {
bi_remove_instruction(schedule[i]->instr);
list_add(&schedule[i]->instr->link, &block->instructions);
}
free(schedule);
free(schedule);
}
void
bi_pressure_schedule(bi_context *ctx)
{
bi_compute_liveness_ssa(ctx);
void *memctx = ralloc_context(ctx);
BITSET_WORD *live = ralloc_array(memctx, BITSET_WORD, BITSET_WORDS(ctx->ssa_alloc));
bi_compute_liveness_ssa(ctx);
void *memctx = ralloc_context(ctx);
BITSET_WORD *live =
ralloc_array(memctx, BITSET_WORD, BITSET_WORDS(ctx->ssa_alloc));
bi_foreach_block(ctx, block) {
struct sched_ctx sctx = {
.dag = create_dag(ctx, block, memctx),
.live = live
};
bi_foreach_block(ctx, block) {
struct sched_ctx sctx = {.dag = create_dag(ctx, block, memctx),
.live = live};
pressure_schedule_block(ctx, block, &sctx);
}
pressure_schedule_block(ctx, block, &sctx);
}
ralloc_free(memctx);
ralloc_free(memctx);
}
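A minimal standalone illustration of the pressure-delta heuristic the scheduler minimizes at each step: scheduling an instruction (bottom-up) removes its live destination from the live set and adds any sources that were not live yet. The toy_instr type and values below are hypothetical, not driver code:

#include <stdbool.h>
#include <stdio.h>

/* Toy instruction: one destination and up to two sources, each a small
 * SSA value index (or -1 for "no source").
 */
struct toy_instr {
   int dest;
   int src[2];
};

/* Change in live-value count if this instruction is scheduled next in a
 * bottom-up walk: a live dest stops being live, new sources become live.
 */
static int
pressure_delta(const struct toy_instr *I, const bool *live)
{
   int delta = 0;

   if (I->dest >= 0 && live[I->dest])
      delta -= 1;

   for (int s = 0; s < 2; ++s) {
      if (I->src[s] < 0)
         continue;

      /* Count duplicate sources only once. */
      if (s == 1 && I->src[1] == I->src[0])
         continue;

      if (!live[I->src[s]])
         delta += 1;
   }

   return delta;
}

int
main(void)
{
   bool live[4] = {false, false, true, false};
   struct toy_instr I = {.dest = 2, .src = {0, 1}};

   /* Kills one live dest, makes two new sources live: delta = +1. */
   printf("pressure delta = %d\n", pressure_delta(&I, live));
   return 0;
}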


@ -24,177 +24,179 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_print_common.h"
#include "compiler.h"
static const char *
bi_reg_op_name(enum bifrost_reg_op op)
{
switch (op) {
case BIFROST_OP_IDLE: return "idle";
case BIFROST_OP_READ: return "read";
case BIFROST_OP_WRITE: return "write";
case BIFROST_OP_WRITE_LO: return "write lo";
case BIFROST_OP_WRITE_HI: return "write hi";
default: return "invalid";
}
switch (op) {
case BIFROST_OP_IDLE:
return "idle";
case BIFROST_OP_READ:
return "read";
case BIFROST_OP_WRITE:
return "write";
case BIFROST_OP_WRITE_LO:
return "write lo";
case BIFROST_OP_WRITE_HI:
return "write hi";
default:
return "invalid";
}
}
void
bi_print_slots(bi_registers *regs, FILE *fp)
{
for (unsigned i = 0; i < 2; ++i) {
if (regs->enabled[i])
fprintf(fp, "slot %u: %u\n", i, regs->slot[i]);
}
for (unsigned i = 0; i < 2; ++i) {
if (regs->enabled[i])
fprintf(fp, "slot %u: %u\n", i, regs->slot[i]);
}
if (regs->slot23.slot2) {
fprintf(fp, "slot 2 (%s%s): %u\n",
bi_reg_op_name(regs->slot23.slot2),
regs->slot23.slot2 >= BIFROST_OP_WRITE ?
" FMA": "",
regs->slot[2]);
}
if (regs->slot23.slot2) {
fprintf(fp, "slot 2 (%s%s): %u\n", bi_reg_op_name(regs->slot23.slot2),
regs->slot23.slot2 >= BIFROST_OP_WRITE ? " FMA" : "",
regs->slot[2]);
}
if (regs->slot23.slot3) {
fprintf(fp, "slot 3 (%s %s): %u\n",
bi_reg_op_name(regs->slot23.slot3),
regs->slot23.slot3_fma ? "FMA" : "ADD",
regs->slot[3]);
}
if (regs->slot23.slot3) {
fprintf(fp, "slot 3 (%s %s): %u\n", bi_reg_op_name(regs->slot23.slot3),
regs->slot23.slot3_fma ? "FMA" : "ADD", regs->slot[3]);
}
}
void
bi_print_tuple(bi_tuple *tuple, FILE *fp)
{
bi_instr *ins[2] = { tuple->fma, tuple->add };
bi_instr *ins[2] = {tuple->fma, tuple->add};
for (unsigned i = 0; i < 2; ++i) {
fprintf(fp, (i == 0) ? "\t* " : "\t+ ");
for (unsigned i = 0; i < 2; ++i) {
fprintf(fp, (i == 0) ? "\t* " : "\t+ ");
if (ins[i])
bi_print_instr(ins[i], fp);
else
fprintf(fp, "NOP\n");
}
if (ins[i])
bi_print_instr(ins[i], fp);
else
fprintf(fp, "NOP\n");
}
}
void
bi_print_clause(bi_clause *clause, FILE *fp)
{
fprintf(fp, "id(%u)", clause->scoreboard_id);
fprintf(fp, "id(%u)", clause->scoreboard_id);
if (clause->dependencies) {
fprintf(fp, " wait(");
if (clause->dependencies) {
fprintf(fp, " wait(");
for (unsigned i = 0; i < 8; ++i) {
if (clause->dependencies & (1 << i))
fprintf(fp, "%u ", i);
}
for (unsigned i = 0; i < 8; ++i) {
if (clause->dependencies & (1 << i))
fprintf(fp, "%u ", i);
}
fprintf(fp, ")");
}
fprintf(fp, ")");
}
fprintf(fp, " %s", bi_flow_control_name(clause->flow_control));
fprintf(fp, " %s", bi_flow_control_name(clause->flow_control));
if (!clause->next_clause_prefetch)
fprintf(fp, " no_prefetch");
if (!clause->next_clause_prefetch)
fprintf(fp, " no_prefetch");
if (clause->staging_barrier)
fprintf(fp, " osrb");
if (clause->staging_barrier)
fprintf(fp, " osrb");
if (clause->td)
fprintf(fp, " td");
if (clause->td)
fprintf(fp, " td");
if (clause->pcrel_idx != ~0)
fprintf(fp, " pcrel(%u)", clause->pcrel_idx);
if (clause->pcrel_idx != ~0)
fprintf(fp, " pcrel(%u)", clause->pcrel_idx);
fprintf(fp, "\n");
fprintf(fp, "\n");
for (unsigned i = 0; i < clause->tuple_count; ++i)
bi_print_tuple(&clause->tuples[i], fp);
for (unsigned i = 0; i < clause->tuple_count; ++i)
bi_print_tuple(&clause->tuples[i], fp);
if (clause->constant_count) {
for (unsigned i = 0; i < clause->constant_count; ++i)
fprintf(fp, "%" PRIx64 " ", clause->constants[i]);
if (clause->constant_count) {
for (unsigned i = 0; i < clause->constant_count; ++i)
fprintf(fp, "%" PRIx64 " ", clause->constants[i]);
if (clause->branch_constant)
fprintf(fp, "*");
if (clause->branch_constant)
fprintf(fp, "*");
fprintf(fp, "\n");
}
fprintf(fp, "\n");
}
fprintf(fp, "\n");
fprintf(fp, "\n");
}
static void
bi_print_scoreboard_line(unsigned slot, const char *name, uint64_t mask, FILE *fp)
bi_print_scoreboard_line(unsigned slot, const char *name, uint64_t mask,
FILE *fp)
{
if (!mask)
return;
if (!mask)
return;
fprintf(fp, "slot %u %s:", slot, name);
fprintf(fp, "slot %u %s:", slot, name);
u_foreach_bit64(reg, mask)
fprintf(fp, " r%" PRId64, reg);
u_foreach_bit64(reg, mask) fprintf(fp, " r%" PRId64, reg);
fprintf(fp, "\n");
fprintf(fp, "\n");
}
static void
bi_print_scoreboard(struct bi_scoreboard_state *state, FILE *fp)
{
for (unsigned i = 0; i < BI_NUM_SLOTS; ++i) {
bi_print_scoreboard_line(i, "reads", state->read[i], fp);
bi_print_scoreboard_line(i, "writes", state->write[i], fp);
}
for (unsigned i = 0; i < BI_NUM_SLOTS; ++i) {
bi_print_scoreboard_line(i, "reads", state->read[i], fp);
bi_print_scoreboard_line(i, "writes", state->write[i], fp);
}
}
void
bi_print_block(bi_block *block, FILE *fp)
{
if (block->scheduled) {
bi_print_scoreboard(&block->scoreboard_in, fp);
fprintf(fp, "\n");
}
if (block->scheduled) {
bi_print_scoreboard(&block->scoreboard_in, fp);
fprintf(fp, "\n");
}
fprintf(fp, "block%u {\n", block->index);
fprintf(fp, "block%u {\n", block->index);
if (block->scheduled) {
bi_foreach_clause_in_block(block, clause)
bi_print_clause(clause, fp);
} else {
bi_foreach_instr_in_block(block, ins)
bi_print_instr((bi_instr *) ins, fp);
}
if (block->scheduled) {
bi_foreach_clause_in_block(block, clause)
bi_print_clause(clause, fp);
} else {
bi_foreach_instr_in_block(block, ins)
bi_print_instr((bi_instr *)ins, fp);
}
fprintf(fp, "}");
fprintf(fp, "}");
if (block->successors[0]) {
fprintf(fp, " -> ");
if (block->successors[0]) {
fprintf(fp, " -> ");
bi_foreach_successor((block), succ)
fprintf(fp, "block%u ", succ->index);
}
bi_foreach_successor((block), succ)
fprintf(fp, "block%u ", succ->index);
}
if (bi_num_predecessors(block)) {
fprintf(fp, " from");
if (bi_num_predecessors(block)) {
fprintf(fp, " from");
bi_foreach_predecessor(block, pred)
fprintf(fp, " block%u", (*pred)->index);
}
bi_foreach_predecessor(block, pred)
fprintf(fp, " block%u", (*pred)->index);
}
if (block->scheduled) {
fprintf(fp, "\n");
bi_print_scoreboard(&block->scoreboard_out, fp);
}
if (block->scheduled) {
fprintf(fp, "\n");
bi_print_scoreboard(&block->scoreboard_out, fp);
}
fprintf(fp, "\n\n");
fprintf(fp, "\n\n");
}
void
bi_print_shader(bi_context *ctx, FILE *fp)
{
bi_foreach_block(ctx, block)
bi_print_block(block, fp);
bi_foreach_block(ctx, block)
bi_print_block(block, fp);
}


@ -31,38 +31,63 @@
const char *
bi_message_type_name(enum bifrost_message_type T)
{
switch (T) {
case BIFROST_MESSAGE_NONE: return "";
case BIFROST_MESSAGE_VARYING: return "vary";
case BIFROST_MESSAGE_ATTRIBUTE: return "attr";
case BIFROST_MESSAGE_TEX: return "tex";
case BIFROST_MESSAGE_VARTEX: return "vartex";
case BIFROST_MESSAGE_LOAD: return "load";
case BIFROST_MESSAGE_STORE: return "store";
case BIFROST_MESSAGE_ATOMIC: return "atomic";
case BIFROST_MESSAGE_BARRIER: return "barrier";
case BIFROST_MESSAGE_BLEND: return "blend";
case BIFROST_MESSAGE_TILE: return "tile";
case BIFROST_MESSAGE_Z_STENCIL: return "z_stencil";
case BIFROST_MESSAGE_ATEST: return "atest";
case BIFROST_MESSAGE_JOB: return "job";
case BIFROST_MESSAGE_64BIT: return "64";
default: return "XXX reserved";
}
switch (T) {
case BIFROST_MESSAGE_NONE:
return "";
case BIFROST_MESSAGE_VARYING:
return "vary";
case BIFROST_MESSAGE_ATTRIBUTE:
return "attr";
case BIFROST_MESSAGE_TEX:
return "tex";
case BIFROST_MESSAGE_VARTEX:
return "vartex";
case BIFROST_MESSAGE_LOAD:
return "load";
case BIFROST_MESSAGE_STORE:
return "store";
case BIFROST_MESSAGE_ATOMIC:
return "atomic";
case BIFROST_MESSAGE_BARRIER:
return "barrier";
case BIFROST_MESSAGE_BLEND:
return "blend";
case BIFROST_MESSAGE_TILE:
return "tile";
case BIFROST_MESSAGE_Z_STENCIL:
return "z_stencil";
case BIFROST_MESSAGE_ATEST:
return "atest";
case BIFROST_MESSAGE_JOB:
return "job";
case BIFROST_MESSAGE_64BIT:
return "64";
default:
return "XXX reserved";
}
}
const char *
bi_flow_control_name(enum bifrost_flow mode)
{
switch (mode) {
case BIFROST_FLOW_END: return "eos";
case BIFROST_FLOW_NBTB_PC: return "nbb br_pc";
case BIFROST_FLOW_NBTB_UNCONDITIONAL: return "nbb r_uncond";
case BIFROST_FLOW_NBTB: return "nbb";
case BIFROST_FLOW_BTB_UNCONDITIONAL: return "bb r_uncond";
case BIFROST_FLOW_BTB_NONE: return "bb";
case BIFROST_FLOW_WE_UNCONDITIONAL: return "we r_uncond";
case BIFROST_FLOW_WE: return "we";
default: return "XXX";
}
switch (mode) {
case BIFROST_FLOW_END:
return "eos";
case BIFROST_FLOW_NBTB_PC:
return "nbb br_pc";
case BIFROST_FLOW_NBTB_UNCONDITIONAL:
return "nbb r_uncond";
case BIFROST_FLOW_NBTB:
return "nbb";
case BIFROST_FLOW_BTB_UNCONDITIONAL:
return "bb r_uncond";
case BIFROST_FLOW_BTB_NONE:
return "bb";
case BIFROST_FLOW_WE_UNCONDITIONAL:
return "we r_uncond";
case BIFROST_FLOW_WE:
return "we";
default:
return "XXX";
}
}


@ -30,7 +30,7 @@
#include <stdio.h>
#include "bifrost.h"
const char * bi_message_type_name(enum bifrost_message_type T);
const char * bi_flow_control_name(enum bifrost_flow mode);
const char *bi_message_type_name(enum bifrost_message_type T);
const char *bi_flow_control_name(enum bifrost_flow mode);
#endif


@ -44,15 +44,15 @@
static inline unsigned
bifrost_get_quirks(unsigned product_id)
{
switch (product_id >> 8) {
case 0x60: /* G71 */
return BIFROST_NO_FP32_TRANSCENDENTALS | BIFROST_LIMITED_CLPER;
case 0x62: /* G72 */
case 0x70: /* G31 */
return BIFROST_LIMITED_CLPER;
default:
return 0;
}
switch (product_id >> 8) {
case 0x60: /* G71 */
return BIFROST_NO_FP32_TRANSCENDENTALS | BIFROST_LIMITED_CLPER;
case 0x62: /* G72 */
case 0x70: /* G31 */
return BIFROST_LIMITED_CLPER;
default:
return 0;
}
}
#endif

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -54,9 +54,9 @@
*/
#define BI_NUM_GENERAL_SLOTS 6
#define BI_NUM_SLOTS 8
#define BI_NUM_REGISTERS 64
#define BI_SLOT_SERIAL 0 /* arbitrary */
#define BI_NUM_SLOTS 8
#define BI_NUM_REGISTERS 64
#define BI_SLOT_SERIAL 0 /* arbitrary */
/*
* Due to the crude scoreboarding we do, we need to serialize varying loads and
@ -65,26 +65,26 @@
static bool
bi_should_serialize(bi_instr *I)
{
/* For debug, serialize everything to disable scoreboard opts */
if (bifrost_debug & BIFROST_DBG_NOSB)
return true;
/* For debug, serialize everything to disable scoreboard opts */
if (bifrost_debug & BIFROST_DBG_NOSB)
return true;
/* Although nominally on the attribute unit, image loads have the same
* coherency requirements as general memory loads. Serialize them for
* now until we can do something more clever.
*/
if (I->op == BI_OPCODE_LD_ATTR_TEX)
return true;
/* Although nominally on the attribute unit, image loads have the same
* coherency requirements as general memory loads. Serialize them for
* now until we can do something more clever.
*/
if (I->op == BI_OPCODE_LD_ATTR_TEX)
return true;
switch (bi_opcode_props[I->op].message) {
case BIFROST_MESSAGE_VARYING:
case BIFROST_MESSAGE_LOAD:
case BIFROST_MESSAGE_STORE:
case BIFROST_MESSAGE_ATOMIC:
return true;
default:
return false;
}
switch (bi_opcode_props[I->op].message) {
case BIFROST_MESSAGE_VARYING:
case BIFROST_MESSAGE_LOAD:
case BIFROST_MESSAGE_STORE:
case BIFROST_MESSAGE_ATOMIC:
return true;
default:
return false;
}
}
/* Given a scoreboard model, choose a slot for a clause wrapping a given
@ -93,76 +93,77 @@ bi_should_serialize(bi_instr *I)
static unsigned
bi_choose_scoreboard_slot(bi_instr *message)
{
/* ATEST, ZS_EMIT must be issued with slot #0 */
if (message->op == BI_OPCODE_ATEST || message->op == BI_OPCODE_ZS_EMIT)
return 0;
/* ATEST, ZS_EMIT must be issued with slot #0 */
if (message->op == BI_OPCODE_ATEST || message->op == BI_OPCODE_ZS_EMIT)
return 0;
/* BARRIER must be issued with slot #7 */
if (message->op == BI_OPCODE_BARRIER)
return 7;
/* BARRIER must be issued with slot #7 */
if (message->op == BI_OPCODE_BARRIER)
return 7;
/* For now, make serialization easy */
if (bi_should_serialize(message))
return BI_SLOT_SERIAL;
/* For now, make serialization easy */
if (bi_should_serialize(message))
return BI_SLOT_SERIAL;
return 0;
return 0;
}
static uint64_t
bi_read_mask(bi_instr *I, bool staging_only)
{
uint64_t mask = 0;
uint64_t mask = 0;
if (staging_only && !bi_opcode_props[I->op].sr_read)
return mask;
if (staging_only && !bi_opcode_props[I->op].sr_read)
return mask;
bi_foreach_src(I, s) {
if (I->src[s].type == BI_INDEX_REGISTER) {
unsigned reg = I->src[s].value;
unsigned count = bi_count_read_registers(I, s);
bi_foreach_src(I, s) {
if (I->src[s].type == BI_INDEX_REGISTER) {
unsigned reg = I->src[s].value;
unsigned count = bi_count_read_registers(I, s);
mask |= (BITFIELD64_MASK(count) << reg);
}
mask |= (BITFIELD64_MASK(count) << reg);
}
if (staging_only)
break;
}
if (staging_only)
break;
}
return mask;
return mask;
}
static uint64_t
bi_write_mask(bi_instr *I)
{
uint64_t mask = 0;
uint64_t mask = 0;
bi_foreach_dest(I, d) {
if (bi_is_null(I->dest[d])) continue;
bi_foreach_dest(I, d) {
if (bi_is_null(I->dest[d]))
continue;
assert(I->dest[d].type == BI_INDEX_REGISTER);
assert(I->dest[d].type == BI_INDEX_REGISTER);
unsigned reg = I->dest[d].value;
unsigned count = bi_count_write_registers(I, d);
unsigned reg = I->dest[d].value;
unsigned count = bi_count_write_registers(I, d);
mask |= (BITFIELD64_MASK(count) << reg);
}
mask |= (BITFIELD64_MASK(count) << reg);
}
/* Instructions like AXCHG.i32 unconditionally both read and write
* staging registers. Even if we discard the result, the write still
* happens logically and needs to be included in our calculations.
* Obscurely, ATOM_CX is sr_write but can ignore the staging register in
* certain circumstances; this does not require consideration.
*/
if (bi_opcode_props[I->op].sr_write && I->nr_dests && I->nr_srcs &&
bi_is_null(I->dest[0]) && !bi_is_null(I->src[0])) {
/* Instructions like AXCHG.i32 unconditionally both read and write
* staging registers. Even if we discard the result, the write still
* happens logically and needs to be included in our calculations.
* Obscurely, ATOM_CX is sr_write but can ignore the staging register in
* certain circumstances; this does not require consideration.
*/
if (bi_opcode_props[I->op].sr_write && I->nr_dests && I->nr_srcs &&
bi_is_null(I->dest[0]) && !bi_is_null(I->src[0])) {
unsigned reg = I->src[0].value;
unsigned count = bi_count_write_registers(I, 0);
unsigned reg = I->src[0].value;
unsigned count = bi_count_write_registers(I, 0);
mask |= (BITFIELD64_MASK(count) << reg);
}
mask |= (BITFIELD64_MASK(count) << reg);
}
return mask;
return mask;
}
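As a quick sanity check of the mask arithmetic used here, a minimal standalone sketch; LOCAL_BITFIELD64_MASK is a simplified stand-in for Mesa's BITFIELD64_MASK macro, so treat the macro details as an assumption:

#include <assert.h>
#include <stdint.h>

/* Simplified local stand-in for Mesa's BITFIELD64_MASK. */
#define LOCAL_BITFIELD64_MASK(n) (((n) >= 64) ? ~0ull : ((1ull << (n)) - 1))

int main(void)
{
   /* An instruction writing 4 consecutive registers starting at r2
    * contributes bits 2..5 to the mask, exactly as in bi_write_mask. */
   unsigned reg = 2, count = 4;
   uint64_t mask = LOCAL_BITFIELD64_MASK(count) << reg;
   assert(mask == 0x3c);
   return 0;
}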
/* Update the scoreboard model to assign an instruction to a given slot */
@@ -170,140 +171,143 @@ bi_write_mask(bi_instr *I)
static void
bi_push_clause(struct bi_scoreboard_state *st, bi_clause *clause)
{
bi_instr *I = clause->message;
unsigned slot = clause->scoreboard_id;
bi_instr *I = clause->message;
unsigned slot = clause->scoreboard_id;
if (!I)
return;
if (!I)
return;
st->read[slot] |= bi_read_mask(I, true);
st->read[slot] |= bi_read_mask(I, true);
if (bi_opcode_props[I->op].sr_write)
st->write[slot] |= bi_write_mask(I);
if (bi_opcode_props[I->op].sr_write)
st->write[slot] |= bi_write_mask(I);
}
/* Adds a dependency on each slot writing any specified register */
static void
bi_depend_on_writers(bi_clause *clause, struct bi_scoreboard_state *st, uint64_t regmask)
bi_depend_on_writers(bi_clause *clause, struct bi_scoreboard_state *st,
uint64_t regmask)
{
for (unsigned slot = 0; slot < ARRAY_SIZE(st->write); ++slot) {
if (!(st->write[slot] & regmask))
continue;
for (unsigned slot = 0; slot < ARRAY_SIZE(st->write); ++slot) {
if (!(st->write[slot] & regmask))
continue;
st->write[slot] = 0;
st->read[slot] = 0;
st->write[slot] = 0;
st->read[slot] = 0;
clause->dependencies |= BITFIELD_BIT(slot);
}
clause->dependencies |= BITFIELD_BIT(slot);
}
}
static void
bi_set_staging_barrier(bi_clause *clause, struct bi_scoreboard_state *st, uint64_t regmask)
bi_set_staging_barrier(bi_clause *clause, struct bi_scoreboard_state *st,
uint64_t regmask)
{
for (unsigned slot = 0; slot < ARRAY_SIZE(st->read); ++slot) {
if (!(st->read[slot] & regmask))
continue;
for (unsigned slot = 0; slot < ARRAY_SIZE(st->read); ++slot) {
if (!(st->read[slot] & regmask))
continue;
st->read[slot] = 0;
clause->staging_barrier = true;
}
st->read[slot] = 0;
clause->staging_barrier = true;
}
}
/* Sets the dependencies for a given clause, updating the model */
static void
bi_set_dependencies(bi_block *block, bi_clause *clause, struct bi_scoreboard_state *st)
bi_set_dependencies(bi_block *block, bi_clause *clause,
struct bi_scoreboard_state *st)
{
bi_foreach_instr_in_clause(block, clause, I) {
uint64_t read = bi_read_mask(I, false);
uint64_t written = bi_write_mask(I);
bi_foreach_instr_in_clause(block, clause, I) {
uint64_t read = bi_read_mask(I, false);
uint64_t written = bi_write_mask(I);
/* Read-after-write; write-after-write */
bi_depend_on_writers(clause, st, read | written);
/* Read-after-write; write-after-write */
bi_depend_on_writers(clause, st, read | written);
/* Write-after-read */
bi_set_staging_barrier(clause, st, written);
}
/* Write-after-read */
bi_set_staging_barrier(clause, st, written);
}
/* LD_VAR instructions must be serialized per-quad. Just always depend
* on any LD_VAR instructions. This isn't optimal, but doing better
* requires divergence-aware data flow analysis.
*
* Similarly, memory loads/stores need to be synchronized. For now,
* force them to be serialized. This is not optimal.
*/
if (clause->message && bi_should_serialize(clause->message))
clause->dependencies |= BITFIELD_BIT(BI_SLOT_SERIAL);
/* LD_VAR instructions must be serialized per-quad. Just always depend
* on any LD_VAR instructions. This isn't optimal, but doing better
* requires divergence-aware data flow analysis.
*
* Similarly, memory loads/stores need to be synchronized. For now,
* force them to be serialized. This is not optimal.
*/
if (clause->message && bi_should_serialize(clause->message))
clause->dependencies |= BITFIELD_BIT(BI_SLOT_SERIAL);
/* Barriers must wait on all slots to flush existing work. It might be
* possible to skip this with more information about the barrier. For
* now, be conservative.
*/
if (clause->message && clause->message->op == BI_OPCODE_BARRIER)
clause->dependencies |= BITFIELD_MASK(BI_NUM_GENERAL_SLOTS);
/* Barriers must wait on all slots to flush existing work. It might be
* possible to skip this with more information about the barrier. For
* now, be conservative.
*/
if (clause->message && clause->message->op == BI_OPCODE_BARRIER)
clause->dependencies |= BITFIELD_MASK(BI_NUM_GENERAL_SLOTS);
}
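Concretely, the hazard handling above can be modelled on plain masks. This is a minimal standalone sketch with arbitrarily chosen register numbers, not the driver code itself:

#include <assert.h>
#include <stdint.h>

int main(void)
{
   /* State for one scoreboard slot with an outstanding message: it will
    * write r4..r7 and still has to read its staging registers r0..r3. */
   uint64_t slot_write = 0xf0;
   uint64_t slot_read = 0x0f;

   /* A later clause reads r6 and writes r0. */
   uint64_t clause_read = UINT64_C(1) << 6;
   uint64_t clause_write = UINT64_C(1) << 0;

   /* Read-after-write / write-after-write: must depend on the slot. */
   assert((slot_write & (clause_read | clause_write)) != 0);

   /* Write-after-read: the clause needs a staging barrier. */
   assert((slot_read & clause_write) != 0);
   return 0;
}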
static bool
scoreboard_block_update(bi_block *blk)
{
bool progress = false;
bool progress = false;
/* pending_in[s] = sum { p in pred[s] } ( pending_out[p] ) */
bi_foreach_predecessor(blk, pred) {
for (unsigned i = 0; i < BI_NUM_SLOTS; ++i) {
blk->scoreboard_in.read[i] |= (*pred)->scoreboard_out.read[i];
blk->scoreboard_in.write[i] |= (*pred)->scoreboard_out.write[i];
}
}
/* pending_in[s] = sum { p in pred[s] } ( pending_out[p] ) */
bi_foreach_predecessor(blk, pred) {
for (unsigned i = 0; i < BI_NUM_SLOTS; ++i) {
blk->scoreboard_in.read[i] |= (*pred)->scoreboard_out.read[i];
blk->scoreboard_in.write[i] |= (*pred)->scoreboard_out.write[i];
}
}
struct bi_scoreboard_state state = blk->scoreboard_in;
struct bi_scoreboard_state state = blk->scoreboard_in;
/* Assign locally */
/* Assign locally */
bi_foreach_clause_in_block(blk, clause) {
bi_set_dependencies(blk, clause, &state);
bi_push_clause(&state, clause);
}
bi_foreach_clause_in_block(blk, clause) {
bi_set_dependencies(blk, clause, &state);
bi_push_clause(&state, clause);
}
/* To figure out progress, diff scoreboard_out */
/* To figure out progress, diff scoreboard_out */
for (unsigned i = 0; i < BI_NUM_SLOTS; ++i)
progress |= !!memcmp(&state, &blk->scoreboard_out, sizeof(state));
for (unsigned i = 0; i < BI_NUM_SLOTS; ++i)
progress |= !!memcmp(&state, &blk->scoreboard_out, sizeof(state));
blk->scoreboard_out = state;
blk->scoreboard_out = state;
return progress;
return progress;
}
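The join used here is a bitwise OR, which is what makes the worklist iteration converge: per-slot state only ever grows, so each block can change (and be re-pushed) only a bounded number of times. A tiny standalone illustration of that monotonicity:

#include <assert.h>
#include <stdint.h>

int main(void)
{
   uint64_t state = 0;
   uint64_t incoming = 0xf0;
   uint64_t joined = state | incoming;

   assert(joined != state);               /* the first join makes progress */
   assert((joined | incoming) == joined); /* re-joining is a no-op */
   return 0;
}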
void
bi_assign_scoreboard(bi_context *ctx)
{
u_worklist worklist;
bi_worklist_init(ctx, &worklist);
u_worklist worklist;
bi_worklist_init(ctx, &worklist);
/* First, assign slots. */
bi_foreach_block(ctx, block) {
bi_foreach_clause_in_block(block, clause) {
if (clause->message) {
unsigned slot = bi_choose_scoreboard_slot(clause->message);
clause->scoreboard_id = slot;
}
}
/* First, assign slots. */
bi_foreach_block(ctx, block) {
bi_foreach_clause_in_block(block, clause) {
if (clause->message) {
unsigned slot = bi_choose_scoreboard_slot(clause->message);
clause->scoreboard_id = slot;
}
}
bi_worklist_push_tail(&worklist, block);
}
bi_worklist_push_tail(&worklist, block);
}
/* Next, perform forward data flow analysis to calculate dependencies */
while (!u_worklist_is_empty(&worklist)) {
/* Pop from the front for forward analysis */
bi_block *blk = bi_worklist_pop_head(&worklist);
/* Next, perform forward data flow analysis to calculate dependencies */
while (!u_worklist_is_empty(&worklist)) {
/* Pop from the front for forward analysis */
bi_block *blk = bi_worklist_pop_head(&worklist);
if (scoreboard_block_update(blk)) {
bi_foreach_successor(blk, succ)
bi_worklist_push_tail(&worklist, succ);
}
}
if (scoreboard_block_update(blk)) {
bi_foreach_successor(blk, succ)
bi_worklist_push_tail(&worklist, succ);
}
}
u_worklist_fini(&worklist);
u_worklist_fini(&worklist);
}


@@ -27,38 +27,38 @@
#ifndef __BI_TEST_H
#define __BI_TEST_H
#include <stdio.h>
#include <inttypes.h>
#include <stdio.h>
#include "compiler.h"
/* Helper to generate a bi_builder suitable for creating test instructions */
static inline bi_block *
bit_block(bi_context *ctx)
{
bi_block *blk = rzalloc(ctx, bi_block);
bi_block *blk = rzalloc(ctx, bi_block);
util_dynarray_init(&blk->predecessors, blk);
list_addtail(&blk->link, &ctx->blocks);
list_inithead(&blk->instructions);
util_dynarray_init(&blk->predecessors, blk);
list_addtail(&blk->link, &ctx->blocks);
list_inithead(&blk->instructions);
blk->index = ctx->num_blocks++;
blk->index = ctx->num_blocks++;
return blk;
return blk;
}
static inline bi_builder *
bit_builder(void *memctx)
{
bi_context *ctx = rzalloc(memctx, bi_context);
list_inithead(&ctx->blocks);
ctx->inputs = rzalloc(memctx, struct panfrost_compile_inputs);
bi_context *ctx = rzalloc(memctx, bi_context);
list_inithead(&ctx->blocks);
ctx->inputs = rzalloc(memctx, struct panfrost_compile_inputs);
bi_block *blk = bit_block(ctx);
bi_block *blk = bit_block(ctx);
bi_builder *b = rzalloc(memctx, bi_builder);
b->shader = ctx;
b->cursor = bi_after_block(blk);
return b;
bi_builder *b = rzalloc(memctx, bi_builder);
b->shader = ctx;
b->cursor = bi_after_block(blk);
return b;
}
/* Helper to compare for logical equality of instructions. Need to skip over
@@ -69,14 +69,15 @@ bit_instr_equal(bi_instr *A, bi_instr *B)
{
size_t skip = sizeof(struct list_head) + 2 * sizeof(bi_index *);
if (memcmp((uint8_t *) A + skip, (uint8_t *) B + skip, sizeof(bi_instr) - skip))
return false;
if (memcmp((uint8_t *)A + skip, (uint8_t *)B + skip,
sizeof(bi_instr) - skip))
return false;
if (memcmp(A->dest, B->dest, sizeof(bi_index) * A->nr_dests))
return false;
return false;
if (memcmp(A->src, B->src, sizeof(bi_index) * A->nr_srcs))
return false;
return false;
return true;
}
@@ -87,8 +88,9 @@ bit_block_equal(bi_block *A, bi_block *B)
if (list_length(&A->instructions) != list_length(&B->instructions))
return false;
list_pair_for_each_entry(bi_instr, insA, insB,
&A->instructions, &B->instructions, link) {
list_pair_for_each_entry(bi_instr, insA, insB, &A->instructions,
&B->instructions, link)
{
if (!bit_instr_equal(insA, insB))
return false;
}
@@ -102,8 +104,9 @@ bit_shader_equal(bi_context *A, bi_context *B)
if (list_length(&A->blocks) != list_length(&B->blocks))
return false;
list_pair_for_each_entry(bi_block, blockA, blockB,
&A->blocks, &B->blocks, link) {
list_pair_for_each_entry(bi_block, blockA, blockB, &A->blocks, &B->blocks,
link)
{
if (!bit_block_equal(blockA, blockB))
return false;
}
@@ -111,30 +114,31 @@ bit_shader_equal(bi_context *A, bi_context *B)
return true;
}
#define ASSERT_SHADER_EQUAL(A, B) \
if (!bit_shader_equal(A, B)) { \
ADD_FAILURE(); \
fprintf(stderr, "Pass produced unexpected results"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A, stderr); \
fprintf(stderr, " Expected:\n"); \
bi_print_shader(B, stderr); \
fprintf(stderr, "\n"); \
} \
#define ASSERT_SHADER_EQUAL(A, B) \
if (!bit_shader_equal(A, B)) { \
ADD_FAILURE(); \
fprintf(stderr, "Pass produced unexpected results"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A, stderr); \
fprintf(stderr, " Expected:\n"); \
bi_print_shader(B, stderr); \
fprintf(stderr, "\n"); \
}
#define INSTRUCTION_CASE(instr, expected, pass) do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
bi_builder *b = A; \
instr; \
} \
{ \
bi_builder *b = B; \
expected; \
} \
pass(A->shader); \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while(0)
#define INSTRUCTION_CASE(instr, expected, pass) \
do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
bi_builder *b = A; \
instr; \
} \
{ \
bi_builder *b = B; \
expected; \
} \
pass(A->shader); \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while (0)
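A hypothetical use of this macro inside a unit test (one that, like the existing tests, has a mem_ctx ralloc context in scope) might look like the following. The pass name bi_opt_example_pass is made up for illustration, and the generated builder signatures (bi_fadd_f32_to, bi_mov_i32_to) are assumed rather than quoted from the tree:

/* Check that a (hypothetical) pass folds "FADD.f32 r0, r1, 0.0" into a move.
 * Builder calls follow the usual bi_<op>_to(b, dest, srcs...) pattern. */
INSTRUCTION_CASE(
   bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_zero()),
   bi_mov_i32_to(b, bi_register(0), bi_register(1)),
   bi_opt_example_pass);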
#endif


@@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "util/u_memory.h"
#include "compiler.h"
/* Validation doesn't make sense in release builds */
#ifndef NDEBUG
@@ -35,21 +35,21 @@
bool
bi_validate_initialization(bi_context *ctx)
{
bool success = true;
bool success = true;
/* Calculate the live set */
bi_block *entry = bi_entry_block(ctx);
bi_compute_liveness_ssa(ctx);
/* Calculate the live set */
bi_block *entry = bi_entry_block(ctx);
bi_compute_liveness_ssa(ctx);
/* Validate that the live set is indeed empty */
for (unsigned i = 0; i < ctx->ssa_alloc; ++i) {
if (BITSET_TEST(entry->ssa_live_in, i)) {
fprintf(stderr, "%u\n", i);
success = false;
}
}
/* Validate that the live set is indeed empty */
for (unsigned i = 0; i < ctx->ssa_alloc; ++i) {
if (BITSET_TEST(entry->ssa_live_in, i)) {
fprintf(stderr, "%u\n", i);
success = false;
}
}
return success;
return success;
}
/*
@@ -60,47 +60,46 @@ bi_validate_initialization(bi_context *ctx)
static bool
bi_validate_preload(bi_context *ctx)
{
bool start = true;
uint64_t preloaded = 0;
bool start = true;
uint64_t preloaded = 0;
bi_foreach_block(ctx, block) {
bi_foreach_instr_in_block(block, I) {
/* No instruction should have a register destination */
bi_foreach_dest(I, d) {
if (I->dest[d].type == BI_INDEX_REGISTER)
return false;
}
bi_foreach_block(ctx, block) {
bi_foreach_instr_in_block(block, I) {
/* No instruction should have a register destination */
bi_foreach_dest(I, d) {
if (I->dest[d].type == BI_INDEX_REGISTER)
return false;
}
/* Preloads are register moves at the start */
bool is_preload =
start && I->op == BI_OPCODE_MOV_I32 &&
I->src[0].type == BI_INDEX_REGISTER;
/* Preloads are register moves at the start */
bool is_preload = start && I->op == BI_OPCODE_MOV_I32 &&
I->src[0].type == BI_INDEX_REGISTER;
/* After the first nonpreload, we're done preloading */
start &= is_preload;
/* After the first nonpreload, we're done preloading */
start &= is_preload;
/* Only preloads may have a register source */
bi_foreach_src(I, s) {
if (I->src[s].type == BI_INDEX_REGISTER && !is_preload)
return false;
}
/* Only preloads may have a register source */
bi_foreach_src(I, s) {
if (I->src[s].type == BI_INDEX_REGISTER && !is_preload)
return false;
}
/* Check uniqueness */
if (is_preload) {
unsigned r = I->src[0].value;
/* Check uniqueness */
if (is_preload) {
unsigned r = I->src[0].value;
if (preloaded & BITFIELD64_BIT(r))
return false;
if (preloaded & BITFIELD64_BIT(r))
return false;
preloaded |= BITFIELD64_BIT(r);
}
}
preloaded |= BITFIELD64_BIT(r);
}
}
/* Only the first block may preload */
start = false;
}
/* Only the first block may preload */
start = false;
}
return true;
return true;
}
/*
@@ -111,38 +110,37 @@ bi_validate_preload(bi_context *ctx)
static bool
bi_validate_width(bi_context *ctx)
{
bool succ = true;
uint8_t *width = calloc(ctx->ssa_alloc, sizeof(uint8_t));
bool succ = true;
uint8_t *width = calloc(ctx->ssa_alloc, sizeof(uint8_t));
bi_foreach_instr_global(ctx, I) {
bi_foreach_dest(I, d) {
assert(bi_is_ssa(I->dest[d]));
bi_foreach_instr_global(ctx, I) {
bi_foreach_dest(I, d) {
assert(bi_is_ssa(I->dest[d]));
unsigned v = I->dest[d].value;
assert(width[v] == 0 && "broken SSA");
unsigned v = I->dest[d].value;
assert(width[v] == 0 && "broken SSA");
width[v] = bi_count_write_registers(I, d);
}
}
width[v] = bi_count_write_registers(I, d);
}
}
bi_foreach_instr_global(ctx, I) {
bi_foreach_ssa_src(I, s) {
unsigned v = I->src[s].value;
unsigned n = bi_count_read_registers(I, s);
bi_foreach_instr_global(ctx, I) {
bi_foreach_ssa_src(I, s) {
unsigned v = I->src[s].value;
unsigned n = bi_count_read_registers(I, s);
if (width[v] != n) {
succ = false;
fprintf(stderr,
"source %u, expected width %u, got width %u\n",
s, n, width[v]);
bi_print_instr(I, stderr);
fprintf(stderr, "\n");
}
}
}
if (width[v] != n) {
succ = false;
fprintf(stderr, "source %u, expected width %u, got width %u\n", s,
n, width[v]);
bi_print_instr(I, stderr);
fprintf(stderr, "\n");
}
}
}
free(width);
return succ;
free(width);
return succ;
}
/*
@@ -151,20 +149,20 @@ bi_validate_width(bi_context *ctx)
static bool
bi_validate_dest(bi_context *ctx)
{
bool succ = true;
bool succ = true;
bi_foreach_instr_global(ctx, I) {
bi_foreach_dest(I, d) {
if (bi_is_null(I->dest[d])) {
succ = false;
fprintf(stderr, "expected dest %u", d);
bi_print_instr(I, stderr);
fprintf(stderr, "\n");
}
}
}
bi_foreach_instr_global(ctx, I) {
bi_foreach_dest(I, d) {
if (bi_is_null(I->dest[d])) {
succ = false;
fprintf(stderr, "expected dest %u", d);
bi_print_instr(I, stderr);
fprintf(stderr, "\n");
}
}
}
return succ;
return succ;
}
/*
@@ -173,57 +171,57 @@ bi_validate_dest(bi_context *ctx)
static bool
bi_validate_phi_ordering(bi_context *ctx)
{
bi_foreach_block(ctx, block) {
bool start = true;
bi_foreach_block(ctx, block) {
bool start = true;
bi_foreach_instr_in_block(block, I) {
if (start)
start = I->op == BI_OPCODE_PHI;
else if (I->op == BI_OPCODE_PHI)
return false;
}
}
bi_foreach_instr_in_block(block, I) {
if (start)
start = I->op == BI_OPCODE_PHI;
else if (I->op == BI_OPCODE_PHI)
return false;
}
}
return true;
return true;
}
void
bi_validate(bi_context *ctx, const char *after)
{
bool fail = false;
bool fail = false;
if (bifrost_debug & BIFROST_DBG_NOVALIDATE)
return;
if (bifrost_debug & BIFROST_DBG_NOVALIDATE)
return;
if (!bi_validate_initialization(ctx)) {
fprintf(stderr, "Uninitialized data read after %s\n", after);
fail = true;
}
if (!bi_validate_initialization(ctx)) {
fprintf(stderr, "Uninitialized data read after %s\n", after);
fail = true;
}
if (!bi_validate_preload(ctx)) {
fprintf(stderr, "Unexpected preload after %s\n", after);
fail = true;
}
if (!bi_validate_preload(ctx)) {
fprintf(stderr, "Unexpected preload after %s\n", after);
fail = true;
}
if (!bi_validate_width(ctx)) {
fprintf(stderr, "Unexpected vector with after %s\n", after);
fail = true;
}
if (!bi_validate_width(ctx)) {
fprintf(stderr, "Unexpected vector with after %s\n", after);
fail = true;
}
if (!bi_validate_dest(ctx)) {
fprintf(stderr, "Unexpected source/dest after %s\n", after);
fail = true;
}
if (!bi_validate_dest(ctx)) {
fprintf(stderr, "Unexpected source/dest after %s\n", after);
fail = true;
}
if (!bi_validate_phi_ordering(ctx)) {
fprintf(stderr, "Unexpected phi ordering after %s\n", after);
fail = true;
}
if (!bi_validate_phi_ordering(ctx)) {
fprintf(stderr, "Unexpected phi ordering after %s\n", after);
fail = true;
}
if (fail) {
bi_print_shader(ctx, stderr);
exit(1);
}
if (fail) {
bi_print_shader(ctx, stderr);
exit(1);
}
}
#endif /* NDEBUG */


@@ -26,63 +26,63 @@
#ifndef __bifrost_h__
#define __bifrost_h__
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#ifdef __cplusplus
extern "C" {
#endif
#define BIFROST_DBG_MSGS 0x0001
#define BIFROST_DBG_SHADERS 0x0002
#define BIFROST_DBG_SHADERDB 0x0004
#define BIFROST_DBG_VERBOSE 0x0008
#define BIFROST_DBG_INTERNAL 0x0010
#define BIFROST_DBG_NOSCHED 0x0020
#define BIFROST_DBG_INORDER 0x0040
#define BIFROST_DBG_NOVALIDATE 0x0080
#define BIFROST_DBG_NOOPT 0x0100
#define BIFROST_DBG_NOIDVS 0x0200
#define BIFROST_DBG_NOSB 0x0400
#define BIFROST_DBG_NOPRELOAD 0x0800
#define BIFROST_DBG_SPILL 0x1000
#define BIFROST_DBG_NOPSCHED 0x2000
#define BIFROST_DBG_MSGS 0x0001
#define BIFROST_DBG_SHADERS 0x0002
#define BIFROST_DBG_SHADERDB 0x0004
#define BIFROST_DBG_VERBOSE 0x0008
#define BIFROST_DBG_INTERNAL 0x0010
#define BIFROST_DBG_NOSCHED 0x0020
#define BIFROST_DBG_INORDER 0x0040
#define BIFROST_DBG_NOVALIDATE 0x0080
#define BIFROST_DBG_NOOPT 0x0100
#define BIFROST_DBG_NOIDVS 0x0200
#define BIFROST_DBG_NOSB 0x0400
#define BIFROST_DBG_NOPRELOAD 0x0800
#define BIFROST_DBG_SPILL 0x1000
#define BIFROST_DBG_NOPSCHED 0x2000
extern int bifrost_debug;
enum bifrost_message_type {
BIFROST_MESSAGE_NONE = 0,
BIFROST_MESSAGE_VARYING = 1,
BIFROST_MESSAGE_ATTRIBUTE = 2,
BIFROST_MESSAGE_TEX = 3,
BIFROST_MESSAGE_VARTEX = 4,
BIFROST_MESSAGE_LOAD = 5,
BIFROST_MESSAGE_STORE = 6,
BIFROST_MESSAGE_ATOMIC = 7,
BIFROST_MESSAGE_BARRIER = 8,
BIFROST_MESSAGE_BLEND = 9,
BIFROST_MESSAGE_TILE = 10,
/* type 11 reserved */
BIFROST_MESSAGE_Z_STENCIL = 12,
BIFROST_MESSAGE_ATEST = 13,
BIFROST_MESSAGE_JOB = 14,
BIFROST_MESSAGE_64BIT = 15
BIFROST_MESSAGE_NONE = 0,
BIFROST_MESSAGE_VARYING = 1,
BIFROST_MESSAGE_ATTRIBUTE = 2,
BIFROST_MESSAGE_TEX = 3,
BIFROST_MESSAGE_VARTEX = 4,
BIFROST_MESSAGE_LOAD = 5,
BIFROST_MESSAGE_STORE = 6,
BIFROST_MESSAGE_ATOMIC = 7,
BIFROST_MESSAGE_BARRIER = 8,
BIFROST_MESSAGE_BLEND = 9,
BIFROST_MESSAGE_TILE = 10,
/* type 11 reserved */
BIFROST_MESSAGE_Z_STENCIL = 12,
BIFROST_MESSAGE_ATEST = 13,
BIFROST_MESSAGE_JOB = 14,
BIFROST_MESSAGE_64BIT = 15
};
enum bifrost_ftz {
BIFROST_FTZ_DISABLE = 0,
BIFROST_FTZ_DX11 = 1,
BIFROST_FTZ_ALWAYS = 2,
BIFROST_FTZ_ABRUPT = 3
BIFROST_FTZ_DISABLE = 0,
BIFROST_FTZ_DX11 = 1,
BIFROST_FTZ_ALWAYS = 2,
BIFROST_FTZ_ABRUPT = 3
};
enum bifrost_exceptions {
BIFROST_EXCEPTIONS_ENABLED = 0,
BIFROST_EXCEPTIONS_DISABLED = 1,
BIFROST_EXCEPTIONS_PRECISE_DIVISION = 2,
BIFROST_EXCEPTIONS_PRECISE_SQRT = 3,
BIFROST_EXCEPTIONS_ENABLED = 0,
BIFROST_EXCEPTIONS_DISABLED = 1,
BIFROST_EXCEPTIONS_PRECISE_DIVISION = 2,
BIFROST_EXCEPTIONS_PRECISE_SQRT = 3,
};
/* Describes clause flow control, with respect to control flow and branch
@@ -102,182 +102,182 @@ enum bifrost_exceptions {
*/
enum bifrost_flow {
/* End-of-shader */
BIFROST_FLOW_END = 0,
/* End-of-shader */
BIFROST_FLOW_END = 0,
/* Non back-to-back, PC-encoded reconvergence */
BIFROST_FLOW_NBTB_PC = 1,
/* Non back-to-back, PC-encoded reconvergence */
BIFROST_FLOW_NBTB_PC = 1,
/* Non back-to-back, unconditional reconvergence */
BIFROST_FLOW_NBTB_UNCONDITIONAL = 2,
/* Non back-to-back, unconditional reconvergence */
BIFROST_FLOW_NBTB_UNCONDITIONAL = 2,
/* Non back-to-back, no reconvergence */
BIFROST_FLOW_NBTB = 3,
/* Non back-to-back, no reconvergence */
BIFROST_FLOW_NBTB = 3,
/* Back-to-back, unconditional reconvergence */
BIFROST_FLOW_BTB_UNCONDITIONAL = 4,
/* Back-to-back, unconditional reconvergence */
BIFROST_FLOW_BTB_UNCONDITIONAL = 4,
/* Back-to-back, no reconvergence */
BIFROST_FLOW_BTB_NONE = 5,
/* Back-to-back, no reconvergence */
BIFROST_FLOW_BTB_NONE = 5,
/* Write elision, unconditional reconvergence */
BIFROST_FLOW_WE_UNCONDITIONAL = 6,
/* Write elision, unconditional reconvergence */
BIFROST_FLOW_WE_UNCONDITIONAL = 6,
/* Write elision, no reconvergence */
BIFROST_FLOW_WE = 7,
/* Write elision, no reconvergence */
BIFROST_FLOW_WE = 7,
};
enum bifrost_slot {
/* 0-5 are general purpose */
BIFROST_SLOT_ELDEST_DEPTH = 6,
BIFROST_SLOT_ELDEST_COLOUR = 7,
/* 0-5 are general purpose */
BIFROST_SLOT_ELDEST_DEPTH = 6,
BIFROST_SLOT_ELDEST_COLOUR = 7,
};
struct bifrost_header {
/* Reserved */
unsigned zero1 : 5;
/* Reserved */
unsigned zero1 : 5;
/* Flush-to-zero mode, leave zero for GL */
enum bifrost_ftz flush_to_zero : 2;
/* Flush-to-zero mode, leave zero for GL */
enum bifrost_ftz flush_to_zero : 2;
/* Convert any infinite result of any floating-point operation to the
* biggest representable number */
unsigned suppress_inf: 1;
/* Convert any infinite result of any floating-point operation to the
* biggest representable number */
unsigned suppress_inf : 1;
/* Convert NaN to +0.0 */
unsigned suppress_nan : 1;
/* Convert NaN to +0.0 */
unsigned suppress_nan : 1;
/* Floating-point exception handling mode */
enum bifrost_exceptions float_exceptions : 2;
/* Floating-point exception handling mode */
enum bifrost_exceptions float_exceptions : 2;
/* Enum describing the flow control, which matters for handling
* divergence and reconvergence efficiently */
enum bifrost_flow flow_control : 3;
/* Enum describing the flow control, which matters for handling
* divergence and reconvergence efficiently */
enum bifrost_flow flow_control : 3;
/* Reserved */
unsigned zero2 : 1;
/* Reserved */
unsigned zero2 : 1;
/* Terminate discarded threads, rather than continuing execution. Set
* for fragment shaders for standard GL behaviour of DISCARD. Also in a
* fragment shader, this disables helper invocations, so cannot be used
* in a shader that requires derivatives or texture LOD computation */
unsigned terminate_discarded_threads : 1;
/* Terminate discarded threads, rather than continuing execution. Set
* for fragment shaders for standard GL behaviour of DISCARD. Also in a
* fragment shader, this disables helper invocations, so cannot be used
* in a shader that requires derivatives or texture LOD computation */
unsigned terminate_discarded_threads : 1;
/* If set, the hardware may prefetch the next clause. If false, the
* hardware may not. Clear for unconditional branches. */
unsigned next_clause_prefetch : 1;
/* If set, the hardware may prefetch the next clause. If false, the
* hardware may not. Clear for unconditional branches. */
unsigned next_clause_prefetch : 1;
/* If set, a barrier will be inserted after the clause waiting for all
* message passing instructions to read their staging registers, such
* that it is safe for the next clause to write them. */
unsigned staging_barrier: 1;
unsigned staging_register : 6;
/* If set, a barrier will be inserted after the clause waiting for all
* message passing instructions to read their staging registers, such
* that it is safe for the next clause to write them. */
unsigned staging_barrier : 1;
unsigned staging_register : 6;
/* Slots to wait on and slot to be used for message passing
* instructions respectively */
unsigned dependency_wait : 8;
unsigned dependency_slot : 3;
/* Slots to wait on and slot to be used for message passing
* instructions respectively */
unsigned dependency_wait : 8;
unsigned dependency_slot : 3;
enum bifrost_message_type message_type : 5;
enum bifrost_message_type next_message_type : 5;
enum bifrost_message_type message_type : 5;
enum bifrost_message_type next_message_type : 5;
} __attribute__((packed));
enum bifrost_packed_src {
BIFROST_SRC_PORT0 = 0,
BIFROST_SRC_PORT1 = 1,
BIFROST_SRC_PORT2 = 2,
BIFROST_SRC_STAGE = 3,
BIFROST_SRC_FAU_LO = 4,
BIFROST_SRC_FAU_HI = 5,
BIFROST_SRC_PASS_FMA = 6,
BIFROST_SRC_PASS_ADD = 7,
BIFROST_SRC_PORT0 = 0,
BIFROST_SRC_PORT1 = 1,
BIFROST_SRC_PORT2 = 2,
BIFROST_SRC_STAGE = 3,
BIFROST_SRC_FAU_LO = 4,
BIFROST_SRC_FAU_HI = 5,
BIFROST_SRC_PASS_FMA = 6,
BIFROST_SRC_PASS_ADD = 7,
};
struct bifrost_fma_inst {
unsigned src0 : 3;
unsigned op : 20;
unsigned src0 : 3;
unsigned op : 20;
} __attribute__((packed));
struct bifrost_add_inst {
unsigned src0 : 3;
unsigned op : 17;
unsigned src0 : 3;
unsigned op : 17;
} __attribute__((packed));
enum branch_bit_size {
BR_SIZE_32 = 0,
BR_SIZE_16XX = 1,
BR_SIZE_16YY = 2,
// For the above combinations of bitsize and location, an extra bit is
// encoded via comparing the sources. The only possible source of ambiguity
// would be if the sources were the same, but then the branch condition
// would be always true or always false anyways, so we can ignore it. But
// this no longer works when comparing the y component to the x component,
// since it's valid to compare the y component of a source against its own
// x component. Instead, the extra bit is encoded via an extra bitsize.
BR_SIZE_16YX0 = 3,
BR_SIZE_16YX1 = 4,
BR_SIZE_32_AND_16X = 5,
BR_SIZE_32_AND_16Y = 6,
// Used for comparisons with zero and always-true, see below. I think this
// only works for integer comparisons.
BR_SIZE_ZERO = 7,
BR_SIZE_32 = 0,
BR_SIZE_16XX = 1,
BR_SIZE_16YY = 2,
// For the above combinations of bitsize and location, an extra bit is
// encoded via comparing the sources. The only possible source of ambiguity
// would be if the sources were the same, but then the branch condition
// would be always true or always false anyways, so we can ignore it. But
// this no longer works when comparing the y component to the x component,
// since it's valid to compare the y component of a source against its own
// x component. Instead, the extra bit is encoded via an extra bitsize.
BR_SIZE_16YX0 = 3,
BR_SIZE_16YX1 = 4,
BR_SIZE_32_AND_16X = 5,
BR_SIZE_32_AND_16Y = 6,
// Used for comparisons with zero and always-true, see below. I think this
// only works for integer comparisons.
BR_SIZE_ZERO = 7,
};
struct bifrost_regs {
unsigned fau_idx : 8;
unsigned reg3 : 6;
unsigned reg2 : 6;
unsigned reg0 : 5;
unsigned reg1 : 6;
unsigned ctrl : 4;
unsigned fau_idx : 8;
unsigned reg3 : 6;
unsigned reg2 : 6;
unsigned reg0 : 5;
unsigned reg1 : 6;
unsigned ctrl : 4;
} __attribute__((packed));
#define BIFROST_FMTC_CONSTANTS 0b0011
#define BIFROST_FMTC_FINAL 0b0111
#define BIFROST_FMTC_CONSTANTS 0b0011
#define BIFROST_FMTC_FINAL 0b0111
struct bifrost_fmt_constant {
unsigned pos : 4;
unsigned tag : 4;
uint64_t imm_1 : 60;
uint64_t imm_2 : 60;
unsigned pos : 4;
unsigned tag : 4;
uint64_t imm_1 : 60;
uint64_t imm_2 : 60;
} __attribute__((packed));
/* Clause formats, encoded in a table */
enum bi_clause_subword {
/* Literal 3-bit values */
BI_CLAUSE_SUBWORD_LITERAL_0 = 0,
/* etc */
BI_CLAUSE_SUBWORD_LITERAL_7 = 7,
/* Literal 3-bit values */
BI_CLAUSE_SUBWORD_LITERAL_0 = 0,
/* etc */
BI_CLAUSE_SUBWORD_LITERAL_7 = 7,
/* The value of the corresponding tuple in the corresponding bits */
BI_CLAUSE_SUBWORD_TUPLE_0 = 8,
/* etc */
BI_CLAUSE_SUBWORD_TUPLE_7 = 15,
/* The value of the corresponding tuple in the corresponding bits */
BI_CLAUSE_SUBWORD_TUPLE_0 = 8,
/* etc */
BI_CLAUSE_SUBWORD_TUPLE_7 = 15,
/* Clause header */
BI_CLAUSE_SUBWORD_HEADER = 16,
/* Clause header */
BI_CLAUSE_SUBWORD_HEADER = 16,
/* Leave zero, but semantically distinct from literal 0 */
BI_CLAUSE_SUBWORD_RESERVED = 17,
/* Leave zero, but semantically distinct from literal 0 */
BI_CLAUSE_SUBWORD_RESERVED = 17,
/* Embedded constant 0 */
BI_CLAUSE_SUBWORD_CONSTANT = 18,
/* Embedded constant 0 */
BI_CLAUSE_SUBWORD_CONSTANT = 18,
/* M bits controlling modifier for the constant */
BI_CLAUSE_SUBWORD_M = 19,
/* M bits controlling modifier for the constant */
BI_CLAUSE_SUBWORD_M = 19,
/* Z bit: 1 to begin encoding constants, 0 to terminate the clause */
BI_CLAUSE_SUBWORD_Z = 20,
/* Z bit: 1 to begin encoding constants, 0 to terminate the clause */
BI_CLAUSE_SUBWORD_Z = 20,
/* Upper 3-bits of a given tuple and zero extended */
BI_CLAUSE_SUBWORD_UPPER_0 = 32,
/* etc */
BI_CLAUSE_SUBWORD_UPPER_7 = BI_CLAUSE_SUBWORD_UPPER_0 + 7,
/* Upper 3-bits of a given tuple and zero extended */
BI_CLAUSE_SUBWORD_UPPER_0 = 32,
/* etc */
BI_CLAUSE_SUBWORD_UPPER_7 = BI_CLAUSE_SUBWORD_UPPER_0 + 7,
/* Upper 3-bits of two tuples, concatenated and zero-extended */
BI_CLAUSE_SUBWORD_UPPER_23 = BI_CLAUSE_SUBWORD_UPPER_0 + 23,
BI_CLAUSE_SUBWORD_UPPER_56 = BI_CLAUSE_SUBWORD_UPPER_0 + 56,
/* Upper 3-bits of two tuples, concatenated and zero-extended */
BI_CLAUSE_SUBWORD_UPPER_23 = BI_CLAUSE_SUBWORD_UPPER_0 + 23,
BI_CLAUSE_SUBWORD_UPPER_56 = BI_CLAUSE_SUBWORD_UPPER_0 + 56,
};
#define L(x) ((enum bi_clause_subword)(BI_CLAUSE_SUBWORD_LITERAL_0 + x))
@@ -290,15 +290,15 @@ enum bi_clause_subword {
#define R BI_CLAUSE_SUBWORD_RESERVED
struct bi_clause_format {
unsigned format; /* format number */
unsigned pos; /* index in the clause */
enum bi_clause_subword tag_1; /* 2-bits */
enum bi_clause_subword tag_2; /* 3-bits */
enum bi_clause_subword tag_3; /* 3-bits */
enum bi_clause_subword s0_s3; /* 60 bits */
enum bi_clause_subword s4; /* 15 bits */
enum bi_clause_subword s5_s6; /* 30 bits */
enum bi_clause_subword s7; /* 15 bits */
unsigned format; /* format number */
unsigned pos; /* index in the clause */
enum bi_clause_subword tag_1; /* 2-bits */
enum bi_clause_subword tag_2; /* 3-bits */
enum bi_clause_subword tag_3; /* 3-bits */
enum bi_clause_subword s0_s3; /* 60 bits */
enum bi_clause_subword s4; /* 15 bits */
enum bi_clause_subword s5_s6; /* 30 bits */
enum bi_clause_subword s7; /* 15 bits */
};
/* clang-format off */
@@ -341,46 +341,46 @@ static const struct bi_clause_format bi_clause_formats[] = {
* set (and ignored) as a placeholder to differentiate from reserved.
*/
enum bifrost_reg_mode {
BIFROST_R_WL_FMA = 1,
BIFROST_R_WH_FMA = 2,
BIFROST_R_W_FMA = 3,
BIFROST_R_WL_ADD = 4,
BIFROST_R_WH_ADD = 5,
BIFROST_R_W_ADD = 6,
BIFROST_WL_WL_ADD = 7,
BIFROST_WL_WH_ADD = 8,
BIFROST_WL_W_ADD = 9,
BIFROST_WH_WL_ADD = 10,
BIFROST_WH_WH_ADD = 11,
BIFROST_WH_W_ADD = 12,
BIFROST_W_WL_ADD = 13,
BIFROST_W_WH_ADD = 14,
BIFROST_W_W_ADD = 15,
BIFROST_IDLE_1 = 16,
BIFROST_I_W_FMA = 17,
BIFROST_I_WL_FMA = 18,
BIFROST_I_WH_FMA = 19,
BIFROST_R_I = 20,
BIFROST_I_W_ADD = 21,
BIFROST_I_WL_ADD = 22,
BIFROST_I_WH_ADD = 23,
BIFROST_WL_WH_MIX = 24,
BIFROST_WH_WL_MIX = 26,
BIFROST_IDLE = 27,
BIFROST_R_WL_FMA = 1,
BIFROST_R_WH_FMA = 2,
BIFROST_R_W_FMA = 3,
BIFROST_R_WL_ADD = 4,
BIFROST_R_WH_ADD = 5,
BIFROST_R_W_ADD = 6,
BIFROST_WL_WL_ADD = 7,
BIFROST_WL_WH_ADD = 8,
BIFROST_WL_W_ADD = 9,
BIFROST_WH_WL_ADD = 10,
BIFROST_WH_WH_ADD = 11,
BIFROST_WH_W_ADD = 12,
BIFROST_W_WL_ADD = 13,
BIFROST_W_WH_ADD = 14,
BIFROST_W_W_ADD = 15,
BIFROST_IDLE_1 = 16,
BIFROST_I_W_FMA = 17,
BIFROST_I_WL_FMA = 18,
BIFROST_I_WH_FMA = 19,
BIFROST_R_I = 20,
BIFROST_I_W_ADD = 21,
BIFROST_I_WL_ADD = 22,
BIFROST_I_WH_ADD = 23,
BIFROST_WL_WH_MIX = 24,
BIFROST_WH_WL_MIX = 26,
BIFROST_IDLE = 27,
};
enum bifrost_reg_op {
BIFROST_OP_IDLE = 0,
BIFROST_OP_READ = 1,
BIFROST_OP_WRITE = 2,
BIFROST_OP_WRITE_LO = 3,
BIFROST_OP_WRITE_HI = 4,
BIFROST_OP_IDLE = 0,
BIFROST_OP_READ = 1,
BIFROST_OP_WRITE = 2,
BIFROST_OP_WRITE_LO = 3,
BIFROST_OP_WRITE_HI = 4,
};
struct bifrost_reg_ctrl_23 {
enum bifrost_reg_op slot2;
enum bifrost_reg_op slot3;
bool slot3_fma;
enum bifrost_reg_op slot2;
enum bifrost_reg_op slot3;
bool slot3_fma;
};
/* clang-format off */
@@ -420,201 +420,201 @@ static const struct bifrost_reg_ctrl_23 bifrost_reg_ctrl_lut[32] = {
* compiler and stored as a constant */
enum bifrost_texture_operation_mode {
/* Dual texturing */
BIFROST_TEXTURE_OPERATION_DUAL = 1,
/* Dual texturing */
BIFROST_TEXTURE_OPERATION_DUAL = 1,
/* Single texturing */
BIFROST_TEXTURE_OPERATION_SINGLE = 3,
/* Single texturing */
BIFROST_TEXTURE_OPERATION_SINGLE = 3,
};
enum bifrost_index {
/* Both texture/sampler index immediate */
BIFROST_INDEX_IMMEDIATE_SHARED = 0,
/* Both texture/sampler index immediate */
BIFROST_INDEX_IMMEDIATE_SHARED = 0,
/* Sampler index immediate, texture index from staging */
BIFROST_INDEX_IMMEDIATE_SAMPLER = 1,
/* Sampler index immediate, texture index from staging */
BIFROST_INDEX_IMMEDIATE_SAMPLER = 1,
/* Texture index immediate, sampler index from staging */
BIFROST_INDEX_IMMEDIATE_TEXTURE = 2,
/* Texture index immediate, sampler index from staging */
BIFROST_INDEX_IMMEDIATE_TEXTURE = 2,
/* Both indices from (separate) staging registers */
BIFROST_INDEX_REGISTER = 3,
/* Both indices from (separate) staging registers */
BIFROST_INDEX_REGISTER = 3,
};
enum bifrost_tex_op {
/* Given explicit derivatives, compute a gradient descriptor */
BIFROST_TEX_OP_GRDESC_DER = 4,
/* Given explicit derivatives, compute a gradient descriptor */
BIFROST_TEX_OP_GRDESC_DER = 4,
/* Given implicit derivatives (texture coordinates in a fragment
* shader), compute a gradient descriptor */
BIFROST_TEX_OP_GRDESC = 5,
/* Given implicit derivatives (texture coordinates in a fragment
* shader), compute a gradient descriptor */
BIFROST_TEX_OP_GRDESC = 5,
/* Fetch a texel. Takes a staging register with LOD level / face index
* packed 16:16 */
BIFROST_TEX_OP_FETCH = 6,
/* Fetch a texel. Takes a staging register with LOD level / face index
* packed 16:16 */
BIFROST_TEX_OP_FETCH = 6,
/* Filtered texture */
BIFROST_TEX_OP_TEX = 7,
/* Filtered texture */
BIFROST_TEX_OP_TEX = 7,
};
enum bifrost_lod_mode {
/* Takes two staging registers forming a 64-bit gradient descriptor
* (computed by a previous GRDESC or GRDESC_DER operation) */
BIFROST_LOD_MODE_GRDESC = 3,
/* Takes two staging registers forming a 64-bit gradient descriptor
* (computed by a previous GRDESC or GRDESC_DER operation) */
BIFROST_LOD_MODE_GRDESC = 3,
/* Take a staging register with 8:8 fixed-point in bottom 16-bits
* specifying an explicit LOD */
BIFROST_LOD_MODE_EXPLICIT = 4,
/* Take a staging register with 8:8 fixed-point in bottom 16-bits
* specifying an explicit LOD */
BIFROST_LOD_MODE_EXPLICIT = 4,
/* Takes a staging register with bottom 16-bits as 8:8 fixed-point LOD
* bias and top 16-bit as 8:8 fixed-point lower bound (generally left
* zero), added and clamped to a computed LOD */
BIFROST_LOD_MODE_BIAS = 5,
/* Takes a staging register with bottom 16-bits as 8:8 fixed-point LOD
* bias and top 16-bit as 8:8 fixed-point lower bound (generally left
* zero), added and clamped to a computed LOD */
BIFROST_LOD_MODE_BIAS = 5,
/* Set LOD to zero */
BIFROST_LOD_MODE_ZERO = 6,
/* Set LOD to zero */
BIFROST_LOD_MODE_ZERO = 6,
/* Compute LOD */
BIFROST_LOD_MODE_COMPUTE = 7,
/* Compute LOD */
BIFROST_LOD_MODE_COMPUTE = 7,
};
enum bifrost_texture_format {
/* 16-bit floating point, with optional clamping */
BIFROST_TEXTURE_FORMAT_F16 = 0,
BIFROST_TEXTURE_FORMAT_F16_POS = 1,
BIFROST_TEXTURE_FORMAT_F16_PM1 = 2,
BIFROST_TEXTURE_FORMAT_F16_1 = 3,
/* 16-bit floating point, with optional clamping */
BIFROST_TEXTURE_FORMAT_F16 = 0,
BIFROST_TEXTURE_FORMAT_F16_POS = 1,
BIFROST_TEXTURE_FORMAT_F16_PM1 = 2,
BIFROST_TEXTURE_FORMAT_F16_1 = 3,
/* 32-bit floating point, with optional clamping */
BIFROST_TEXTURE_FORMAT_F32 = 4,
BIFROST_TEXTURE_FORMAT_F32_POS = 5,
BIFROST_TEXTURE_FORMAT_F32_PM1 = 6,
BIFROST_TEXTURE_FORMAT_F32_1 = 7,
/* 32-bit floating point, with optional clamping */
BIFROST_TEXTURE_FORMAT_F32 = 4,
BIFROST_TEXTURE_FORMAT_F32_POS = 5,
BIFROST_TEXTURE_FORMAT_F32_PM1 = 6,
BIFROST_TEXTURE_FORMAT_F32_1 = 7,
};
enum bifrost_texture_format_full {
/* Transclude bifrost_texture_format from above */
/* Transclude bifrost_texture_format from above */
/* Integers, unclamped */
BIFROST_TEXTURE_FORMAT_U16 = 12,
BIFROST_TEXTURE_FORMAT_S16 = 13,
BIFROST_TEXTURE_FORMAT_U32 = 14,
BIFROST_TEXTURE_FORMAT_S32 = 15,
/* Integers, unclamped */
BIFROST_TEXTURE_FORMAT_U16 = 12,
BIFROST_TEXTURE_FORMAT_S16 = 13,
BIFROST_TEXTURE_FORMAT_U32 = 14,
BIFROST_TEXTURE_FORMAT_S32 = 15,
};
enum bifrost_texture_fetch {
/* Default texelFetch */
BIFROST_TEXTURE_FETCH_TEXEL = 1,
/* Default texelFetch */
BIFROST_TEXTURE_FETCH_TEXEL = 1,
/* Deprecated, fetches 4x U32 of a U8 x 4 texture. Do not use. */
BIFROST_TEXTURE_FETCH_GATHER4_RGBA = 3,
/* Deprecated, fetches 4x U32 of a U8 x 4 texture. Do not use. */
BIFROST_TEXTURE_FETCH_GATHER4_RGBA = 3,
/* Gathers */
BIFROST_TEXTURE_FETCH_GATHER4_R = 4,
BIFROST_TEXTURE_FETCH_GATHER4_G = 5,
BIFROST_TEXTURE_FETCH_GATHER4_B = 6,
BIFROST_TEXTURE_FETCH_GATHER4_A = 7
/* Gathers */
BIFROST_TEXTURE_FETCH_GATHER4_R = 4,
BIFROST_TEXTURE_FETCH_GATHER4_G = 5,
BIFROST_TEXTURE_FETCH_GATHER4_B = 6,
BIFROST_TEXTURE_FETCH_GATHER4_A = 7
};
struct bifrost_texture_operation {
/* If immediate_indices is set:
* - immediate sampler index
* - index used as texture index
* Otherwise:
* - bifrost_single_index in lower 2 bits
* - 0x3 in upper 2 bits (single-texturing)
*/
unsigned sampler_index_or_mode : 4;
unsigned index : 7;
bool immediate_indices : 1;
enum bifrost_tex_op op : 3;
/* If immediate_indices is set:
* - immediate sampler index
* - index used as texture index
* Otherwise:
* - bifrost_single_index in lower 2 bits
* - 0x3 in upper 2 bits (single-texturing)
*/
unsigned sampler_index_or_mode : 4;
unsigned index : 7;
bool immediate_indices : 1;
enum bifrost_tex_op op : 3;
/* If set for TEX/FETCH, loads texel offsets and multisample index from
* a staging register containing offset_x:offset_y:offset_z:ms_index
* packed 8:8:8:8. Offsets must be in [-31, +31]. If set for
* GRDESC(_DER), disable LOD bias. */
bool offset_or_bias_disable : 1;
/* If set for TEX/FETCH, loads texel offsets and multisample index from
* a staging register containing offset_x:offset_y:offset_z:ms_index
* packed 8:8:8:8. Offsets must be in [-31, +31]. If set for
* GRDESC(_DER), disable LOD bias. */
bool offset_or_bias_disable : 1;
/* If set for TEX/FETCH, loads fp32 shadow comparison value from a
* staging register. Implies fetch_component = gather4_r. If set for
* GRDESC(_DER), disables LOD clamping. */
bool shadow_or_clamp_disable : 1;
/* If set for TEX/FETCH, loads fp32 shadow comparison value from a
* staging register. Implies fetch_component = gather4_r. If set for
* GRDESC(_DER), disables LOD clamping. */
bool shadow_or_clamp_disable : 1;
/* If set, loads an uint32 array index from a staging register. */
bool array : 1;
/* If set, loads an uint32 array index from a staging register. */
bool array : 1;
/* Texture dimension, or 0 for a cubemap */
unsigned dimension : 2;
/* Texture dimension, or 0 for a cubemap */
unsigned dimension : 2;
/* Method to compute LOD value or for a FETCH, the
* bifrost_texture_fetch component specification */
enum bifrost_lod_mode lod_or_fetch : 3;
/* Method to compute LOD value or for a FETCH, the
* bifrost_texture_fetch component specification */
enum bifrost_lod_mode lod_or_fetch : 3;
/* Reserved */
unsigned zero : 1;
/* Reserved */
unsigned zero : 1;
/* Register format for the result */
enum bifrost_texture_format_full format : 4;
/* Register format for the result */
enum bifrost_texture_format_full format : 4;
/* Write mask for the result */
unsigned mask : 4;
/* Write mask for the result */
unsigned mask : 4;
} __attribute__((packed));
struct bifrost_dual_texture_operation {
unsigned primary_sampler_index : 2;
unsigned mode : 2; /* 0x1 for dual */
unsigned primary_texture_index : 2;
unsigned secondary_sampler_index : 2;
unsigned secondary_texture_index : 2;
unsigned primary_sampler_index : 2;
unsigned mode : 2; /* 0x1 for dual */
unsigned primary_texture_index : 2;
unsigned secondary_sampler_index : 2;
unsigned secondary_texture_index : 2;
/* Leave zero for dual texturing */
unsigned reserved : 1;
unsigned index_mode_zero : 1;
/* Leave zero for dual texturing */
unsigned reserved : 1;
unsigned index_mode_zero : 1;
/* Base staging register to write the secondary results to */
unsigned secondary_register : 6;
/* Base staging register to write the secondary results to */
unsigned secondary_register : 6;
/* Format/mask for each texture */
enum bifrost_texture_format secondary_format : 3;
unsigned secondary_mask : 4;
/* Format/mask for each texture */
enum bifrost_texture_format secondary_format : 3;
unsigned secondary_mask : 4;
enum bifrost_texture_format primary_format : 3;
unsigned primary_mask : 4;
enum bifrost_texture_format primary_format : 3;
unsigned primary_mask : 4;
} __attribute__((packed));
static inline uint32_t
bi_dual_tex_as_u32(struct bifrost_dual_texture_operation desc)
{
uint32_t desc_u;
memcpy(&desc_u, &desc, sizeof(desc));
uint32_t desc_u;
memcpy(&desc_u, &desc, sizeof(desc));
return desc_u;
return desc_u;
}
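The memcpy here is the usual strict-aliasing-safe way to bitcast a packed struct to an integer; a pointer cast between unrelated types would be undefined behaviour. The same idiom with a plain float, as a standalone sketch (assuming IEEE-754 floats, which is the practical case everywhere Mesa runs):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* memcpy-based bitcast, mirroring bi_dual_tex_as_u32 but for a float. */
static uint32_t f32_bits(float f)
{
   uint32_t u;
   memcpy(&u, &f, sizeof(u));
   return u;
}

int main(void)
{
   assert(f32_bits(1.0f) == 0x3f800000u); /* IEEE-754 encoding of 1.0f */
   return 0;
}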
#define BIFROST_MEGA_SAMPLE 128
#define BIFROST_ALL_SAMPLES 255
#define BIFROST_MEGA_SAMPLE 128
#define BIFROST_ALL_SAMPLES 255
#define BIFROST_CURRENT_PIXEL 255
struct bifrost_pixel_indices {
unsigned sample : 8;
unsigned rt : 8;
unsigned x : 8;
unsigned y : 8;
unsigned sample : 8;
unsigned rt : 8;
unsigned x : 8;
unsigned y : 8;
} __attribute__((packed));
enum bi_constmod {
BI_CONSTMOD_NONE,
BI_CONSTMOD_PC_LO,
BI_CONSTMOD_PC_HI,
BI_CONSTMOD_PC_LO_HI
BI_CONSTMOD_NONE,
BI_CONSTMOD_PC_LO,
BI_CONSTMOD_PC_HI,
BI_CONSTMOD_PC_LO_HI
};
struct bi_constants {
/* Raw constant values */
uint64_t raw[6];
/* Raw constant values */
uint64_t raw[6];
/* Associated modifier derived from M values */
enum bi_constmod mods[6];
/* Associated modifier derived from M values */
enum bi_constmod mods[6];
};
/* FAU selectors for constants are out-of-order, construct the top bits
@@ -623,12 +623,10 @@ struct bi_constants {
static inline unsigned
bi_constant_field(unsigned idx)
{
const unsigned values[] = {
4, 5, 6, 7, 2, 3
};
const unsigned values[] = {4, 5, 6, 7, 2, 3};
assert(idx <= 5);
return values[idx] << 4;
assert(idx <= 5);
return values[idx] << 4;
}
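As a worked example of the out-of-order mapping, constant index 0 selects FAU field 4 << 4 = 0x40 and index 4 wraps around to 2 << 4 = 0x20. A minimal check, assuming this header is on the include path of the test program:

#include <assert.h>
#include "bifrost.h"

int main(void)
{
   /* Constant slots 0..5 map to the selectors {4, 5, 6, 7, 2, 3}, shifted
    * into the top bits of the 8-bit FAU index. */
   assert(bi_constant_field(0) == (4u << 4));
   assert(bi_constant_field(4) == (2u << 4));
   return 0;
}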
#ifdef __cplusplus

File diff suppressed because it is too large.


@@ -25,73 +25,73 @@
#define __BIFROST_PUBLIC_H_
#include "compiler/nir/nir.h"
#include "util/u_dynarray.h"
#include "panfrost/util/pan_ir.h"
#include "util/u_dynarray.h"
void
bifrost_compile_shader_nir(nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info);
void bifrost_compile_shader_nir(nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info);
static const nir_shader_compiler_options bifrost_nir_options = {
.lower_scmp = true,
.lower_flrp16 = true,
.lower_flrp32 = true,
.lower_flrp64 = true,
.lower_ffract = true,
.lower_fmod = true,
.lower_fdiv = true,
.lower_isign = true,
.lower_find_lsb = true,
.lower_ifind_msb = true,
.lower_fdph = true,
.lower_fsqrt = true,
.lower_scmp = true,
.lower_flrp16 = true,
.lower_flrp32 = true,
.lower_flrp64 = true,
.lower_ffract = true,
.lower_fmod = true,
.lower_fdiv = true,
.lower_isign = true,
.lower_find_lsb = true,
.lower_ifind_msb = true,
.lower_fdph = true,
.lower_fsqrt = true,
.lower_fsign = true,
.lower_fsign = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_extract_to_shifts = true,
.lower_insert_byte = true,
.lower_rotate = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_extract_to_shifts = true,
.lower_insert_byte = true,
.lower_rotate = true,
.lower_pack_half_2x16 = true,
.lower_pack_unorm_2x16 = true,
.lower_pack_snorm_2x16 = true,
.lower_pack_unorm_4x8 = true,
.lower_pack_snorm_4x8 = true,
.lower_unpack_half_2x16 = true,
.lower_unpack_unorm_2x16 = true,
.lower_unpack_snorm_2x16 = true,
.lower_unpack_unorm_4x8 = true,
.lower_unpack_snorm_4x8 = true,
.lower_pack_split = true,
.lower_pack_half_2x16 = true,
.lower_pack_unorm_2x16 = true,
.lower_pack_snorm_2x16 = true,
.lower_pack_unorm_4x8 = true,
.lower_pack_snorm_4x8 = true,
.lower_unpack_half_2x16 = true,
.lower_unpack_unorm_2x16 = true,
.lower_unpack_snorm_2x16 = true,
.lower_unpack_unorm_4x8 = true,
.lower_unpack_snorm_4x8 = true,
.lower_pack_split = true,
.lower_doubles_options = nir_lower_dmod,
/* TODO: Don't lower supported 64-bit operations */
.lower_int64_options = ~0,
/* TODO: Use IMULD on v7 */
.lower_mul_high = true,
.lower_fisnormal = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,
.lower_doubles_options = nir_lower_dmod,
/* TODO: Don't lower supported 64-bit operations */
.lower_int64_options = ~0,
/* TODO: Use IMULD on v7 */
.lower_mul_high = true,
.lower_fisnormal = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,
.has_fsub = true,
.has_isub = true,
.vectorize_io = true,
.vectorize_vec2_16bit = true,
.fuse_ffma16 = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.use_interpolated_input_intrinsics = true,
.has_fsub = true,
.has_isub = true,
.vectorize_io = true,
.vectorize_vec2_16bit = true,
.fuse_ffma16 = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.use_interpolated_input_intrinsics = true,
.lower_uniforms_to_ubo = true,
.lower_uniforms_to_ubo = true,
.has_cs_global_id = true,
.lower_cs_local_index_to_id = true,
.max_unroll_iterations = 32,
.force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
.force_indirect_unrolling_sampler = true,
.has_cs_global_id = true,
.lower_cs_local_index_to_id = true,
.max_unroll_iterations = 32,
.force_indirect_unrolling =
(nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
.force_indirect_unrolling_sampler = true,
};
#endif


@@ -24,21 +24,21 @@
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
bool
bi_has_arg(const bi_instr *ins, bi_index arg)
{
if (!ins)
return false;
if (!ins)
return false;
bi_foreach_src(ins, s) {
if (bi_is_equiv(ins->src[s], arg))
return true;
}
bi_foreach_src(ins, s) {
if (bi_is_equiv(ins->src[s], arg))
return true;
}
return false;
return false;
}
/* Precondition: valid 16-bit or 32-bit register format. Returns whether it is
@@ -48,131 +48,131 @@ bi_has_arg(const bi_instr *ins, bi_index arg)
bool
bi_is_regfmt_16(enum bi_register_format fmt)
{
switch (fmt) {
case BI_REGISTER_FORMAT_F16:
case BI_REGISTER_FORMAT_S16:
case BI_REGISTER_FORMAT_U16:
return true;
case BI_REGISTER_FORMAT_F32:
case BI_REGISTER_FORMAT_S32:
case BI_REGISTER_FORMAT_U32:
case BI_REGISTER_FORMAT_AUTO:
return false;
default:
unreachable("Invalid register format");
}
switch (fmt) {
case BI_REGISTER_FORMAT_F16:
case BI_REGISTER_FORMAT_S16:
case BI_REGISTER_FORMAT_U16:
return true;
case BI_REGISTER_FORMAT_F32:
case BI_REGISTER_FORMAT_S32:
case BI_REGISTER_FORMAT_U32:
case BI_REGISTER_FORMAT_AUTO:
return false;
default:
unreachable("Invalid register format");
}
}
static unsigned
bi_count_staging_registers(const bi_instr *ins)
{
enum bi_sr_count count = bi_opcode_props[ins->op].sr_count;
unsigned vecsize = ins->vecsize + 1; /* XXX: off-by-one */
enum bi_sr_count count = bi_opcode_props[ins->op].sr_count;
unsigned vecsize = ins->vecsize + 1; /* XXX: off-by-one */
switch (count) {
case BI_SR_COUNT_0 ... BI_SR_COUNT_4:
return count;
case BI_SR_COUNT_FORMAT:
return bi_is_regfmt_16(ins->register_format) ?
DIV_ROUND_UP(vecsize, 2) : vecsize;
case BI_SR_COUNT_VECSIZE:
return vecsize;
case BI_SR_COUNT_SR_COUNT:
return ins->sr_count;
}
switch (count) {
case BI_SR_COUNT_0 ... BI_SR_COUNT_4:
return count;
case BI_SR_COUNT_FORMAT:
return bi_is_regfmt_16(ins->register_format) ? DIV_ROUND_UP(vecsize, 2)
: vecsize;
case BI_SR_COUNT_VECSIZE:
return vecsize;
case BI_SR_COUNT_SR_COUNT:
return ins->sr_count;
}
unreachable("Invalid sr_count");
unreachable("Invalid sr_count");
}
unsigned
bi_count_read_registers(const bi_instr *ins, unsigned s)
{
/* ATOM reads 1 but writes 2. Exception for ACMPXCHG */
if (s == 0 && ins->op == BI_OPCODE_ATOM_RETURN_I32)
return (ins->atom_opc == BI_ATOM_OPC_ACMPXCHG) ? 2 : 1;
else if (s == 0 && bi_opcode_props[ins->op].sr_read)
return bi_count_staging_registers(ins);
else if (s == 4 && ins->op == BI_OPCODE_BLEND)
return ins->sr_count_2; /* Dual source blending */
else if (s == 0 && ins->op == BI_OPCODE_SPLIT_I32)
return ins->nr_dests;
else
return 1;
/* ATOM reads 1 but writes 2. Exception for ACMPXCHG */
if (s == 0 && ins->op == BI_OPCODE_ATOM_RETURN_I32)
return (ins->atom_opc == BI_ATOM_OPC_ACMPXCHG) ? 2 : 1;
else if (s == 0 && bi_opcode_props[ins->op].sr_read)
return bi_count_staging_registers(ins);
else if (s == 4 && ins->op == BI_OPCODE_BLEND)
return ins->sr_count_2; /* Dual source blending */
else if (s == 0 && ins->op == BI_OPCODE_SPLIT_I32)
return ins->nr_dests;
else
return 1;
}
unsigned
bi_count_write_registers(const bi_instr *ins, unsigned d)
{
if (d == 0 && bi_opcode_props[ins->op].sr_write) {
switch (ins->op) {
case BI_OPCODE_TEXC:
case BI_OPCODE_TEXC_DUAL:
if (ins->sr_count_2)
return ins->sr_count;
else
return bi_is_regfmt_16(ins->register_format) ? 2 : 4;
if (d == 0 && bi_opcode_props[ins->op].sr_write) {
switch (ins->op) {
case BI_OPCODE_TEXC:
case BI_OPCODE_TEXC_DUAL:
if (ins->sr_count_2)
return ins->sr_count;
else
return bi_is_regfmt_16(ins->register_format) ? 2 : 4;
case BI_OPCODE_TEX_SINGLE:
case BI_OPCODE_TEX_FETCH:
case BI_OPCODE_TEX_GATHER: {
unsigned chans = util_bitcount(ins->write_mask);
case BI_OPCODE_TEX_SINGLE:
case BI_OPCODE_TEX_FETCH:
case BI_OPCODE_TEX_GATHER: {
unsigned chans = util_bitcount(ins->write_mask);
return bi_is_regfmt_16(ins->register_format) ?
DIV_ROUND_UP(chans, 2) : chans;
}
return bi_is_regfmt_16(ins->register_format) ? DIV_ROUND_UP(chans, 2)
: chans;
}
case BI_OPCODE_ACMPXCHG_I32:
/* Reads 2 but writes 1 */
return 1;
case BI_OPCODE_ACMPXCHG_I32:
/* Reads 2 but writes 1 */
return 1;
case BI_OPCODE_ATOM1_RETURN_I32:
/* Allow omitting the destination for plain ATOM1 */
return bi_is_null(ins->dest[0]) ? 0 : ins->sr_count;
default:
return bi_count_staging_registers(ins);
}
} else if (ins->op == BI_OPCODE_SEG_ADD_I64) {
return 2;
} else if (ins->op == BI_OPCODE_TEXC_DUAL && d == 1) {
return ins->sr_count_2;
} else if (ins->op == BI_OPCODE_COLLECT_I32 && d == 0) {
return ins->nr_srcs;
}
case BI_OPCODE_ATOM1_RETURN_I32:
/* Allow omitting the destination for plain ATOM1 */
return bi_is_null(ins->dest[0]) ? 0 : ins->sr_count;
default:
return bi_count_staging_registers(ins);
}
} else if (ins->op == BI_OPCODE_SEG_ADD_I64) {
return 2;
} else if (ins->op == BI_OPCODE_TEXC_DUAL && d == 1) {
return ins->sr_count_2;
} else if (ins->op == BI_OPCODE_COLLECT_I32 && d == 0) {
return ins->nr_srcs;
}
return 1;
return 1;
}
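For instance, the rounding in the TEX_* case works out as follows; a standalone sketch with a local stand-in for Mesa's DIV_ROUND_UP macro:

#include <assert.h>

/* Simplified local stand-in for Mesa's DIV_ROUND_UP. */
#define LOCAL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
   unsigned chans = 3; /* util_bitcount(0b0111): three channels enabled */

   /* 16-bit register formats pack two channels per staging register... */
   assert(LOCAL_DIV_ROUND_UP(chans, 2) == 2);

   /* ...while 32-bit formats need one register per channel. */
   assert(chans == 3);
   return 0;
}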
unsigned
bi_writemask(const bi_instr *ins, unsigned d)
{
unsigned mask = BITFIELD_MASK(bi_count_write_registers(ins, d));
unsigned shift = ins->dest[d].offset;
return (mask << shift);
unsigned mask = BITFIELD_MASK(bi_count_write_registers(ins, d));
unsigned shift = ins->dest[d].offset;
return (mask << shift);
}
bi_clause *
bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause)
{
if (!block && !clause)
return NULL;
if (!block && !clause)
return NULL;
/* Try the first clause in this block if we're starting from scratch */
if (!clause && !list_is_empty(&block->clauses))
return list_first_entry(&block->clauses, bi_clause, link);
/* Try the first clause in this block if we're starting from scratch */
if (!clause && !list_is_empty(&block->clauses))
return list_first_entry(&block->clauses, bi_clause, link);
/* Try the next clause in this block */
if (clause && clause->link.next != &block->clauses)
return list_first_entry(&(clause->link), bi_clause, link);
/* Try the next clause in this block */
if (clause && clause->link.next != &block->clauses)
return list_first_entry(&(clause->link), bi_clause, link);
/* Try the next block, or the one after that if it's empty, etc. */
bi_block *next_block = bi_next_block(block);
/* Try the next block, or the one after that if it's empty, etc. */
bi_block *next_block = bi_next_block(block);
bi_foreach_block_from(ctx, next_block, block) {
if (!list_is_empty(&block->clauses))
return list_first_entry(&block->clauses, bi_clause, link);
}
bi_foreach_block_from(ctx, next_block, block) {
if (!list_is_empty(&block->clauses))
return list_first_entry(&block->clauses, bi_clause, link);
}
return NULL;
return NULL;
}
/* Does an instruction have a side effect not captured by its register
@ -184,41 +184,41 @@ bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause)
bool
bi_side_effects(const bi_instr *I)
{
if (bi_opcode_props[I->op].last)
return true;
if (bi_opcode_props[I->op].last)
return true;
switch (I->op) {
case BI_OPCODE_DISCARD_F32:
case BI_OPCODE_DISCARD_B32:
return true;
default:
break;
}
switch (I->op) {
case BI_OPCODE_DISCARD_F32:
case BI_OPCODE_DISCARD_B32:
return true;
default:
break;
}
switch (bi_opcode_props[I->op].message) {
case BIFROST_MESSAGE_NONE:
case BIFROST_MESSAGE_VARYING:
case BIFROST_MESSAGE_ATTRIBUTE:
case BIFROST_MESSAGE_TEX:
case BIFROST_MESSAGE_VARTEX:
case BIFROST_MESSAGE_LOAD:
case BIFROST_MESSAGE_64BIT:
return false;
switch (bi_opcode_props[I->op].message) {
case BIFROST_MESSAGE_NONE:
case BIFROST_MESSAGE_VARYING:
case BIFROST_MESSAGE_ATTRIBUTE:
case BIFROST_MESSAGE_TEX:
case BIFROST_MESSAGE_VARTEX:
case BIFROST_MESSAGE_LOAD:
case BIFROST_MESSAGE_64BIT:
return false;
case BIFROST_MESSAGE_STORE:
case BIFROST_MESSAGE_ATOMIC:
case BIFROST_MESSAGE_BARRIER:
case BIFROST_MESSAGE_BLEND:
case BIFROST_MESSAGE_Z_STENCIL:
case BIFROST_MESSAGE_ATEST:
case BIFROST_MESSAGE_JOB:
return true;
case BIFROST_MESSAGE_STORE:
case BIFROST_MESSAGE_ATOMIC:
case BIFROST_MESSAGE_BARRIER:
case BIFROST_MESSAGE_BLEND:
case BIFROST_MESSAGE_Z_STENCIL:
case BIFROST_MESSAGE_ATEST:
case BIFROST_MESSAGE_JOB:
return true;
case BIFROST_MESSAGE_TILE:
return (I->op != BI_OPCODE_LD_TILE);
}
case BIFROST_MESSAGE_TILE:
return (I->op != BI_OPCODE_LD_TILE);
}
unreachable("Invalid message type");
unreachable("Invalid message type");
}
/* Branch reconvergence is required when the execution mask may change
@ -230,10 +230,10 @@ bi_side_effects(const bi_instr *I)
bool
bi_reconverge_branches(bi_block *block)
{
if (bi_num_successors(block) == 1)
return bi_num_predecessors(block->successors[0]) > 1;
else
return true;
if (bi_num_successors(block) == 1)
return bi_num_predecessors(block->successors[0]) > 1;
else
return true;
}
/*
@ -252,42 +252,41 @@ bi_reconverge_branches(bi_block *block)
bool
bi_can_replace_with_csel(bi_instr *I)
{
return ((I->op == BI_OPCODE_MUX_I32) || (I->op == BI_OPCODE_MUX_V2I16)) &&
(I->mux != BI_MUX_BIT) &&
(I->src[0].swizzle == BI_SWIZZLE_H01) &&
(I->src[1].swizzle == BI_SWIZZLE_H01) &&
(I->src[2].swizzle == BI_SWIZZLE_H01);
return ((I->op == BI_OPCODE_MUX_I32) || (I->op == BI_OPCODE_MUX_V2I16)) &&
(I->mux != BI_MUX_BIT) && (I->src[0].swizzle == BI_SWIZZLE_H01) &&
(I->src[1].swizzle == BI_SWIZZLE_H01) &&
(I->src[2].swizzle == BI_SWIZZLE_H01);
}
static enum bi_opcode
bi_csel_for_mux(bool must_sign, bool b32, enum bi_mux mux)
{
switch (mux) {
case BI_MUX_INT_ZERO:
if (must_sign)
return b32 ? BI_OPCODE_CSEL_U32 : BI_OPCODE_CSEL_V2U16;
else
return b32 ? BI_OPCODE_CSEL_I32 : BI_OPCODE_CSEL_V2I16;
case BI_MUX_NEG:
return b32 ? BI_OPCODE_CSEL_S32 : BI_OPCODE_CSEL_V2S16;
case BI_MUX_FP_ZERO:
return b32 ? BI_OPCODE_CSEL_F32 : BI_OPCODE_CSEL_V2F16;
default:
unreachable("No CSEL for MUX.bit");
}
switch (mux) {
case BI_MUX_INT_ZERO:
if (must_sign)
return b32 ? BI_OPCODE_CSEL_U32 : BI_OPCODE_CSEL_V2U16;
else
return b32 ? BI_OPCODE_CSEL_I32 : BI_OPCODE_CSEL_V2I16;
case BI_MUX_NEG:
return b32 ? BI_OPCODE_CSEL_S32 : BI_OPCODE_CSEL_V2S16;
case BI_MUX_FP_ZERO:
return b32 ? BI_OPCODE_CSEL_F32 : BI_OPCODE_CSEL_V2F16;
default:
unreachable("No CSEL for MUX.bit");
}
}
bi_instr *
bi_csel_from_mux(bi_builder *b, const bi_instr *I, bool must_sign)
{
assert(I->op == BI_OPCODE_MUX_I32 || I->op == BI_OPCODE_MUX_V2I16);
assert(I->op == BI_OPCODE_MUX_I32 || I->op == BI_OPCODE_MUX_V2I16);
/* Build a new CSEL */
enum bi_cmpf cmpf = (I->mux == BI_MUX_NEG) ? BI_CMPF_LT : BI_CMPF_EQ;
bi_instr *csel = bi_csel_u32_to(b, I->dest[0], I->src[2], bi_zero(),
I->src[0], I->src[1], cmpf);
/* Build a new CSEL */
enum bi_cmpf cmpf = (I->mux == BI_MUX_NEG) ? BI_CMPF_LT : BI_CMPF_EQ;
bi_instr *csel = bi_csel_u32_to(b, I->dest[0], I->src[2], bi_zero(),
I->src[0], I->src[1], cmpf);
/* Fixup the opcode and use it */
csel->op = bi_csel_for_mux(must_sign, I->op == BI_OPCODE_MUX_I32, I->mux);
return csel;
/* Fixup the opcode and use it */
csel->op = bi_csel_for_mux(must_sign, I->op == BI_OPCODE_MUX_I32, I->mux);
return csel;
}
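Editorial note (not part of the commit): the MUX-to-CSEL rewrite above depends on the operand order chosen in bi_csel_from_mux. The sketch below is a minimal host-side model of that selection, assuming the usual Bifrost CSEL convention result = cmpf(s0, s1) ? s2 : s3, so that the constructed csel(src[2], 0, src[0], src[1], cmpf) reads as "compare the selector against zero, pick src[0] on success, src[1] otherwise". The model_csel_* helpers are hypothetical names used only for illustration.

  /* Host-side model of the CSEL built in bi_csel_from_mux, under the operand
   * convention stated above (an assumption, not taken from the commit). */
  #include <stdint.h>
  #include <stdio.h>

  static uint32_t
  model_csel_eq_zero(uint32_t sel, uint32_t t, uint32_t f)
  {
     return (sel == 0) ? t : f; /* BI_CMPF_EQ against bi_zero() */
  }

  static uint32_t
  model_csel_lt_zero(int32_t sel, uint32_t t, uint32_t f)
  {
     return (sel < 0) ? t : f; /* BI_CMPF_LT, the BI_MUX_NEG case */
  }

  int
  main(void)
  {
     printf("%u\n", model_csel_eq_zero(0, 7, 9));  /* 7: zero selector picks src[0] */
     printf("%u\n", model_csel_lt_zero(-1, 7, 9)); /* 7: negative selector picks src[0] */
     printf("%u\n", model_csel_lt_zero(1, 7, 9));  /* 9: otherwise src[1] */
     return 0;
  }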
View file
@ -26,15 +26,15 @@
#include <getopt.h>
#include <string.h>
#include "disassemble.h"
#include "valhall/disassemble.h"
#include "compiler.h"
#include "disassemble.h"
#include "main/mtypes.h"
#include "compiler/glsl/standalone.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "compiler/glsl/gl_nir.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "compiler/glsl/standalone.h"
#include "compiler/nir_types.h"
#include "main/mtypes.h"
#include "util/u_dynarray.h"
#include "bifrost_compile.h"
@ -44,25 +44,25 @@ int verbose = 0;
static gl_shader_stage
filename_to_stage(const char *stage)
{
const char *ext = strrchr(stage, '.');
const char *ext = strrchr(stage, '.');
if (ext == NULL) {
fprintf(stderr, "No extension found in %s\n", stage);
exit(1);
}
if (ext == NULL) {
fprintf(stderr, "No extension found in %s\n", stage);
exit(1);
}
if (!strcmp(ext, ".cs") || !strcmp(ext, ".comp"))
return MESA_SHADER_COMPUTE;
else if (!strcmp(ext, ".vs") || !strcmp(ext, ".vert"))
return MESA_SHADER_VERTEX;
else if (!strcmp(ext, ".fs") || !strcmp(ext, ".frag"))
return MESA_SHADER_FRAGMENT;
else {
fprintf(stderr, "Invalid extension %s\n", ext);
exit(1);
}
if (!strcmp(ext, ".cs") || !strcmp(ext, ".comp"))
return MESA_SHADER_COMPUTE;
else if (!strcmp(ext, ".vs") || !strcmp(ext, ".vert"))
return MESA_SHADER_VERTEX;
else if (!strcmp(ext, ".fs") || !strcmp(ext, ".frag"))
return MESA_SHADER_FRAGMENT;
else {
fprintf(stderr, "Invalid extension %s\n", ext);
exit(1);
}
unreachable("Should've returned or bailed");
unreachable("Should've returned or bailed");
}
static int
@ -80,7 +80,7 @@ glsl_type_size(const struct glsl_type *type, bool bindless)
static void
insert_sorted(struct exec_list *var_list, nir_variable *new_var)
{
nir_foreach_variable_in_list (var, var_list) {
nir_foreach_variable_in_list(var, var_list) {
if (var->data.location > new_var->data.location) {
exec_node_insert_node_before(&var->node, &new_var->node);
return;
@ -94,7 +94,7 @@ sort_varyings(nir_shader *nir, nir_variable_mode mode)
{
struct exec_list new_list;
exec_list_make_empty(&new_list);
nir_foreach_variable_with_modes_safe (var, nir, mode) {
nir_foreach_variable_with_modes_safe(var, nir, mode) {
exec_node_remove(&var->node);
insert_sorted(&new_list, var);
}
@ -104,7 +104,7 @@ sort_varyings(nir_shader *nir, nir_variable_mode mode)
static void
fixup_varying_slots(nir_shader *nir, nir_variable_mode mode)
{
nir_foreach_variable_with_modes (var, nir, mode) {
nir_foreach_variable_with_modes(var, nir, mode) {
if (var->data.location >= VARYING_SLOT_VAR0) {
var->data.location += 9;
} else if ((var->data.location >= VARYING_SLOT_TEX0) &&
@ -117,228 +117,219 @@ fixup_varying_slots(nir_shader *nir, nir_variable_mode mode)
static void
compile_shader(int stages, char **files)
{
struct gl_shader_program *prog;
nir_shader *nir[MESA_SHADER_COMPUTE + 1];
unsigned shader_types[MESA_SHADER_COMPUTE + 1];
struct gl_shader_program *prog;
nir_shader *nir[MESA_SHADER_COMPUTE + 1];
unsigned shader_types[MESA_SHADER_COMPUTE + 1];
if (stages > MESA_SHADER_COMPUTE) {
fprintf(stderr, "Too many stages");
exit(1);
}
if (stages > MESA_SHADER_COMPUTE) {
fprintf(stderr, "Too many stages");
exit(1);
}
for (unsigned i = 0; i < stages; ++i)
shader_types[i] = filename_to_stage(files[i]);
for (unsigned i = 0; i < stages; ++i)
shader_types[i] = filename_to_stage(files[i]);
struct standalone_options options = {
.glsl_version = 300, /* ES - needed for precision */
.do_link = true,
.lower_precision = true
};
struct standalone_options options = {
.glsl_version = 300, /* ES - needed for precision */
.do_link = true,
.lower_precision = true};
static struct gl_context local_ctx;
static struct gl_context local_ctx;
prog = standalone_compile_shader(&options, stages, files, &local_ctx);
prog = standalone_compile_shader(&options, stages, files, &local_ctx);
for (unsigned i = 0; i < stages; ++i) {
gl_shader_stage stage = shader_types[i];
prog->_LinkedShaders[stage]->Program->info.stage = stage;
}
for (unsigned i = 0; i < stages; ++i) {
gl_shader_stage stage = shader_types[i];
prog->_LinkedShaders[stage]->Program->info.stage = stage;
}
struct util_dynarray binary;
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
util_dynarray_init(&binary, NULL);
for (unsigned i = 0; i < stages; ++i) {
nir[i] = glsl_to_nir(&local_ctx.Const, prog, shader_types[i], &bifrost_nir_options);
for (unsigned i = 0; i < stages; ++i) {
nir[i] = glsl_to_nir(&local_ctx.Const, prog, shader_types[i],
&bifrost_nir_options);
if (shader_types[i] == MESA_SHADER_VERTEX) {
nir_assign_var_locations(nir[i], nir_var_shader_in, &nir[i]->num_inputs,
glsl_type_size);
sort_varyings(nir[i], nir_var_shader_out);
nir_assign_var_locations(nir[i], nir_var_shader_out, &nir[i]->num_outputs,
glsl_type_size);
fixup_varying_slots(nir[i], nir_var_shader_out);
} else if (shader_types[i] == MESA_SHADER_FRAGMENT) {
sort_varyings(nir[i], nir_var_shader_in);
nir_assign_var_locations(nir[i], nir_var_shader_in, &nir[i]->num_inputs,
glsl_type_size);
fixup_varying_slots(nir[i], nir_var_shader_in);
nir_assign_var_locations(nir[i], nir_var_shader_out, &nir[i]->num_outputs,
glsl_type_size);
}
if (shader_types[i] == MESA_SHADER_VERTEX) {
nir_assign_var_locations(nir[i], nir_var_shader_in,
&nir[i]->num_inputs, glsl_type_size);
sort_varyings(nir[i], nir_var_shader_out);
nir_assign_var_locations(nir[i], nir_var_shader_out,
&nir[i]->num_outputs, glsl_type_size);
fixup_varying_slots(nir[i], nir_var_shader_out);
} else if (shader_types[i] == MESA_SHADER_FRAGMENT) {
sort_varyings(nir[i], nir_var_shader_in);
nir_assign_var_locations(nir[i], nir_var_shader_in,
&nir[i]->num_inputs, glsl_type_size);
fixup_varying_slots(nir[i], nir_var_shader_in);
nir_assign_var_locations(nir[i], nir_var_shader_out,
&nir[i]->num_outputs, glsl_type_size);
}
nir_assign_var_locations(nir[i], nir_var_uniform, &nir[i]->num_uniforms,
glsl_type_size);
nir_assign_var_locations(nir[i], nir_var_uniform, &nir[i]->num_uniforms,
glsl_type_size);
NIR_PASS_V(nir[i], nir_lower_global_vars_to_local);
NIR_PASS_V(nir[i], nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir[i]), true, i == 0);
NIR_PASS_V(nir[i], nir_opt_copy_prop_vars);
NIR_PASS_V(nir[i], nir_opt_combine_stores, nir_var_all);
NIR_PASS_V(nir[i], nir_lower_global_vars_to_local);
NIR_PASS_V(nir[i], nir_lower_io_to_temporaries,
nir_shader_get_entrypoint(nir[i]), true, i == 0);
NIR_PASS_V(nir[i], nir_opt_copy_prop_vars);
NIR_PASS_V(nir[i], nir_opt_combine_stores, nir_var_all);
NIR_PASS_V(nir[i], nir_lower_system_values);
NIR_PASS_V(nir[i], gl_nir_lower_samplers, prog);
NIR_PASS_V(nir[i], nir_split_var_copies);
NIR_PASS_V(nir[i], nir_lower_var_copies);
NIR_PASS_V(nir[i], nir_lower_system_values);
NIR_PASS_V(nir[i], gl_nir_lower_samplers, prog);
NIR_PASS_V(nir[i], nir_split_var_copies);
NIR_PASS_V(nir[i], nir_lower_var_copies);
NIR_PASS_V(nir[i], nir_lower_io, nir_var_uniform,
st_packed_uniforms_type_size,
(nir_lower_io_options)0);
NIR_PASS_V(nir[i], nir_lower_uniforms_to_ubo, true, false);
NIR_PASS_V(nir[i], nir_lower_io, nir_var_uniform,
st_packed_uniforms_type_size, (nir_lower_io_options)0);
NIR_PASS_V(nir[i], nir_lower_uniforms_to_ubo, true, false);
/* before buffers and vars_to_ssa */
NIR_PASS_V(nir[i], gl_nir_lower_images, true);
/* before buffers and vars_to_ssa */
NIR_PASS_V(nir[i], gl_nir_lower_images, true);
NIR_PASS_V(nir[i], gl_nir_lower_buffers, prog);
NIR_PASS_V(nir[i], nir_opt_constant_folding);
NIR_PASS_V(nir[i], gl_nir_lower_buffers, prog);
NIR_PASS_V(nir[i], nir_opt_constant_folding);
struct panfrost_compile_inputs inputs = {
.gpu_id = gpu_id,
.fixed_sysval_ubo = -1,
};
struct pan_shader_info info = { 0 };
struct panfrost_compile_inputs inputs = {
.gpu_id = gpu_id,
.fixed_sysval_ubo = -1,
};
struct pan_shader_info info = {0};
util_dynarray_clear(&binary);
bifrost_compile_shader_nir(nir[i], &inputs, &binary, &info);
util_dynarray_clear(&binary);
bifrost_compile_shader_nir(nir[i], &inputs, &binary, &info);
char *fn = NULL;
asprintf(&fn, "shader_%u.bin", i);
assert(fn != NULL);
FILE *fp = fopen(fn, "wb");
fwrite(binary.data, 1, binary.size, fp);
fclose(fp);
free(fn);
}
char *fn = NULL;
asprintf(&fn, "shader_%u.bin", i);
assert(fn != NULL);
FILE *fp = fopen(fn, "wb");
fwrite(binary.data, 1, binary.size, fp);
fclose(fp);
free(fn);
}
util_dynarray_fini(&binary);
util_dynarray_fini(&binary);
}
#define BI_FOURCC(ch0, ch1, ch2, ch3) ( \
(uint32_t)(ch0) | (uint32_t)(ch1) << 8 | \
(uint32_t)(ch2) << 16 | (uint32_t)(ch3) << 24)
#define BI_FOURCC(ch0, ch1, ch2, ch3) \
((uint32_t)(ch0) | (uint32_t)(ch1) << 8 | (uint32_t)(ch2) << 16 | \
(uint32_t)(ch3) << 24)
static void
disassemble(const char *filename)
{
FILE *fp = fopen(filename, "rb");
assert(fp);
FILE *fp = fopen(filename, "rb");
assert(fp);
fseek(fp, 0, SEEK_END);
unsigned filesize = ftell(fp);
rewind(fp);
fseek(fp, 0, SEEK_END);
unsigned filesize = ftell(fp);
rewind(fp);
uint32_t *code = malloc(filesize);
unsigned res = fread(code, 1, filesize, fp);
if (res != filesize) {
printf("Couldn't read full file\n");
}
uint32_t *code = malloc(filesize);
unsigned res = fread(code, 1, filesize, fp);
if (res != filesize) {
printf("Couldn't read full file\n");
}
fclose(fp);
fclose(fp);
void *entrypoint = code;
void *entrypoint = code;
if (filesize && code[0] == BI_FOURCC('M', 'B', 'S', '2')) {
for (int i = 0; i < filesize / 4; ++i) {
if (code[i] != BI_FOURCC('O', 'B', 'J', 'C'))
continue;
if (filesize && code[0] == BI_FOURCC('M', 'B', 'S', '2')) {
for (int i = 0; i < filesize / 4; ++i) {
if (code[i] != BI_FOURCC('O', 'B', 'J', 'C'))
continue;
unsigned size = code[i + 1];
unsigned offset = i + 2;
unsigned size = code[i + 1];
unsigned offset = i + 2;
entrypoint = code + offset;
filesize = size;
}
}
entrypoint = code + offset;
filesize = size;
}
}
if ((gpu_id >> 12) >= 9)
disassemble_valhall(stdout, entrypoint, filesize, verbose);
else
disassemble_bifrost(stdout, entrypoint, filesize, verbose);
if ((gpu_id >> 12) >= 9)
disassemble_valhall(stdout, entrypoint, filesize, verbose);
else
disassemble_bifrost(stdout, entrypoint, filesize, verbose);
free(code);
free(code);
}
int
main(int argc, char **argv)
{
int c;
int c;
if (argc < 2) {
printf("Pass a command\n");
exit(1);
}
if (argc < 2) {
printf("Pass a command\n");
exit(1);
}
static struct option longopts[] = {
{ "id", optional_argument, NULL, 'i' },
{ "gpu", optional_argument, NULL, 'g' },
{ "verbose", no_argument, &verbose, 'v' },
{ NULL, 0, NULL, 0 }
};
static struct option longopts[] = {{"id", optional_argument, NULL, 'i'},
{"gpu", optional_argument, NULL, 'g'},
{"verbose", no_argument, &verbose, 'v'},
{NULL, 0, NULL, 0}};
static struct {
const char *name;
unsigned major, minor;
} gpus[] = {
{ "G71", 6, 0 },
{ "G72", 6, 2 },
{ "G51", 7, 0 },
{ "G76", 7, 1 },
{ "G52", 7, 2 },
{ "G31", 7, 3 },
{ "G77", 9, 0 },
{ "G57", 9, 1 },
{ "G78", 9, 2 },
{ "G57", 9, 3 },
{ "G68", 9, 4 },
{ "G78AE", 9, 5 },
};
static struct {
const char *name;
unsigned major, minor;
} gpus[] = {
{"G71", 6, 0}, {"G72", 6, 2}, {"G51", 7, 0}, {"G76", 7, 1},
{"G52", 7, 2}, {"G31", 7, 3}, {"G77", 9, 0}, {"G57", 9, 1},
{"G78", 9, 2}, {"G57", 9, 3}, {"G68", 9, 4}, {"G78AE", 9, 5},
};
while ((c = getopt_long(argc, argv, "v:", longopts, NULL)) != -1) {
while ((c = getopt_long(argc, argv, "v:", longopts, NULL)) != -1) {
switch (c) {
case 'i':
gpu_id = atoi(optarg);
switch (c) {
case 'i':
gpu_id = atoi(optarg);
if (!gpu_id) {
fprintf(stderr, "Expected GPU ID, got %s\n", optarg);
return 1;
}
if (!gpu_id) {
fprintf(stderr, "Expected GPU ID, got %s\n", optarg);
return 1;
}
break;
case 'g':
gpu_id = 0;
break;
case 'g':
gpu_id = 0;
/* Compatibility with the Arm compiler */
if (strncmp(optarg, "Mali-", 5) == 0) optarg += 5;
/* Compatibility with the Arm compiler */
if (strncmp(optarg, "Mali-", 5) == 0)
optarg += 5;
for (unsigned i = 0; i < ARRAY_SIZE(gpus); ++i) {
if (strcmp(gpus[i].name, optarg)) continue;
for (unsigned i = 0; i < ARRAY_SIZE(gpus); ++i) {
if (strcmp(gpus[i].name, optarg))
continue;
unsigned major = gpus[i].major;
unsigned minor = gpus[i].minor;
unsigned major = gpus[i].major;
unsigned minor = gpus[i].minor;
gpu_id = (major << 12) | (minor << 8);
break;
}
gpu_id = (major << 12) | (minor << 8);
break;
}
if (!gpu_id) {
fprintf(stderr, "Unknown GPU %s\n", optarg);
return 1;
}
if (!gpu_id) {
fprintf(stderr, "Unknown GPU %s\n", optarg);
return 1;
}
break;
default:
break;
}
}
break;
default:
break;
}
}
if (strcmp(argv[optind], "compile") == 0)
compile_shader(argc - optind - 1, &argv[optind + 1]);
else if (strcmp(argv[optind], "disasm") == 0)
disassemble(argv[optind + 1]);
else {
fprintf(stderr, "Unknown command. Valid: compile/disasm\n");
return 1;
}
if (strcmp(argv[optind], "compile") == 0)
compile_shader(argc - optind - 1, &argv[optind + 1]);
else if (strcmp(argv[optind], "disasm") == 0)
disassemble(argv[optind + 1]);
else {
fprintf(stderr, "Unknown command. Valid: compile/disasm\n");
return 1;
}
return 0;
return 0;
}
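Editorial note (not part of the commit): in the standalone tool above, --gpu maps a Mali product name to the packed id gpu_id = (major << 12) | (minor << 8), and disassemble() then routes on (gpu_id >> 12) >= 9 to pick the Valhall rather than the Bifrost disassembler. Below is a small self-contained sketch of that packing, reusing a few rows of the table shown above; it is an illustration only, not code from the commit.

  /* Reproduces the gpu_id packing and disassembler routing used by the
   * standalone tool above. */
  #include <stdio.h>

  struct gpu {
     const char *name;
     unsigned major, minor;
  };

  static const struct gpu gpus[] = {
     {"G71", 6, 0}, {"G72", 6, 2}, {"G52", 7, 2}, {"G77", 9, 0},
  };

  int
  main(void)
  {
     for (unsigned i = 0; i < sizeof(gpus) / sizeof(gpus[0]); ++i) {
        unsigned id = (gpus[i].major << 12) | (gpus[i].minor << 8);

        /* e.g. G52 -> 0x7200 (Bifrost path), G77 -> 0x9000 (Valhall path) */
        printf("Mali-%s: 0x%x -> %s disassembler\n", gpus[i].name, id,
               (id >> 12) >= 9 ? "Valhall" : "Bifrost");
     }
     return 0;
  }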
File diff suppressed because it is too large

File diff suppressed because it is too large

View file
@ -34,14 +34,20 @@
void disassemble_bifrost(FILE *fp, uint8_t *code, size_t size, bool verbose);
void
bi_disasm_fma(FILE *fp, unsigned bits, struct bifrost_regs *srcs, struct bifrost_regs *next_regs, unsigned staging_register, unsigned branch_offset, struct bi_constants *consts, bool first);
void bi_disasm_fma(FILE *fp, unsigned bits, struct bifrost_regs *srcs,
struct bifrost_regs *next_regs, unsigned staging_register,
unsigned branch_offset, struct bi_constants *consts,
bool first);
void bi_disasm_add(FILE *fp, unsigned bits, struct bifrost_regs *srcs, struct bifrost_regs *next_regs, unsigned staging_register, unsigned branch_offset, struct bi_constants *consts, bool first);
void bi_disasm_add(FILE *fp, unsigned bits, struct bifrost_regs *srcs,
struct bifrost_regs *next_regs, unsigned staging_register,
unsigned branch_offset, struct bi_constants *consts,
bool first);
void bi_disasm_dest_fma(FILE *fp, struct bifrost_regs *next_regs, bool first);
void bi_disasm_dest_add(FILE *fp, struct bifrost_regs *next_regs, bool first);
void dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs, unsigned branch_offset, struct bi_constants *consts, bool isFMA);
void dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs,
unsigned branch_offset, struct bi_constants *consts, bool isFMA);
#endif
View file
@ -62,182 +62,187 @@ typedef uint16_t nodearray_value;
typedef uint64_t nodearray_sparse;
typedef struct {
union {
nodearray_sparse *sparse;
nodearray_value *dense;
};
unsigned size;
unsigned sparse_capacity;
union {
nodearray_sparse *sparse;
nodearray_value *dense;
};
unsigned size;
unsigned sparse_capacity;
} nodearray;
/* Align sizes to 16-bytes for SIMD purposes */
#define NODEARRAY_DENSE_ALIGN(x) ALIGN_POT(x, 16)
#define nodearray_sparse_foreach(buf, elem) \
for (nodearray_sparse *elem = (buf)->sparse; \
#define nodearray_sparse_foreach(buf, elem) \
for (nodearray_sparse *elem = (buf)->sparse; \
elem < (buf)->sparse + (buf)->size; elem++)
#define nodearray_dense_foreach(buf, elem) \
for (nodearray_value *elem = (buf)->dense; \
#define nodearray_dense_foreach(buf, elem) \
for (nodearray_value *elem = (buf)->dense; \
elem < (buf)->dense + (buf)->size; elem++)
#define nodearray_dense_foreach_64(buf, elem) \
for (uint64_t *elem = (uint64_t *)(buf)->dense; \
#define nodearray_dense_foreach_64(buf, elem) \
for (uint64_t *elem = (uint64_t *)(buf)->dense; \
(nodearray_value *)elem < (buf)->dense + (buf)->size; elem++)
static inline bool
nodearray_is_sparse(const nodearray *a)
{
return a->sparse_capacity != ~0U;
return a->sparse_capacity != ~0U;
}
static inline void
nodearray_init(nodearray *a)
{
memset(a, 0, sizeof(nodearray));
memset(a, 0, sizeof(nodearray));
}
static inline void
nodearray_reset(nodearray *a)
{
free(a->sparse);
nodearray_init(a);
free(a->sparse);
nodearray_init(a);
}
static inline nodearray_sparse
nodearray_encode(unsigned key, nodearray_value value)
{
static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
return ((nodearray_sparse) key << 16) | value;
static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
return ((nodearray_sparse)key << 16) | value;
}
static inline unsigned
nodearray_sparse_key(const nodearray_sparse *elem)
{
static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
return *elem >> 16;
static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
return *elem >> 16;
}
static inline nodearray_value
nodearray_sparse_value(const nodearray_sparse *elem)
{
return *elem & NODEARRAY_MAX_VALUE;
return *elem & NODEARRAY_MAX_VALUE;
}
static inline unsigned
nodearray_sparse_search(const nodearray *a, nodearray_sparse key, nodearray_sparse **elem)
nodearray_sparse_search(const nodearray *a, nodearray_sparse key,
nodearray_sparse **elem)
{
assert(nodearray_is_sparse(a) && a->size);
assert(nodearray_is_sparse(a) && a->size);
nodearray_sparse *data = a->sparse;
nodearray_sparse *data = a->sparse;
/* Encode the key using the highest possible value, so that the
* matching node must be encoded lower than this
*/
nodearray_sparse skey = nodearray_encode(key, NODEARRAY_MAX_VALUE);
/* Encode the key using the highest possible value, so that the
* matching node must be encoded lower than this
*/
nodearray_sparse skey = nodearray_encode(key, NODEARRAY_MAX_VALUE);
unsigned left = 0;
unsigned right = a->size - 1;
unsigned left = 0;
unsigned right = a->size - 1;
if (data[right] <= skey)
left = right;
if (data[right] <= skey)
left = right;
while (left != right) {
/* No need to worry about overflow, we couldn't have more than
* 2^24 elements */
unsigned probe = (left + right + 1) / 2;
while (left != right) {
/* No need to worry about overflow, we couldn't have more than
* 2^24 elements */
unsigned probe = (left + right + 1) / 2;
if (data[probe] > skey)
right = probe - 1;
else
left = probe;
}
if (data[probe] > skey)
right = probe - 1;
else
left = probe;
}
*elem = data + left;
return left;
*elem = data + left;
return left;
}
static inline void
nodearray_orr(nodearray *a, unsigned key, nodearray_value value,
unsigned max_sparse, unsigned max)
{
assert(key < (1 << 24));
assert(key < max);
assert(key < (1 << 24));
assert(key < max);
if (!value)
return;
if (!value)
return;
if (nodearray_is_sparse(a)) {
unsigned size = a->size;
unsigned left = 0;
if (nodearray_is_sparse(a)) {
unsigned size = a->size;
unsigned left = 0;
if (size) {
/* First, binary search for key */
nodearray_sparse *elem;
left = nodearray_sparse_search(a, key, &elem);
if (size) {
/* First, binary search for key */
nodearray_sparse *elem;
left = nodearray_sparse_search(a, key, &elem);
if (nodearray_sparse_key(elem) == key) {
*elem |= value;
return;
}
if (nodearray_sparse_key(elem) == key) {
*elem |= value;
return;
}
/* We insert before `left`, so increment it if it's
* out of order */
if (nodearray_sparse_key(elem) < key)
++left;
}
/* We insert before `left`, so increment it if it's
* out of order */
if (nodearray_sparse_key(elem) < key)
++left;
}
if (size < max_sparse && (size + 1) < max / 4) {
/* We didn't find it, but we know where to insert it. */
if (size < max_sparse && (size + 1) < max / 4) {
/* We didn't find it, but we know where to insert it. */
nodearray_sparse *data = a->sparse;
nodearray_sparse *data_move = data + left;
nodearray_sparse *data = a->sparse;
nodearray_sparse *data_move = data + left;
bool realloc = (++a->size) > a->sparse_capacity;
bool realloc = (++a->size) > a->sparse_capacity;
if (realloc) {
a->sparse_capacity = MIN2(MAX2(a->sparse_capacity * 2, 64), max / 4);
if (realloc) {
a->sparse_capacity =
MIN2(MAX2(a->sparse_capacity * 2, 64), max / 4);
a->sparse = (nodearray_sparse *)malloc(a->sparse_capacity * sizeof(nodearray_sparse));
a->sparse = (nodearray_sparse *)malloc(a->sparse_capacity *
sizeof(nodearray_sparse));
if (left)
memcpy(a->sparse, data, left * sizeof(nodearray_sparse));
}
if (left)
memcpy(a->sparse, data, left * sizeof(nodearray_sparse));
}
nodearray_sparse *elem = a->sparse + left;
nodearray_sparse *elem = a->sparse + left;
if (left != size)
memmove(elem + 1, data_move, (size - left) * sizeof(nodearray_sparse));
if (left != size)
memmove(elem + 1, data_move,
(size - left) * sizeof(nodearray_sparse));
*elem = nodearray_encode(key, value);
*elem = nodearray_encode(key, value);
if (realloc)
free(data);
if (realloc)
free(data);
return;
}
return;
}
/* There are too many elements, so convert to a dense array */
nodearray old = *a;
/* There are too many elements, so convert to a dense array */
nodearray old = *a;
a->dense = (nodearray_value *)calloc(NODEARRAY_DENSE_ALIGN(max), sizeof(nodearray_value));
a->size = max;
a->sparse_capacity = ~0U;
a->dense = (nodearray_value *)calloc(NODEARRAY_DENSE_ALIGN(max),
sizeof(nodearray_value));
a->size = max;
a->sparse_capacity = ~0U;
nodearray_value *data = a->dense;
nodearray_value *data = a->dense;
nodearray_sparse_foreach(&old, x) {
unsigned key = nodearray_sparse_key(x);
nodearray_value value = nodearray_sparse_value(x);
nodearray_sparse_foreach(&old, x) {
unsigned key = nodearray_sparse_key(x);
nodearray_value value = nodearray_sparse_value(x);
assert(key < max);
data[key] = value;
}
assert(key < max);
data[key] = value;
}
free(old.sparse);
}
free(old.sparse);
}
a->dense[key] |= value;
a->dense[key] |= value;
}
#ifdef __cplusplus
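Editorial note (not part of the commit): the nodearray reformatted above is a hybrid map from node index to a 16-bit value. While small it is kept as a sorted array of 64-bit words encoded as (key << 16) | value, and nodearray_orr converts it to a plain dense array once it grows past the sparse thresholds. The sketch below exercises just that encoding; NODEARRAY_MAX_VALUE is assumed to be 0xffff (the full 16-bit value range), and the encode/entry_* names are local stand-ins for the nodearray_* helpers shown above.

  /* Demonstrates the sparse nodearray entry encoding, (key << 16) | value, and
   * why sorting the encoded words also sorts entries by key. */
  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>

  #define NODEARRAY_MAX_VALUE 0xffff /* assumed; matches a 16-bit value type */

  static uint64_t
  encode(unsigned key, uint16_t value)
  {
     return ((uint64_t)key << 16) | value;
  }

  static unsigned
  entry_key(uint64_t e)
  {
     return e >> 16;
  }

  static uint16_t
  entry_value(uint64_t e)
  {
     return e & NODEARRAY_MAX_VALUE;
  }

  int
  main(void)
  {
     uint64_t a = encode(3, 0x00f0);
     uint64_t b = encode(7, 0x0001);

     assert(entry_key(a) == 3 && entry_value(a) == 0x00f0);

     /* A larger key always encodes to a larger word, whatever the value, which
      * is what lets nodearray_sparse_search binary-search the raw uint64_t
      * array directly. */
     assert(a < b);
     assert(encode(3, NODEARRAY_MAX_VALUE) < encode(4, 0));

     printf("ok\n");
     return 0;
  }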
View file
@ -21,14 +21,15 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"
#include <gtest/gtest.h>
static std::string
to_string(const bi_instr *I) {
to_string(const bi_instr *I)
{
char *cstr = NULL;
size_t size = 0;
FILE *f = open_memstream(&cstr, &size);
@ -40,23 +41,21 @@ to_string(const bi_instr *I) {
}
static testing::AssertionResult
constant_fold_pred(const char *I_expr,
const char *expected_expr,
bi_instr *I,
constant_fold_pred(const char *I_expr, const char *expected_expr, bi_instr *I,
uint32_t expected)
{
bool unsupported = false;
uint32_t v = bi_fold_constant(I, &unsupported);
if (unsupported) {
return testing::AssertionFailure()
<< "Constant fold unsupported for instruction \n\n"
<< " " << to_string(I);
<< "Constant fold unsupported for instruction \n\n"
<< " " << to_string(I);
} else if (v != expected) {
return testing::AssertionFailure()
<< "Unexpected result when constant folding instruction\n\n"
<< " " << to_string(I) << "\n"
<< " Actual: " << v << "\n"
<< "Expected: " << expected << "\n";
<< "Unexpected result when constant folding instruction\n\n"
<< " " << to_string(I) << "\n"
<< " Actual: " << v << "\n"
<< "Expected: " << expected << "\n";
} else {
return testing::AssertionSuccess();
}
@ -64,7 +63,6 @@ constant_fold_pred(const char *I_expr,
#define EXPECT_FOLD(i, e) EXPECT_PRED_FORMAT2(constant_fold_pred, i, e)
static testing::AssertionResult
not_constant_fold_pred(const char *I_expr, bi_instr *I)
{
@ -74,22 +72,23 @@ not_constant_fold_pred(const char *I_expr, bi_instr *I)
return testing::AssertionSuccess();
} else {
return testing::AssertionFailure()
<< "Instruction\n\n"
<< " " << to_string(I) << "\n"
<< "shouldn't have constant folded, but folded to: " << v;
<< "Instruction\n\n"
<< " " << to_string(I) << "\n"
<< "shouldn't have constant folded, but folded to: " << v;
}
}
#define EXPECT_NOT_FOLD(i) EXPECT_PRED_FORMAT1(not_constant_fold_pred, i)
class ConstantFold : public testing::Test {
protected:
ConstantFold() {
protected:
ConstantFold()
{
mem_ctx = ralloc_context(NULL);
b = bit_builder(mem_ctx);
}
~ConstantFold() {
~ConstantFold()
{
ralloc_free(mem_ctx);
}
@ -101,9 +100,7 @@ TEST_F(ConstantFold, Swizzles)
{
bi_index reg = bi_register(0);
EXPECT_FOLD(
bi_swz_v2i16_to(b, reg, bi_imm_u32(0xCAFEBABE)),
0xCAFEBABE);
EXPECT_FOLD(bi_swz_v2i16_to(b, reg, bi_imm_u32(0xCAFEBABE)), 0xCAFEBABE);
EXPECT_FOLD(
bi_swz_v2i16_to(b, reg, bi_swz_16(bi_imm_u32(0xCAFEBABE), false, false)),
@ -123,18 +120,17 @@ TEST_F(ConstantFold, VectorConstructions2i16)
bi_index reg = bi_register(0);
EXPECT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_imm_u16(0xCAFE),
bi_imm_u16(0xBABE)),
bi_mkvec_v2i16_to(b, reg, bi_imm_u16(0xCAFE), bi_imm_u16(0xBABE)),
0xBABECAFE);
EXPECT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_swz_16(bi_imm_u32(0xCAFEBABE), true, true),
bi_imm_u16(0xBABE)),
bi_imm_u16(0xBABE)),
0xBABECAFE);
EXPECT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_swz_16(bi_imm_u32(0xCAFEBABE), true, true),
bi_swz_16(bi_imm_u32(0xCAFEBABE), false, false)),
bi_swz_16(bi_imm_u32(0xCAFEBABE), false, false)),
0xBABECAFE);
}
@ -173,17 +169,18 @@ TEST_F(ConstantFold, LimitedShiftsForTexturing)
{
bi_index reg = bi_register(0);
EXPECT_FOLD(
bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE), bi_imm_u32(0xA0000), bi_imm_u8(4)),
(0xCAFE << 4) | 0xA0000);
EXPECT_FOLD(bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE),
bi_imm_u32(0xA0000), bi_imm_u8(4)),
(0xCAFE << 4) | 0xA0000);
EXPECT_NOT_FOLD(
bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE), bi_not(bi_imm_u32(0xA0000)), bi_imm_u8(4)));
EXPECT_NOT_FOLD(bi_lshift_or_i32_to(
b, reg, bi_imm_u32(0xCAFE), bi_not(bi_imm_u32(0xA0000)), bi_imm_u8(4)));
EXPECT_NOT_FOLD(
bi_lshift_or_i32_to(b, reg, bi_not(bi_imm_u32(0xCAFE)), bi_imm_u32(0xA0000), bi_imm_u8(4)));
EXPECT_NOT_FOLD(bi_lshift_or_i32_to(b, reg, bi_not(bi_imm_u32(0xCAFE)),
bi_imm_u32(0xA0000), bi_imm_u8(4)));
bi_instr *I = bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE), bi_imm_u32(0xA0000), bi_imm_u8(4));
bi_instr *I = bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE),
bi_imm_u32(0xA0000), bi_imm_u8(4));
I->not_result = true;
EXPECT_NOT_FOLD(I);
}
@ -193,9 +190,12 @@ TEST_F(ConstantFold, NonConstantSourcesCannotBeFolded)
bi_index reg = bi_register(0);
EXPECT_NOT_FOLD(bi_swz_v2i16_to(b, reg, bi_temp(b->shader)));
EXPECT_NOT_FOLD(bi_mkvec_v2i16_to(b, reg, bi_temp(b->shader), bi_temp(b->shader)));
EXPECT_NOT_FOLD(bi_mkvec_v2i16_to(b, reg, bi_temp(b->shader), bi_imm_u32(0xDEADBEEF)));
EXPECT_NOT_FOLD(bi_mkvec_v2i16_to(b, reg, bi_imm_u32(0xDEADBEEF), bi_temp(b->shader)));
EXPECT_NOT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_temp(b->shader), bi_temp(b->shader)));
EXPECT_NOT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_temp(b->shader), bi_imm_u32(0xDEADBEEF)));
EXPECT_NOT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_imm_u32(0xDEADBEEF), bi_temp(b->shader)));
}
TEST_F(ConstantFold, OtherOperationsShouldNotFold)
View file
@ -21,55 +21,57 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"
#include <gtest/gtest.h>
#define CASE(shader_stage, instr, expected) do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
bi_builder *b = A; \
bi_index u = bi_temp(b->shader); \
bi_index v = bi_temp(b->shader); \
A->shader->stage = MESA_SHADER_ ## shader_stage; \
instr; \
} \
{ \
bi_builder *b = B; \
bi_index u = bi_temp(b->shader); \
bi_index v = bi_temp(b->shader); \
B->shader->stage = MESA_SHADER_ ## shader_stage; \
expected; \
} \
bi_opt_fuse_dual_texture(A->shader); \
if (!bit_shader_equal(A->shader, B->shader)) { \
ADD_FAILURE(); \
fprintf(stderr, "Optimization produce unexpected result"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A->shader, stderr); \
fprintf(stderr, "Expected:\n"); \
bi_print_shader(B->shader, stderr); \
fprintf(stderr, "\n"); \
} \
} while(0)
#define CASE(shader_stage, instr, expected) \
do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
bi_builder *b = A; \
bi_index u = bi_temp(b->shader); \
bi_index v = bi_temp(b->shader); \
A->shader->stage = MESA_SHADER_##shader_stage; \
instr; \
} \
{ \
bi_builder *b = B; \
bi_index u = bi_temp(b->shader); \
bi_index v = bi_temp(b->shader); \
B->shader->stage = MESA_SHADER_##shader_stage; \
expected; \
} \
bi_opt_fuse_dual_texture(A->shader); \
if (!bit_shader_equal(A->shader, B->shader)) { \
ADD_FAILURE(); \
fprintf(stderr, "Optimization produce unexpected result"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A->shader, stderr); \
fprintf(stderr, "Expected:\n"); \
bi_print_shader(B->shader, stderr); \
fprintf(stderr, "\n"); \
} \
} while (0)
#define NEGCASE(stage, instr) CASE(stage, instr, instr)
class DualTexture : public testing::Test {
protected:
DualTexture() {
protected:
DualTexture()
{
mem_ctx = ralloc_context(NULL);
reg = bi_register(0);
x = bi_register(4);
y = bi_register(8);
reg = bi_register(0);
x = bi_register(4);
y = bi_register(8);
}
~DualTexture() {
~DualTexture()
{
ralloc_free(mem_ctx);
}
@ -78,134 +80,165 @@ protected:
bi_index reg, x, y;
};
TEST_F(DualTexture, FuseDualTexFragment)
{
CASE(FRAGMENT, {
CASE(
FRAGMENT,
{
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), false, 4, 4);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144),
false, 4, 4);
});
}
TEST_F(DualTexture, FuseDualTexKernel)
{
CASE(KERNEL, {
CASE(
KERNEL,
{
bi_texs_2d_f32_to(b, x, u, v, true, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, true, 1, 1);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), true, 4, 4);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), true,
4, 4);
});
}
TEST_F(DualTexture, FuseDualTexVertex)
{
CASE(VERTEX, {
CASE(
VERTEX,
{
bi_texs_2d_f32_to(b, x, u, v, true, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, true, 1, 1);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), true, 4, 4);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), true,
4, 4);
});
}
TEST_F(DualTexture, DontFuseDualTexWrongStage)
{
NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, u, v, true, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, true, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, true, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, true, 1, 1);
});
NEGCASE(KERNEL, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
});
NEGCASE(VERTEX, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
});
}
TEST_F(DualTexture, FuseDualTexMaximumIndex)
{
CASE(FRAGMENT, {
CASE(
FRAGMENT,
{
bi_texs_2d_f32_to(b, x, u, v, false, 2, 2);
bi_texs_2d_f32_to(b, y, u, v, false, 3, 3);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F003E6), false, 4, 4);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F003E6),
false, 4, 4);
});
}
TEST_F(DualTexture, FuseDualTexMixedIndex)
{
CASE(FRAGMENT, {
CASE(
FRAGMENT,
{
bi_texs_2d_f32_to(b, x, u, v, false, 3, 2);
bi_texs_2d_f32_to(b, y, u, v, false, 2, 3);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F003A7), false, 4, 4);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F003A7),
false, 4, 4);
});
}
TEST_F(DualTexture, DontFuseDualTexOutOfBounds)
{
NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, u, v, false, 4, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 4, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
});
NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 4);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 4);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
});
NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 4, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 4, 1);
});
NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 4);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 4);
});
}
TEST_F(DualTexture, FuseDualTexFP16)
{
CASE(FRAGMENT, {
CASE(
FRAGMENT,
{
bi_texs_2d_f16_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f16_to(b, y, u, v, false, 1, 1);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF1E00144), false, 2, 2);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF1E00144),
false, 2, 2);
});
}
TEST_F(DualTexture, FuseDualTexMixedSize)
{
CASE(FRAGMENT, {
CASE(
FRAGMENT,
{
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f16_to(b, y, u, v, false, 1, 1);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0XF9E00144), false, 4, 2);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0XF9E00144),
false, 4, 2);
});
CASE(FRAGMENT, {
CASE(
FRAGMENT,
{
bi_texs_2d_f16_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF1F00144), false, 2, 4);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF1F00144),
false, 2, 4);
});
}
TEST_F(DualTexture, DontFuseMixedCoordinates)
{
NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, bi_neg(u), v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
bi_texs_2d_f32_to(b, x, bi_neg(u), v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
});
NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, v, u, false, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, v, u, false, 1, 1);
});
}
View file
@ -21,31 +21,34 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"
#include <gtest/gtest.h>
#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, bi_lower_swizzle)
#define CASE(instr, expected) \
INSTRUCTION_CASE(instr, expected, bi_lower_swizzle)
#define NEGCASE(instr) CASE(instr, instr)
class LowerSwizzle : public testing::Test {
protected:
LowerSwizzle() {
protected:
LowerSwizzle()
{
mem_ctx = ralloc_context(NULL);
reg = bi_register(0);
x = bi_register(1);
y = bi_register(2);
z = bi_register(3);
w = bi_register(4);
reg = bi_register(0);
x = bi_register(1);
y = bi_register(2);
z = bi_register(3);
w = bi_register(4);
x3210 = x;
x3210 = x;
x3210.swizzle = BI_SWIZZLE_B3210;
}
~LowerSwizzle() {
~LowerSwizzle()
{
ralloc_free(mem_ctx);
}
@ -58,7 +61,8 @@ protected:
TEST_F(LowerSwizzle, Csel16)
{
CASE(bi_csel_v2f16_to(b, reg, bi_half(x, 0), y, z, w, BI_CMPF_NE),
bi_csel_v2f16_to(b, reg, bi_swz_v2i16(b, bi_half(x, 0)), y, z, w, BI_CMPF_NE));
bi_csel_v2f16_to(b, reg, bi_swz_v2i16(b, bi_half(x, 0)), y, z, w,
BI_CMPF_NE));
}
TEST_F(LowerSwizzle, Fma16)
@ -79,23 +83,22 @@ TEST_F(LowerSwizzle, ClzHadd8)
TEST_F(LowerSwizzle, FirstShift8)
{
enum bi_opcode ops[] = {
BI_OPCODE_LSHIFT_AND_V4I8,
BI_OPCODE_LSHIFT_OR_V4I8,
BI_OPCODE_LSHIFT_XOR_V4I8,
BI_OPCODE_RSHIFT_AND_V4I8,
BI_OPCODE_RSHIFT_OR_V4I8,
BI_OPCODE_RSHIFT_XOR_V4I8,
BI_OPCODE_LSHIFT_AND_V4I8, BI_OPCODE_LSHIFT_OR_V4I8,
BI_OPCODE_LSHIFT_XOR_V4I8, BI_OPCODE_RSHIFT_AND_V4I8,
BI_OPCODE_RSHIFT_OR_V4I8, BI_OPCODE_RSHIFT_XOR_V4I8,
};
for (unsigned i = 0; i < ARRAY_SIZE(ops); ++i) {
CASE({
CASE(
{
bi_instr *I = bi_lshift_and_v4i8_to(b, reg, x3210, y, z);
I->op = ops[i];
},
{
bi_instr *I = bi_lshift_and_v4i8_to(b, reg, bi_swz_v4i8(b, x3210), y, z);
},
{
bi_instr *I =
bi_lshift_and_v4i8_to(b, reg, bi_swz_v4i8(b, x3210), y, z);
I->op = ops[i];
});
});
}
}
View file
@ -21,56 +21,58 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"
#include <gtest/gtest.h>
#define CASE(instr, expected) do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
A->shader->info.bifrost = rzalloc(mem_ctx, struct bifrost_shader_info); \
B->shader->info.bifrost = rzalloc(mem_ctx, struct bifrost_shader_info); \
{ \
bi_builder *b = A; \
bi_index u = bi_temp(b->shader); \
UNUSED bi_index v = bi_temp(b->shader); \
UNUSED bi_index w = bi_temp(b->shader); \
instr; \
} \
{ \
bi_builder *b = B; \
bi_index u = bi_temp(b->shader); \
UNUSED bi_index v = bi_temp(b->shader); \
UNUSED bi_index w = bi_temp(b->shader); \
expected; \
} \
bi_opt_message_preload(A->shader); \
if (!bit_shader_equal(A->shader, B->shader)) { \
ADD_FAILURE(); \
fprintf(stderr, "Optimization produce unexpected result"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A->shader, stderr); \
fprintf(stderr, "Expected:\n"); \
bi_print_shader(B->shader, stderr); \
fprintf(stderr, "\n"); \
} \
} while(0)
#define CASE(instr, expected) \
do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
A->shader->info.bifrost = rzalloc(mem_ctx, struct bifrost_shader_info); \
B->shader->info.bifrost = rzalloc(mem_ctx, struct bifrost_shader_info); \
{ \
bi_builder *b = A; \
bi_index u = bi_temp(b->shader); \
UNUSED bi_index v = bi_temp(b->shader); \
UNUSED bi_index w = bi_temp(b->shader); \
instr; \
} \
{ \
bi_builder *b = B; \
bi_index u = bi_temp(b->shader); \
UNUSED bi_index v = bi_temp(b->shader); \
UNUSED bi_index w = bi_temp(b->shader); \
expected; \
} \
bi_opt_message_preload(A->shader); \
if (!bit_shader_equal(A->shader, B->shader)) { \
ADD_FAILURE(); \
fprintf(stderr, "Optimization produce unexpected result"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A->shader, stderr); \
fprintf(stderr, "Expected:\n"); \
bi_print_shader(B->shader, stderr); \
fprintf(stderr, "\n"); \
} \
} while (0)
#define NEGCASE(instr) CASE(instr, instr)
class MessagePreload : public testing::Test {
protected:
MessagePreload() {
protected:
MessagePreload()
{
mem_ctx = ralloc_context(NULL);
x = bi_register(16);
y = bi_register(32);
x = bi_register(16);
y = bi_register(32);
}
~MessagePreload() {
~MessagePreload()
{
ralloc_free(mem_ctx);
}
@ -84,100 +86,117 @@ protected:
b->cursor = bi_before_block(bi_start_block(&b->shader->blocks));
bi_foreach_src(I, i)
I->src[i] = bi_mov_i32(b, bi_register(idx*4 + i));
I->src[i] = bi_mov_i32(b, bi_register(idx * 4 + i));
b->cursor = bi_after_instr(I);
}
};
TEST_F(MessagePreload, PreloadLdVarSample)
{
CASE({
CASE(
{
bi_ld_var_imm_to(b, u, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 0);
}, {
preload_moves(b, u, 4, 0);
});
},
{ preload_moves(b, u, 4, 0); });
}
TEST_F(MessagePreload, PreloadLdVarLdVar)
{
CASE({
CASE(
{
bi_ld_var_imm_to(b, u, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 2);
bi_ld_var_imm_to(b, v, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 1);
}, {
},
{
preload_moves(b, u, 4, 0);
preload_moves(b, v, 4, 1);
});
});
}
TEST_F(MessagePreload, MaxTwoMessages)
{
CASE({
CASE(
{
bi_ld_var_imm_to(b, u, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 2);
bi_ld_var_imm_to(b, v, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 1);
bi_ld_var_imm_to(b, w, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 0);
},
{
},
{
preload_moves(b, u, 4, 0);
preload_moves(b, v, 4, 1);
bi_ld_var_imm_to(b, w, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 0);
});
});
CASE({
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
bi_var_tex_f16_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 1, 2);
bi_var_tex_f16_to(b, w, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 3, 3);
}, {
CASE(
{
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
bi_var_tex_f16_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 1,
2);
bi_var_tex_f16_to(b, w, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 3,
3);
},
{
preload_moves(b, u, 4, 0);
preload_moves(b, v, 2, 1);
bi_var_tex_f16_to(b, w, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 3, 3);
});
bi_var_tex_f16_to(b, w, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 3,
3);
});
}
TEST_F(MessagePreload, PreloadVartexF16)
{
CASE({
bi_var_tex_f16_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
}, {
preload_moves(b, u, 2, 0);
});
CASE(
{
bi_var_tex_f16_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
},
{ preload_moves(b, u, 2, 0); });
}
TEST_F(MessagePreload, PreloadVartexF32)
{
CASE({
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
}, {
preload_moves(b, u, 4, 0);
});
CASE(
{
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
},
{ preload_moves(b, u, 4, 0); });
}
TEST_F(MessagePreload, PreloadVartexF32VartexF16)
{
CASE({
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
bi_var_tex_f16_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 1, 2);
}, {
CASE(
{
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
bi_var_tex_f16_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 1,
2);
},
{
preload_moves(b, u, 4, 0);
preload_moves(b, v, 2, 1);
});
});
}
TEST_F(MessagePreload, PreloadVartexLodModes)
{
CASE({
CASE(
{
bi_var_tex_f32_to(b, u, true, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
bi_var_tex_f32_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
}, {
bi_var_tex_f32_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
},
{
preload_moves(b, u, 4, 0);
preload_moves(b, v, 4, 1);
});
});
}
View file
@ -21,9 +21,9 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"
#include <gtest/gtest.h>
@ -38,24 +38,35 @@ bi_optimizer(bi_context *ctx)
/* Define reg first so it has a consistent variable index, and pass it to an
* instruction that cannot be dead code eliminated so the program is nontrivial.
*/
#define CASE(instr, expected) INSTRUCTION_CASE(\
{ UNUSED bi_index reg = bi_temp(b->shader); instr; bi_kaboom(b, reg); }, \
{ UNUSED bi_index reg = bi_temp(b->shader); expected; bi_kaboom(b, reg); }, \
#define CASE(instr, expected) \
INSTRUCTION_CASE( \
{ \
UNUSED bi_index reg = bi_temp(b->shader); \
instr; \
bi_kaboom(b, reg); \
}, \
{ \
UNUSED bi_index reg = bi_temp(b->shader); \
expected; \
bi_kaboom(b, reg); \
}, \
bi_optimizer);
#define NEGCASE(instr) CASE(instr, instr)
class Optimizer : public testing::Test {
protected:
Optimizer() {
protected:
Optimizer()
{
mem_ctx = ralloc_context(NULL);
x = bi_register(1);
y = bi_register(2);
x = bi_register(1);
y = bi_register(2);
negabsx = bi_neg(bi_abs(x));
}
~Optimizer() {
~Optimizer()
{
ralloc_free(mem_ctx);
}
@ -95,91 +106,124 @@ TEST_F(Optimizer, FusedFABSNEGForFP16)
TEST_F(Optimizer, FuseFADD_F32WithEqualSourcesAbsAbsAndClamp)
{
CASE({
bi_instr *I = bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), bi_abs(x));
CASE(
{
bi_instr *I =
bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
});
CASE({
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_fabsneg_f32(b, bi_abs(x)));
CASE(
{
bi_instr *I =
bi_fadd_f32_to(b, reg, bi_abs(x), bi_fabsneg_f32(b, bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
});
CASE({
bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), bi_abs(x)));
CASE(
{
bi_instr *I =
bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
});
}
TEST_F(Optimizer, FuseFADD_V2F16WithDifferentSourcesAbsAbsAndClamp)
{
CASE({
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(y));
CASE(
{
bi_instr *I =
bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(y));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
});
CASE({
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(y)));
CASE(
{
bi_instr *I =
bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(y)));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
});
CASE({
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(y)));
CASE(
{
bi_instr *I =
bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(y)));
I->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
});
}
TEST_F(Optimizer, AvoidFADD_V2F16WithEqualSourcesAbsAbsAndClamp)
{
NEGCASE({
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
bi_instr *I =
bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
NEGCASE({
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_1;
bi_instr *I =
bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
NEGCASE({
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(x)));
bi_instr *I =
bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
}
TEST_F(Optimizer, SwizzlesComposedForFP16)
{
CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), y),
CASE(bi_fadd_v2f16_to(
b, reg, bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), y),
bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y));
CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, negabsx), true, false), y),
CASE(bi_fadd_v2f16_to(
b, reg, bi_swz_16(bi_fabsneg_v2f16(b, negabsx), true, false), y),
bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y));
CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), true, false), y),
CASE(bi_fadd_v2f16_to(
b, reg,
bi_swz_16(bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), true,
false),
y),
bi_fadd_v2f16_to(b, reg, negabsx, y));
CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, false)), true, false), y),
CASE(bi_fadd_v2f16_to(
b, reg,
bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, false)), true, false),
y),
bi_fadd_v2f16_to(b, reg, bi_half(negabsx, false), y));
CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, true)), true, false), y),
CASE(bi_fadd_v2f16_to(
b, reg,
bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, true)), true, false),
y),
bi_fadd_v2f16_to(b, reg, bi_half(negabsx, true), y));
}
@ -192,7 +236,8 @@ TEST_F(Optimizer, PreserveWidens)
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, true)), y),
bi_fadd_f32_to(b, reg, bi_half(negabsx, true), y));
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(x, true)), bi_fabsneg_f32(b, bi_half(x, false))),
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(x, true)),
bi_fabsneg_f32(b, bi_half(x, false))),
bi_fadd_f32_to(b, reg, bi_half(x, true), bi_half(x, false)));
}
@ -219,85 +264,100 @@ TEST_F(Optimizer, AvoidZeroAndFABSNEGFootguns)
TEST_F(Optimizer, ClampsPropagated)
{
CASE({
bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, x, y));
I->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
CASE(
{
bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, x, y));
I->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
CASE({
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE(
{
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y));
I->clamp = BI_CLAMP_CLAMP_0_1;
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
}
TEST_F(Optimizer, ClampsComposed)
{
CASE({
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_M1_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE(
{
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_M1_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE({
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE(
{
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE({
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_INF;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
CASE(
{
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_INF;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
CASE({
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_M1_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE(
{
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_M1_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE({
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE(
{
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE({
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_INF;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
CASE(
{
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_INF;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
}
TEST_F(Optimizer, DoNotMixSizesWhenClamping)
@ -341,21 +401,29 @@ TEST_F(Optimizer, FuseComparisonsWithDISCARD)
bi_discard_f32(b, x, y, BI_CMPF_EQ));
for (unsigned h = 0; h < 2; ++h) {
CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1), h)),
CASE(bi_discard_b32(
b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1),
h)),
bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_LE));
CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_NE, BI_RESULT_TYPE_I1), h)),
CASE(bi_discard_b32(
b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_NE, BI_RESULT_TYPE_I1),
h)),
bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_NE));
CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_EQ, BI_RESULT_TYPE_M1), h)),
CASE(bi_discard_b32(
b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_EQ, BI_RESULT_TYPE_M1),
h)),
bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_EQ));
}
}
TEST_F(Optimizer, DoNotFuseSpecialComparisons)
{
NEGCASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_GTLT, BI_RESULT_TYPE_F1)));
NEGCASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_TOTAL, BI_RESULT_TYPE_F1)));
NEGCASE(
bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_GTLT, BI_RESULT_TYPE_F1)));
NEGCASE(bi_discard_b32(
b, bi_fcmp_f32(b, x, y, BI_CMPF_TOTAL, BI_RESULT_TYPE_F1)));
}
TEST_F(Optimizer, FuseResultType)
@ -365,25 +433,33 @@ TEST_F(Optimizer, FuseResultType)
BI_MUX_INT_ZERO),
bi_fcmp_f32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1));
CASE(bi_mux_i32_to(b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_F1));
CASE(bi_mux_i32_to(
b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_F1));
CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_I1));
CASE(bi_mux_i32_to(
b, reg, bi_imm_u32(0), bi_imm_u32(1),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_I1));
CASE(bi_mux_v2i16_to(b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0),
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_F1));
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_F1));
CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1),
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_I1));
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_I1));
CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1),
bi_icmp_u32(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
@ -391,13 +467,13 @@ TEST_F(Optimizer, FuseResultType)
bi_icmp_u32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1),
bi_icmp_v2u16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v2u16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v2u16_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
CASE(bi_mux_v4i8_to(b, reg, bi_imm_u8(0), bi_imm_u8(1),
bi_icmp_v4u8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v4u8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v4u8_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1),
@ -406,31 +482,36 @@ TEST_F(Optimizer, FuseResultType)
bi_icmp_s32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1),
bi_icmp_v2s16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v2s16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v2s16_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
CASE(bi_mux_v4i8_to(b, reg, bi_imm_u8(0), bi_imm_u8(1),
bi_icmp_v4s8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v4s8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v4s8_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
}
TEST_F(Optimizer, DoNotFuseMixedSizeResultType)
{
NEGCASE(bi_mux_i32_to(b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0),
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO));
NEGCASE(bi_mux_i32_to(
b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0),
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO));
NEGCASE(bi_mux_v2i16_to(b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO));
NEGCASE(bi_mux_v2i16_to(
b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO));
}
TEST_F(Optimizer, VarTexCoord32)
{
CASE({
bi_index ld = bi_ld_var_imm(b, bi_null(), BI_REGISTER_FORMAT_F32, BI_SAMPLE_CENTER, BI_UPDATE_STORE, BI_VECSIZE_V2, 0);
CASE(
{
bi_index ld =
bi_ld_var_imm(b, bi_null(), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_CENTER, BI_UPDATE_STORE, BI_VECSIZE_V2, 0);
bi_index x = bi_temp(b->shader);
bi_index y = bi_temp(b->shader);
@@ -439,9 +520,11 @@ TEST_F(Optimizer, VarTexCoord32)
split->dest[1] = y;
bi_texs_2d_f32_to(b, reg, x, y, false, 0, 0);
}, {
bi_var_tex_f32_to(b, reg, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
});
},
{
bi_var_tex_f32_to(b, reg, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
});
}
TEST_F(Optimizer, Int8ToFloat32)
@@ -458,7 +541,6 @@ TEST_F(Optimizer, Int8ToFloat32)
}
}
TEST_F(Optimizer, Int16ToFloat32)
{
for (unsigned i = 0; i < 2; ++i) {


@@ -21,23 +21,27 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_test.h"
#include "compiler.h"
#include <gtest/gtest.h>
#include "mesa-gtest-extras.h"
class PackFormats : public testing::Test
{
protected:
PackFormats() {
class PackFormats : public testing::Test {
protected:
PackFormats()
{
util_dynarray_init(&result, NULL);
}
~PackFormats() {
~PackFormats()
{
util_dynarray_fini(&result);
}
const uint64_t *result_as_u64_array() { return reinterpret_cast<uint64_t *>(result.data); }
const uint64_t *result_as_u64_array()
{
return reinterpret_cast<uint64_t *>(result.data);
}
struct util_dynarray result;
};
@@ -46,7 +50,7 @@ TEST_F(PackFormats, 1)
{
/* Test case from the blob */
struct bi_packed_tuple tuples[] = {
{ 0x2380cb1c02200000, 0x10e0 },
{0x2380cb1c02200000, 0x10e0},
};
uint64_t header = 0x021000011800;
@@ -65,8 +69,8 @@ TEST_F(PackFormats, 1)
TEST_F(PackFormats, 2)
{
struct bi_packed_tuple tuples[] = {
{ 0x9380cb6044000044, 0xf65 },
{ 0xaf8721a05c000081, 0x1831 },
{0x9380cb6044000044, 0xf65},
{0xaf8721a05c000081, 0x1831},
};
bi_pack_format(&result, 0, tuples, 2, 0x52800011800, 0, 0, false);
@@ -86,9 +90,9 @@ TEST_F(PackFormats, 2)
TEST_F(PackFormats, 3)
{
struct bi_packed_tuple tuples[] = {
{ 0x93805b8040000000, 0xf65 },
{ 0x93886db05c000000, 0xf65 },
{ 0xb380cb180c000080, 0x18b1 },
{0x93805b8040000000, 0xf65},
{0x93886db05c000000, 0xf65},
{0xb380cb180c000080, 0x18b1},
};
bi_pack_format(&result, 0, tuples, 3, 0x3100000000, 0, 0, true);
@@ -96,12 +100,8 @@ TEST_F(PackFormats, 3)
bi_pack_format(&result, 4, tuples, 3, 0x3100000000, 0, 0, true);
const uint64_t expected[] = {
0x805b804000000029,
0x0188000000076593,
0x886db05c00000021,
0x58c0600004076593,
0x0000000000000044,
0x60002c6ce0300000,
0x805b804000000029, 0x0188000000076593, 0x886db05c00000021,
0x58c0600004076593, 0x0000000000000044, 0x60002c6ce0300000,
};
ASSERT_EQ(result.size, 48);
@@ -111,10 +111,10 @@ TEST_F(PackFormats, 3)
TEST_F(PackFormats, 4)
{
struct bi_packed_tuple tuples[] = {
{ 0xad8c87004000005f, 0x2f18 },
{ 0xad8c87385c00004f, 0x2f18 },
{ 0xad8c87385c00006e, 0x2f18 },
{ 0xb380cb182c000080, 0x18b1 },
{0xad8c87004000005f, 0x2f18},
{0xad8c87385c00004f, 0x2f18},
{0xad8c87385c00006e, 0x2f18},
{0xb380cb182c000080, 0x18b1},
};
uint64_t EC0 = (0x10000001ff000000) >> 4;
@@ -124,12 +124,8 @@ TEST_F(PackFormats, 4)
bi_pack_format(&result, 6, tuples, 4, 0x3100000000, EC0, 0, false);
const uint64_t expected[] = {
0x8c87004000005f2d,
0x01880000000718ad,
0x8c87385c00004f25,
0x39c2e000037718ad,
0x80cb182c00008005,
0xac01c62b6320b1b3,
0x8c87004000005f2d, 0x01880000000718ad, 0x8c87385c00004f25,
0x39c2e000037718ad, 0x80cb182c00008005, 0xac01c62b6320b1b3,
};
ASSERT_EQ(result.size, 48);
@@ -139,11 +135,9 @@ TEST_F(PackFormats, 4)
TEST_F(PackFormats, 5)
{
struct bi_packed_tuple tuples[] = {
{ 0x9380688040000000, 0xf65 },
{ 0xd4057300c000040, 0xf26 },
{ 0x1f80cb1858000000, 0x19ab },
{ 0x937401f85c000000, 0xf65 },
{ 0xb380cb180c000080, 0x18a1 },
{0x9380688040000000, 0xf65}, {0xd4057300c000040, 0xf26},
{0x1f80cb1858000000, 0x19ab}, {0x937401f85c000000, 0xf65},
{0xb380cb180c000080, 0x18a1},
};
uint64_t EC0 = (0x183f800000) >> 4;
@@ -154,14 +148,9 @@ TEST_F(PackFormats, 5)
bi_pack_format(&result, 8, tuples, 5, 0x3100000000, EC0, 0, true);
const uint64_t expected[] = {
0x8068804000000029,
0x0188000000076593,
0x4057300c00004021,
0x58c2c0000007260d,
0x7401f85c0000008b,
0x00006ac7e0376593,
0x80cb180c00008053,
0x000000183f80a1b3,
0x8068804000000029, 0x0188000000076593, 0x4057300c00004021,
0x58c2c0000007260d, 0x7401f85c0000008b, 0x00006ac7e0376593,
0x80cb180c00008053, 0x000000183f80a1b3,
};
ASSERT_EQ(result.size, 64);
@@ -171,12 +160,9 @@ TEST_F(PackFormats, 5)
TEST_F(PackFormats, 6)
{
struct bi_packed_tuple tuples[] = {
{ 0xad8c870068000048, 0x2f18 },
{ 0xad8c87385c000050, 0x2f18 },
{ 0xad8c87385c00006a, 0x2f18 },
{ 0xad8c87385c000074, 0x2f18 },
{ 0xad8c87385c000020, 0x2f18 },
{ 0xad8c87385c000030, 0x2f18 },
{0xad8c870068000048, 0x2f18}, {0xad8c87385c000050, 0x2f18},
{0xad8c87385c00006a, 0x2f18}, {0xad8c87385c000074, 0x2f18},
{0xad8c87385c000020, 0x2f18}, {0xad8c87385c000030, 0x2f18},
};
uint64_t EC0 = (0x345678912345670) >> 4;
@@ -188,15 +174,9 @@ TEST_F(PackFormats, 6)
bi_pack_format(&result, 10, tuples, 6, 0x60000011800, EC0, 0, false);
const uint64_t expected[] = {
0x8c8700680000482d,
0x30000008c00718ad,
0x8c87385c00005025,
0x39c2e000035718ad,
0x8c87385c00007401,
0xb401c62b632718ad,
0x8c87385c00002065,
0x39c2e000018718ad,
0x3456789123456706,
0x8c8700680000482d, 0x30000008c00718ad, 0x8c87385c00005025,
0x39c2e000035718ad, 0x8c87385c00007401, 0xb401c62b632718ad,
0x8c87385c00002065, 0x39c2e000018718ad, 0x3456789123456706,
0xa001c62b63200000,
};
@@ -207,13 +187,10 @@ TEST_F(PackFormats, 6)
TEST_F(PackFormats, 7)
{
struct bi_packed_tuple tuples[] = {
{ 0x9020074040000083, 0xf65 },
{ 0x90000d4058100080, 0xf65 },
{ 0x90000a3058700082, 0xf65 },
{ 0x9020074008114581, 0xf65 },
{ 0x90000d0058000080, 0xf65 },
{ 0x9000083058700082, 0xf65 },
{ 0x2380cb199ac38400, 0x327a },
{0x9020074040000083, 0xf65}, {0x90000d4058100080, 0xf65},
{0x90000a3058700082, 0xf65}, {0x9020074008114581, 0xf65},
{0x90000d0058000080, 0xf65}, {0x9000083058700082, 0xf65},
{0x2380cb199ac38400, 0x327a},
};
bi_pack_format(&result, 0, tuples, 7, 0x3000100000, 0, 0, true);
@@ -223,15 +200,9 @@ TEST_F(PackFormats, 7)
bi_pack_format(&result, 11, tuples, 7, 0x3000100000, 0, 0, true);
const uint64_t expected[] = {
0x2007404000008329,
0x0180008000076590,
0x000d405810008021,
0x5182c38004176590,
0x2007400811458101,
0x2401d96400076590,
0x000d005800008061,
0x4182c38004176590,
0x80cb199ac3840047,
0x2007404000008329, 0x0180008000076590, 0x000d405810008021,
0x5182c38004176590, 0x2007400811458101, 0x2401d96400076590,
0x000d005800008061, 0x4182c38004176590, 0x80cb199ac3840047,
0x3801d96400027a23,
};
@@ -242,14 +213,10 @@ TEST_F(PackFormats, 7)
TEST_F(PackFormats, 8)
{
struct bi_packed_tuple tuples[] = {
{ 0x442087037a2f8643, 0x3021 },
{ 0x84008d0586100043, 0x200 },
{ 0x7c008d0028014543, 0x0 },
{ 0x1c00070058200081, 0x1980 },
{ 0x1600dd878320400, 0x200 },
{ 0x49709c1b08308900, 0x200 },
{ 0x6c2007807881ca00, 0x40 },
{ 0x8d70fc0d94900083, 0x800 },
{0x442087037a2f8643, 0x3021}, {0x84008d0586100043, 0x200},
{0x7c008d0028014543, 0x0}, {0x1c00070058200081, 0x1980},
{0x1600dd878320400, 0x200}, {0x49709c1b08308900, 0x200},
{0x6c2007807881ca00, 0x40}, {0x8d70fc0d94900083, 0x800},
};
uint64_t EC0 = (0x32e635d0) >> 4;
@@ -262,18 +229,10 @@ TEST_F(PackFormats, 8)
bi_pack_format(&result, 13, tuples, 8, 0x61001311800, EC0, 0, true);
const uint64_t expected[] = {
0x2087037a2f86432e,
0x30800988c0002144,
0x008d058610004320,
0x6801400a2a1a0084,
0x0007005820008101,
0x0c00001f0021801c,
0x600dd87832040060,
0xe0d8418448020001,
0x2007807881ca00c0,
0xc6ba80125c20406c,
0x70fc0d9490008359,
0x0000000032e0008d,
0x2087037a2f86432e, 0x30800988c0002144, 0x008d058610004320,
0x6801400a2a1a0084, 0x0007005820008101, 0x0c00001f0021801c,
0x600dd87832040060, 0xe0d8418448020001, 0x2007807881ca00c0,
0xc6ba80125c20406c, 0x70fc0d9490008359, 0x0000000032e0008d,
};
ASSERT_EQ(result.size, 96);


@@ -39,14 +39,9 @@ TEST(Packing, PackLiteral)
TEST(Packing, PackUpper)
{
struct bi_packed_tuple tuples[] = {
{ 0, 0x3 << (75 - 64) },
{ 0, 0x1 << (75 - 64) },
{ 0, 0x7 << (75 - 64) },
{ 0, 0x0 << (75 - 64) },
{ 0, 0x2 << (75 - 64) },
{ 0, 0x6 << (75 - 64) },
{ 0, 0x5 << (75 - 64) },
{ 0, 0x4 << (75 - 64) },
{0, 0x3 << (75 - 64)}, {0, 0x1 << (75 - 64)}, {0, 0x7 << (75 - 64)},
{0, 0x0 << (75 - 64)}, {0, 0x2 << (75 - 64)}, {0, 0x6 << (75 - 64)},
{0, 0x5 << (75 - 64)}, {0, 0x4 << (75 - 64)},
};
EXPECT_EQ(bi_pack_upper(U(0), tuples, 8), 3);
@@ -62,9 +57,9 @@ TEST(Packing, PackUpper)
TEST(Packing, PackTupleBits)
{
struct bi_packed_tuple tuples[] = {
{ 0x1234567801234567, 0x3A },
{ 0x9876543299999999, 0x1B },
{ 0xABCDEF0101234567, 0x7C },
{0x1234567801234567, 0x3A},
{0x9876543299999999, 0x1B},
{0xABCDEF0101234567, 0x7C},
};
EXPECT_EQ(bi_pack_tuple_bits(T(0), tuples, 8, 0, 30), 0x01234567);
@@ -75,19 +70,14 @@ TEST(Packing, PackTupleBits)
TEST(Packing, PackSync)
{
struct bi_packed_tuple tuples[] = {
{ 0, 0x3 << (75 - 64) },
{ 0, 0x5 << (75 - 64) },
{ 0, 0x7 << (75 - 64) },
{ 0, 0x0 << (75 - 64) },
{ 0, 0x2 << (75 - 64) },
{ 0, 0x6 << (75 - 64) },
{ 0, 0x5 << (75 - 64) },
{ 0, 0x4 << (75 - 64) },
{0, 0x3 << (75 - 64)}, {0, 0x5 << (75 - 64)}, {0, 0x7 << (75 - 64)},
{0, 0x0 << (75 - 64)}, {0, 0x2 << (75 - 64)}, {0, 0x6 << (75 - 64)},
{0, 0x5 << (75 - 64)}, {0, 0x4 << (75 - 64)},
};
EXPECT_EQ(bi_pack_sync(L(3), L(1), L(7), tuples, 8, false), 0xCF);
EXPECT_EQ(bi_pack_sync(L(3), L(1), U(7), tuples, 8, false), 0xCC);
EXPECT_EQ(bi_pack_sync(L(3), U(1), U(7), tuples, 8, false), 0xEC);
EXPECT_EQ(bi_pack_sync(Z, U(1), U(7), tuples, 8, false), 0x2C);
EXPECT_EQ(bi_pack_sync(Z, U(1), U(7), tuples, 8, true) , 0x6C);
EXPECT_EQ(bi_pack_sync(Z, U(1), U(7), tuples, 8, false), 0x2C);
EXPECT_EQ(bi_pack_sync(Z, U(1), U(7), tuples, 8, true), 0x6C);
}


@@ -21,23 +21,28 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"
#include <gtest/gtest.h>
class SchedulerPredicates : public testing::Test {
protected:
SchedulerPredicates() {
protected:
SchedulerPredicates()
{
mem_ctx = ralloc_context(NULL);
b = bit_builder(mem_ctx);
}
~SchedulerPredicates() {
~SchedulerPredicates()
{
ralloc_free(mem_ctx);
}
bi_index TMP() { return bi_temp(b->shader); }
bi_index TMP()
{
return bi_temp(b->shader);
}
void *mem_ctx;
bi_builder *b;


@ -1,21 +1,21 @@
#ifndef __DISASM_H
#define __DISASM_H
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BIT(b) (1ull << (b))
#define MASK(count) ((1ull << (count)) - 1)
#define BIT(b) (1ull << (b))
#define MASK(count) ((1ull << (count)) - 1)
#define SEXT(b, count) ((b ^ BIT(count - 1)) - BIT(count - 1))
#define UNUSED __attribute__((unused))
#define UNUSED __attribute__((unused))
#define VA_SRC_UNIFORM_TYPE 0x2
#define VA_SRC_IMM_TYPE 0x3
#define VA_SRC_IMM_TYPE 0x3
static inline void
va_print_dest(FILE *fp, uint8_t dest, bool can_mask)
@@ -51,7 +51,7 @@ disassemble_valhall(FILE *fp, const uint64_t *code, unsigned size, bool verbose)
if (verbose) {
/* Print byte pattern */
for (unsigned j = 0; j < 8; ++j)
fprintf(fp, "%02x ", (uint8_t) (instr >> (j * 8)));
fprintf(fp, "%02x ", (uint8_t)(instr >> (j * 8)));
fprintf(fp, " ");
} else {


@@ -21,10 +21,10 @@
* SOFTWARE.
*/
#include "va_compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "util/u_cpu_detect.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include <gtest/gtest.h>
@@ -37,102 +37,137 @@ add_imm(bi_context *ctx)
}
#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, add_imm)
#define NEGCASE(instr) CASE(instr, instr)
#define NEGCASE(instr) CASE(instr, instr)
class AddImm : public testing::Test {
protected:
AddImm() {
protected:
AddImm()
{
mem_ctx = ralloc_context(NULL);
}
~AddImm() {
~AddImm()
{
ralloc_free(mem_ctx);
}
void *mem_ctx;
};
TEST_F(AddImm, Basic) {
TEST_F(AddImm, Basic)
{
CASE(bi_mov_i32_to(b, bi_register(63), bi_imm_u32(0xABAD1DEA)),
bi_iadd_imm_i32_to(b, bi_register(63), bi_zero(), 0xABAD1DEA));
CASE(bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0)),
bi_fadd_imm_f32_to(b, bi_register(1), bi_register(2), fui(42.0)));
CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_imm_f32(42.0)),
bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)), fui(42.0)));
CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)),
bi_imm_f32(42.0)),
bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)),
fui(42.0)));
CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_neg(bi_imm_f32(42.0))),
bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)), fui(-42.0)));
CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)),
bi_neg(bi_imm_f32(42.0))),
bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)),
fui(-42.0)));
}
TEST_F(AddImm, Commutativty) {
TEST_F(AddImm, Commutativty)
{
CASE(bi_fadd_f32_to(b, bi_register(1), bi_imm_f32(42.0), bi_register(2)),
bi_fadd_imm_f32_to(b, bi_register(1), bi_register(2), fui(42.0)));
}
TEST_F(AddImm, NoModifiers) {
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_abs(bi_register(2)), bi_imm_f32(42.0)));
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_neg(bi_register(2)), bi_imm_f32(42.0)));
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_f32(42.0)));
TEST_F(AddImm, NoModifiers)
{
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_abs(bi_register(2)),
bi_imm_f32(42.0)));
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_neg(bi_register(2)),
bi_imm_f32(42.0)));
NEGCASE(bi_fadd_f32_to(b, bi_register(1),
bi_swz_16(bi_register(2), false, false),
bi_imm_f32(42.0)));
}
TEST_F(AddImm, NoClamp) {
TEST_F(AddImm, NoClamp)
{
NEGCASE({
bi_instr *I = bi_fadd_f32_to(b, bi_register(1), bi_register(2),
bi_imm_f32(42.0));
bi_instr *I =
bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0));
I->clamp = BI_CLAMP_CLAMP_M1_1;
});
}
TEST_F(AddImm, OtherTypes) {
TEST_F(AddImm, OtherTypes)
{
CASE(bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0)),
bi_fadd_imm_v2f16_to(b, bi_register(1), bi_register(2), 0x51405140));
CASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_i32_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));
CASE(bi_iadd_v2u16_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_v2u16_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_v2i16_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));
CASE(bi_iadd_v4u8_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_v4u8_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_v4i8_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));
CASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_i32_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));
CASE(bi_iadd_v2s16_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_v2s16_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_v2i16_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));
CASE(bi_iadd_v4s8_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_v4s8_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_v4i8_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));
NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_v2u16_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), true));
NEGCASE(bi_iadd_s32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_v2s16_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_u32_to(b, bi_register(1),
bi_swz_16(bi_register(2), false, false),
bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_v2u16_to(b, bi_register(1),
bi_swz_16(bi_register(2), false, false),
bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), true));
NEGCASE(bi_iadd_s32_to(b, bi_register(1),
bi_swz_16(bi_register(2), false, false),
bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_v2s16_to(b, bi_register(1),
bi_swz_16(bi_register(2), false, false),
bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), true));
NEGCASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), true));
}
TEST_F(AddImm, Int8) {
TEST_F(AddImm, Int8)
{
bi_index idx = bi_register(2);
idx.swizzle = BI_SWIZZLE_B0000;
NEGCASE(bi_iadd_v4u8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_v4s8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false));
NEGCASE(
bi_iadd_v4u8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false));
NEGCASE(
bi_iadd_v4s8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false));
}
TEST_F(AddImm, OnlyRTE) {
TEST_F(AddImm, OnlyRTE)
{
NEGCASE({
bi_instr *I = bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0));
I->round = BI_ROUND_RTP;
bi_instr *I =
bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0));
I->round = BI_ROUND_RTP;
});
NEGCASE({
bi_instr *I = bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0));
I->round = BI_ROUND_RTZ;
bi_instr *I =
bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0));
I->round = BI_ROUND_RTZ;
});
}


@@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include <stdio.h>
#include <inttypes.h>
#include <stdio.h>
#include "disassemble.h"
static inline uint8_t
@@ -39,7 +39,7 @@ parse_hex(const char *in)
for (unsigned i = 0; i < 8; ++i) {
uint8_t byte = (parse_nibble(in[0]) << 4) | parse_nibble(in[1]);
v |= ((uint64_t) byte) << (8 * i);
v |= ((uint64_t)byte) << (8 * i);
/* Skip the space after the byte */
in += 3;


@@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include "valhall_enums.h"
@@ -37,177 +37,190 @@ strip_nops(bi_context *ctx)
}
}
#define CASE(shader_stage, test) do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
UNUSED bi_builder *b = A; \
A->shader->stage = MESA_SHADER_ ## shader_stage; \
test; \
} \
strip_nops(A->shader); \
va_insert_flow_control_nops(A->shader); \
{ \
UNUSED bi_builder *b = B; \
B->shader->stage = MESA_SHADER_ ## shader_stage; \
test; \
} \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while(0)
#define CASE(shader_stage, test) \
do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
UNUSED bi_builder *b = A; \
A->shader->stage = MESA_SHADER_##shader_stage; \
test; \
} \
strip_nops(A->shader); \
va_insert_flow_control_nops(A->shader); \
{ \
UNUSED bi_builder *b = B; \
B->shader->stage = MESA_SHADER_##shader_stage; \
test; \
} \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while (0)
#define flow(f) bi_nop(b)->flow = VA_FLOW_ ## f
#define flow(f) bi_nop(b)->flow = VA_FLOW_##f
class InsertFlow : public testing::Test {
protected:
InsertFlow() {
protected:
InsertFlow()
{
mem_ctx = ralloc_context(NULL);
}
~InsertFlow() {
~InsertFlow()
{
ralloc_free(mem_ctx);
}
void *mem_ctx;
};
TEST_F(InsertFlow, PreserveEmptyShader) {
TEST_F(InsertFlow, PreserveEmptyShader)
{
CASE(FRAGMENT, {});
}
TEST_F(InsertFlow, TilebufferWait7) {
TEST_F(InsertFlow, TilebufferWait7)
{
CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
flow(END);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
flow(END);
});
CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_st_tile(b, bi_register(0), bi_register(4), bi_register(5),
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_st_tile(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4);
flow(END);
});
CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_ld_tile_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4);
flow(END);
});
CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_ld_tile_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4);
flow(END);
flow(END);
});
}
TEST_F(InsertFlow, AtestWait6AndWait0After) {
TEST_F(InsertFlow, AtestWait6AndWait0After)
{
CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0126);
bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_fau(BIR_FAU_ATEST_PARAM, false));
flow(WAIT0);
flow(END);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0126);
bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_fau(BIR_FAU_ATEST_PARAM, false));
flow(WAIT0);
flow(END);
});
}
TEST_F(InsertFlow, ZSEmitWait6) {
TEST_F(InsertFlow, ZSEmitWait6)
{
CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0126);
bi_zs_emit_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), true, true);
flow(END);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0126);
bi_zs_emit_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), true, true);
flow(END);
});
}
TEST_F(InsertFlow, LoadThenUnrelatedThenUse) {
TEST_F(InsertFlow, LoadThenUnrelatedThenUse)
{
CASE(VERTEX, {
bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19));
flow(END);
bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19));
flow(END);
});
}
TEST_F(InsertFlow, SingleLdVar) {
TEST_F(InsertFlow, SingleLdVar)
{
CASE(FRAGMENT, {
flow(DISCARD);
bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16,
BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0);
flow(WAIT0);
flow(END);
flow(DISCARD);
bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE,
BI_VECSIZE_V4, 0);
flow(WAIT0);
flow(END);
});
}
TEST_F(InsertFlow, SerializeLdVars) {
TEST_F(InsertFlow, SerializeLdVars)
{
CASE(FRAGMENT, {
flow(DISCARD);
bi_ld_var_buf_imm_f16_to(b, bi_register(16), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16,
BI_UPDATE_STORE, BI_VECSIZE_V4, 0);
bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16,
BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0);
flow(WAIT0);
bi_ld_var_buf_imm_f16_to(b, bi_register(8), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16,
BI_UPDATE_STORE, BI_VECSIZE_V4, 1);
flow(WAIT0);
flow(END);
flow(DISCARD);
bi_ld_var_buf_imm_f16_to(b, bi_register(16), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
BI_VECSIZE_V4, 0);
bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE,
BI_VECSIZE_V4, 0);
flow(WAIT0);
bi_ld_var_buf_imm_f16_to(b, bi_register(8), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
BI_VECSIZE_V4, 1);
flow(WAIT0);
flow(END);
});
}
TEST_F(InsertFlow, Clper) {
TEST_F(InsertFlow, Clper)
{
CASE(FRAGMENT, {
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(END);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(END);
});
}
TEST_F(InsertFlow, TextureImplicit) {
TEST_F(InsertFlow, TextureImplicit)
{
CASE(FRAGMENT, {
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
bi_register(12), false, BI_DIMENSION_2D,
BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
flow(DISCARD);
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(END);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
bi_register(12), false, BI_DIMENSION_2D,
BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
flow(DISCARD);
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(END);
});
}
TEST_F(InsertFlow, TextureExplicit) {
TEST_F(InsertFlow, TextureExplicit)
{
CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
bi_register(12), false, BI_DIMENSION_2D,
BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_ZERO_LOD, BI_WRITE_MASK_RGBA, 4);
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(END);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
bi_register(12), false, BI_DIMENSION_2D,
BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_ZERO_LOD, BI_WRITE_MASK_RGBA, 4);
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(END);
});
}
@@ -217,49 +230,52 @@ TEST_F(InsertFlow, TextureExplicit) {
* \ /
* D
*/
TEST_F(InsertFlow, DiamondCFG) {
TEST_F(InsertFlow, DiamondCFG)
{
CASE(FRAGMENT, {
bi_block *A = bi_start_block(&b->shader->blocks);
bi_block *B = bit_block(b->shader);
bi_block *C = bit_block(b->shader);
bi_block *D = bit_block(b->shader);
bi_block *A = bi_start_block(&b->shader->blocks);
bi_block *B = bit_block(b->shader);
bi_block *C = bit_block(b->shader);
bi_block *D = bit_block(b->shader);
bi_block_add_successor(A, B);
bi_block_add_successor(A, C);
bi_block_add_successor(A, B);
bi_block_add_successor(A, C);
bi_block_add_successor(B, D);
bi_block_add_successor(C, D);
bi_block_add_successor(B, D);
bi_block_add_successor(C, D);
/* B uses helper invocations, no other block does.
*
* That means B and C need to discard helpers.
*/
b->cursor = bi_after_block(B);
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
flow(DISCARD);
flow(RECONVERGE);
/* B uses helper invocations, no other block does.
*
* That means B and C need to discard helpers.
*/
b->cursor = bi_after_block(B);
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
flow(DISCARD);
flow(RECONVERGE);
b->cursor = bi_after_block(C);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(RECONVERGE);
b->cursor = bi_after_block(C);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(RECONVERGE);
b->cursor = bi_after_block(D);
flow(END);
b->cursor = bi_after_block(D);
flow(END);
});
}
TEST_F(InsertFlow, BarrierBug) {
TEST_F(InsertFlow, BarrierBug)
{
CASE(KERNEL, {
bi_instr *I = bi_store_i32(b, bi_register(0), bi_register(2), bi_register(4), BI_SEG_NONE, 0);
I->slot = 2;
bi_instr *I = bi_store_i32(b, bi_register(0), bi_register(2),
bi_register(4), BI_SEG_NONE, 0);
I->slot = 2;
bi_fadd_f32_to(b, bi_register(10), bi_register(10), bi_register(10));
flow(WAIT2);
bi_barrier(b);
flow(WAIT);
flow(END);
bi_fadd_f32_to(b, bi_register(10), bi_register(10), bi_register(10));
flow(WAIT2);
bi_barrier(b);
flow(WAIT);
flow(END);
});
}


@@ -21,9 +21,9 @@
* SOFTWARE.
*/
#include "va_compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include <gtest/gtest.h>
@@ -38,19 +38,22 @@ add_imm(bi_context *ctx)
#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, add_imm)
class LowerConstants : public testing::Test {
protected:
LowerConstants() {
protected:
LowerConstants()
{
mem_ctx = ralloc_context(NULL);
}
~LowerConstants() {
~LowerConstants()
{
ralloc_free(mem_ctx);
}
void *mem_ctx;
};
TEST_F(LowerConstants, Float32) {
TEST_F(LowerConstants, Float32)
{
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(0.0)),
bi_fadd_f32_to(b, bi_register(0), bi_register(0), va_lut(0)));
@@ -61,46 +64,59 @@ TEST_F(LowerConstants, Float32) {
bi_fadd_f32_to(b, bi_register(0), bi_register(0), va_lut(17)));
}
TEST_F(LowerConstants, WidenFloat16) {
TEST_F(LowerConstants, WidenFloat16)
{
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(0.5)),
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_half(va_lut(26), 1)));
bi_fadd_f32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(26), 1)));
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(255.0)),
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_half(va_lut(23), 0)));
bi_fadd_f32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(23), 0)));
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(256.0)),
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_half(va_lut(23), 1)));
bi_fadd_f32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(23), 1)));
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(8.0)),
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_half(va_lut(30), 1)));
bi_fadd_f32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(30), 1)));
}
TEST_F(LowerConstants, ReplicateFloat16) {
TEST_F(LowerConstants, ReplicateFloat16)
{
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(255.0)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_half(va_lut(23), 0)));
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(23), 0)));
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(4.0)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_half(va_lut(29), 1)));
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(29), 1)));
}
TEST_F(LowerConstants, NegateFloat32) {
TEST_F(LowerConstants, NegateFloat32)
{
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(-1.0)),
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_neg(va_lut(16))));
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(-255.0)),
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_neg(bi_half(va_lut(23), 0))));
bi_fadd_f32_to(b, bi_register(0), bi_register(0),
bi_neg(bi_half(va_lut(23), 0))));
}
TEST_F(LowerConstants, NegateReplicateFloat16)
{
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(-255.0)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_neg(bi_half(va_lut(23), 0))));
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
bi_neg(bi_half(va_lut(23), 0))));
}
TEST_F(LowerConstants, NegateVec2Float16)
{
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xBC008000)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_neg(va_lut(27))));
CASE(
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0xBC008000)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_neg(va_lut(27))));
}
TEST_F(LowerConstants, Int8InInt32)
@@ -117,87 +133,105 @@ TEST_F(LowerConstants, ZeroExtendForUnsigned)
CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFF),
bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
bi_byte(va_lut(1), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
bi_byte(va_lut(1), 0), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1));
CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0xFFFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(1), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
CASE(
bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFFFF),
bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(1), 0), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1));
}
TEST_F(LowerConstants, SignExtendPositiveForSigned)
{
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0x7F), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0x7F),
bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_byte(va_lut(2), 3), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
bi_byte(va_lut(2), 3), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1));
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0x7FFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(2), 1), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
CASE(
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0x7FFF),
bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(2), 1), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1));
}
TEST_F(LowerConstants, SignExtendNegativeForSigned)
{
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1),
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_byte(va_lut(23), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
bi_byte(va_lut(23), 0), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1));
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1),
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(3), 1), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
bi_half(va_lut(3), 1), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1));
}
TEST_F(LowerConstants, DontZeroExtendForSigned)
{
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0xFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFF),
bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_iadd_imm_i32(b, va_lut(0), 0xFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0xFFFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_iadd_imm_i32(b, va_lut(0), 0xFFFF), bi_register(0),
bi_iadd_imm_i32(b, va_lut(0), 0xFF), bi_register(0),
BI_CMPF_LT, BI_RESULT_TYPE_I1));
CASE(
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFFFF),
bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_iadd_imm_i32(b, va_lut(0), 0xFFFF), bi_register(0),
BI_CMPF_LT, BI_RESULT_TYPE_I1));
}
TEST_F(LowerConstants, DontZeroExtendNegative)
{
CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1),
bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFFF8), bi_register(0),
BI_CMPF_LT, BI_RESULT_TYPE_I1));
bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFFF8),
bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1),
bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFAFC), bi_register(0),
BI_CMPF_LT, BI_RESULT_TYPE_I1));
bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFAFC),
bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
}
TEST_F(LowerConstants, HandleTrickyNegativesFP16)
{
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(-57216.0)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_half(va_lut(3), 1)));
CASE(
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(-57216.0)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(3), 1)));
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(57216.0)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_neg(bi_half(va_lut(3), 1))));
CASE(
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(57216.0)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
bi_neg(bi_half(va_lut(3), 1))));
}
TEST_F(LowerConstants, MaintainMkvecRestrictedSwizzles)
{
CASE(bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0),
bi_imm_u8(0), bi_imm_u32(0)),
CASE(bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0), bi_imm_u8(0),
bi_imm_u32(0)),
bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0),
bi_byte(va_lut(0), 0), va_lut(0)));
CASE(bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0),
bi_imm_u8(14), bi_imm_u32(0)),
CASE(bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0), bi_imm_u8(14),
bi_imm_u32(0)),
bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0),
bi_byte(va_lut(11), 2), va_lut(0)));
}


@@ -21,18 +21,19 @@
* SOFTWARE.
*/
#include "va_compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include <gtest/gtest.h>
#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, va_lower_isel)
#define NEGCASE(instr) CASE(instr, instr)
#define NEGCASE(instr) CASE(instr, instr)
class LowerIsel : public testing::Test {
protected:
LowerIsel() {
protected:
LowerIsel()
{
mem_ctx = ralloc_context(NULL);
reg = bi_register(1);
x = bi_register(2);
@@ -40,7 +41,8 @@ protected:
z = bi_register(4);
}
~LowerIsel() {
~LowerIsel()
{
ralloc_free(mem_ctx);
}
@@ -48,14 +50,16 @@ protected:
bi_index reg, x, y, z;
};
TEST_F(LowerIsel, 8BitSwizzles) {
TEST_F(LowerIsel, 8BitSwizzles)
{
for (unsigned i = 0; i < 4; ++i) {
CASE(bi_swz_v4i8_to(b, reg, bi_byte(reg, i)),
bi_iadd_v4u8_to(b, reg, bi_byte(reg, i), bi_zero(), false));
}
}
TEST_F(LowerIsel, 16BitSwizzles) {
TEST_F(LowerIsel, 16BitSwizzles)
{
for (unsigned i = 0; i < 2; ++i) {
for (unsigned j = 0; j < 2; ++j) {
CASE(bi_swz_v2i16_to(b, reg, bi_swz_16(reg, i, j)),
@@ -64,24 +68,30 @@ TEST_F(LowerIsel, 16BitSwizzles) {
}
}
TEST_F(LowerIsel, JumpsLoweredToBranches) {
bi_block block = { };
TEST_F(LowerIsel, JumpsLoweredToBranches)
{
bi_block block = {};
CASE({
bi_instr *I = bi_jump(b, bi_imm_u32(0xDEADBEEF));
I->branch_target = &block;
}, {
bi_instr *I = bi_branchz_i16(b, bi_zero(), bi_imm_u32(0xDEADBEEF), BI_CMPF_EQ);
I->branch_target = &block;
});
CASE(
{
bi_instr *I = bi_jump(b, bi_imm_u32(0xDEADBEEF));
I->branch_target = &block;
},
{
bi_instr *I =
bi_branchz_i16(b, bi_zero(), bi_imm_u32(0xDEADBEEF), BI_CMPF_EQ);
I->branch_target = &block;
});
}
TEST_F(LowerIsel, IndirectJumpsLoweredToBranches) {
TEST_F(LowerIsel, IndirectJumpsLoweredToBranches)
{
CASE(bi_jump(b, bi_register(17)),
bi_branchzi(b, bi_zero(), bi_register(17), BI_CMPF_EQ));
}
TEST_F(LowerIsel, IntegerCSEL) {
TEST_F(LowerIsel, IntegerCSEL)
{
CASE(bi_csel_i32(b, reg, reg, reg, reg, BI_CMPF_EQ),
bi_csel_u32(b, reg, reg, reg, reg, BI_CMPF_EQ));
@@ -89,7 +99,8 @@ TEST_F(LowerIsel, IntegerCSEL) {
bi_csel_v2u16(b, reg, reg, reg, reg, BI_CMPF_EQ));
}
TEST_F(LowerIsel, AvoidSimpleMux) {
TEST_F(LowerIsel, AvoidSimpleMux)
{
CASE(bi_mux_i32(b, x, y, z, BI_MUX_INT_ZERO),
bi_csel_u32(b, z, bi_zero(), x, y, BI_CMPF_EQ));
CASE(bi_mux_i32(b, x, y, z, BI_MUX_NEG),
@@ -105,27 +116,32 @@ TEST_F(LowerIsel, AvoidSimpleMux) {
bi_csel_v2f16(b, z, bi_zero(), x, y, BI_CMPF_EQ));
}
TEST_F(LowerIsel, BitwiseMux) {
TEST_F(LowerIsel, BitwiseMux)
{
NEGCASE(bi_mux_i32(b, x, y, z, BI_MUX_BIT));
NEGCASE(bi_mux_v2i16(b, x, y, z, BI_MUX_BIT));
NEGCASE(bi_mux_v4i8(b, x, y, z, BI_MUX_BIT));
}
TEST_F(LowerIsel, MuxInt8) {
TEST_F(LowerIsel, MuxInt8)
{
NEGCASE(bi_mux_v4i8(b, x, y, z, BI_MUX_INT_ZERO));
NEGCASE(bi_mux_v4i8(b, x, y, z, BI_MUX_NEG));
NEGCASE(bi_mux_v4i8(b, x, y, z, BI_MUX_FP_ZERO));
}
TEST_F(LowerIsel, FaddRscale) {
CASE(bi_fadd_rscale_f32_to(b, reg, x, y, z, BI_SPECIAL_NONE),
bi_fma_rscale_f32_to(b, reg, x, bi_imm_f32(1.0), y, z, BI_SPECIAL_NONE));
TEST_F(LowerIsel, FaddRscale)
{
CASE(
bi_fadd_rscale_f32_to(b, reg, x, y, z, BI_SPECIAL_NONE),
bi_fma_rscale_f32_to(b, reg, x, bi_imm_f32(1.0), y, z, BI_SPECIAL_NONE));
CASE(bi_fadd_rscale_f32_to(b, reg, x, y, z, BI_SPECIAL_N),
bi_fma_rscale_f32_to(b, reg, x, bi_imm_f32(1.0), y, z, BI_SPECIAL_N));
}
TEST_F(LowerIsel, Smoke) {
TEST_F(LowerIsel, Smoke)
{
NEGCASE(bi_fadd_f32_to(b, reg, reg, reg));
NEGCASE(bi_csel_s32_to(b, reg, reg, reg, reg, reg, BI_CMPF_LT));
NEGCASE(bi_csel_u32_to(b, reg, reg, reg, reg, reg, BI_CMPF_LT));


@@ -21,14 +21,14 @@
* SOFTWARE.
*/
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include "valhall_enums.h"
#include <gtest/gtest.h>
#define R(x) bi_register(x)
#define R(x) bi_register(x)
#define DR(x) bi_discard(R(x))
static void
@@ -40,105 +40,119 @@ strip_discard(bi_context *ctx)
}
}
#define CASE(test) do { \
void *mem_ctx = ralloc_context(NULL); \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
UNUSED bi_builder *b = A; \
test; \
} \
strip_discard(A->shader); \
va_mark_last(A->shader); \
{ \
UNUSED bi_builder *b = B; \
test; \
} \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
ralloc_free(mem_ctx); \
} while(0)
#define CASE(test) \
do { \
void *mem_ctx = ralloc_context(NULL); \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
UNUSED bi_builder *b = A; \
test; \
} \
strip_discard(A->shader); \
va_mark_last(A->shader); \
{ \
UNUSED bi_builder *b = B; \
test; \
} \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
ralloc_free(mem_ctx); \
} while (0)
TEST(MarkLast, Simple) {
TEST(MarkLast, Simple)
{
CASE(bi_fadd_f32_to(b, R(0), DR(0), DR(1)));
CASE({
bi_fadd_f32_to(b, R(2), R(0), DR(1));
bi_fadd_f32_to(b, R(0), DR(0), DR(2));
bi_fadd_f32_to(b, R(2), R(0), DR(1));
bi_fadd_f32_to(b, R(0), DR(0), DR(2));
});
}
TEST(MarkLast, SameSourceAndDestination) {
TEST(MarkLast, SameSourceAndDestination)
{
CASE({
bi_fadd_f32_to(b, R(0), DR(0), DR(0));
bi_fadd_f32_to(b, R(0), DR(0), DR(0));
bi_fadd_f32_to(b, R(0), DR(0), DR(0));
bi_fadd_f32_to(b, R(0), DR(0), DR(0));
bi_fadd_f32_to(b, R(0), DR(0), DR(0));
bi_fadd_f32_to(b, R(0), DR(0), DR(0));
});
}
TEST(MarkLast, StagingReadBefore) {
TEST(MarkLast, StagingReadBefore)
{
CASE({
bi_fadd_f32_to(b, R(9), R(2), DR(7));
bi_st_tile(b, R(0), DR(4), DR(5), DR(6), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4);
bi_fadd_f32_to(b, R(9), R(2), DR(7));
bi_st_tile(b, R(0), DR(4), DR(5), DR(6), BI_REGISTER_FORMAT_F32,
BI_VECSIZE_V4);
});
}
TEST(MarkLast, StagingReadAfter) {
TEST(MarkLast, StagingReadAfter)
{
CASE({
bi_st_tile(b, R(0), DR(4), DR(5), DR(6), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4);
bi_fadd_f32_to(b, R(9), R(2), DR(7));
bi_st_tile(b, R(0), DR(4), DR(5), DR(6), BI_REGISTER_FORMAT_F32,
BI_VECSIZE_V4);
bi_fadd_f32_to(b, R(9), R(2), DR(7));
});
}
TEST(MarkLast, NonstagingSourceToAsync) {
TEST(MarkLast, NonstagingSourceToAsync)
{
CASE({
bi_st_tile(b, R(0), R(4), R(5), DR(6), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4);
bi_fadd_f32_to(b, R(9), DR(4), DR(5));
bi_st_tile(b, R(0), R(4), R(5), DR(6), BI_REGISTER_FORMAT_F32,
BI_VECSIZE_V4);
bi_fadd_f32_to(b, R(9), DR(4), DR(5));
});
}
TEST(MarkLast, Both64) {
TEST(MarkLast, Both64)
{
CASE(bi_load_i32_to(b, R(0), DR(8), DR(9), BI_SEG_NONE, 0));
}
TEST(MarkLast, Neither64ThenBoth) {
TEST(MarkLast, Neither64ThenBoth)
{
CASE({
bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
bi_load_i32_to(b, R(1), DR(8), DR(9), BI_SEG_NONE, 8);
bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
bi_load_i32_to(b, R(1), DR(8), DR(9), BI_SEG_NONE, 8);
});
}
TEST(MarkLast, Half64) {
TEST(MarkLast, Half64)
{
CASE({
bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
bi_fadd_f32_to(b, R(8), DR(8), DR(8));
bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
bi_fadd_f32_to(b, R(8), DR(8), DR(8));
});
CASE({
bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
bi_fadd_f32_to(b, R(9), DR(9), DR(9));
bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
bi_fadd_f32_to(b, R(9), DR(9), DR(9));
});
}
TEST(MarkLast, RegisterBlendDescriptor) {
TEST(MarkLast, RegisterBlendDescriptor)
{
CASE({
bi_blend_to(b, R(48), R(0), DR(60), DR(4), DR(5), bi_null(),
BI_REGISTER_FORMAT_F32, 4, 0);
bi_blend_to(b, R(48), R(0), DR(60), DR(4), DR(5), bi_null(),
BI_REGISTER_FORMAT_F32, 4, 0);
});
CASE({
bi_blend_to(b, R(48), R(0), DR(60), R(4), R(5), bi_null(),
BI_REGISTER_FORMAT_F32, 4, 0);
bi_fadd_f32_to(b, R(4), DR(4), DR(7));
bi_blend_to(b, R(48), R(0), DR(60), R(4), R(5), bi_null(),
BI_REGISTER_FORMAT_F32, 4, 0);
bi_fadd_f32_to(b, R(4), DR(4), DR(7));
});
CASE({
bi_blend_to(b, R(48), R(0), DR(60), R(4), R(5), bi_null(),
BI_REGISTER_FORMAT_F32, 4, 0);
bi_fadd_f32_to(b, R(4), DR(5), DR(7));
bi_blend_to(b, R(48), R(0), DR(60), R(4), R(5), bi_null(),
BI_REGISTER_FORMAT_F32, 4, 0);
bi_fadd_f32_to(b, R(4), DR(5), DR(7));
});
}
TEST(MarkLast, ControlFlowAllFeatures) {
TEST(MarkLast, ControlFlowAllFeatures)
{
/* A
* / \
* B C
@@ -153,9 +167,8 @@ TEST(MarkLast, ControlFlowAllFeatures) {
b->cursor = bi_after_block(A);
{
bi_instr *I =
bi_st_tile(b, R(10), DR(14), DR(15), DR(16),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4);
bi_instr *I = bi_st_tile(b, R(10), DR(14), DR(15), DR(16),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4);
I->slot = 2;
bi_load_i32_to(b, R(20), R(28), R(29), BI_SEG_NONE, 0);


@@ -21,42 +21,45 @@
* SOFTWARE.
*/
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include "valhall_enums.h"
#include <gtest/gtest.h>
#define CASE(test, expected) do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
bi_builder *b = A; \
A->shader->stage = MESA_SHADER_FRAGMENT; \
test; \
} \
va_merge_flow(A->shader); \
{ \
bi_builder *b = B; \
B->shader->stage = MESA_SHADER_FRAGMENT; \
expected; \
} \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while(0)
#define CASE(test, expected) \
do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
bi_builder *b = A; \
A->shader->stage = MESA_SHADER_FRAGMENT; \
test; \
} \
va_merge_flow(A->shader); \
{ \
bi_builder *b = B; \
B->shader->stage = MESA_SHADER_FRAGMENT; \
expected; \
} \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while (0)
#define NEGCASE(test) CASE(test, test)
#define flow(f) bi_nop(b)->flow = VA_FLOW_ ## f
#define flow(f) bi_nop(b)->flow = VA_FLOW_##f
class MergeFlow : public testing::Test {
protected:
MergeFlow() {
protected:
MergeFlow()
{
mem_ctx = ralloc_context(NULL);
atest = bi_fau(BIR_FAU_ATEST_PARAM, false);
}
~MergeFlow() {
~MergeFlow()
{
ralloc_free(mem_ctx);
}
@@ -65,74 +68,84 @@ protected:
bi_index atest;
};
TEST_F(MergeFlow, End) {
CASE({
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
flow(END);
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
I->flow = VA_FLOW_END;
});
TEST_F(MergeFlow, End)
{
CASE(
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
flow(END);
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
I->flow = VA_FLOW_END;
});
}
TEST_F(MergeFlow, Reconverge) {
CASE({
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
flow(RECONVERGE);
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
I->flow = VA_FLOW_RECONVERGE;
});
TEST_F(MergeFlow, Reconverge)
{
CASE(
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
flow(RECONVERGE);
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
I->flow = VA_FLOW_RECONVERGE;
});
}
TEST_F(MergeFlow, TrivialWait) {
CASE({
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0126);
bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest);
},
{
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I->flow = VA_FLOW_WAIT0126;
bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest);
});
TEST_F(MergeFlow, TrivialWait)
{
CASE(
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0126);
bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest);
},
{
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I->flow = VA_FLOW_WAIT0126;
bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest);
});
}
TEST_F(MergeFlow, LoadThenUnrelatedThenUse) {
CASE({
TEST_F(MergeFlow, LoadThenUnrelatedThenUse)
{
CASE(
{
bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19));
flow(END);
},
{
},
{
bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I->flow = VA_FLOW_WAIT0;
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19));
I->flow = VA_FLOW_END;
});
});
}
TEST_F(MergeFlow, TrivialDiscard) {
CASE({
TEST_F(MergeFlow, TrivialDiscard)
{
CASE(
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
@@ -140,31 +153,35 @@ TEST_F(MergeFlow, TrivialDiscard) {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(END);
},
{
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
I->flow = VA_FLOW_DISCARD;
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I->flow = VA_FLOW_END;
});
});
}
TEST_F(MergeFlow, TrivialDiscardAtTheStart) {
CASE({
TEST_F(MergeFlow, TrivialDiscardAtTheStart)
{
CASE(
{
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
},
{
},
{
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I->flow = VA_FLOW_DISCARD;
});
});
}
TEST_F(MergeFlow, MoveDiscardPastWait) {
CASE({
TEST_F(MergeFlow, MoveDiscardPastWait)
{
CASE(
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
@@ -172,20 +189,22 @@ TEST_F(MergeFlow, MoveDiscardPastWait) {
flow(DISCARD);
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
},
{
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
I->flow = VA_FLOW_WAIT0;
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I->flow = VA_FLOW_DISCARD;
});
});
}
TEST_F(MergeFlow, OccludedWaitsAndDiscard) {
CASE({
TEST_F(MergeFlow, OccludedWaitsAndDiscard)
{
CASE(
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
@@ -194,75 +213,84 @@ TEST_F(MergeFlow, OccludedWaitsAndDiscard) {
flow(DISCARD);
flow(WAIT2);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
},
{
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
I->flow = VA_FLOW_WAIT02;
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I->flow = VA_FLOW_DISCARD;
});
});
}
TEST_F(MergeFlow, DeleteUselessWaits) {
CASE({
TEST_F(MergeFlow, DeleteUselessWaits)
{
CASE(
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0);
flow(WAIT2);
flow(END);
},
{
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I->flow = VA_FLOW_END;
});
});
}
TEST_F(MergeFlow, BlockFullOfUselessWaits) {
CASE({
TEST_F(MergeFlow, BlockFullOfUselessWaits)
{
CASE(
{
flow(WAIT0);
flow(WAIT2);
flow(DISCARD);
flow(END);
},
{
flow(END);
});
},
{ flow(END); });
}
TEST_F(MergeFlow, WaitWithMessage) {
CASE({
TEST_F(MergeFlow, WaitWithMessage)
{
CASE(
{
bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
flow(WAIT0);
},
{
I = bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
},
{
I = bi_ld_attr_imm_to(b, bi_register(16), bi_register(60),
bi_register(61), BI_REGISTER_FORMAT_F32,
BI_VECSIZE_V4, 1);
I->flow = VA_FLOW_WAIT0;
});
});
}
TEST_F(MergeFlow, CantMoveWaitPastMessage) {
TEST_F(MergeFlow, CantMoveWaitPastMessage)
{
NEGCASE({
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I =
bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
/* Pretend it's blocked for some reason. This doesn't actually happen
* with the current algorithm, but it's good to handle the special
* cases correctly in case we change later on.
*/
I->flow = VA_FLOW_DISCARD;
flow(WAIT0);
/* Pretend it's blocked for some reason. This doesn't actually happen
* with the current algorithm, but it's good to handle the special
* cases correctly in case we change later on.
*/
I->flow = VA_FLOW_DISCARD;
flow(WAIT0);
});
}
TEST_F(MergeFlow, DeletePointlessDiscard) {
CASE({
TEST_F(MergeFlow, DeletePointlessDiscard)
{
CASE(
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
bi_register(12), false, BI_DIMENSION_2D,
@ -277,31 +305,34 @@ TEST_F(MergeFlow, DeletePointlessDiscard) {
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
flow(END);
},
{
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
bi_register(12), false, BI_DIMENSION_2D,
BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
I = bi_tex_single_to(
b, bi_register(0), bi_register(4), bi_register(8), bi_register(12),
false, BI_DIMENSION_2D, BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
I->flow = VA_FLOW_WAIT0126;
I = bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest);
I = bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5),
atest);
I->flow = VA_FLOW_WAIT;
I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
I->flow = VA_FLOW_END;
});
});
}
TEST_F(MergeFlow, PreserveTerminalBarriers) {
CASE({
TEST_F(MergeFlow, PreserveTerminalBarriers)
{
CASE(
{
bi_barrier(b);
flow(WAIT);
flow(END);
},
{
},
{
bi_barrier(b)->flow = VA_FLOW_WAIT;
flow(END);
});
});
}

View file

@ -21,34 +21,38 @@
* SOFTWARE.
*/
#include "va_compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include <gtest/gtest.h>
#define CASE(instr, expected) do { \
uint64_t _value = va_pack_instr(instr); \
if (_value != expected) { \
fprintf(stderr, "Got %" PRIx64 ", expected %" PRIx64 "\n", _value, (uint64_t) expected); \
bi_print_instr(instr, stderr); \
fprintf(stderr, "\n"); \
ADD_FAILURE(); \
} \
} while(0)
#define CASE(instr, expected) \
do { \
uint64_t _value = va_pack_instr(instr); \
if (_value != expected) { \
fprintf(stderr, "Got %" PRIx64 ", expected %" PRIx64 "\n", _value, \
(uint64_t)expected); \
bi_print_instr(instr, stderr); \
fprintf(stderr, "\n"); \
ADD_FAILURE(); \
} \
} while (0)
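For orientation, the reformatted CASE macro above packs a single instruction and compares the resulting 64-bit encoding against a literal. A minimal standalone sketch of that check pattern, assuming a hypothetical pack_u64() stand-in rather than the real va_pack_instr() signature:
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
/* Hypothetical stand-in for va_pack_instr(); returns a fixed encoding. */
static uint64_t
pack_u64(void)
{
   return 0x0091c10000000002ULL;
}
int
main(void)
{
   uint64_t got = pack_u64();
   uint64_t expected = 0x0091c10000000002ULL;
   /* Same shape as the test macro: report both values on mismatch. */
   if (got != expected) {
      fprintf(stderr, "Got %" PRIx64 ", expected %" PRIx64 "\n", got, expected);
      return 1;
   }
   return 0;
}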
class ValhallPacking : public testing::Test {
protected:
ValhallPacking() {
protected:
ValhallPacking()
{
mem_ctx = ralloc_context(NULL);
b = bit_builder(mem_ctx);
zero = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 0), false);
one = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 8), false);
n4567 = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 4), true);
zero = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 0), false);
one = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 8), false);
n4567 = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 4), true);
}
~ValhallPacking() {
~ValhallPacking()
{
ralloc_free(mem_ctx);
}
@ -57,60 +61,67 @@ protected:
bi_index zero, one, n4567;
};
TEST_F(ValhallPacking, Moves) {
TEST_F(ValhallPacking, Moves)
{
CASE(bi_mov_i32_to(b, bi_register(1), bi_register(2)),
0x0091c10000000002ULL);
CASE(bi_mov_i32_to(b, bi_register(1), bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 5), false)),
0x0091c1000000008aULL);
0x0091c10000000002ULL);
CASE(bi_mov_i32_to(b, bi_register(1),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 5), false)),
0x0091c1000000008aULL);
}
TEST_F(ValhallPacking, Fadd) {
TEST_F(ValhallPacking, Fadd)
{
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_register(2)),
0x00a4c00000000201ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_abs(bi_register(2))),
0x00a4c02000000201ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_register(2))),
0x00a4c01000000201ULL);
0x00a4c00000000201ULL);
CASE(
bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_abs(bi_register(2))),
0x00a4c02000000201ULL);
CASE(
bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_register(2))),
0x00a4c01000000201ULL);
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_swz_16(bi_register(1), false, false),
CASE(bi_fadd_v2f16_to(b, bi_register(0),
bi_swz_16(bi_register(1), false, false),
bi_swz_16(bi_register(0), true, true)),
0x00a5c0000c000001ULL);
0x00a5c0000c000001ULL);
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(1), bi_register(0)),
0x00a5c00028000001ULL);
0x00a5c00028000001ULL);
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(1),
bi_swz_16(bi_register(0), true, false)),
0x00a5c00024000001ULL);
0x00a5c00024000001ULL);
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_discard(bi_abs(bi_register(0))),
bi_neg(zero)),
0x00a5c0902800c040ULL);
0x00a5c0902800c040ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1),
zero),
0x00a4c0000000c001ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), zero),
0x00a4c0000000c001ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_neg(zero)),
0x00a4c0100000c001ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(zero)),
0x00a4c0100000c001ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_half(bi_register(0), true)),
0x00a4c00008000001ULL);
0x00a4c00008000001ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_half(bi_register(0), false)),
0x00a4c00004000001ULL);
0x00a4c00004000001ULL);
}
TEST_F(ValhallPacking, Clper) {
TEST_F(ValhallPacking, Clper)
{
CASE(bi_clper_i32_to(b, bi_register(0), bi_register(0), bi_byte(n4567, 0),
BI_INACTIVE_RESULT_F1, BI_LANE_OP_NONE, BI_SUBGROUP_SUBGROUP16),
0x00a0c030128fc900);
BI_INACTIVE_RESULT_F1, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP16),
0x00a0c030128fc900);
}
TEST_F(ValhallPacking, Clamps) {
TEST_F(ValhallPacking, Clamps)
{
bi_instr *I = bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_neg(bi_abs(bi_register(2))));
CASE(I, 0x00a4c03000000201ULL);
@ -119,209 +130,243 @@ TEST_F(ValhallPacking, Clamps) {
CASE(I, 0x00a4c03200000201ULL);
}
TEST_F(ValhallPacking, Misc) {
TEST_F(ValhallPacking, Misc)
{
CASE(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 4), false),
bi_neg(zero)),
0x00b2c10400c08841ULL);
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 4), false),
bi_neg(zero)),
0x00b2c10400c08841ULL);
CASE(bi_fround_f32_to(b, bi_register(2), bi_discard(bi_neg(bi_register(2))),
BI_ROUND_RTN),
0x0090c240800d0042ULL);
0x0090c240800d0042ULL);
CASE(bi_fround_v2f16_to(b, bi_half(bi_register(0), false), bi_register(0),
BI_ROUND_RTN),
0x00904000a00f0000ULL);
BI_ROUND_RTN),
0x00904000a00f0000ULL);
CASE(bi_fround_v2f16_to(b, bi_half(bi_register(0), false),
bi_swz_16(bi_register(1), true, false), BI_ROUND_RTN),
0x00904000900f0001ULL);
CASE(
bi_fround_v2f16_to(b, bi_half(bi_register(0), false),
bi_swz_16(bi_register(1), true, false), BI_ROUND_RTN),
0x00904000900f0001ULL);
}
TEST_F(ValhallPacking, FaddImm) {
CASE(bi_fadd_imm_f32_to(b, bi_register(2), bi_discard(bi_register(2)), 0x4847C6C0),
0x0114C24847C6C042ULL);
TEST_F(ValhallPacking, FaddImm)
{
CASE(bi_fadd_imm_f32_to(b, bi_register(2), bi_discard(bi_register(2)),
0x4847C6C0),
0x0114C24847C6C042ULL);
CASE(bi_fadd_imm_v2f16_to(b, bi_register(2), bi_discard(bi_register(2)), 0x70AC6784),
0x0115C270AC678442ULL);
CASE(bi_fadd_imm_v2f16_to(b, bi_register(2), bi_discard(bi_register(2)),
0x70AC6784),
0x0115C270AC678442ULL);
}
TEST_F(ValhallPacking, Comparions) {
TEST_F(ValhallPacking, Comparions)
{
CASE(bi_icmp_or_v2s16_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(3), true, false)),
bi_discard(bi_swz_16(bi_register(2), true, false)),
zero, BI_CMPF_GT, BI_RESULT_TYPE_M1),
bi_discard(bi_swz_16(bi_register(3), true, false)),
bi_discard(bi_swz_16(bi_register(2), true, false)),
zero, BI_CMPF_GT, BI_RESULT_TYPE_M1),
0x00f9c21184c04243);
CASE(bi_fcmp_or_v2f16_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(3), true, false)),
bi_discard(bi_swz_16(bi_register(2), false, false)),
zero, BI_CMPF_GT, BI_RESULT_TYPE_M1),
0x00f5c20190c04243);
bi_discard(bi_swz_16(bi_register(3), true, false)),
bi_discard(bi_swz_16(bi_register(2), false, false)),
zero, BI_CMPF_GT, BI_RESULT_TYPE_M1),
0x00f5c20190c04243);
}
TEST_F(ValhallPacking, Conversions) {
TEST_F(ValhallPacking, Conversions)
{
CASE(bi_v2s16_to_v2f16_to(b, bi_register(2), bi_discard(bi_register(2))),
0x0090c22000070042);
0x0090c22000070042);
}
TEST_F(ValhallPacking, BranchzI16) {
bi_instr *I = bi_branchz_i16(b, bi_half(bi_register(2), false), bi_null(), BI_CMPF_EQ);
TEST_F(ValhallPacking, BranchzI16)
{
bi_instr *I =
bi_branchz_i16(b, bi_half(bi_register(2), false), bi_null(), BI_CMPF_EQ);
I->branch_offset = 1;
CASE(I, 0x001fc03000000102);
}
TEST_F(ValhallPacking, BranchzI16Backwards) {
TEST_F(ValhallPacking, BranchzI16Backwards)
{
bi_instr *I = bi_branchz_i16(b, zero, bi_null(), BI_CMPF_EQ);
I->branch_offset = -8;
CASE(I, 0x001fc017fffff8c0);
}
TEST_F(ValhallPacking, Blend) {
CASE(bi_blend_to(b, bi_null(), bi_register(0), bi_register(60),
bi_fau(BIR_FAU_BLEND_0, false),
bi_fau(BIR_FAU_BLEND_0, true),
bi_null(), BI_REGISTER_FORMAT_F16, 2, 0),
0x007f4004333c00f0);
TEST_F(ValhallPacking, Blend)
{
CASE(
bi_blend_to(b, bi_null(), bi_register(0), bi_register(60),
bi_fau(BIR_FAU_BLEND_0, false), bi_fau(BIR_FAU_BLEND_0, true),
bi_null(), BI_REGISTER_FORMAT_F16, 2, 0),
0x007f4004333c00f0);
}
TEST_F(ValhallPacking, Mux) {
TEST_F(ValhallPacking, Mux)
{
CASE(bi_mux_i32_to(b, bi_register(0), bi_discard(bi_register(0)),
bi_discard(bi_register(4)),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 0), false), BI_MUX_BIT),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 0), false),
BI_MUX_BIT),
0x00b8c00300804440ull);
}
TEST_F(ValhallPacking, AtestFP16) {
TEST_F(ValhallPacking, AtestFP16)
{
CASE(bi_atest_to(b, bi_register(60), bi_register(60),
bi_half(bi_register(1), true),
bi_fau(BIR_FAU_ATEST_PARAM, false)),
0x007dbc0208ea013c);
}
TEST_F(ValhallPacking, AtestFP32) {
TEST_F(ValhallPacking, AtestFP32)
{
CASE(bi_atest_to(b, bi_register(60), bi_register(60), one,
bi_fau(BIR_FAU_ATEST_PARAM, false)),
0x007dbc0200ead03c);
}
TEST_F(ValhallPacking, Transcendentals) {
TEST_F(ValhallPacking, Transcendentals)
{
CASE(bi_frexpm_f32_to(b, bi_register(1), bi_register(0), false, true),
0x0099c10001000000);
CASE(bi_frexpe_f32_to(b, bi_register(0), bi_discard(bi_register(0)), false, true),
CASE(bi_frexpe_f32_to(b, bi_register(0), bi_discard(bi_register(0)), false,
true),
0x0099c00001020040);
CASE(bi_frsq_f32_to(b, bi_register(2), bi_register(1)),
0x009cc20000020001);
CASE(bi_frsq_f32_to(b, bi_register(2), bi_register(1)), 0x009cc20000020001);
CASE(bi_fma_rscale_f32_to(b, bi_register(0), bi_discard(bi_register(1)), bi_discard(bi_register(2)), bi_neg(zero), bi_discard(bi_register(0)), BI_SPECIAL_LEFT),
CASE(bi_fma_rscale_f32_to(b, bi_register(0), bi_discard(bi_register(1)),
bi_discard(bi_register(2)), bi_neg(zero),
bi_discard(bi_register(0)), BI_SPECIAL_LEFT),
0x0162c00440c04241);
}
TEST_F(ValhallPacking, Csel) {
TEST_F(ValhallPacking, Csel)
{
CASE(bi_csel_u32_to(b, bi_register(1), bi_discard(bi_register(2)),
bi_discard(bi_register(3)),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), true),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true),
BI_CMPF_EQ),
0x0150c10085844342);
CASE(bi_csel_u32_to(b, bi_register(1), bi_discard(bi_register(2)),
bi_discard(bi_register(3)),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), true),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true),
BI_CMPF_LT),
0x0150c10485844342);
CASE(bi_csel_s32_to(b, bi_register(1), bi_discard(bi_register(2)),
bi_discard(bi_register(3)),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), true),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true),
BI_CMPF_LT),
0x0158c10485844342);
}
TEST_F(ValhallPacking, LdAttrImm) {
bi_instr *I = bi_ld_attr_imm_to(b, bi_register(0),
bi_discard(bi_register(60)),
bi_discard(bi_register(61)),
BI_REGISTER_FORMAT_F16, BI_VECSIZE_V4, 1);
TEST_F(ValhallPacking, LdAttrImm)
{
bi_instr *I = bi_ld_attr_imm_to(
b, bi_register(0), bi_discard(bi_register(60)),
bi_discard(bi_register(61)), BI_REGISTER_FORMAT_F16, BI_VECSIZE_V4, 1);
I->table = 1;
CASE(I, 0x0066800433117d7c);
}
TEST_F(ValhallPacking, LdVarBufImmF16) {
TEST_F(ValhallPacking, LdVarBufImmF16)
{
CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16,
BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0),
BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE,
BI_VECSIZE_V4, 0),
0x005d82143300003d);
CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_SAMPLE,
BI_SOURCE_FORMAT_F16,
BI_UPDATE_STORE, BI_VECSIZE_V4, 0),
0x005d80843300003d);
BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
BI_VECSIZE_V4, 0),
0x005d80843300003d);
CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTROID,
BI_SOURCE_FORMAT_F16,
BI_UPDATE_STORE, BI_VECSIZE_V4, 8),
0x005d80443308003d);
BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
BI_VECSIZE_V4, 8),
0x005d80443308003d);
}
TEST_F(ValhallPacking, LeaBufImm) {
TEST_F(ValhallPacking, LeaBufImm)
{
CASE(bi_lea_buf_imm_to(b, bi_register(4), bi_discard(bi_register(59))),
0x005e840400000d7b);
}
TEST_F(ValhallPacking, StoreSegment) {
TEST_F(ValhallPacking, StoreSegment)
{
CASE(bi_store_i96(b, bi_register(0), bi_discard(bi_register(4)),
bi_discard(bi_register(5)), BI_SEG_VARY, 0),
bi_discard(bi_register(5)), BI_SEG_VARY, 0),
0x0061400632000044);
}
TEST_F(ValhallPacking, Convert16To32) {
CASE(bi_u16_to_u32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), false, false))),
0x0090c20000140077);
TEST_F(ValhallPacking, Convert16To32)
{
CASE(bi_u16_to_u32_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(55), false, false))),
0x0090c20000140077);
CASE(bi_u16_to_u32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), true, false))),
0x0090c20010140077);
CASE(bi_u16_to_u32_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(55), true, false))),
0x0090c20010140077);
CASE(bi_u16_to_f32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), false, false))),
0x0090c20000150077);
CASE(bi_u16_to_f32_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(55), false, false))),
0x0090c20000150077);
CASE(bi_u16_to_f32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), true, false))),
0x0090c20010150077);
CASE(bi_u16_to_f32_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(55), true, false))),
0x0090c20010150077);
CASE(bi_s16_to_s32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), false, false))),
0x0090c20000040077);
CASE(bi_s16_to_s32_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(55), false, false))),
0x0090c20000040077);
CASE(bi_s16_to_s32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), true, false))),
0x0090c20010040077);
CASE(bi_s16_to_s32_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(55), true, false))),
0x0090c20010040077);
}
TEST_F(ValhallPacking, Swizzle8) {
CASE(bi_icmp_or_v4u8_to(b, bi_register(1), bi_byte(bi_register(0), 0),
zero, zero, BI_CMPF_NE, BI_RESULT_TYPE_I1),
TEST_F(ValhallPacking, Swizzle8)
{
CASE(bi_icmp_or_v4u8_to(b, bi_register(1), bi_byte(bi_register(0), 0), zero,
zero, BI_CMPF_NE, BI_RESULT_TYPE_I1),
0x00f2c14300c0c000);
}
TEST_F(ValhallPacking, FauPage1) {
CASE(bi_mov_i32_to(b, bi_register(1), bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 32), false)),
0x0291c10000000080ULL);
TEST_F(ValhallPacking, FauPage1)
{
CASE(bi_mov_i32_to(b, bi_register(1),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 32), false)),
0x0291c10000000080ULL);
}
TEST_F(ValhallPacking, LdTileV3F16) {
TEST_F(ValhallPacking, LdTileV3F16)
{
CASE(bi_ld_tile_to(b, bi_register(4), bi_discard(bi_register(0)),
bi_register(60), bi_register(3),
BI_REGISTER_FORMAT_F16, BI_VECSIZE_V3),
bi_register(60), bi_register(3), BI_REGISTER_FORMAT_F16,
BI_VECSIZE_V3),
0x0078840423033c40);
}
TEST_F(ValhallPacking, Rhadd8) {
TEST_F(ValhallPacking, Rhadd8)
{
CASE(bi_hadd_v4s8_to(b, bi_register(0), bi_discard(bi_register(1)),
bi_discard(bi_register(0)), BI_ROUND_RTP),
0x00aac000400b4041);

View file

@ -21,41 +21,44 @@
* SOFTWARE.
*/
#include "va_compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include <gtest/gtest.h>
#define CASE(instr, expected) do { \
if (va_validate_fau(instr) != expected) { \
fprintf(stderr, "Incorrect validation for:\n"); \
bi_print_instr(instr, stderr); \
fprintf(stderr, "\n"); \
ADD_FAILURE(); \
} \
} while(0)
#define CASE(instr, expected) \
do { \
if (va_validate_fau(instr) != expected) { \
fprintf(stderr, "Incorrect validation for:\n"); \
bi_print_instr(instr, stderr); \
fprintf(stderr, "\n"); \
ADD_FAILURE(); \
} \
} while (0)
#define VALID(instr) CASE(instr, true)
#define VALID(instr) CASE(instr, true)
#define INVALID(instr) CASE(instr, false)
class ValidateFau : public testing::Test {
protected:
ValidateFau() {
protected:
ValidateFau()
{
mem_ctx = ralloc_context(NULL);
b = bit_builder(mem_ctx);
zero = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 0), false);
imm1 = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 1), false);
imm2 = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 2), false);
unif = bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 5), false);
unif_hi = bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 5), true);
unif2 = bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 6), false);
zero = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 0), false);
imm1 = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 1), false);
imm2 = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 2), false);
unif = bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 5), false);
unif_hi = bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 5), true);
unif2 = bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 6), false);
core_id = bi_fau(BIR_FAU_CORE_ID, false);
lane_id = bi_fau(BIR_FAU_LANE_ID, false);
}
~ValidateFau() {
~ValidateFau()
{
ralloc_free(mem_ctx);
}
@ -66,8 +69,8 @@ protected:
TEST_F(ValidateFau, One64BitUniformSlot)
{
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(3),
unif));
VALID(
bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(3), unif));
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), unif_hi, unif));
VALID(bi_fma_f32_to(b, bi_register(1), unif, unif, unif_hi));
INVALID(bi_fma_f32_to(b, bi_register(1), unif, unif2, bi_register(1)));
@ -77,8 +80,8 @@ TEST_F(ValidateFau, One64BitUniformSlot)
* marked as valid in early versions of the validator.
*/
INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 0), false),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 1), true)));
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 0), false),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 1), true)));
}
TEST_F(ValidateFau, Combined64BitUniformsConstants)
@ -99,17 +102,16 @@ TEST_F(ValidateFau, UniformsOnlyInDefaultMode)
TEST_F(ValidateFau, SingleSpecialImmediate)
{
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(2),
lane_id));
lane_id));
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(2),
core_id));
INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), lane_id,
core_id));
core_id));
INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), lane_id, core_id));
}
TEST_F(ValidateFau, SmokeTests)
{
VALID(bi_mov_i32_to(b, bi_register(1), bi_register(2)));
VALID(bi_mov_i32_to(b, bi_register(1), unif));
VALID(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)),
unif, bi_neg(zero)));
VALID(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)), unif,
bi_neg(zero)));
}

View file

@ -79,7 +79,7 @@ va_select_fau_page(const bi_instr *I)
{
bi_foreach_src(I, s) {
if (I->src[s].type == BI_INDEX_FAU)
return va_fau_page((enum bir_fau) I->src[s].value);
return va_fau_page((enum bir_fau)I->src[s].value);
}
return 0;
@ -91,8 +91,7 @@ struct va_stats {
unsigned fma, cvt, sfu, v, ls, t;
};
void
va_count_instr_stats(bi_instr *I, struct va_stats *stats);
void va_count_instr_stats(bi_instr *I, struct va_stats *stats);
#ifdef __cplusplus
} /* extern C */

View file

@ -21,9 +21,9 @@
* SOFTWARE.
*/
#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall_enums.h"
#include "bi_builder.h"
/*
* Insert flow control into a scheduled and register allocated shader. This
@ -176,7 +176,8 @@ bi_depend_on_writers(struct bi_scoreboard_state *st, uint64_t regmask)
/* Sets the dependencies for a given clause, updating the model */
static void
bi_set_dependencies(bi_block *block, bi_instr *I, struct bi_scoreboard_state *st)
bi_set_dependencies(bi_block *block, bi_instr *I,
struct bi_scoreboard_state *st)
{
/* Depend on writers to handle read-after-write and write-after-write
* dependencies. Write-after-read dependencies are handled in the hardware
@ -482,7 +483,8 @@ va_insert_flow_control_nops(bi_context *ctx)
*/
if (va_should_end(block) || block->needs_nop) {
/* Don't bother adding a NOP into an unreachable block */
if (block == bi_start_block(&ctx->blocks) || bi_num_predecessors(block))
if (block == bi_start_block(&ctx->blocks) ||
bi_num_predecessors(block))
bi_flow(ctx, bi_after_block(block), VA_FLOW_END);
} else if (bi_reconverge_branches(block)) {
/* TODO: Do we ever need to reconverge from an empty block? */

View file

@ -21,9 +21,9 @@
* SOFTWARE.
*/
#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall.h"
#include "bi_builder.h"
/* Only some special immediates are available, as specified in the Table of
* Immediates in the specification. Other immediates must be lowered, either to
@ -51,7 +51,7 @@ va_lut_index_32(uint32_t imm)
static bi_index
va_lut_index_16(uint16_t imm)
{
uint16_t *arr16 = (uint16_t *) valhall_immediates;
uint16_t *arr16 = (uint16_t *)valhall_immediates;
for (unsigned i = 0; i < (2 * ARRAY_SIZE(valhall_immediates)); ++i) {
if (arr16[i] == imm)
@ -64,7 +64,7 @@ va_lut_index_16(uint16_t imm)
UNUSED static bi_index
va_lut_index_8(uint8_t imm)
{
uint8_t *arr8 = (uint8_t *) valhall_immediates;
uint8_t *arr8 = (uint8_t *)valhall_immediates;
for (unsigned i = 0; i < (4 * ARRAY_SIZE(valhall_immediates)); ++i) {
if (arr8[i] == imm)
@ -109,36 +109,43 @@ is_extension_of_16(uint32_t x, bool is_signed)
}
static bi_index
va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool is_signed, bool staging)
va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info,
bool is_signed, bool staging)
{
/* Try the constant as-is */
if (!staging) {
bi_index lut = va_lut_index_32(value);
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
/* ...or negated as a FP32 constant */
if (info.absneg && info.size == VA_SIZE_32) {
lut = bi_neg(va_lut_index_32(fui(-uif(value))));
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}
/* ...or negated as a FP16 constant */
if (info.absneg && info.size == VA_SIZE_16) {
lut = bi_neg(va_lut_index_32(value ^ 0x80008000));
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}
}
/* Try using a single half of a FP16 constant */
bool replicated_halves = (value & 0xFFFF) == (value >> 16);
if (!staging && info.swizzle && info.size == VA_SIZE_16 && replicated_halves) {
if (!staging && info.swizzle && info.size == VA_SIZE_16 &&
replicated_halves) {
bi_index lut = va_lut_index_16(value & 0xFFFF);
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
/* ...possibly negated */
if (info.absneg) {
lut = bi_neg(va_lut_index_16((value & 0xFFFF) ^ 0x8000));
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}
}
@ -147,25 +154,28 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool
is_extension_of_8(value, is_signed)) {
bi_index lut = va_lut_index_8(value & 0xFF);
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}
/* Try extending a halfword */
if (!staging && info.widen &&
is_extension_of_16(value, is_signed)) {
if (!staging && info.widen && is_extension_of_16(value, is_signed)) {
bi_index lut = va_lut_index_16(value & 0xFFFF);
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}
/* Try demoting the constant to FP16 */
if (!staging && info.swizzle && info.size == VA_SIZE_32) {
bi_index lut = va_demote_constant_fp16(value);
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
if (info.absneg) {
bi_index lut = bi_neg(va_demote_constant_fp16(fui(-uif(value))));
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}
}
@ -218,7 +228,8 @@ va_lower_constants(bi_context *ctx, bi_instr *I)
value = bi_apply_swizzle(value, swz);
}
bi_index cons = va_resolve_constant(&b, value, info, is_signed, staging);
bi_index cons =
va_resolve_constant(&b, value, info, is_signed, staging);
cons.neg ^= I->src[s].neg;
I->src[s] = cons;
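As a rough illustration of the constant-lowering idea in this file (try to match an immediate against a small lookup table before falling back to other encodings), here is a hedged sketch with a hypothetical four-entry table; the real valhall_immediates[] contents and the FAU index encoding are not reproduced:
#include <stdint.h>
#include <stdio.h>
/* Hypothetical immediate table; the real pass scans valhall_immediates[]. */
static const uint32_t lut[4] = {0x00000000, 0x3f800000, 0xffffffff, 0x80000000};
/* Returns the table index for imm, or -1 if the constant must be lowered. */
static int
lut_index_32(uint32_t imm)
{
   for (unsigned i = 0; i < 4; ++i) {
      if (lut[i] == imm)
         return (int)i;
   }
   return -1;
}
int
main(void)
{
   /* 1.0f is in the hypothetical table, 0x12345678 is not. */
   printf("%d %d\n", lut_index_32(0x3f800000), lut_index_32(0x12345678));
   return 0;
}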

View file

@ -21,9 +21,9 @@
* SOFTWARE.
*/
#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall.h"
#include "bi_builder.h"
static bi_instr *
lower(bi_builder *b, bi_instr *I)
@ -38,45 +38,56 @@ lower(bi_builder *b, bi_instr *I)
return bi_iadd_v4u8_to(b, I->dest[0], I->src[0], bi_zero(), false);
case BI_OPCODE_ICMP_I32:
return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_ICMP_V2I16:
return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_ICMP_V4I8:
return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_ICMP_U32:
return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_ICMP_V2U16:
return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_ICMP_V4U8:
return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_ICMP_S32:
return bi_icmp_or_s32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_s32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_ICMP_V2S16:
return bi_icmp_or_v2s16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v2s16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_ICMP_V4S8:
return bi_icmp_or_v4s8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v4s8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_FCMP_F32:
return bi_fcmp_or_f32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_fcmp_or_f32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_FCMP_V2F16:
return bi_fcmp_or_v2f16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_fcmp_or_v2f16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
/* Integer CSEL must have a signedness */
case BI_OPCODE_CSEL_I32:
case BI_OPCODE_CSEL_V2I16:
assert(I->cmpf == BI_CMPF_EQ || I->cmpf == BI_CMPF_NE);
I->op = (I->op == BI_OPCODE_CSEL_I32) ? BI_OPCODE_CSEL_U32 :
BI_OPCODE_CSEL_V2U16;
I->op = (I->op == BI_OPCODE_CSEL_I32) ? BI_OPCODE_CSEL_U32
: BI_OPCODE_CSEL_V2U16;
return NULL;
/* Jump -> conditional branch with condition tied to true. */
@ -117,7 +128,7 @@ lower(bi_builder *b, bi_instr *I)
case BI_OPCODE_FADD_RSCALE_F32:
return bi_fma_rscale_f32_to(b, I->dest[0], I->src[0], bi_imm_f32(1.0),
I->src[1], I->src[2], I->special);
I->src[1], I->src[2], I->special);
default:
return NULL;

View file

@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "va_compiler.h"
#include "bi_builder.h"
#include "va_compiler.h"
/*
* Bifrost uses split 64-bit addresses, specified as two consecutive sources.
@ -38,8 +38,7 @@ lower_split_src(bi_context *ctx, bi_instr *I, unsigned s)
bi_index offset_fau = I->src[s];
offset_fau.offset++;
if (I->src[s].type == BI_INDEX_FAU &&
I->src[s].offset == 0 &&
if (I->src[s].type == BI_INDEX_FAU && I->src[s].offset == 0 &&
bi_is_value_equiv(offset_fau, I->src[s + 1])) {
return;
}
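The hunk above touches the split 64-bit address handling described in this file's comment (a 64-bit address carried as two consecutive 32-bit sources). A minimal sketch of that convention, assuming the low word comes first; nothing here reflects the real bi_index or FAU layout:
#include <stdint.h>
#include <stdio.h>
/* Split a 64-bit address into two consecutive 32-bit words, low word first. */
static void
split_addr(uint64_t addr, uint32_t *lo, uint32_t *hi)
{
   *lo = (uint32_t)(addr & 0xffffffffu);
   *hi = (uint32_t)(addr >> 32);
}
int
main(void)
{
   uint32_t lo, hi;
   split_addr(0x0000000123456789ULL, &lo, &hi);
   printf("lo=%08x hi=%08x\n", lo, hi);
   return 0;
}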

View file

@ -97,7 +97,7 @@ scoreboard_update(struct bi_scoreboard_state *st, const bi_instr *I)
/* Unmark registers after they are waited on */
for (unsigned i = 0; i < VA_NUM_GENERAL_SLOTS; ++i) {
if (waits_on_slot(I->flow, i))
st->read[i] = 0;
st->read[i] = 0;
}
}
@ -111,8 +111,8 @@ va_analyze_scoreboard_reads(bi_context *ctx)
bi_worklist_push_tail(&worklist, block);
/* Reset analysis from previous pass */
block->scoreboard_in = (struct bi_scoreboard_state){ 0 };
block->scoreboard_out = (struct bi_scoreboard_state){ 0 };
block->scoreboard_in = (struct bi_scoreboard_state){0};
block->scoreboard_out = (struct bi_scoreboard_state){0};
}
/* Perform forward data flow analysis to calculate dependencies */

View file

@ -21,9 +21,9 @@
* SOFTWARE.
*/
#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall_enums.h"
#include "bi_builder.h"
/*
* Merge NOPs with flow control with nearby instructions to eliminate the NOPs,
@ -80,8 +80,10 @@ merge_end_reconverge(bi_block *block)
bi_instr *last = list_last_entry(&block->instructions, bi_instr, link);
bi_instr *penult = bi_prev_op(last);
if (last->op != BI_OPCODE_NOP) return;
if (last->flow != VA_FLOW_RECONVERGE && last->flow != VA_FLOW_END) return;
if (last->op != BI_OPCODE_NOP)
return;
if (last->flow != VA_FLOW_RECONVERGE && last->flow != VA_FLOW_END)
return;
/* End implies all other flow control except for waiting on barriers (slot
* #7, with VA_FLOW_WAIT), so remove blocking flow control.
@ -99,7 +101,8 @@ merge_end_reconverge(bi_block *block)
}
/* If there is blocking flow control, we can't merge */
if (penult->flow != VA_FLOW_NONE) return;
if (penult->flow != VA_FLOW_NONE)
return;
/* Else, merge */
penult->flow = last->flow;
@ -133,8 +136,8 @@ merge_waits(bi_block *block)
bi_instr *last_free = NULL;
bi_foreach_instr_in_block_safe(block, I) {
if (last_free != NULL &&
I->op == BI_OPCODE_NOP && va_flow_is_wait_or_none(I->flow)) {
if (last_free != NULL && I->op == BI_OPCODE_NOP &&
va_flow_is_wait_or_none(I->flow)) {
/* Merge waits with compatible instructions */
last_free->flow = union_waits(last_free->flow, I->flow);
@ -212,8 +215,10 @@ va_merge_flow(bi_context *ctx)
{
bi_foreach_block(ctx, block) {
/* If there are less than 2 instructions, there's nothing to merge */
if (list_is_empty(&block->instructions)) continue;
if (list_is_singular(&block->instructions)) continue;
if (list_is_empty(&block->instructions))
continue;
if (list_is_singular(&block->instructions))
continue;
merge_end_reconverge(block);
merge_waits(block);

View file

@ -29,15 +29,21 @@ static enum bi_opcode
va_op_add_imm(enum bi_opcode op)
{
switch (op) {
case BI_OPCODE_FADD_F32: return BI_OPCODE_FADD_IMM_F32;
case BI_OPCODE_FADD_V2F16: return BI_OPCODE_FADD_IMM_V2F16;
case BI_OPCODE_FADD_F32:
return BI_OPCODE_FADD_IMM_F32;
case BI_OPCODE_FADD_V2F16:
return BI_OPCODE_FADD_IMM_V2F16;
case BI_OPCODE_IADD_S32:
case BI_OPCODE_IADD_U32: return BI_OPCODE_IADD_IMM_I32;
case BI_OPCODE_IADD_U32:
return BI_OPCODE_IADD_IMM_I32;
case BI_OPCODE_IADD_V2S16:
case BI_OPCODE_IADD_V2U16: return BI_OPCODE_IADD_IMM_V2I16;
case BI_OPCODE_IADD_V2U16:
return BI_OPCODE_IADD_IMM_V2I16;
case BI_OPCODE_IADD_V4S8:
case BI_OPCODE_IADD_V4U8: return BI_OPCODE_IADD_IMM_V4I8;
default: return 0;
case BI_OPCODE_IADD_V4U8:
return BI_OPCODE_IADD_IMM_V4I8;
default:
return 0;
}
}
@ -46,8 +52,8 @@ va_is_add_imm(bi_instr *I, unsigned s)
{
assert(s < I->nr_srcs);
return I->src[s].swizzle == BI_SWIZZLE_H01 &&
!I->src[s].abs && !I->src[s].neg && !I->clamp && !I->round;
return I->src[s].swizzle == BI_SWIZZLE_H01 && !I->src[s].abs &&
!I->src[s].neg && !I->clamp && !I->round;
}
static unsigned
@ -83,11 +89,14 @@ va_fuse_add_imm(bi_instr *I)
}
enum bi_opcode op = va_op_add_imm(I->op);
if (!op) return;
if (!op)
return;
unsigned s = va_choose_imm(I);
if (s > 1) return;
if (!va_is_add_imm(I, 1 - s)) return;
if (s > 1)
return;
if (!va_is_add_imm(I, 1 - s))
return;
I->op = op;
I->index = bi_apply_swizzle(I->src[s].value, I->src[s].swizzle);

View file

@ -21,10 +21,10 @@
* SOFTWARE.
*/
#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall.h"
#include "valhall_enums.h"
#include "bi_builder.h"
/* This file contains the final passes of the compiler. Running after
* scheduling and RA, the IR is now finalized, so we need to emit it to actual
@ -36,7 +36,7 @@
* Prints the (first) failing instruction to aid debugging.
*/
NORETURN static void PRINTFLIKE(2, 3)
invalid_instruction(const bi_instr *I, const char *cause, ...)
invalid_instruction(const bi_instr *I, const char *cause, ...)
{
fputs("\nInvalid ", stderr);
@ -56,8 +56,9 @@ invalid_instruction(const bi_instr *I, const char *cause, ...)
* Like assert, but prints the instruction if the assertion fails to aid
* debugging invalid inputs to the packing module.
*/
#define pack_assert(I, cond) \
if (!(cond)) invalid_instruction(I, "invariant " #cond);
#define pack_assert(I, cond) \
if (!(cond)) \
invalid_instruction(I, "invariant " #cond);
/*
* Validate that two adjacent 32-bit sources form an aligned 64-bit register
@ -95,14 +96,20 @@ static unsigned
va_pack_fau_special(const bi_instr *I, enum bir_fau fau)
{
switch (fau) {
case BIR_FAU_ATEST_PARAM: return VA_FAU_SPECIAL_PAGE_0_ATEST_DATUM;
case BIR_FAU_TLS_PTR: return VA_FAU_SPECIAL_PAGE_1_THREAD_LOCAL_POINTER;
case BIR_FAU_WLS_PTR: return VA_FAU_SPECIAL_PAGE_1_WORKGROUP_LOCAL_POINTER;
case BIR_FAU_LANE_ID: return VA_FAU_SPECIAL_PAGE_3_LANE_ID;
case BIR_FAU_PROGRAM_COUNTER: return VA_FAU_SPECIAL_PAGE_3_PROGRAM_COUNTER;
case BIR_FAU_SAMPLE_POS_ARRAY:return VA_FAU_SPECIAL_PAGE_0_SAMPLE;
case BIR_FAU_ATEST_PARAM:
return VA_FAU_SPECIAL_PAGE_0_ATEST_DATUM;
case BIR_FAU_TLS_PTR:
return VA_FAU_SPECIAL_PAGE_1_THREAD_LOCAL_POINTER;
case BIR_FAU_WLS_PTR:
return VA_FAU_SPECIAL_PAGE_1_WORKGROUP_LOCAL_POINTER;
case BIR_FAU_LANE_ID:
return VA_FAU_SPECIAL_PAGE_3_LANE_ID;
case BIR_FAU_PROGRAM_COUNTER:
return VA_FAU_SPECIAL_PAGE_3_PROGRAM_COUNTER;
case BIR_FAU_SAMPLE_POS_ARRAY:
return VA_FAU_SPECIAL_PAGE_0_SAMPLE;
case BIR_FAU_BLEND_0...(BIR_FAU_BLEND_0 + 7):
case BIR_FAU_BLEND_0 ...(BIR_FAU_BLEND_0 + 7):
return VA_FAU_SPECIAL_PAGE_0_BLEND_DESCRIPTOR_0 + (fau - BIR_FAU_BLEND_0);
default:
@ -136,7 +143,8 @@ va_pack_src(const bi_instr *I, unsigned s)
if (idx.type == BI_INDEX_REGISTER) {
unsigned value = va_pack_reg(I, idx);
if (idx.discard) value |= (1 << 6);
if (idx.discard)
value |= (1 << 6);
return value;
} else if (idx.type == BI_INDEX_FAU) {
pack_assert(I, idx.offset <= 1);
@ -150,10 +158,14 @@ static unsigned
va_pack_wrmask(const bi_instr *I)
{
switch (I->dest[0].swizzle) {
case BI_SWIZZLE_H00: return 0x1;
case BI_SWIZZLE_H11: return 0x2;
case BI_SWIZZLE_H01: return 0x3;
default: invalid_instruction(I, "write mask");
case BI_SWIZZLE_H00:
return 0x1;
case BI_SWIZZLE_H11:
return 0x2;
case BI_SWIZZLE_H01:
return 0x3;
default:
invalid_instruction(I, "write mask");
}
}
@ -161,17 +173,27 @@ static enum va_atomic_operation
va_pack_atom_opc(const bi_instr *I)
{
switch (I->atom_opc) {
case BI_ATOM_OPC_AADD: return VA_ATOMIC_OPERATION_AADD;
case BI_ATOM_OPC_ASMIN: return VA_ATOMIC_OPERATION_ASMIN;
case BI_ATOM_OPC_ASMAX: return VA_ATOMIC_OPERATION_ASMAX;
case BI_ATOM_OPC_AUMIN: return VA_ATOMIC_OPERATION_AUMIN;
case BI_ATOM_OPC_AUMAX: return VA_ATOMIC_OPERATION_AUMAX;
case BI_ATOM_OPC_AAND: return VA_ATOMIC_OPERATION_AAND;
case BI_ATOM_OPC_AOR: return VA_ATOMIC_OPERATION_AOR;
case BI_ATOM_OPC_AXOR: return VA_ATOMIC_OPERATION_AXOR;
case BI_ATOM_OPC_AADD:
return VA_ATOMIC_OPERATION_AADD;
case BI_ATOM_OPC_ASMIN:
return VA_ATOMIC_OPERATION_ASMIN;
case BI_ATOM_OPC_ASMAX:
return VA_ATOMIC_OPERATION_ASMAX;
case BI_ATOM_OPC_AUMIN:
return VA_ATOMIC_OPERATION_AUMIN;
case BI_ATOM_OPC_AUMAX:
return VA_ATOMIC_OPERATION_AUMAX;
case BI_ATOM_OPC_AAND:
return VA_ATOMIC_OPERATION_AAND;
case BI_ATOM_OPC_AOR:
return VA_ATOMIC_OPERATION_AOR;
case BI_ATOM_OPC_AXOR:
return VA_ATOMIC_OPERATION_AXOR;
case BI_ATOM_OPC_ACMPXCHG:
case BI_ATOM_OPC_AXCHG: return VA_ATOMIC_OPERATION_AXCHG;
default: invalid_instruction(I, "atomic opcode");
case BI_ATOM_OPC_AXCHG:
return VA_ATOMIC_OPERATION_AXCHG;
default:
invalid_instruction(I, "atomic opcode");
}
}
@ -179,12 +201,18 @@ static enum va_atomic_operation_with_1
va_pack_atom_opc_1(const bi_instr *I)
{
switch (I->atom_opc) {
case BI_ATOM_OPC_AINC: return VA_ATOMIC_OPERATION_WITH_1_AINC;
case BI_ATOM_OPC_ADEC: return VA_ATOMIC_OPERATION_WITH_1_ADEC;
case BI_ATOM_OPC_AUMAX1: return VA_ATOMIC_OPERATION_WITH_1_AUMAX1;
case BI_ATOM_OPC_ASMAX1: return VA_ATOMIC_OPERATION_WITH_1_ASMAX1;
case BI_ATOM_OPC_AOR1: return VA_ATOMIC_OPERATION_WITH_1_AOR1;
default: invalid_instruction(I, "atomic opcode with implicit 1");
case BI_ATOM_OPC_AINC:
return VA_ATOMIC_OPERATION_WITH_1_AINC;
case BI_ATOM_OPC_ADEC:
return VA_ATOMIC_OPERATION_WITH_1_ADEC;
case BI_ATOM_OPC_AUMAX1:
return VA_ATOMIC_OPERATION_WITH_1_AUMAX1;
case BI_ATOM_OPC_ASMAX1:
return VA_ATOMIC_OPERATION_WITH_1_ASMAX1;
case BI_ATOM_OPC_AOR1:
return VA_ATOMIC_OPERATION_WITH_1_AOR1;
default:
invalid_instruction(I, "atomic opcode with implicit 1");
}
}
@ -199,10 +227,14 @@ static enum va_widen
va_pack_widen_f32(const bi_instr *I, enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_H01: return VA_WIDEN_NONE;
case BI_SWIZZLE_H00: return VA_WIDEN_H0;
case BI_SWIZZLE_H11: return VA_WIDEN_H1;
default: invalid_instruction(I, "widen");
case BI_SWIZZLE_H01:
return VA_WIDEN_NONE;
case BI_SWIZZLE_H00:
return VA_WIDEN_H0;
case BI_SWIZZLE_H11:
return VA_WIDEN_H1;
default:
invalid_instruction(I, "widen");
}
}
@ -210,11 +242,16 @@ static enum va_swizzles_16_bit
va_pack_swizzle_f16(const bi_instr *I, enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_H00: return VA_SWIZZLES_16_BIT_H00;
case BI_SWIZZLE_H10: return VA_SWIZZLES_16_BIT_H10;
case BI_SWIZZLE_H01: return VA_SWIZZLES_16_BIT_H01;
case BI_SWIZZLE_H11: return VA_SWIZZLES_16_BIT_H11;
default: invalid_instruction(I, "16-bit swizzle");
case BI_SWIZZLE_H00:
return VA_SWIZZLES_16_BIT_H00;
case BI_SWIZZLE_H10:
return VA_SWIZZLES_16_BIT_H10;
case BI_SWIZZLE_H01:
return VA_SWIZZLES_16_BIT_H01;
case BI_SWIZZLE_H11:
return VA_SWIZZLES_16_BIT_H11;
default:
invalid_instruction(I, "16-bit swizzle");
}
}
@ -223,37 +260,62 @@ va_pack_widen(const bi_instr *I, enum bi_swizzle swz, enum va_size size)
{
if (size == VA_SIZE_8) {
switch (swz) {
case BI_SWIZZLE_H01: return VA_SWIZZLES_8_BIT_B0123;
case BI_SWIZZLE_H00: return VA_SWIZZLES_8_BIT_B0101;
case BI_SWIZZLE_H11: return VA_SWIZZLES_8_BIT_B2323;
case BI_SWIZZLE_B0000: return VA_SWIZZLES_8_BIT_B0000;
case BI_SWIZZLE_B1111: return VA_SWIZZLES_8_BIT_B1111;
case BI_SWIZZLE_B2222: return VA_SWIZZLES_8_BIT_B2222;
case BI_SWIZZLE_B3333: return VA_SWIZZLES_8_BIT_B3333;
default: invalid_instruction(I, "8-bit widen");
case BI_SWIZZLE_H01:
return VA_SWIZZLES_8_BIT_B0123;
case BI_SWIZZLE_H00:
return VA_SWIZZLES_8_BIT_B0101;
case BI_SWIZZLE_H11:
return VA_SWIZZLES_8_BIT_B2323;
case BI_SWIZZLE_B0000:
return VA_SWIZZLES_8_BIT_B0000;
case BI_SWIZZLE_B1111:
return VA_SWIZZLES_8_BIT_B1111;
case BI_SWIZZLE_B2222:
return VA_SWIZZLES_8_BIT_B2222;
case BI_SWIZZLE_B3333:
return VA_SWIZZLES_8_BIT_B3333;
default:
invalid_instruction(I, "8-bit widen");
}
} else if (size == VA_SIZE_16) {
switch (swz) {
case BI_SWIZZLE_H00: return VA_SWIZZLES_16_BIT_H00;
case BI_SWIZZLE_H10: return VA_SWIZZLES_16_BIT_H10;
case BI_SWIZZLE_H01: return VA_SWIZZLES_16_BIT_H01;
case BI_SWIZZLE_H11: return VA_SWIZZLES_16_BIT_H11;
case BI_SWIZZLE_B0000: return VA_SWIZZLES_16_BIT_B00;
case BI_SWIZZLE_B1111: return VA_SWIZZLES_16_BIT_B11;
case BI_SWIZZLE_B2222: return VA_SWIZZLES_16_BIT_B22;
case BI_SWIZZLE_B3333: return VA_SWIZZLES_16_BIT_B33;
default: invalid_instruction(I, "16-bit widen");
case BI_SWIZZLE_H00:
return VA_SWIZZLES_16_BIT_H00;
case BI_SWIZZLE_H10:
return VA_SWIZZLES_16_BIT_H10;
case BI_SWIZZLE_H01:
return VA_SWIZZLES_16_BIT_H01;
case BI_SWIZZLE_H11:
return VA_SWIZZLES_16_BIT_H11;
case BI_SWIZZLE_B0000:
return VA_SWIZZLES_16_BIT_B00;
case BI_SWIZZLE_B1111:
return VA_SWIZZLES_16_BIT_B11;
case BI_SWIZZLE_B2222:
return VA_SWIZZLES_16_BIT_B22;
case BI_SWIZZLE_B3333:
return VA_SWIZZLES_16_BIT_B33;
default:
invalid_instruction(I, "16-bit widen");
}
} else if (size == VA_SIZE_32) {
switch (swz) {
case BI_SWIZZLE_H01: return VA_SWIZZLES_32_BIT_NONE;
case BI_SWIZZLE_H00: return VA_SWIZZLES_32_BIT_H0;
case BI_SWIZZLE_H11: return VA_SWIZZLES_32_BIT_H1;
case BI_SWIZZLE_B0000: return VA_SWIZZLES_32_BIT_B0;
case BI_SWIZZLE_B1111: return VA_SWIZZLES_32_BIT_B1;
case BI_SWIZZLE_B2222: return VA_SWIZZLES_32_BIT_B2;
case BI_SWIZZLE_B3333: return VA_SWIZZLES_32_BIT_B3;
default: invalid_instruction(I, "32-bit widen");
case BI_SWIZZLE_H01:
return VA_SWIZZLES_32_BIT_NONE;
case BI_SWIZZLE_H00:
return VA_SWIZZLES_32_BIT_H0;
case BI_SWIZZLE_H11:
return VA_SWIZZLES_32_BIT_H1;
case BI_SWIZZLE_B0000:
return VA_SWIZZLES_32_BIT_B0;
case BI_SWIZZLE_B1111:
return VA_SWIZZLES_32_BIT_B1;
case BI_SWIZZLE_B2222:
return VA_SWIZZLES_32_BIT_B2;
case BI_SWIZZLE_B3333:
return VA_SWIZZLES_32_BIT_B3;
default:
invalid_instruction(I, "32-bit widen");
}
} else {
invalid_instruction(I, "type size for widen");
@ -264,14 +326,22 @@ static enum va_half_swizzles_8_bit
va_pack_halfswizzle(const bi_instr *I, enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_B0000: return VA_HALF_SWIZZLES_8_BIT_B00;
case BI_SWIZZLE_B1111: return VA_HALF_SWIZZLES_8_BIT_B11;
case BI_SWIZZLE_B2222: return VA_HALF_SWIZZLES_8_BIT_B22;
case BI_SWIZZLE_B3333: return VA_HALF_SWIZZLES_8_BIT_B33;
case BI_SWIZZLE_B0011: return VA_HALF_SWIZZLES_8_BIT_B01;
case BI_SWIZZLE_B2233: return VA_HALF_SWIZZLES_8_BIT_B23;
case BI_SWIZZLE_B0022: return VA_HALF_SWIZZLES_8_BIT_B02;
default: invalid_instruction(I, "v2u8 swizzle");
case BI_SWIZZLE_B0000:
return VA_HALF_SWIZZLES_8_BIT_B00;
case BI_SWIZZLE_B1111:
return VA_HALF_SWIZZLES_8_BIT_B11;
case BI_SWIZZLE_B2222:
return VA_HALF_SWIZZLES_8_BIT_B22;
case BI_SWIZZLE_B3333:
return VA_HALF_SWIZZLES_8_BIT_B33;
case BI_SWIZZLE_B0011:
return VA_HALF_SWIZZLES_8_BIT_B01;
case BI_SWIZZLE_B2233:
return VA_HALF_SWIZZLES_8_BIT_B23;
case BI_SWIZZLE_B0022:
return VA_HALF_SWIZZLES_8_BIT_B02;
default:
invalid_instruction(I, "v2u8 swizzle");
}
}
@ -279,12 +349,18 @@ static enum va_lanes_8_bit
va_pack_shift_lanes(const bi_instr *I, enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_H01: return VA_LANES_8_BIT_B02;
case BI_SWIZZLE_B0000: return VA_LANES_8_BIT_B00;
case BI_SWIZZLE_B1111: return VA_LANES_8_BIT_B11;
case BI_SWIZZLE_B2222: return VA_LANES_8_BIT_B22;
case BI_SWIZZLE_B3333: return VA_LANES_8_BIT_B33;
default: invalid_instruction(I, "lane shift");
case BI_SWIZZLE_H01:
return VA_LANES_8_BIT_B02;
case BI_SWIZZLE_B0000:
return VA_LANES_8_BIT_B00;
case BI_SWIZZLE_B1111:
return VA_LANES_8_BIT_B11;
case BI_SWIZZLE_B2222:
return VA_LANES_8_BIT_B22;
case BI_SWIZZLE_B3333:
return VA_LANES_8_BIT_B33;
default:
invalid_instruction(I, "lane shift");
}
}
@ -292,10 +368,14 @@ static enum va_combine
va_pack_combine(const bi_instr *I, enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_H01: return VA_COMBINE_NONE;
case BI_SWIZZLE_H00: return VA_COMBINE_H0;
case BI_SWIZZLE_H11: return VA_COMBINE_H1;
default: invalid_instruction(I, "branch lane");
case BI_SWIZZLE_H01:
return VA_COMBINE_NONE;
case BI_SWIZZLE_H00:
return VA_COMBINE_H0;
case BI_SWIZZLE_H11:
return VA_COMBINE_H1;
default:
invalid_instruction(I, "branch lane");
}
}
@ -303,10 +383,14 @@ static enum va_source_format
va_pack_source_format(const bi_instr *I)
{
switch (I->source_format) {
case BI_SOURCE_FORMAT_FLAT32: return VA_SOURCE_FORMAT_SRC_FLAT32;
case BI_SOURCE_FORMAT_FLAT16: return VA_SOURCE_FORMAT_SRC_FLAT16;
case BI_SOURCE_FORMAT_F32: return VA_SOURCE_FORMAT_SRC_F32;
case BI_SOURCE_FORMAT_F16: return VA_SOURCE_FORMAT_SRC_F16;
case BI_SOURCE_FORMAT_FLAT32:
return VA_SOURCE_FORMAT_SRC_FLAT32;
case BI_SOURCE_FORMAT_FLAT16:
return VA_SOURCE_FORMAT_SRC_FLAT16;
case BI_SOURCE_FORMAT_F32:
return VA_SOURCE_FORMAT_SRC_F32;
case BI_SOURCE_FORMAT_F16:
return VA_SOURCE_FORMAT_SRC_F16;
}
invalid_instruction(I, "source format");
@ -316,9 +400,12 @@ static uint64_t
va_pack_rhadd(const bi_instr *I)
{
switch (I->round) {
case BI_ROUND_RTN: return 0; /* hadd */
case BI_ROUND_RTP: return BITFIELD_BIT(30); /* rhadd */
default: unreachable("Invalid round for HADD");
case BI_ROUND_RTN:
return 0; /* hadd */
case BI_ROUND_RTP:
return BITFIELD_BIT(30); /* rhadd */
default:
unreachable("Invalid round for HADD");
}
}
@ -334,15 +421,17 @@ va_pack_alu(const bi_instr *I)
case BI_OPCODE_FREXPE_V2F16:
case BI_OPCODE_FREXPM_F32:
case BI_OPCODE_FREXPM_V2F16:
if (I->sqrt) hex |= 1ull << 24;
if (I->log) hex |= 1ull << 25;
if (I->sqrt)
hex |= 1ull << 24;
if (I->log)
hex |= 1ull << 25;
break;
/* Add mux type */
case BI_OPCODE_MUX_I32:
case BI_OPCODE_MUX_V2I16:
case BI_OPCODE_MUX_V4I8:
hex |= (uint64_t) I->mux << 32;
hex |= (uint64_t)I->mux << 32;
break;
/* Add .eq flag */
@ -350,12 +439,13 @@ va_pack_alu(const bi_instr *I)
case BI_OPCODE_BRANCHZI:
pack_assert(I, I->cmpf == BI_CMPF_EQ || I->cmpf == BI_CMPF_NE);
if (I->cmpf == BI_CMPF_EQ) hex |= (1ull << 36);
if (I->cmpf == BI_CMPF_EQ)
hex |= (1ull << 36);
if (I->op == BI_OPCODE_BRANCHZI)
hex |= (0x1ull << 40); /* Absolute */
else
hex |= ((uint64_t) I->branch_offset & BITFIELD_MASK(27)) << 8;
hex |= ((uint64_t)I->branch_offset & BITFIELD_MASK(27)) << 8;
break;
@ -369,7 +459,7 @@ va_pack_alu(const bi_instr *I)
case BI_OPCODE_RSHIFT_XOR_I32:
case BI_OPCODE_RSHIFT_XOR_V2I16:
case BI_OPCODE_RSHIFT_XOR_V4I8:
hex |= (uint64_t) I->arithmetic << 34;
hex |= (uint64_t)I->arithmetic << 34;
break;
case BI_OPCODE_LEA_BUF_IMM:
@ -378,8 +468,8 @@ va_pack_alu(const bi_instr *I)
break;
case BI_OPCODE_LEA_ATTR_IMM:
hex |= ((uint64_t) I->table) << 16;
hex |= ((uint64_t) I->attribute_index) << 20;
hex |= ((uint64_t)I->table) << 16;
hex |= ((uint64_t)I->attribute_index) << 20;
break;
case BI_OPCODE_IADD_IMM_I32:
@ -387,13 +477,13 @@ va_pack_alu(const bi_instr *I)
case BI_OPCODE_IADD_IMM_V4I8:
case BI_OPCODE_FADD_IMM_F32:
case BI_OPCODE_FADD_IMM_V2F16:
hex |= ((uint64_t) I->index) << 8;
hex |= ((uint64_t)I->index) << 8;
break;
case BI_OPCODE_CLPER_I32:
hex |= ((uint64_t) I->inactive_result) << 22;
hex |= ((uint64_t) I->lane_op) << 32;
hex |= ((uint64_t) I->subgroup) << 36;
hex |= ((uint64_t)I->inactive_result) << 22;
hex |= ((uint64_t)I->lane_op) << 32;
hex |= ((uint64_t)I->subgroup) << 36;
break;
case BI_OPCODE_LD_VAR:
@ -406,35 +496,37 @@ va_pack_alu(const bi_instr *I)
case BI_OPCODE_LD_VAR_BUF_IMM_F32:
case BI_OPCODE_LD_VAR_SPECIAL:
if (I->op == BI_OPCODE_LD_VAR_SPECIAL)
hex |= ((uint64_t) I->varying_name) << 12; /* instead of index */
hex |= ((uint64_t)I->varying_name) << 12; /* instead of index */
else if (I->op == BI_OPCODE_LD_VAR_BUF_IMM_F16 ||
I->op == BI_OPCODE_LD_VAR_BUF_IMM_F32) {
hex |= ((uint64_t) I->index) << 16;
hex |= ((uint64_t)I->index) << 16;
} else if (I->op == BI_OPCODE_LD_VAR_IMM ||
I->op == BI_OPCODE_LD_VAR_FLAT_IMM) {
hex |= ((uint64_t) I->table) << 8;
hex |= ((uint64_t) I->index) << 12;
hex |= ((uint64_t)I->table) << 8;
hex |= ((uint64_t)I->index) << 12;
}
hex |= ((uint64_t) va_pack_source_format(I)) << 24;
hex |= ((uint64_t) I->update) << 36;
hex |= ((uint64_t) I->sample) << 38;
hex |= ((uint64_t)va_pack_source_format(I)) << 24;
hex |= ((uint64_t)I->update) << 36;
hex |= ((uint64_t)I->sample) << 38;
break;
case BI_OPCODE_LD_ATTR_IMM:
hex |= ((uint64_t) I->table) << 16;
hex |= ((uint64_t) I->attribute_index) << 20;
hex |= ((uint64_t)I->table) << 16;
hex |= ((uint64_t)I->attribute_index) << 20;
break;
case BI_OPCODE_LD_TEX_IMM:
case BI_OPCODE_LEA_TEX_IMM:
hex |= ((uint64_t) I->table) << 16;
hex |= ((uint64_t) I->texture_index) << 20;
hex |= ((uint64_t)I->table) << 16;
hex |= ((uint64_t)I->texture_index) << 20;
break;
case BI_OPCODE_ZS_EMIT:
if (I->stencil) hex |= (1 << 24);
if (I->z) hex |= (1 << 25);
if (I->stencil)
hex |= (1 << 24);
if (I->z)
hex |= (1 << 25);
break;
default:
@ -444,14 +536,14 @@ va_pack_alu(const bi_instr *I)
/* FMA_RSCALE.f32 special modes treated as extra opcodes */
if (I->op == BI_OPCODE_FMA_RSCALE_F32) {
pack_assert(I, I->special < 4);
hex |= ((uint64_t) I->special) << 48;
hex |= ((uint64_t)I->special) << 48;
}
/* Add the normal destination or a placeholder. Staging destinations are
* added elsewhere, as they require special handling for control fields.
*/
if (info.has_dest && info.nr_staging_dests == 0) {
hex |= (uint64_t) va_pack_dest(I) << 40;
hex |= (uint64_t)va_pack_dest(I) << 40;
} else if (info.nr_staging_dests == 0 && info.nr_staging_srcs == 0) {
pack_assert(I, I->nr_dests == 0);
hex |= 0xC0ull << 40; /* Placeholder */
@ -469,19 +561,24 @@ va_pack_alu(const bi_instr *I)
enum va_size size = src_info.size;
bi_index src = I->src[logical_i + src_offset];
hex |= (uint64_t) va_pack_src(I, logical_i + src_offset) << (8 * i);
hex |= (uint64_t)va_pack_src(I, logical_i + src_offset) << (8 * i);
if (src_info.notted) {
if (src.neg) hex |= (1ull << 35);
if (src.neg)
hex |= (1ull << 35);
} else if (src_info.absneg) {
unsigned neg_offs = 32 + 2 + ((2 - i) * 2);
unsigned abs_offs = 33 + 2 + ((2 - i) * 2);
if (src.neg) hex |= 1ull << neg_offs;
if (src.abs) hex |= 1ull << abs_offs;
if (src.neg)
hex |= 1ull << neg_offs;
if (src.abs)
hex |= 1ull << abs_offs;
} else {
if (src.neg) invalid_instruction(I, "negate");
if (src.abs) invalid_instruction(I, "absolute value");
if (src.neg)
invalid_instruction(I, "negate");
if (src.abs)
invalid_instruction(I, "absolute value");
}
if (src_info.swizzle) {
@ -489,50 +586,56 @@ va_pack_alu(const bi_instr *I)
unsigned S = src.swizzle;
pack_assert(I, size == VA_SIZE_16 || size == VA_SIZE_32);
uint64_t v = (size == VA_SIZE_32 ? va_pack_widen_f32(I, S) : va_pack_swizzle_f16(I, S));
uint64_t v = (size == VA_SIZE_32 ? va_pack_widen_f32(I, S)
: va_pack_swizzle_f16(I, S));
hex |= v << offs;
} else if (src_info.widen) {
unsigned offs = (i == 1) ? 26 : 36;
hex |= (uint64_t) va_pack_widen(I, src.swizzle, src_info.size) << offs;
hex |= (uint64_t)va_pack_widen(I, src.swizzle, src_info.size) << offs;
} else if (src_info.lane) {
unsigned offs = (I->op == BI_OPCODE_MKVEC_V2I8) ?
((i == 0) ? 38 : 36) :
28;
unsigned offs =
(I->op == BI_OPCODE_MKVEC_V2I8) ? ((i == 0) ? 38 : 36) : 28;
if (src_info.size == VA_SIZE_16) {
hex |= (src.swizzle == BI_SWIZZLE_H11 ? 1 : 0) << offs;
} else if (I->op == BI_OPCODE_BRANCHZ_I16) {
hex |= ((uint64_t) va_pack_combine(I, src.swizzle) << 37);
hex |= ((uint64_t)va_pack_combine(I, src.swizzle) << 37);
} else {
pack_assert(I, src_info.size == VA_SIZE_8);
unsigned comp = src.swizzle - BI_SWIZZLE_B0000;
pack_assert(I, comp < 4);
hex |= (uint64_t) comp << offs;
hex |= (uint64_t)comp << offs;
}
} else if (src_info.lanes) {
pack_assert(I, src_info.size == VA_SIZE_8);
pack_assert(I, i == 1);
hex |= (uint64_t) va_pack_shift_lanes(I, src.swizzle) << 26;
hex |= (uint64_t)va_pack_shift_lanes(I, src.swizzle) << 26;
} else if (src_info.combine) {
/* Treat as swizzle, subgroup ops not yet supported */
pack_assert(I, src_info.size == VA_SIZE_32);
pack_assert(I, i == 0);
hex |= (uint64_t) va_pack_widen_f32(I, src.swizzle) << 37;
hex |= (uint64_t)va_pack_widen_f32(I, src.swizzle) << 37;
} else if (src_info.halfswizzle) {
pack_assert(I, src_info.size == VA_SIZE_8);
pack_assert(I, i == 0);
hex |= (uint64_t) va_pack_halfswizzle(I, src.swizzle) << 36;
hex |= (uint64_t)va_pack_halfswizzle(I, src.swizzle) << 36;
} else if (src.swizzle != BI_SWIZZLE_H01) {
invalid_instruction(I, "swizzle");
}
}
if (info.saturate) hex |= (uint64_t) I->saturate << 30;
if (info.rhadd) hex |= va_pack_rhadd(I);
if (info.clamp) hex |= (uint64_t) I->clamp << 32;
if (info.round_mode) hex |= (uint64_t) I->round << 30;
if (info.condition) hex |= (uint64_t) I->cmpf << 32;
if (info.result_type) hex |= (uint64_t) I->result_type << 30;
if (info.saturate)
hex |= (uint64_t)I->saturate << 30;
if (info.rhadd)
hex |= va_pack_rhadd(I);
if (info.clamp)
hex |= (uint64_t)I->clamp << 32;
if (info.round_mode)
hex |= (uint64_t)I->round << 30;
if (info.condition)
hex |= (uint64_t)I->cmpf << 32;
if (info.result_type)
hex |= (uint64_t)I->result_type << 30;
return hex;
}
@ -541,37 +644,35 @@ static uint64_t
va_pack_byte_offset(const bi_instr *I)
{
int16_t offset = I->byte_offset;
if (offset != I->byte_offset) invalid_instruction(I, "byte offset");
if (offset != I->byte_offset)
invalid_instruction(I, "byte offset");
uint16_t offset_as_u16 = offset;
return ((uint64_t) offset_as_u16) << 8;
return ((uint64_t)offset_as_u16) << 8;
}
static uint64_t
va_pack_byte_offset_8(const bi_instr *I)
{
uint8_t offset = I->byte_offset;
if (offset != I->byte_offset) invalid_instruction(I, "byte offset");
if (offset != I->byte_offset)
invalid_instruction(I, "byte offset");
return ((uint64_t) offset) << 8;
return ((uint64_t)offset) << 8;
}
static uint64_t
va_pack_load(const bi_instr *I, bool buffer_descriptor)
{
const uint8_t load_lane_identity[8] = {
VA_LOAD_LANE_8_BIT_B0,
VA_LOAD_LANE_16_BIT_H0,
VA_LOAD_LANE_24_BIT_IDENTITY,
VA_LOAD_LANE_32_BIT_W0,
VA_LOAD_LANE_48_BIT_IDENTITY,
VA_LOAD_LANE_64_BIT_IDENTITY,
VA_LOAD_LANE_96_BIT_IDENTITY,
VA_LOAD_LANE_128_BIT_IDENTITY,
VA_LOAD_LANE_8_BIT_B0, VA_LOAD_LANE_16_BIT_H0,
VA_LOAD_LANE_24_BIT_IDENTITY, VA_LOAD_LANE_32_BIT_W0,
VA_LOAD_LANE_48_BIT_IDENTITY, VA_LOAD_LANE_64_BIT_IDENTITY,
VA_LOAD_LANE_96_BIT_IDENTITY, VA_LOAD_LANE_128_BIT_IDENTITY,
};
unsigned memory_size = (valhall_opcodes[I->op].exact >> 27) & 0x7;
uint64_t hex = (uint64_t) load_lane_identity[memory_size] << 36;
uint64_t hex = (uint64_t)load_lane_identity[memory_size] << 36;
// unsigned
hex |= (1ull << 39);
@ -579,10 +680,10 @@ va_pack_load(const bi_instr *I, bool buffer_descriptor)
if (!buffer_descriptor)
hex |= va_pack_byte_offset(I);
hex |= (uint64_t) va_pack_src(I, 0) << 0;
hex |= (uint64_t)va_pack_src(I, 0) << 0;
if (buffer_descriptor)
hex |= (uint64_t) va_pack_src(I, 1) << 8;
hex |= (uint64_t)va_pack_src(I, 1) << 8;
return hex;
}
@ -591,10 +692,14 @@ static uint64_t
va_pack_memory_access(const bi_instr *I)
{
switch (I->seg) {
case BI_SEG_TL: return VA_MEMORY_ACCESS_FORCE;
case BI_SEG_POS: return VA_MEMORY_ACCESS_ISTREAM;
case BI_SEG_VARY: return VA_MEMORY_ACCESS_ESTREAM;
default: return VA_MEMORY_ACCESS_NONE;
case BI_SEG_TL:
return VA_MEMORY_ACCESS_FORCE;
case BI_SEG_POS:
return VA_MEMORY_ACCESS_ISTREAM;
case BI_SEG_VARY:
return VA_MEMORY_ACCESS_ESTREAM;
default:
return VA_MEMORY_ACCESS_NONE;
}
}
@ -604,7 +709,7 @@ va_pack_store(const bi_instr *I)
uint64_t hex = va_pack_memory_access(I) << 24;
va_validate_register_pair(I, 1);
hex |= (uint64_t) va_pack_src(I, 1) << 0;
hex |= (uint64_t)va_pack_src(I, 1) << 0;
hex |= va_pack_byte_offset(I);
@ -615,11 +720,16 @@ static enum va_lod_mode
va_pack_lod_mode(const bi_instr *I)
{
switch (I->va_lod_mode) {
case BI_VA_LOD_MODE_ZERO_LOD: return VA_LOD_MODE_ZERO;
case BI_VA_LOD_MODE_COMPUTED_LOD: return VA_LOD_MODE_COMPUTED;
case BI_VA_LOD_MODE_EXPLICIT: return VA_LOD_MODE_EXPLICIT;
case BI_VA_LOD_MODE_COMPUTED_BIAS: return VA_LOD_MODE_COMPUTED_BIAS;
case BI_VA_LOD_MODE_GRDESC: return VA_LOD_MODE_GRDESC;
case BI_VA_LOD_MODE_ZERO_LOD:
return VA_LOD_MODE_ZERO;
case BI_VA_LOD_MODE_COMPUTED_LOD:
return VA_LOD_MODE_COMPUTED;
case BI_VA_LOD_MODE_EXPLICIT:
return VA_LOD_MODE_EXPLICIT;
case BI_VA_LOD_MODE_COMPUTED_BIAS:
return VA_LOD_MODE_COMPUTED_BIAS;
case BI_VA_LOD_MODE_GRDESC:
return VA_LOD_MODE_GRDESC;
}
invalid_instruction(I, "LOD mode");
@ -650,14 +760,22 @@ static enum va_register_format
va_pack_register_format(const bi_instr *I)
{
switch (I->register_format) {
case BI_REGISTER_FORMAT_AUTO: return VA_REGISTER_FORMAT_AUTO;
case BI_REGISTER_FORMAT_F32: return VA_REGISTER_FORMAT_F32;
case BI_REGISTER_FORMAT_F16: return VA_REGISTER_FORMAT_F16;
case BI_REGISTER_FORMAT_S32: return VA_REGISTER_FORMAT_S32;
case BI_REGISTER_FORMAT_S16: return VA_REGISTER_FORMAT_S16;
case BI_REGISTER_FORMAT_U32: return VA_REGISTER_FORMAT_U32;
case BI_REGISTER_FORMAT_U16: return VA_REGISTER_FORMAT_U16;
default: invalid_instruction(I, "register format");
case BI_REGISTER_FORMAT_AUTO:
return VA_REGISTER_FORMAT_AUTO;
case BI_REGISTER_FORMAT_F32:
return VA_REGISTER_FORMAT_F32;
case BI_REGISTER_FORMAT_F16:
return VA_REGISTER_FORMAT_F16;
case BI_REGISTER_FORMAT_S32:
return VA_REGISTER_FORMAT_S32;
case BI_REGISTER_FORMAT_S16:
return VA_REGISTER_FORMAT_S16;
case BI_REGISTER_FORMAT_U32:
return VA_REGISTER_FORMAT_U32;
case BI_REGISTER_FORMAT_U16:
return VA_REGISTER_FORMAT_U16;
default:
invalid_instruction(I, "register format");
}
}
@ -666,35 +784,34 @@ va_pack_instr(const bi_instr *I)
{
struct va_opcode_info info = valhall_opcodes[I->op];
uint64_t hex = info.exact | (((uint64_t) I->flow) << 59);
hex |= ((uint64_t) va_select_fau_page(I)) << 57;
uint64_t hex = info.exact | (((uint64_t)I->flow) << 59);
hex |= ((uint64_t)va_select_fau_page(I)) << 57;
if (info.slot)
hex |= ((uint64_t) I->slot << 30);
hex |= ((uint64_t)I->slot << 30);
if (info.sr_count) {
bool read = bi_opcode_props[I->op].sr_read;
bi_index sr = read ? I->src[0] : I->dest[0];
unsigned count = read ?
bi_count_read_registers(I, 0) :
bi_count_write_registers(I, 0);
unsigned count =
read ? bi_count_read_registers(I, 0) : bi_count_write_registers(I, 0);
hex |= ((uint64_t) count << 33);
hex |= (uint64_t) va_pack_reg(I, sr) << 40;
hex |= ((uint64_t) info.sr_control << 46);
hex |= ((uint64_t)count << 33);
hex |= (uint64_t)va_pack_reg(I, sr) << 40;
hex |= ((uint64_t)info.sr_control << 46);
}
if (info.sr_write_count) {
hex |= ((uint64_t) bi_count_write_registers(I, 0) - 1) << 36;
hex |= ((uint64_t) va_pack_reg(I, I->dest[0])) << 16;
hex |= ((uint64_t)bi_count_write_registers(I, 0) - 1) << 36;
hex |= ((uint64_t)va_pack_reg(I, I->dest[0])) << 16;
}
if (info.vecsize)
hex |= ((uint64_t) I->vecsize << 28);
hex |= ((uint64_t)I->vecsize << 28);
if (info.register_format)
hex |= ((uint64_t) va_pack_register_format(I)) << 24;
hex |= ((uint64_t)va_pack_register_format(I)) << 24;
switch (I->op) {
case BI_OPCODE_LOAD_I8:
@@ -738,18 +855,18 @@ va_pack_instr(const bi_instr *I)
/* 64-bit source */
va_validate_register_pair(I, 0);
hex |= (uint64_t) va_pack_src(I, 0) << 0;
hex |= (uint64_t)va_pack_src(I, 0) << 0;
hex |= va_pack_byte_offset_8(I);
hex |= ((uint64_t) va_pack_atom_opc_1(I)) << 22;
hex |= ((uint64_t)va_pack_atom_opc_1(I)) << 22;
break;
case BI_OPCODE_ATOM_I32:
case BI_OPCODE_ATOM_RETURN_I32:
/* 64-bit source */
va_validate_register_pair(I, 1);
hex |= (uint64_t) va_pack_src(I, 1) << 0;
hex |= (uint64_t)va_pack_src(I, 1) << 0;
hex |= va_pack_byte_offset_8(I);
hex |= ((uint64_t) va_pack_atom_opc(I)) << 22;
hex |= ((uint64_t)va_pack_atom_opc(I)) << 22;
if (I->op == BI_OPCODE_ATOM_RETURN_I32)
hex |= (0xc0ull << 40); // flags
@@ -764,56 +881,61 @@ va_pack_instr(const bi_instr *I)
hex |= va_pack_store(I);
/* Conversion descriptor */
hex |= (uint64_t) va_pack_src(I, 3) << 16;
hex |= (uint64_t)va_pack_src(I, 3) << 16;
break;
case BI_OPCODE_BLEND:
{
case BI_OPCODE_BLEND: {
/* Source 0 - Blend descriptor (64-bit) */
hex |= ((uint64_t) va_pack_src(I, 2)) << 0;
hex |= ((uint64_t)va_pack_src(I, 2)) << 0;
va_validate_register_pair(I, 2);
/* Target */
if (I->branch_offset & 0x7) invalid_instruction(I, "unaligned branch");
if (I->branch_offset & 0x7)
invalid_instruction(I, "unaligned branch");
hex |= ((I->branch_offset >> 3) << 8);
/* Source 2 - coverage mask */
hex |= ((uint64_t) va_pack_reg(I, I->src[1])) << 16;
hex |= ((uint64_t)va_pack_reg(I, I->src[1])) << 16;
/* Vector size */
unsigned vecsize = 4;
hex |= ((uint64_t) (vecsize - 1) << 28);
hex |= ((uint64_t)(vecsize - 1) << 28);
break;
}
case BI_OPCODE_TEX_SINGLE:
case BI_OPCODE_TEX_FETCH:
case BI_OPCODE_TEX_GATHER:
{
case BI_OPCODE_TEX_GATHER: {
/* Image to read from */
hex |= ((uint64_t) va_pack_src(I, 1)) << 0;
hex |= ((uint64_t)va_pack_src(I, 1)) << 0;
if (I->op == BI_OPCODE_TEX_FETCH && I->shadow)
invalid_instruction(I, "TEX_FETCH does not support .shadow");
if (I->array_enable) hex |= (1ull << 10);
if (I->texel_offset) hex |= (1ull << 11);
if (I->shadow) hex |= (1ull << 12);
if (I->skip) hex |= (1ull << 39);
if (!bi_is_regfmt_16(I->register_format)) hex |= (1ull << 46);
if (I->array_enable)
hex |= (1ull << 10);
if (I->texel_offset)
hex |= (1ull << 11);
if (I->shadow)
hex |= (1ull << 12);
if (I->skip)
hex |= (1ull << 39);
if (!bi_is_regfmt_16(I->register_format))
hex |= (1ull << 46);
if (I->op == BI_OPCODE_TEX_SINGLE)
hex |= ((uint64_t) va_pack_lod_mode(I)) << 13;
hex |= ((uint64_t)va_pack_lod_mode(I)) << 13;
if (I->op == BI_OPCODE_TEX_GATHER) {
if (I->integer_coordinates) hex |= (1 << 13);
hex |= ((uint64_t) I->fetch_component) << 14;
if (I->integer_coordinates)
hex |= (1 << 13);
hex |= ((uint64_t)I->fetch_component) << 14;
}
hex |= (I->write_mask << 22);
hex |= ((uint64_t) va_pack_register_type(I)) << 26;
hex |= ((uint64_t) I->dimension) << 28;
hex |= ((uint64_t)va_pack_register_type(I)) << 26;
hex |= ((uint64_t)I->dimension) << 28;
break;
}


@@ -22,9 +22,9 @@
* SOFTWARE.
*/
#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall.h"
#include "bi_builder.h"
void
va_count_instr_stats(bi_instr *I, struct va_stats *stats)
@@ -48,8 +48,8 @@ va_count_instr_stats(bi_instr *I, struct va_stats *stats)
/* Varying is scaled by 16-bit components interpolated */
case VA_UNIT_V:
stats->v += (I->vecsize + 1) *
(bi_is_regfmt_16(I->register_format) ? 1 : 2);
stats->v +=
(I->vecsize + 1) * (bi_is_regfmt_16(I->register_format) ? 1 : 2);
return;
/* We just count load/store and texturing for now */


@@ -21,15 +21,16 @@
* SOFTWARE.
*/
#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall.h"
#include "bi_builder.h"
/* Valhall has limits on access to fast-access uniforms:
*
* An instruction may access no more than a single 64-bit uniform slot.
* An instruction may access no more than 64-bits of combined uniforms and constants.
* An instruction may access no more than a single special immediate (e.g. lane_id).
* An instruction may access no more than 64-bits of combined uniforms and
* constants. An instruction may access no more than a single special immediate
* (e.g. lane_id).
*
* We validate these constraints.
*
@@ -114,7 +115,7 @@ bool
va_validate_fau(bi_instr *I)
{
bool valid = true;
struct fau_state fau = { .uniform_slot = -1 };
struct fau_state fau = {.uniform_slot = -1};
unsigned fau_page = va_select_fau_page(I);
bi_foreach_src(I, s) {
@@ -127,7 +128,7 @@ va_validate_fau(bi_instr *I)
void
va_repair_fau(bi_builder *b, bi_instr *I)
{
struct fau_state fau = { .uniform_slot = -1 };
struct fau_state fau = {.uniform_slot = -1};
unsigned fau_page = va_select_fau_page(I);
bi_foreach_src(I, s) {


@@ -73,43 +73,42 @@ enum va_unit {
};
struct va_src_info {
bool absneg : 1;
bool swizzle : 1;
bool notted : 1;
bool lane : 1;
bool lanes : 1;
bool halfswizzle : 1;
bool widen : 1;
bool combine : 1;
bool absneg : 1;
bool swizzle : 1;
bool notted : 1;
bool lane : 1;
bool lanes : 1;
bool halfswizzle : 1;
bool widen : 1;
bool combine : 1;
enum va_size size : 2;
} __attribute__((packed));
struct va_opcode_info {
uint64_t exact;
struct va_src_info srcs[4];
uint8_t type_size : 8;
enum va_unit unit : 3;
unsigned nr_srcs : 3;
unsigned nr_staging_srcs : 2;
uint8_t type_size : 8;
enum va_unit unit : 3;
unsigned nr_srcs : 3;
unsigned nr_staging_srcs : 2;
unsigned nr_staging_dests : 2;
bool has_dest : 1;
bool is_signed : 1;
bool clamp : 1;
bool saturate : 1;
bool rhadd : 1;
bool round_mode : 1;
bool condition : 1;
bool result_type : 1;
bool vecsize : 1;
bool register_format : 1;
bool slot : 1;
bool sr_count : 1;
bool sr_write_count : 1;
unsigned sr_control : 2;
bool has_dest : 1;
bool is_signed : 1;
bool clamp : 1;
bool saturate : 1;
bool rhadd : 1;
bool round_mode : 1;
bool condition : 1;
bool result_type : 1;
bool vecsize : 1;
bool register_format : 1;
bool slot : 1;
bool sr_count : 1;
bool sr_write_count : 1;
unsigned sr_control : 2;
};
extern const struct va_opcode_info
valhall_opcodes[BI_NUM_OPCODES];
extern const struct va_opcode_info valhall_opcodes[BI_NUM_OPCODES];
/* Bifrost specifies the source of bitwise operations as (A, B, shift), but
* Valhall specifies (A, shift, B). We follow Bifrost conventions in the


@@ -47,8 +47,7 @@ pan_ioctl_get_param(int fd, unsigned long request, void *arg)
struct drm_panfrost_get_param *gp = arg;
switch (gp->param) {
case DRM_PANFROST_PARAM_GPU_PROD_ID:
{
case DRM_PANFROST_PARAM_GPU_PROD_ID: {
char *override_version = getenv("PAN_GPU_ID");
if (override_version)


@@ -13,22 +13,21 @@
#include "pan_pps_perf.h"
namespace pps
{
namespace pps {
/// @brief Panfrost implementation of PPS driver.
/// This driver queries the GPU through `drm/panfrost_drm.h`, using performance counters ioctls,
/// which can be enabled by setting a kernel parameter: `modprobe panfrost unstable_ioctls=1`.
/// The ioctl needs a buffer to copy data from kernel to user space.
class PanfrostDriver : public Driver
{
public:
/// This driver queries the GPU through `drm/panfrost_drm.h`, using performance
/// counters ioctls, which can be enabled by setting a kernel parameter:
/// `modprobe panfrost unstable_ioctls=1`. The ioctl needs a buffer to copy data
/// from kernel to user space.
class PanfrostDriver : public Driver {
public:
static inline PanfrostDriver &into(Driver &dri);
static inline const PanfrostDriver &into(const Driver &dri);
/// @param A list of mali counter names
/// @return A pair with two lists: counter groups and available counters
static std::pair<std::vector<CounterGroup>, std::vector<Counter>> create_available_counters(
const PanfrostPerf& perf);
static std::pair<std::vector<CounterGroup>, std::vector<Counter>>
create_available_counters(const PanfrostPerf &perf);
PanfrostDriver();
~PanfrostDriver();
@@ -50,12 +49,14 @@ class PanfrostDriver : public Driver
std::unique_ptr<PanfrostPerf> perf = nullptr;
};
PanfrostDriver &PanfrostDriver::into(Driver &dri)
PanfrostDriver &
PanfrostDriver::into(Driver &dri)
{
return reinterpret_cast<PanfrostDriver &>(dri);
}
const PanfrostDriver &PanfrostDriver::into(const Driver &dri)
const PanfrostDriver &
PanfrostDriver::into(const Driver &dri)
{
return reinterpret_cast<const PanfrostDriver &>(dri);
}


@@ -10,35 +10,32 @@
struct panfrost_device;
struct panfrost_perf;
namespace pps
{
class PanfrostDevice
{
public:
namespace pps {
class PanfrostDevice {
public:
PanfrostDevice(int fd);
~PanfrostDevice();
PanfrostDevice(const PanfrostDevice &) = delete;
PanfrostDevice &operator=(const PanfrostDevice &) = delete;
PanfrostDevice(PanfrostDevice&&);
PanfrostDevice& operator=(PanfrostDevice&&);
PanfrostDevice(PanfrostDevice &&);
PanfrostDevice &operator=(PanfrostDevice &&);
void *ctx = nullptr;
struct panfrost_device* dev = nullptr;
struct panfrost_device *dev = nullptr;
};
class PanfrostPerf
{
public:
PanfrostPerf(const PanfrostDevice& dev);
class PanfrostPerf {
public:
PanfrostPerf(const PanfrostDevice &dev);
~PanfrostPerf();
PanfrostPerf(const PanfrostPerf &) = delete;
PanfrostPerf &operator=(const PanfrostPerf &) = delete;
PanfrostPerf(PanfrostPerf&&);
PanfrostPerf& operator=(PanfrostPerf&&);
PanfrostPerf(PanfrostPerf &&);
PanfrostPerf &operator=(PanfrostPerf &&);
int enable() const;
void disable() const;


@@ -28,11 +28,11 @@
#ifndef __PANFROST_JOB_H__
#define __PANFROST_JOB_H__
#include <stdint.h>
#include <stdbool.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
typedef uint8_t u8;
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
@@ -68,13 +68,13 @@ typedef uint64_t mali_ptr;
/* These formats seem to largely duplicate the others. They're used at least
* for Bifrost framebuffer output.
*/
#define MALI_FORMAT_SPECIAL2 (7 << 5)
#define MALI_EXTRACT_TYPE(fmt) ((fmt) & 0xe0)
#define MALI_FORMAT_SPECIAL2 (7 << 5)
#define MALI_EXTRACT_TYPE(fmt) ((fmt)&0xe0)
/* If the high 3 bits are 3 to 6 these two bits say how many components
* there are.
*/
#define MALI_NR_CHANNELS(n) ((n - 1) << 3)
#define MALI_NR_CHANNELS(n) ((n - 1) << 3)
#define MALI_EXTRACT_CHANNELS(fmt) ((((fmt) >> 3) & 3) + 1)
/* If the high 3 bits are 3 to 6, then the low 3 bits say how big each
@@ -93,7 +93,7 @@ typedef uint64_t mali_ptr;
/* For MALI_FORMAT_SINT it means a half-float (e.g. RG16F). For
* MALI_FORMAT_UNORM, it means a 32-bit float.
*/
#define MALI_CHANNEL_FLOAT 7
#define MALI_CHANNEL_FLOAT 7
#define MALI_EXTRACT_BITS(fmt) (fmt & 0x7)
#define MALI_EXTRACT_INDEX(pixfmt) (((pixfmt) >> 12) & 0xFF)
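
As a sanity check on the field layout these macros describe, here is a tiny standalone round trip (not part of the commit; the macro copies and the example format word are purely illustrative):

  #include <assert.h>
  #include <stdio.h>

  /* Copied from the header above: channel-count and per-channel-size fields
   * of the legacy mali format word. */
  #define MALI_NR_CHANNELS(n)        ((n - 1) << 3)
  #define MALI_EXTRACT_CHANNELS(fmt) ((((fmt) >> 3) & 3) + 1)
  #define MALI_EXTRACT_BITS(fmt)     (fmt & 0x7)

  int
  main(void)
  {
     /* A made-up 4-channel format with per-channel size code 3: both fields
      * survive the pack/extract round trip. */
     unsigned fmt = MALI_NR_CHANNELS(4) | 3;

     assert(MALI_EXTRACT_CHANNELS(fmt) == 4);
     assert(MALI_EXTRACT_BITS(fmt) == 3);
     printf("fmt = 0x%x\n", fmt);
     return 0;
  }
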
@@ -241,18 +241,18 @@ typedef uint64_t mali_ptr;
/* Used for lod encoding. Thanks @urjaman for pointing out these routines can
* be cleaned up a lot. */
#define DECODE_FIXED_16(x) ((float) (x / 256.0))
#define DECODE_FIXED_16(x) ((float)(x / 256.0))
static inline int16_t
FIXED_16(float x, bool allow_negative)
{
/* Clamp inputs, accounting for float error */
float max_lod = (32.0 - (1.0 / 512.0));
float min_lod = allow_negative ? -max_lod : 0.0;
/* Clamp inputs, accounting for float error */
float max_lod = (32.0 - (1.0 / 512.0));
float min_lod = allow_negative ? -max_lod : 0.0;
x = ((x > max_lod) ? max_lod : ((x < min_lod) ? min_lod : x));
x = ((x > max_lod) ? max_lod : ((x < min_lod) ? min_lod : x));
return (int) (x * 256.0);
return (int)(x * 256.0);
}
#endif /* __PANFROST_JOB_H__ */
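
The 8.8 fixed-point LOD helpers above are easiest to check with a concrete value. A standalone sketch (not part of this change; the helpers are copied verbatim, the main() is only for illustration):

  #include <assert.h>
  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  /* Copied from above: 8.8 fixed-point LOD helpers. */
  #define DECODE_FIXED_16(x) ((float)(x / 256.0))

  static inline int16_t
  FIXED_16(float x, bool allow_negative)
  {
     float max_lod = (32.0 - (1.0 / 512.0));
     float min_lod = allow_negative ? -max_lod : 0.0;

     x = ((x > max_lod) ? max_lod : ((x < min_lod) ? min_lod : x));

     return (int)(x * 256.0);
  }

  int
  main(void)
  {
     /* LOD 2.5 encodes as 2.5 * 256 = 640 and decodes back exactly. */
     assert(FIXED_16(2.5f, false) == 640);
     assert(DECODE_FIXED_16(640) == 2.5f);

     /* Negative LODs are clamped to 0 unless explicitly allowed. */
     assert(FIXED_16(-1.0f, false) == 0);
     assert(FIXED_16(-1.0f, true) == -256);

     /* Out-of-range LODs clamp just below 32. */
     printf("max encodable LOD: %f\n", DECODE_FIXED_16(FIXED_16(100.0f, false)));
     return 0;
  }
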

File diff suppressed because it is too large


@@ -36,54 +36,54 @@ extern FILE *pandecode_dump_stream;
void pandecode_dump_file_open(void);
struct pandecode_mapped_memory {
struct rb_node node;
size_t length;
void *addr;
uint64_t gpu_va;
bool ro;
char name[32];
struct rb_node node;
size_t length;
void *addr;
uint64_t gpu_va;
bool ro;
char name[32];
};
char *pointer_as_memory_reference(uint64_t ptr);
struct pandecode_mapped_memory *pandecode_find_mapped_gpu_mem_containing(uint64_t addr);
struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing(uint64_t addr);
void pandecode_map_read_write(void);
void pandecode_dump_mappings(void);
static inline void *
__pandecode_fetch_gpu_mem(uint64_t gpu_va, size_t size,
int line, const char *filename)
__pandecode_fetch_gpu_mem(uint64_t gpu_va, size_t size, int line,
const char *filename)
{
const struct pandecode_mapped_memory *mem =
pandecode_find_mapped_gpu_mem_containing(gpu_va);
const struct pandecode_mapped_memory *mem =
pandecode_find_mapped_gpu_mem_containing(gpu_va);
if (!mem) {
fprintf(stderr, "Access to unknown memory %" PRIx64 " in %s:%d\n",
gpu_va, filename, line);
assert(0);
}
if (!mem) {
fprintf(stderr, "Access to unknown memory %" PRIx64 " in %s:%d\n", gpu_va,
filename, line);
assert(0);
}
assert(size + (gpu_va - mem->gpu_va) <= mem->length);
assert(size + (gpu_va - mem->gpu_va) <= mem->length);
return mem->addr + gpu_va - mem->gpu_va;
return mem->addr + gpu_va - mem->gpu_va;
}
#define pandecode_fetch_gpu_mem(gpu_va, size) \
__pandecode_fetch_gpu_mem(gpu_va, size, __LINE__, __FILE__)
#define pandecode_fetch_gpu_mem(gpu_va, size) \
__pandecode_fetch_gpu_mem(gpu_va, size, __LINE__, __FILE__)
/* Returns a validated pointer to mapped GPU memory with the given pointer type,
* size automatically determined from the pointer type
*/
#define PANDECODE_PTR(gpu_va, type) \
((type*)(__pandecode_fetch_gpu_mem(gpu_va, sizeof(type), \
__LINE__, __FILE__)))
#define PANDECODE_PTR(gpu_va, type) \
((type *)(__pandecode_fetch_gpu_mem(gpu_va, sizeof(type), __LINE__, \
__FILE__)))
/* Usage: <variable type> PANDECODE_PTR_VAR(name, gpu_va) */
#define PANDECODE_PTR_VAR(name, gpu_va) \
name = __pandecode_fetch_gpu_mem(gpu_va, sizeof(*name), \
__LINE__, __FILE__)
#define PANDECODE_PTR_VAR(name, gpu_va) \
name = __pandecode_fetch_gpu_mem(gpu_va, sizeof(*name), __LINE__, __FILE__)
/* Forward declare for all supported gens to permit thunking */
void pandecode_jc_v4(mali_ptr jc_gpu_va, unsigned gpu_id);
@@ -101,44 +101,44 @@ void pandecode_abort_on_fault_v9(mali_ptr jc_gpu_va);
static inline void
pan_hexdump(FILE *fp, const uint8_t *hex, size_t cnt, bool with_strings)
{
for (unsigned i = 0; i < cnt; ++i) {
if ((i & 0xF) == 0)
fprintf(fp, "%06X ", i);
for (unsigned i = 0; i < cnt; ++i) {
if ((i & 0xF) == 0)
fprintf(fp, "%06X ", i);
uint8_t v = hex[i];
uint8_t v = hex[i];
if (v == 0 && (i & 0xF) == 0) {
/* Check if we're starting an aligned run of zeroes */
unsigned zero_count = 0;
if (v == 0 && (i & 0xF) == 0) {
/* Check if we're starting an aligned run of zeroes */
unsigned zero_count = 0;
for (unsigned j = i; j < cnt; ++j) {
if (hex[j] == 0)
zero_count++;
else
break;
}
for (unsigned j = i; j < cnt; ++j) {
if (hex[j] == 0)
zero_count++;
else
break;
}
if (zero_count >= 32) {
fprintf(fp, "*\n");
i += (zero_count & ~0xF) - 1;
continue;
}
}
if (zero_count >= 32) {
fprintf(fp, "*\n");
i += (zero_count & ~0xF) - 1;
continue;
}
}
fprintf(fp, "%02X ", hex[i]);
if ((i & 0xF) == 0xF && with_strings) {
fprintf(fp, " | ");
for (unsigned j = i & ~0xF; j <= i; ++j) {
uint8_t c = hex[j];
fputc((c < 32 || c > 128) ? '.' : c, fp);
}
}
fprintf(fp, "%02X ", hex[i]);
if ((i & 0xF) == 0xF && with_strings) {
fprintf(fp, " | ");
for (unsigned j = i & ~0xF; j <= i; ++j) {
uint8_t c = hex[j];
fputc((c < 32 || c > 128) ? '.' : c, fp);
}
}
if ((i & 0xF) == 0xF)
fprintf(fp, "\n");
}
if ((i & 0xF) == 0xF)
fprintf(fp, "\n");
}
fprintf(fp, "\n");
fprintf(fp, "\n");
}
#endif /* __MMAP_TRACE_H__ */


@@ -23,18 +23,18 @@
* SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include "decode.h"
#include "util/macros.h"
#include "util/simple_mtx.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/simple_mtx.h"
#include "decode.h"
FILE *pandecode_dump_stream;
@@ -46,8 +46,8 @@ static struct util_dynarray ro_mappings;
static simple_mtx_t pandecode_lock = SIMPLE_MTX_INITIALIZER;
#define to_mapped_memory(x) \
rb_node_data(struct pandecode_mapped_memory, x, node)
#define to_mapped_memory(x) \
rb_node_data(struct pandecode_mapped_memory, x, node)
/*
* Compare a GPU VA to a node, considering a GPU VA to be equal to a node if it
@@ -57,147 +57,147 @@ static simple_mtx_t pandecode_lock = SIMPLE_MTX_INITIALIZER;
static int
pandecode_cmp_key(const struct rb_node *lhs, const void *key)
{
struct pandecode_mapped_memory *mem = to_mapped_memory(lhs);
uint64_t *gpu_va = (uint64_t *) key;
struct pandecode_mapped_memory *mem = to_mapped_memory(lhs);
uint64_t *gpu_va = (uint64_t *)key;
if (mem->gpu_va <= *gpu_va && *gpu_va < (mem->gpu_va + mem->length))
return 0;
else
return mem->gpu_va - *gpu_va;
if (mem->gpu_va <= *gpu_va && *gpu_va < (mem->gpu_va + mem->length))
return 0;
else
return mem->gpu_va - *gpu_va;
}
static int
pandecode_cmp(const struct rb_node *lhs, const struct rb_node *rhs)
{
return to_mapped_memory(lhs)->gpu_va - to_mapped_memory(rhs)->gpu_va;
return to_mapped_memory(lhs)->gpu_va - to_mapped_memory(rhs)->gpu_va;
}
static struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing_rw(uint64_t addr)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);
struct rb_node *node = rb_tree_search(&mmap_tree, &addr, pandecode_cmp_key);
struct rb_node *node = rb_tree_search(&mmap_tree, &addr, pandecode_cmp_key);
return to_mapped_memory(node);
return to_mapped_memory(node);
}
struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing(uint64_t addr)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);
struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing_rw(addr);
struct pandecode_mapped_memory *mem =
pandecode_find_mapped_gpu_mem_containing_rw(addr);
if (mem && mem->addr && !mem->ro) {
mprotect(mem->addr, mem->length, PROT_READ);
mem->ro = true;
util_dynarray_append(&ro_mappings, struct pandecode_mapped_memory *, mem);
}
if (mem && mem->addr && !mem->ro) {
mprotect(mem->addr, mem->length, PROT_READ);
mem->ro = true;
util_dynarray_append(&ro_mappings, struct pandecode_mapped_memory *, mem);
}
return mem;
return mem;
}
void
pandecode_map_read_write(void)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);
util_dynarray_foreach(&ro_mappings, struct pandecode_mapped_memory *, mem) {
(*mem)->ro = false;
mprotect((*mem)->addr, (*mem)->length, PROT_READ | PROT_WRITE);
}
util_dynarray_clear(&ro_mappings);
util_dynarray_foreach(&ro_mappings, struct pandecode_mapped_memory *, mem) {
(*mem)->ro = false;
mprotect((*mem)->addr, (*mem)->length, PROT_READ | PROT_WRITE);
}
util_dynarray_clear(&ro_mappings);
}
static void
pandecode_add_name(struct pandecode_mapped_memory *mem, uint64_t gpu_va, const char *name)
pandecode_add_name(struct pandecode_mapped_memory *mem, uint64_t gpu_va,
const char *name)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);
if (!name) {
/* If we don't have a name, assign one */
if (!name) {
/* If we don't have a name, assign one */
snprintf(mem->name, sizeof(mem->name) - 1,
"memory_%" PRIx64, gpu_va);
} else {
assert((strlen(name) + 1) < sizeof(mem->name));
memcpy(mem->name, name, strlen(name) + 1);
}
snprintf(mem->name, sizeof(mem->name) - 1, "memory_%" PRIx64, gpu_va);
} else {
assert((strlen(name) + 1) < sizeof(mem->name));
memcpy(mem->name, name, strlen(name) + 1);
}
}
void
pandecode_inject_mmap(uint64_t gpu_va, void *cpu, unsigned sz, const char *name)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);
/* First, search if we already mapped this and are just updating an address */
/* First, search if we already mapped this and are just updating an address */
struct pandecode_mapped_memory *existing =
pandecode_find_mapped_gpu_mem_containing_rw(gpu_va);
struct pandecode_mapped_memory *existing =
pandecode_find_mapped_gpu_mem_containing_rw(gpu_va);
if (existing && existing->gpu_va == gpu_va) {
existing->length = sz;
existing->addr = cpu;
pandecode_add_name(existing, gpu_va, name);
} else {
/* Otherwise, add a fresh mapping */
struct pandecode_mapped_memory *mapped_mem = NULL;
if (existing && existing->gpu_va == gpu_va) {
existing->length = sz;
existing->addr = cpu;
pandecode_add_name(existing, gpu_va, name);
} else {
/* Otherwise, add a fresh mapping */
struct pandecode_mapped_memory *mapped_mem = NULL;
mapped_mem = calloc(1, sizeof(*mapped_mem));
mapped_mem->gpu_va = gpu_va;
mapped_mem->length = sz;
mapped_mem->addr = cpu;
pandecode_add_name(mapped_mem, gpu_va, name);
mapped_mem = calloc(1, sizeof(*mapped_mem));
mapped_mem->gpu_va = gpu_va;
mapped_mem->length = sz;
mapped_mem->addr = cpu;
pandecode_add_name(mapped_mem, gpu_va, name);
/* Add it to the tree */
rb_tree_insert(&mmap_tree, &mapped_mem->node, pandecode_cmp);
}
/* Add it to the tree */
rb_tree_insert(&mmap_tree, &mapped_mem->node, pandecode_cmp);
}
simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}
void
pandecode_inject_free(uint64_t gpu_va, unsigned sz)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);
struct pandecode_mapped_memory *mem =
pandecode_find_mapped_gpu_mem_containing_rw(gpu_va);
struct pandecode_mapped_memory *mem =
pandecode_find_mapped_gpu_mem_containing_rw(gpu_va);
if (mem) {
assert(mem->gpu_va == gpu_va);
assert(mem->length == sz);
if (mem) {
assert(mem->gpu_va == gpu_va);
assert(mem->length == sz);
rb_tree_remove(&mmap_tree, &mem->node);
free(mem);
}
rb_tree_remove(&mmap_tree, &mem->node);
free(mem);
}
simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}
char *
pointer_as_memory_reference(uint64_t ptr)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);
struct pandecode_mapped_memory *mapped;
char *out = malloc(128);
struct pandecode_mapped_memory *mapped;
char *out = malloc(128);
/* Try to find the corresponding mapped zone */
/* Try to find the corresponding mapped zone */
mapped = pandecode_find_mapped_gpu_mem_containing_rw(ptr);
mapped = pandecode_find_mapped_gpu_mem_containing_rw(ptr);
if (mapped) {
snprintf(out, 128, "%s + %d", mapped->name, (int) (ptr - mapped->gpu_va));
return out;
}
if (mapped) {
snprintf(out, 128, "%s + %d", mapped->name, (int)(ptr - mapped->gpu_va));
return out;
}
/* Just use the raw address if other options are exhausted */
snprintf(out, 128, "0x%" PRIx64, ptr);
return out;
/* Just use the raw address if other options are exhausted */
snprintf(out, 128, "0x%" PRIx64, ptr);
return out;
}
static int pandecode_dump_frame_count = 0;
@@ -207,129 +207,153 @@ static bool force_stderr = false;
void
pandecode_dump_file_open(void)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);
if (pandecode_dump_stream)
return;
if (pandecode_dump_stream)
return;
/* This does a getenv every frame, so it is possible to use
* setenv to change the base at runtime.
*/
const char *dump_file_base = debug_get_option("PANDECODE_DUMP_FILE", "pandecode.dump");
if (force_stderr || !strcmp(dump_file_base, "stderr"))
pandecode_dump_stream = stderr;
else {
char buffer[1024];
snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base, pandecode_dump_frame_count);
printf("pandecode: dump command stream to file %s\n", buffer);
pandecode_dump_stream = fopen(buffer, "w");
if (!pandecode_dump_stream)
fprintf(stderr,
"pandecode: failed to open command stream log file %s\n",
buffer);
}
/* This does a getenv every frame, so it is possible to use
* setenv to change the base at runtime.
*/
const char *dump_file_base =
debug_get_option("PANDECODE_DUMP_FILE", "pandecode.dump");
if (force_stderr || !strcmp(dump_file_base, "stderr"))
pandecode_dump_stream = stderr;
else {
char buffer[1024];
snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base,
pandecode_dump_frame_count);
printf("pandecode: dump command stream to file %s\n", buffer);
pandecode_dump_stream = fopen(buffer, "w");
if (!pandecode_dump_stream)
fprintf(stderr,
"pandecode: failed to open command stream log file %s\n",
buffer);
}
}
static void
pandecode_dump_file_close(void)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);
if (pandecode_dump_stream && pandecode_dump_stream != stderr) {
if (fclose(pandecode_dump_stream))
perror("pandecode: dump file");
if (pandecode_dump_stream && pandecode_dump_stream != stderr) {
if (fclose(pandecode_dump_stream))
perror("pandecode: dump file");
pandecode_dump_stream = NULL;
}
pandecode_dump_stream = NULL;
}
}
void
pandecode_initialize(bool to_stderr)
{
force_stderr = to_stderr;
rb_tree_init(&mmap_tree);
util_dynarray_init(&ro_mappings, NULL);
force_stderr = to_stderr;
rb_tree_init(&mmap_tree);
util_dynarray_init(&ro_mappings, NULL);
}
void
pandecode_next_frame(void)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);
pandecode_dump_file_close();
pandecode_dump_frame_count++;
pandecode_dump_file_close();
pandecode_dump_frame_count++;
simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}
void
pandecode_close(void)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);
rb_tree_foreach_safe(struct pandecode_mapped_memory, it, &mmap_tree, node) {
rb_tree_remove(&mmap_tree, &it->node);
free(it);
}
rb_tree_foreach_safe(struct pandecode_mapped_memory, it, &mmap_tree, node) {
rb_tree_remove(&mmap_tree, &it->node);
free(it);
}
util_dynarray_fini(&ro_mappings);
pandecode_dump_file_close();
util_dynarray_fini(&ro_mappings);
pandecode_dump_file_close();
simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}
void
pandecode_dump_mappings(void)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);
pandecode_dump_file_open();
pandecode_dump_file_open();
rb_tree_foreach(struct pandecode_mapped_memory, it, &mmap_tree, node) {
if (!it->addr || !it->length)
continue;
rb_tree_foreach(struct pandecode_mapped_memory, it, &mmap_tree, node) {
if (!it->addr || !it->length)
continue;
fprintf(pandecode_dump_stream, "Buffer: %s gpu %" PRIx64 "\n\n",
it->name, it->gpu_va);
fprintf(pandecode_dump_stream, "Buffer: %s gpu %" PRIx64 "\n\n", it->name,
it->gpu_va);
pan_hexdump(pandecode_dump_stream, it->addr, it->length, false);
fprintf(pandecode_dump_stream, "\n");
}
pan_hexdump(pandecode_dump_stream, it->addr, it->length, false);
fprintf(pandecode_dump_stream, "\n");
}
fflush(pandecode_dump_stream);
simple_mtx_unlock(&pandecode_lock);
fflush(pandecode_dump_stream);
simple_mtx_unlock(&pandecode_lock);
}
void
pandecode_abort_on_fault(mali_ptr jc_gpu_va, unsigned gpu_id)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);
switch (pan_arch(gpu_id)) {
case 4: pandecode_abort_on_fault_v4(jc_gpu_va); break;
case 5: pandecode_abort_on_fault_v5(jc_gpu_va); break;
case 6: pandecode_abort_on_fault_v6(jc_gpu_va); break;
case 7: pandecode_abort_on_fault_v7(jc_gpu_va); break;
case 9: pandecode_abort_on_fault_v9(jc_gpu_va); break;
default: unreachable("Unsupported architecture");
}
switch (pan_arch(gpu_id)) {
case 4:
pandecode_abort_on_fault_v4(jc_gpu_va);
break;
case 5:
pandecode_abort_on_fault_v5(jc_gpu_va);
break;
case 6:
pandecode_abort_on_fault_v6(jc_gpu_va);
break;
case 7:
pandecode_abort_on_fault_v7(jc_gpu_va);
break;
case 9:
pandecode_abort_on_fault_v9(jc_gpu_va);
break;
default:
unreachable("Unsupported architecture");
}
simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}
void
pandecode_jc(mali_ptr jc_gpu_va, unsigned gpu_id)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);
switch (pan_arch(gpu_id)) {
case 4: pandecode_jc_v4(jc_gpu_va, gpu_id); break;
case 5: pandecode_jc_v5(jc_gpu_va, gpu_id); break;
case 6: pandecode_jc_v6(jc_gpu_va, gpu_id); break;
case 7: pandecode_jc_v7(jc_gpu_va, gpu_id); break;
case 9: pandecode_jc_v9(jc_gpu_va, gpu_id); break;
default: unreachable("Unsupported architecture");
}
switch (pan_arch(gpu_id)) {
case 4:
pandecode_jc_v4(jc_gpu_va, gpu_id);
break;
case 5:
pandecode_jc_v5(jc_gpu_va, gpu_id);
break;
case 6:
pandecode_jc_v6(jc_gpu_va, gpu_id);
break;
case 7:
pandecode_jc_v7(jc_gpu_va, gpu_id);
break;
case 9:
pandecode_jc_v9(jc_gpu_va, gpu_id);
break;
default:
unreachable("Unsupported architecture");
}
simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}


@@ -56,45 +56,45 @@
static inline unsigned
pan_arch(unsigned gpu_id)
{
switch (gpu_id) {
case 0x600:
case 0x620:
case 0x720:
return 4;
case 0x750:
case 0x820:
case 0x830:
case 0x860:
case 0x880:
return 5;
default:
return gpu_id >> 12;
}
switch (gpu_id) {
case 0x600:
case 0x620:
case 0x720:
return 4;
case 0x750:
case 0x820:
case 0x830:
case 0x860:
case 0x880:
return 5;
default:
return gpu_id >> 12;
}
}
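
For reference, the mapping above can be exercised with a tiny standalone harness (not part of this change; 0x7212 is only an illustrative Bifrost-era product ID whose top nibble is 7):

  #include <assert.h>
  #include <stdio.h>

  /* Copied from above: Midgard products use explicit IDs, everything newer
   * encodes the architecture major version in the top nibble. */
  static inline unsigned
  pan_arch(unsigned gpu_id)
  {
     switch (gpu_id) {
     case 0x600:
     case 0x620:
     case 0x720:
        return 4;
     case 0x750:
     case 0x820:
     case 0x830:
     case 0x860:
     case 0x880:
        return 5;
     default:
        return gpu_id >> 12;
     }
  }

  int
  main(void)
  {
     assert(pan_arch(0x720) == 4);  /* listed Midgard ID */
     assert(pan_arch(0x860) == 5);  /* listed Midgard ID */
     assert(pan_arch(0x7212) == 7); /* top nibble carries the arch */
     printf("ok\n");
     return 0;
  }
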
/* Base macro defined on the command line. */
#ifndef PAN_ARCH
# include "genxml/common_pack.h"
#include "genxml/common_pack.h"
#else
/* Suffixing macros */
#if (PAN_ARCH == 4)
# define GENX(X) X##_v4
# include "genxml/v4_pack.h"
#define GENX(X) X##_v4
#include "genxml/v4_pack.h"
#elif (PAN_ARCH == 5)
# define GENX(X) X##_v5
# include "genxml/v5_pack.h"
#define GENX(X) X##_v5
#include "genxml/v5_pack.h"
#elif (PAN_ARCH == 6)
# define GENX(X) X##_v6
# include "genxml/v6_pack.h"
#define GENX(X) X##_v6
#include "genxml/v6_pack.h"
#elif (PAN_ARCH == 7)
# define GENX(X) X##_v7
# include "genxml/v7_pack.h"
#define GENX(X) X##_v7
#include "genxml/v7_pack.h"
#elif (PAN_ARCH == 9)
# define GENX(X) X##_v9
# include "genxml/v9_pack.h"
#define GENX(X) X##_v9
#include "genxml/v9_pack.h"
#else
# error "Need to add suffixing macro for this architecture"
#error "Need to add suffixing macro for this architecture"
#endif
#endif /* PAN_ARCH */
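
The GENX() scheme is plain token pasting; the sketch below (outside the tree, with an invented symbol name) shows what a per-gen translation unit compiled with PAN_ARCH=7 effectively sees:

  #include <stdio.h>

  /* Stand-in for a v7 build: GENX(foo) expands to foo_v7, so each per-gen
   * object file gets a uniquely named copy of every GENX'd symbol. */
  #define PAN_ARCH 7
  #define GENX(X) X##_v7

  static void
  GENX(pan_example_init)(void) /* compiles as pan_example_init_v7() */
  {
     printf("initialized v%d path\n", PAN_ARCH);
  }

  int
  main(void)
  {
     pan_example_init_v7(); /* callers link against the suffixed name */
     return 0;
  }
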


@@ -50,8 +50,8 @@
* must also be cache-line aligned, so there can sometimes be a bit of padding
* between the header and body.
*
* As an example, a 64x64 RGBA framebuffer contains 64/16 = 4 tiles horizontally and
* 4 tiles vertically. There are 4*4=16 tiles in total, each containing 16
* As an example, a 64x64 RGBA framebuffer contains 64/16 = 4 tiles horizontally
* and 4 tiles vertically. There are 4*4=16 tiles in total, each containing 16
* bytes of metadata, so there is a 16*16=256 byte header. 64x64 is already
* tile aligned, so the body is 64*64 * 4 bytes per pixel = 16384 bytes of
* body.
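
The arithmetic in that comment generalizes directly; a hypothetical standalone helper (not from the tree, with a 64-byte body alignment standing in for the cache-line requirement) reproduces the 64x64 numbers:

  #include <stdio.h>

  #define AFBC_SB_DIM      16 /* 16x16 pixel superblocks, as described above */
  #define AFBC_HEADER_SIZE 16 /* bytes of metadata per superblock */

  /* Illustrative only: compute header size, body offset and body size. */
  static void
  afbc_layout(unsigned width, unsigned height, unsigned bytes_per_pixel)
  {
     unsigned sb_x = (width + AFBC_SB_DIM - 1) / AFBC_SB_DIM;
     unsigned sb_y = (height + AFBC_SB_DIM - 1) / AFBC_SB_DIM;
     unsigned header = sb_x * sb_y * AFBC_HEADER_SIZE;
     unsigned body_offset = (header + 63) & ~63u; /* assumed 64B alignment */
     unsigned body = sb_x * AFBC_SB_DIM * sb_y * AFBC_SB_DIM * bytes_per_pixel;

     printf("%ux%u: %u superblocks, %u byte header (body at %u), %u byte body\n",
            width, height, sb_x * sb_y, header, body_offset, body);
  }

  int
  main(void)
  {
     afbc_layout(64, 64, 4); /* matches the worked example: 16 SBs, 256, 16384 */
     return 0;
  }
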
@@ -69,45 +69,45 @@
static enum pipe_format
unswizzled_format(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_I8_UNORM:
return PIPE_FORMAT_R8_UNORM;
switch (format) {
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_I8_UNORM:
return PIPE_FORMAT_R8_UNORM;
case PIPE_FORMAT_L8A8_UNORM:
return PIPE_FORMAT_R8G8_UNORM;
case PIPE_FORMAT_L8A8_UNORM:
return PIPE_FORMAT_R8G8_UNORM;
case PIPE_FORMAT_B8G8R8_UNORM:
return PIPE_FORMAT_R8G8B8_UNORM;
case PIPE_FORMAT_B8G8R8_UNORM:
return PIPE_FORMAT_R8G8B8_UNORM;
case PIPE_FORMAT_R8G8B8X8_UNORM:
case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM:
case PIPE_FORMAT_A8R8G8B8_UNORM:
case PIPE_FORMAT_X8R8G8B8_UNORM:
case PIPE_FORMAT_X8B8G8R8_UNORM:
case PIPE_FORMAT_A8B8G8R8_UNORM:
return PIPE_FORMAT_R8G8B8A8_UNORM;
case PIPE_FORMAT_R8G8B8X8_UNORM:
case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM:
case PIPE_FORMAT_A8R8G8B8_UNORM:
case PIPE_FORMAT_X8R8G8B8_UNORM:
case PIPE_FORMAT_X8B8G8R8_UNORM:
case PIPE_FORMAT_A8B8G8R8_UNORM:
return PIPE_FORMAT_R8G8B8A8_UNORM;
case PIPE_FORMAT_B5G6R5_UNORM:
return PIPE_FORMAT_R5G6B5_UNORM;
case PIPE_FORMAT_B5G6R5_UNORM:
return PIPE_FORMAT_R5G6B5_UNORM;
case PIPE_FORMAT_B5G5R5A1_UNORM:
return PIPE_FORMAT_R5G5B5A1_UNORM;
case PIPE_FORMAT_B5G5R5A1_UNORM:
return PIPE_FORMAT_R5G5B5A1_UNORM;
case PIPE_FORMAT_R10G10B10X2_UNORM:
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_B10G10R10X2_UNORM:
return PIPE_FORMAT_R10G10B10A2_UNORM;
case PIPE_FORMAT_R10G10B10X2_UNORM:
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_B10G10R10X2_UNORM:
return PIPE_FORMAT_R10G10B10A2_UNORM;
case PIPE_FORMAT_A4B4G4R4_UNORM:
case PIPE_FORMAT_B4G4R4A4_UNORM:
return PIPE_FORMAT_R4G4B4A4_UNORM;
case PIPE_FORMAT_A4B4G4R4_UNORM:
case PIPE_FORMAT_B4G4R4A4_UNORM:
return PIPE_FORMAT_R4G4B4A4_UNORM;
default:
return format;
}
default:
return format;
}
}
/* AFBC supports compressing a few canonical formats. Additional formats are
@@ -118,29 +118,29 @@ unswizzled_format(enum pipe_format format)
enum pan_afbc_mode
panfrost_afbc_format(unsigned arch, enum pipe_format format)
{
/* Luminance-alpha not supported for AFBC on v7+ */
switch (format) {
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8A8_UNORM:
if (arch >= 7)
return PAN_AFBC_MODE_INVALID;
else
break;
default:
break;
}
/* Luminance-alpha not supported for AFBC on v7+ */
switch (format) {
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8A8_UNORM:
if (arch >= 7)
return PAN_AFBC_MODE_INVALID;
else
break;
default:
break;
}
/* sRGB does not change the pixel format itself, only the
* interpretation. The interpretation is handled by conversion hardware
* independent to the compression hardware, so we can compress sRGB
* formats by using the corresponding linear format.
*/
format = util_format_linear(format);
/* sRGB does not change the pixel format itself, only the
* interpretation. The interpretation is handled by conversion hardware
* independent to the compression hardware, so we can compress sRGB
* formats by using the corresponding linear format.
*/
format = util_format_linear(format);
/* We handle swizzling orthogonally to AFBC */
format = unswizzled_format(format);
/* We handle swizzling orthogonally to AFBC */
format = unswizzled_format(format);
/* clang-format off */
switch (format) {
@@ -166,9 +166,10 @@ panfrost_afbc_format(unsigned arch, enum pipe_format format)
/* A format may be compressed as AFBC if it has an AFBC internal format */
bool
panfrost_format_supports_afbc(const struct panfrost_device *dev, enum pipe_format format)
panfrost_format_supports_afbc(const struct panfrost_device *dev,
enum pipe_format format)
{
return panfrost_afbc_format(dev->arch, format) != PAN_AFBC_MODE_INVALID;
return panfrost_afbc_format(dev->arch, format) != PAN_AFBC_MODE_INVALID;
}
/* The lossless colour transform (AFBC_FORMAT_MOD_YTR) requires RGB. */
@@ -176,15 +177,14 @@ panfrost_format_supports_afbc(const struct panfrost_device *dev, enum pipe_forma
bool
panfrost_afbc_can_ytr(enum pipe_format format)
{
const struct util_format_description *desc =
util_format_description(format);
const struct util_format_description *desc = util_format_description(format);
/* YTR is only defined for RGB(A) */
if (desc->nr_channels != 3 && desc->nr_channels != 4)
return false;
/* YTR is only defined for RGB(A) */
if (desc->nr_channels != 3 && desc->nr_channels != 4)
return false;
/* The fourth channel if it exists doesn't matter */
return desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB;
/* The fourth channel if it exists doesn't matter */
return desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB;
}
/*
@@ -194,5 +194,5 @@ panfrost_afbc_can_ytr(enum pipe_format format)
bool
panfrost_afbc_can_tile(const struct panfrost_device *dev)
{
return (dev->arch >= 7);
return (dev->arch >= 7);
}


@@ -39,91 +39,92 @@
static unsigned
panfrost_small_padded_vertex_count(unsigned idx)
{
if (idx < 10)
return idx;
else
return (idx + 1) & ~1;
if (idx < 10)
return idx;
else
return (idx + 1) & ~1;
}
static unsigned
panfrost_large_padded_vertex_count(uint32_t vertex_count)
{
/* First, we have to find the highest set one */
unsigned highest = 32 - __builtin_clz(vertex_count);
/* First, we have to find the highest set one */
unsigned highest = 32 - __builtin_clz(vertex_count);
/* Using that, we mask out the highest 4-bits */
unsigned n = highest - 4;
unsigned nibble = (vertex_count >> n) & 0xF;
/* Using that, we mask out the highest 4-bits */
unsigned n = highest - 4;
unsigned nibble = (vertex_count >> n) & 0xF;
/* Great, we have the nibble. Now we can just try possibilities. Note
* that we don't care about the bottom most bit in most cases, and we
* know the top bit must be 1 */
/* Great, we have the nibble. Now we can just try possibilities. Note
* that we don't care about the bottom most bit in most cases, and we
* know the top bit must be 1 */
unsigned middle_two = (nibble >> 1) & 0x3;
unsigned middle_two = (nibble >> 1) & 0x3;
switch (middle_two) {
case 0b00:
if (!(nibble & 1))
return (1 << n) * 9;
else
return (1 << (n + 1)) * 5;
case 0b01:
return (1 << (n + 2)) * 3;
case 0b10:
return (1 << (n + 1)) * 7;
case 0b11:
return (1 << (n + 4));
default:
return 0; /* unreachable */
}
switch (middle_two) {
case 0b00:
if (!(nibble & 1))
return (1 << n) * 9;
else
return (1 << (n + 1)) * 5;
case 0b01:
return (1 << (n + 2)) * 3;
case 0b10:
return (1 << (n + 1)) * 7;
case 0b11:
return (1 << (n + 4));
default:
return 0; /* unreachable */
}
}
unsigned
panfrost_padded_vertex_count(unsigned vertex_count)
{
if (vertex_count < 20)
return panfrost_small_padded_vertex_count(vertex_count);
else
return panfrost_large_padded_vertex_count(vertex_count);
if (vertex_count < 20)
return panfrost_small_padded_vertex_count(vertex_count);
else
return panfrost_large_padded_vertex_count(vertex_count);
}
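
A few worked values make the nibble analysis above more concrete. The sketch below collapses the two helpers into one standalone function (same logic as above, not part of the change) and checks that the padded counts are always at least the input and take the 2^n * {1, 3, 5, 7, 9} shapes the returns above produce:

  #include <assert.h>
  #include <stdio.h>

  /* Collapsed copy of the padding helpers above, for illustration only. */
  static unsigned
  padded_vertex_count(unsigned vertex_count)
  {
     if (vertex_count < 20)
        return vertex_count < 10 ? vertex_count : ((vertex_count + 1) & ~1);

     unsigned highest = 32 - __builtin_clz(vertex_count);
     unsigned n = highest - 4;
     unsigned nibble = (vertex_count >> n) & 0xF;
     unsigned middle_two = (nibble >> 1) & 0x3;

     switch (middle_two) {
     case 0: return (nibble & 1) ? (1 << (n + 1)) * 5 : (1 << n) * 9;
     case 1: return (1 << (n + 2)) * 3;
     case 2: return (1 << (n + 1)) * 7;
     default: return 1 << (n + 4);
     }
  }

  int
  main(void)
  {
     assert(padded_vertex_count(3) == 3);     /* small counts kept (or evened) */
     assert(padded_vertex_count(11) == 12);
     assert(padded_vertex_count(21) == 24);   /* 24 = 2^3 * 3 */
     assert(padded_vertex_count(100) == 112); /* 112 = 2^4 * 7 */
     printf("ok\n");
     return 0;
  }
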
/* The much, much more irritating case -- instancing is enabled. See
* panfrost_job.h for notes on how this works */
unsigned
panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags)
panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift,
unsigned *extra_flags)
{
/* We have a NPOT divisor. Here's the fun one (multipling by
* the inverse and shifting) */
/* We have a NPOT divisor. Here's the fun one (multipling by
* the inverse and shifting) */
/* floor(log2(d)) */
unsigned shift = util_logbase2(hw_divisor);
/* floor(log2(d)) */
unsigned shift = util_logbase2(hw_divisor);
/* m = ceil(2^(32 + shift) / d) */
uint64_t shift_hi = 32 + shift;
uint64_t t = 1ll << shift_hi;
double t_f = t;
double hw_divisor_d = hw_divisor;
double m_f = ceil(t_f / hw_divisor_d);
unsigned m = m_f;
/* m = ceil(2^(32 + shift) / d) */
uint64_t shift_hi = 32 + shift;
uint64_t t = 1ll << shift_hi;
double t_f = t;
double hw_divisor_d = hw_divisor;
double m_f = ceil(t_f / hw_divisor_d);
unsigned m = m_f;
/* Default case */
uint32_t magic_divisor = m;
/* Default case */
uint32_t magic_divisor = m;
/* e = 2^(shift + 32) % d */
uint64_t e = t % hw_divisor;
/* e = 2^(shift + 32) % d */
uint64_t e = t % hw_divisor;
/* Apply round-down algorithm? e <= 2^shift?. XXX: The blob
* seems to use a different condition */
if (e <= (1ll << shift)) {
magic_divisor = m - 1;
*extra_flags = 1;
}
/* Apply round-down algorithm? e <= 2^shift?. XXX: The blob
* seems to use a different condition */
if (e <= (1ll << shift)) {
magic_divisor = m - 1;
*extra_flags = 1;
}
/* Top flag implicitly set */
assert(magic_divisor & (1u << 31));
magic_divisor &= ~(1u << 31);
*o_shift = shift;
/* Top flag implicitly set */
assert(magic_divisor & (1u << 31));
magic_divisor &= ~(1u << 31);
*o_shift = shift;
return magic_divisor;
return magic_divisor;
}
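
A worked value helps here. The sketch below restates the arithmetic standalone (util_logbase2 replaced with a builtin; how the hardware then consumes magic/shift/extra_flags is described in panfrost_job.h and not reproduced), checking the round-down case for a divisor of 3:

  #include <assert.h>
  #include <math.h>
  #include <stdint.h>
  #include <stdio.h>

  /* Illustrative restatement of the magic-divisor computation above. */
  static uint32_t
  magic_divisor(uint32_t d, unsigned *o_shift, unsigned *extra_flags)
  {
     unsigned shift = 31 - __builtin_clz(d);              /* floor(log2(d)) */
     uint64_t t = 1ull << (32 + shift);
     uint32_t m = (uint32_t)ceil((double)t / (double)d);  /* ceil(2^(32+shift)/d) */
     uint64_t e = t % d;                                   /* 2^(32+shift) mod d */
     uint32_t magic = m;

     *extra_flags = 0;
     if (e <= (1ull << shift)) { /* round-down variant */
        magic = m - 1;
        *extra_flags = 1;
     }

     assert(magic & (1u << 31)); /* top bit is implicit */
     *o_shift = shift;
     return magic & ~(1u << 31);
  }

  int
  main(void)
  {
     unsigned shift, flags;
     uint32_t magic = magic_divisor(3, &shift, &flags);

     /* For d = 3: m = ceil(2^33 / 3) = 0xAAAAAAAB and e = 2 <= 2^1, so the
      * round-down form 0xAAAAAAAA is used, then the top bit is stripped. */
     assert(shift == 1 && flags == 1 && magic == 0x2AAAAAAA);
     printf("d=3 -> magic=0x%08x shift=%u flags=%u\n", magic, shift, flags);
     return 0;
  }
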

File diff suppressed because it is too large


@@ -27,10 +27,10 @@
#include "genxml/gen_macros.h"
#include "util/u_dynarray.h"
#include "util/format/u_format.h"
#include "compiler/shader_enums.h"
#include "compiler/nir/nir.h"
#include "compiler/shader_enums.h"
#include "util/format/u_format.h"
#include "util/u_dynarray.h"
#include "panfrost/util/pan_ir.h"
@@ -38,84 +38,78 @@ struct MALI_BLEND_EQUATION;
struct panfrost_device;
struct pan_blend_equation {
unsigned blend_enable : 1;
enum blend_func rgb_func : 3;
unsigned rgb_invert_src_factor : 1;
enum blend_factor rgb_src_factor : 4;
unsigned rgb_invert_dst_factor : 1;
enum blend_factor rgb_dst_factor : 4;
enum blend_func alpha_func : 3;
unsigned alpha_invert_src_factor : 1;
enum blend_factor alpha_src_factor : 4;
unsigned alpha_invert_dst_factor : 1;
enum blend_factor alpha_dst_factor : 4;
unsigned color_mask : 4;
unsigned blend_enable : 1;
enum blend_func rgb_func : 3;
unsigned rgb_invert_src_factor : 1;
enum blend_factor rgb_src_factor : 4;
unsigned rgb_invert_dst_factor : 1;
enum blend_factor rgb_dst_factor : 4;
enum blend_func alpha_func : 3;
unsigned alpha_invert_src_factor : 1;
enum blend_factor alpha_src_factor : 4;
unsigned alpha_invert_dst_factor : 1;
enum blend_factor alpha_dst_factor : 4;
unsigned color_mask : 4;
};
struct pan_blend_rt_state {
/* RT format */
enum pipe_format format;
/* RT format */
enum pipe_format format;
/* Number of samples */
unsigned nr_samples;
/* Number of samples */
unsigned nr_samples;
struct pan_blend_equation equation;
struct pan_blend_equation equation;
};
struct pan_blend_state {
bool logicop_enable;
enum pipe_logicop logicop_func;
float constants[4];
unsigned rt_count;
struct pan_blend_rt_state rts[8];
bool logicop_enable;
enum pipe_logicop logicop_func;
float constants[4];
unsigned rt_count;
struct pan_blend_rt_state rts[8];
};
struct pan_blend_shader_key {
enum pipe_format format;
nir_alu_type src0_type, src1_type;
uint32_t rt : 3;
uint32_t has_constants : 1;
uint32_t logicop_enable : 1;
uint32_t logicop_func:4;
uint32_t nr_samples : 5;
uint32_t padding : 18;
struct pan_blend_equation equation;
enum pipe_format format;
nir_alu_type src0_type, src1_type;
uint32_t rt : 3;
uint32_t has_constants : 1;
uint32_t logicop_enable : 1;
uint32_t logicop_func : 4;
uint32_t nr_samples : 5;
uint32_t padding : 18;
struct pan_blend_equation equation;
};
struct pan_blend_shader_variant {
struct list_head node;
float constants[4];
struct util_dynarray binary;
unsigned first_tag;
unsigned work_reg_count;
struct list_head node;
float constants[4];
struct util_dynarray binary;
unsigned first_tag;
unsigned work_reg_count;
};
#define PAN_BLEND_SHADER_MAX_VARIANTS 32
struct pan_blend_shader {
struct pan_blend_shader_key key;
unsigned nvariants;
struct list_head variants;
struct pan_blend_shader_key key;
unsigned nvariants;
struct list_head variants;
};
bool
pan_blend_reads_dest(const struct pan_blend_equation eq);
bool pan_blend_reads_dest(const struct pan_blend_equation eq);
bool
pan_blend_can_fixed_function(const struct pan_blend_equation equation,
bool supports_2src);
bool pan_blend_can_fixed_function(const struct pan_blend_equation equation,
bool supports_2src);
bool
pan_blend_is_opaque(const struct pan_blend_equation eq);
bool pan_blend_is_opaque(const struct pan_blend_equation eq);
bool
pan_blend_alpha_zero_nop(const struct pan_blend_equation eq);
bool pan_blend_alpha_zero_nop(const struct pan_blend_equation eq);
bool
pan_blend_alpha_one_store(const struct pan_blend_equation eq);
bool pan_blend_alpha_one_store(const struct pan_blend_equation eq);
unsigned
pan_blend_constant_mask(const struct pan_blend_equation eq);
unsigned pan_blend_constant_mask(const struct pan_blend_equation eq);
/* Fixed-function blending only supports a single constant, so if multiple bits
* are set in constant_mask, the constants must match. Therefore we may pick
@@ -124,7 +118,7 @@ pan_blend_constant_mask(const struct pan_blend_equation eq);
static inline float
pan_blend_get_constant(unsigned mask, const float *constants)
{
return mask ? constants[ffs(mask) - 1] : 0.0;
return mask ? constants[ffs(mask) - 1] : 0.0;
}
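
A small standalone illustration of that rule (not part of the change; the mask and constant values are made up): when the equation only reads the blend colour through, say, the R and B factors, constant_mask has those two bits set, the referenced values must match, and returning the value at the first set bit is enough.

  #include <assert.h>
  #include <stdio.h>
  #include <strings.h> /* ffs() */

  static inline float
  pan_blend_get_constant(unsigned mask, const float *constants)
  {
     return mask ? constants[ffs(mask) - 1] : 0.0;
  }

  int
  main(void)
  {
     /* Blend colour (R, G, B, A) = (0.5, 0.25, 0.5, 1.0); mask 0b0101 means
      * only R and B are referenced, and both carry the same value. */
     const float constants[4] = {0.5f, 0.25f, 0.5f, 1.0f};

     assert(pan_blend_get_constant(0x5, constants) == 0.5f);
     assert(pan_blend_get_constant(0x0, constants) == 0.0f); /* unused */
     printf("ok\n");
     return 0;
  }
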
/* v6 doesn't support blend constants in FF blend equations whatsoever, and v7
@@ -134,7 +128,7 @@ pan_blend_get_constant(unsigned mask, const float *constants)
static inline bool
pan_blend_supports_constant(unsigned arch, unsigned rt)
{
return !((arch == 6) || (arch == 7 && rt > 0));
return !((arch == 6) || (arch == 7 && rt > 0));
}
/* The SOURCE_2 value is new in Bifrost */
@@ -142,50 +136,39 @@ pan_blend_supports_constant(unsigned arch, unsigned rt)
static inline bool
pan_blend_supports_2src(unsigned arch)
{
return (arch >= 6);
return (arch >= 6);
}
bool
pan_blend_is_homogenous_constant(unsigned mask, const float *constants);
bool pan_blend_is_homogenous_constant(unsigned mask, const float *constants);
void
pan_blend_to_fixed_function_equation(const struct pan_blend_equation eq,
struct MALI_BLEND_EQUATION *equation);
void pan_blend_to_fixed_function_equation(const struct pan_blend_equation eq,
struct MALI_BLEND_EQUATION *equation);
uint32_t
pan_pack_blend(const struct pan_blend_equation equation);
uint32_t pan_pack_blend(const struct pan_blend_equation equation);
void
pan_blend_shaders_init(struct panfrost_device *dev);
void pan_blend_shaders_init(struct panfrost_device *dev);
void
pan_blend_shaders_cleanup(struct panfrost_device *dev);
void pan_blend_shaders_cleanup(struct panfrost_device *dev);
#ifdef PAN_ARCH
nir_shader *
GENX(pan_blend_create_shader)(const struct panfrost_device *dev,
const struct pan_blend_state *state,
nir_alu_type src0_type,
nir_alu_type src1_type,
unsigned rt);
nir_shader *GENX(pan_blend_create_shader)(const struct panfrost_device *dev,
const struct pan_blend_state *state,
nir_alu_type src0_type,
nir_alu_type src1_type, unsigned rt);
#if PAN_ARCH >= 6
uint64_t
GENX(pan_blend_get_internal_desc)(const struct panfrost_device *dev,
enum pipe_format fmt, unsigned rt,
unsigned force_size, bool dithered);
uint64_t GENX(pan_blend_get_internal_desc)(const struct panfrost_device *dev,
enum pipe_format fmt, unsigned rt,
unsigned force_size, bool dithered);
#endif
/* Take blend_shaders.lock before calling this function and release it when
* you're done with the shader variant object.
*/
struct pan_blend_shader_variant *
GENX(pan_blend_get_shader_locked)(const struct panfrost_device *dev,
const struct pan_blend_state *state,
nir_alu_type src0_type,
nir_alu_type src1_type,
unsigned rt);
struct pan_blend_shader_variant *GENX(pan_blend_get_shader_locked)(
const struct panfrost_device *dev, const struct pan_blend_state *state,
nir_alu_type src0_type, nir_alu_type src1_type, unsigned rt);
#endif
#endif

File diff suppressed because it is too large


@@ -27,12 +27,12 @@
#include "genxml/gen_macros.h"
#include "panfrost-job.h"
#include "util/format/u_format.h"
#include "pan_cs.h"
#include "pan_pool.h"
#include "pan_texture.h"
#include "pan_util.h"
#include "util/format/u_format.h"
#include "panfrost-job.h"
struct pan_fb_info;
struct pan_scoreboard;
@@ -40,90 +40,84 @@ struct pan_pool;
struct panfrost_device;
struct pan_blit_info {
struct {
struct {
const struct pan_image *image;
enum pipe_format format;
} planes[2];
unsigned level;
struct {
int32_t x, y, z;
unsigned layer;
} start, end;
} src, dst;
struct {
bool enable;
uint16_t minx, miny, maxx, maxy;
} scissor;
bool nearest;
struct {
struct {
const struct pan_image *image;
enum pipe_format format;
} planes[2];
unsigned level;
struct {
int32_t x, y, z;
unsigned layer;
} start, end;
} src, dst;
struct {
bool enable;
uint16_t minx, miny, maxx, maxy;
} scissor;
bool nearest;
};
struct pan_blit_context {
mali_ptr rsd, vpd;
mali_ptr textures;
mali_ptr samplers;
mali_ptr position;
struct {
enum mali_texture_dimension dim;
struct {
float x, y;
} start, end;
union {
unsigned layer_offset;
float z_offset;
};
} src;
struct {
int32_t layer_offset;
int32_t cur_layer;
int32_t last_layer;
} dst;
float z_scale;
mali_ptr rsd, vpd;
mali_ptr textures;
mali_ptr samplers;
mali_ptr position;
struct {
enum mali_texture_dimension dim;
struct {
float x, y;
} start, end;
union {
unsigned layer_offset;
float z_offset;
};
} src;
struct {
int32_t layer_offset;
int32_t cur_layer;
int32_t last_layer;
} dst;
float z_scale;
};
void
GENX(pan_blitter_init)(struct panfrost_device *dev,
struct pan_pool *bin_pool,
struct pan_pool *desc_pool);
void GENX(pan_blitter_init)(struct panfrost_device *dev,
struct pan_pool *bin_pool,
struct pan_pool *desc_pool);
void
GENX(pan_blitter_cleanup)(struct panfrost_device *dev);
void GENX(pan_blitter_cleanup)(struct panfrost_device *dev);
unsigned
GENX(pan_preload_fb)(struct pan_pool *desc_pool,
struct pan_scoreboard *scoreboard,
struct pan_fb_info *fb,
mali_ptr tsd, mali_ptr tiler,
struct panfrost_ptr *jobs);
unsigned GENX(pan_preload_fb)(struct pan_pool *desc_pool,
struct pan_scoreboard *scoreboard,
struct pan_fb_info *fb, mali_ptr tsd,
mali_ptr tiler, struct panfrost_ptr *jobs);
void
GENX(pan_blit_ctx_init)(struct panfrost_device *dev,
const struct pan_blit_info *info,
struct pan_pool *blit_pool,
struct pan_blit_context *ctx);
void GENX(pan_blit_ctx_init)(struct panfrost_device *dev,
const struct pan_blit_info *info,
struct pan_pool *blit_pool,
struct pan_blit_context *ctx);
static inline bool
pan_blit_next_surface(struct pan_blit_context *ctx)
{
if (ctx->dst.last_layer < ctx->dst.layer_offset) {
if (ctx->dst.cur_layer <= ctx->dst.last_layer)
return false;
if (ctx->dst.last_layer < ctx->dst.layer_offset) {
if (ctx->dst.cur_layer <= ctx->dst.last_layer)
return false;
ctx->dst.cur_layer--;
} else {
if (ctx->dst.cur_layer >= ctx->dst.last_layer)
return false;
ctx->dst.cur_layer--;
} else {
if (ctx->dst.cur_layer >= ctx->dst.last_layer)
return false;
ctx->dst.cur_layer++;
}
ctx->dst.cur_layer++;
}
return true;
return true;
}
struct panfrost_ptr
GENX(pan_blit)(struct pan_blit_context *ctx,
struct pan_pool *pool,
struct pan_scoreboard *scoreboard,
mali_ptr tsd, mali_ptr tiler);
struct panfrost_ptr GENX(pan_blit)(struct pan_blit_context *ctx,
struct pan_pool *pool,
struct pan_scoreboard *scoreboard,
mali_ptr tsd, mali_ptr tiler);
#endif


@@ -24,10 +24,10 @@
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
*/
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>
#include <xf86drm.h>
#include <pthread.h>
#include <stdio.h>
#include <xf86drm.h>
#include "drm-uapi/panfrost_drm.h"
#include "pan_bo.h"
@@ -56,53 +56,53 @@
*/
static struct panfrost_bo *
panfrost_bo_alloc(struct panfrost_device *dev, size_t size,
uint32_t flags, const char *label)
panfrost_bo_alloc(struct panfrost_device *dev, size_t size, uint32_t flags,
const char *label)
{
struct drm_panfrost_create_bo create_bo = { .size = size };
struct panfrost_bo *bo;
int ret;
struct drm_panfrost_create_bo create_bo = {.size = size};
struct panfrost_bo *bo;
int ret;
if (dev->kernel_version->version_major > 1 ||
dev->kernel_version->version_minor >= 1) {
if (flags & PAN_BO_GROWABLE)
create_bo.flags |= PANFROST_BO_HEAP;
if (!(flags & PAN_BO_EXECUTE))
create_bo.flags |= PANFROST_BO_NOEXEC;
}
if (dev->kernel_version->version_major > 1 ||
dev->kernel_version->version_minor >= 1) {
if (flags & PAN_BO_GROWABLE)
create_bo.flags |= PANFROST_BO_HEAP;
if (!(flags & PAN_BO_EXECUTE))
create_bo.flags |= PANFROST_BO_NOEXEC;
}
ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
if (ret) {
fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
return NULL;
}
ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
if (ret) {
fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
return NULL;
}
bo = pan_lookup_bo(dev, create_bo.handle);
assert(!memcmp(bo, &((struct panfrost_bo){}), sizeof(*bo)));
bo = pan_lookup_bo(dev, create_bo.handle);
assert(!memcmp(bo, &((struct panfrost_bo){}), sizeof(*bo)));
bo->size = create_bo.size;
bo->ptr.gpu = create_bo.offset;
bo->gem_handle = create_bo.handle;
bo->flags = flags;
bo->dev = dev;
bo->label = label;
return bo;
bo->size = create_bo.size;
bo->ptr.gpu = create_bo.offset;
bo->gem_handle = create_bo.handle;
bo->flags = flags;
bo->dev = dev;
bo->label = label;
return bo;
}
static void
panfrost_bo_free(struct panfrost_bo *bo)
{
struct drm_gem_close gem_close = { .handle = bo->gem_handle };
int ret;
struct drm_gem_close gem_close = {.handle = bo->gem_handle};
int ret;
ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
if (ret) {
fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
assert(0);
}
ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
if (ret) {
fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
assert(0);
}
/* BO will be freed with the sparse array, but zero to indicate free */
memset(bo, 0, sizeof(*bo));
/* BO will be freed with the sparse array, but zero to indicate free */
memset(bo, 0, sizeof(*bo));
}
/* Returns true if the BO is ready, false otherwise.
@@ -113,44 +113,44 @@ panfrost_bo_free(struct panfrost_bo *bo)
bool
panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers)
{
struct drm_panfrost_wait_bo req = {
.handle = bo->gem_handle,
.timeout_ns = timeout_ns,
};
int ret;
struct drm_panfrost_wait_bo req = {
.handle = bo->gem_handle,
.timeout_ns = timeout_ns,
};
int ret;
/* If the BO has been exported or imported we can't rely on the cached
* state, we need to call the WAIT_BO ioctl.
*/
if (!(bo->flags & PAN_BO_SHARED)) {
/* If ->gpu_access is 0, the BO is idle, no need to wait. */
if (!bo->gpu_access)
return true;
/* If the BO has been exported or imported we can't rely on the cached
* state, we need to call the WAIT_BO ioctl.
*/
if (!(bo->flags & PAN_BO_SHARED)) {
/* If ->gpu_access is 0, the BO is idle, no need to wait. */
if (!bo->gpu_access)
return true;
/* If the caller only wants to wait for writers and no
* writes are pending, we don't have to wait.
*/
if (!wait_readers && !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
return true;
}
/* If the caller only wants to wait for writers and no
* writes are pending, we don't have to wait.
*/
if (!wait_readers && !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
return true;
}
/* The ioctl returns >= 0 value when the BO we are waiting for is ready
* -1 otherwise.
*/
ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
if (ret != -1) {
/* Set gpu_access to 0 so that the next call to bo_wait()
* doesn't have to call the WAIT_BO ioctl.
*/
bo->gpu_access = 0;
return true;
}
/* The ioctl returns >= 0 value when the BO we are waiting for is ready
* -1 otherwise.
*/
ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
if (ret != -1) {
/* Set gpu_access to 0 so that the next call to bo_wait()
* doesn't have to call the WAIT_BO ioctl.
*/
bo->gpu_access = 0;
return true;
}
/* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
* is invalid, which shouldn't happen here.
*/
assert(errno == ETIMEDOUT || errno == EBUSY);
return false;
/* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
* is invalid, which shouldn't happen here.
*/
assert(errno == ETIMEDOUT || errno == EBUSY);
return false;
}
/* Helper to calculate the bucket index of a BO */
@@ -158,24 +158,23 @@ panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers)
static unsigned
pan_bucket_index(unsigned size)
{
/* Round down to POT to compute a bucket index */
/* Round down to POT to compute a bucket index */
unsigned bucket_index = util_logbase2(size);
unsigned bucket_index = util_logbase2(size);
/* Clamp the bucket index; all huge allocations will be
* sorted into the largest bucket */
/* Clamp the bucket index; all huge allocations will be
* sorted into the largest bucket */
bucket_index = CLAMP(bucket_index, MIN_BO_CACHE_BUCKET,
MAX_BO_CACHE_BUCKET);
bucket_index = CLAMP(bucket_index, MIN_BO_CACHE_BUCKET, MAX_BO_CACHE_BUCKET);
/* Reindex from 0 */
return (bucket_index - MIN_BO_CACHE_BUCKET);
/* Reindex from 0 */
return (bucket_index - MIN_BO_CACHE_BUCKET);
}
static struct list_head *
pan_bucket(struct panfrost_device *dev, unsigned size)
{
return &dev->bo_cache.buckets[pan_bucket_index(size)];
return &dev->bo_cache.buckets[pan_bucket_index(size)];
}
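
A standalone sketch of the bucketing (not part of the change). The MIN/MAX_BO_CACHE_BUCKET values of 12 and 22 are an assumption standing in for the definitions in pan_device.h, and CLAMP and util_logbase2 are restated locally so the example runs on its own:

  #include <assert.h>
  #include <stdio.h>

  /* Assumed bucket range: 2^12 (4 KiB) through 2^22 (4 MiB). */
  #define MIN_BO_CACHE_BUCKET 12
  #define MAX_BO_CACHE_BUCKET 22

  #define CLAMP(x, lo, hi) ((x) < (lo) ? (lo) : (x) > (hi) ? (hi) : (x))

  static unsigned
  util_logbase2(unsigned n) /* floor(log2(n)), same contract as Mesa's helper */
  {
     return 31 - __builtin_clz(n);
  }

  static unsigned
  pan_bucket_index(unsigned size)
  {
     unsigned bucket_index = util_logbase2(size);
     bucket_index = CLAMP(bucket_index, MIN_BO_CACHE_BUCKET, MAX_BO_CACHE_BUCKET);
     return bucket_index - MIN_BO_CACHE_BUCKET;
  }

  int
  main(void)
  {
     assert(pan_bucket_index(4096) == 0);       /* 4 KiB -> smallest bucket */
     assert(pan_bucket_index(300000) == 6);     /* ~293 KiB rounds down to 256 KiB */
     assert(pan_bucket_index(128 << 20) == 10); /* huge BOs share the last bucket */
     printf("ok\n");
     return 0;
  }
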
/* Tries to fetch a BO of sufficient size with the appropriate flags from the
@@ -184,74 +183,71 @@ pan_bucket(struct panfrost_device *dev, unsigned size)
* BO. */
static struct panfrost_bo *
panfrost_bo_cache_fetch(struct panfrost_device *dev,
size_t size, uint32_t flags, const char *label,
bool dontwait)
panfrost_bo_cache_fetch(struct panfrost_device *dev, size_t size,
uint32_t flags, const char *label, bool dontwait)
{
pthread_mutex_lock(&dev->bo_cache.lock);
struct list_head *bucket = pan_bucket(dev, size);
struct panfrost_bo *bo = NULL;
pthread_mutex_lock(&dev->bo_cache.lock);
struct list_head *bucket = pan_bucket(dev, size);
struct panfrost_bo *bo = NULL;
/* Iterate the bucket looking for something suitable */
list_for_each_entry_safe(struct panfrost_bo, entry, bucket,
bucket_link) {
if (entry->size < size || entry->flags != flags)
continue;
/* Iterate the bucket looking for something suitable */
list_for_each_entry_safe(struct panfrost_bo, entry, bucket, bucket_link) {
if (entry->size < size || entry->flags != flags)
continue;
/* If the oldest BO in the cache is busy, likely so is
* everything newer, so bail. */
if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX,
PAN_BO_ACCESS_RW))
break;
/* If the oldest BO in the cache is busy, likely so is
* everything newer, so bail. */
if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX, PAN_BO_ACCESS_RW))
break;
struct drm_panfrost_madvise madv = {
.handle = entry->gem_handle,
.madv = PANFROST_MADV_WILLNEED,
};
int ret;
struct drm_panfrost_madvise madv = {
.handle = entry->gem_handle,
.madv = PANFROST_MADV_WILLNEED,
};
int ret;
/* This one works, splice it out of the cache */
list_del(&entry->bucket_link);
list_del(&entry->lru_link);
/* This one works, splice it out of the cache */
list_del(&entry->bucket_link);
list_del(&entry->lru_link);
ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
if (!ret && !madv.retained) {
panfrost_bo_free(entry);
continue;
}
/* Let's go! */
bo = entry;
bo->label = label;
break;
}
pthread_mutex_unlock(&dev->bo_cache.lock);
ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
if (!ret && !madv.retained) {
panfrost_bo_free(entry);
continue;
}
/* Let's go! */
bo = entry;
bo->label = label;
break;
}
pthread_mutex_unlock(&dev->bo_cache.lock);
return bo;
return bo;
}
static void
panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev)
{
struct timespec time;
struct timespec time;
clock_gettime(CLOCK_MONOTONIC, &time);
list_for_each_entry_safe(struct panfrost_bo, entry,
&dev->bo_cache.lru, lru_link) {
/* We want all entries that have been used more than 1 sec
* ago to be dropped; others can be kept.
* Note the <= 2 check and not <= 1. It's here to account for
* the fact that we're only testing ->tv_sec, not ->tv_nsec.
* That means we might keep entries that are between 1 and 2
* seconds old, but we don't really care, as long as unused BOs
* are dropped at some point.
*/
if (time.tv_sec - entry->last_used <= 2)
break;
clock_gettime(CLOCK_MONOTONIC, &time);
list_for_each_entry_safe(struct panfrost_bo, entry, &dev->bo_cache.lru,
lru_link) {
/* We want all entries that have been used more than 1 sec
* ago to be dropped; others can be kept.
* Note the <= 2 check and not <= 1. It's here to account for
* the fact that we're only testing ->tv_sec, not ->tv_nsec.
* That means we might keep entries that are between 1 and 2
* seconds old, but we don't really care, as long as unused BOs
* are dropped at some point.
*/
if (time.tv_sec - entry->last_used <= 2)
break;
list_del(&entry->bucket_link);
list_del(&entry->lru_link);
panfrost_bo_free(entry);
}
list_del(&entry->bucket_link);
list_del(&entry->lru_link);
panfrost_bo_free(entry);
}
}
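/*
 * Worked example for the staleness test above (illustrative, not part of
 * this diff). Only tv_sec is compared, so an entry last used at t = 10.9 s
 * (last_used = 10) and checked at t = 12.0 s sees 12 - 10 = 2 and is kept
 * even though it is only 1.1 s old; once a later check sees 13 - 10 = 3,
 * the entry, and everything older behind it in the LRU, is freed.
 */
#include <stdbool.h>
#include <time.h>
static bool
example_bo_cache_entry_is_stale(time_t last_used, const struct timespec *now)
{
   return (now->tv_sec - last_used) > 2;
}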
/* Tries to add a BO to the cache. Returns if it was
@@ -260,43 +256,43 @@ panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev)
static bool
panfrost_bo_cache_put(struct panfrost_bo *bo)
{
struct panfrost_device *dev = bo->dev;
struct panfrost_device *dev = bo->dev;
if (bo->flags & PAN_BO_SHARED || dev->debug & PAN_DBG_NO_CACHE)
return false;
if (bo->flags & PAN_BO_SHARED || dev->debug & PAN_DBG_NO_CACHE)
return false;
/* Must be first */
pthread_mutex_lock(&dev->bo_cache.lock);
/* Must be first */
pthread_mutex_lock(&dev->bo_cache.lock);
struct list_head *bucket = pan_bucket(dev, MAX2(bo->size, 4096));
struct drm_panfrost_madvise madv;
struct timespec time;
struct list_head *bucket = pan_bucket(dev, MAX2(bo->size, 4096));
struct drm_panfrost_madvise madv;
struct timespec time;
madv.handle = bo->gem_handle;
madv.madv = PANFROST_MADV_DONTNEED;
madv.retained = 0;
madv.handle = bo->gem_handle;
madv.madv = PANFROST_MADV_DONTNEED;
madv.retained = 0;
drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
/* Add us to the bucket */
list_addtail(&bo->bucket_link, bucket);
/* Add us to the bucket */
list_addtail(&bo->bucket_link, bucket);
/* Add us to the LRU list and update the last_used field. */
list_addtail(&bo->lru_link, &dev->bo_cache.lru);
clock_gettime(CLOCK_MONOTONIC, &time);
bo->last_used = time.tv_sec;
/* Add us to the LRU list and update the last_used field. */
list_addtail(&bo->lru_link, &dev->bo_cache.lru);
clock_gettime(CLOCK_MONOTONIC, &time);
bo->last_used = time.tv_sec;
/* Let's do some cleanup in the BO cache while we hold the
* lock.
*/
panfrost_bo_cache_evict_stale_bos(dev);
/* Let's do some cleanup in the BO cache while we hold the
* lock.
*/
panfrost_bo_cache_evict_stale_bos(dev);
/* Update the label to help debug BO cache memory usage issues */
bo->label = "Unused (BO cache)";
/* Update the label to help debug BO cache memory usage issues */
bo->label = "Unused (BO cache)";
/* Must be last */
pthread_mutex_unlock(&dev->bo_cache.lock);
return true;
/* Must be last */
pthread_mutex_unlock(&dev->bo_cache.lock);
return true;
}
/* Evicts all BOs from the cache. Called during context
@@ -306,228 +302,226 @@ panfrost_bo_cache_put(struct panfrost_bo *bo)
* OS) */
void
panfrost_bo_cache_evict_all(
struct panfrost_device *dev)
panfrost_bo_cache_evict_all(struct panfrost_device *dev)
{
pthread_mutex_lock(&dev->bo_cache.lock);
for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) {
struct list_head *bucket = &dev->bo_cache.buckets[i];
pthread_mutex_lock(&dev->bo_cache.lock);
for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) {
struct list_head *bucket = &dev->bo_cache.buckets[i];
list_for_each_entry_safe(struct panfrost_bo, entry, bucket,
bucket_link) {
list_del(&entry->bucket_link);
list_del(&entry->lru_link);
panfrost_bo_free(entry);
}
}
pthread_mutex_unlock(&dev->bo_cache.lock);
list_for_each_entry_safe(struct panfrost_bo, entry, bucket, bucket_link) {
list_del(&entry->bucket_link);
list_del(&entry->lru_link);
panfrost_bo_free(entry);
}
}
pthread_mutex_unlock(&dev->bo_cache.lock);
}
void
panfrost_bo_mmap(struct panfrost_bo *bo)
{
struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle };
int ret;
struct drm_panfrost_mmap_bo mmap_bo = {.handle = bo->gem_handle};
int ret;
if (bo->ptr.cpu)
return;
if (bo->ptr.cpu)
return;
ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
if (ret) {
fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
assert(0);
}
ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
if (ret) {
fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
assert(0);
}
bo->ptr.cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
bo->dev->fd, mmap_bo.offset);
if (bo->ptr.cpu == MAP_FAILED) {
bo->ptr.cpu = NULL;
fprintf(stderr,
"mmap failed: result=%p size=0x%llx fd=%i offset=0x%llx %m\n",
bo->ptr.cpu, (long long)bo->size, bo->dev->fd,
(long long)mmap_bo.offset);
}
bo->ptr.cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
bo->dev->fd, mmap_bo.offset);
if (bo->ptr.cpu == MAP_FAILED) {
bo->ptr.cpu = NULL;
fprintf(stderr,
"mmap failed: result=%p size=0x%llx fd=%i offset=0x%llx %m\n",
bo->ptr.cpu, (long long)bo->size, bo->dev->fd,
(long long)mmap_bo.offset);
}
}
static void
panfrost_bo_munmap(struct panfrost_bo *bo)
{
if (!bo->ptr.cpu)
return;
if (!bo->ptr.cpu)
return;
if (os_munmap((void *) (uintptr_t)bo->ptr.cpu, bo->size)) {
perror("munmap");
abort();
}
if (os_munmap((void *)(uintptr_t)bo->ptr.cpu, bo->size)) {
perror("munmap");
abort();
}
bo->ptr.cpu = NULL;
bo->ptr.cpu = NULL;
}
struct panfrost_bo *
panfrost_bo_create(struct panfrost_device *dev, size_t size,
uint32_t flags, const char *label)
panfrost_bo_create(struct panfrost_device *dev, size_t size, uint32_t flags,
const char *label)
{
struct panfrost_bo *bo;
struct panfrost_bo *bo;
/* Kernel will fail (confusingly) with EPERM otherwise */
assert(size > 0);
/* Kernel will fail (confusingly) with EPERM otherwise */
assert(size > 0);
/* To maximize BO cache usage, don't allocate tiny BOs */
size = ALIGN_POT(size, 4096);
/* To maximize BO cache usage, don't allocate tiny BOs */
size = ALIGN_POT(size, 4096);
/* GROWABLE BOs cannot be mmapped */
if (flags & PAN_BO_GROWABLE)
assert(flags & PAN_BO_INVISIBLE);
/* GROWABLE BOs cannot be mmapped */
if (flags & PAN_BO_GROWABLE)
assert(flags & PAN_BO_INVISIBLE);
/* Ideally, we get a BO that's ready in the cache, or allocate a fresh
* BO. If allocation fails, we can try waiting for something in the
* cache. But if there's nothing suitable, we should flush the cache
* to make space for the new allocation.
*/
bo = panfrost_bo_cache_fetch(dev, size, flags, label, true);
if (!bo)
bo = panfrost_bo_alloc(dev, size, flags, label);
if (!bo)
bo = panfrost_bo_cache_fetch(dev, size, flags, label, false);
if (!bo) {
panfrost_bo_cache_evict_all(dev);
bo = panfrost_bo_alloc(dev, size, flags, label);
}
/* Ideally, we get a BO that's ready in the cache, or allocate a fresh
* BO. If allocation fails, we can try waiting for something in the
* cache. But if there's nothing suitable, we should flush the cache
* to make space for the new allocation.
*/
bo = panfrost_bo_cache_fetch(dev, size, flags, label, true);
if (!bo)
bo = panfrost_bo_alloc(dev, size, flags, label);
if (!bo)
bo = panfrost_bo_cache_fetch(dev, size, flags, label, false);
if (!bo) {
panfrost_bo_cache_evict_all(dev);
bo = panfrost_bo_alloc(dev, size, flags, label);
}
if (!bo) {
unreachable("BO creation failed. We don't handle that yet.");
return NULL;
}
if (!bo) {
unreachable("BO creation failed. We don't handle that yet.");
return NULL;
}
/* Only mmap now if we know we need to. For CPU-invisible buffers, we
* never map since we don't care about their contents; they're purely
* for GPU-internal use. But we do trace them anyway. */
/* Only mmap now if we know we need to. For CPU-invisible buffers, we
* never map since we don't care about their contents; they're purely
* for GPU-internal use. But we do trace them anyway. */
if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP)))
panfrost_bo_mmap(bo);
if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP)))
panfrost_bo_mmap(bo);
p_atomic_set(&bo->refcnt, 1);
p_atomic_set(&bo->refcnt, 1);
if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
if (flags & PAN_BO_INVISIBLE)
pandecode_inject_mmap(bo->ptr.gpu, NULL, bo->size, NULL);
else if (!(flags & PAN_BO_DELAY_MMAP))
pandecode_inject_mmap(bo->ptr.gpu, bo->ptr.cpu, bo->size, NULL);
}
if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
if (flags & PAN_BO_INVISIBLE)
pandecode_inject_mmap(bo->ptr.gpu, NULL, bo->size, NULL);
else if (!(flags & PAN_BO_DELAY_MMAP))
pandecode_inject_mmap(bo->ptr.gpu, bo->ptr.cpu, bo->size, NULL);
}
return bo;
return bo;
}
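/*
 * Illustrative usage sketch (not part of this diff): allocating a small
 * CPU-visible scratch BO and handing it back to the cache. The size and
 * label are arbitrary examples; the entry points and flags are the ones
 * declared in pan_bo.h.
 */
#include <string.h>
#include "pan_bo.h"
static void
example_scratch_bo(struct panfrost_device *dev)
{
   /* No PAN_BO_INVISIBLE/PAN_BO_DELAY_MMAP flag, so the BO is mmapped
    * before panfrost_bo_create() returns and ptr.cpu is valid. */
   struct panfrost_bo *bo =
      panfrost_bo_create(dev, 64 * 1024, 0, "Example scratch");
   memset(bo->ptr.cpu, 0, bo->size);
   /* Dropping the last reference returns the BO to the cache (unless it
    * is shared), ready to be fetched by a later panfrost_bo_create(). */
   panfrost_bo_unreference(bo);
}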
void
panfrost_bo_reference(struct panfrost_bo *bo)
{
if (bo) {
ASSERTED int count = p_atomic_inc_return(&bo->refcnt);
assert(count != 1);
}
if (bo) {
ASSERTED int count = p_atomic_inc_return(&bo->refcnt);
assert(count != 1);
}
}
void
panfrost_bo_unreference(struct panfrost_bo *bo)
{
if (!bo)
return;
if (!bo)
return;
/* Don't return to cache if there are still references */
if (p_atomic_dec_return(&bo->refcnt))
return;
/* Don't return to cache if there are still references */
if (p_atomic_dec_return(&bo->refcnt))
return;
struct panfrost_device *dev = bo->dev;
struct panfrost_device *dev = bo->dev;
pthread_mutex_lock(&dev->bo_map_lock);
pthread_mutex_lock(&dev->bo_map_lock);
/* Someone might have imported this BO while we were waiting for the
* lock, let's make sure it's still not referenced before freeing it.
*/
if (p_atomic_read(&bo->refcnt) == 0) {
/* When the reference count goes to zero, we need to cleanup */
panfrost_bo_munmap(bo);
/* Someone might have imported this BO while we were waiting for the
* lock, let's make sure it's still not referenced before freeing it.
*/
if (p_atomic_read(&bo->refcnt) == 0) {
/* When the reference count goes to zero, we need to cleanup */
panfrost_bo_munmap(bo);
if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
pandecode_inject_free(bo->ptr.gpu, bo->size);
if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
pandecode_inject_free(bo->ptr.gpu, bo->size);
/* Rather than freeing the BO now, we'll cache the BO for later
* allocations if we're allowed to.
*/
if (!panfrost_bo_cache_put(bo))
panfrost_bo_free(bo);
}
pthread_mutex_unlock(&dev->bo_map_lock);
/* Rather than freeing the BO now, we'll cache the BO for later
* allocations if we're allowed to.
*/
if (!panfrost_bo_cache_put(bo))
panfrost_bo_free(bo);
}
pthread_mutex_unlock(&dev->bo_map_lock);
}
struct panfrost_bo *
panfrost_bo_import(struct panfrost_device *dev, int fd)
{
struct panfrost_bo *bo;
struct drm_panfrost_get_bo_offset get_bo_offset = {0,};
ASSERTED int ret;
unsigned gem_handle;
struct panfrost_bo *bo;
struct drm_panfrost_get_bo_offset get_bo_offset = {
0,
};
ASSERTED int ret;
unsigned gem_handle;
ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle);
assert(!ret);
ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle);
assert(!ret);
pthread_mutex_lock(&dev->bo_map_lock);
bo = pan_lookup_bo(dev, gem_handle);
pthread_mutex_lock(&dev->bo_map_lock);
bo = pan_lookup_bo(dev, gem_handle);
if (!bo->dev) {
get_bo_offset.handle = gem_handle;
ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
assert(!ret);
if (!bo->dev) {
get_bo_offset.handle = gem_handle;
ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
assert(!ret);
bo->dev = dev;
bo->ptr.gpu = (mali_ptr) get_bo_offset.offset;
bo->size = lseek(fd, 0, SEEK_END);
/* Sometimes this can fail and return -1. A size of -1 is not
* something mmap can sensibly map, so be robust against that and
* against zero-sized imports, and fail gracefully.
*/
if ((bo->size == 0) || (bo->size == (size_t)-1)) {
pthread_mutex_unlock(&dev->bo_map_lock);
return NULL;
}
bo->flags = PAN_BO_SHARED;
bo->gem_handle = gem_handle;
p_atomic_set(&bo->refcnt, 1);
} else {
/* bo->refcnt == 0 can happen if the BO
* was being released but panfrost_bo_import() acquired the
* lock before panfrost_bo_unreference(). In that case, refcnt
* is 0 and we can't use panfrost_bo_reference() directly; we
* have to re-initialize the refcnt.
* Note that panfrost_bo_unreference() checks
* refcnt value just after acquiring the lock to
* make sure the object is not freed if panfrost_bo_import()
* acquired it in the meantime.
*/
if (p_atomic_read(&bo->refcnt) == 0)
p_atomic_set(&bo->refcnt, 1);
else
panfrost_bo_reference(bo);
}
pthread_mutex_unlock(&dev->bo_map_lock);
bo->dev = dev;
bo->ptr.gpu = (mali_ptr)get_bo_offset.offset;
bo->size = lseek(fd, 0, SEEK_END);
/* Sometimes this can fail and return -1. A size of -1 is not
* something mmap can sensibly map, so be robust against that and
* against zero-sized imports, and fail gracefully.
*/
if ((bo->size == 0) || (bo->size == (size_t)-1)) {
pthread_mutex_unlock(&dev->bo_map_lock);
return NULL;
}
bo->flags = PAN_BO_SHARED;
bo->gem_handle = gem_handle;
p_atomic_set(&bo->refcnt, 1);
} else {
/* bo->refcnt == 0 can happen if the BO
* was being released but panfrost_bo_import() acquired the
* lock before panfrost_bo_unreference(). In that case, refcnt
* is 0 and we can't use panfrost_bo_reference() directly; we
* have to re-initialize the refcnt.
* Note that panfrost_bo_unreference() checks
* refcnt value just after acquiring the lock to
* make sure the object is not freed if panfrost_bo_import()
* acquired it in the meantime.
*/
if (p_atomic_read(&bo->refcnt) == 0)
p_atomic_set(&bo->refcnt, 1);
else
panfrost_bo_reference(bo);
}
pthread_mutex_unlock(&dev->bo_map_lock);
return bo;
return bo;
}
int
panfrost_bo_export(struct panfrost_bo *bo)
{
struct drm_prime_handle args = {
.handle = bo->gem_handle,
.flags = DRM_CLOEXEC,
};
struct drm_prime_handle args = {
.handle = bo->gem_handle,
.flags = DRM_CLOEXEC,
};
int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
if (ret == -1)
return -1;
int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
if (ret == -1)
return -1;
bo->flags |= PAN_BO_SHARED;
return args.fd;
bo->flags |= PAN_BO_SHARED;
return args.fd;
}
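/*
 * Illustrative sketch (not part of this diff): sharing a BO with another
 * panfrost device through the dma-buf fd returned by panfrost_bo_export().
 * Error handling is minimal, and the device pointers are assumed to come
 * from elsewhere in the driver.
 */
#include <unistd.h>
#include "pan_bo.h"
static struct panfrost_bo *
example_share_bo(struct panfrost_bo *bo, struct panfrost_device *other_dev)
{
   /* Export marks the BO PAN_BO_SHARED, so it will bypass the BO cache
    * and the cached-idle fast path from now on. */
   int fd = panfrost_bo_export(bo);
   if (fd == -1)
      return NULL;
   struct panfrost_bo *imported = panfrost_bo_import(other_dev, fd);
   /* The fd is just a transport handle; close it once imported. */
   close(fd);
   return imported;
}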


@@ -26,113 +26,106 @@
#ifndef __PAN_BO_H__
#define __PAN_BO_H__
#include <time.h>
#include "util/list.h"
#include "panfrost-job.h"
#include <time.h>
/* Flags for allocated memory */
/* This memory region is executable */
#define PAN_BO_EXECUTE (1 << 0)
#define PAN_BO_EXECUTE (1 << 0)
/* This memory region should be lazily allocated and grow-on-page-fault. Must
* be used in conjunction with INVISIBLE */
#define PAN_BO_GROWABLE (1 << 1)
#define PAN_BO_GROWABLE (1 << 1)
/* This memory region should not be mapped to the CPU */
#define PAN_BO_INVISIBLE (1 << 2)
#define PAN_BO_INVISIBLE (1 << 2)
/* This region may not be used immediately and will not be mmapped at
* allocation time (semantically distinct from INVISIBLE, which can never
* be mmapped) */
#define PAN_BO_DELAY_MMAP (1 << 3)
#define PAN_BO_DELAY_MMAP (1 << 3)
/* BO is shared across processes (imported or exported) and therefore cannot be
* cached locally */
#define PAN_BO_SHARED (1 << 4)
#define PAN_BO_SHARED (1 << 4)
/* GPU access flags */
/* BO is either shared (can be accessed by more than one GPU batch) or private
* (reserved by a specific GPU job). */
#define PAN_BO_ACCESS_PRIVATE (0 << 0)
#define PAN_BO_ACCESS_SHARED (1 << 0)
#define PAN_BO_ACCESS_PRIVATE (0 << 0)
#define PAN_BO_ACCESS_SHARED (1 << 0)
/* BO is being read/written by the GPU */
#define PAN_BO_ACCESS_READ (1 << 1)
#define PAN_BO_ACCESS_WRITE (1 << 2)
#define PAN_BO_ACCESS_RW (PAN_BO_ACCESS_READ | PAN_BO_ACCESS_WRITE)
#define PAN_BO_ACCESS_READ (1 << 1)
#define PAN_BO_ACCESS_WRITE (1 << 2)
#define PAN_BO_ACCESS_RW (PAN_BO_ACCESS_READ | PAN_BO_ACCESS_WRITE)
/* BO is accessed by the vertex/tiler job. */
#define PAN_BO_ACCESS_VERTEX_TILER (1 << 3)
#define PAN_BO_ACCESS_VERTEX_TILER (1 << 3)
/* BO is accessed by the fragment job. */
#define PAN_BO_ACCESS_FRAGMENT (1 << 4)
#define PAN_BO_ACCESS_FRAGMENT (1 << 4)
typedef uint8_t pan_bo_access;
struct panfrost_device;
struct panfrost_ptr {
/* CPU address */
void *cpu;
/* CPU address */
void *cpu;
/* GPU address */
mali_ptr gpu;
/* GPU address */
mali_ptr gpu;
};
struct panfrost_bo {
/* Must be first for casting */
struct list_head bucket_link;
/* Must be first for casting */
struct list_head bucket_link;
/* Used to link the BO to the BO cache LRU list. */
struct list_head lru_link;
/* Used to link the BO to the BO cache LRU list. */
struct list_head lru_link;
/* Store the time this BO was last used, so the BO cache logic can evict
* stale BOs.
*/
time_t last_used;
/* Store the time this BO was last used, so the BO cache logic can evict
* stale BOs.
*/
time_t last_used;
/* Atomic reference count */
int32_t refcnt;
/* Atomic reference count */
int32_t refcnt;
struct panfrost_device *dev;
struct panfrost_device *dev;
/* Mapping for the entire object (all levels) */
struct panfrost_ptr ptr;
/* Mapping for the entire object (all levels) */
struct panfrost_ptr ptr;
/* Size of the entire object */
size_t size;
/* Size of the entire object */
size_t size;
int gem_handle;
int gem_handle;
uint32_t flags;
uint32_t flags;
/* Combination of PAN_BO_ACCESS_{READ,WRITE} flags encoding pending
* GPU accesses to this BO. Useful to avoid calling the WAIT_BO ioctl
* when the BO is idle.
*/
uint32_t gpu_access;
/* Combination of PAN_BO_ACCESS_{READ,WRITE} flags encoding pending
* GPU accesses to this BO. Useful to avoid calling the WAIT_BO ioctl
* when the BO is idle.
*/
uint32_t gpu_access;
/* Human readable description of the BO for debugging. */
const char *label;
/* Human readable description of the BO for debugging. */
const char *label;
};
bool
panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers);
void
panfrost_bo_reference(struct panfrost_bo *bo);
void
panfrost_bo_unreference(struct panfrost_bo *bo);
struct panfrost_bo *
panfrost_bo_create(struct panfrost_device *dev, size_t size,
uint32_t flags, const char *label);
void
panfrost_bo_mmap(struct panfrost_bo *bo);
struct panfrost_bo *
panfrost_bo_import(struct panfrost_device *dev, int fd);
int
panfrost_bo_export(struct panfrost_bo *bo);
void
panfrost_bo_cache_evict_all(struct panfrost_device *dev);
bool panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
bool wait_readers);
void panfrost_bo_reference(struct panfrost_bo *bo);
void panfrost_bo_unreference(struct panfrost_bo *bo);
struct panfrost_bo *panfrost_bo_create(struct panfrost_device *dev, size_t size,
uint32_t flags, const char *label);
void panfrost_bo_mmap(struct panfrost_bo *bo);
struct panfrost_bo *panfrost_bo_import(struct panfrost_device *dev, int fd);
int panfrost_bo_export(struct panfrost_bo *bo);
void panfrost_bo_cache_evict_all(struct panfrost_device *dev);
#endif /* __PAN_BO_H__ */


@@ -26,11 +26,11 @@
#include "genxml/gen_macros.h"
#include <string.h>
#include "pan_util.h"
#include "pan_format.h"
#include "gallium/auxiliary/util/u_pack_color.h"
#include "util/rounding.h"
#include "util/format_srgb.h"
#include "util/rounding.h"
#include "pan_format.h"
#include "pan_util.h"
/* Clear colours are packed as the internal format of the tilebuffer, looked up
* in the blendable formats table given the render target format.
@@ -49,8 +49,8 @@
static void
pan_pack_color_32(uint32_t *packed, uint32_t v)
{
for (unsigned i = 0; i < 4; ++i)
packed[i] = v;
for (unsigned i = 0; i < 4; ++i)
packed[i] = v;
}
/* For m integer bits and n fractional bits, calculate the conversion factor,
@@ -61,22 +61,22 @@ pan_pack_color_32(uint32_t *packed, uint32_t v)
static inline uint32_t
float_to_fixed(float f, unsigned bits_int, unsigned bits_frac, bool dither)
{
uint32_t m = (1 << bits_int) - 1;
uint32_t m = (1 << bits_int) - 1;
if (dither) {
float factor = m << bits_frac;
return _mesa_roundevenf(f * factor);
} else {
uint32_t v = _mesa_roundevenf(f * (float) m);
return v << bits_frac;
}
if (dither) {
float factor = m << bits_frac;
return _mesa_roundevenf(f * factor);
} else {
uint32_t v = _mesa_roundevenf(f * (float)m);
return v << bits_frac;
}
}
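/*
 * Worked example for float_to_fixed() above (illustrative, not part of
 * this diff), assuming a 4.4 fixed-point channel like the ones described
 * by struct mali_tib_layout. With f = 0.5, bits_int = 4, bits_frac = 4
 * and m = (1 << 4) - 1 = 15:
 *
 *   dithered:     roundeven(0.5 * (15 << 4)) = roundeven(120.0) = 120
 *   non-dithered: roundeven(0.5 * 15) << 4   = 8 << 4           = 128
 *
 * so dithering preserves fractional precision instead of always leaving
 * the low bits zero.
 */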
struct mali_tib_layout {
unsigned int_r, frac_r;
unsigned int_g, frac_g;
unsigned int_b, frac_b;
unsigned int_a, frac_a;
unsigned int_r, frac_r;
unsigned int_g, frac_g;
unsigned int_b, frac_b;
unsigned int_a, frac_a;
};
/* clang-format off */
@@ -93,76 +93,77 @@ static const struct mali_tib_layout tib_layouts[] = {
/* Raw values are stored as-is but replicated for multisampling */
static void
pan_pack_raw(uint32_t *packed, const union pipe_color_union *color, enum pipe_format format)
pan_pack_raw(uint32_t *packed, const union pipe_color_union *color,
enum pipe_format format)
{
union util_color out = { 0 };
unsigned size = util_format_get_blocksize(format);
assert(size <= 16);
union util_color out = {0};
unsigned size = util_format_get_blocksize(format);
assert(size <= 16);
util_pack_color(color->f, format, &out);
util_pack_color(color->f, format, &out);
if (size == 1) {
unsigned s = out.ui[0] | (out.ui[0] << 8);
pan_pack_color_32(packed, s | (s << 16));
} else if (size == 2)
pan_pack_color_32(packed, out.ui[0] | (out.ui[0] << 16));
else if (size <= 4)
pan_pack_color_32(packed, out.ui[0]);
else if (size <= 8) {
memcpy(packed + 0, out.ui, 8);
memcpy(packed + 2, out.ui, 8);
} else {
memcpy(packed, out.ui, 16);
}
if (size == 1) {
unsigned s = out.ui[0] | (out.ui[0] << 8);
pan_pack_color_32(packed, s | (s << 16));
} else if (size == 2)
pan_pack_color_32(packed, out.ui[0] | (out.ui[0] << 16));
else if (size <= 4)
pan_pack_color_32(packed, out.ui[0]);
else if (size <= 8) {
memcpy(packed + 0, out.ui, 8);
memcpy(packed + 2, out.ui, 8);
} else {
memcpy(packed, out.ui, 16);
}
}
void
pan_pack_color(uint32_t *packed, const union pipe_color_union *color,
enum pipe_format format, bool dithered)
{
/* Set of blendable formats is common across versions. TODO: v9 */
enum mali_color_buffer_internal_format internal =
panfrost_blendable_formats_v7[format].internal;
/* Set of blendable formats is common across versions. TODO: v9 */
enum mali_color_buffer_internal_format internal =
panfrost_blendable_formats_v7[format].internal;
if (internal == MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW_VALUE) {
pan_pack_raw(packed, color, format);
return;
}
if (internal == MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW_VALUE) {
pan_pack_raw(packed, color, format);
return;
}
/* Saturate to [0, 1] by definition of UNORM. Prevents overflow. */
float r = SATURATE(color->f[0]);
float g = SATURATE(color->f[1]);
float b = SATURATE(color->f[2]);
float a = SATURATE(color->f[3]);
/* Saturate to [0, 1] by definition of UNORM. Prevents overflow. */
float r = SATURATE(color->f[0]);
float g = SATURATE(color->f[1]);
float b = SATURATE(color->f[2]);
float a = SATURATE(color->f[3]);
/* Fill in alpha = 1.0 by default */
if (!util_format_has_alpha(format))
a = 1.0;
/* Fill in alpha = 1.0 by default */
if (!util_format_has_alpha(format))
a = 1.0;
/* Convert colourspace while we still have floats */
if (util_format_is_srgb(format)) {
r = util_format_linear_to_srgb_float(r);
g = util_format_linear_to_srgb_float(g);
b = util_format_linear_to_srgb_float(b);
}
/* Convert colourspace while we still have floats */
if (util_format_is_srgb(format)) {
r = util_format_linear_to_srgb_float(r);
g = util_format_linear_to_srgb_float(g);
b = util_format_linear_to_srgb_float(b);
}
/* Look up the layout of the tilebuffer */
assert(internal < ARRAY_SIZE(tib_layouts));
struct mali_tib_layout l = tib_layouts[internal];
/* Look up the layout of the tilebuffer */
assert(internal < ARRAY_SIZE(tib_layouts));
struct mali_tib_layout l = tib_layouts[internal];
unsigned count_r = l.int_r + l.frac_r;
unsigned count_g = l.int_g + l.frac_g + count_r;
unsigned count_b = l.int_b + l.frac_b + count_g;
ASSERTED unsigned count_a = l.int_a + l.frac_a + count_b;
unsigned count_r = l.int_r + l.frac_r;
unsigned count_g = l.int_g + l.frac_g + count_r;
unsigned count_b = l.int_b + l.frac_b + count_g;
ASSERTED unsigned count_a = l.int_a + l.frac_a + count_b;
/* Must fill the word */
assert(count_a == 32);
/* Must fill the word */
assert(count_a == 32);
/* Convert the transformed float colour to the given layout */
uint32_t ur = float_to_fixed(r, l.int_r, l.frac_r, dithered) << 0;
uint32_t ug = float_to_fixed(g, l.int_g, l.frac_g, dithered) << count_r;
uint32_t ub = float_to_fixed(b, l.int_b, l.frac_b, dithered) << count_g;
uint32_t ua = float_to_fixed(a, l.int_a, l.frac_a, dithered) << count_b;
/* Convert the transformed float colour to the given layout */
uint32_t ur = float_to_fixed(r, l.int_r, l.frac_r, dithered) << 0;
uint32_t ug = float_to_fixed(g, l.int_g, l.frac_g, dithered) << count_r;
uint32_t ub = float_to_fixed(b, l.int_b, l.frac_b, dithered) << count_g;
uint32_t ua = float_to_fixed(a, l.int_a, l.frac_a, dithered) << count_b;
pan_pack_color_32(packed, ur | ug | ub | ua);
pan_pack_color_32(packed, ur | ug | ub | ua);
}
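/*
 * Illustrative usage sketch (not part of this diff): packing a clear
 * colour for an assumed RGBA8 UNORM render target. Only pan_pack_color()
 * itself comes from this file; the format choice is an example.
 */
static void
example_pack_clear_color(uint32_t packed[4])
{
   const union pipe_color_union red = {
      .f = {1.0f, 0.0f, 0.0f, 1.0f},
   };
   /* dithered = false keeps the example deterministic; dithering only
    * changes the result for formats with fractional tilebuffer bits. */
   pan_pack_color(packed, &red, PIPE_FORMAT_R8G8B8A8_UNORM, false);
}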

File diff suppressed because it is too large

Some files were not shown because too many files have changed in this diff